3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
9 * Full author contact details are available in file CREDITS.
17 #include "bufferparams.h"
20 #include "support/lstrings.h"
21 #include "support/std_sstream.h"
23 #include <boost/regex.hpp>
27 using lyx::support::ascii_lowercase;
28 using lyx::support::bformat;
29 using lyx::support::compare_ascii_no_case;
30 using lyx::support::contains;
31 using lyx::support::getVectorFromString;
32 using lyx::support::ltrim;
33 using lyx::support::prefixIs;
34 using lyx::support::rtrim;
35 using lyx::support::split;
36 using lyx::support::subst;
37 using lyx::support::token;
38 using lyx::support::trim;
41 using std::ostringstream;
// Builds the list of citation command names that this file accepts by
// copying a static table of C strings into a vector.
// NOTE(review): the embedded line numbers skip some lines inside the table
// (52, 58, 61-65), so the entries shown here are probably not the full list.
49 vector<string> const init_possible_cite_commands()
51 char const * const pos[] = {
// Plain, starred ('*') and capitalised spellings of the commands.
53 "citet", "citep", "citealt", "citealp",
54 "citeauthor", "citeyear", "citeyearpar",
55 "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
56 "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
57 "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
// Spellings that carry a "foot" prefix.
59 "footcite", "footcitet", "footcitep", "footcitealt",
60 "footcitealp", "footciteauthor", "footciteyear",
// Number of entries in the table.
66 size_t const size_pos = sizeof(pos) / sizeof(pos[0]);
// Construct the vector from the [first, one-past-last) range of the table.
68 return vector<string>(pos, pos + size_pos);
// Accessor for the list of accepted citation commands. The list is produced
// by init_possible_cite_commands() when the static below is initialised and
// is reused afterwards.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably it returns 'pos' -- confirm.
72 vector<string> const & possible_cite_commands()
74 static vector<string> const pos = init_possible_cite_commands();
79 bool is_possible_cite_command(string const & input)
81 vector<string> const & possibles = possible_cite_commands();
82 vector<string>::const_iterator const end = possibles.end();
83 return std::find(possibles.begin(), end, input) != end;
// Returns a citation command chosen according to \p engine.
// NOTE(review): only the signature and two case labels are visible in this
// excerpt; the command returned for each engine cannot be read from here.
87 string const default_cite_command(CiteEngine engine)
// Each natbib engine has its own case label (the case bodies are not shown).
94 case ENGINE_NATBIB_AUTHORYEAR:
97 case ENGINE_NATBIB_NUMERICAL:
// Converts \p input into a citation command that suits \p engine.
// NOTE(review): the switch header, some case labels, some conditions and the
// final return are on lines not visible in this excerpt; the comments below
// describe only the statements that are shown.
110 string const asValidLatexCommand(string const & input,
111 CiteEngine_enum const & engine)
// The command to fall back on for this engine.
113 string const default_str = default_cite_command(engine);
// Inputs that are not in the list of known commands are singled out first
// (the action taken for them is on a line not shown here).
114 if (!is_possible_cite_command(input))
// (case label not visible) the engine default is used unchanged.
120 output = default_str;
// Both natbib engines: "cite", "citefield", "citetitle" and "cite*" are
// replaced by the default; otherwise a leading "foot" is removed by dropping
// the first four characters.
123 case ENGINE_NATBIB_AUTHORYEAR:
124 case ENGINE_NATBIB_NUMERICAL:
125 if (input == "cite" || input == "citefield" ||
126 input == "citetitle" || input == "cite*")
127 output = default_str;
128 else if (prefixIs(input, "foot"))
129 output = input.substr(4);
134 case ENGINE_JURABIB: {
135 // Jurabib does not support the 'uppercase' natbib style.
// Replace the first character of the input by a lowercase 'c'
// (the guarding condition is on a line not shown here).
137 output = string(1, 'c') + input.substr(1);
141 // Jurabib does not support the 'full' natbib style.
// Remove a trailing '*' from every command except "cite*".
// NOTE(review): n wraps around if output is empty; whether output can be
// empty at this point cannot be seen from the visible lines -- confirm.
142 string::size_type const n = output.size() - 1;
143 if (output != "cite*" && output[n] == '*')
144 output = output.substr(0, n);
// Extracts the family name from a single author name.
// NOTE(review): the declaration of 'fname' and the final return are on lines
// not visible in this excerpt; presumably fname starts as a copy of \p name.
154 string const familyName(string const & name)
156 // Very simple parser
159 // possible authorname combinations are:
160 // "Surname, FirstName"
162 // "FirstName Surname"
// With a comma present, the family name is the text before the first comma
// (leading whitespace removed).
164 string::size_type idx = fname.find(',');
165 if (idx != string::npos)
166 return ltrim(fname.substr(0, idx));
// Otherwise, if there is a '.', keep only what follows the last one.
167 idx = fname.rfind('.');
168 if (idx != string::npos)
169 fname = ltrim(fname.substr(idx + 1));
170 // test if we have a LaTeX Space in front
171 if (fname[0] == '\\')
// Skip the backslash and the character that follows it.
// NOTE(review): substr(2) throws std::out_of_range when fname consists of
// the backslash alone -- confirm whether such input can reach this point.
172 return fname.substr(2);
// Produces a short author string for the entry stored under \p key in
// \p map: one family name, "A and B" for two authors, or "A et al." for more.
// NOTE(review): several lines (the handling of a key that is not found, some
// conditions and returns) are not visible in this excerpt.
178 string const getAbbreviatedAuthor(InfoMap const & map, string const & key)
180 BOOST_ASSERT(!map.empty());
182 InfoMap::const_iterator it = map.find(key);
// The text associated with the key.
185 string const & data = it->second;
187 // Is the entry a BibTeX one or one from lyx-layout "bibliography"?
188 string::size_type const pos = data.find("TheBibliographyRef");
189 if (pos != string::npos) {
// Everything in front of the marker (minus one character) is the label.
// NOTE(review): if the marker sits at position 0, pos - 1 wraps around and
// substr returns the whole string -- confirm this cannot happen.
194 string const opt = trim(data.substr(0, pos - 1));
// The author part is the piece of the label before the first '('.
199 split(opt, authors, '(');
// BibTeX entry: use the "author" field, then "editor", then "key"
// (the emptiness test guarding the "editor" lookup is not shown).
203 string author = parseBibTeX(data, "author");
206 author = parseBibTeX(data, "editor");
208 if (author.empty()) {
209 author = parseBibTeX(data, "key");
// Individual names are separated by " and ".
215 vector<string> const authors = getVectorFromString(author, " and ");
// Exactly two authors: both family names joined by a translatable "and".
219 if (authors.size() == 2)
220 return bformat(_("%1$s and %2$s"),
221 familyName(authors[0]), familyName(authors[1]));
// More than two authors: first family name followed by "et al.".
223 if (authors.size() > 2)
224 return bformat(_("%1$s et al."), familyName(authors[0]));
// Otherwise: the family name of the first author.
226 return familyName(authors[0]);
// Produces the year of the entry stored under \p key in \p map.
// NOTE(review): several lines (the handling of a key that is not found, the
// declarations of 'opt', 'authors' and 'year' in the first branch, and the
// returns) are not visible in this excerpt.
230 string const getYear(InfoMap const & map, string const & key)
232 BOOST_ASSERT(!map.empty());
234 InfoMap::const_iterator it = map.find(key);
// The text associated with the key.
237 string const & data = it->second;
239 // Is the entry a BibTeX one or one from lyx-layout "bibliography"?
240 string::size_type const pos = data.find("TheBibliographyRef");
241 if (pos != string::npos) {
// Continuation of a statement begun on a line not shown: the text in front
// of the marker (minus one character), trimmed.
// NOTE(review): pos - 1 wraps around when the marker sits at position 0.
247 trim(data.substr(0, pos - 1));
// Cut the label at '(' and then cut the remainder at ')': the year is the
// text between the parentheses.
252 string const tmp = split(opt, authors, '(');
254 split(tmp, year, ')');
// BibTeX entry: read the "year" field.
259 string year = parseBibTeX(data, "year");
269 // A functor for use with std::sort, leading to case insensitive sorting
270 struct compareNoCase: public std::binary_function<string, string, bool>
272 bool operator()(string const & s1, string const & s2) const {
273 return compare_ascii_no_case(s1, s2) < 0;
// Collects the keys of \p map into a vector and sorts them without regard
// to case.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably it returns 'bibkeys' -- confirm.
280 vector<string> const getKeys(InfoMap const & map)
282 vector<string> bibkeys;
283 InfoMap::const_iterator it = map.begin();
284 InfoMap::const_iterator end = map.end();
// Copy every key of the map.
285 for (; it != end; ++it) {
286 bibkeys.push_back(it->first);
// Order the keys with the compareNoCase functor.
289 std::sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
// Builds a one-line description of the entry stored under \p key in \p map.
// NOTE(review): the handling of a key that is not found, most of the
// emptiness tests that guard the statements below, and the returns are on
// lines not visible in this excerpt.
294 string const getInfo(InfoMap const & map, string const & key)
296 BOOST_ASSERT(!map.empty());
298 InfoMap::const_iterator it = map.find(key);
// The text associated with the key.
301 string const & data = it->second;
303 // is the entry a BibTeX one or one from lyx-layout "bibliography"?
304 string const separator("TheBibliographyRef");
305 string::size_type const pos = data.find(separator);
306 if (pos != string::npos) {
// The description is whatever follows the separator, trimmed.
307 string::size_type const pos2 = pos + separator.size();
308 string const info = trim(data.substr(pos2));
312 // Search for all possible "required" keys
// "editor" stands in for "author" (the guarding test is not shown).
313 string author = parseBibTeX(data, "author");
315 author = parseBibTeX(data, "editor");
317 string year = parseBibTeX(data, "year");
318 string title = parseBibTeX(data, "title");
319 string booktitle = parseBibTeX(data, "booktitle");
320 string chapter = parseBibTeX(data, "chapter");
321 string number = parseBibTeX(data, "number");
322 string volume = parseBibTeX(data, "volume");
323 string pages = parseBibTeX(data, "pages");
// Where the work appeared: "journal", then "publisher", "school" and
// "institution" are looked up in turn (the guarding tests are not shown).
325 string media = parseBibTeX(data, "journal");
327 media = parseBibTeX(data, "publisher");
329 media = parseBibTeX(data, "school");
331 media = parseBibTeX(data, "institution");
// Assemble the description field by field, separated by ", ".
333 ostringstream result;
335 result << author << ", ";
338 if (!booktitle.empty())
339 result << ", in " << booktitle;
340 if (!chapter.empty())
341 result << ", Ch. " << chapter;
343 result << ", " << media;
345 result << ", vol. " << volume;
347 result << ", no. " << number;
349 result << ", pp. " << pages;
351 result << ", " << year;
// Strip trailing whitespace from the assembled text.
353 string const result_str = rtrim(result.str());
354 if (!result_str.empty())
357 // This should never happen (or at least be very unusual!)
364 // Escape special chars.
365 // All characters are literals except: '.|*?+(){}[]^$\'
366 // These characters are literals when preceded by a "\", which is done here
367 string const escape_special_chars(string const & expr)
369 // Search for all chars '.|*?+(){}[^$]\'
370 // Note that '[' and '\' must be escaped.
371 // This is a limitation of boost::regex, but all other chars in BREs
372 // are assumed literal.
373 boost::RegEx reg("[].|*?+(){}^$\\[\\\\]");
375 // $& is a perl-like expression that expands to all of the current match
376 // The '$' must be prefixed with the escape character '\' for
377 // boost to treat it as a literal.
378 // Thus, to prefix a matched expression with '\', we use:
379 return reg.Merge(expr, "\\\\$&");
383 // A functor for use with std::find_if, used to ascertain whether a
384 // data entry matches the required regex_
// NOTE(review): the declaration of 'map_' and the lines that create 'data'
// inside operator() are not visible in this excerpt.
385 struct RegexMatch : public std::unary_function<string, bool>
387 // re and icase are used to construct an instance of boost::RegEx.
388 // if icase is true, then matching is insensitive to case
389 RegexMatch(InfoMap const & m, string const & re, bool icase)
390 : map_(m), regex_(re, icase) {}
// Returns true when the regex is found in the text built for \p key.
392 bool operator()(string const & key) const {
396 // the data searched is the key + its associated BibTeX/biblio
// Append the map's text for this key, if the key is present.
399 InfoMap::const_iterator info = map_.find(key);
400 if (info != map_.end())
401 data += ' ' + info->second;
403 // Attempts to find a match for the current RE
404 // somewhere in data.
405 return regex_.Search(data);
// True when the regex object reports error code 0.
408 bool validRE() const { return regex_.error_code() == 0; }
// mutable: Search() is called on it from the const operator() above.
412 mutable boost::RegEx regex_;
// Searches 'keys', beginning at 'start', for a key whose data matches
// 'search_expr', and returns an iterator into 'keys'.
// NOTE(review): the remaining parameters (lines 423-426 of the original),
// the results returned by the early exits, and the conditions that select
// the branches below are not visible in this excerpt.
418 vector<string>::const_iterator
419 searchKeys(InfoMap const & theMap,
420 vector<string> const & keys,
421 string const & search_expr,
422 vector<string>::const_iterator start,
427 // Preliminary checks
// 'start' must point at an element of 'keys'.
428 if (start < keys.begin() || start >= keys.end())
// Surrounding whitespace is not part of the search expression.
431 string expr = trim(search_expr);
436 // We must escape special chars in the search_expr so that
437 // it is treated as a simple string by boost::regex.
438 expr = escape_special_chars(expr);
440 // Build the functor that will be passed to find_if.
// The third argument asks for case-insensitive matching.
441 RegexMatch const match(theMap, expr, !caseSensitive);
442 if (!match.validRE())
445 // Search the vector of 'keys' from 'start' for one that matches the
446 // predicate 'match'. Searching can be forward or backward from start.
448 return std::find_if(start, keys.end(), match);
// Backward search: a reverse_iterator built from 'start' refers to the
// element in front of 'start'.
450 vector<string>::const_reverse_iterator rit(start);
451 vector<string>::const_reverse_iterator rend = keys.rend();
452 rit = std::find_if(rit, rend, match);
456 // This is correct and always safe.
457 // (See Meyers' Effective STL, Item 28.)
// Convert the reverse iterator back to a forward iterator that refers to
// the same element.
458 return (++rit).base();
// Extracts the value of the field named \p findkey from the text of one
// BibTeX entry given in \p data.
// NOTE(review): the declarations of 'data_', 'Entries', 'found', 'keyvalue'
// and 'enclosing', several branches and several returns are on lines not
// visible in this excerpt; the comments describe only what is shown.
462 string const parseBibTeX(string data, string const & findkey)
465 // at first we delete all characters right of '%' and
466 // replace tabs through a space and remove leading spaces
467 // we read the data line by line so that the \n are
// Fetch the line at index 'Entries' and keep going until an empty one
// is returned.
471 string dummy = token(data,'\n', Entries);
472 while (!dummy.empty()) {
473 dummy = subst(dummy, '\t', ' '); // no tabs
474 dummy = ltrim(dummy); // no leading spaces
475 // ignore lines with a beginning '%' or ignore all right of %
476 string::size_type const idx =
477 dummy.empty() ? string::npos : dummy.find('%');
478 if (idx != string::npos)
479 dummy.erase(idx, string::npos);
480 // do we have a new token or a new line of
481 // the same one? In the first case we ignore
482 // the \n and in the second we replace it
484 if (!dummy.empty()) {
// A line without '=' is appended to the accumulated text after a space
// (the other branch is not shown).
485 if (!contains(dummy, '='))
486 data_ += ' ' + dummy;
490 dummy = token(data, '\n', ++Entries);
493 // replace double commas with "" for easy scanning
494 data = subst(data_, ",,", "\"\"");
500 // now get only the important line of the bibtex entry.
501 // all entries are divided by ',' except the last one.
502 data += ','; // now we have same behaviour for all entries
503 // because the last one is "blah ... }"
506 // parsing of title and booktitle is different from the
507 // others, because booktitle contains title
// Walk the comma-separated pieces until one contains the wanted key
// (compared in lower case) or the pieces run out.
509 dummy = token(data, ',', Entries++);
510 if (!dummy.empty()) {
511 found = contains(ascii_lowercase(dummy), findkey);
512 if (findkey == "title" &&
513 contains(ascii_lowercase(dummy), "booktitle"))
516 } while (!found && !dummy.empty());
521 // we are not sure, if we get all, because "key= "blah, blah" is
523 // Therefore we read all until the next "=" character, which follows a
// Re-join the following pieces with ',' until one contains '='.
526 dummy = token(data, ',', Entries++);
527 while (!contains(dummy, '=') && !dummy.empty()) {
528 keyvalue += ',' + dummy;
529 dummy = token(data, ',', Entries++);
532 // replace double "" with originals ,, (two commas)
533 // leaving us with the all-important line
534 data = subst(keyvalue, "\"\"", ",,");
539 // 2. if there is no opening '{' then a closing '}' is probably cruft.
540 if (!contains(data, '{'))
541 data = rtrim(data, "}");
542 // happens, when last keyword
543 string::size_type const idx =
544 !data.empty() ? data.find('=') : string::npos;
546 if (idx == string::npos)
// Keep the text from the '=' onwards.
549 data = trim(data.substr(idx));
551 if (data.length() < 2 || data[0] != '=') { // a valid entry?
554 // delete '=' and the following spaces
555 data = ltrim(data, " =");
556 if (data.length() < 2) {
557 return data; // not long enough to find delimiters
// Scanning starts right after the opening delimiter at index 0.
559 string::size_type keypos = 1;
// The value is delimited either by braces or by double quotes.
561 if (data[0] == '{') {
563 } else if (data[0] == '"') {
566 // no {} and no "", pure data but with a
567 // possible ',' at the end
568 return rtrim(data, ",");
// Step over every nested {...} group that opens before the closing
// delimiter, so that a delimiter inside a group does not end the value.
570 string tmp = data.substr(keypos);
571 while (tmp.find('{') != string::npos &&
572 tmp.find('}') != string::npos &&
573 tmp.find('{') < tmp.find('}') &&
574 tmp.find('{') < tmp.find(enclosing)) {
576 keypos += tmp.find('{') + 1;
577 tmp = data.substr(keypos);
578 keypos += tmp.find('}') + 1;
579 tmp = data.substr(keypos);
581 if (tmp.find(enclosing) == string::npos)
// keypos now indexes the closing delimiter; return the text between the
// opening and the closing delimiter.
584 keypos += tmp.find(enclosing);
585 return data.substr(1, keypos - 1);
// Command names, listed in the same order as the entries of citeStyles
// below. The CitationStyle constructor in this file uses the position of a
// name in this array as the index into citeStyles.
595 char const * const citeCommands[] = {
596 "cite", "citet", "citep", "citealt", "citealp", "citeauthor",
597 "citeyear", "citeyearpar" };
// Number of entries in citeCommands.
599 unsigned int const nCiteCommands =
600 sizeof(citeCommands) / sizeof(char *);
// Style values, one per entry of citeCommands.
602 CiteStyle const citeStyles[] = {
603 CITE, CITET, CITEP, CITEALT, CITEALP,
604 CITEAUTHOR, CITEYEAR, CITEYEARPAR };
// Number of entries in citeStyles.
606 unsigned int const nCiteStyles =
607 sizeof(citeStyles) / sizeof(CiteStyle);
// Styles searched by CitationStyle::asLatexStr() in its first lookup.
609 CiteStyle const citeStylesFull[] = {
610 CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
// Number of entries in citeStylesFull.
612 unsigned int const nCiteStylesFull =
613 sizeof(citeStylesFull) / sizeof(CiteStyle);
// Styles searched by CitationStyle::asLatexStr() in its second lookup.
615 CiteStyle const citeStylesUCase[] = {
616 CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
// Number of entries in citeStylesUCase.
618 unsigned int const nCiteStylesUCase =
619 sizeof(citeStylesUCase) / sizeof(CiteStyle);
// Initialises the style description from a command name. The members start
// as CITE / not full / no forced upper case.
// NOTE(review): lines 626-629, 631-635, 638, 640-641 and 644-645 of the
// original are not visible in this excerpt, so any guards placed there
// (e.g. for an empty command or a name that is not found) cannot be seen.
624 CitationStyle::CitationStyle(string const & command)
625 : style(CITE), full(false), forceUCase(false)
// Work on a modifiable copy of the command.
630 string cmd = command;
// Remove a trailing '*' from every command except "cite".
// NOTE(review): n wraps around if cmd is empty -- confirm that an empty
// command is rejected on one of the lines not shown.
636 string::size_type const n = cmd.size() - 1;
637 if (cmd != "cite" && cmd[n] == '*') {
639 cmd = cmd.substr(0,n);
// Look the name up in citeCommands; its position selects the style.
642 char const * const * const last = citeCommands + nCiteCommands;
643 char const * const * const ptr = std::find(citeCommands, last, cmd);
646 size_t idx = ptr - citeCommands;
647 style = citeStyles[idx];
// Builds the command string for this style.
// NOTE(review): the conditions around the two lookups, the statements they
// guard and the return are on lines not visible in this excerpt.
652 string const CitationStyle::asLatexStr() const
// Start from the base command name, using the style value as array index.
// NOTE(review): assumes CiteStyle values match positions in citeCommands --
// confirm against the enum definition.
654 string cite = citeCommands[style];
// Is this style listed in citeStylesFull?
656 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
657 if (std::find(citeStylesFull, last, style) != last)
// Is this style listed in citeStylesUCase?
662 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
663 if (std::find(citeStylesUCase, last, style) != last)
671 CiteEngine_enum getEngine(Buffer const & buffer)
673 return buffer.params().cite_engine;
// Returns a selection of the entries of citeStyles chosen according to
// \p engine.
// NOTE(review): the switch header, the other case labels, the assignments
// to 'start', the declaration of 'j' and the return are on lines not visible
// in this excerpt.
677 vector<CiteStyle> const getCiteStyles(CiteEngine_enum const & engine)
// How many styles to copy and (presumably) where to begin in citeStyles.
679 unsigned int nStyles = 0;
680 unsigned int start = 0;
// Both natbib engines take one style fewer than the table holds.
687 case ENGINE_NATBIB_AUTHORYEAR:
688 case ENGINE_NATBIB_NUMERICAL:
689 nStyles = nCiteStyles - 1;
// (case label not visible) take as many styles as the table holds.
693 nStyles = nCiteStyles;
698 typedef vector<CiteStyle> cite_vec;
// Size the result, then fill it from citeStyles; i indexes the result and
// j indexes the table.
700 cite_vec styles(nStyles);
701 cite_vec::size_type i = 0;
703 for (; i != styles.size(); ++i, ++j)
704 styles[i] = citeStyles[j];
// Builds one display string per entry of \p styles for the citation \p key,
// in the numerical flavour ("#ID" stands where the number would go).
// NOTE(review): the return type, the condition of the first early return,
// the switch over the style and the final return are on lines not visible
// in this excerpt.
711 getNumericalStrings(string const & key,
712 InfoMap const & map, vector<CiteStyle> const & styles)
715 return vector<string>();
// Without both an author and a year nothing is produced.
718 string const author = getAbbreviatedAuthor(map, key);
719 string const year = getYear(map, key);
720 if (author.empty() || year.empty())
721 return vector<string>();
// One result slot per requested style.
723 vector<string> vec(styles.size());
724 for (vector<string>::size_type i = 0; i != vec.size(); ++i) {
// The case labels that select these formats are not shown.
734 str = author + " [#ID]";
738 str = author + " #ID";
754 str = '(' + year + ')';
// Builds one display string per entry of \p styles for the citation \p key,
// in the author-year flavour.
// NOTE(review): the return type, the condition of the first early return,
// the case labels of the switch over the style and the final return are on
// lines not visible in this excerpt.
766 getAuthorYearStrings(string const & key,
767 InfoMap const & map, vector<CiteStyle> const & styles)
770 return vector<string>();
// Without both an author and a year nothing is produced.
773 string const author = getAbbreviatedAuthor(map, key);
774 string const year = getYear(map, key);
775 if (author.empty() || year.empty())
776 return vector<string>();
// One result slot per requested style.
778 vector<string> vec(styles.size());
779 for (vector<string>::size_type i = 0; i != vec.size(); ++i) {
784 // jurabib only: Author/Annotator
785 // (i.e. the "before" field, 2nd opt arg)
786 str = author + "/<" + _("before") + '>';
// The case labels that select the formats below are not shown.
// Author (Year)
790 str = author + " (" + year + ')';
// (Author, Year)
794 str = '(' + author + ", " + year + ')';
// Author Year
798 str = author + ' ' + year ;
// Author, Year
802 str = author + ", " + year ;
// (Year)
814 str = '(' + year + ')';
824 } // namespace biblio