3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
8 * \author Richard Kimberly Heck
10 * \author Jürgen Spitzmüller
12 * Full author contact details are available in file CREDITS.
17 #include "BiblioInfo.h"
20 #include "BufferParams.h"
24 #include "TextClass.h"
25 #include "TocBackend.h"
28 #include "support/convert.h"
29 #include "support/debug.h"
30 #include "support/docstream.h"
31 #include "support/FileName.h"
32 #include "support/gettext.h"
33 #include "support/lassert.h"
34 #include "support/lstrings.h"
35 #include "support/textutils.h"
42 using namespace lyx::support;
// Undoes the temporary masking applied to names: "$$space!" is substituted
// by a blank and "$$comma!" by a comma; the substituted copy is returned.
49 // Remove placeholders from names
50 docstring renormalize(docstring const & input)
52 docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
53 return subst(res, from_ascii("$$comma!"), from_ascii(","));
// Returns a pair (prefix, family name). A surname made of fewer than two
// blank-separated tokens is returned as the family name with an empty prefix.
57 // Split the surname into prefix ("von-part") and family name
58 pair<docstring, docstring> parseSurname(docstring const & sname)
60 // Split the surname into its tokens
61 vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
62 if (pieces.size() < 2)
63 return make_pair(docstring(), sname);
65 // Now we look for pieces that begin with a lower case letter.
66 // All except for the very last token constitute the "von-part".
68 vector<docstring>::const_iterator it = pieces.begin();
69 vector<docstring>::const_iterator const en = pieces.end();
71 for (; it != en; ++it) {
74 // If this is the last piece, then what we now have is
75 // the family name, notwithstanding the casing.
78 char_type const c = (*it)[0];
79 // If the piece starts with an upper case char, we assume
80 // this is part of the surname.
83 // Nothing of the former, so add this piece to the prename
91 // Reconstruct the family name.
92 // Note that if we left the loop because it + 1 == en,
93 // then this will still do the right thing, i.e., make surname
94 // just be the last piece.
97 for (; it != en; ++it) {
104 return make_pair(prefix, surname);
// Two input shapes are handled below: a comma-separated one
// ("surname, prename" or "surname, suffix, prename") and a blank-separated
// one ("prename surname", optionally with a lower-case prefix in between).
116 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
117 name_parts nameParts(docstring const & iname)
123 // First we check for groupings (via {...}) and replace blanks and
124 // commas inside groups with temporary placeholders
127 docstring::const_iterator p = iname.begin();
128 while (p != iname.end()) {
129 // count grouping level
134 // generate string with probable placeholders
135 if (*p == ' ' && gl > 0)
136 name += from_ascii("$$space!");
137 else if (*p == ',' && gl > 0)
138 name += from_ascii("$$comma!");
144 // Now we look for a comma, and take the last name to be everything
145 // preceding the right-most one, so that we also get the name suffix
147 vector<docstring> pieces = getVectorFromString(name);
148 if (pieces.size() > 1) {
149 // Whether we have a name suffix or not, the prename is
151 res.prename = renormalize(pieces.back());
152 // The family name, conversely, is always the first item.
153 // However, it might contain a prefix (aka "von" part)
154 docstring const sname = pieces.front();
// NOTE(review): parseSurname(sname) is evaluated twice, once per pair member.
155 res.prefix = renormalize(parseSurname(sname).first);
156 res.surname = renormalize(parseSurname(sname).second);
157 // If we have three pieces (the maximum allowed by BibTeX),
158 // the second one is the name suffix.
159 if (pieces.size() > 2)
160 res.suffix = renormalize(pieces.at(1));
164 // OK, so now we want to look for the last name.
165 // Split on spaces, to get various tokens.
166 pieces = getVectorFromString(name, from_ascii(" "));
167 // No space: Only a family name given
168 if (pieces.size() < 2) {
169 res.surname = renormalize(pieces.back());
172 // If we get two pieces, assume "prename surname"
173 if (pieces.size() == 2) {
174 res.prename = renormalize(pieces.front());
175 res.surname = renormalize(pieces.back());
179 // Three or more pieces: A name prefix (aka "von" part) might be included.
180 // We look for the first piece that begins with a lower case letter
181 // (which is the name prefix, if it is not the last token) or the last token.
183 vector<docstring>::const_iterator it = pieces.begin();
184 vector<docstring>::const_iterator const en = pieces.end();
186 for (; it != en; ++it) {
189 char_type const c = (*it)[0];
190 // If the piece starts with a lower case char, we assume
191 // this is the name prefix and thus prename is complete.
194 // Same if this is the last piece, which is always the surname.
197 // Nothing of the former, so add this piece to the prename
205 // Now reconstruct the family name and strip the prefix.
206 // Note that if we left the loop because it + 1 == en,
207 // then this will still do the right thing, i.e., make surname
208 // just be the last piece.
211 for (; it != en; ++it) {
218 res.prename = renormalize(prename);
// NOTE(review): parseSurname(surname) is evaluated twice, once per pair member.
219 res.prefix = renormalize(parseSurname(surname).first);
220 res.surname = renormalize(parseSurname(surname).second);
// Builds a display name: the placeholders %prename%, %surname%, %prefix%
// and %suffix% of scheme are substituted by the parts that nameParts()
// extracts from name. The three regular expressions recognise groups of
// the form {%prename%[[...]]}, {%suffix%[[...]]} and {%prefix%[[...]]}.
// NOTE(review): nameParts(name) is evaluated four times below, once per
// member; a single call stored in a local would parse the name only once.
225 docstring constructName(docstring const & name, string const & scheme)
227 // re-constructs a name from name parts according
229 docstring const prename = nameParts(name).prename;
230 docstring const surname = nameParts(name).surname;
231 docstring const prefix = nameParts(name).prefix;
232 docstring const suffix = nameParts(name).suffix;
234 static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
235 static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
236 static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
238 // Changing the first parameter of regex_match() may corrupt the
239 // second one. In this case we use the temporary string tmp.
240 if (regex_match(scheme, sub, reg1)) {
242 if (!prename.empty())
246 if (regex_match(res, sub, reg2)) {
247 string tmp = sub.str(1);
250 res = tmp + sub.str(5);
252 if (regex_match(res, sub, reg3)) {
253 string tmp = sub.str(1);
256 res = tmp + sub.str(5);
258 docstring result = from_ascii(res);
259 result = subst(result, from_ascii("%prename%"), prename);
260 result = subst(result, from_ascii("%surname%"), surname);
261 result = subst(result, from_ascii("%prefix%"), prefix);
262 result = subst(result, from_ascii("%suffix%"), suffix);
// Splits an author-type string into its individual names and returns them
// as a vector. Names are separated by " and " (in any casing) outside of
// {...} groups; the split itself is done on the " $$namesep! " marker that
// is inserted for those separators.
267 vector<docstring> const getAuthors(docstring const & author)
269 // We check for groupings (via {...}) and only consider " and "
270 // outside groups as author separator. This is to account
271 // for cases such as {{Barnes and Noble, Inc.}}, which
272 // need to be treated as one single family name.
273 // We use temporary placeholders in order to differentiate the
274 // diverse " and " cases.
276 // First, we temporarily replace all ampersands. It is rather unusual
277 // in author names, but can happen (consider cases such as "C \& A Corp.").
278 docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
279 // Then, we temporarily make all " and " strings to ampersands in order
280 // to handle them later on a per-char level. Note that arbitrary casing
281 // ("And", "AND", "aNd", ...) is allowed in bibtex (#10465).
282 static regex const and_reg("(.* )([aA][nN][dD])( .*)");
284 string res = to_utf8(iname);
// Each pass rewrites one " and " matched by and_reg; the loop ends when
// and_reg no longer matches.
285 while (regex_match(res, sub, and_reg))
286 res = sub.str(1) + "&" + sub.str(3);
287 iname = from_utf8(res);
288 // Now we traverse through the string and replace the "&" by the proper
289 // output in- and outside groups
292 docstring::const_iterator p = iname.begin();
293 while (p != iname.end()) {
294 // count grouping level
299 // generate string with probable placeholders
302 // Inside groups, we output "and"
303 name += from_ascii("and");
305 // Outside groups, we output a separator
306 name += from_ascii("$$namesep!");
313 // re-insert the literal ampersands
314 name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
316 // Now construct the actual vector
317 return getVectorFromString(name, from_ascii(" $$namesep! "));
// Returns true when getAuthors() yields more than one name for author.
321 bool multipleAuthors(docstring const & author)
323 return getAuthors(author).size() > 1;
// The input is consumed from the front of a working copy (val) while the
// flags below track whether a command name or a math span is being scanned.
327 // converts a string containing LaTeX commands into unicode
329 docstring convertLaTeXCommands(docstring const & str)
334 bool scanning_cmd = false;
335 bool scanning_math = false;
336 bool is_section = false;
337 bool escaped = false; // used to catch \$, etc.
338 while (!val.empty()) {
339 char_type const ch = val[0];
341 // if we're scanning math, we output everything until we
342 // find an unescaped $, at which point we break out.
349 scanning_math = false;
355 // if we're scanning a command name, then we just
356 // discard characters until we hit something that
359 if (!is_section && ch == 'S') {
364 if (isAlphaASCII(ch)) {
369 } else if (is_section) {
// U+00A7 is the section sign
370 ret.push_back(0x00a7);
374 // so we're done with this command.
375 // now we fall through and check this character.
377 scanning_cmd = false;
380 // was the last character a \? If so, then this is something like:
381 // \\ or \$, so we'll just output it. That's probably not always right...
383 // exception: output \, as THIN SPACE
385 ret.push_back(0x2009);
// U+00A0 is the no-break space
394 ret += char_type(0x00a0);
402 scanning_math = true;
406 // Change text mode accents in the form
407 // {\v a} to \v{a} (see #9340).
408 // FIXME: This is a sort of mini-tex2lyx.
409 // Use the real tex2lyx instead!
// tma_reg matches, at the start of the string, an opening brace, a
// backslash, one of the letters bcCdfGhHkrtuUv, one whitespace character,
// one word character and a closing brace.
410 static regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
411 if (regex_search(to_utf8(val), tma_reg)) {
413 val.replace(2, 1, from_ascii("{"));
417 // Apart from the above, we just ignore braces
418 if (ch == '{' || ch == '}') {
423 // we're going to check things that look like commands, so if
424 // this doesn't, just output it.
431 // ok, could be a command of some sort
432 // let's see if it corresponds to some unicode
433 // unicodesymbols has things in the form: \"{u},
434 // whereas we may see things like: \"u. So we'll
435 // look for that and change it, if necessary.
436 // FIXME: This is a sort of mini-tex2lyx.
437 // Use the real tex2lyx instead!
438 static regex const reg("^\\\\\\W\\w");
439 if (regex_search(to_utf8(val), reg)) {
// Insert the closing brace first so that index 2 is still valid for the
// opening brace.
440 val.insert(3, from_ascii("}"));
441 val.insert(2, from_ascii("{"));
445 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
446 Encodings::TEXT_CMD, termination, rem);
447 if (!cnvtd.empty()) {
448 // it did, so we'll take that bit and proceed with what's left
453 // it's a command of some sort
// A rich text span opens with "{!" and closes with "!}"; scanning_rich is
// set while the scanner is inside such a span.
462 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
463 docstring processRichtext(docstring const & str, bool richtext)
468 bool scanning_rich = false;
469 while (!val.empty()) {
470 char_type const ch = val[0];
471 if (ch == '{' && val.size() > 1 && val[1] == '!') {
472 // beginning of rich text
473 scanning_rich = true;
// end of rich text
477 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
479 scanning_rich = false;
487 // we need to escape '<' and '>'
495 } else if (!scanning_rich /* && !richtext */)
497 // else the character is discarded, which will happen only if
498 // richtext == false and we are scanning rich text
507 //////////////////////////////////////////////////////////////////////
511 //////////////////////////////////////////////////////////////////////
// Creates an entry for the given key and entry type. The entry is flagged
// as BibTeX (is_bibtex_ = true), num_bib_key_ and modifier_ start at 0,
// and info_ and format_ are value-initialized.
513 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
514 : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type),
515 info_(), format_(), modifier_(0)
// Returns the formatted name list of this entry as produced by
// getAuthorList(). The "author" field is read first; the "editor" field is
// assigned in its place (presumably only when no author is given -- confirm).
520 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
521 bool full, bool forceshort) const
523 docstring author = operator[]("author");
525 author = operator[]("editor");
527 return getAuthorList(buf, author, full, forceshort);
// Formats the names contained in author and returns the result after
// conversion by convertLaTeXCommands().
// buf may be null: in that case maxnames is 2, the engine type is
// ENGINE_TYPE_DEFAULT, built-in name forms are used and the separators are
// taken untranslated (from_ascii) instead of going through buf->B_().
// full disables the cut-off at maxnames; forceshort requests shortening
// whenever there is more than one name.
531 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
532 docstring const & author, bool const full, bool const forceshort,
533 bool const allnames, bool const beginning) const
535 // The maxnames threshold depends on the engine
536 size_t maxnames = buf ?
537 buf->params().documentClass().max_citenames() : 2;
540 docstring const opt = label();
545 docstring const remainder = trim(split(opt, authors, '('));
546 if (remainder.empty())
547 // in this case, we didn't find a "(",
548 // so we don't have author (year)
551 // Natbib syntax is "Jones et al.(1990)Jones, Baker, and Williams"
552 docstring const fullauthors = trim(rsplit(remainder, ')'));
553 if (!fullauthors.empty())
562 // OK, we've got some names. Let's format them.
563 // Try to split the author list
564 vector<docstring> const authors = getAuthors(author);
568 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
569 : ENGINE_TYPE_DEFAULT;
571 // These are defined in the styles
573 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
575 string const namesep =
576 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
578 string const lastnamesep =
579 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
581 string const pairnamesep =
582 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
584 string firstnameform =
585 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
586 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
588 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
589 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
590 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
591 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
593 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
594 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
595 string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
596 : "{%prefix%[[%prefix% ]]}%surname%";
598 // Shorten the list (with et al.) if forceshort is set
599 // and the list can actually be shortened, else if maxcitenames
600 // is passed and full is not set.
601 bool shorten = forceshort && authors.size() > 1;
602 vector<docstring>::const_iterator it = authors.begin();
603 vector<docstring>::const_iterator en = authors.end();
604 for (size_t i = 0; it != en; ++it, ++i) {
605 if (i >= maxnames && !full) {
// A literal "others" entry is rendered as the "et al." text.
609 if (*it == "others") {
610 retval += buf ? buf->B_(etal) : from_ascii(etal);
// Before the last name: a list of exactly two names uses pairnamesep,
// longer lists use lastnamesep.
613 if (i > 0 && i == authors.size() - 1) {
614 if (authors.size() == 2)
615 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
617 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
619 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
621 retval += (i == 0) ? constructName(*it, firstnameform)
622 : constructName(*it, othernameform);
624 retval += constructName(*it, citenameform);
628 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
630 retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
633 return convertLaTeXCommands(retval);
// Returns the year of this entry as text. The "year" field is consulted
// first, then biblatex's "date" field, from which the four-digit start year
// and, if the date is a range, the end year (joined by U+2013, an en dash)
// are extracted. The trailing part works on the label() text instead and
// splits it at '(' and ')'.
637 docstring const BibTeXInfo::getYear() const
640 // first try legacy year field
641 docstring year = operator[]("year");
644 // now try biblatex's date field
645 year = operator[]("date");
646 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
647 // We only want the years.
648 static regex const yreg("[-]?([\\d]{4}).*");
649 static regex const ereg(".*/[-]?([\\d]{4}).*");
651 string const date = to_utf8(year);
652 if (!regex_match(date, sm, yreg))
653 // cannot parse year.
655 year = from_ascii(sm[1]);
656 // check for an endyear
657 if (regex_match(date, sm, ereg))
658 year += char_type(0x2013) + from_ascii(sm[1]);
662 docstring const opt = label();
667 docstring tmp = split(opt, authors, '(');
669 // we don't have author (year)
672 tmp = split(tmp, year, ')');
// Fills the three output parameters from fields of this record:
// doi from "doi" (prefixed with https://doi.org/ unless it already starts
// with "http"), url from "url" or from the biblatex eprint fields, and file
// from "file" or "localfile".
677 void BibTeXInfo::getLocators(docstring & doi, docstring & url, docstring & file) const
680 // get "doi" entry from citation record
681 doi = operator[]("doi");
682 if (!doi.empty() && !prefixIs(doi,from_ascii("http")))
683 doi = "https://doi.org/" + doi;
684 // get "url" entry from citation record
685 url = operator[]("url");
686 // get "file" entry from citation record
687 file = operator[]("file");
689 // Jabref case, field has a format:
690 // Description:Location:Filetype;Description:Location:Filetype...
691 // We will grab only first pdf
693 docstring ret, filedest, tmp;
// Two successive splits at ':' leave the second field (Location) in filedest.
694 ret = split(file, tmp, ':');
695 tmp = split(ret, filedest, ':');
696 //TODO howto deal with relative directories?
697 FileName f(to_utf8(filedest));
699 file = "file:///" + filedest;
702 // kbibtex case, format:
703 // file1.pdf;file2.pdf
704 // We will grab only first pdf
707 kfile = operator[]("localfile");
708 if (!kfile.empty()) {
709 docstring filedest, tmp;
710 tmp = split(kfile, filedest, ';');
711 //TODO howto deal with relative directories?
712 FileName f(to_utf8(filedest));
714 file = "file:///" + filedest;
720 // try biblatex specific fields, see its manual
721 // 3.13.7 "Electronic Publishing Information"
722 docstring eprinttype = operator[]("eprinttype");
723 docstring eprint = operator[]("eprint");
727 if (eprinttype == "arxiv")
728 url = "https://arxiv.org/abs/" + eprint;
729 if (eprinttype == "jstor")
730 url = "https://www.jstor.org/stable/" + eprint;
731 if (eprinttype == "pubmed")
732 url = "http://www.ncbi.nlm.nih.gov/pubmed/" + eprint;
733 if (eprinttype == "hdl")
734 url = "https://hdl.handle.net/" + eprint;
735 if (eprinttype == "googlebooks")
736 url = "http://books.google.com/books?id=" + eprint;
741 // Here can be handled the bibliography environment. All one could do
742 // here is let LyX scan the entry for URL or HRef insets.
// Forward declaration: parseEmbeddedOption() below calls parseOptions()
// before its definition appears.
748 docstring parseOptions(docstring const & format, string & optkey,
749 docstring & ifpart, docstring & elsepart);
751 // Calls parseOptions to deal with an embedded option, such as:
752 // {%number%[[, no.~%number%]]}
753 // which must appear at the start of format. ifelsepart gets the
754 // whole of the option, and we return what's left after the option.
755 // we return format if there is an error.
756 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
758 LASSERT(format[0] == '{' && format[1] == '%', return format);
762 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
// parseOptions() hands back its input unchanged to signal failure.
763 if (format == rest) { // parse error
764 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
767 LASSERT(rest.size() <= format.size(),
768 { ifelsepart = docstring(); return format; });
// The option is the leading part of format that is missing from rest.
769 ifelsepart = format.substr(0, format.size() - rest.size());
774 // Gets a "clause" from a format string, where the clause is
775 // delimited by '[[' and ']]'. Returns what is left after the
776 // clause is removed, and returns format if there is an error.
// Options embedded in the clause ("{%...") are handed to
// parseEmbeddedOption().
777 docstring getClause(docstring const & format, docstring & clause)
779 docstring fmt = format;
782 // we'll remove characters from the front of fmt as we
784 while (!fmt.empty()) {
// "]]" marks the end of the clause
785 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
790 // check for an embedded option
791 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
793 docstring const rest = parseEmbeddedOption(fmt, part);
795 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
800 } else { // it's just a normal character
809 // parse an options string, which must appear at the start of the
810 // format parameter. puts the parsed bits in optkey, ifpart, and
811 // elsepart and returns what's left after the option is removed.
812 // if there's an error, it returns format itself.
// Accepted shapes, as checked below:
//   {%key%[[if-part]]}
//   {%key%[[if-part]][[else-part]]}
813 docstring parseOptions(docstring const & format, string & optkey,
814 docstring & ifpart, docstring & elsepart)
816 LASSERT(format[0] == '{' && format[1] == '%', return format);
// skip the leading "{%"
818 docstring fmt = format.substr(2);
819 size_t pos = fmt.find('%'); // end of key
820 if (pos == string::npos) {
821 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
824 optkey = to_utf8(fmt.substr(0, pos));
825 fmt = fmt.substr(pos + 1);
826 // [[format]] should be next
827 if (fmt[0] != '[' || fmt[1] != '[') {
828 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
832 docstring curfmt = fmt;
833 fmt = getClause(curfmt, ifpart);
835 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
839 if (fmt[0] == '}') // we're done, no else clause
840 return fmt.substr(1);
842 // else part should follow
843 if (fmt[0] != '[' || fmt[1] != '[') {
844 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
849 fmt = getClause(curfmt, elsepart);
// getClause() returning its input unchanged means it failed; otherwise
// the closing '}' of the option must follow.
851 if (fmt == curfmt || fmt[0] != '}') {
852 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
855 return fmt.substr(1);
862 Bug #9131 revealed an oddity in how we are generating citation information
863 when more than one key is given. We end up building a longer and longer format
864 string as we go, which we then have to re-parse, over and over and over again,
865 rather than generating the information for the individual keys and then putting
866 all of that together. We do that to deal with the way separators work, from what
867 I can tell, but it still feels like a hack. Fixing this would require quite a
868 bit of work, however.
870 docstring BibTeXInfo::expandFormat(docstring const & format,
871 BibTeXInfoList const & xrefs, int & counter, Buffer const & buf,
872 CiteItem const & ci, bool next, bool second) const
874 // incorrect use of macros could put us in an infinite loop
875 static int const max_passes = 5000;
876 // the use of overly large keys can lead to performance problems, due
877 // to eventual attempts to convert LaTeX macros to unicode. See bug
878 // #8944. By default, the size is limited to 128 (in CiteItem), but
879 // for specific purposes (such as XHTML export), it needs to be enlarged
880 // This is perhaps not the best solution, but it will have to do for now.
881 size_t const max_keysize = ci.max_key_size;
882 odocstringstream ret; // return value
// Scanner state: inside a %key% / inside a {!rich text!} span.
884 bool scanning_key = false;
885 bool scanning_rich = false;
887 CiteEngineType const engine_type = buf.params().citeEngineType();
888 docstring fmt = format;
889 // we'll remove characters from the front of fmt as we
891 while (!fmt.empty()) {
892 if (counter > max_passes) {
893 LYXERR0("Recursion limit reached while parsing `"
898 char_type thischar = fmt[0];
899 if (thischar == '%') {
900 // beginning or end of key
903 scanning_key = false;
904 // so we replace the key with its value, which may be empty
908 buf.params().documentClass().getCiteMacro(engine_type, key);
// The macro text is put back in front of the remaining format, so it is
// scanned again by this loop.
909 fmt = from_utf8(val) + fmt.substr(1);
912 } else if (prefixIs(key, "B_")) {
913 // a translatable bit (to the Buffer language)
915 buf.params().documentClass().getCiteMacro(engine_type, key);
916 docstring const trans =
917 translateIfPossible(from_utf8(val), buf.params().language->code());
919 } else if (key[0] == '_') {
920 // a translatable bit (to the GUI language)
922 buf.params().documentClass().getCiteMacro(engine_type, key);
923 docstring const trans =
924 translateIfPossible(from_utf8(val));
927 docstring const val =
928 getValueForKey(key, buf, ci, xrefs, max_keysize);
930 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
933 ret << from_ascii("{!</span>!}");
941 else if (thischar == '{') {
942 // beginning of option?
944 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
947 if (fmt.size() > 1) {
949 // it is the beginning of an optional format
953 docstring const newfmt =
954 parseOptions(fmt, optkey, ifpart, elsepart);
955 if (newfmt == fmt) // parse error
958 docstring const val =
959 getValueForKey(optkey, buf, ci, xrefs);
// "next" and "second" are pseudo keys answered by the flags of the same
// name; any other key selects the if-part when its value is non-empty and
// the else-part (if there is one) otherwise.
960 if (optkey == "next" && next)
961 ret << ifpart; // without expansion
962 else if (optkey == "second" && second) {
964 ret << expandFormat(ifpart, xrefs, newcounter, buf,
966 } else if (!val.empty()) {
968 ret << expandFormat(ifpart, xrefs, newcounter, buf,
970 } else if (!elsepart.empty()) {
972 ret << expandFormat(elsepart, xrefs, newcounter, buf,
975 // fmt will have been shortened for us already
979 // beginning of rich text
980 scanning_rich = true;
982 ret << from_ascii("{!");
986 // we are here if '{' was not followed by % or !.
987 // So it's just a character.
// end of rich text
990 else if (scanning_rich && thischar == '!'
991 && fmt.size() > 1 && fmt[1] == '}') {
993 scanning_rich = false;
995 ret << from_ascii("!}");
998 else if (scanning_key)
// NOTE(review): thischar is a char_type that is converted to char here;
// presumably keys are expected to be ASCII -- confirm.
999 key += char(thischar);
1003 } catch (EncodingException & /* e */) {
1004 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
1007 fmt = fmt.substr(1);
1010 LYXERR0("Never found end of key in `" << format << "'!");
1013 if (scanning_rich) {
1014 LYXERR0("Never found end of rich text in `" << format << "'!");
// Returns the formatted description of this entry. Results are cached in
// info_ (plain) and info_richtext_ (rich text). When format_in is empty, the
// format comes from the document class' cite format for entry_type_.
1021 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
1022 Buffer const & buf, CiteItem const & ci, docstring const & format_in) const
1024 bool const richtext = ci.richtext;
1026 CiteEngineType const engine_type = buf.params().citeEngineType();
1027 DocumentClass const & dc = buf.params().documentClass();
1028 docstring const & format = format_in.empty()?
1029 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)))
1032 if (format != format_) {
1033 // clear caches since format changed
1035 info_richtext_.clear();
// use the cached plain text version, if there is one
1039 if (!richtext && !info_.empty()) {
1040 info_ = convertLaTeXCommands(processRichtext(info_, false));
// use the cached rich text version, if there is one
1043 if (richtext && !info_richtext_.empty())
1044 return info_richtext_;
1047 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
1053 info_ = expandFormat(format, xrefs, counter, buf,
1056 if (info_.empty()) {
1057 // this probably shouldn't happen
1062 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
1063 return info_richtext_;
1066 info_ = convertLaTeXCommands(processRichtext(info_, false));
// Expands format with expandFormat(). Unless the expansion is empty or
// next is set, rich text markers are then processed according to
// ci.richtext and LaTeX commands are converted.
1071 docstring const BibTeXInfo::getLabel(BibTeXInfoList const & xrefs,
1072 Buffer const & buf, docstring const & format,
1073 CiteItem const & ci, bool next, bool second) const
1078 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
1080 if (!loclabel.empty() && !next) {
1081 loclabel = processRichtext(loclabel, ci.richtext);
1082 loclabel = convertLaTeXCommands(loclabel);
// Looks up field with find(). A function-local static empty docstring is
// kept for the case that the field is missing (presumably so that a
// reference can be returned in that case as well -- confirm).
1089 docstring const & BibTeXInfo::operator[](docstring const & field) const
1091 BibTeXInfo::const_iterator it = find(field);
1094 static docstring const empty_value = docstring();
// Convenience overload: converts field with from_ascii() and forwards to
// the docstring overload.
1099 docstring const & BibTeXInfo::operator[](string const & field) const
1101 return operator[](from_ascii(field));
// Returns the value that oldkey stands for. A leading "clean:" is stripped
// from the key first. The key is looked up as a field of this entry, then in
// the cross-referenced entries (xrefs); if that yields nothing, a number of
// special keys is evaluated. The result is truncated to maxsize with an
// ellipsis.
1105 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
1106 CiteItem const & ci, BibTeXInfoList const & xrefs, size_t maxsize) const
1108 // anything less is pointless
1109 LASSERT(maxsize >= 16, maxsize = 16);
1110 string key = oldkey;
1111 bool cleanit = false;
1112 if (prefixIs(oldkey, "clean:")) {
1113 key = oldkey.substr(6);
1117 docstring ret = operator[](key);
1118 if (ret.empty() && !xrefs.empty()) {
1119 // xr is a (reference to a) BibTeXInfo const *
1120 for (auto const & xr : xrefs) {
1121 if (xr && !(*xr)[key].empty()) {
1128 // some special keys
1129 // FIXME: dialog, textbefore and textafter have nothing to do with this
1130 if (key == "dialog" && ci.context == CiteItem::Dialog)
1131 ret = from_ascii("x"); // any non-empty string will do
1132 else if (key == "export" && ci.context == CiteItem::Export)
1133 ret = from_ascii("x"); // any non-empty string will do
1134 else if (key == "ifstar" && ci.Starred)
1135 ret = from_ascii("x"); // any non-empty string will do
1136 else if (key == "ifqualified" && ci.isQualified)
1137 ret = from_ascii("x"); // any non-empty string will do
1138 else if (key == "entrytype")
1140 else if (prefixIs(key, "ifentrytype:")
1141 && from_ascii(key.substr(12)) == entry_type_)
1142 ret = from_ascii("x"); // any non-empty string will do
1143 else if (key == "key")
1145 else if (key == "label")
1147 else if (key == "modifier" && modifier_ != 0)
1149 else if (key == "numericallabel")
1151 else if (prefixIs(key, "ifmultiple:")) {
1152 // Return whether we have multiple authors
1153 docstring const kind = operator[](from_ascii(key.substr(11)));
1154 if (multipleAuthors(kind))
1155 ret = from_ascii("x"); // any non-empty string will do
1157 else if (prefixIs(key, "abbrvnames:")) {
1158 // Special key to provide abbreviated name list,
1159 // with respect to maxcitenames. Suitable for Bibliography
1161 docstring const kind = operator[](from_ascii(key.substr(11)));
1162 ret = getAuthorList(&buf, kind, false, false, true);
1163 if (ci.forceUpperCase && isLowerCase(ret[0]))
1164 ret[0] = uppercase(ret[0]);
1165 } else if (prefixIs(key, "fullnames:")) {
1166 // Return a full name list. Suitable for Bibliography
1168 docstring const kind = operator[](from_ascii(key.substr(10)));
1169 ret = getAuthorList(&buf, kind, true, false, true);
1170 if (ci.forceUpperCase && isLowerCase(ret[0]))
1171 ret[0] = uppercase(ret[0]);
1172 } else if (prefixIs(key, "forceabbrvnames:")) {
1173 // Special key to provide abbreviated name lists,
1174 // irrespective of maxcitenames. Suitable for Bibliography
// NOTE(review): "forceabbrvnames:" has 16 characters, so substr(15) keeps
// the ':' in front of the field name -- looks like an off-by-one; confirm.
1176 docstring const kind = operator[](from_ascii(key.substr(15)));
1177 ret = getAuthorList(&buf, kind, false, true, true);
1178 if (ci.forceUpperCase && isLowerCase(ret[0]))
1179 ret[0] = uppercase(ret[0]);
1180 } else if (prefixIs(key, "abbrvbynames:")) {
1181 // Special key to provide abbreviated name list,
1182 // with respect to maxcitenames. Suitable for further names inside a
1183 // bibliography item // (such as "ed. by ...")
// NOTE(review): "abbrvbynames:" has 13 characters, so substr(11) leaves
// "s:" in front of the field name -- the offset looks wrong; confirm.
1184 docstring const kind = operator[](from_ascii(key.substr(11)));
1185 ret = getAuthorList(&buf, kind, false, false, true, false);
1186 if (ci.forceUpperCase && isLowerCase(ret[0]))
1187 ret[0] = uppercase(ret[0]);
1188 } else if (prefixIs(key, "fullbynames:")) {
1189 // Return a full name list. Suitable for further names inside a
1190 // bibliography item // (such as "ed. by ...")
// NOTE(review): "fullbynames:" has 12 characters, so substr(10) leaves
// "s:" in front of the field name -- the offset looks wrong; confirm.
1191 docstring const kind = operator[](from_ascii(key.substr(10)));
1192 ret = getAuthorList(&buf, kind, true, false, true, false);
1193 if (ci.forceUpperCase && isLowerCase(ret[0]))
1194 ret[0] = uppercase(ret[0]);
1195 } else if (prefixIs(key, "forceabbrvbynames:")) {
1196 // Special key to provide abbreviated name lists,
1197 // irrespective of maxcitenames. Suitable for further names inside a
1198 // bibliography item // (such as "ed. by ...")
// NOTE(review): "forceabbrvbynames:" has 18 characters, so substr(15)
// leaves "es:" in front of the field name -- the offset looks wrong; confirm.
1199 docstring const kind = operator[](from_ascii(key.substr(15)));
1200 ret = getAuthorList(&buf, kind, false, true, true, false);
1201 if (ci.forceUpperCase && isLowerCase(ret[0]))
1202 ret[0] = uppercase(ret[0]);
1203 } else if (key == "abbrvciteauthor") {
1204 // Special key to provide abbreviated author or
1205 // editor names (suitable for citation labels),
1206 // with respect to maxcitenames.
1207 ret = getAuthorOrEditorList(&buf, false, false);
1208 if (ci.forceUpperCase && isLowerCase(ret[0]))
1209 ret[0] = uppercase(ret[0]);
1210 } else if (key == "fullciteauthor") {
1211 // Return a full author or editor list (for citation labels)
1212 ret = getAuthorOrEditorList(&buf, true, false);
1213 if (ci.forceUpperCase && isLowerCase(ret[0]))
1214 ret[0] = uppercase(ret[0]);
1215 } else if (key == "forceabbrvciteauthor") {
1216 // Special key to provide abbreviated author or
1217 // editor names (suitable for citation labels),
1218 // irrespective of maxcitenames.
1219 ret = getAuthorOrEditorList(&buf, false, true);
1220 if (ci.forceUpperCase && isLowerCase(ret[0]))
1221 ret[0] = uppercase(ret[0]);
1222 } else if (key == "bibentry") {
1223 // Special key to provide the full bibliography entry: see getInfo()
1224 CiteEngineType const engine_type = buf.params().citeEngineType();
1225 DocumentClass const & dc = buf.params().documentClass();
1226 docstring const & format =
1227 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1229 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1230 } else if (key == "textbefore")
1231 ret = ci.textBefore;
1232 else if (key == "textafter")
1234 else if (key == "curpretext") {
1235 vector<pair<docstring, docstring>> pres = ci.getPretexts();
1236 vector<pair<docstring, docstring>>::iterator it = pres.begin();
1238 for (; it != pres.end() ; ++it) {
1239 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1244 if ((*it).first == bib_key_)
1247 } else if (key == "curposttext") {
1248 vector<pair<docstring, docstring>> posts = ci.getPosttexts();
1249 vector<pair<docstring, docstring>>::iterator it = posts.begin();
1251 for (; it != posts.end() ; ++it) {
1252 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1257 if ((*it).first == bib_key_)
1260 } else if (key == "year")
1265 ret = xml::cleanAttr(ret);
1267 // make sure it is not too big
1268 support::truncateWithEllipsis(ret, maxsize);
1273 //////////////////////////////////////////////////////////////////////
1277 //////////////////////////////////////////////////////////////////////
1281 // A functor for use with sort, leading to case insensitive sorting
1282 bool compareNoCase(const docstring & a, const docstring & b) {
1283 return compare_no_case(a, b) < 0;
1289 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1291 vector<docstring> result;
1292 if (!data.isBibTeX())
1294 // Legacy crossref field. This is not nestable.
1295 if (!nested && !data["crossref"].empty()) {
1296 docstring const xrefkey = data["crossref"];
1297 result.push_back(xrefkey);
1298 // However, check for nested xdatas
1299 BiblioInfo::const_iterator it = find(xrefkey);
1301 BibTeXInfo const & xref = it->second;
1302 vector<docstring> const nxdata = getXRefs(xref, true);
1303 if (!nxdata.empty())
1304 result.insert(result.end(), nxdata.begin(), nxdata.end());
1307 // Biblatex's xdata field. Infinitely nestable.
1308 // XData field can consist of a comma-separated list of keys
1309 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1310 if (!xdatakeys.empty()) {
1311 for (auto const & xdatakey : xdatakeys) {
1312 result.push_back(xdatakey);
1313 BiblioInfo::const_iterator it = find(xdatakey);
1315 BibTeXInfo const & xdata = it->second;
1316 vector<docstring> const nxdata = getXRefs(xdata, true);
1317 if (!nxdata.empty())
1318 result.insert(result.end(), nxdata.begin(), nxdata.end());
1326 vector<docstring> const BiblioInfo::getKeys() const
1328 vector<docstring> bibkeys;
1329 for (auto const & bi : *this)
1330 bibkeys.push_back(bi.first);
1331 sort(bibkeys.begin(), bibkeys.end(), &compareNoCase);
1336 vector<docstring> const BiblioInfo::getFields() const
1338 vector<docstring> bibfields;
1339 for (auto const & fn : field_names_)
1340 bibfields.push_back(fn);
1341 sort(bibfields.begin(), bibfields.end());
1346 vector<docstring> const BiblioInfo::getEntries() const
1348 vector<docstring> bibentries;
1349 for (auto const & et : entry_types_)
1350 bibentries.push_back(et);
1351 sort(bibentries.begin(), bibentries.end());
1356 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1358 BiblioInfo::const_iterator it = find(key);
1361 BibTeXInfo const & data = it->second;
1362 return data.getAuthorOrEditorList(&buf, false);
1366 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1368 BiblioInfo::const_iterator it = find(key);
1371 BibTeXInfo const & data = it->second;
1372 return data.citeNumber();
1375 void BiblioInfo::getLocators(docstring const & key, docstring & doi, docstring & url, docstring & file) const
1377 BiblioInfo::const_iterator it = find(key);
1380 BibTeXInfo const & data = it->second;
1381 data.getLocators(doi,url,file);
// Return the year of the entry stored under `key`.
// The entry's own getYear() is consulted first; the entries named by
// getXRefs() serve as a fallback (presumably only when the entry itself has
// no year). When use_modifier is set and the entry has a non-zero modifier,
// the modifier is appended to the year.
// NOTE(review): this listing omits several short lines of the function (see
// the gaps in the embedded line numbers), among them the not-found guard
// after find(), the conditions around the cross-reference fallback and the
// final return -- confirm details against the complete source.
1385 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1387 BiblioInfo::const_iterator it = find(key);
1390 BibTeXInfo const & data = it->second;
1391 docstring year = data.getYear();
1393 // let's try the crossrefs
1394 vector<docstring> const xrefs = getXRefs(data);
// Walk the cross-referenced keys in the order getXRefs() returned them.
1398 for (docstring const & xref : xrefs) {
1399 BiblioInfo::const_iterator const xrefit = find(xref);
// a cross-referenced key we do not know about
1400 if (xrefit == end())
1402 BibTeXInfo const & xref_data = xrefit->second;
1403 year = xref_data.getYear();
// The modifier always comes from the entry itself, never from a
// cross-referenced entry.
1409 if (use_modifier && data.modifier() != 0)
1410 year += data.modifier();
1415 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1417 docstring const year = getYear(key, use_modifier);
1419 return buf.B_("No year");
1424 docstring const BiblioInfo::getInfo(docstring const & key,
1425 Buffer const & buf, CiteItem const & ci, docstring const & format) const
1427 BiblioInfo::const_iterator it = find(key);
1429 return _("Bibliography entry not found!");
1430 BibTeXInfo const & data = it->second;
1431 BibTeXInfoList xrefptrs;
1432 for (docstring const & xref : getXRefs(data)) {
1433 BiblioInfo::const_iterator const xrefit = find(xref);
1434 if (xrefit != end())
1435 xrefptrs.push_back(&(xrefit->second));
1437 return data.getInfo(xrefptrs, buf, ci, format);
// Build the citation label for `keys` in the cite style named `style`.
// The style's cite format is fetched from the document class and then passed
// through BibTeXInfo::getLabel() once per key, each call receiving the
// result of the previous one; the final string is cut down to the size limit
// with truncateWithEllipsis().
// `keys` is taken by value, so the trimming below does not affect the
// caller's vector.
// NOTE(review): this listing omits several short lines of the function (see
// the gaps in the embedded line numbers), so the comments below describe
// only what is visible -- confirm details against the complete source.
1441 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1442 Buffer const & buf, string const & style, CiteItem const & ci) const
1444 size_t max_size = ci.max_size;
1445 // shorter makes no sense (presumably LASSERT runs its second argument,
// resetting the limit to 16, when the check fails)
1446 LASSERT(max_size >= 16, max_size = 16);
1448 // we can't display more than 10 of these, anyway
1449 // but since we truncate in the middle,
1450 // we need to split into two halves.
1451 bool const too_many_keys = keys.size() > 10;
1452 vector<docstring> lkeys;
1453 if (too_many_keys) {
// keep a copy of the last five keys ...
1454 lkeys.insert(lkeys.end(), keys.end() - 5, keys.end());
// ... and append them again (the statement in between is elided here;
// presumably it shortens `keys` to its leading part first)
1456 keys.insert(keys.end(), lkeys.begin(), lkeys.end());
1459 CiteEngineType const engine_type = buf.params().citeEngineType();
1460 DocumentClass const & dc = buf.params().documentClass();
1461 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
// `ret` starts out as the raw format and is rewritten once per key below.
1462 docstring ret = format;
1463 vector<docstring>::const_iterator key = keys.begin();
1464 vector<docstring>::const_iterator ken = keys.end();
// keys seen so far, including the current one and any repeats
1465 vector<docstring> handled_keys;
1466 for (int i = 0; key != ken; ++key, ++i) {
1467 handled_keys.push_back(*key);
// (loop body elided in this listing)
1469 for (auto const & k : handled_keys) {
1473 BiblioInfo::const_iterator it = find(*key);
// Placeholder entry that carries just the key.
1474 BibTeXInfo empty_data;
1475 empty_data.key(*key);
1476 BibTeXInfo & data = empty_data;
1477 vector<BibTeXInfo const *> xrefptrs;
// Gather pointers to the cross-referenced entries that can be resolved.
1480 for (docstring const & xref : getXRefs(data)) {
1481 BiblioInfo::const_iterator const xrefit = find(xref);
1482 if (xrefit != end())
1483 xrefptrs.push_back(&(xrefit->second));
// `key + 1 != ken` is true while further keys follow; `i == 1` is true only
// for the second key processed.
1487 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1490 support::truncateWithEllipsis(ret, max_size, true);
1496 bool BiblioInfo::isBibtex(docstring const & key) const
1499 split(key, key1, ',');
1500 BiblioInfo::const_iterator it = find(key1);
1503 return it->second.isBibTeX();
// Build one (style name, label) pair per element of `styles`, in the same
// order, where the label is what getLabel() produces for `keys` in that
// style.
// NOTE(review): the condition guarding the early empty return and the
// declaration of the local `style` are elided in this listing (see the gaps
// in the embedded line numbers) -- confirm them against the complete source.
1507 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1508 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1509 Buffer const & buf, CiteItem const & ci) const
// early exit with an empty result
1512 return vector<pair<docstring,docstring>>();
// one slot per style; each slot is filled in by the loop below
1515 CiteStringMap csm(styles.size());
1516 for (size_t i = 0; i != csm.size(); ++i) {
1517 style = styles[i].name;
// the style name is converted with from_ascii() for the first pair member
1518 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1525 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1527 bimap_.insert(info.begin(), info.end());
1528 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1529 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1535 // used in xhtml to sort a list of BibTeXInfo objects
1536 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1538 docstring const lauth = lhs->getAuthorOrEditorList();
1539 docstring const rauth = rhs->getAuthorOrEditorList();
1540 docstring const lyear = lhs->getYear();
1541 docstring const ryear = rhs->getYear();
1542 docstring const ltitl = lhs->operator[]("title");
1543 docstring const rtitl = rhs->operator[]("title");
1544 return (lauth < rauth)
1545 || (lauth == rauth && lyear < ryear)
1546 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1552 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1554 cited_entries_.clear();
1555 // We are going to collect all the citation keys used in the document,
1556 // getting them from the TOC.
1557 // FIXME We may want to collect these differently, in the first case,
1558 // so that we might have them in order of appearance.
1559 set<docstring> citekeys;
1560 Toc const & toc = *buf.tocBackend().toc("citation");
1561 for (auto const & t : toc) {
1562 if (t.str().empty())
1564 vector<docstring> const keys = getVectorFromString(t.str());
1565 citekeys.insert(keys.begin(), keys.end());
1567 if (citekeys.empty())
1570 // We have a set of the keys used in this document.
1571 // We will now convert it to a list of the BibTeXInfo objects used in
1573 vector<BibTeXInfo const *> bi;
1574 for (auto const & ck : citekeys) {
1575 BiblioInfo::const_iterator const bt = find(ck);
1576 if (bt == end() || !bt->second.isBibTeX())
1578 bi.push_back(&(bt->second));
1581 sort(bi.begin(), bi.end(), lSorter);
1583 // Now we can write the sorted keys
1584 // b is a BibTeXInfo const *
1585 for (auto const & b : bi)
1586 cited_entries_.push_back(b->key());
// Compute the citation labels of all cited entries.
// A first pass over cited_entries_ assigns running cite numbers and year
// modifiers (like the 'a' in "1984a"); a second pass sets each entry's
// label from the cite number, from author and year, or from the key.
// NOTE(review): this listing omits many short lines of the function (see the
// gaps in the embedded line numbers), including the declarations of
// `keynumber` and `modifier`, the branches on `numbers` and most of the
// modifier bookkeeping -- the comments below describe only what is visible;
// confirm details against the complete source.
1590 void BiblioInfo::makeCitationLabels(Buffer const & buf)
// refresh cited_entries_, which both loops below iterate over
1592 collectCitedEntries(buf);
1593 CiteEngineType const engine_type = buf.params().citeEngineType();
// true when the engine type has the ENGINE_TYPE_NUMERICAL bit set
1594 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1598 // used to remember the last one we saw
1599 // we'll be comparing entries to see if we need to add
1600 // modifiers, like "1984a"
1601 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1603 // add letters to years
1604 for (auto const & ce : cited_entries_) {
1605 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1606 // this shouldn't happen, but...
1607 if (biit == bimap_.end())
1608 // ...fail gracefully, anyway.
1610 BibTeXInfo & entry = biit->second;
// running number, converted to a string and stored as the cite number
1612 docstring const num = convert<docstring>(++keynumber);
1613 entry.setCiteNumber(num);
1615 // The first test here is checking whether this is the first
1616 // time through the loop. If so, then we do not have anything
1617 // with which to compare.
1618 if (last != bimap_.end()
1619 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1620 // we access the year via getYear() so as to get it from the xref,
1621 // if we need to do so
1622 && getYear(entry.key()) == getYear(last->second.key())) {
1623 if (modifier == 0) {
1624 // so the last one should have been 'a'
1625 last->second.setModifier('a');
1627 } else if (modifier == 'z')
1634 entry.setModifier(modifier);
1635 // remember the last one
// second pass: set the label of every cited entry
1640 for (auto const & ce : cited_entries_) {
1641 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1642 // this shouldn't happen, but...
1643 if (biit == bimap_.end())
1644 // ...fail gracefully, anyway.
1646 BibTeXInfo & entry = biit->second;
// label taken from the cite number assigned in the first pass
1648 entry.label(entry.citeNumber());
1650 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1651 // we do it this way so as to access the xref, if necessary
1652 // note that this also gives us the modifier
1653 docstring const year = getYear(ce, buf, true);
1654 if (!auth.empty() && !year.empty())
1655 entry.label(auth + ' ' + year);
// label taken from the entry's key (presumably the fallback when author or
// year is missing; the `else` line is elided in this listing)
1657 entry.label(entry.key());
1663 //////////////////////////////////////////////////////////////////////
1667 //////////////////////////////////////////////////////////////////////
// Derive a CitationStyle from a citation command name.
// The command is replaced by its alias when params.getCiteAlias() knows one.
// An upper-case first letter of `command` sets forceUpperCase (the working
// name gets a lower-case first letter); a trailing '*' on `command` sets
// hasStarredVersion (a trailing '*' is removed from the working name).
// NOTE(review): this listing omits some short lines of the function (see the
// gaps in the embedded line numbers): the declaration of `cs`, the statement
// executed for an empty command, and what is finally done with `cmd` --
// confirm against the complete source.
1670 CitationStyle citationStyleFromString(string const & command,
1671 BufferParams const & params)
// guard for the empty command; command[0] and command.size() - 1 below rely
// on it
1674 if (command.empty())
1677 string const alias = params.getCiteAlias(command);
1678 string cmd = alias.empty() ? command : alias;
// case is judged on the original command, not on the alias
1679 if (isUpperCase(command[0])) {
1680 cs.forceUpperCase = true;
1681 cmd[0] = lowercase(cmd[0]);
// index of the last character of the original command
1684 size_t const n = command.size() - 1;
1685 if (command[n] == '*') {
1686 cs.hasStarredVersion = true;
// the alias may or may not carry the star itself
1687 if (suffixIs(cmd, '*'))
1688 cmd = cmd.substr(0, cmd.size() - 1);
// Turn a CitationStyle back into a command string.
// The base is cs.cmd when `latex` is true and cs.name otherwise; with
// forceUpperCase set, its first character is upper-cased.
// NOTE(review): the statement governed by the hasStarredVersion test and the
// final return are elided in this listing (see the gap in the embedded line
// numbers) -- presumably a '*' is appended and `cmd` returned; confirm
// against the complete source.
1696 string citationStyleToString(const CitationStyle & cs, bool const latex)
1698 string cmd = latex ? cs.cmd : cs.name;
1699 if (cs.forceUpperCase)
1700 cmd[0] = uppercase(cmd[0]);
1701 if (cs.hasStarredVersion)
1707 docstring authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs, Buffer const & buf)
1709 // This function closely mimics getAuthorList, but produces DocBook instead of text.
1710 // It has been greatly simplified, as the complete list of authors is always produced. No separators are required,
1711 // as the output has a database-like shape.
1712 // constructName has also been merged within, as it becomes really simple and leads to no copy-paste.
1714 if (authorsString.empty()) {
1718 // Split the input list of authors into individual authors.
1719 vector<docstring> const authors = getAuthors(authorsString);
1721 // Retrieve the "et al." variation.
1722 string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal");
1724 // Output the list of authors.
1725 xs << xml::StartTag("authorgroup");
1728 auto it = authors.cbegin();
1729 auto en = authors.cend();
1730 for (size_t i = 0; it != en; ++it, ++i) {
1731 xs << xml::StartTag("author");
1733 xs << xml::StartTag("personname");
1735 docstring name = *it;
1737 // All authors go in a <personname>. If more structure is known, use it; otherwise (just "et al."), print it as such.
1738 if (name == "others") {
1741 name_parts parts = nameParts(name);
1742 if (! parts.prefix.empty()) {
1743 xs << xml::StartTag("honorific");
1745 xs << xml::EndTag("honorific");
1748 if (! parts.prename.empty()) {
1749 xs << xml::StartTag("firstname");
1750 xs << parts.prename;
1751 xs << xml::EndTag("firstname");
1754 if (! parts.surname.empty()) {
1755 xs << xml::StartTag("surname");
1756 xs << parts.surname;
1757 xs << xml::EndTag("surname");
1760 if (! parts.suffix.empty()) {
1761 xs << xml::StartTag("othername", "role=\"suffix\"");
1763 xs << xml::EndTag("othername");
1768 xs << xml::EndTag("personname");
1770 xs << xml::EndTag("author");
1773 // Could add an affiliation after <personname>, but not stored in BibTeX.
1775 xs << xml::EndTag("authorgroup");