3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
8 * \author Richard Kimberly Heck
10 * \author Jürgen Spitzmüller
12 * Full author contact details are available in file CREDITS.
17 #include "BiblioInfo.h"
20 #include "BufferParams.h"
24 #include "TextClass.h"
25 #include "TocBackend.h"
28 #include "support/convert.h"
29 #include "support/debug.h"
30 #include "support/docstream.h"
31 #include "support/FileName.h"
32 #include "support/gettext.h"
33 #include "support/lassert.h"
34 #include "support/lstrings.h"
35 #include "support/textutils.h"
42 using namespace lyx::support;
49 // Remove placeholders from names
50 docstring renormalize(docstring const & input)
52 docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
53 return subst(res, from_ascii("$$comma!"), from_ascii(","));
57 // Split the surname into prefix ("von-part") and family name
58 pair<docstring, docstring> parseSurname(docstring const & sname)
60 // Split the surname into its tokens
61 vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
62 if (pieces.size() < 2)
63 return make_pair(docstring(), sname);
65 // Now we look for pieces that begin with a lower case letter.
66 // All except for the very last token constitute the "von-part".
68 vector<docstring>::const_iterator it = pieces.begin();
69 vector<docstring>::const_iterator const en = pieces.end();
71 for (; it != en; ++it) {
74 // If this is the last piece, then what we now have is
75 // the family name, notwithstanding the casing.
78 char_type const c = (*it)[0];
79 // If the piece starts with an upper case char, we assume
80 // this is part of the surname.
83 // Nothing of the former, so add this piece to the prename
91 // Reconstruct the family name.
92 // Note that if we left the loop because it + 1 == en,
93 // then this will still do the right thing, i.e., make surname
94 // just be the last piece.
97 for (; it != en; ++it) {
104 return make_pair(prefix, surname);
116 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
117 name_parts nameParts(docstring const & iname)
123 // First we check for groupings (via {...}) and replace blanks and
124 // commas inside groups with temporary placeholders
127 docstring::const_iterator p = iname.begin();
128 while (p != iname.end()) {
129 // count grouping level
134 // generate string with probable placeholders
135 if (*p == ' ' && gl > 0)
136 name += from_ascii("$$space!");
137 else if (*p == ',' && gl > 0)
138 name += from_ascii("$$comma!");
144 // Now we look for a comma, and take the last name to be everything
145 // preceding the right-most one, so that we also get the name suffix
147 vector<docstring> pieces = getVectorFromString(name);
148 if (pieces.size() > 1) {
149 // Whether we have a name suffix or not, the prename is
151 res.prename = renormalize(pieces.back());
152 // The family name, conversely, is always the first item.
153 // However, it might contain a prefix (aka "von" part)
154 docstring const sname = pieces.front();
155 res.prefix = renormalize(parseSurname(sname).first);
156 res.surname = renormalize(parseSurname(sname).second);
157 // If we have three pieces (the maximum allowed by BibTeX),
158 // the second one is the name suffix.
159 if (pieces.size() > 2)
160 res.suffix = renormalize(pieces.at(1));
164 // OK, so now we want to look for the last name.
165 // Split on spaces, to get various tokens.
166 pieces = getVectorFromString(name, from_ascii(" "));
167 // No space: Only a family name given
168 if (pieces.size() < 2) {
169 res.surname = renormalize(pieces.back());
172 // If we get two pieces, assume "prename surname"
173 if (pieces.size() == 2) {
174 res.prename = renormalize(pieces.front());
175 res.surname = renormalize(pieces.back());
179 // Three or more pieces: A name prefix (aka "von" part) might be included.
180 // We look for the first piece that begins with a lower case letter
181 // (which is the name prefix, if it is not the last token) or the last token.
183 vector<docstring>::const_iterator it = pieces.begin();
184 vector<docstring>::const_iterator const en = pieces.end();
186 for (; it != en; ++it) {
189 char_type const c = (*it)[0];
190 // If the piece starts with a lower case char, we assume
191 // this is the name prefix and thus prename is complete.
194 // Same if this is the last piece, which is always the surname.
197 // Nothing of the former, so add this piece to the prename
205 // Now reconstruct the family name and strip the prefix.
206 // Note that if we left the loop because it + 1 == en,
207 // then this will still do the right thing, i.e., make surname
208 // just be the last piece.
211 for (; it != en; ++it) {
218 res.prename = renormalize(prename);
219 res.prefix = renormalize(parseSurname(surname).first);
220 res.surname = renormalize(parseSurname(surname).second);
// Builds the display form of one author-type string `name` by filling in
// the name scheme `scheme`. The scheme may contain the placeholders
// %prename%, %surname%, %prefix% and %suffix%, which are substituted at the
// end, and conditional groups of the form {%prename%[[...]]},
// {%suffix%[[...]]} and {%prefix%[[...]]}, which the three regexes below
// recognize and which are resolved before the plain substitution.
225 docstring constructName(docstring const & name, string const & scheme)
227 // re-constructs a name from name parts according
// NOTE(review): nameParts(name) is evaluated four times here, once per
// member; parsing once into a local would avoid the repeated work.
229 docstring const prename = nameParts(name).prename;
230 docstring const surname = nameParts(name).surname;
231 docstring const prefix = nameParts(name).prefix;
232 docstring const suffix = nameParts(name).suffix;
// Each regex splits the scheme into: text before the group (1), the group
// opener (2), the bracketed content (3), the group closer (4) and the
// text after the group (5).
234 static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
235 static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
236 static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
238 // Changing the first parameter of regex_match() may corrupt the
239 // second one. In this case we use the temporary string tmp.
240 if (regex_match(scheme, sub, reg1)) {
242 if (!prename.empty())
246 if (regex_match(res, sub, reg2)) {
247 string tmp = sub.str(1);
250 res = tmp + sub.str(5);
252 if (regex_match(res, sub, reg3)) {
253 string tmp = sub.str(1);
256 res = tmp + sub.str(5);
// With the conditional groups resolved, replace the plain placeholders
// by the parsed name parts.
258 docstring result = from_ascii(res);
259 result = subst(result, from_ascii("%prename%"), prename);
260 result = subst(result, from_ascii("%surname%"), surname);
261 result = subst(result, from_ascii("%prefix%"), prefix);
262 result = subst(result, from_ascii("%suffix%"), suffix);
267 vector<docstring> const getAuthors(docstring const & author)
269 // We check for groupings (via {...}) and only consider " and "
270 // outside groups as author separator. This is to account
271 // for cases such as {{Barnes and Noble, Inc.}}, which
272 // need to be treated as one single family name.
273 // We use temporary placeholders in order to differentiate the
274 // diverse " and " cases.
276 // First, we temporarily replace all ampersands. It is rather unusual
277 // in author names, but can happen (consider cases such as "C \& A Corp.").
278 docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
279 // Then, we temporarily turn all " and " strings into ampersands in order
280 // to handle them later on a per-char level. Note that arbitrary casing
281 // ("And", "AND", "aNd", ...) is allowed in bibtex (#10465).
282 static regex const and_reg("(.* )([aA][nN][dD])( .*)");
284 string res = to_utf8(iname);
285 while (regex_match(res, sub, and_reg))
286 res = sub.str(1) + "&" + sub.str(3);
287 iname = from_utf8(res);
288 // Now we traverse through the string and replace the "&" by the proper
289 // output in- and outside groups
292 docstring::const_iterator p = iname.begin();
293 while (p != iname.end()) {
294 // count grouping level
299 // generate string with probable placeholders
302 // Inside groups, we output "and"
303 name += from_ascii("and");
305 // Outside groups, we output a separator
306 name += from_ascii("$$namesep!");
313 // re-insert the literal ampersands
314 name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
316 // Now construct the actual vector
317 return getVectorFromString(name, from_ascii(" $$namesep! "));
321 bool multipleAuthors(docstring const & author)
323 return getAuthors(author).size() > 1;
327 // converts a string containing LaTeX commands into unicode
329 docstring convertLaTeXCommands(docstring const & str)
334 bool scanning_cmd = false;
335 bool scanning_math = false;
336 bool escaped = false; // used to catch \$, etc.
337 while (!val.empty()) {
338 char_type const ch = val[0];
340 // if we're scanning math, we output everything until we
341 // find an unescaped $, at which point we break out.
348 scanning_math = false;
354 // if we're scanning a command name, then we just
355 // discard characters until we hit something that
358 if (isAlphaASCII(ch)) {
363 // so we're done with this command.
364 // now we fall through and check this character.
365 scanning_cmd = false;
368 // was the last character a \? If so, then this is something like:
369 // \\ or \$, so we'll just output it. That's probably not always right...
371 // exception: output \, as THIN SPACE
373 ret.push_back(0x2009);
384 scanning_math = true;
388 // Change text mode accents in the form
389 // {\v a} to \v{a} (see #9340).
390 // FIXME: This is a sort of mini-tex2lyx.
391 // Use the real tex2lyx instead!
392 static regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
393 if (regex_search(to_utf8(val), tma_reg)) {
395 val.replace(2, 1, from_ascii("{"));
399 // Apart from the above, we just ignore braces
400 if (ch == '{' || ch == '}') {
405 // we're going to check things that look like commands, so if
406 // this doesn't, just output it.
413 // ok, could be a command of some sort
414 // let's see if it corresponds to some unicode
415 // unicodesymbols has things in the form: \"{u},
416 // whereas we may see things like: \"u. So we'll
417 // look for that and change it, if necessary.
418 // FIXME: This is a sort of mini-tex2lyx.
419 // Use the real tex2lyx instead!
420 static regex const reg("^\\\\\\W\\w");
421 if (regex_search(to_utf8(val), reg)) {
422 val.insert(3, from_ascii("}"));
423 val.insert(2, from_ascii("{"));
427 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
428 Encodings::TEXT_CMD, termination, rem);
429 if (!cnvtd.empty()) {
430 // it did, so we'll take that bit and proceed with what's left
435 // it's a command of some sort
444 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
445 docstring processRichtext(docstring const & str, bool richtext)
450 bool scanning_rich = false;
451 while (!val.empty()) {
452 char_type const ch = val[0];
453 if (ch == '{' && val.size() > 1 && val[1] == '!') {
454 // beginning of rich text
455 scanning_rich = true;
459 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
461 scanning_rich = false;
469 // we need to escape '<' and '>'
477 } else if (!scanning_rich /* && !richtext */)
479 // else the character is discarded, which will happen only if
480 // richtext == false and we are scanning rich text
489 //////////////////////////////////////////////////////////////////////
493 //////////////////////////////////////////////////////////////////////
// Creates an entry for the citation key `key` with entry type `type`.
// The entry is flagged as a BibTeX entry (is_bibtex_ is true), the key
// counter num_bib_key_ and the modifier_ start at 0, and info_ and
// format_ are value-initialized.
495 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
496 : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type),
497 info_(), format_(), modifier_(0)
502 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
503 bool full, bool forceshort) const
505 docstring author = operator[]("author");
507 author = operator[]("editor");
509 return getAuthorList(buf, author, full, forceshort);
513 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
514 docstring const & author, bool const full, bool const forceshort,
515 bool const allnames, bool const beginning) const
517 // The maxnames threshold depends on the engine
518 size_t maxnames = buf ?
519 buf->params().documentClass().max_citenames() : 2;
522 docstring const opt = label();
527 docstring const remainder = trim(split(opt, authors, '('));
528 if (remainder.empty())
529 // in this case, we didn't find a "(",
530 // so we don't have author (year)
533 // Natbib syntax is "Jones et al.(1990)Jones, Baker, and Williams"
534 docstring const fullauthors = trim(rsplit(remainder, ')'));
535 if (!fullauthors.empty())
544 // OK, we've got some names. Let's format them.
545 // Try to split the author list
546 vector<docstring> const authors = getAuthors(author);
550 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
551 : ENGINE_TYPE_DEFAULT;
553 // These are defined in the styles
555 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
557 string const namesep =
558 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
560 string const lastnamesep =
561 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
563 string const pairnamesep =
564 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
566 string firstnameform =
567 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
568 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
570 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
571 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
572 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
573 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
575 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
576 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
577 string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
578 : "{%prefix%[[%prefix% ]]}%surname%";
580 // Shorten the list (with et al.) if forceshort is set
581 // and the list can actually be shortened, else if maxcitenames
582 // is passed and full is not set.
583 bool shorten = forceshort && authors.size() > 1;
584 vector<docstring>::const_iterator it = authors.begin();
585 vector<docstring>::const_iterator en = authors.end();
586 for (size_t i = 0; it != en; ++it, ++i) {
587 if (i >= maxnames && !full) {
591 if (*it == "others") {
592 retval += buf ? buf->B_(etal) : from_ascii(etal);
595 if (i > 0 && i == authors.size() - 1) {
596 if (authors.size() == 2)
597 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
599 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
601 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
603 retval += (i == 0) ? constructName(*it, firstnameform)
604 : constructName(*it, othernameform);
606 retval += constructName(*it, citenameform);
610 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
612 retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
615 return convertLaTeXCommands(retval);
619 docstring const BibTeXInfo::getYear() const
622 // first try legacy year field
623 docstring year = operator[]("year");
626 // now try biblatex's date field
627 year = operator[]("date");
628 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
629 // We only want the years.
630 static regex const yreg("[-]?([\\d]{4}).*");
631 static regex const ereg(".*/[-]?([\\d]{4}).*");
633 string const date = to_utf8(year);
634 if (!regex_match(date, sm, yreg))
635 // cannot parse year.
637 year = from_ascii(sm[1]);
638 // check for an endyear
639 if (regex_match(date, sm, ereg))
640 year += char_type(0x2013) + from_ascii(sm[1]);
644 docstring const opt = label();
649 docstring tmp = split(opt, authors, '(');
651 // we don't have author (year)
654 tmp = split(tmp, year, ')');
659 void BibTeXInfo::getLocators(docstring & doi, docstring & url, docstring & file) const
662 // get "doi" entry from citation record
663 doi = operator[]("doi");
664 if (!doi.empty() && !prefixIs(doi,from_ascii("http")))
665 doi = "https://doi.org/" + doi;
666 // get "url" entry from citation record
667 url = operator[]("url");
668 // get "file" entry from citation record
669 file = operator[]("file");
671 // Jabref case, field has a format:
672 // Description:Location:Filetype;Description:Location:Filetype...
673 // We will grab only first pdf
675 docstring ret, filedest, tmp;
676 ret = split(file, tmp, ':');
677 tmp = split(ret, filedest, ':');
678 //TODO how to deal with relative directories?
679 FileName f(to_utf8(filedest));
681 file = "file:///" + filedest;
684 // kbibtex case, format:
685 // file1.pdf;file2.pdf
686 // We will grab only first pdf
689 kfile = operator[]("localfile");
690 if (!kfile.empty()) {
691 docstring filedest, tmp;
692 tmp = split(kfile, filedest, ';');
693 //TODO how to deal with relative directories?
694 FileName f(to_utf8(filedest));
696 file = "file:///" + filedest;
702 // try biblatex specific fields, see its manual
703 // 3.13.7 "Electronic Publishing Information"
704 docstring eprinttype = operator[]("eprinttype");
705 docstring eprint = operator[]("eprint");
709 if (eprinttype == "arxiv")
710 url = "https://arxiv.org/abs/" + eprint;
711 if (eprinttype == "jstor")
712 url = "https://www.jstor.org/stable/" + eprint;
713 if (eprinttype == "pubmed")
714 url = "http://www.ncbi.nlm.nih.gov/pubmed/" + eprint;
715 if (eprinttype == "hdl")
716 url = "https://hdl.handle.net/" + eprint;
717 if (eprinttype == "googlebooks")
718 url = "http://books.google.com/books?id=" + eprint;
723 // Here can be handled the bibliography environment. All one could do
724 // here is let LyX scan the entry for URL or HRef insets.
730 docstring parseOptions(docstring const & format, string & optkey,
731 docstring & ifpart, docstring & elsepart);
733 // Calls parseOptions to deal with an embedded option, such as:
734 // {%number%[[, no.~%number%]]}
735 // which must appear at the start of format. ifelsepart gets the
736 // whole of the option, and we return what's left after the option.
737 // we return format if there is an error.
738 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
740 LASSERT(format[0] == '{' && format[1] == '%', return format);
744 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
745 if (format == rest) { // parse error
746 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
749 LASSERT(rest.size() <= format.size(),
750 { ifelsepart = docstring(); return format; });
751 ifelsepart = format.substr(0, format.size() - rest.size());
756 // Gets a "clause" from a format string, where the clause is
757 // delimited by '[[' and ']]'. Returns what is left after the
758 // clause is removed, and returns format if there is an error.
759 docstring getClause(docstring const & format, docstring & clause)
761 docstring fmt = format;
764 // we'll remove characters from the front of fmt as we
766 while (!fmt.empty()) {
767 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
772 // check for an embedded option
773 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
775 docstring const rest = parseEmbeddedOption(fmt, part);
777 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
782 } else { // it's just a normal character
791 // parse an options string, which must appear at the start of the
792 // format parameter. puts the parsed bits in optkey, ifpart, and
793 // elsepart and returns what's left after the option is removed.
794 // if there's an error, it returns format itself.
795 docstring parseOptions(docstring const & format, string & optkey,
796 docstring & ifpart, docstring & elsepart)
798 LASSERT(format[0] == '{' && format[1] == '%', return format);
800 docstring fmt = format.substr(2);
801 size_t pos = fmt.find('%'); // end of key
802 if (pos == string::npos) {
803 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
806 optkey = to_utf8(fmt.substr(0, pos));
807 fmt = fmt.substr(pos + 1);
808 // [[format]] should be next
809 if (fmt[0] != '[' || fmt[1] != '[') {
810 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
814 docstring curfmt = fmt;
815 fmt = getClause(curfmt, ifpart);
817 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
821 if (fmt[0] == '}') // we're done, no else clause
822 return fmt.substr(1);
824 // else part should follow
825 if (fmt[0] != '[' || fmt[1] != '[') {
826 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
831 fmt = getClause(curfmt, elsepart);
833 if (fmt == curfmt || fmt[0] != '}') {
834 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
837 return fmt.substr(1);
844 Bug #9131 revealed an oddity in how we are generating citation information
845 when more than one key is given. We end up building a longer and longer format
846 string as we go, which we then have to re-parse, over and over and over again,
847 rather than generating the information for the individual keys and then putting
848 all of that together. We do that to deal with the way separators work, from what
849 I can tell, but it still feels like a hack. Fixing this would require quite a
850 bit of work, however.
852 docstring BibTeXInfo::expandFormat(docstring const & format,
853 BibTeXInfoList const & xrefs, int & counter, Buffer const & buf,
854 CiteItem const & ci, bool next, bool second) const
856 // incorrect use of macros could put us in an infinite loop
857 static int const max_passes = 5000;
858 // the use of overly large keys can lead to performance problems, due
859 // to eventual attempts to convert LaTeX macros to unicode. See bug
860 // #8944. By default, the size is limited to 128 (in CiteItem), but
861 // for specific purposes (such as XHTML export), it needs to be enlarged
862 // This is perhaps not the best solution, but it will have to do for now.
863 size_t const max_keysize = ci.max_key_size;
864 odocstringstream ret; // return value
866 bool scanning_key = false;
867 bool scanning_rich = false;
869 CiteEngineType const engine_type = buf.params().citeEngineType();
870 docstring fmt = format;
871 // we'll remove characters from the front of fmt as we
873 while (!fmt.empty()) {
874 if (counter > max_passes) {
875 LYXERR0("Recursion limit reached while parsing `"
880 char_type thischar = fmt[0];
881 if (thischar == '%') {
882 // beginning or end of key
885 scanning_key = false;
886 // so we replace the key with its value, which may be empty
890 buf.params().documentClass().getCiteMacro(engine_type, key);
891 fmt = from_utf8(val) + fmt.substr(1);
894 } else if (prefixIs(key, "B_")) {
895 // a translatable bit (to the Buffer language)
897 buf.params().documentClass().getCiteMacro(engine_type, key);
898 docstring const trans =
899 translateIfPossible(from_utf8(val), buf.params().language->code());
901 } else if (key[0] == '_') {
902 // a translatable bit (to the GUI language)
904 buf.params().documentClass().getCiteMacro(engine_type, key);
905 docstring const trans =
906 translateIfPossible(from_utf8(val));
909 docstring const val =
910 getValueForKey(key, buf, ci, xrefs, max_keysize);
912 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
915 ret << from_ascii("{!</span>!}");
923 else if (thischar == '{') {
924 // beginning of option?
926 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
929 if (fmt.size() > 1) {
931 // it is the beginning of an optional format
935 docstring const newfmt =
936 parseOptions(fmt, optkey, ifpart, elsepart);
937 if (newfmt == fmt) // parse error
940 docstring const val =
941 getValueForKey(optkey, buf, ci, xrefs);
942 if (optkey == "next" && next)
943 ret << ifpart; // without expansion
944 else if (optkey == "second" && second) {
946 ret << expandFormat(ifpart, xrefs, newcounter, buf,
948 } else if (!val.empty()) {
950 ret << expandFormat(ifpart, xrefs, newcounter, buf,
952 } else if (!elsepart.empty()) {
954 ret << expandFormat(elsepart, xrefs, newcounter, buf,
957 // fmt will have been shortened for us already
961 // beginning of rich text
962 scanning_rich = true;
964 ret << from_ascii("{!");
968 // we are here if '{' was not followed by % or !.
969 // So it's just a character.
972 else if (scanning_rich && thischar == '!'
973 && fmt.size() > 1 && fmt[1] == '}') {
975 scanning_rich = false;
977 ret << from_ascii("!}");
980 else if (scanning_key)
981 key += char(thischar);
985 } catch (EncodingException & /* e */) {
986 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
992 LYXERR0("Never found end of key in `" << format << "'!");
996 LYXERR0("Never found end of rich text in `" << format << "'!");
1003 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
1004 Buffer const & buf, CiteItem const & ci, docstring const & format_in) const
1006 bool const richtext = ci.richtext;
1008 CiteEngineType const engine_type = buf.params().citeEngineType();
1009 DocumentClass const & dc = buf.params().documentClass();
1010 docstring const & format = format_in.empty()?
1011 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)))
1014 if (format != format_) {
1015 // clear caches since format changed
1017 info_richtext_.clear();
1021 if (!richtext && !info_.empty()) {
1022 info_ = convertLaTeXCommands(processRichtext(info_, false));
1025 if (richtext && !info_richtext_.empty())
1026 return info_richtext_;
1029 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
1035 info_ = expandFormat(format, xrefs, counter, buf,
1038 if (info_.empty()) {
1039 // this probably shouldn't happen
1044 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
1045 return info_richtext_;
1048 info_ = convertLaTeXCommands(processRichtext(info_, false));
1053 docstring const BibTeXInfo::getLabel(BibTeXInfoList const & xrefs,
1054 Buffer const & buf, docstring const & format,
1055 CiteItem const & ci, bool next, bool second) const
1060 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
1062 if (!loclabel.empty() && !next) {
1063 loclabel = processRichtext(loclabel, ci.richtext);
1064 loclabel = convertLaTeXCommands(loclabel);
1071 docstring const & BibTeXInfo::operator[](docstring const & field) const
1073 BibTeXInfo::const_iterator it = find(field);
1076 static docstring const empty_value = docstring();
1081 docstring const & BibTeXInfo::operator[](string const & field) const
1083 return operator[](from_ascii(field));
// Returns the value this entry provides for the format key `oldkey`.
// A leading "clean:" is stripped from the key before the lookup. The
// entry's own field of that name is read first, then the entries in
// `xrefs` are consulted, and a number of special keys (conditions such as
// "ifstar", name-list keys such as "abbrvnames:<field>", citation texts,
// "year", ...) are handled by the long if/else chain below. The result is
// finally passed to support::truncateWithEllipsis() together with
// `maxsize`.
1087 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
1088 CiteItem const & ci, BibTeXInfoList const & xrefs, size_t maxsize) const
1090 // anything less is pointless
1091 LASSERT(maxsize >= 16, maxsize = 16);
1092 string key = oldkey;
1093 bool cleanit = false;
1094 if (prefixIs(oldkey, "clean:")) {
// 6 == length of "clean:"
1095 key = oldkey.substr(6);
1099 docstring ret = operator[](key);
1100 if (ret.empty() && !xrefs.empty()) {
1101 // xr is a (reference to a) BibTeXInfo const *
1102 for (auto const & xr : xrefs) {
1103 if (xr && !(*xr)[key].empty()) {
1110 // some special keys
1111 // FIXME: dialog, textbefore and textafter have nothing to do with this
1112 if (key == "dialog" && ci.context == CiteItem::Dialog)
1113 ret = from_ascii("x"); // any non-empty string will do
1114 else if (key == "export" && ci.context == CiteItem::Export)
1115 ret = from_ascii("x"); // any non-empty string will do
1116 else if (key == "ifstar" && ci.Starred)
1117 ret = from_ascii("x"); // any non-empty string will do
1118 else if (key == "ifqualified" && ci.isQualified)
1119 ret = from_ascii("x"); // any non-empty string will do
1120 else if (key == "entrytype")
1122 else if (prefixIs(key, "ifentrytype:")
1123 && from_ascii(key.substr(12)) == entry_type_)
1124 ret = from_ascii("x"); // any non-empty string will do
1125 else if (key == "key")
1127 else if (key == "label")
1129 else if (key == "modifier" && modifier_ != 0)
1131 else if (key == "numericallabel")
1133 else if (prefixIs(key, "ifmultiple:")) {
1134 // Return whether we have multiple authors
// 11 == length of "ifmultiple:"; the remainder names the field to test
1135 docstring const kind = operator[](from_ascii(key.substr(11)));
1136 if (multipleAuthors(kind))
1137 ret = from_ascii("x"); // any non-empty string will do
1139 else if (prefixIs(key, "abbrvnames:")) {
1140 // Special key to provide abbreviated name list,
1141 // with respect to maxcitenames. Suitable for Bibliography
// 11 == length of "abbrvnames:"
1143 docstring const kind = operator[](from_ascii(key.substr(11)));
1144 ret = getAuthorList(&buf, kind, false, false, true);
1145 if (ci.forceUpperCase && isLowerCase(ret[0]))
1146 ret[0] = uppercase(ret[0]);
1147 } else if (prefixIs(key, "fullnames:")) {
1148 // Return a full name list. Suitable for Bibliography
// 10 == length of "fullnames:"
1150 docstring const kind = operator[](from_ascii(key.substr(10)));
1151 ret = getAuthorList(&buf, kind, true, false, true);
1152 if (ci.forceUpperCase && isLowerCase(ret[0]))
1153 ret[0] = uppercase(ret[0]);
1154 } else if (prefixIs(key, "forceabbrvnames:")) {
1155 // Special key to provide abbreviated name lists,
1156 // irrespective of maxcitenames. Suitable for Bibliography
// NOTE(review): "forceabbrvnames:" is 16 characters long, but substr(15)
// is used, so the field name looked up still starts with ':'. This
// probably should be substr(16) -- confirm.
1158 docstring const kind = operator[](from_ascii(key.substr(15)));
1159 ret = getAuthorList(&buf, kind, false, true, true);
1160 if (ci.forceUpperCase && isLowerCase(ret[0]))
1161 ret[0] = uppercase(ret[0]);
1162 } else if (prefixIs(key, "abbrvbynames:")) {
1163 // Special key to provide abbreviated name list,
1164 // with respect to maxcitenames. Suitable for further names inside a
1165 // bibliography item // (such as "ed. by ...")
// NOTE(review): "abbrvbynames:" is 13 characters long, but substr(11) is
// used, so the field name looked up still starts with "s:". This
// probably should be substr(13) -- confirm.
1166 docstring const kind = operator[](from_ascii(key.substr(11)));
1167 ret = getAuthorList(&buf, kind, false, false, true, false);
1168 if (ci.forceUpperCase && isLowerCase(ret[0]))
1169 ret[0] = uppercase(ret[0]);
1170 } else if (prefixIs(key, "fullbynames:")) {
1171 // Return a full name list. Suitable for further names inside a
1172 // bibliography item // (such as "ed. by ...")
// NOTE(review): "fullbynames:" is 12 characters long, but substr(10) is
// used, so the field name looked up still starts with "s:". This
// probably should be substr(12) -- confirm.
1173 docstring const kind = operator[](from_ascii(key.substr(10)));
1174 ret = getAuthorList(&buf, kind, true, false, true, false);
1175 if (ci.forceUpperCase && isLowerCase(ret[0]))
1176 ret[0] = uppercase(ret[0]);
1177 } else if (prefixIs(key, "forceabbrvbynames:")) {
1178 // Special key to provide abbreviated name lists,
1179 // irrespective of maxcitenames. Suitable for further names inside a
1180 // bibliography item // (such as "ed. by ...")
// NOTE(review): "forceabbrvbynames:" is 18 characters long, but
// substr(15) is used, so the field name looked up still starts with
// "es:". This probably should be substr(18) -- confirm.
1181 docstring const kind = operator[](from_ascii(key.substr(15)));
1182 ret = getAuthorList(&buf, kind, false, true, true, false);
1183 if (ci.forceUpperCase && isLowerCase(ret[0]))
1184 ret[0] = uppercase(ret[0]);
1185 } else if (key == "abbrvciteauthor") {
1186 // Special key to provide abbreviated author or
1187 // editor names (suitable for citation labels),
1188 // with respect to maxcitenames.
1189 ret = getAuthorOrEditorList(&buf, false, false);
1190 if (ci.forceUpperCase && isLowerCase(ret[0]))
1191 ret[0] = uppercase(ret[0]);
1192 } else if (key == "fullciteauthor") {
1193 // Return a full author or editor list (for citation labels)
1194 ret = getAuthorOrEditorList(&buf, true, false);
1195 if (ci.forceUpperCase && isLowerCase(ret[0]))
1196 ret[0] = uppercase(ret[0]);
1197 } else if (key == "forceabbrvciteauthor") {
1198 // Special key to provide abbreviated author or
1199 // editor names (suitable for citation labels),
1200 // irrespective of maxcitenames.
1201 ret = getAuthorOrEditorList(&buf, false, true);
1202 if (ci.forceUpperCase && isLowerCase(ret[0]))
1203 ret[0] = uppercase(ret[0]);
1204 } else if (key == "bibentry") {
1205 // Special key to provide the full bibliography entry: see getInfo()
1206 CiteEngineType const engine_type = buf.params().citeEngineType();
1207 DocumentClass const & dc = buf.params().documentClass();
1208 docstring const & format =
1209 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1211 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1212 } else if (key == "textbefore")
1213 ret = ci.textBefore;
1214 else if (key == "textafter")
1216 else if (key == "curpretext") {
// Walks the (key, text) pairs returned by ci.getPretexts(), comparing
// each pair's key against bib_key_ and a running count against
// num_bib_key_.
1217 vector<pair<docstring, docstring>> pres = ci.getPretexts();
1218 vector<pair<docstring, docstring>>::iterator it = pres.begin();
1220 for (; it != pres.end() ; ++it) {
1221 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1226 if ((*it).first == bib_key_)
1229 } else if (key == "curposttext") {
// Same walk as for "curpretext", over ci.getPosttexts().
1230 vector<pair<docstring, docstring>> posts = ci.getPosttexts();
1231 vector<pair<docstring, docstring>>::iterator it = posts.begin();
1233 for (; it != posts.end() ; ++it) {
1234 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1239 if ((*it).first == bib_key_)
1242 } else if (key == "year")
1247 ret = xml::cleanAttr(ret);
1249 // make sure it is not too big
1250 support::truncateWithEllipsis(ret, maxsize);
1255 //////////////////////////////////////////////////////////////////////
1259 //////////////////////////////////////////////////////////////////////
1263 // A functor for use with sort, leading to case insensitive sorting
1264 bool compareNoCase(const docstring & a, const docstring & b) {
1265 return compare_no_case(a, b) < 0;
1271 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1273 vector<docstring> result;
1274 if (!data.isBibTeX())
1276 // Legacy crossref field. This is not nestable.
1277 if (!nested && !data["crossref"].empty()) {
1278 docstring const xrefkey = data["crossref"];
1279 result.push_back(xrefkey);
1280 // However, check for nested xdatas
1281 BiblioInfo::const_iterator it = find(xrefkey);
1283 BibTeXInfo const & xref = it->second;
1284 vector<docstring> const nxdata = getXRefs(xref, true);
1285 if (!nxdata.empty())
1286 result.insert(result.end(), nxdata.begin(), nxdata.end());
1289 // Biblatex's xdata field. Infinitely nestable.
1290 // XData field can consist of a comma-separated list of keys
1291 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1292 if (!xdatakeys.empty()) {
1293 for (auto const & xdatakey : xdatakeys) {
1294 result.push_back(xdatakey);
1295 BiblioInfo::const_iterator it = find(xdatakey);
1297 BibTeXInfo const & xdata = it->second;
1298 vector<docstring> const nxdata = getXRefs(xdata, true);
1299 if (!nxdata.empty())
1300 result.insert(result.end(), nxdata.begin(), nxdata.end());
1308 vector<docstring> const BiblioInfo::getKeys() const
1310 vector<docstring> bibkeys;
1311 for (auto const & bi : *this)
1312 bibkeys.push_back(bi.first);
1313 sort(bibkeys.begin(), bibkeys.end(), &compareNoCase);
1318 vector<docstring> const BiblioInfo::getFields() const
1320 vector<docstring> bibfields;
1321 for (auto const & fn : field_names_)
1322 bibfields.push_back(fn);
1323 sort(bibfields.begin(), bibfields.end());
1328 vector<docstring> const BiblioInfo::getEntries() const
1330 vector<docstring> bibentries;
1331 for (auto const & et : entry_types_)
1332 bibentries.push_back(et);
1333 sort(bibentries.begin(), bibentries.end());
1338 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1340 BiblioInfo::const_iterator it = find(key);
1343 BibTeXInfo const & data = it->second;
1344 return data.getAuthorOrEditorList(&buf, false);
1348 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1350 BiblioInfo::const_iterator it = find(key);
1353 BibTeXInfo const & data = it->second;
1354 return data.citeNumber();
1357 void BiblioInfo::getLocators(docstring const & key, docstring & doi, docstring & url, docstring & file) const
1359 BiblioInfo::const_iterator it = find(key);
1362 BibTeXInfo const & data = it->second;
1363 data.getLocators(doi,url,file);
1367 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1369 BiblioInfo::const_iterator it = find(key);
1372 BibTeXInfo const & data = it->second;
1373 docstring year = data.getYear();
1375 // let's try the crossrefs
1376 vector<docstring> const xrefs = getXRefs(data);
1380 for (docstring const & xref : xrefs) {
1381 BiblioInfo::const_iterator const xrefit = find(xref);
1382 if (xrefit == end())
1384 BibTeXInfo const & xref_data = xrefit->second;
1385 year = xref_data.getYear();
1391 if (use_modifier && data.modifier() != 0)
1392 year += data.modifier();
1397 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1399 docstring const year = getYear(key, use_modifier);
1401 return buf.B_("No year");
1406 docstring const BiblioInfo::getInfo(docstring const & key,
1407 Buffer const & buf, CiteItem const & ci, docstring const & format) const
1409 BiblioInfo::const_iterator it = find(key);
1411 return _("Bibliography entry not found!");
1412 BibTeXInfo const & data = it->second;
1413 BibTeXInfoList xrefptrs;
1414 for (docstring const & xref : getXRefs(data)) {
1415 BiblioInfo::const_iterator const xrefit = find(xref);
1416 if (xrefit != end())
1417 xrefptrs.push_back(&(xrefit->second));
1419 return data.getInfo(xrefptrs, buf, ci, format);
1423 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1424 Buffer const & buf, string const & style, CiteItem const & ci) const
1426 size_t max_size = ci.max_size;
1427 // shorter makes no sense
1428 LASSERT(max_size >= 16, max_size = 16);
1430 // we can't display more than 10 of these, anyway
1431 // but since we truncate in the middle,
1432 // we need to split into two halfs.
1433 bool const too_many_keys = keys.size() > 10;
1434 vector<docstring> lkeys;
1435 if (too_many_keys) {
1436 lkeys.insert(lkeys.end(), keys.end() - 5, keys.end());
1438 keys.insert(keys.end(), lkeys.begin(), lkeys.end());
1441 CiteEngineType const engine_type = buf.params().citeEngineType();
1442 DocumentClass const & dc = buf.params().documentClass();
1443 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1444 docstring ret = format;
1445 vector<docstring>::const_iterator key = keys.begin();
1446 vector<docstring>::const_iterator ken = keys.end();
1447 vector<docstring> handled_keys;
1448 for (int i = 0; key != ken; ++key, ++i) {
1449 handled_keys.push_back(*key);
1451 for (auto const & k : handled_keys) {
1455 BiblioInfo::const_iterator it = find(*key);
1456 BibTeXInfo empty_data;
1457 empty_data.key(*key);
1458 BibTeXInfo & data = empty_data;
1459 vector<BibTeXInfo const *> xrefptrs;
1462 for (docstring const & xref : getXRefs(data)) {
1463 BiblioInfo::const_iterator const xrefit = find(xref);
1464 if (xrefit != end())
1465 xrefptrs.push_back(&(xrefit->second));
1469 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1472 support::truncateWithEllipsis(ret, max_size, true);
1478 bool BiblioInfo::isBibtex(docstring const & key) const
1481 split(key, key1, ',');
1482 BiblioInfo::const_iterator it = find(key1);
1485 return it->second.isBibTeX();
1489 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1490 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1491 Buffer const & buf, CiteItem const & ci) const
1494 return vector<pair<docstring,docstring>>();
1497 CiteStringMap csm(styles.size());
1498 for (size_t i = 0; i != csm.size(); ++i) {
1499 style = styles[i].name;
1500 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1507 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1509 bimap_.insert(info.begin(), info.end());
1510 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1511 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1517 // used in xhtml to sort a list of BibTeXInfo objects
1518 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1520 docstring const lauth = lhs->getAuthorOrEditorList();
1521 docstring const rauth = rhs->getAuthorOrEditorList();
1522 docstring const lyear = lhs->getYear();
1523 docstring const ryear = rhs->getYear();
1524 docstring const ltitl = lhs->operator[]("title");
1525 docstring const rtitl = rhs->operator[]("title");
1526 return (lauth < rauth)
1527 || (lauth == rauth && lyear < ryear)
1528 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1534 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1536 cited_entries_.clear();
1537 // We are going to collect all the citation keys used in the document,
1538 // getting them from the TOC.
1539 // FIXME We may want to collect these differently, in the first case,
1540 // so that we might have them in order of appearance.
1541 set<docstring> citekeys;
1542 Toc const & toc = *buf.tocBackend().toc("citation");
1543 for (auto const & t : toc) {
1544 if (t.str().empty())
1546 vector<docstring> const keys = getVectorFromString(t.str());
1547 citekeys.insert(keys.begin(), keys.end());
1549 if (citekeys.empty())
1552 // We have a set of the keys used in this document.
1553 // We will now convert it to a list of the BibTeXInfo objects used in
1555 vector<BibTeXInfo const *> bi;
1556 for (auto const & ck : citekeys) {
1557 BiblioInfo::const_iterator const bt = find(ck);
1558 if (bt == end() || !bt->second.isBibTeX())
1560 bi.push_back(&(bt->second));
1563 sort(bi.begin(), bi.end(), lSorter);
1565 // Now we can write the sorted keys
1566 // b is a BibTeXInfo const *
1567 for (auto const & b : bi)
1568 cited_entries_.push_back(b->key());
1572 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1574 collectCitedEntries(buf);
1575 CiteEngineType const engine_type = buf.params().citeEngineType();
1576 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1580 // used to remember the last one we saw
1581 // we'll be comparing entries to see if we need to add
1582 // modifiers, like "1984a"
1583 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1585 // add letters to years
1586 for (auto const & ce : cited_entries_) {
1587 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1588 // this shouldn't happen, but...
1589 if (biit == bimap_.end())
1590 // ...fail gracefully, anyway.
1592 BibTeXInfo & entry = biit->second;
1594 docstring const num = convert<docstring>(++keynumber);
1595 entry.setCiteNumber(num);
1597 // The first test here is checking whether this is the first
1598 // time through the loop. If so, then we do not have anything
1599 // with which to compare.
1600 if (last != bimap_.end()
1601 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1602 // we access the year via getYear() so as to get it from the xref,
1603 // if we need to do so
1604 && getYear(entry.key()) == getYear(last->second.key())) {
1605 if (modifier == 0) {
1606 // so the last one should have been 'a'
1607 last->second.setModifier('a');
1609 } else if (modifier == 'z')
1616 entry.setModifier(modifier);
1617 // remember the last one
1622 for (auto const & ce : cited_entries_) {
1623 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1624 // this shouldn't happen, but...
1625 if (biit == bimap_.end())
1626 // ...fail gracefully, anyway.
1628 BibTeXInfo & entry = biit->second;
1630 entry.label(entry.citeNumber());
1632 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1633 // we do it this way so as to access the xref, if necessary
1634 // note that this also gives us the modifier
1635 docstring const year = getYear(ce, buf, true);
1636 if (!auth.empty() && !year.empty())
1637 entry.label(auth + ' ' + year);
1639 entry.label(entry.key());
1645 //////////////////////////////////////////////////////////////////////
1649 //////////////////////////////////////////////////////////////////////
1652 CitationStyle citationStyleFromString(string const & command,
1653 BufferParams const & params)
1656 if (command.empty())
1659 string const alias = params.getCiteAlias(command);
1660 string cmd = alias.empty() ? command : alias;
1661 if (isUpperCase(command[0])) {
1662 cs.forceUpperCase = true;
1663 cmd[0] = lowercase(cmd[0]);
1666 size_t const n = command.size() - 1;
1667 if (command[n] == '*') {
1668 cs.hasStarredVersion = true;
1669 if (suffixIs(cmd, '*'))
1670 cmd = cmd.substr(0, cmd.size() - 1);
1678 string citationStyleToString(const CitationStyle & cs, bool const latex)
1680 string cmd = latex ? cs.cmd : cs.name;
1681 if (cs.forceUpperCase)
1682 cmd[0] = uppercase(cmd[0]);
1683 if (cs.hasStarredVersion)
1689 docstring authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs, Buffer const & buf)
1691 // This function closely mimics getAuthorList, but produces DocBook instead of text.
1692 // It has been greatly simplified, as the complete list of authors is always produced. No separators are required,
1693 // as the output has a database-like shape.
1694 // constructName has also been merged within, as it becomes really simple and leads to no copy-paste.
1696 if (authorsString.empty()) {
1700 // Split the input list of authors into individual authors.
1701 vector<docstring> const authors = getAuthors(authorsString);
1703 // Retrieve the "et al." variation.
1704 string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal");
1706 // Output the list of authors.
1707 xs << xml::StartTag("authorgroup");
1710 auto it = authors.cbegin();
1711 auto en = authors.cend();
1712 for (size_t i = 0; it != en; ++it, ++i) {
1713 xs << xml::StartTag("author");
1715 xs << xml::StartTag("personname");
1717 docstring name = *it;
1719 // All authors go in a <personname>. If more structure is known, use it; otherwise (just "et al."), print it as such.
1720 if (name == "others") {
1723 name_parts parts = nameParts(name);
1724 if (! parts.prefix.empty()) {
1725 xs << xml::StartTag("honorific");
1727 xs << xml::EndTag("honorific");
1730 if (! parts.prename.empty()) {
1731 xs << xml::StartTag("firstname");
1732 xs << parts.prename;
1733 xs << xml::EndTag("firstname");
1736 if (! parts.surname.empty()) {
1737 xs << xml::StartTag("surname");
1738 xs << parts.surname;
1739 xs << xml::EndTag("surname");
1742 if (! parts.suffix.empty()) {
1743 xs << xml::StartTag("othername", "role=\"suffix\"");
1745 xs << xml::EndTag("othername");
1750 xs << xml::EndTag("personname");
1752 xs << xml::EndTag("author");
1755 // Could add an affiliation after <personname>, but not stored in BibTeX.
1757 xs << xml::EndTag("authorgroup");