3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
10 * \author Jürgen Spitzmüller
12 * Full author contact details are available in file CREDITS.
17 #include "BiblioInfo.h"
19 #include "BufferParams.h"
20 #include "buffer_funcs.h"
23 #include "InsetIterator.h"
26 #include "Paragraph.h"
27 #include "TextClass.h"
28 #include "TocBackend.h"
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/docstream.h"
33 #include "support/FileName.h"
34 #include "support/gettext.h"
35 #include "support/lassert.h"
36 #include "support/lstrings.h"
37 #include "support/regex.h"
38 #include "support/textutils.h"
44 using namespace lyx::support;
51 // Remove placeholders from names: every "$$space!" marker becomes a
// blank and every "$$comma!" marker becomes a comma. Returns the result.
52 docstring renormalize(docstring const & input)
54 docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
55 return subst(res, from_ascii("$$comma!"), from_ascii(","));
59 // Split the surname into prefix ("von-part") and family name.
// Returns a (prefix, family name) pair. A surname with fewer than two
// blank-separated tokens is returned unchanged, with an empty prefix.
60 pair<docstring, docstring> parseSurname(docstring const & sname)
62 // Split the surname into its tokens
63 vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
64 if (pieces.size() < 2)
65 return make_pair(docstring(), sname);
67 // Now we look for pieces that begin with a lower case letter.
68 // All except for the very last token constitute the "von-part".
70 vector<docstring>::const_iterator it = pieces.begin();
71 vector<docstring>::const_iterator const en = pieces.end();
73 for (; it != en; ++it) {
76 // If this is the last piece, then what we now have is
77 // the family name, notwithstanding the casing.
80 char_type const c = (*it)[0];
81 // If the piece starts with an upper case char, we assume
82 // this is part of the surname.
85 // Nothing of the former, so add this piece to the prefix
93 // Reconstruct the family name.
94 // Note that if we left the loop because it + 1 == en,
95 // then this will still do the right thing, i.e., make surname
96 // just be the last piece.
99 for (; it != en; ++it) {
106 return make_pair(prefix, surname);
118 // Gets the name parts (prename, surname, prefix, suffix) from an author-type string.
// Two input shapes are handled: the comma-separated form (family name
// first, prename last, optional suffix in between) and the blank-separated
// form (prename(s) first, family name last).
119 name_parts nameParts(docstring const & iname)
125 // First we check for groupings (via {...}) and replace blanks and
126 // commas inside groups with temporary placeholders
129 docstring::const_iterator p = iname.begin();
130 while (p != iname.end()) {
131 // count grouping level
136 // generate string with probable placeholders
137 if (*p == ' ' && gl > 0)
138 name += from_ascii("$$space!");
139 else if (*p == ',' && gl > 0)
140 name += from_ascii("$$comma!");
146 // Now we look for a comma, and take the last name to be everything
147 // preceding the right-most one, so that we also get the name suffix
149 vector<docstring> pieces = getVectorFromString(name);
150 if (pieces.size() > 1) {
151 // Whether we have a name suffix or not, the prename is
153 res.prename = renormalize(pieces.back());
154 // The family name, conversely, is always the first item.
155 // However, it might contain a prefix (aka "von" part)
156 docstring const sname = pieces.front();
// NOTE(review): parseSurname(sname) is evaluated twice below; the
// returned pair could be stored once and both members read from it.
157 res.prefix = renormalize(parseSurname(sname).first);
158 res.surname = renormalize(parseSurname(sname).second);
159 // If we have three pieces (the maximum allowed by BibTeX),
160 // the second one is the name suffix.
161 if (pieces.size() > 2)
162 res.suffix = renormalize(pieces.at(1));
166 // OK, so now we want to look for the last name.
167 // Split on spaces, to get various tokens.
168 pieces = getVectorFromString(name, from_ascii(" "));
169 // No space: Only a family name given
170 if (pieces.size() < 2) {
171 res.surname = renormalize(pieces.back());
174 // If we get two pieces, assume "prename surname"
175 if (pieces.size() == 2) {
176 res.prename = renormalize(pieces.front());
177 res.surname = renormalize(pieces.back());
181 // More than 3 pieces: A name prefix (aka "von" part) might be included.
182 // We look for the first piece that begins with a lower case letter
183 // (which is the name prefix, if it is not the last token) or the last token.
185 vector<docstring>::const_iterator it = pieces.begin();
186 vector<docstring>::const_iterator const en = pieces.end();
188 for (; it != en; ++it) {
191 char_type const c = (*it)[0];
192 // If the piece starts with a lower case char, we assume
193 // this is the name prefix and thus prename is complete.
196 // Same if this is the last piece, which is always the surname.
199 // Nothing of the former, so add this piece to the prename
207 // Now reconstruct the family name and strip the prefix.
208 // Note that if we left the loop because it + 1 == en,
209 // then this will still do the right thing, i.e., make surname
210 // just be the last piece.
213 for (; it != en; ++it) {
220 res.prename = renormalize(prename);
// NOTE(review): as above, parseSurname(surname) is evaluated twice.
221 res.prefix = renormalize(parseSurname(surname).first);
222 res.surname = renormalize(parseSurname(surname).second);
// Builds a display name from the parts of \p name following \p scheme.
// The scheme may contain the plain placeholders %prename%, %surname%,
// %prefix% and %suffix%, which are substituted at the end, and conditional
// groups of the form {%prename%[[...]]}, {%suffix%[[...]]} and
// {%prefix%[[...]]}, which are matched by the regexes below.
227 docstring constructName(docstring const & name, string const & scheme)
229 // re-constructs a name from name parts according
// NOTE(review): nameParts(name) is evaluated four times here, once per
// member; a single call stored in a local would parse the name only once.
231 docstring const prename = nameParts(name).prename;
232 docstring const surname = nameParts(name).surname;
233 docstring const prefix = nameParts(name).prefix;
234 docstring const suffix = nameParts(name).suffix;
// Each regex captures: (1) text before the group, (2) the group opener,
// (3) the bracketed text, (4) the group closer, (5) text after the group.
236 static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
237 static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
238 static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
240 // Changing the first parameter of regex_match() may corrupt the
241 // second one. In this case we use the temporary string tmp.
242 if (regex_match(scheme, sub, reg1)) {
244 if (!prename.empty())
248 if (regex_match(res, sub, reg2)) {
249 string tmp = sub.str(1);
252 res = tmp + sub.str(5);
254 if (regex_match(res, sub, reg3)) {
255 string tmp = sub.str(1);
258 res = tmp + sub.str(5);
// Finally replace the plain placeholders by the actual name parts.
260 docstring result = from_ascii(res);
261 result = subst(result, from_ascii("%prename%"), prename);
262 result = subst(result, from_ascii("%surname%"), surname);
263 result = subst(result, from_ascii("%prefix%"), prefix);
264 result = subst(result, from_ascii("%suffix%"), suffix);
// Splits an author-type string into the individual names and returns them
// as a vector. Only " and " outside of {...} groups separates names.
269 vector<docstring> const getAuthors(docstring const & author)
271 // We check for groupings (via {...}) and only consider " and "
272 // outside groups as author separator. This is to account
273 // for cases such as {{Barnes and Noble, Inc.}}, which
274 // need to be treated as one single family name.
275 // We use temporary placeholders in order to differentiate the
276 // diverse " and " cases.
278 // First, we temporarily replace all ampersands. It is rather unusual
279 // in author names, but can happen (consider cases such as "C \& A Corp.").
280 docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
281 // Then, we temporarily turn all " and " strings into ampersands in order
282 // to handle them later on a per-char level.
283 iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
284 // Now we traverse through the string and replace the "&" by the proper
285 // output in- and outside groups
288 docstring::const_iterator p = iname.begin();
289 while (p != iname.end()) {
290 // count grouping level
295 // generate string with probable placeholders
298 // Inside groups, we output "and"
299 name += from_ascii("and");
301 // Outside groups, we output a separator
302 name += from_ascii("$$namesep!");
309 // re-insert the literal ampersands
310 name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
312 // Now construct the actual vector
313 return getVectorFromString(name, from_ascii(" $$namesep! "));
// Returns true if getAuthors() splits \p author into more than one name.
317 bool multipleAuthors(docstring const & author)
319 return getAuthors(author).size() > 1;
323 // Converts a string containing LaTeX commands into unicode.
// The input is consumed from the front, one step per loop iteration, with
// three state flags tracking command names, math and backslash escapes.
325 docstring convertLaTeXCommands(docstring const & str)
330 bool scanning_cmd = false;
331 bool scanning_math = false;
332 bool escaped = false; // used to catch \$, etc.
333 while (!val.empty()) {
334 char_type const ch = val[0];
336 // if we're scanning math, we output everything until we
337 // find an unescaped $, at which point we break out.
344 scanning_math = false;
350 // if we're scanning a command name, then we just
351 // discard characters until we hit something that
354 if (isAlphaASCII(ch)) {
359 // so we're done with this command.
360 // now we fall through and check this character.
361 scanning_cmd = false;
364 // was the last character a \? If so, then this is something like:
365 // \\ or \$, so we'll just output it. That's probably not always right...
367 // exception: output \, as THIN SPACE
// (0x2009 is the code point of U+2009 THIN SPACE)
369 ret.push_back(0x2009);
380 scanning_math = true;
384 // Change text mode accents in the form
385 // {\v a} to \v{a} (see #9340).
386 // FIXME: This is a sort of mini-tex2lyx.
387 // Use the real tex2lyx instead!
388 static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
389 if (lyx::regex_search(to_utf8(val), tma_reg)) {
391 val.replace(2, 1, from_ascii("{"));
395 // Apart from the above, we just ignore braces
396 if (ch == '{' || ch == '}') {
401 // we're going to check things that look like commands, so if
402 // this doesn't, just output it.
409 // ok, could be a command of some sort
410 // let's see if it corresponds to some unicode
411 // unicodesymbols has things in the form: \"{u},
412 // whereas we may see things like: \"u. So we'll
413 // look for that and change it, if necessary.
414 // FIXME: This is a sort of mini-tex2lyx.
415 // Use the real tex2lyx instead!
416 static lyx::regex const reg("^\\\\\\W\\w");
417 if (lyx::regex_search(to_utf8(val), reg)) {
// Insert the closing brace first so that index 2 is still valid
// for the opening brace.
418 val.insert(3, from_ascii("}"));
419 val.insert(2, from_ascii("{"));
423 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
424 Encodings::TEXT_CMD, termination, rem);
425 if (!cnvtd.empty()) {
426 // it did, so we'll take that bit and proceed with what's left
431 // it's a command of some sort
440 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
// Rich text starts at "{!" and ends at "!}"; scanning_rich tracks whether
// the current character lies between such a pair.
441 docstring processRichtext(docstring const & str, bool richtext)
446 bool scanning_rich = false;
447 while (!val.empty()) {
448 char_type const ch = val[0];
449 if (ch == '{' && val.size() > 1 && val[1] == '!') {
450 // beginning of rich text
451 scanning_rich = true;
// end of rich text: "!}" while inside a rich text span
455 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
457 scanning_rich = false;
465 // we need to escape '<' and '>'
473 } else if (!scanning_rich /* && !richtext */)
475 // else the character is discarded, which will happen only if
476 // richtext == false and we are scanning rich text
485 //////////////////////////////////////////////////////////////////////
489 //////////////////////////////////////////////////////////////////////
// Constructs an entry flagged as BibTeX (is_bibtex_ = true) with the given
// key and entry type. The numeric key and the modifier start at 0; the
// cached info and format strings start empty.
491 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
492 : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type),
493 info_(), format_(), modifier_(0)
// Returns the name list formatted by getAuthorList(), built from the
// "author" field, with the "editor" field as the alternative source.
// \p buf, \p full and \p forceshort are passed on to getAuthorList().
498 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
499 bool full, bool forceshort) const
501 docstring author = operator[]("author");
503 author = operator[]("editor");
505 return getAuthorList(buf, author, full, forceshort);
// Formats the names in \p author into a single display string.
// \param buf        may be null; built-in defaults then replace the
//                   document class settings and strings are not translated.
// \param author     author-type string, split with getAuthors().
// \param full       if true, the maxnames cut-off is not applied.
// \param forceshort request shortening; only effective with more than
//                   one name.
// The result is passed through convertLaTeXCommands() before returning.
509 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
510 docstring const & author, bool const full, bool const forceshort,
511 bool const allnames, bool const beginning) const
513 // The maxnames threshold depends on the engine
514 size_t maxnames = buf ?
515 buf->params().documentClass().max_citenames() : 2;
518 docstring const opt = label();
523 docstring const remainder = trim(split(opt, authors, '('));
524 if (remainder.empty())
525 // in this case, we didn't find a "(",
526 // so we don't have author (year)
529 // Natbib syntax is "Jones et al.(1990)Jones, Baker, and Williams"
530 docstring const fullauthors = trim(rsplit(remainder, ')'));
531 if (!fullauthors.empty())
540 // OK, we've got some names. Let's format them.
541 // Try to split the author list
542 vector<docstring> const authors = getAuthors(author);
546 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
547 : ENGINE_TYPE_DEFAULT;
549 // These are defined in the styles
551 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
553 string const namesep =
554 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
556 string const lastnamesep =
557 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
559 string const pairnamesep =
560 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
// Name schemes in the syntax understood by constructName().
562 string firstnameform =
563 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
564 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
566 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
567 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
568 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
569 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
571 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
572 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
573 string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
574 : "{%prefix%[[%prefix% ]]}%surname%";
576 // Shorten the list (with et al.) if forceshort is set
577 // and the list can actually be shortened, else if maxcitenames
578 // is passed and full is not set.
579 bool shorten = forceshort && authors.size() > 1;
580 vector<docstring>::const_iterator it = authors.begin();
581 vector<docstring>::const_iterator en = authors.end();
582 for (size_t i = 0; it != en; ++it, ++i) {
583 if (i >= maxnames && !full) {
// BibTeX's literal "others" stands for "et al."
587 if (*it == "others") {
588 retval += buf ? buf->B_(etal) : from_ascii(etal);
// The separator before the last name differs for lists of exactly two.
591 if (i > 0 && i == authors.size() - 1) {
592 if (authors.size() == 2)
593 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
595 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
597 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
599 retval += (i == 0) ? constructName(*it, firstnameform)
600 : constructName(*it, othernameform);
602 retval += constructName(*it, citenameform);
606 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
608 retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
611 return convertLaTeXCommands(retval);
// Returns the year of this entry. The sources consulted are the "year"
// field, biblatex's "date" field and the parenthesised part of label().
615 docstring const BibTeXInfo::getYear() const
618 // first try legacy year field
619 docstring year = operator[]("year");
622 // now try biblatex's date field
623 year = operator[]("date");
624 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
625 // We only want the years.
// yreg captures the first four-digit year, ereg the one after the slash.
626 static regex const yreg("[-]?([\\d]{4}).*");
627 static regex const ereg(".*/[-]?([\\d]{4}).*");
629 string const date = to_utf8(year);
630 if (!regex_match(date, sm, yreg))
631 // cannot parse year.
633 year = from_ascii(sm[1]);
634 // check for an endyear
635 if (regex_match(date, sm, ereg))
// 0x2013 is the en dash used to join start and end year
636 year += char_type(0x2013) + from_ascii(sm[1]);
640 docstring const opt = label();
645 docstring tmp = split(opt, authors, '(');
647 // we don't have author (year)
650 tmp = split(tmp, year, ')');
// Fills the three output parameters with locators for this entry:
// \param doi  the "doi" field, prefixed with "https://doi.org/" unless it
//             already starts with "http"
// \param url  the "url" field, or a URL built from biblatex's
//             eprinttype/eprint fields
// \param file a "file:///" URL built from the "file" (JabRef) or
//             "localfile" (kbibtex) field
655 void BibTeXInfo::getLocators(docstring & doi, docstring & url, docstring & file) const
658 // get "doi" entry from citation record
659 doi = operator[]("doi");
660 if (!doi.empty() && !prefixIs(doi,from_ascii("http")))
661 doi = "https://doi.org/" + doi;
662 // get "url" entry from citation record
663 url = operator[]("url");
664 // get "file" entry from citation record
665 file = operator[]("file");
667 // Jabref case, field has a format:
668 // Description:Location:Filetype;Description:Location:Filetype...
669 // We will grab only first pdf
671 docstring ret, filedest, tmp;
// The location is the text between the first and the second colon.
672 ret = split(file, tmp, ':');
673 tmp = split(ret, filedest, ':');
674 //TODO howto deal with relative directories?
675 FileName f(to_utf8(filedest));
677 file = "file:///" + filedest;
680 // kbibtex case, format:
681 // file1.pdf;file2.pdf
682 // We will grab only first pdf
685 kfile = operator[]("localfile");
686 if (!kfile.empty()) {
687 docstring filedest, tmp;
688 tmp = split(kfile, filedest, ';');
689 //TODO howto deal with relative directories?
690 FileName f(to_utf8(filedest));
692 file = "file:///" + filedest;
698 // try biblatex specific fields, see its manual
699 // 3.13.7 "Electronic Publishing Information"
700 docstring eprinttype = operator[]("eprinttype");
701 docstring eprint = operator[]("eprint");
705 if (eprinttype == "arxiv")
706 url = "https://arxiv.org/abs/" + eprint;
707 if (eprinttype == "jstor")
708 url = "https://www.jstor.org/stable/" + eprint;
709 if (eprinttype == "pubmed")
710 url = "http://www.ncbi.nlm.nih.gov/pubmed/" + eprint;
711 if (eprinttype == "hdl")
712 url = "https://hdl.handle.net/" + eprint;
713 if (eprinttype == "googlebooks")
714 url = "http://books.google.com/books?id=" + eprint;
719 // Here can be handled the bibliography environment. All one could do
720 // here is let LyX scan the entry for URL or HRef insets.
726 docstring parseOptions(docstring const & format, string & optkey,
727 docstring & ifpart, docstring & elsepart);
729 // Calls parseOptions to deal with an embedded option, such as:
730 // {%number%[[, no.~%number%]]}
731 // which must appear at the start of format. ifelsepart gets the
732 // whole of the option, and we return what's left after the option.
733 // We return format itself if there is an error.
734 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
736 LASSERT(format[0] == '{' && format[1] == '%', return format);
740 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
741 if (format == rest) { // parse error
742 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
// Sanity check: the remainder can never be longer than the input.
745 LASSERT(rest.size() <= format.size(),
746 { ifelsepart = docstring(); return format; });
// The option is whatever parseOptions consumed from the front of format.
747 ifelsepart = format.substr(0, format.size() - rest.size());
752 // Gets a "clause" from a format string, where the clause is
753 // delimited by '[[' and ']]'. Returns what is left after the
754 // clause is removed, and returns format if there is an error.
// Embedded options of the form {%key%...} inside the clause are parsed
// by parseEmbeddedOption().
755 docstring getClause(docstring const & format, docstring & clause)
757 docstring fmt = format;
760 // we'll remove characters from the front of fmt as we
762 while (!fmt.empty()) {
// "]]" marks the end of the clause
763 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
768 // check for an embedded option
769 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
771 docstring const rest = parseEmbeddedOption(fmt, part);
773 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
778 } else { // it's just a normal character
787 // Parses an options string, which must appear at the start of the
788 // format parameter. Puts the parsed bits in optkey, ifpart, and
789 // elsepart and returns what's left after the option is removed.
790 // If there's an error, it returns format itself.
// The expected shape is {%key%[[if clause]]} or
// {%key%[[if clause]][[else clause]]}.
791 docstring parseOptions(docstring const & format, string & optkey,
792 docstring & ifpart, docstring & elsepart)
794 LASSERT(format[0] == '{' && format[1] == '%', return format);
// skip the leading "{%"
796 docstring fmt = format.substr(2);
797 size_t pos = fmt.find('%'); // end of key
798 if (pos == string::npos) {
799 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
802 optkey = to_utf8(fmt.substr(0, pos));
803 fmt = fmt.substr(pos + 1);
804 // [[format]] should be next
805 if (fmt[0] != '[' || fmt[1] != '[') {
806 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
810 docstring curfmt = fmt;
811 fmt = getClause(curfmt, ifpart);
813 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
817 if (fmt[0] == '}') // we're done, no else clause
818 return fmt.substr(1);
820 // else part should follow
821 if (fmt[0] != '[' || fmt[1] != '[') {
822 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
827 fmt = getClause(curfmt, elsepart);
// getClause() returns its input unchanged on error; a '}' must close
// the option.
829 if (fmt == curfmt || fmt[0] != '}') {
830 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
833 return fmt.substr(1);
840 Bug #9131 revealed an oddity in how we are generating citation information
841 when more than one key is given. We end up building a longer and longer format
842 string as we go, which we then have to re-parse, over and over and over again,
843 rather than generating the information for the individual keys and then putting
844 all of that together. We do that to deal with the way separators work, from what
845 I can tell, but it still feels like a hack. Fixing this would require quite a
846 bit of work, however.
// Expands a citation format string for this entry and returns the result.
// The format is consumed from the front. %key% is replaced by a macro, a
// translation or a field value; {%key%[[if]][[else]]} is a conditional
// handled with parseOptions(); "{!" and "!}" delimit rich text.
// \param counter pass counter, checked against max_passes
// \param next    if set, the if-part of a {%next%...} option is written
//                without expansion
// \param second  if set, the if-part of a {%second%...} option is expanded
848 docstring BibTeXInfo::expandFormat(docstring const & format,
849 BibTeXInfoList const & xrefs, int & counter, Buffer const & buf,
850 CiteItem const & ci, bool next, bool second) const
852 // incorrect use of macros could put us in an infinite loop
853 static int const max_passes = 5000;
854 // the use of overly large keys can lead to performance problems, due
855 // to eventual attempts to convert LaTeX macros to unicode. See bug
856 // #8944. By default, the size is limited to 128 (in CiteItem), but
857 // for specific purposes (such as XHTML export), it needs to be enlarged
858 // This is perhaps not the best solution, but it will have to do for now.
859 size_t const max_keysize = ci.max_key_size;
860 odocstringstream ret; // return value
862 bool scanning_key = false;
863 bool scanning_rich = false;
865 CiteEngineType const engine_type = buf.params().citeEngineType();
866 docstring fmt = format;
867 // we'll remove characters from the front of fmt as we
869 while (!fmt.empty()) {
870 if (counter > max_passes) {
871 LYXERR0("Recursion limit reached while parsing `"
876 char_type thischar = fmt[0];
877 if (thischar == '%') {
878 // beginning or end of key
881 scanning_key = false;
882 // so we replace the key with its value, which may be empty
886 buf.params().documentClass().getCiteMacro(engine_type, key);
// The macro text is pushed back onto fmt so that it gets expanded too.
887 fmt = from_utf8(val) + fmt.substr(1);
890 } else if (prefixIs(key, "B_")) {
891 // a translatable bit (to the Buffer language)
893 buf.params().documentClass().getCiteMacro(engine_type, key);
894 docstring const trans =
895 translateIfPossible(from_utf8(val), buf.params().language->code());
897 } else if (key[0] == '_') {
898 // a translatable bit (to the GUI language)
900 buf.params().documentClass().getCiteMacro(engine_type, key);
901 docstring const trans =
902 translateIfPossible(from_utf8(val));
905 docstring const val =
906 getValueForKey(key, buf, ci, xrefs, max_keysize);
// Rich text wrapper: a span whose class name is derived from the key.
908 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
911 ret << from_ascii("{!</span>!}");
919 else if (thischar == '{') {
920 // beginning of option?
922 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
925 if (fmt.size() > 1) {
927 // it is the beginning of an optional format
931 docstring const newfmt =
932 parseOptions(fmt, optkey, ifpart, elsepart);
933 if (newfmt == fmt) // parse error
936 docstring const val =
937 getValueForKey(optkey, buf, ci, xrefs);
938 if (optkey == "next" && next)
939 ret << ifpart; // without expansion
940 else if (optkey == "second" && second) {
942 ret << expandFormat(ifpart, xrefs, newcounter, buf,
944 } else if (!val.empty()) {
946 ret << expandFormat(ifpart, xrefs, newcounter, buf,
948 } else if (!elsepart.empty()) {
950 ret << expandFormat(elsepart, xrefs, newcounter, buf,
953 // fmt will have been shortened for us already
957 // beginning of rich text
958 scanning_rich = true;
960 ret << from_ascii("{!");
964 // we are here if '{' was not followed by % or !.
965 // So it's just a character.
// end of rich text
968 else if (scanning_rich && thischar == '!'
969 && fmt.size() > 1 && fmt[1] == '}') {
971 scanning_rich = false;
973 ret << from_ascii("!}");
976 else if (scanning_key)
977 key += char(thischar);
981 } catch (EncodingException & /* e */) {
982 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
988 LYXERR0("Never found end of key in `" << format << "'!");
992 LYXERR0("Never found end of rich text in `" << format << "'!");
// Returns the formatted information for this entry, as a reference to a
// cached member string: info_richtext_ when ci.richtext is set, info_
// otherwise. If \p format_in is empty, the format is taken from the
// document class for this entry type. The caches are cleared whenever
// the format differs from the one last used (format_).
999 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
1000 Buffer const & buf, CiteItem const & ci, docstring const & format_in) const
1002 bool const richtext = ci.richtext;
1004 CiteEngineType const engine_type = buf.params().citeEngineType();
1005 DocumentClass const & dc = buf.params().documentClass();
1006 docstring const & format = format_in.empty()?
1007 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)))
1010 if (format != format_) {
1011 // clear caches since format changed
1013 info_richtext_.clear();
1017 if (!richtext && !info_.empty()) {
1018 info_ = convertLaTeXCommands(processRichtext(info_, false));
1021 if (richtext && !info_richtext_.empty())
1022 return info_richtext_;
1025 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
1031 info_ = expandFormat(format, xrefs, counter, buf,
1034 if (info_.empty()) {
1035 // this probably shouldn't happen
1040 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
1041 return info_richtext_;
1044 info_ = convertLaTeXCommands(processRichtext(info_, false));
// Expands \p format into a citation label with expandFormat(). A non-empty
// result is post-processed with processRichtext() and
// convertLaTeXCommands(), unless \p next is set.
1049 docstring const BibTeXInfo::getLabel(BibTeXInfoList const & xrefs,
1050 Buffer const & buf, docstring const & format,
1051 CiteItem const & ci, bool next, bool second) const
1056 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
1058 if (!loclabel.empty() && !next) {
1059 loclabel = processRichtext(loclabel, ci.richtext);
1060 loclabel = convertLaTeXCommands(loclabel);
// Looks up \p field with find() and returns a const reference.
1067 docstring const & BibTeXInfo::operator[](docstring const & field) const
1069 BibTeXInfo::const_iterator it = find(field);
// static, so that a reference to it remains valid after the call returns
1072 static docstring const empty_value = docstring();
// Convenience overload: converts \p field with from_ascii() and forwards
// to the docstring overload.
1077 docstring const & BibTeXInfo::operator[](string const & field) const
1079 return operator[](from_ascii(field));
// Returns the value that \p oldkey stands for in a citation format.
// A "clean:" prefix is stripped from the key before the lookup. The key is
// first looked up as a field of this entry, then in the entries of
// \p xrefs; a number of special keys are computed instead. The result is
// truncated with an ellipsis to \p maxsize, which must be at least 16.
1083 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
1084 CiteItem const & ci, BibTeXInfoList const & xrefs, size_t maxsize) const
1086 // anything less is pointless
1087 LASSERT(maxsize >= 16, maxsize = 16);
1088 string key = oldkey;
1089 bool cleanit = false;
1090 if (prefixIs(oldkey, "clean:")) {
// 6 == length of "clean:"
1091 key = oldkey.substr(6);
1095 docstring ret = operator[](key);
1096 if (ret.empty() && !xrefs.empty()) {
1097 // xr is a (reference to a) BibTeXInfo const *
1098 for (auto const & xr : xrefs) {
1099 if (xr && !(*xr)[key].empty()) {
1106 // some special keys
1107 // FIXME: dialog, textbefore and textafter have nothing to do with this
1108 if (key == "dialog" && ci.context == CiteItem::Dialog)
1109 ret = from_ascii("x"); // any non-empty string will do
1110 else if (key == "export" && ci.context == CiteItem::Export)
1111 ret = from_ascii("x"); // any non-empty string will do
1112 else if (key == "ifstar" && ci.Starred)
1113 ret = from_ascii("x"); // any non-empty string will do
1114 else if (key == "ifqualified" && ci.isQualified)
1115 ret = from_ascii("x"); // any non-empty string will do
1116 else if (key == "entrytype")
1118 else if (prefixIs(key, "ifentrytype:")
1119 && from_ascii(key.substr(12)) == entry_type_)
1120 ret = from_ascii("x"); // any non-empty string will do
1121 else if (key == "key")
1123 else if (key == "label")
1125 else if (key == "modifier" && modifier_ != 0)
1127 else if (key == "numericallabel")
1129 else if (prefixIs(key, "ifmultiple:")) {
1130 // Return whether we have multiple authors
1131 docstring const kind = operator[](from_ascii(key.substr(11)));
1132 if (multipleAuthors(kind))
1133 ret = from_ascii("x"); // any non-empty string will do
1135 else if (prefixIs(key, "abbrvnames:")) {
1136 // Special key to provide abbreviated name list,
1137 // with respect to maxcitenames. Suitable for Bibliography
1139 docstring const kind = operator[](from_ascii(key.substr(11)));
1140 ret = getAuthorList(&buf, kind, false, false, true);
1141 if (ci.forceUpperCase && isLowerCase(ret[0]))
1142 ret[0] = uppercase(ret[0]);
1143 } else if (prefixIs(key, "fullnames:")) {
1144 // Return a full name list. Suitable for Bibliography
1146 docstring const kind = operator[](from_ascii(key.substr(10)));
1147 ret = getAuthorList(&buf, kind, true, false, true);
1148 if (ci.forceUpperCase && isLowerCase(ret[0]))
1149 ret[0] = uppercase(ret[0]);
1150 } else if (prefixIs(key, "forceabbrvnames:")) {
1151 // Special key to provide abbreviated name lists,
1152 // irrespective of maxcitenames. Suitable for Bibliography
// NOTE(review): "forceabbrvnames:" is 16 characters long, but substr(15)
// is used, so the field name keeps the leading ':'. Looks like an
// off-by-one; confirm and use 16.
1154 docstring const kind = operator[](from_ascii(key.substr(15)));
1155 ret = getAuthorList(&buf, kind, false, true, true);
1156 if (ci.forceUpperCase && isLowerCase(ret[0]))
1157 ret[0] = uppercase(ret[0]);
1158 } else if (prefixIs(key, "abbrvbynames:")) {
1159 // Special key to provide abbreviated name list,
1160 // with respect to maxcitenames. Suitable for further names inside a
1161 // bibliography item // (such as "ed. by ...")
// NOTE(review): "abbrvbynames:" is 13 characters long, but substr(11)
// is used, so the field name keeps a leading "s:". Confirm and use 13.
1162 docstring const kind = operator[](from_ascii(key.substr(11)));
1163 ret = getAuthorList(&buf, kind, false, false, true, false);
1164 if (ci.forceUpperCase && isLowerCase(ret[0]))
1165 ret[0] = uppercase(ret[0]);
1166 } else if (prefixIs(key, "fullbynames:")) {
1167 // Return a full name list. Suitable for further names inside a
1168 // bibliography item // (such as "ed. by ...")
// NOTE(review): "fullbynames:" is 12 characters long, but substr(10)
// is used, so the field name keeps a leading "s:". Confirm and use 12.
1169 docstring const kind = operator[](from_ascii(key.substr(10)));
1170 ret = getAuthorList(&buf, kind, true, false, true, false);
1171 if (ci.forceUpperCase && isLowerCase(ret[0]))
1172 ret[0] = uppercase(ret[0]);
1173 } else if (prefixIs(key, "forceabbrvbynames:")) {
1174 // Special key to provide abbreviated name lists,
1175 // irrespective of maxcitenames. Suitable for further names inside a
1176 // bibliography item // (such as "ed. by ...")
// NOTE(review): "forceabbrvbynames:" is 18 characters long, but
// substr(15) is used, so the field name keeps a leading "es:".
// Confirm and use 18.
1177 docstring const kind = operator[](from_ascii(key.substr(15)));
1178 ret = getAuthorList(&buf, kind, false, true, true, false);
1179 if (ci.forceUpperCase && isLowerCase(ret[0]))
1180 ret[0] = uppercase(ret[0]);
1181 } else if (key == "abbrvciteauthor") {
1182 // Special key to provide abbreviated author or
1183 // editor names (suitable for citation labels),
1184 // with respect to maxcitenames.
1185 ret = getAuthorOrEditorList(&buf, false, false);
1186 if (ci.forceUpperCase && isLowerCase(ret[0]))
1187 ret[0] = uppercase(ret[0]);
1188 } else if (key == "fullciteauthor") {
1189 // Return a full author or editor list (for citation labels)
1190 ret = getAuthorOrEditorList(&buf, true, false);
1191 if (ci.forceUpperCase && isLowerCase(ret[0]))
1192 ret[0] = uppercase(ret[0]);
1193 } else if (key == "forceabbrvciteauthor") {
1194 // Special key to provide abbreviated author or
1195 // editor names (suitable for citation labels),
1196 // irrespective of maxcitenames.
1197 ret = getAuthorOrEditorList(&buf, false, true);
1198 if (ci.forceUpperCase && isLowerCase(ret[0]))
1199 ret[0] = uppercase(ret[0]);
1200 } else if (key == "bibentry") {
1201 // Special key to provide the full bibliography entry: see getInfo()
1202 CiteEngineType const engine_type = buf.params().citeEngineType();
1203 DocumentClass const & dc = buf.params().documentClass();
1204 docstring const & format =
1205 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1207 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1208 } else if (key == "textbefore")
1209 ret = ci.textBefore;
1210 else if (key == "textafter")
1212 else if (key == "curpretext") {
// Search the (key, text) pairs for the one that belongs to this entry;
// numkey is compared with num_bib_key_ to tell apart repeated keys.
1213 vector<pair<docstring, docstring>> pres = ci.getPretexts();
1214 vector<pair<docstring, docstring>>::iterator it = pres.begin();
1216 for (; it != pres.end() ; ++it) {
1217 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1222 if ((*it).first == bib_key_)
1225 } else if (key == "curposttext") {
// Same search as for "curpretext", on the post-texts.
1226 vector<pair<docstring, docstring>> posts = ci.getPosttexts();
1227 vector<pair<docstring, docstring>>::iterator it = posts.begin();
1229 for (; it != posts.end() ; ++it) {
1230 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1235 if ((*it).first == bib_key_)
1238 } else if (key == "year")
1243 ret = xml::cleanAttr(ret);
1245 // make sure it is not too big
1246 support::truncateWithEllipsis(ret, maxsize);
1251 //////////////////////////////////////////////////////////////////////
1255 //////////////////////////////////////////////////////////////////////
1259 // A functor for use with sort, leading to case insensitive sorting
1260 bool compareNoCase(const docstring & a, const docstring & b) {
1261 return compare_no_case(a, b) < 0;
1267 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1269 vector<docstring> result;
1270 if (!data.isBibTeX())
1272 // Legacy crossref field. This is not nestable.
1273 if (!nested && !data["crossref"].empty()) {
1274 docstring const xrefkey = data["crossref"];
1275 result.push_back(xrefkey);
1276 // However, check for nested xdatas
1277 BiblioInfo::const_iterator it = find(xrefkey);
1279 BibTeXInfo const & xref = it->second;
1280 vector<docstring> const nxdata = getXRefs(xref, true);
1281 if (!nxdata.empty())
1282 result.insert(result.end(), nxdata.begin(), nxdata.end());
1285 // Biblatex's xdata field. Infinitely nestable.
1286 // XData field can consist of a comma-separated list of keys
1287 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1288 if (!xdatakeys.empty()) {
1289 for (auto const & xdatakey : xdatakeys) {
1290 result.push_back(xdatakey);
1291 BiblioInfo::const_iterator it = find(xdatakey);
1293 BibTeXInfo const & xdata = it->second;
1294 vector<docstring> const nxdata = getXRefs(xdata, true);
1295 if (!nxdata.empty())
1296 result.insert(result.end(), nxdata.begin(), nxdata.end());
1304 vector<docstring> const BiblioInfo::getKeys() const
1306 vector<docstring> bibkeys;
1307 for (auto const & bi : *this)
1308 bibkeys.push_back(bi.first);
1309 sort(bibkeys.begin(), bibkeys.end(), &compareNoCase);
1314 vector<docstring> const BiblioInfo::getFields() const
1316 vector<docstring> bibfields;
1317 for (auto const & fn : field_names_)
1318 bibfields.push_back(fn);
1319 sort(bibfields.begin(), bibfields.end());
1324 vector<docstring> const BiblioInfo::getEntries() const
1326 vector<docstring> bibentries;
1327 for (auto const & et : entry_types_)
1328 bibentries.push_back(et);
1329 sort(bibentries.begin(), bibentries.end());
1334 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1336 BiblioInfo::const_iterator it = find(key);
1339 BibTeXInfo const & data = it->second;
1340 return data.getAuthorOrEditorList(&buf, false);
1344 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1346 BiblioInfo::const_iterator it = find(key);
1349 BibTeXInfo const & data = it->second;
1350 return data.citeNumber();
1353 void BiblioInfo::getLocators(docstring const & key, docstring & doi, docstring & url, docstring & file) const
1355 BiblioInfo::const_iterator it = find(key);
1358 BibTeXInfo const & data = it->second;
1359 data.getLocators(doi,url,file);
1363 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1365 BiblioInfo::const_iterator it = find(key);
1368 BibTeXInfo const & data = it->second;
1369 docstring year = data.getYear();
1371 // let's try the crossrefs
1372 vector<docstring> const xrefs = getXRefs(data);
1376 for (docstring const & xref : xrefs) {
1377 BiblioInfo::const_iterator const xrefit = find(xref);
1378 if (xrefit == end())
1380 BibTeXInfo const & xref_data = xrefit->second;
1381 year = xref_data.getYear();
1387 if (use_modifier && data.modifier() != 0)
1388 year += data.modifier();
1393 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1395 docstring const year = getYear(key, use_modifier);
1397 return buf.B_("No year");
1402 docstring const BiblioInfo::getInfo(docstring const & key,
1403 Buffer const & buf, CiteItem const & ci, docstring const & format) const
1405 BiblioInfo::const_iterator it = find(key);
1407 return docstring(_("Bibliography entry not found!"));
1408 BibTeXInfo const & data = it->second;
1409 BibTeXInfoList xrefptrs;
1410 for (docstring const & xref : getXRefs(data)) {
1411 BiblioInfo::const_iterator const xrefit = find(xref);
1412 if (xrefit != end())
1413 xrefptrs.push_back(&(xrefit->second));
1415 return data.getInfo(xrefptrs, buf, ci, format);
// Build the label for a citation of \p keys in the cite style \p style.
// The cite format the document class provides for \p style is passed
// through BibTeXInfo::getLabel() once per key, and the outcome is
// finally truncated to the size limit taken from \p ci.
// \p keys is received by value because the list is modified locally.
1419 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1420 Buffer const & buf, string const & style, CiteItem const & ci) const
1422 size_t max_size = ci.max_size;
1423 // shorter makes no sense
1424 LASSERT(max_size >= 16, max_size = 16);
1426 // we can't display more than 10 of these, anyway
1427 // but since we truncate in the middle,
1428 // we need to split into two halves.
1429 bool const too_many_keys = keys.size() > 10;
1430 vector<docstring> lkeys;
1431 if (too_many_keys) {
// lkeys receives the last five keys, which are appended to keys below
1432 lkeys.insert(lkeys.end(), keys.end() - 5, keys.end());
1434 keys.insert(keys.end(), lkeys.begin(), lkeys.end());
1437 CiteEngineType const engine_type = buf.params().citeEngineType();
1438 DocumentClass const & dc = buf.params().documentClass();
1439 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
// ret starts as the raw format and is rewritten for each key in turn
1440 docstring ret = format;
1441 vector<docstring>::const_iterator key = keys.begin();
1442 vector<docstring>::const_iterator ken = keys.end();
// every key processed so far, the current one included
1443 vector<docstring> handled_keys;
1444 for (int i = 0; key != ken; ++key, ++i) {
1445 handled_keys.push_back(*key);
1447 for (auto const & k : handled_keys) {
1451 BiblioInfo::const_iterator it = find(*key);
// an entry that carries nothing but the key
1452 BibTeXInfo empty_data;
1453 empty_data.key(*key);
1454 BibTeXInfo & data = empty_data;
1455 vector<BibTeXInfo const *> xrefptrs;
// resolve the cross-referenced keys to entries; unknown keys are skipped
1458 for (docstring const & xref : getXRefs(data)) {
1459 BiblioInfo::const_iterator const xrefit = find(xref);
1460 if (xrefit != end())
1461 xrefptrs.push_back(&(xrefit->second));
// the last two arguments state whether further keys follow and
// whether this is the second key
1465 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1468 support::truncateWithEllipsis(ret, max_size, true);
1474 bool BiblioInfo::isBibtex(docstring const & key) const
1477 split(key, key1, ',');
1478 BiblioInfo::const_iterator it = find(key1);
1481 return it->second.isBibTeX();
1485 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1486 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1487 Buffer const & buf, CiteItem const & ci) const
1490 return vector<pair<docstring,docstring>>();
1493 CiteStringMap csm(styles.size());
1494 for (size_t i = 0; i != csm.size(); ++i) {
1495 style = styles[i].name;
1496 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1503 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1505 bimap_.insert(info.begin(), info.end());
1506 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1507 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1513 // used in xhtml to sort a list of BibTeXInfo objects
1514 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1516 docstring const lauth = lhs->getAuthorOrEditorList();
1517 docstring const rauth = rhs->getAuthorOrEditorList();
1518 docstring const lyear = lhs->getYear();
1519 docstring const ryear = rhs->getYear();
1520 docstring const ltitl = lhs->operator[]("title");
1521 docstring const rtitl = rhs->operator[]("title");
1522 return (lauth < rauth)
1523 || (lauth == rauth && lyear < ryear)
1524 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1530 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1532 cited_entries_.clear();
1533 // We are going to collect all the citation keys used in the document,
1534 // getting them from the TOC.
1535 // FIXME We may want to collect these differently, in the first case,
1536 // so that we might have them in order of appearance.
1537 set<docstring> citekeys;
1538 Toc const & toc = *buf.tocBackend().toc("citation");
1539 for (auto const & t : toc) {
1540 if (t.str().empty())
1542 vector<docstring> const keys = getVectorFromString(t.str());
1543 citekeys.insert(keys.begin(), keys.end());
1545 if (citekeys.empty())
1548 // We have a set of the keys used in this document.
1549 // We will now convert it to a list of the BibTeXInfo objects used in
1551 vector<BibTeXInfo const *> bi;
1552 for (auto const & ck : citekeys) {
1553 BiblioInfo::const_iterator const bt = find(ck);
1554 if (bt == end() || !bt->second.isBibTeX())
1556 bi.push_back(&(bt->second));
1559 sort(bi.begin(), bi.end(), lSorter);
1561 // Now we can write the sorted keys
1562 // b is a BibTeXInfo const *
1563 for (auto const & b : bi)
1564 cited_entries_.push_back(b->key());
// Assign cite numbers, year modifiers and labels to the cited entries.
// cited_entries_ is refreshed through collectCitedEntries() first and
// then walked twice: the first pass deals with cite numbers and
// modifiers, the second one sets the labels.
1568 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1570 collectCitedEntries(buf);
1571 CiteEngineType const engine_type = buf.params().citeEngineType();
// true if the cite engine type has the numerical flag set
1572 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1576 // used to remember the last one we saw
1577 // we'll be comparing entries to see if we need to add
1578 // modifiers, like "1984a"
1579 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1581 // add letters to years
1582 for (auto const & ce : cited_entries_) {
1583 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1584 // this shouldn't happen, but...
1585 if (biit == bimap_.end())
1586 // ...fail gracefully, anyway.
1588 BibTeXInfo & entry = biit->second;
// cite numbers are handed out consecutively, starting after keynumber's
// initial value
1590 docstring const num = convert<docstring>(++keynumber);
1591 entry.setCiteNumber(num);
1593 // The first test here is checking whether this is the first
1594 // time through the loop. If so, then we do not have anything
1595 // with which to compare.
1596 if (last != bimap_.end()
1597 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1598 // we access the year via getYear() so as to get it from the xref,
1599 // if we need to do so
1600 && getYear(entry.key()) == getYear(last->second.key())) {
1601 if (modifier == 0) {
1602 // so the last one should have been 'a'
1603 last->second.setModifier('a');
1605 } else if (modifier == 'z')
1612 entry.setModifier(modifier);
1613 // remember the last one
// second pass: set the labels
1618 for (auto const & ce : cited_entries_) {
1619 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1620 // this shouldn't happen, but...
1621 if (biit == bimap_.end())
1622 // ...fail gracefully, anyway.
1624 BibTeXInfo & entry = biit->second;
// label taken from the cite number
1626 entry.label(entry.citeNumber());
1628 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1629 // we do it this way so as to access the xref, if necessary
1630 // note that this also gives us the modifier
1631 docstring const year = getYear(ce, buf, true);
// label "author year" if both parts are available
1632 if (!auth.empty() && !year.empty())
1633 entry.label(auth + ' ' + year);
// label taken from the key
1635 entry.label(entry.key());
1641 //////////////////////////////////////////////////////////////////////
1645 //////////////////////////////////////////////////////////////////////
1648 CitationStyle citationStyleFromString(string const & command,
1649 BufferParams const & params)
1652 if (command.empty())
1655 string const alias = params.getCiteAlias(command);
1656 string cmd = alias.empty() ? command : alias;
1657 if (isUpperCase(command[0])) {
1658 cs.forceUpperCase = true;
1659 cmd[0] = lowercase(cmd[0]);
1662 size_t const n = command.size() - 1;
1663 if (command[n] == '*') {
1664 cs.hasStarredVersion = true;
1665 if (suffixIs(cmd, '*'))
1666 cmd = cmd.substr(0, cmd.size() - 1);
1674 string citationStyleToString(const CitationStyle & cs, bool const latex)
1676 string cmd = latex ? cs.cmd : cs.name;
1677 if (cs.forceUpperCase)
1678 cmd[0] = uppercase(cmd[0]);
1679 if (cs.hasStarredVersion)
1685 docstring authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs, Buffer const & buf)
1687 // This function closely mimics getAuthorList, but produces DocBook instead of text.
1688 // It has been greatly simplified, as the complete list of authors is always produced. No separators are required,
1689 // as the output has a database-like shape.
1690 // constructName has also been merged within, as it becomes really simple and leads to no copy-paste.
1692 if (authorsString.empty()) {
1696 // Split the input list of authors into individual authors.
1697 vector<docstring> const authors = getAuthors(authorsString);
1699 // Retrieve the "et al." variation.
1700 string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal");
1702 // Output the list of authors.
1703 xs << xml::StartTag("authorgroup");
1706 auto it = authors.cbegin();
1707 auto en = authors.cend();
1708 for (size_t i = 0; it != en; ++it, ++i) {
1709 xs << xml::StartTag("author");
1711 xs << xml::StartTag("personname");
1713 docstring name = *it;
1715 // All authors go in a <personname>. If more structure is known, use it; otherwise (just "et al."), print it as such.
1716 if (name == "others") {
1719 name_parts parts = nameParts(name);
1720 if (! parts.prefix.empty()) {
1721 xs << xml::StartTag("honorific");
1723 xs << xml::EndTag("honorific");
1726 if (! parts.prename.empty()) {
1727 xs << xml::StartTag("firstname");
1728 xs << parts.prename;
1729 xs << xml::EndTag("firstname");
1732 if (! parts.surname.empty()) {
1733 xs << xml::StartTag("surname");
1734 xs << parts.surname;
1735 xs << xml::EndTag("surname");
1738 if (! parts.suffix.empty()) {
1739 xs << xml::StartTag("othername", "role=\"suffix\"");
1741 xs << xml::EndTag("othername");
1746 xs << xml::EndTag("personname");
1748 xs << xml::EndTag("author");
1751 // Could add an affiliation after <personname>, but not stored in BibTeX.
1753 xs << xml::EndTag("authorgroup");