3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
10 * \author Jürgen Spitzmüller
12 * Full author contact details are available in file CREDITS.
17 #include "BiblioInfo.h"
19 #include "BufferParams.h"
24 #include "TextClass.h"
25 #include "TocBackend.h"
27 #include "support/convert.h"
28 #include "support/debug.h"
29 #include "support/docstream.h"
30 #include "support/FileName.h"
31 #include "support/gettext.h"
32 #include "support/lassert.h"
33 #include "support/lstrings.h"
34 #include "support/regex.h"
35 #include "support/textutils.h"
41 using namespace lyx::support;
48 // Remove placeholders from names: map the temporary "$$space!" and
// "$$comma!" markers in \p input back to a literal blank and comma.
49 docstring renormalize(docstring const & input)
51 docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
52 return subst(res, from_ascii("$$comma!"), from_ascii(","));
56 // Split the surname into prefix ("von-part") and family name
// Returns the pair (prefix, family name). A surname with fewer than two
// blank-separated tokens is returned unchanged with an empty prefix.
57 pair<docstring, docstring> parseSurname(docstring const & sname)
59 // Split the surname into its tokens
60 vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
61 if (pieces.size() < 2)
62 return make_pair(docstring(), sname);
64 // Now we look for pieces that begin with a lower case letter.
65 // All except for the very last token constitute the "von-part".
67 vector<docstring>::const_iterator it = pieces.begin();
68 vector<docstring>::const_iterator const en = pieces.end();
70 for (; it != en; ++it) {
73 // If this is the last piece, then what we now have is
74 // the family name, notwithstanding the casing.
77 char_type const c = (*it)[0];
78 // If the piece starts with an upper case char, we assume
79 // this is part of the surname.
82 // Nothing of the former, so add this piece to the prefix
90 // Reconstruct the family name.
91 // Note that if we left the loop because it + 1 == en,
92 // then this will still do the right thing, i.e., make surname
93 // just be the last piece.
96 for (; it != en; ++it) {
103 return make_pair(prefix, surname);
115 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
116 name_parts nameParts(docstring const & iname)
122 // First we check for groupings (via {...}) and replace blanks and
123 // commas inside groups with temporary placeholders
126 docstring::const_iterator p = iname.begin();
127 while (p != iname.end()) {
128 // count grouping level
133 // generate string with probable placeholders
134 if (*p == ' ' && gl > 0)
135 name += from_ascii("$$space!");
136 else if (*p == ',' && gl > 0)
137 name += from_ascii("$$comma!");
143 // Now we look for a comma, and take the last name to be everything
144 // preceding the right-most one, so that we also get the name suffix
146 vector<docstring> pieces = getVectorFromString(name);
147 if (pieces.size() > 1) {
148 // Whether we have a name suffix or not, the prename is
150 res.prename = renormalize(pieces.back());
151 // The family name, conversely, is always the first item.
152 // However, it might contain a prefix (aka "von" part)
153 docstring const sname = pieces.front();
// NOTE(review): parseSurname(sname) is evaluated twice below; the
// returned pair could be computed once and reused.
154 res.prefix = renormalize(parseSurname(sname).first);
155 res.surname = renormalize(parseSurname(sname).second);
156 // If we have three pieces (the maximum allowed by BibTeX),
157 // the second one is the name suffix.
158 if (pieces.size() > 2)
159 res.suffix = renormalize(pieces.at(1));
163 // OK, so now we want to look for the last name.
164 // Split on spaces, to get various tokens.
165 pieces = getVectorFromString(name, from_ascii(" "));
166 // No space: Only a family name given
167 if (pieces.size() < 2) {
168 res.surname = renormalize(pieces.back());
171 // If we get two pieces, assume "prename surname"
172 if (pieces.size() == 2) {
173 res.prename = renormalize(pieces.front());
174 res.surname = renormalize(pieces.back());
178 // Three or more pieces: A name prefix (aka "von" part) might be included.
179 // We look for the first piece that begins with a lower case letter
180 // (which is the name prefix, if it is not the last token) or the last token.
182 vector<docstring>::const_iterator it = pieces.begin();
183 vector<docstring>::const_iterator const en = pieces.end();
185 for (; it != en; ++it) {
188 char_type const c = (*it)[0];
189 // If the piece starts with a lower case char, we assume
190 // this is the name prefix and thus prename is complete.
193 // Same if this is the last piece, which is always the surname.
196 // Nothing of the former, so add this piece to the prename
204 // Now reconstruct the family name and strip the prefix.
205 // Note that if we left the loop because it + 1 == en,
206 // then this will still do the right thing, i.e., make surname
207 // just be the last piece.
210 for (; it != en; ++it) {
217 res.prename = renormalize(prename);
// NOTE(review): parseSurname(surname) is evaluated twice below as well.
218 res.prefix = renormalize(parseSurname(surname).first);
219 res.surname = renormalize(parseSurname(surname).second);
// Builds a display name from \p name by filling the placeholders
// %prename%, %surname%, %prefix% and %suffix% of \p scheme.
224 docstring constructName(docstring const & name, string const & scheme)
226 // re-constructs a name from name parts according
// NOTE(review): nameParts(name) is evaluated four times below, i.e. the
// name is parsed once per part; the result could be stored once.
228 docstring const prename = nameParts(name).prename;
229 docstring const surname = nameParts(name).surname;
230 docstring const prefix = nameParts(name).prefix;
231 docstring const suffix = nameParts(name).suffix;
// Each regex matches one optional clause of the form {%key%[[text]]}:
// group 3 is the text between "[[" and "]]", groups 1 and 5 are
// whatever precedes and follows the clause.
233 static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
234 static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
235 static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
237 // Changing the first parameter of regex_match() may corrupt the
238 // second one. In this case we use the temporary string tmp.
239 if (regex_match(scheme, sub, reg1)) {
241 if (!prename.empty())
245 if (regex_match(res, sub, reg2)) {
246 string tmp = sub.str(1);
249 res = tmp + sub.str(5);
251 if (regex_match(res, sub, reg3)) {
252 string tmp = sub.str(1);
255 res = tmp + sub.str(5);
// The optional clauses are resolved; now fill in the plain placeholders.
257 docstring result = from_ascii(res);
258 result = subst(result, from_ascii("%prename%"), prename);
259 result = subst(result, from_ascii("%surname%"), surname);
260 result = subst(result, from_ascii("%prefix%"), prefix);
261 result = subst(result, from_ascii("%suffix%"), suffix);
// Splits an author-type string into the individual names, using " and "
// outside of {...} groups as the separator.
266 vector<docstring> const getAuthors(docstring const & author)
268 // We check for groupings (via {...}) and only consider " and "
269 // outside groups as author separator. This is to account
270 // for cases such as {{Barnes and Noble, Inc.}}, which
271 // need to be treated as one single family name.
272 // We use temporary placeholders in order to differentiate the
273 // diverse " and " cases.
275 // First, we temporarily replace all ampersands. It is rather unusual
276 // in author names, but can happen (consider cases such as "C \& A Corp.").
277 docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
278 // Then, we temporarily turn all " and " strings into ampersands in order
279 // to handle them later on a per-char level.
280 iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
281 // Now we traverse through the string and replace the "&" by the proper
282 // output in- and outside groups
285 docstring::const_iterator p = iname.begin();
286 while (p != iname.end()) {
287 // count grouping level
292 // generate string with probable placeholders
295 // Inside groups, we output "and"
296 name += from_ascii("and");
298 // Outside groups, we output a separator
299 name += from_ascii("$$namesep!");
306 // re-insert the literal ampersands
307 name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
309 // Now construct the actual vector
310 return getVectorFromString(name, from_ascii(" $$namesep! "));
// Returns true if getAuthors() splits \p author into more than one name.
314 bool multipleAuthors(docstring const & author)
316 return getAuthors(author).size() > 1;
320 // converts a string containing LaTeX commands into unicode
322 docstring convertLaTeXCommands(docstring const & str)
// State of the scanner that walks val from the front.
327 bool scanning_cmd = false;
328 bool scanning_math = false;
329 bool escaped = false; // used to catch \$, etc.
330 while (!val.empty()) {
331 char_type const ch = val[0];
333 // if we're scanning math, we output everything until we
334 // find an unescaped $, at which point we break out.
341 scanning_math = false;
347 // if we're scanning a command name, then we just
348 // discard characters until we hit something that
351 if (isAlphaASCII(ch)) {
356 // so we're done with this command.
357 // now we fall through and check this character.
358 scanning_cmd = false;
361 // was the last character a \? If so, then this is something like:
362 // \\ or \$, so we'll just output it. That's probably not always right...
364 // exception: output \, as THIN SPACE
366 ret.push_back(0x2009);
377 scanning_math = true;
381 // Change text mode accents in the form
382 // {\v a} to \v{a} (see #9340).
383 // FIXME: This is a sort of mini-tex2lyx.
384 // Use the real tex2lyx instead!
// The regex matches, at the start of val: '{', a backslash, one of the
// listed accent command letters, one whitespace char, one word char, '}'.
385 static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
386 if (lyx::regex_search(to_utf8(val), tma_reg)) {
388 val.replace(2, 1, from_ascii("{"));
392 // Apart from the above, we just ignore braces
393 if (ch == '{' || ch == '}') {
398 // we're going to check things that look like commands, so if
399 // this doesn't, just output it.
406 // ok, could be a command of some sort
407 // let's see if it corresponds to some unicode
408 // unicodesymbols has things in the form: \"{u},
409 // whereas we may see things like: \"u. So we'll
410 // look for that and change it, if necessary.
411 // FIXME: This is a sort of mini-tex2lyx.
412 // Use the real tex2lyx instead!
// The regex matches, at the start of val: a backslash, one non-word
// char and one word char; the word char (index 2) is then wrapped in
// braces. The closing brace is inserted first so that index 2 stays valid.
413 static lyx::regex const reg("^\\\\\\W\\w");
414 if (lyx::regex_search(to_utf8(val), reg)) {
415 val.insert(3, from_ascii("}"));
416 val.insert(2, from_ascii("{"));
420 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
421 Encodings::TEXT_CMD, termination, rem);
422 if (!cnvtd.empty()) {
423 // it did, so we'll take that bit and proceed with what's left
428 // it's a command of some sort
437 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
// A rich text span starts at "{!" and ends at "!}" (see the two checks
// in the loop below).
438 docstring processRichtext(docstring const & str, bool richtext)
443 bool scanning_rich = false;
444 while (!val.empty()) {
445 char_type const ch = val[0];
446 if (ch == '{' && val.size() > 1 && val[1] == '!') {
447 // beginning of rich text
448 scanning_rich = true;
// end of rich text
452 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
454 scanning_rich = false;
462 // we need to escape '<' and '>'
470 } else if (!scanning_rich /* && !richtext */)
472 // else the character is discarded, which will happen only if
473 // richtext == false and we are scanning rich text
482 //////////////////////////////////////////////////////////////////////
486 //////////////////////////////////////////////////////////////////////
// Creates an entry with key \p key and entry type \p type. The entry is
// flagged as a BibTeX one (is_bibtex_ = true); num_bib_key_ and
// modifier_ start at 0, and info_ and format_ start empty.
488 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
489 : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type),
490 info_(), format_(), modifier_(0)
// Formats the name list of the "author" field via getAuthorList().
// The "editor" field is read as well, presumably as a fallback when
// "author" is empty (the guarding condition is not visible here).
// \p full and \p forceshort are passed through to getAuthorList().
495 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
496 bool full, bool forceshort) const
498 docstring author = operator[]("author");
500 author = operator[]("editor");
502 return getAuthorList(buf, author, full, forceshort);
// Formats the names contained in \p author as a list for display.
// Separators, the "et al." string and the name templates are taken from
// the document class' cite macros when \p buf is non-null; otherwise the
// built-in defaults in the ternaries below are used. The result is
// passed through convertLaTeXCommands() before it is returned.
506 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
507 docstring const & author, bool const full, bool const forceshort,
508 bool const allnames, bool const beginning) const
510 // The maxnames threshold depends on the engine (2 without a buffer)
511 size_t maxnames = buf ?
512 buf->params().documentClass().max_citenames() : 2;
515 docstring const opt = label();
520 docstring const remainder = trim(split(opt, authors, '('));
521 if (remainder.empty())
522 // in this case, we didn't find a "(",
523 // so we don't have author (year)
526 // Natbib syntax is "Jones et al.(1990)Jones, Baker, and Williams"
527 docstring const fullauthors = trim(rsplit(remainder, ')'));
528 if (!fullauthors.empty())
537 // OK, we've got some names. Let's format them.
538 // Try to split the author list
539 vector<docstring> const authors = getAuthors(author);
543 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
544 : ENGINE_TYPE_DEFAULT;
546 // These are defined in the styles
548 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
550 string const namesep =
551 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
553 string const lastnamesep =
554 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
556 string const pairnamesep =
557 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
// Name templates in the syntax understood by constructName().
559 string firstnameform =
560 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
561 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
563 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
564 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
565 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
566 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
568 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
569 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
570 string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
571 : "{%prefix%[[%prefix% ]]}%surname%";
573 // Shorten the list (with et al.) if forceshort is set
574 // and the list can actually be shortened, else if maxcitenames
575 // is passed and full is not set.
576 bool shorten = forceshort && authors.size() > 1;
577 vector<docstring>::const_iterator it = authors.begin();
578 vector<docstring>::const_iterator en = authors.end();
579 for (size_t i = 0; it != en; ++it, ++i) {
580 if (i >= maxnames && !full) {
// BibTeX's "and others" is rendered as the et al. string
584 if (*it == "others") {
585 retval += buf ? buf->B_(etal) : from_ascii(etal);
// separator in front of the last name: a list of exactly two names
// gets pairnamesep, longer lists get lastnamesep
588 if (i > 0 && i == authors.size() - 1) {
589 if (authors.size() == 2)
590 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
592 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
594 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
596 retval += (i == 0) ? constructName(*it, firstnameform)
597 : constructName(*it, othernameform);
599 retval += constructName(*it, citenameform);
// shortened form: first name followed by the et al. string
603 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
605 retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
608 return convertLaTeXCommands(retval);
// Returns the year of this entry. The legacy "year" field is tried
// first, then biblatex's "date" field, from which only the four-digit
// year(s) are extracted. The trailing part works on label() instead and
// takes the text between '(' and ')' as the year.
612 docstring const BibTeXInfo::getYear() const
615 // first try legacy year field
616 docstring year = operator[]("year");
619 // now try biblatex's date field
620 year = operator[]("date");
621 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
622 // We only want the years.
// yreg captures the first four-digit year, ereg the one after the '/'.
623 static regex const yreg("[-]?([\\d]{4}).*");
624 static regex const ereg(".*/[-]?([\\d]{4}).*");
626 string const date = to_utf8(year);
627 if (!regex_match(date, sm, yreg))
628 // cannot parse year.
630 year = from_ascii(sm[1]);
631 // check for an endyear
632 if (regex_match(date, sm, ereg))
// 0x2013 is EN DASH: start and end year are joined as a range
633 year += char_type(0x2013) + from_ascii(sm[1]);
637 docstring const opt = label();
642 docstring tmp = split(opt, authors, '(');
644 // we don't have author (year)
647 tmp = split(tmp, year, ')');
// Fills \p doi, \p url and \p file with locators found in this entry.
// They are read from the "doi", "url", "file" and "localfile" fields,
// and from biblatex's "eprinttype"/"eprint" pair.
652 void BibTeXInfo::getLocators(docstring & doi, docstring & url, docstring & file) const
655 // get "doi" entry from citation record
656 doi = operator[]("doi");
// a bare DOI is turned into a resolver URL
657 if (!doi.empty() && !prefixIs(doi,from_ascii("http")))
658 doi = "https://doi.org/" + doi;
659 // get "url" entry from citation record
660 url = operator[]("url");
661 // get "file" entry from citation record
662 file = operator[]("file");
664 // Jabref case, field has a format:
665 // Description:Location:Filetype;Description:Location:Filetype...
666 // We will grab only the first pdf
668 docstring ret, filedest, tmp;
669 ret = split(file, tmp, ':');
670 tmp = split(ret, filedest, ':');
671 //TODO how to deal with relative directories?
672 FileName f(to_utf8(filedest));
674 file = "file:///" + filedest;
677 // kbibtex case, format:
678 // file1.pdf;file2.pdf
679 // We will grab only the first pdf
682 kfile = operator[]("localfile");
683 if (!kfile.empty()) {
684 docstring filedest, tmp;
685 tmp = split(kfile, filedest, ';');
686 //TODO how to deal with relative directories?
687 FileName f(to_utf8(filedest));
689 file = "file:///" + filedest;
695 // try biblatex specific fields, see its manual
696 // 3.13.7 "Electronic Publishing Information"
697 docstring eprinttype = operator[]("eprinttype");
698 docstring eprint = operator[]("eprint");
// map the known eprint types to the URL of the hosting service
702 if (eprinttype == "arxiv")
703 url = "https://arxiv.org/abs/" + eprint;
704 if (eprinttype == "jstor")
705 url = "https://www.jstor.org/stable/" + eprint;
706 if (eprinttype == "pubmed")
707 url = "http://www.ncbi.nlm.nih.gov/pubmed/" + eprint;
708 if (eprinttype == "hdl")
709 url = "https://hdl.handle.net/" + eprint;
710 if (eprinttype == "googlebooks")
711 url = "http://books.google.com/books?id=" + eprint;
716 // The bibliography environment could be handled here. All one could do
717 // here is let LyX scan the entry for URL or HRef insets.
// Forward declaration: parseEmbeddedOption() below calls parseOptions(),
// which calls getClause(), which in turn calls parseEmbeddedOption().
723 docstring parseOptions(docstring const & format, string & optkey,
724 docstring & ifpart, docstring & elsepart);
726 // Calls parseOptions to deal with an embedded option, such as:
727 // {%number%[[, no.~%number%]]}
728 // which must appear at the start of format. ifelsepart gets the
729 // whole of the option, and we return what's left after the option.
730 // we return format if there is an error.
731 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
733 LASSERT(format[0] == '{' && format[1] == '%', return format);
737 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
738 if (format == rest) { // parse error
739 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
742 LASSERT(rest.size() <= format.size(),
743 { ifelsepart = docstring(); return format; });
// the option itself is the leading part of format that parseOptions()
// consumed, i.e. everything in front of rest
744 ifelsepart = format.substr(0, format.size() - rest.size());
749 // Gets a "clause" from a format string, where the clause is
750 // delimited by '[[' and ']]'. Returns what is left after the
751 // clause is removed, and returns format if there is an error.
752 docstring getClause(docstring const & format, docstring & clause)
754 docstring fmt = format;
757 // we'll remove characters from the front of fmt as we
759 while (!fmt.empty()) {
// "]]" closes the clause
760 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
765 // check for an embedded option
766 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
768 docstring const rest = parseEmbeddedOption(fmt, part);
770 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
775 } else { // it's just a normal character
784 // parse an options string, which must appear at the start of the
785 // format parameter. puts the parsed bits in optkey, ifpart, and
786 // elsepart and returns what's left after the option is removed.
787 // if there's an error, it returns format itself.
788 docstring parseOptions(docstring const & format, string & optkey,
789 docstring & ifpart, docstring & elsepart)
791 LASSERT(format[0] == '{' && format[1] == '%', return format);
// skip the leading "{%"
793 docstring fmt = format.substr(2);
794 size_t pos = fmt.find('%'); // end of key
795 if (pos == string::npos) {
796 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
799 optkey = to_utf8(fmt.substr(0, pos));
800 fmt = fmt.substr(pos + 1);
801 // [[format]] should be next
// NOTE(review): fmt[1] is read here (and in the else-clause check below)
// without a visible size check; confirm fmt cannot be shorter than two
// characters at these points.
802 if (fmt[0] != '[' || fmt[1] != '[') {
803 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
807 docstring curfmt = fmt;
808 fmt = getClause(curfmt, ifpart);
810 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
814 if (fmt[0] == '}') // we're done, no else clause
815 return fmt.substr(1);
817 // else part should follow
818 if (fmt[0] != '[' || fmt[1] != '[') {
819 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
824 fmt = getClause(curfmt, elsepart);
// getClause() returns its input unchanged on error; after a successful
// parse the option must be closed by '}'
826 if (fmt == curfmt || fmt[0] != '}') {
827 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
// drop the closing '}'
830 return fmt.substr(1);
837 Bug #9131 revealed an oddity in how we are generating citation information
838 when more than one key is given. We end up building a longer and longer format
839 string as we go, which we then have to re-parse, over and over and over again,
840 rather than generating the information for the individual keys and then putting
841 all of that together. We do that to deal with the way separators work, from what
842 I can tell, but it still feels like a hack. Fixing this would require quite a
843 bit of work, however.
// Expands the citation format string \p format for this entry.
// The format is consumed from the front: %key% items are replaced by
// their value, {%key%[[if]][[else]]} options are resolved through
// parseOptions() and expanded recursively, and {! ... !} rich text
// markers are tracked. \p counter is checked against max_passes to stop
// runaway macro expansion.
845 docstring BibTeXInfo::expandFormat(docstring const & format,
846 BibTeXInfoList const & xrefs, int & counter, Buffer const & buf,
847 CiteItem const & ci, bool next, bool second) const
849 // incorrect use of macros could put us in an infinite loop
850 static int const max_passes = 5000;
851 // the use of overly large keys can lead to performance problems, due
852 // to eventual attempts to convert LaTeX macros to unicode. See bug
853 // #8944. By default, the size is limited to 128 (in CiteItem), but
854 // for specific purposes (such as XHTML export), it needs to be enlarged
855 // This is perhaps not the best solution, but it will have to do for now.
856 size_t const max_keysize = ci.max_key_size;
857 odocstringstream ret; // return value
859 bool scanning_key = false;
860 bool scanning_rich = false;
862 CiteEngineType const engine_type = buf.params().citeEngineType();
863 docstring fmt = format;
864 // we'll remove characters from the front of fmt as we
866 while (!fmt.empty()) {
867 if (counter > max_passes) {
868 LYXERR0("Recursion limit reached while parsing `"
873 char_type thischar = fmt[0];
874 if (thischar == '%') {
875 // beginning or end of key
878 scanning_key = false;
879 // so we replace the key with its value, which may be empty
883 buf.params().documentClass().getCiteMacro(engine_type, key);
// the macro's value is pushed back onto fmt so that it gets parsed too
884 fmt = from_utf8(val) + fmt.substr(1);
887 } else if (prefixIs(key, "B_")) {
888 // a translatable bit (to the Buffer language)
890 buf.params().documentClass().getCiteMacro(engine_type, key);
891 docstring const trans =
892 translateIfPossible(from_utf8(val), buf.params().language->code());
894 } else if (key[0] == '_') {
895 // a translatable bit (to the GUI language)
897 buf.params().documentClass().getCiteMacro(engine_type, key);
898 docstring const trans =
899 translateIfPossible(from_utf8(val));
902 docstring const val =
903 getValueForKey(key, buf, ci, xrefs, max_keysize);
// the span is emitted inside {! !} rich text markers
905 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
908 ret << from_ascii("{!</span>!}");
916 else if (thischar == '{') {
917 // beginning of option?
919 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
922 if (fmt.size() > 1) {
924 // it is the beginning of an optional format
928 docstring const newfmt =
929 parseOptions(fmt, optkey, ifpart, elsepart);
930 if (newfmt == fmt) // parse error
933 docstring const val =
934 getValueForKey(optkey, buf, ci, xrefs);
935 if (optkey == "next" && next)
936 ret << ifpart; // without expansion
937 else if (optkey == "second" && second) {
939 ret << expandFormat(ifpart, xrefs, newcounter, buf,
941 } else if (!val.empty()) {
943 ret << expandFormat(ifpart, xrefs, newcounter, buf,
945 } else if (!elsepart.empty()) {
947 ret << expandFormat(elsepart, xrefs, newcounter, buf,
950 // fmt will have been shortened for us already
954 // beginning of rich text
955 scanning_rich = true;
957 ret << from_ascii("{!");
961 // we are here if '{' was not followed by % or !.
962 // So it's just a character.
// end of rich text
965 else if (scanning_rich && thischar == '!'
966 && fmt.size() > 1 && fmt[1] == '}') {
968 scanning_rich = false;
970 ret << from_ascii("!}");
973 else if (scanning_key)
974 key += char(thischar);
978 } catch (EncodingException & /* e */) {
979 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
985 LYXERR0("Never found end of key in `" << format << "'!");
989 LYXERR0("Never found end of rich text in `" << format << "'!");
// Returns the formatted information string for this entry, as plain
// (info_) or rich text (info_richtext_) depending on ci.richtext.
// The format is \p format_in or, if that is empty, the cite format the
// document class defines for this entry type. Both strings are cached
// and the caches are cleared when the format changes.
996 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
997 Buffer const & buf, CiteItem const & ci, docstring const & format_in) const
999 bool const richtext = ci.richtext;
1001 CiteEngineType const engine_type = buf.params().citeEngineType();
1002 DocumentClass const & dc = buf.params().documentClass();
1003 docstring const & format = format_in.empty()?
1004 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)))
1007 if (format != format_) {
1008 // clear caches since format changed
1010 info_richtext_.clear();
// cached values, if available
1014 if (!richtext && !info_.empty()) {
1015 info_ = convertLaTeXCommands(processRichtext(info_, false));
1018 if (richtext && !info_richtext_.empty())
1019 return info_richtext_;
1022 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
1028 info_ = expandFormat(format, xrefs, counter, buf,
1031 if (info_.empty()) {
1032 // this probably shouldn't happen
1037 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
1038 return info_richtext_;
1041 info_ = convertLaTeXCommands(processRichtext(info_, false));
// Expands \p format into the citation label for this entry. Unless
// \p next is set, a non-empty label is additionally run through
// processRichtext() and convertLaTeXCommands().
1046 docstring const BibTeXInfo::getLabel(BibTeXInfoList const & xrefs,
1047 Buffer const & buf, docstring const & format,
1048 CiteItem const & ci, bool next, bool second) const
1053 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
1055 if (!loclabel.empty() && !next) {
1056 loclabel = processRichtext(loclabel, ci.richtext);
1057 loclabel = convertLaTeXCommands(loclabel);
// Looks up \p field in this entry. A function-local static empty
// docstring is kept so that a reference can be returned; presumably it
// is what is returned when find() fails (that branch is not visible here).
1064 docstring const & BibTeXInfo::operator[](docstring const & field) const
1066 BibTeXInfo::const_iterator it = find(field);
1069 static docstring const empty_value = docstring();
// Convenience overload: converts \p field with from_ascii() and forwards
// to the docstring overload.
1074 docstring const & BibTeXInfo::operator[](string const & field) const
1076 return operator[](from_ascii(field));
// Returns the value that the format key \p oldkey stands for.
// A "clean:" prefix is stripped from the key first. The key is looked up
// as a field of this entry and then in the cross-referenced entries
// \p xrefs; a number of special keys are computed instead (see below).
// The result is truncated to \p maxsize characters (at least 16).
1080 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
1081 CiteItem const & ci, BibTeXInfoList const & xrefs, size_t maxsize) const
1083 // anything less is pointless
1084 LASSERT(maxsize >= 16, maxsize = 16);
1085 string key = oldkey;
1086 bool cleanit = false;
1087 if (prefixIs(oldkey, "clean:")) {
1088 key = oldkey.substr(6);
1092 docstring ret = operator[](key);
1093 if (ret.empty() && !xrefs.empty()) {
1094 // xr is a (reference to a) BibTeXInfo const *
1095 for (auto const & xr : xrefs) {
1096 if (xr && !(*xr)[key].empty()) {
1103 // some special keys
1104 // FIXME: dialog, textbefore and textafter have nothing to do with this
1105 if (key == "dialog" && ci.context == CiteItem::Dialog)
1106 ret = from_ascii("x"); // any non-empty string will do
1107 else if (key == "export" && ci.context == CiteItem::Export)
1108 ret = from_ascii("x"); // any non-empty string will do
1109 else if (key == "ifstar" && ci.Starred)
1110 ret = from_ascii("x"); // any non-empty string will do
1111 else if (key == "ifqualified" && ci.isQualified)
1112 ret = from_ascii("x"); // any non-empty string will do
1113 else if (key == "entrytype")
1115 else if (prefixIs(key, "ifentrytype:")
1116 && from_ascii(key.substr(12)) == entry_type_)
1117 ret = from_ascii("x"); // any non-empty string will do
1118 else if (key == "key")
1120 else if (key == "label")
1122 else if (key == "modifier" && modifier_ != 0)
1124 else if (key == "numericallabel")
1126 else if (prefixIs(key, "ifmultiple:")) {
1127 // Return whether we have multiple authors
1128 docstring const kind = operator[](from_ascii(key.substr(11)));
1129 if (multipleAuthors(kind))
1130 ret = from_ascii("x"); // any non-empty string will do
// In the name-list keys below, the substr() offset must equal the length
// of the prefix tested by prefixIs(), so that only the field name
// (e.g. "author") is looked up.
1132 else if (prefixIs(key, "abbrvnames:")) {
1133 // Special key to provide abbreviated name list,
1134 // with respect to maxcitenames. Suitable for Bibliography
1136 docstring const kind = operator[](from_ascii(key.substr(11)));
1137 ret = getAuthorList(&buf, kind, false, false, true);
1138 if (ci.forceUpperCase && isLowerCase(ret[0]))
1139 ret[0] = uppercase(ret[0]);
1140 } else if (prefixIs(key, "fullnames:")) {
1141 // Return a full name list. Suitable for Bibliography
1143 docstring const kind = operator[](from_ascii(key.substr(10)));
1144 ret = getAuthorList(&buf, kind, true, false, true);
1145 if (ci.forceUpperCase && isLowerCase(ret[0]))
1146 ret[0] = uppercase(ret[0]);
1147 } else if (prefixIs(key, "forceabbrvnames:")) {
1148 // Special key to provide abbreviated name lists,
1149 // irrespective of maxcitenames. Suitable for Bibliography
// "forceabbrvnames:" is 16 characters long
1151 docstring const kind = operator[](from_ascii(key.substr(16)));
1152 ret = getAuthorList(&buf, kind, false, true, true);
1153 if (ci.forceUpperCase && isLowerCase(ret[0]))
1154 ret[0] = uppercase(ret[0]);
1155 } else if (prefixIs(key, "abbrvbynames:")) {
1156 // Special key to provide abbreviated name list,
1157 // with respect to maxcitenames. Suitable for further names inside a
1158 // bibliography item // (such as "ed. by ...")
// "abbrvbynames:" is 13 characters long
1159 docstring const kind = operator[](from_ascii(key.substr(13)));
1160 ret = getAuthorList(&buf, kind, false, false, true, false);
1161 if (ci.forceUpperCase && isLowerCase(ret[0]))
1162 ret[0] = uppercase(ret[0]);
1163 } else if (prefixIs(key, "fullbynames:")) {
1164 // Return a full name list. Suitable for further names inside a
1165 // bibliography item // (such as "ed. by ...")
// "fullbynames:" is 12 characters long
1166 docstring const kind = operator[](from_ascii(key.substr(12)));
1167 ret = getAuthorList(&buf, kind, true, false, true, false);
1168 if (ci.forceUpperCase && isLowerCase(ret[0]))
1169 ret[0] = uppercase(ret[0]);
1170 } else if (prefixIs(key, "forceabbrvbynames:")) {
1171 // Special key to provide abbreviated name lists,
1172 // irrespective of maxcitenames. Suitable for further names inside a
1173 // bibliography item // (such as "ed. by ...")
// "forceabbrvbynames:" is 18 characters long
1174 docstring const kind = operator[](from_ascii(key.substr(18)));
1175 ret = getAuthorList(&buf, kind, false, true, true, false);
1176 if (ci.forceUpperCase && isLowerCase(ret[0]))
1177 ret[0] = uppercase(ret[0]);
1178 } else if (key == "abbrvciteauthor") {
1179 // Special key to provide abbreviated author or
1180 // editor names (suitable for citation labels),
1181 // with respect to maxcitenames.
1182 ret = getAuthorOrEditorList(&buf, false, false);
1183 if (ci.forceUpperCase && isLowerCase(ret[0]))
1184 ret[0] = uppercase(ret[0]);
1185 } else if (key == "fullciteauthor") {
1186 // Return a full author or editor list (for citation labels)
1187 ret = getAuthorOrEditorList(&buf, true, false);
1188 if (ci.forceUpperCase && isLowerCase(ret[0]))
1189 ret[0] = uppercase(ret[0]);
1190 } else if (key == "forceabbrvciteauthor") {
1191 // Special key to provide abbreviated author or
1192 // editor names (suitable for citation labels),
1193 // irrespective of maxcitenames.
1194 ret = getAuthorOrEditorList(&buf, false, true);
1195 if (ci.forceUpperCase && isLowerCase(ret[0]))
1196 ret[0] = uppercase(ret[0]);
1197 } else if (key == "bibentry") {
1198 // Special key to provide the full bibliography entry: see getInfo()
1199 CiteEngineType const engine_type = buf.params().citeEngineType();
1200 DocumentClass const & dc = buf.params().documentClass();
1201 docstring const & format =
1202 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1204 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1205 } else if (key == "textbefore")
1206 ret = ci.textBefore;
1207 else if (key == "textafter")
1209 else if (key == "curpretext") {
1210 vector<pair<docstring, docstring>> pres = ci.getPretexts();
1211 vector<pair<docstring, docstring>>::iterator it = pres.begin();
1213 for (; it != pres.end() ; ++it) {
1214 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1219 if ((*it).first == bib_key_)
1222 } else if (key == "curposttext") {
1223 vector<pair<docstring, docstring>> posts = ci.getPosttexts();
1224 vector<pair<docstring, docstring>>::iterator it = posts.begin();
1226 for (; it != posts.end() ; ++it) {
1227 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1232 if ((*it).first == bib_key_)
1235 } else if (key == "year")
1240 ret = xml::cleanAttr(ret);
1242 // make sure it is not too big
1243 support::truncateWithEllipsis(ret, maxsize);
1248 //////////////////////////////////////////////////////////////////////
1252 //////////////////////////////////////////////////////////////////////
1256 // A functor for use with sort, leading to case insensitive sorting
1257 bool compareNoCase(const docstring & a, const docstring & b) {
1258 return compare_no_case(a, b) < 0;
1264 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1266 vector<docstring> result;
1267 if (!data.isBibTeX())
1269 // Legacy crossref field. This is not nestable.
1270 if (!nested && !data["crossref"].empty()) {
1271 docstring const xrefkey = data["crossref"];
1272 result.push_back(xrefkey);
1273 // However, check for nested xdatas
1274 BiblioInfo::const_iterator it = find(xrefkey);
1276 BibTeXInfo const & xref = it->second;
1277 vector<docstring> const nxdata = getXRefs(xref, true);
1278 if (!nxdata.empty())
1279 result.insert(result.end(), nxdata.begin(), nxdata.end());
1282 // Biblatex's xdata field. Infinitely nestable.
1283 // XData field can consist of a comma-separated list of keys
1284 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1285 if (!xdatakeys.empty()) {
1286 for (auto const & xdatakey : xdatakeys) {
1287 result.push_back(xdatakey);
1288 BiblioInfo::const_iterator it = find(xdatakey);
1290 BibTeXInfo const & xdata = it->second;
1291 vector<docstring> const nxdata = getXRefs(xdata, true);
1292 if (!nxdata.empty())
1293 result.insert(result.end(), nxdata.begin(), nxdata.end());
1301 vector<docstring> const BiblioInfo::getKeys() const
1303 vector<docstring> bibkeys;
1304 for (auto const & bi : *this)
1305 bibkeys.push_back(bi.first);
1306 sort(bibkeys.begin(), bibkeys.end(), &compareNoCase);
1311 vector<docstring> const BiblioInfo::getFields() const
1313 vector<docstring> bibfields;
1314 for (auto const & fn : field_names_)
1315 bibfields.push_back(fn);
1316 sort(bibfields.begin(), bibfields.end());
1321 vector<docstring> const BiblioInfo::getEntries() const
1323 vector<docstring> bibentries;
1324 for (auto const & et : entry_types_)
1325 bibentries.push_back(et);
1326 sort(bibentries.begin(), bibentries.end());
1331 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1333 BiblioInfo::const_iterator it = find(key);
1336 BibTeXInfo const & data = it->second;
1337 return data.getAuthorOrEditorList(&buf, false);
1341 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1343 BiblioInfo::const_iterator it = find(key);
1346 BibTeXInfo const & data = it->second;
1347 return data.citeNumber();
1350 void BiblioInfo::getLocators(docstring const & key, docstring & doi, docstring & url, docstring & file) const
1352 BiblioInfo::const_iterator it = find(key);
1355 BibTeXInfo const & data = it->second;
1356 data.getLocators(doi,url,file);
// Return the year of the entry stored under `key`.
// The entry's own getYear() is read first; the keys delivered by
// getXRefs() are then consulted, each being looked up with find() and
// its getYear() assigned to `year`. Finally, when `use_modifier` is set
// and the entry's modifier() is non-zero, the modifier is appended.
// NOTE(review): this listing elides several lines (the handling of a
// failed find(), the condition under which the cross-references are
// searched, and how the loop is left) -- confirm against the full file.
1360 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1362 BiblioInfo::const_iterator it = find(key);
1365 BibTeXInfo const & data = it->second;
1366 docstring year = data.getYear();
1368 // let's try the crossrefs
1369 vector<docstring> const xrefs = getXRefs(data);
1373 for (docstring const & xref : xrefs) {
1374 BiblioInfo::const_iterator const xrefit = find(xref);
// a cross-reference key need not be present in this BiblioInfo
1375 if (xrefit == end())
1377 BibTeXInfo const & xref_data = xrefit->second;
1378 year = xref_data.getYear();
// the modifier is taken from the entry itself, never from a cross-reference
1384 if (use_modifier && data.modifier() != 0)
1385 year += data.modifier();
1390 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1392 docstring const year = getYear(key, use_modifier);
1394 return buf.B_("No year");
1399 docstring const BiblioInfo::getInfo(docstring const & key,
1400 Buffer const & buf, CiteItem const & ci, docstring const & format) const
1402 BiblioInfo::const_iterator it = find(key);
1404 return docstring(_("Bibliography entry not found!"));
1405 BibTeXInfo const & data = it->second;
1406 BibTeXInfoList xrefptrs;
1407 for (docstring const & xref : getXRefs(data)) {
1408 BiblioInfo::const_iterator const xrefit = find(xref);
1409 if (xrefit != end())
1410 xrefptrs.push_back(&(xrefit->second));
1412 return data.getInfo(xrefptrs, buf, ci, format);
// Build the citation label for the list of `keys` in the given `style`.
// The cite format is fetched from the document class for the buffer's
// cite engine type; each key in turn is passed through
// BibTeXInfo::getLabel(), which receives the label built so far and
// returns the updated one. The result is truncated to ci.max_size.
// `keys` is taken by value because it is modified locally.
// NOTE(review): this listing elides several statements (part of the
// key-list shortening, the body of the handled_keys loop, and the lines
// between the find() and the getXRefs() loop) -- confirm against the
// full file.
1416 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1417 Buffer const & buf, string const & style, CiteItem const & ci) const
1419 size_t max_size = ci.max_size;
1420 // shorter makes no sense
// presumably the second LASSERT argument is the recovery action taken
// when the check fails -- confirm against support/lassert.h
1421 LASSERT(max_size >= 16, max_size = 16);
1423 // we can't display more than 10 of these, anyway
1424 // but since we truncate in the middle,
1425 // we need to split into two halves.
1426 bool const too_many_keys = keys.size() > 10;
1427 vector<docstring> lkeys;
1428 if (too_many_keys) {
// keep a copy of the last five keys ...
1429 lkeys.insert(lkeys.end(), keys.end() - 5, keys.end());
// ... and append them again (the statement between these two is elided)
1431 keys.insert(keys.end(), lkeys.begin(), lkeys.end());
1434 CiteEngineType const engine_type = buf.params().citeEngineType();
1435 DocumentClass const & dc = buf.params().documentClass();
1436 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1437 docstring ret = format;
1438 vector<docstring>::const_iterator key = keys.begin();
1439 vector<docstring>::const_iterator ken = keys.end();
1440 vector<docstring> handled_keys;
1441 for (int i = 0; key != ken; ++key, ++i) {
1442 handled_keys.push_back(*key);
// walks all keys handled so far, the current one included (body elided)
1444 for (auto const & k : handled_keys) {
1448 BiblioInfo::const_iterator it = find(*key);
// stand-in entry that carries only the key
1449 BibTeXInfo empty_data;
1450 empty_data.key(*key);
1451 BibTeXInfo & data = empty_data;
1452 vector<BibTeXInfo const *> xrefptrs;
// collect pointers to those cross-referenced entries that exist here
1455 for (docstring const & xref : getXRefs(data)) {
1456 BiblioInfo::const_iterator const xrefit = find(xref);
1457 if (xrefit != end())
1458 xrefptrs.push_back(&(xrefit->second));
// `key + 1 != ken` is true for every key but the last;
// `i == 1` is true for the second key only
1462 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1465 support::truncateWithEllipsis(ret, max_size, true);
1471 bool BiblioInfo::isBibtex(docstring const & key) const
1474 split(key, key1, ',');
1475 BiblioInfo::const_iterator it = find(key1);
1478 return it->second.isBibTeX();
// Produce one (style name, label) pair per entry of `styles`: the label
// is what getLabel() returns for `keys` in that style, and the pairs
// appear in the order of `styles`.
// NOTE(review): the condition guarding the early return of an empty
// list, the declaration of `style` and the final return are elided
// from this listing -- confirm against the full file.
1482 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1483 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1484 Buffer const & buf, CiteItem const & ci) const
1487 return vector<pair<docstring,docstring>>();
// csm is created with styles.size() elements, so csm[i] matches styles[i]
1490 CiteStringMap csm(styles.size());
1491 for (size_t i = 0; i != csm.size(); ++i) {
1492 style = styles[i].name;
1493 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1500 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1502 bimap_.insert(info.begin(), info.end());
1503 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1504 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1510 // used in xhtml to sort a list of BibTeXInfo objects
1511 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1513 docstring const lauth = lhs->getAuthorOrEditorList();
1514 docstring const rauth = rhs->getAuthorOrEditorList();
1515 docstring const lyear = lhs->getYear();
1516 docstring const ryear = rhs->getYear();
1517 docstring const ltitl = lhs->operator[]("title");
1518 docstring const rtitl = rhs->operator[]("title");
1519 return (lauth < rauth)
1520 || (lauth == rauth && lyear < ryear)
1521 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1527 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1529 cited_entries_.clear();
1530 // We are going to collect all the citation keys used in the document,
1531 // getting them from the TOC.
1532 // FIXME We may want to collect these differently, in the first case,
1533 // so that we might have them in order of appearance.
1534 set<docstring> citekeys;
1535 Toc const & toc = *buf.tocBackend().toc("citation");
1536 for (auto const & t : toc) {
1537 if (t.str().empty())
1539 vector<docstring> const keys = getVectorFromString(t.str());
1540 citekeys.insert(keys.begin(), keys.end());
1542 if (citekeys.empty())
1545 // We have a set of the keys used in this document.
1546 // We will now convert it to a list of the BibTeXInfo objects used in
1548 vector<BibTeXInfo const *> bi;
1549 for (auto const & ck : citekeys) {
1550 BiblioInfo::const_iterator const bt = find(ck);
1551 if (bt == end() || !bt->second.isBibTeX())
1553 bi.push_back(&(bt->second));
1556 sort(bi.begin(), bi.end(), lSorter);
1558 // Now we can write the sorted keys
1559 // b is a BibTeXInfo const *
1560 for (auto const & b : bi)
1561 cited_entries_.push_back(b->key());
// Assign cite numbers, year modifiers and labels to the cited entries.
// collectCitedEntries() is run first. A first pass over cited_entries_
// gives entries a running cite number and compares each entry with the
// previous one (author-or-editor list and year) to decide on a
// modifier letter; a second pass sets each entry's label from its cite
// number, from "author year", or from its key.
// NOTE(review): this listing elides the declarations of `keynumber`
// and `modifier`, the conditions selecting between the numerical and
// the author-year branches, and several statements of the modifier
// logic -- confirm against the full file.
1565 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1567 collectCitedEntries(buf);
1568 CiteEngineType const engine_type = buf.params().citeEngineType();
// true when the ENGINE_TYPE_NUMERICAL bit is set in the engine type
1569 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1573 // used to remember the last one we saw
1574 // we'll be comparing entries to see if we need to add
1575 // modifiers, like "1984a"
// bimap_.end() serves as the "nothing seen yet" marker
1576 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1578 // add letters to years
1579 for (auto const & ce : cited_entries_) {
1580 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1581 // this shouldn't happen, but...
1582 if (biit == bimap_.end())
1583 // ...fail gracefully, anyway.
1585 BibTeXInfo & entry = biit->second;
// cite numbers count upwards, the first one assigned being 1 if
// keynumber starts at 0 (its declaration is elided)
1587 docstring const num = convert<docstring>(++keynumber);
1588 entry.setCiteNumber(num);
1590 // The first test here is checking whether this is the first
1591 // time through the loop. If so, then we do not have anything
1592 // with which to compare.
1593 if (last != bimap_.end()
1594 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1595 // we access the year via getYear() so as to get it from the xref,
1596 // if we need to do so
1597 && getYear(entry.key()) == getYear(last->second.key())) {
1598 if (modifier == 0) {
1599 // so the last one should have been 'a'
1600 last->second.setModifier('a');
1602 } else if (modifier == 'z')
1609 entry.setModifier(modifier);
1610 // remember the last one
// second pass: now that numbers and modifiers are known, set the labels
1615 for (auto const & ce : cited_entries_) {
1616 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1617 // this shouldn't happen, but...
1618 if (biit == bimap_.end())
1619 // ...fail gracefully, anyway.
1621 BibTeXInfo & entry = biit->second;
1623 entry.label(entry.citeNumber());
1625 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1626 // we do it this way so as to access the xref, if necessary
1627 // note that this also gives us the modifier
1628 docstring const year = getYear(ce, buf, true);
1629 if (!auth.empty() && !year.empty())
1630 entry.label(auth + ' ' + year);
1632 entry.label(entry.key());
1638 //////////////////////////////////////////////////////////////////////
1642 //////////////////////////////////////////////////////////////////////
// Derive a CitationStyle from a citation command string.
// The command is first mapped through params.getCiteAlias(); the alias,
// if there is one, replaces the command as the working name `cmd`.
// An upper-case first character of `command` sets forceUpperCase and
// lower-cases the first character of `cmd`. A trailing '*' on `command`
// sets hasStarredVersion, and a trailing '*' is removed from `cmd`.
// NOTE(review): the declaration of `cs`, the statement executed for an
// empty command, and the lines that store `cmd` in the result and
// return it are elided from this listing -- confirm against the full
// file.
1645 CitationStyle citationStyleFromString(string const & command,
1646 BufferParams const & params)
1649 if (command.empty())
1652 string const alias = params.getCiteAlias(command);
1653 string cmd = alias.empty() ? command : alias;
// the case test looks at the original command, not at the alias
1654 if (isUpperCase(command[0])) {
1655 cs.forceUpperCase = true;
1656 cmd[0] = lowercase(cmd[0]);
// index of the last character; assumes `command` is not empty here,
// i.e. that the empty check above leaves the function -- confirm
1659 size_t const n = command.size() - 1;
1660 if (command[n] == '*') {
1661 cs.hasStarredVersion = true;
// an alias need not end in '*', hence the separate test on cmd
1662 if (suffixIs(cmd, '*'))
1663 cmd = cmd.substr(0, cmd.size() - 1);
// Turn a CitationStyle back into a command string.
// The base name is cs.cmd when `latex` is true and cs.name otherwise;
// its first character is upper-cased when cs.forceUpperCase is set.
// NOTE(review): the statement executed when cs.hasStarredVersion is set
// and the return statement are elided from this listing -- confirm
// against the full file.
1671 string citationStyleToString(const CitationStyle & cs, bool const latex)
1673 string cmd = latex ? cs.cmd : cs.name;
1674 if (cs.forceUpperCase)
1675 cmd[0] = uppercase(cmd[0]);
1676 if (cs.hasStarredVersion)
1682 docstring authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs, Buffer const & buf)
1684 // This function closely mimics getAuthorList, but produces DocBook instead of text.
1685 // It has been greatly simplified, as the complete list of authors is always produced. No separators are required,
1686 // as the output has a database-like shape.
1687 // constructName has also been merged within, as it becomes really simple and leads to no copy-paste.
1689 if (authorsString.empty()) {
1693 // Split the input list of authors into individual authors.
1694 vector<docstring> const authors = getAuthors(authorsString);
1696 // Retrieve the "et al." variation.
1697 string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal");
1699 // Output the list of authors.
1700 xs << xml::StartTag("authorgroup");
1703 auto it = authors.cbegin();
1704 auto en = authors.cend();
1705 for (size_t i = 0; it != en; ++it, ++i) {
1706 xs << xml::StartTag("author");
1708 xs << xml::StartTag("personname");
1710 docstring name = *it;
1712 // All authors go in a <personname>. If more structure is known, use it; otherwise (just "et al."), print it as such.
1713 if (name == "others") {
1716 name_parts parts = nameParts(name);
1717 if (! parts.prefix.empty()) {
1718 xs << xml::StartTag("honorific");
1720 xs << xml::EndTag("honorific");
1723 if (! parts.prename.empty()) {
1724 xs << xml::StartTag("firstname");
1725 xs << parts.prename;
1726 xs << xml::EndTag("firstname");
1729 if (! parts.surname.empty()) {
1730 xs << xml::StartTag("surname");
1731 xs << parts.surname;
1732 xs << xml::EndTag("surname");
1735 if (! parts.suffix.empty()) {
1736 xs << xml::StartTag("othername", "role=\"suffix\"");
1738 xs << xml::EndTag("othername");
1743 xs << xml::EndTag("personname");
1745 xs << xml::EndTag("author");
1748 // Could add an affiliation after <personname>, but not stored in BibTeX.
1750 xs << xml::EndTag("authorgroup");