3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
10 * \author Jürgen Spitzmüller
12 * Full author contact details are available in file CREDITS.
17 #include "BiblioInfo.h"
19 #include "BufferParams.h"
20 #include "buffer_funcs.h"
23 #include "InsetIterator.h"
26 #include "Paragraph.h"
27 #include "TextClass.h"
28 #include "TocBackend.h"
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/docstream.h"
33 #include "support/gettext.h"
34 #include "support/lassert.h"
35 #include "support/lstrings.h"
36 #include "support/regex.h"
37 #include "support/textutils.h"
43 using namespace lyx::support;
50 // Remove placeholders from names
51 docstring renormalize(docstring const & input)
53 docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
54 return subst(res, from_ascii("$$comma!"), from_ascii(","));
58 // Split the surname into prefix ("von-part") and family name
59 pair<docstring, docstring> parseSurname(docstring const & sname)
61 // Split the surname into its tokens
62 vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
63 if (pieces.size() < 2)
64 return make_pair(docstring(), sname);
66 // Now we look for pieces that begin with a lower case letter.
67 // All except for the very last token constitute the "von-part".
69 vector<docstring>::const_iterator it = pieces.begin();
70 vector<docstring>::const_iterator const en = pieces.end();
72 for (; it != en; ++it) {
75 // If this is the last piece, then what we now have is
76 // the family name, notwithstanding the casing.
79 char_type const c = (*it)[0];
80 // If the piece starts with an upper case char, we assume
81 // this is part of the surname.
84 // Nothing of the former, so add this piece to the prename
92 // Reconstruct the family name.
93 // Note that if we left the loop because it + 1 == en,
94 // then this will still do the right thing, i.e., make surname
95 // just be the last piece.
98 for (; it != en; ++it) {
105 return make_pair(prefix, surname);
117 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
118 name_parts nameParts(docstring const & iname)
124 // First we check for groupings (via {...}) and replace blanks and
125 // commas inside groups with temporary placeholders
128 docstring::const_iterator p = iname.begin();
129 while (p != iname.end()) {
130 // count grouping level
135 // generate string with probable placeholders
136 if (*p == ' ' && gl > 0)
137 name += from_ascii("$$space!");
138 else if (*p == ',' && gl > 0)
139 name += from_ascii("$$comma!");
145 // Now we look for a comma, and take the last name to be everything
146 // preceding the right-most one, so that we also get the name suffix
148 vector<docstring> pieces = getVectorFromString(name);
149 if (pieces.size() > 1) {
150 // Whether we have a name suffix or not, the prename is
152 res.prename = renormalize(pieces.back());
153 // The family name, conversely, is always the first item.
154 // However, it might contain a prefix (aka "von" part)
155 docstring const sname = pieces.front();
156 res.prefix = renormalize(parseSurname(sname).first);
157 res.surname = renormalize(parseSurname(sname).second);
158 // If we have three pieces (the maximum allowed by BibTeX),
159 // the second one is the name suffix.
160 if (pieces.size() > 2)
161 res.suffix = renormalize(pieces.at(1));
165 // OK, so now we want to look for the last name.
166 // Split on spaces, to get various tokens.
167 pieces = getVectorFromString(name, from_ascii(" "));
168 // No space: Only a family name given
169 if (pieces.size() < 2) {
170 res.surname = renormalize(pieces.back());
173 // If we get two pieces, assume "prename surname"
174 if (pieces.size() == 2) {
175 res.prename = renormalize(pieces.front());
176 res.surname = renormalize(pieces.back());
180 // More than 2 pieces: A name prefix (aka "von" part) might be included.
181 // We look for the first piece that begins with a lower case letter
182 // (which is the name prefix, if it is not the last token) or the last token.
184 vector<docstring>::const_iterator it = pieces.begin();
185 vector<docstring>::const_iterator const en = pieces.end();
187 for (; it != en; ++it) {
190 char_type const c = (*it)[0];
191 // If the piece starts with a lower case char, we assume
192 // this is the name prefix and thus prename is complete.
195 // Same if this is the last piece, which is always the surname.
198 // Nothing of the former, so add this piece to the prename
206 // Now reconstruct the family name and strip the prefix.
207 // Note that if we left the loop because it + 1 == en,
208 // then this will still do the right thing, i.e., make surname
209 // just be the last piece.
212 for (; it != en; ++it) {
219 res.prename = renormalize(prename);
220 res.prefix = renormalize(parseSurname(surname).first);
221 res.surname = renormalize(parseSurname(surname).second);
226 docstring constructName(docstring const & name, string const & scheme)
228 // re-constructs a name from name parts according
230 docstring const prename = nameParts(name).prename;
231 docstring const surname = nameParts(name).surname;
232 docstring const prefix = nameParts(name).prefix;
233 docstring const suffix = nameParts(name).suffix;
235 static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
236 static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
237 static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
239 // Changing the first parameter of regex_match() may corrupt the
240 // second one. In this case we use the temporary string tmp.
241 if (regex_match(scheme, sub, reg1)) {
243 if (!prename.empty())
247 if (regex_match(res, sub, reg2)) {
248 string tmp = sub.str(1);
251 res = tmp + sub.str(5);
253 if (regex_match(res, sub, reg3)) {
254 string tmp = sub.str(1);
257 res = tmp + sub.str(5);
259 docstring result = from_ascii(res);
260 result = subst(result, from_ascii("%prename%"), prename);
261 result = subst(result, from_ascii("%surname%"), surname);
262 result = subst(result, from_ascii("%prefix%"), prefix);
263 result = subst(result, from_ascii("%suffix%"), suffix);
268 vector<docstring> const getAuthors(docstring const & author)
270 // We check for groupings (via {...}) and only consider " and "
271 // outside groups as author separator. This is to account
272 // for cases such as {{Barnes and Noble, Inc.}}, which
273 // need to be treated as one single family name.
274 // We use temporary placeholders in order to differentiate the
275 // diverse " and " cases.
277 // First, we temporarily replace all ampersands. It is rather unusual
278 // in author names, but can happen (consider cases such as "C \& A Corp.").
279 docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
280 // Then, we temporarily make all " and " strings to ampersands in order
281 // to handle them later on a per-char level.
282 iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
283 // Now we traverse through the string and replace the "&" by the proper
284 // output in- and outside groups
287 docstring::const_iterator p = iname.begin();
288 while (p != iname.end()) {
289 // count grouping level
294 // generate string with probable placeholders
297 // Inside groups, we output "and"
298 name += from_ascii("and");
300 // Outside groups, we output a separator
301 name += from_ascii("$$namesep!");
308 // re-insert the literal ampersands
309 name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
311 // Now construct the actual vector
312 return getVectorFromString(name, from_ascii(" $$namesep! "));
316 bool multipleAuthors(docstring const & author)
318 return getAuthors(author).size() > 1;
322 // converts a string containing LaTeX commands into unicode
324 docstring convertLaTeXCommands(docstring const & str)
329 bool scanning_cmd = false;
330 bool scanning_math = false;
331 bool escaped = false; // used to catch \$, etc.
332 while (!val.empty()) {
333 char_type const ch = val[0];
335 // if we're scanning math, we output everything until we
336 // find an unescaped $, at which point we break out.
343 scanning_math = false;
349 // if we're scanning a command name, then we just
350 // discard characters until we hit something that
353 if (isAlphaASCII(ch)) {
358 // so we're done with this command.
359 // now we fall through and check this character.
360 scanning_cmd = false;
363 // was the last character a \? If so, then this is something like:
364 // \\ or \$, so we'll just output it. That's probably not always right...
366 // exception: output \, as THIN SPACE
368 ret.push_back(0x2009);
379 scanning_math = true;
383 // Change text mode accents in the form
384 // {\v a} to \v{a} (see #9340).
385 // FIXME: This is a sort of mini-tex2lyx.
386 // Use the real tex2lyx instead!
387 static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
388 if (lyx::regex_search(to_utf8(val), tma_reg)) {
390 val.replace(2, 1, from_ascii("{"));
394 // Apart from the above, we just ignore braces
395 if (ch == '{' || ch == '}') {
400 // we're going to check things that look like commands, so if
401 // this doesn't, just output it.
408 // ok, could be a command of some sort
409 // let's see if it corresponds to some unicode
410 // unicodesymbols has things in the form: \"{u},
411 // whereas we may see things like: \"u. So we'll
412 // look for that and change it, if necessary.
413 // FIXME: This is a sort of mini-tex2lyx.
414 // Use the real tex2lyx instead!
415 static lyx::regex const reg("^\\\\\\W\\w");
416 if (lyx::regex_search(to_utf8(val), reg)) {
417 val.insert(3, from_ascii("}"));
418 val.insert(2, from_ascii("{"));
422 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
423 Encodings::TEXT_CMD, termination, rem);
424 if (!cnvtd.empty()) {
425 // it did, so we'll take that bit and proceed with what's left
430 // it's a command of some sort
439 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
440 docstring processRichtext(docstring const & str, bool richtext)
445 bool scanning_rich = false;
446 while (!val.empty()) {
447 char_type const ch = val[0];
448 if (ch == '{' && val.size() > 1 && val[1] == '!') {
449 // beginning of rich text
450 scanning_rich = true;
454 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
456 scanning_rich = false;
464 // we need to escape '<' and '>'
472 } else if (!scanning_rich /* && !richtext */)
474 // else the character is discarded, which will happen only if
475 // richtext == false and we are scanning rich text
484 //////////////////////////////////////////////////////////////////////
488 //////////////////////////////////////////////////////////////////////
490 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
491 : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type),
492 info_(), format_(), modifier_(0)
497 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
498 bool full, bool forceshort) const
500 docstring author = operator[]("author");
502 author = operator[]("editor");
504 return getAuthorList(buf, author, full, forceshort);
508 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
509 docstring const & author, bool const full, bool const forceshort,
510 bool const allnames, bool const beginning) const
512 // Maxnames threshold depends on engine
513 size_t maxnames = buf ?
514 buf->params().documentClass().max_citenames() : 2;
517 docstring const opt = label();
522 docstring const remainder = trim(split(opt, authors, '('));
523 if (remainder.empty())
524 // in this case, we didn't find a "(",
525 // so we don't have author (year)
528 // Natbib syntax is "Jones et al.(1990)Jones, Baker, and Williams"
529 docstring const fullauthors = trim(rsplit(remainder, ')'));
530 if (!fullauthors.empty())
539 // OK, we've got some names. Let's format them.
540 // Try to split the author list
541 vector<docstring> const authors = getAuthors(author);
545 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
546 : ENGINE_TYPE_DEFAULT;
548 // These are defined in the styles
550 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
552 string const namesep =
553 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
555 string const lastnamesep =
556 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
558 string const pairnamesep =
559 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
561 string firstnameform =
562 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
563 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
565 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
566 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
567 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
568 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
570 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
571 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
572 string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
573 : "{%prefix%[[%prefix% ]]}%surname%";
575 // Shorten the list (with et al.) if forceshort is set
576 // and the list can actually be shortened, else if maxcitenames
577 // is passed and full is not set.
578 bool shorten = forceshort && authors.size() > 1;
579 vector<docstring>::const_iterator it = authors.begin();
580 vector<docstring>::const_iterator en = authors.end();
581 for (size_t i = 0; it != en; ++it, ++i) {
582 if (i >= maxnames && !full) {
586 if (*it == "others") {
587 retval += buf ? buf->B_(etal) : from_ascii(etal);
590 if (i > 0 && i == authors.size() - 1) {
591 if (authors.size() == 2)
592 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
594 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
596 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
598 retval += (i == 0) ? constructName(*it, firstnameform)
599 : constructName(*it, othernameform);
601 retval += constructName(*it, citenameform);
605 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
607 retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
610 return convertLaTeXCommands(retval);
614 docstring const BibTeXInfo::getYear() const
617 // first try legacy year field
618 docstring year = operator[]("year");
621 // now try biblatex's date field
622 year = operator[]("date");
623 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
624 // We only want the years.
625 static regex const yreg("[-]?([\\d]{4}).*");
626 static regex const ereg(".*/[-]?([\\d]{4}).*");
628 string const date = to_utf8(year);
629 if (!regex_match(date, sm, yreg))
630 // cannot parse year.
632 year = from_ascii(sm[1]);
633 // check for an endyear
634 if (regex_match(date, sm, ereg))
635 year += char_type(0x2013) + from_ascii(sm[1]);
639 docstring const opt = label();
644 docstring tmp = split(opt, authors, '(');
646 // we don't have author (year)
649 tmp = split(tmp, year, ')');
654 void BibTeXInfo::getLocators(docstring & doi, docstring & url, docstring & file) const
657 // get "doi" entry from citation record
658 doi = operator[]("doi");
659 if (!doi.empty() && !prefixIs(doi,from_ascii("http")))
660 doi = "https://doi.org/" + doi;
661 // get "url" entry from citation record
662 url = operator[]("url");
663 // get "file" entry from citation record
664 file = operator[]("file");
666 // Jabref case, field has a format:
667 // Description:Location:Filetype;Description:Location:Filetype...
668 // We will grab only first pdf
670 docstring ret, filedest, tmp;
671 ret = split(file, tmp, ':');
672 tmp = split(ret, filedest, ':');
673 //TODO howto deal with relative directories?
674 FileName f(to_utf8(filedest));
676 file = "file:///" + filedest;
679 // kbibtex case, format:
680 // file1.pdf;file2.pdf
681 // We will grab only first pdf
684 kfile = operator[]("localfile");
685 if (!kfile.empty()) {
686 docstring filedest, tmp;
687 tmp = split(kfile, filedest, ';');
688 //TODO howto deal with relative directories?
689 FileName f(to_utf8(filedest));
691 file = "file:///" + filedest;
697 // try biblatex specific fields, see its manual
698 // 3.13.7 "Electronic Publishing Information"
699 docstring eprinttype = operator[]("eprinttype");
700 docstring eprint = operator[]("eprint");
704 if (eprinttype == "arxiv")
705 url = "https://arxiv.org/abs/" + eprint;
706 if (eprinttype == "jstor")
707 url = "https://www.jstor.org/stable/" + eprint;
708 if (eprinttype == "pubmed")
709 url = "http://www.ncbi.nlm.nih.gov/pubmed/" + eprint;
710 if (eprinttype == "hdl")
711 url = "https://hdl.handle.net/" + eprint;
712 if (eprinttype == "googlebooks")
713 url = "http://books.google.com/books?id=" + eprint;
718 // Here can be handled the bibliography environment. All one could do
719 // here is let LyX scan the entry for URL or HRef insets.
725 docstring parseOptions(docstring const & format, string & optkey,
726 docstring & ifpart, docstring & elsepart);
728 // Calls parseOptions to deal with an embedded option, such as:
729 // {%number%[[, no.~%number%]]}
730 // which must appear at the start of format. ifelsepart gets the
731 // whole of the option, and we return what's left after the option.
732 // we return format if there is an error.
733 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
735 LASSERT(format[0] == '{' && format[1] == '%', return format);
739 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
740 if (format == rest) { // parse error
741 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
744 LASSERT(rest.size() <= format.size(),
745 { ifelsepart = docstring(); return format; });
746 ifelsepart = format.substr(0, format.size() - rest.size());
751 // Gets a "clause" from a format string, where the clause is
752 // delimited by '[[' and ']]'. Returns what is left after the
753 // clause is removed, and returns format if there is an error.
754 docstring getClause(docstring const & format, docstring & clause)
756 docstring fmt = format;
759 // we'll remove characters from the front of fmt as we
761 while (!fmt.empty()) {
762 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
767 // check for an embedded option
768 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
770 docstring const rest = parseEmbeddedOption(fmt, part);
772 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
777 } else { // it's just a normal character
786 // parse an options string, which must appear at the start of the
787 // format parameter. puts the parsed bits in optkey, ifpart, and
788 // elsepart and returns what's left after the option is removed.
789 // if there's an error, it returns format itself.
790 docstring parseOptions(docstring const & format, string & optkey,
791 docstring & ifpart, docstring & elsepart)
793 LASSERT(format[0] == '{' && format[1] == '%', return format);
795 docstring fmt = format.substr(2);
796 size_t pos = fmt.find('%'); // end of key
797 if (pos == string::npos) {
798 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
801 optkey = to_utf8(fmt.substr(0, pos));
802 fmt = fmt.substr(pos + 1);
803 // [[format]] should be next
804 if (fmt[0] != '[' || fmt[1] != '[') {
805 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
809 docstring curfmt = fmt;
810 fmt = getClause(curfmt, ifpart);
812 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
816 if (fmt[0] == '}') // we're done, no else clause
817 return fmt.substr(1);
819 // else part should follow
820 if (fmt[0] != '[' || fmt[1] != '[') {
821 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
826 fmt = getClause(curfmt, elsepart);
828 if (fmt == curfmt || fmt[0] != '}') {
829 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
832 return fmt.substr(1);
839 Bug #9131 revealed an oddity in how we are generating citation information
840 when more than one key is given. We end up building a longer and longer format
841 string as we go, which we then have to re-parse, over and over and over again,
842 rather than generating the information for the individual keys and then putting
843 all of that together. We do that to deal with the way separators work, from what
844 I can tell, but it still feels like a hack. Fixing this would require quite a
845 bit of work, however.
847 docstring BibTeXInfo::expandFormat(docstring const & format,
848 BibTeXInfoList const & xrefs, int & counter, Buffer const & buf,
849 CiteItem const & ci, bool next, bool second) const
851 // incorrect use of macros could put us in an infinite loop
852 static int const max_passes = 5000;
853 // the use of overly large keys can lead to performance problems, due
854 // to eventual attempts to convert LaTeX macros to unicode. See bug
855 // #8944. By default, the size is limited to 128 (in CiteItem), but
856 // for specific purposes (such as XHTML export), it needs to be enlarged
857 // This is perhaps not the best solution, but it will have to do for now.
858 size_t const max_keysize = ci.max_key_size;
859 odocstringstream ret; // return value
861 bool scanning_key = false;
862 bool scanning_rich = false;
864 CiteEngineType const engine_type = buf.params().citeEngineType();
865 docstring fmt = format;
866 // we'll remove characters from the front of fmt as we
868 while (!fmt.empty()) {
869 if (counter > max_passes) {
870 LYXERR0("Recursion limit reached while parsing `"
875 char_type thischar = fmt[0];
876 if (thischar == '%') {
877 // beginning or end of key
880 scanning_key = false;
881 // so we replace the key with its value, which may be empty
885 buf.params().documentClass().getCiteMacro(engine_type, key);
886 fmt = from_utf8(val) + fmt.substr(1);
889 } else if (prefixIs(key, "B_")) {
890 // a translatable bit (to the Buffer language)
892 buf.params().documentClass().getCiteMacro(engine_type, key);
893 docstring const trans =
894 translateIfPossible(from_utf8(val), buf.params().language->code());
896 } else if (key[0] == '_') {
897 // a translatable bit (to the GUI language)
899 buf.params().documentClass().getCiteMacro(engine_type, key);
900 docstring const trans =
901 translateIfPossible(from_utf8(val));
904 docstring const val =
905 getValueForKey(key, buf, ci, xrefs, max_keysize);
907 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
910 ret << from_ascii("{!</span>!}");
918 else if (thischar == '{') {
919 // beginning of option?
921 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
924 if (fmt.size() > 1) {
926 // it is the beginning of an optional format
930 docstring const newfmt =
931 parseOptions(fmt, optkey, ifpart, elsepart);
932 if (newfmt == fmt) // parse error
935 docstring const val =
936 getValueForKey(optkey, buf, ci, xrefs);
937 if (optkey == "next" && next)
938 ret << ifpart; // without expansion
939 else if (optkey == "second" && second) {
941 ret << expandFormat(ifpart, xrefs, newcounter, buf,
943 } else if (!val.empty()) {
945 ret << expandFormat(ifpart, xrefs, newcounter, buf,
947 } else if (!elsepart.empty()) {
949 ret << expandFormat(elsepart, xrefs, newcounter, buf,
952 // fmt will have been shortened for us already
956 // beginning of rich text
957 scanning_rich = true;
959 ret << from_ascii("{!");
963 // we are here if '{' was not followed by % or !.
964 // So it's just a character.
967 else if (scanning_rich && thischar == '!'
968 && fmt.size() > 1 && fmt[1] == '}') {
970 scanning_rich = false;
972 ret << from_ascii("!}");
975 else if (scanning_key)
976 key += char(thischar);
980 } catch (EncodingException & /* e */) {
981 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
987 LYXERR0("Never found end of key in `" << format << "'!");
991 LYXERR0("Never found end of rich text in `" << format << "'!");
998 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
999 Buffer const & buf, CiteItem const & ci, docstring const & format_in) const
1001 bool const richtext = ci.richtext;
1003 CiteEngineType const engine_type = buf.params().citeEngineType();
1004 DocumentClass const & dc = buf.params().documentClass();
1005 docstring const & format = format_in.empty()?
1006 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)))
1009 if (format != format_) {
1010 // clear caches since format changed
1012 info_richtext_.clear();
1016 if (!richtext && !info_.empty()) {
1017 info_ = convertLaTeXCommands(processRichtext(info_, false));
1020 if (richtext && !info_richtext_.empty())
1021 return info_richtext_;
1024 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
1030 info_ = expandFormat(format, xrefs, counter, buf,
1033 if (info_.empty()) {
1034 // this probably shouldn't happen
1039 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
1040 return info_richtext_;
1043 info_ = convertLaTeXCommands(processRichtext(info_, false));
1048 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
1049 Buffer const & buf, docstring const & format,
1050 CiteItem const & ci, bool next, bool second) const
1055 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
1057 if (!loclabel.empty() && !next) {
1058 loclabel = processRichtext(loclabel, ci.richtext);
1059 loclabel = convertLaTeXCommands(loclabel);
1066 docstring const & BibTeXInfo::operator[](docstring const & field) const
1068 BibTeXInfo::const_iterator it = find(field);
1071 static docstring const empty_value = docstring();
1076 docstring const & BibTeXInfo::operator[](string const & field) const
1078 return operator[](from_ascii(field));
// Resolves a citation-format key to the text it stands for in this entry.
// The key is looked up as a plain field of this entry via operator[]; when
// that yields nothing and xrefs is not empty, the cross-referenced entries
// are scanned for a non-empty value of the same field. A set of special keys
// (name lists, pre-/post-texts, flags such as "ifstar", ...) is handled by
// the else-if chain below. An optional "clean:" prefix is stripped from the
// key before the lookup; the xml::cleanAttr() call near the end is presumably
// applied only in that case (the guarding line is not part of this listing).
// maxsize values below 16 trip the LASSERT, whose fallback resets maxsize to
// 16; the result is finally passed to truncateWithEllipsis() with maxsize.
//
// NOTE: for every "<prefix>:<field>" key, the offset given to substr() must
// equal the length of "<prefix>:" (colon included); otherwise the field name
// handed to operator[] keeps part of the prefix and the lookup finds nothing.
1082 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
1083 CiteItem const & ci, BibTeXInfoList const & xrefs, size_t maxsize) const
1085 // anything less is pointless
1086 LASSERT(maxsize >= 16, maxsize = 16);
1087 string key = oldkey;
1088 bool cleanit = false;
1089 if (prefixIs(oldkey, "clean:")) {
1090 key = oldkey.substr(6);
1094 docstring ret = operator[](key);
1095 if (ret.empty() && !xrefs.empty()) {
1096 // xr is a (reference to a) BibTeXInfo const *
1097 for (auto const & xr : xrefs) {
1098 if (xr && !(*xr)[key].empty()) {
1105 // some special keys
1106 // FIXME: dialog, textbefore and textafter have nothing to do with this
1107 if (key == "dialog" && ci.context == CiteItem::Dialog)
1108 ret = from_ascii("x"); // any non-empty string will do
1109 else if (key == "export" && ci.context == CiteItem::Export)
1110 ret = from_ascii("x"); // any non-empty string will do
1111 else if (key == "ifstar" && ci.Starred)
1112 ret = from_ascii("x"); // any non-empty string will do
1113 else if (key == "ifqualified" && ci.isQualified)
1114 ret = from_ascii("x"); // any non-empty string will do
1115 else if (key == "entrytype")
1117 else if (prefixIs(key, "ifentrytype:")
1118 && from_ascii(key.substr(12)) == entry_type_)
1119 ret = from_ascii("x"); // any non-empty string will do
1120 else if (key == "key")
1122 else if (key == "label")
1124 else if (key == "modifier" && modifier_ != 0)
1126 else if (key == "numericallabel")
1128 else if (prefixIs(key, "ifmultiple:")) {
1129 // Return whether we have multiple authors
1130 docstring const kind = operator[](from_ascii(key.substr(11)));
1131 if (multipleAuthors(kind))
1132 ret = from_ascii("x"); // any non-empty string will do
1134 else if (prefixIs(key, "abbrvnames:")) {
1135 // Special key to provide abbreviated name list,
1136 // with respect to maxcitenames. Suitable for Bibliography
1138 docstring const kind = operator[](from_ascii(key.substr(11)));
1139 ret = getAuthorList(&buf, kind, false, false, true);
1140 if (ci.forceUpperCase && isLowerCase(ret[0]))
1141 ret[0] = uppercase(ret[0]);
1142 } else if (prefixIs(key, "fullnames:")) {
1143 // Return a full name list. Suitable for Bibliography
1145 docstring const kind = operator[](from_ascii(key.substr(10)));
1146 ret = getAuthorList(&buf, kind, true, false, true);
1147 if (ci.forceUpperCase && isLowerCase(ret[0]))
1148 ret[0] = uppercase(ret[0]);
1149 } else if (prefixIs(key, "forceabbrvnames:")) {
1150 // Special key to provide abbreviated name lists,
1151 // irrespective of maxcitenames. Suitable for Bibliography
1153 docstring const kind = operator[](from_ascii(key.substr(16))); // 16 == length of "forceabbrvnames:"
1154 ret = getAuthorList(&buf, kind, false, true, true);
1155 if (ci.forceUpperCase && isLowerCase(ret[0]))
1156 ret[0] = uppercase(ret[0]);
1157 } else if (prefixIs(key, "abbrvbynames:")) {
1158 // Special key to provide abbreviated name list,
1159 // with respect to maxcitenames. Suitable for further names inside a
1160 // bibliography item (such as "ed. by ...")
1161 docstring const kind = operator[](from_ascii(key.substr(13))); // 13 == length of "abbrvbynames:"
1162 ret = getAuthorList(&buf, kind, false, false, true, false);
1163 if (ci.forceUpperCase && isLowerCase(ret[0]))
1164 ret[0] = uppercase(ret[0]);
1165 } else if (prefixIs(key, "fullbynames:")) {
1166 // Return a full name list. Suitable for further names inside a
1167 // bibliography item (such as "ed. by ...")
1168 docstring const kind = operator[](from_ascii(key.substr(12))); // 12 == length of "fullbynames:"
1169 ret = getAuthorList(&buf, kind, true, false, true, false);
1170 if (ci.forceUpperCase && isLowerCase(ret[0]))
1171 ret[0] = uppercase(ret[0]);
1172 } else if (prefixIs(key, "forceabbrvbynames:")) {
1173 // Special key to provide abbreviated name lists,
1174 // irrespective of maxcitenames. Suitable for further names inside a
1175 // bibliography item (such as "ed. by ...")
1176 docstring const kind = operator[](from_ascii(key.substr(18))); // 18 == length of "forceabbrvbynames:"
1177 ret = getAuthorList(&buf, kind, false, true, true, false);
1178 if (ci.forceUpperCase && isLowerCase(ret[0]))
1179 ret[0] = uppercase(ret[0]);
1180 } else if (key == "abbrvciteauthor") {
1181 // Special key to provide abbreviated author or
1182 // editor names (suitable for citation labels),
1183 // with respect to maxcitenames.
1184 ret = getAuthorOrEditorList(&buf, false, false);
1185 if (ci.forceUpperCase && isLowerCase(ret[0]))
1186 ret[0] = uppercase(ret[0]);
1187 } else if (key == "fullciteauthor") {
1188 // Return a full author or editor list (for citation labels)
1189 ret = getAuthorOrEditorList(&buf, true, false);
1190 if (ci.forceUpperCase && isLowerCase(ret[0]))
1191 ret[0] = uppercase(ret[0]);
1192 } else if (key == "forceabbrvciteauthor") {
1193 // Special key to provide abbreviated author or
1194 // editor names (suitable for citation labels),
1195 // irrespective of maxcitenames.
1196 ret = getAuthorOrEditorList(&buf, false, true);
1197 if (ci.forceUpperCase && isLowerCase(ret[0]))
1198 ret[0] = uppercase(ret[0]);
1199 } else if (key == "bibentry") {
1200 // Special key to provide the full bibliography entry: see getInfo()
1201 CiteEngineType const engine_type = buf.params().citeEngineType();
1202 DocumentClass const & dc = buf.params().documentClass();
1203 docstring const & format =
1204 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1206 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1207 } else if (key == "textbefore")
1208 ret = ci.textBefore;
1209 else if (key == "textafter")
1211 else if (key == "curpretext") {
1212 vector<pair<docstring, docstring>> pres = ci.getPretexts();
1213 vector<pair<docstring, docstring>>::iterator it = pres.begin();
1215 for (; it != pres.end() ; ++it) {
1216 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1221 if ((*it).first == bib_key_)
1224 } else if (key == "curposttext") {
1225 vector<pair<docstring, docstring>> posts = ci.getPosttexts();
1226 vector<pair<docstring, docstring>>::iterator it = posts.begin();
1228 for (; it != posts.end() ; ++it) {
1229 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1234 if ((*it).first == bib_key_)
1237 } else if (key == "year")
1242 ret = xml::cleanAttr(ret);
1244 // make sure it is not too big
1245 support::truncateWithEllipsis(ret, maxsize);
1250 //////////////////////////////////////////////////////////////////////
1254 //////////////////////////////////////////////////////////////////////
1258 // A functor for use with sort, leading to case insensitive sorting
// Returns true when compare_no_case(a, b) yields a negative value, i.e. when
// a is ordered before b. BiblioInfo::getKeys() passes it to sort().
1259 bool compareNoCase(const docstring & a, const docstring & b) {
1260 return compare_no_case(a, b) < 0;
// Collects the keys that the entry `data` refers to through its "crossref"
// and "xdata" fields, following nested "xdata" references recursively.
// \param data    the entry whose references are gathered.
// \param nested  true on the recursive calls made below; the "crossref"
//                field is then ignored and only "xdata" is followed.
// The result lists each referenced key immediately followed by the keys
// obtained by recursing into it. A key is recorded before it is looked up,
// so it is listed even if find() does not locate a matching entry.
// NOTE(review): no guard against cyclic xdata references is visible here;
// confirm that such input cannot lead to unbounded recursion.
1266 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1268 vector<docstring> result;
// Non-BibTeX entries are presumably answered with the (empty) result as is;
// the statement guarded by this test is not visible here -- confirm.
1269 if (!data.isBibTeX())
1271 // Legacy crossref field. This is not nestable.
1272 if (!nested && !data["crossref"].empty()) {
1273 docstring const xrefkey = data["crossref"];
1274 result.push_back(xrefkey);
1275 // However, check for nested xdatas
1276 BiblioInfo::const_iterator it = find(xrefkey);
1278 BibTeXInfo const & xref = it->second;
// Recurse with nested == true so that the target's own crossref is skipped.
1279 vector<docstring> const nxdata = getXRefs(xref, true);
1280 if (!nxdata.empty())
1281 result.insert(result.end(), nxdata.begin(), nxdata.end());
1284 // Biblatex's xdata field. Infinitely nestable.
1285 // XData field can consist of a comma-separated list of keys
1286 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1287 if (!xdatakeys.empty()) {
1288 for (auto const & xdatakey : xdatakeys) {
1289 result.push_back(xdatakey);
1290 BiblioInfo::const_iterator it = find(xdatakey);
1292 BibTeXInfo const & xdata = it->second;
1293 vector<docstring> const nxdata = getXRefs(xdata, true);
1294 if (!nxdata.empty())
1295 result.insert(result.end(), nxdata.begin(), nxdata.end());
// Builds the list of the keys of all entries held by this BiblioInfo and
// sorts it with compareNoCase. This list is presumably what the function
// returns (the return statement is not visible here -- confirm).
1303 vector<docstring> const BiblioInfo::getKeys() const
1305 vector<docstring> bibkeys;
// Each element of *this is a (key, entry) pair; only the key is kept.
1306 for (auto const & bi : *this)
1307 bibkeys.push_back(bi.first);
1308 sort(bibkeys.begin(), bibkeys.end(), &compareNoCase);
// Copies every element of field_names_ into a vector and sorts that vector
// with sort()'s default ordering (no comparator is passed, unlike getKeys()).
// The vector is presumably the return value (return statement not visible
// here -- confirm).
1313 vector<docstring> const BiblioInfo::getFields() const
1315 vector<docstring> bibfields;
1316 for (auto const & fn : field_names_)
1317 bibfields.push_back(fn);
1318 sort(bibfields.begin(), bibfields.end());
// Copies every element of entry_types_ into a vector and sorts that vector
// with sort()'s default ordering. The vector is presumably the return value
// (return statement not visible here -- confirm).
1323 vector<docstring> const BiblioInfo::getEntries() const
1325 vector<docstring> bibentries;
1326 for (auto const & et : entry_types_)
1327 bibentries.push_back(et);
1328 sort(bibentries.begin(), bibentries.end());
// Looks up the entry stored under `key` and returns the author-or-editor
// list that entry produces for buffer `buf`.
// NOTE(review): the handling of a key that find() does not locate is not
// visible here -- confirm what is returned in that case.
1333 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1335 BiblioInfo::const_iterator it = find(key);
1338 BibTeXInfo const & data = it->second;
// The second argument is false; elsewhere in this file the same value is
// passed for the abbreviated ("abbrvciteauthor") list, while the full list
// passes true -- presumably this therefore yields the abbreviated form.
1339 return data.getAuthorOrEditorList(&buf, false);
// Looks up the entry stored under `key` and returns its citeNumber().
// NOTE(review): the handling of a key that find() does not locate is not
// visible here -- confirm what is returned in that case.
1343 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1345 BiblioInfo::const_iterator it = find(key);
1348 BibTeXInfo const & data = it->second;
1349 return data.citeNumber();
// Looks up the entry stored under `key` and forwards the three output
// parameters `doi`, `url` and `file` to that entry's getLocators(), which
// receives them by the references declared here.
// NOTE(review): the handling of a key that find() does not locate is not
// visible here -- presumably the outputs are then left untouched; confirm.
1352 void BiblioInfo::getLocators(docstring const & key, docstring & doi, docstring & url, docstring & file) const
1354 BiblioInfo::const_iterator it = find(key);
1357 BibTeXInfo const & data = it->second;
1358 data.getLocators(doi,url,file);
// Determines the year of the entry stored under `key`.
// The entry's own getYear() is consulted first; the entries named by
// getXRefs() serve as a fallback source for the year.
// \param use_modifier  when true and the entry's modifier() is non-zero, the
//                      modifier character is appended to the year.
// NOTE(review): the not-found branch, the condition under which the
// cross-references are consulted, and the loop exit are not visible here.
1362 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1364 BiblioInfo::const_iterator it = find(key);
1367 BibTeXInfo const & data = it->second;
1368 docstring year = data.getYear();
1370 // let's try the crossrefs
1371 vector<docstring> const xrefs = getXRefs(data);
1375 for (docstring const & xref : xrefs) {
1376 BiblioInfo::const_iterator const xrefit = find(xref);
// A referenced key without a matching entry cannot supply a year;
// presumably the loop moves on to the next one (confirm).
1377 if (xrefit == end())
1379 BibTeXInfo const & xref_data = xrefit->second;
1380 year = xref_data.getYear();
// The modifier always comes from the entry itself, never from a
// cross-referenced entry.
1386 if (use_modifier && data.modifier() != 0)
1387 year += data.modifier();
// Variant of getYear(key, use_modifier) that can fall back to the string
// buf.B_("No year").
// NOTE(review): the test selecting the fallback is not visible here;
// presumably it applies when the computed year is empty -- confirm.
1392 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1394 docstring const year = getYear(key, use_modifier);
1396 return buf.B_("No year");
// Produces the information string for the entry stored under `key` by
// delegating to that entry's getInfo(), handing it pointers to all
// cross-referenced entries that exist in this BiblioInfo.
// `buf`, `ci` and `format` are passed through unchanged.
1401 docstring const BiblioInfo::getInfo(docstring const & key,
1402 Buffer const & buf, CiteItem const & ci, docstring const & format) const
1404 BiblioInfo::const_iterator it = find(key);
// Presumably taken when find() did not locate the key (the test itself is
// not visible here -- confirm).
1406 return docstring(_("Bibliography entry not found!"));
1407 BibTeXInfo const & data = it->second;
1408 BibTeXInfoList xrefptrs;
// Referenced keys without a matching entry are silently left out.
1409 for (docstring const & xref : getXRefs(data)) {
1410 BiblioInfo::const_iterator const xrefit = find(xref);
1411 if (xrefit != end())
1412 xrefptrs.push_back(&(xrefit->second));
1414 return data.getInfo(xrefptrs, buf, ci, format);
// Builds the citation label for the given keys in the citation style `style`.
// \param keys   the cited keys; taken by value because the list is modified
//               locally when there are more than ten of them.
// \param buf    supplies the cite engine type and the document class from
//               which the "cite" format is obtained.
// \param style  name of the citation style handed to getCiteFormat().
// \param ci     citation settings; ci.max_size bounds the label length.
// The format string is threaded through BibTeXInfo::getLabel() once per key
// (each call receives the previous result), and the final string is passed
// to truncateWithEllipsis().
// NOTE(review): several statements of this function are not visible here
// (key-list shortening, duplicate handling, selection of `data`); the
// comments below cover only what the visible lines establish.
1418 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1419 Buffer const & buf, string const & style, CiteItem const & ci) const
1421 size_t max_size = ci.max_size;
1422 // shorter makes no sense
1423 LASSERT(max_size >= 16, max_size = 16);
1425 // we can't display more than 10 of these, anyway
1426 // but since we truncate in the middle,
1427 // we need to split into two halves.
1428 bool const too_many_keys = keys.size() > 10;
1429 vector<docstring> lkeys;
1430 if (too_many_keys) {
// Save the last five keys, then append them again; presumably `keys` is
// cut down to its leading part in between (that statement is not visible
// here -- confirm).
1431 lkeys.insert(lkeys.end(), keys.end() - 5, keys.end());
1433 keys.insert(keys.end(), lkeys.begin(), lkeys.end());
1436 CiteEngineType const engine_type = buf.params().citeEngineType();
1437 DocumentClass const & dc = buf.params().documentClass();
1438 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1439 docstring ret = format;
1440 vector<docstring>::const_iterator key = keys.begin();
1441 vector<docstring>::const_iterator ken = keys.end();
1442 vector<docstring> handled_keys;
// i counts the keys processed so far, starting at 0.
1443 for (int i = 0; key != ken; ++key, ++i) {
1444 handled_keys.push_back(*key);
1446 for (auto const & k : handled_keys) {
1450 BiblioInfo::const_iterator it = find(*key);
// Placeholder entry carrying only the key; `data` starts out referring to
// it. Presumably it is replaced by the real entry when find() succeeded
// (not visible here -- confirm).
1451 BibTeXInfo empty_data;
1452 empty_data.key(*key);
1453 BibTeXInfo & data = empty_data;
1454 vector<BibTeXInfo const *> xrefptrs;
1457 for (docstring const & xref : getXRefs(data)) {
1458 BiblioInfo::const_iterator const xrefit = find(xref);
1459 if (xrefit != end())
1460 xrefptrs.push_back(&(xrefit->second));
// `key + 1 != ken` is true while further keys follow; `i == 1` is true
// only for the second key processed.
1464 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
// NOTE(review): the trailing `true` presumably selects the truncation
// "in the middle" mentioned above -- confirm against truncateWithEllipsis.
1467 support::truncateWithEllipsis(ret, max_size, true);
// Reports whether the entry found for `key` is BibTeX data, as told by the
// entry's isBibTeX().
// NOTE(review): the declaration of key1 and the handling of a key that
// find() does not locate are not visible here.
1473 bool BiblioInfo::isBibtex(docstring const & key) const
// `key` is split at ',' and the lookup uses key1; presumably key1 receives
// the part before the first comma, i.e. the first key of a list (confirm
// against split()).
1476 split(key, key1, ',');
1477 BiblioInfo::const_iterator it = find(key1);
1480 return it->second.isBibTeX();
// For every style in `styles`, pairs the style name with the label that
// getLabel() produces for `keys` in that style. The result has one slot per
// style, in the order of `styles`.
// `buf` and `ci` are passed through to getLabel() unchanged.
1484 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1485 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1486 Buffer const & buf, CiteItem const & ci) const
// Early exit with an empty result; the guarding test is not visible here
// (presumably an empty `keys` list -- confirm).
1489 return vector<pair<docstring,docstring>>();
1492 CiteStringMap csm(styles.size());
1493 for (size_t i = 0; i != csm.size(); ++i) {
1494 style = styles[i].name;
1495 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
// Adds the contents of `info` to this object: its entries go into bimap_,
// its field names into field_names_ and its entry types into entry_types_.
// NOTE(review): bimap_ is iterated as map<docstring, BibTeXInfo> elsewhere
// in this file; if that is std::map, range insert() leaves an entry whose
// key already exists unchanged, so existing entries win over those of
// `info` -- confirm this is intended.
1502 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1504 bimap_.insert(info.begin(), info.end());
1505 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1506 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1512 // used in xhtml to sort a list of BibTeXInfo objects
// Returns true when *lhs sorts before *rhs. Entries are compared by
// author-or-editor list first, then by year, then by the "title" field;
// a later criterion only decides when all earlier ones compare equal.
// Both pointers are dereferenced unconditionally and must not be null.
// BiblioInfo::collectCitedEntries() passes this function to sort().
1513 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1515 docstring const lauth = lhs->getAuthorOrEditorList();
1516 docstring const rauth = rhs->getAuthorOrEditorList();
1517 docstring const lyear = lhs->getYear();
1518 docstring const ryear = rhs->getYear();
1519 docstring const ltitl = lhs->operator[]("title");
1520 docstring const rtitl = rhs->operator[]("title");
1521 return (lauth < rauth)
1522 || (lauth == rauth && lyear < ryear)
1523 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
// Rebuilds cited_entries_: the keys of all BibTeX entries cited in `buf`,
// ordered by lSorter (author/editor, year, title).
// The previous content of cited_entries_ is discarded first.
1529 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1531 cited_entries_.clear();
1532 // We are going to collect all the citation keys used in the document,
1533 // getting them from the TOC.
1534 // FIXME We may want to collect these differently, in the first case,
1535 // so that we might have them in order of appearance.
// A set is used, so a key cited several times is recorded only once.
1536 set<docstring> citekeys;
1537 Toc const & toc = *buf.tocBackend().toc("citation");
1538 for (auto const & t : toc) {
// Items with an empty string carry no keys; presumably they are skipped
// (the guarded statement is not visible here -- confirm).
1539 if (t.str().empty())
// One TOC item may name several keys; getVectorFromString() splits them.
1541 vector<docstring> const keys = getVectorFromString(t.str());
1542 citekeys.insert(keys.begin(), keys.end());
// Nothing cited: presumably the function ends here, leaving cited_entries_
// empty (the guarded statement is not visible here -- confirm).
1544 if (citekeys.empty())
1547 // We have a set of the keys used in this document.
1548 // We will now convert it to a list of the BibTeXInfo objects used in this document.
1550 vector<BibTeXInfo const *> bi;
1551 for (auto const & ck : citekeys) {
1552 BiblioInfo::const_iterator const bt = find(ck);
// Keys without an entry, and entries that are not BibTeX data, are
// presumably left out (the guarded statement is not visible here).
1553 if (bt == end() || !bt->second.isBibTeX())
1555 bi.push_back(&(bt->second));
1558 sort(bi.begin(), bi.end(), lSorter);
1560 // Now we can write the sorted keys
1561 // b is a BibTeXInfo const *
1562 for (auto const & b : bi)
1563 cited_entries_.push_back(b->key());
// Assigns citation numbers, year modifiers and labels to the cited entries.
// Steps visible in this function:
//  1. collectCitedEntries(buf) refreshes cited_entries_.
//  2. A first pass over cited_entries_ sets each entry's cite number from a
//     running counter and sets modifier characters on consecutive entries
//     whose author-or-editor list and year are equal (cf. "1984a").
//  3. A second pass sets each entry's label: the cite number, or
//     "<author-or-editor list> <year>", or the entry's key.
// NOTE(review): the declarations of keynumber and modifier, the branches
// that select between the label variants, and the update of `last` are not
// visible here; the comments below cover only the visible statements.
1567 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1569 collectCitedEntries(buf);
1570 CiteEngineType const engine_type = buf.params().citeEngineType();
// True when the ENGINE_TYPE_NUMERICAL bit is set in the engine type.
1571 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1575 // used to remember the last one we saw
1576 // we'll be comparing entries to see if we need to add
1577 // modifiers, like "1984a"
1578 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1580 // add letters to years
1581 for (auto const & ce : cited_entries_) {
1582 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1583 // this shouldn't happen, but...
1584 if (biit == bimap_.end())
1585 // ...fail gracefully, anyway.
1587 BibTeXInfo & entry = biit->second;
// The counter is incremented before use, so the first number assigned is
// one above its initial value.
1589 docstring const num = convert<docstring>(++keynumber);
1590 entry.setCiteNumber(num);
1592 // The first test here is checking whether this is the first
1593 // time through the loop. If so, then we do not have anything
1594 // with which to compare.
1595 if (last != bimap_.end()
1596 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1597 // we access the year via getYear() so as to get it from the xref,
1598 // if we need to do so
1599 && getYear(entry.key()) == getYear(last->second.key())) {
1600 if (modifier == 0) {
1601 // so the last one should have been 'a'
1602 last->second.setModifier('a');
// NOTE(review): the handling of a modifier that has reached 'z' is not
// visible here -- confirm what follows the last letter.
1604 } else if (modifier == 'z')
1611 entry.setModifier(modifier);
1612 // remember the last one
// Second pass: now that numbers and modifiers are known, set the labels.
1617 for (auto const & ce : cited_entries_) {
1618 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1619 // this shouldn't happen, but...
1620 if (biit == bimap_.end())
1621 // ...fail gracefully, anyway.
1623 BibTeXInfo & entry = biit->second;
// Label variant 1: the cite number (presumably chosen when `numbers` is
// true -- the selecting test is not visible here).
1625 entry.label(entry.citeNumber());
1627 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1628 // we do it this way so as to access the xref, if necessary
1629 // note that this also gives us the modifier
1630 docstring const year = getYear(ce, buf, true);
// Label variant 2: author-or-editor list and year, separated by a space.
1631 if (!auth.empty() && !year.empty())
1632 entry.label(auth + ' ' + year);
// Label variant 3: the bare key (presumably the fallback when author or
// year is empty -- the `else` is not visible here).
1634 entry.label(entry.key());
1640 //////////////////////////////////////////////////////////////////////
1644 //////////////////////////////////////////////////////////////////////
// Derives a CitationStyle from a citation command name.
// \param command  the command as written; an upper-case first character sets
//                 forceUpperCase, a trailing '*' sets hasStarredVersion.
// \param params   consulted via getCiteAlias() to map the command to an
//                 alias; when an alias exists it replaces the command name.
// NOTE(review): the declaration of `cs`, the statements storing the
// normalized name in it, and the return statements are not visible here.
1647 CitationStyle citationStyleFromString(string const & command,
1648 BufferParams const & params)
// An empty command is handled separately (guarded statement not visible
// here); everything below indexes `command` and relies on it being non-empty.
1651 if (command.empty())
1654 string const alias = params.getCiteAlias(command);
1655 string cmd = alias.empty() ? command : alias;
// The flag is derived from the original command, while the lower-casing is
// applied to cmd, i.e. to the alias when one is in use.
1656 if (isUpperCase(command[0])) {
1657 cs.forceUpperCase = true;
1658 cmd[0] = lowercase(cmd[0]);
// n is the index of the last character of `command`.
1661 size_t const n = command.size() - 1;
1662 if (command[n] == '*') {
1663 cs.hasStarredVersion = true;
// The star is only stripped from cmd if cmd itself ends in '*'; an alias
// need not carry one.
1664 if (suffixIs(cmd, '*'))
1665 cmd = cmd.substr(0, cmd.size() - 1);
// Builds the command string for a CitationStyle.
// \param cs     the style to render.
// \param latex  selects the base string: cs.cmd when true, cs.name otherwise.
// When cs.forceUpperCase is set, the first character is upper-cased.
// NOTE(review): the statement executed for cs.hasStarredVersion and the
// return statement are not visible here; presumably a '*' is appended and
// cmd is returned -- confirm.
// NOTE(review): cmd[0] is written without an emptiness check; confirm that
// cs.cmd / cs.name cannot be empty when forceUpperCase is set.
1673 string citationStyleToString(const CitationStyle & cs, bool const latex)
1675 string cmd = latex ? cs.cmd : cs.name;
1676 if (cs.forceUpperCase)
1677 cmd[0] = uppercase(cmd[0]);
1678 if (cs.hasStarredVersion)
// Writes the authors contained in `authorsString` to the XML stream `xs` as
// a DocBook <authorgroup>: one <author> per name, each holding a
// <personname> with the name parts that are non-empty.
// \param authorsString  the raw author list; split with getAuthors().
// \param xs             the stream receiving the tags.
// \param buf            supplies the document class and cite engine type
//                       used to look up the "_etal" macro.
// NOTE(review): several statements are not visible here, among them the
// handling of an empty input, of the special name "others", the output of
// the prefix and suffix texts, and the return value.
1684 docstring authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs, Buffer const & buf)
1686 // This function closely mimics getAuthorList, but produces DocBook instead of text.
1687 // It has been greatly simplified, as the complete list of authors is always produced. No separators are required,
1688 // as the output has a database-like shape.
1689 // constructName has also been merged within, as it becomes really simple and leads to no copy-paste.
1691 if (authorsString.empty()) {
1695 // Split the input list of authors into individual authors.
1696 vector<docstring> const authors = getAuthors(authorsString);
1698 // Retrieve the "et al." variation.
1699 string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal");
1701 // Output the list of authors.
1702 xs << xml::StartTag("authorgroup");
1705 auto it = authors.cbegin();
1706 auto en = authors.cend();
// i counts the authors written so far; its use is not visible here.
1707 for (size_t i = 0; it != en; ++it, ++i) {
1708 xs << xml::StartTag("author");
1710 xs << xml::StartTag("personname");
1712 docstring name = *it;
1714 // All authors go in a <personname>. If more structure is known, use it; otherwise (just "et al."), print it as such.
1715 if (name == "others") {
// Regular names are decomposed into prefix, prename, surname and suffix;
// each non-empty part gets its own element.
1718 name_parts parts = nameParts(name);
1719 if (! parts.prefix.empty()) {
1720 xs << xml::StartTag("honorific");
1722 xs << xml::EndTag("honorific");
1725 if (! parts.prename.empty()) {
1726 xs << xml::StartTag("firstname");
1727 xs << parts.prename;
1728 xs << xml::EndTag("firstname");
1731 if (! parts.surname.empty()) {
1732 xs << xml::StartTag("surname");
1733 xs << parts.surname;
1734 xs << xml::EndTag("surname");
// The suffix is emitted as <othername role="suffix">.
1737 if (! parts.suffix.empty()) {
1738 xs << xml::StartTag("othername", "role=\"suffix\"");
1740 xs << xml::EndTag("othername");
1745 xs << xml::EndTag("personname");
1747 xs << xml::EndTag("author");
1750 // Could add an affiliation after <personname>, but not stored in BibTeX.
1752 xs << xml::EndTag("authorgroup");