3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
8 * \author Richard Kimberly Heck
10 * \author Jürgen Spitzmüller
12 * Full author contact details are available in file CREDITS.
17 #include "BiblioInfo.h"
20 #include "BufferParams.h"
24 #include "TextClass.h"
25 #include "TocBackend.h"
28 #include "support/convert.h"
29 #include "support/debug.h"
30 #include "support/docstream.h"
31 #include "support/FileName.h"
32 #include "support/gettext.h"
33 #include "support/lassert.h"
34 #include "support/lstrings.h"
35 #include "support/textutils.h"
42 using namespace lyx::support;
49 // Remove placeholders from names
50 docstring renormalize(docstring const & input)
52 docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
53 return subst(res, from_ascii("$$comma!"), from_ascii(","));
57 // Split the surname into prefix ("von-part") and family name.
// Returns a (prefix, family name) pair. A surname with fewer than two
// blank-separated tokens is returned unchanged as the family name,
// together with an empty prefix.
58 pair<docstring, docstring> parseSurname(docstring const & sname)
60 // Split the surname into its tokens
61 vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
62 if (pieces.size() < 2)
63 return make_pair(docstring(), sname);
65 // Now we look for pieces that begin with a lower case letter.
66 // All except for the very last token constitute the "von-part".
68 vector<docstring>::const_iterator it = pieces.begin();
69 vector<docstring>::const_iterator const en = pieces.end();
71 for (; it != en; ++it) {
74 // If this is the last piece, then what we now have is
75 // the family name, notwithstanding the casing.
78 char_type const c = (*it)[0];
79 // If the piece starts with an upper case char, we assume
80 // this is part of the surname.
83 // Nothing of the former, so add this piece to the prename
// NOTE(review): "prename" presumably means the prefix here, since this
// function returns (prefix, surname) -- confirm.
91 // Reconstruct the family name.
92 // Note that if we left the loop because it + 1 == en,
93 // then this will still do the right thing, i.e., make surname
94 // just be the last piece.
97 for (; it != en; ++it) {
104 return make_pair(prefix, surname);
116 // Gets the name parts (prename, surname, prefix, suffix) from an author-type string.
// Blanks and commas inside {...} groups are protected by temporary
// placeholders while the string is split; renormalize() restores them
// in every part that is stored in the result.
117 name_parts nameParts(docstring const & iname)
123 // First we check for groupings (via {...}) and replace blanks and
124 // commas inside groups with temporary placeholders
127 docstring::const_iterator p = iname.begin();
128 while (p != iname.end()) {
129 // count grouping level
134 // generate string with probable placeholders
135 if (*p == ' ' && gl > 0)
136 name += from_ascii("$$space!");
137 else if (*p == ',' && gl > 0)
138 name += from_ascii("$$comma!");
144 // Now we look for a comma, and take the last name to be everything
145 // preceding the right-most one, so that we also get the name suffix
147 vector<docstring> pieces = getVectorFromString(name);
148 if (pieces.size() > 1) {
149 // Whether we have a name suffix or not, the prename is the last item.
151 res.prename = renormalize(pieces.back());
152 // The family name, conversely, is always the first item.
153 // However, it might contain a prefix (aka "von" part)
154 docstring const sname = pieces.front();
// Split the family name only once and reuse both halves.
pair<docstring, docstring> const sname_parts = parseSurname(sname);
155 res.prefix = renormalize(sname_parts.first);
156 res.surname = renormalize(sname_parts.second);
157 // If we have three pieces (the maximum allowed by BibTeX),
158 // the second one is the name suffix.
159 if (pieces.size() > 2)
160 res.suffix = renormalize(pieces.at(1));
164 // OK, so now we want to look for the last name.
165 // Split on spaces, to get various tokens.
166 pieces = getVectorFromString(name, from_ascii(" "));
167 // No space: Only a family name given
168 if (pieces.size() < 2) {
169 res.surname = renormalize(pieces.back());
172 // If we get two pieces, assume "prename surname"
173 if (pieces.size() == 2) {
174 res.prename = renormalize(pieces.front());
175 res.surname = renormalize(pieces.back());
179 // Three or more pieces: A name prefix (aka "von" part) might be included.
180 // We look for the first piece that begins with a lower case letter
181 // (which is the name prefix, if it is not the last token) or the last token.
183 vector<docstring>::const_iterator it = pieces.begin();
184 vector<docstring>::const_iterator const en = pieces.end();
186 for (; it != en; ++it) {
189 char_type const c = (*it)[0];
190 // If the piece starts with a lower case char, we assume
191 // this is the name prefix and thus prename is complete.
194 // Same if this is the last piece, which is always the surname.
197 // Nothing of the former, so add this piece to the prename
205 // Now reconstruct the family name and strip the prefix.
206 // Note that if we left the loop because it + 1 == en,
207 // then this will still do the right thing, i.e., make surname
208 // just be the last piece.
211 for (; it != en; ++it) {
218 res.prename = renormalize(prename);
// Split the family name only once and reuse both halves.
pair<docstring, docstring> const surname_parts = parseSurname(surname);
219 res.prefix = renormalize(surname_parts.first);
220 res.surname = renormalize(surname_parts.second);
// Builds the display form of a single name according to a format scheme.
// The scheme may contain the placeholders %prename%, %surname%, %prefix%
// and %suffix%; conditional groups of the form {%part%[[text]]} for
// prename, suffix and prefix are matched by the regular expressions below.
225 docstring constructName(docstring const & name, string const & scheme)
227 // re-constructs a name from name parts according to the given scheme
// Parse the name only once and reuse the resulting parts.
name_parts const parts = nameParts(name);
229 docstring const prename = parts.prename;
230 docstring const surname = parts.surname;
231 docstring const prefix = parts.prefix;
232 docstring const suffix = parts.suffix;
234 static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
235 static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
236 static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
238 // Changing the first parameter of regex_match() may corrupt the
239 // second one. In this case we use the temporary string tmp.
240 if (regex_match(scheme, sub, reg1)) {
242 if (!prename.empty())
246 if (regex_match(res, sub, reg2)) {
247 string tmp = sub.str(1);
250 res = tmp + sub.str(5);
252 if (regex_match(res, sub, reg3)) {
253 string tmp = sub.str(1);
256 res = tmp + sub.str(5);
// Finally substitute the plain placeholders with the actual name parts.
258 docstring result = from_ascii(res);
259 result = subst(result, from_ascii("%prename%"), prename);
260 result = subst(result, from_ascii("%surname%"), surname);
261 result = subst(result, from_ascii("%prefix%"), prefix);
262 result = subst(result, from_ascii("%suffix%"), suffix);
// Splits an author-type string into its individual names.
// Only " and " outside of {...} groups acts as a separator.
267 vector<docstring> const getAuthors(docstring const & author)
269 // We check for groupings (via {...}) and only consider " and "
270 // outside groups as author separator. This is to account
271 // for cases such as {{Barnes and Noble, Inc.}}, which
272 // need to be treated as one single family name.
273 // We use temporary placeholders in order to differentiate the
274 // diverse " and " cases.
276 // First, we temporarily replace all ampersands. It is rather unusual
277 // in author names, but can happen (consider cases such as "C \& A Corp.").
278 docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
279 // Then, we temporarily turn all " and " strings into ampersands in order
280 // to handle them later on a per-char level.
281 iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
282 // Now we traverse through the string and replace the "&" by the proper
283 // output in- and outside groups
286 docstring::const_iterator p = iname.begin();
287 while (p != iname.end()) {
288 // count grouping level
293 // generate string with probable placeholders
296 // Inside groups, we output "and"
297 name += from_ascii("and");
299 // Outside groups, we output a separator
300 name += from_ascii("$$namesep!");
307 // re-insert the literal ampersands
308 name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
310 // Now construct the actual vector
311 return getVectorFromString(name, from_ascii(" $$namesep! "));
315 bool multipleAuthors(docstring const & author)
317 return getAuthors(author).size() > 1;
321 // converts a string containing LaTeX commands into unicode
// The input is consumed from the front, one character (or one recognized
// command) at a time, while small state flags track whether we are inside
// a command name, inside math, or right after a backslash.
// NOTE(review): the two regex_search() calls below run to_utf8() on the
// whole remaining string inside the loop; this looks quadratic for long
// inputs -- confirm whether that matters in practice.
323 docstring convertLaTeXCommands(docstring const & str)
328 bool scanning_cmd = false;
329 bool scanning_math = false;
330 bool escaped = false; // used to catch \$, etc.
331 while (!val.empty()) {
332 char_type const ch = val[0];
334 // if we're scanning math, we output everything until we
335 // find an unescaped $, at which point we break out.
342 scanning_math = false;
348 // if we're scanning a command name, then we just
349 // discard characters until we hit something that is not an ASCII letter
352 if (isAlphaASCII(ch)) {
357 // so we're done with this command.
358 // now we fall through and check this character.
359 scanning_cmd = false;
362 // was the last character a \? If so, then this is something like:
363 // \\ or \$, so we'll just output it. That's probably not always right...
365 // exception: output \, as THIN SPACE
367 ret.push_back(0x2009);
378 scanning_math = true;
382 // Change text mode accents in the form
383 // {\v a} to \v{a} (see #9340).
384 // FIXME: This is a sort of mini-tex2lyx.
385 // Use the real tex2lyx instead!
386 static regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
387 if (regex_search(to_utf8(val), tma_reg)) {
389 val.replace(2, 1, from_ascii("{"));
393 // Apart from the above, we just ignore braces
394 if (ch == '{' || ch == '}') {
399 // we're going to check things that look like commands, so if
400 // this doesn't, just output it.
407 // ok, could be a command of some sort
408 // let's see if it corresponds to some unicode
409 // unicodesymbols has things in the form: \"{u},
410 // whereas we may see things like: \"u. So we'll
411 // look for that and change it, if necessary.
412 // FIXME: This is a sort of mini-tex2lyx.
413 // Use the real tex2lyx instead!
414 static regex const reg("^\\\\\\W\\w");
415 if (regex_search(to_utf8(val), reg)) {
416 val.insert(3, from_ascii("}"));
417 val.insert(2, from_ascii("{"));
421 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
422 Encodings::TEXT_CMD, termination, rem);
423 if (!cnvtd.empty()) {
424 // it did, so we'll take that bit and proceed with what's left
429 // it's a command of some sort
438 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
// The string is scanned from the front; "{!" switches rich text scanning
// on and "!}" switches it off again.
439 docstring processRichtext(docstring const & str, bool richtext)
444 bool scanning_rich = false;
445 while (!val.empty()) {
446 char_type const ch = val[0];
447 if (ch == '{' && val.size() > 1 && val[1] == '!') {
448 // beginning of rich text
449 scanning_rich = true;
// end of rich text
453 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
455 scanning_rich = false;
463 // we need to escape '<' and '>'
471 } else if (!scanning_rich /* && !richtext */)
473 // else the character is discarded, which will happen only if
474 // richtext == false and we are scanning rich text
483 //////////////////////////////////////////////////////////////////////
487 //////////////////////////////////////////////////////////////////////
// Creates a record for the entry with the given citation key and entry
// type. The record is flagged as BibTeX data; the cached info and format
// strings start out empty and the numeric members start at 0.
489 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
490 : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type),
491 info_(), format_(), modifier_(0)
// Returns the formatted name list of this entry, built by getAuthorList()
// from the "author" field; the "editor" field is consulted as well
// (presumably as a fallback when there is no author -- confirm).
496 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
497 bool full, bool forceshort) const
499 docstring author = operator[]("author");
501 author = operator[]("editor");
503 return getAuthorList(buf, author, full, forceshort);
// Formats the names contained in \p author into one display string.
// Separators, the "et al." text and the name forms are taken from the
// document class of \p buf; if \p buf is null, built-in defaults are used
// and at most 2 names are shown. \p full lifts the maxnames limit and
// \p forceshort asks for the shortened form whenever there is more than
// one name. The result is passed through convertLaTeXCommands().
507 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
508 docstring const & author, bool const full, bool const forceshort,
509 bool const allnames, bool const beginning) const
511 // Maxnames threshold depends on engine
512 size_t maxnames = buf ?
513 buf->params().documentClass().max_citenames() : 2;
516 docstring const opt = label();
521 docstring const remainder = trim(split(opt, authors, '('));
522 if (remainder.empty())
523 // in this case, we didn't find a "(",
524 // so we don't have author (year)
527 // Natbib syntax is "Jones et al.(1990)Jones, Baker, and Williams"
528 docstring const fullauthors = trim(rsplit(remainder, ')'));
529 if (!fullauthors.empty())
538 // OK, we've got some names. Let's format them.
539 // Try to split the author list
540 vector<docstring> const authors = getAuthors(author);
544 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
545 : ENGINE_TYPE_DEFAULT;
547 // These are defined in the styles
549 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
551 string const namesep =
552 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
554 string const lastnamesep =
555 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
557 string const pairnamesep =
558 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
560 string firstnameform =
561 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
562 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
564 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
565 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
566 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
567 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
569 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
570 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
571 string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
572 : "{%prefix%[[%prefix% ]]}%surname%";
574 // Shorten the list (with et al.) if forceshort is set
575 // and the list can actually be shortened, else if maxcitenames
576 // is passed and full is not set.
577 bool shorten = forceshort && authors.size() > 1;
578 vector<docstring>::const_iterator it = authors.begin();
579 vector<docstring>::const_iterator en = authors.end();
580 for (size_t i = 0; it != en; ++it, ++i) {
581 if (i >= maxnames && !full) {
// BibTeX's "others" keyword stands for "et al."
585 if (*it == "others") {
586 retval += buf ? buf->B_(etal) : from_ascii(etal);
// pick the separator: the last name of a pair or of a longer list is
// joined differently from the names in between
589 if (i > 0 && i == authors.size() - 1) {
590 if (authors.size() == 2)
591 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
593 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
595 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
597 retval += (i == 0) ? constructName(*it, firstnameform)
598 : constructName(*it, othernameform);
600 retval += constructName(*it, citenameform);
604 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
606 retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
609 return convertLaTeXCommands(retval);
// Returns the year of this entry. The "year" field is tried first, then
// the year part(s) of biblatex's "date" field; an end year found after
// a '/' is appended behind an en dash (U+2013). The code at the end
// extracts the year from a label of the form "authors(year)".
613 docstring const BibTeXInfo::getYear() const
616 // first try legacy year field
617 docstring year = operator[]("year");
620 // now try biblatex's date field
621 year = operator[]("date");
622 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
623 // We only want the years.
624 static regex const yreg("[-]?([\\d]{4}).*");
625 static regex const ereg(".*/[-]?([\\d]{4}).*");
627 string const date = to_utf8(year);
628 if (!regex_match(date, sm, yreg))
629 // cannot parse year.
631 year = from_ascii(sm[1]);
632 // check for an endyear
633 if (regex_match(date, sm, ereg))
634 year += char_type(0x2013) + from_ascii(sm[1]);
638 docstring const opt = label();
643 docstring tmp = split(opt, authors, '(');
645 // we don't have author (year)
648 tmp = split(tmp, year, ')');
// Fills \p doi, \p url and \p file with locators derived from the fields
// of this entry. A DOI that does not already start with "http" is turned
// into a https://doi.org/ link; file names get a "file:///" prefix.
653 void BibTeXInfo::getLocators(docstring & doi, docstring & url, docstring & file) const
656 // get "doi" entry from citation record
657 doi = operator[]("doi");
658 if (!doi.empty() && !prefixIs(doi,from_ascii("http")))
659 doi = "https://doi.org/" + doi;
660 // get "url" entry from citation record
661 url = operator[]("url");
662 // get "file" entry from citation record
663 file = operator[]("file");
665 // Jabref case, field has a format:
666 // Description:Location:Filetype;Description:Location:Filetype...
667 // We will grab only first pdf
669 docstring ret, filedest, tmp;
670 ret = split(file, tmp, ':');
671 tmp = split(ret, filedest, ':');
672 //TODO: how to deal with relative directories?
673 FileName f(to_utf8(filedest));
675 file = "file:///" + filedest;
678 // kbibtex case, format:
679 // file1.pdf;file2.pdf
680 // We will grab only first pdf
683 kfile = operator[]("localfile");
684 if (!kfile.empty()) {
685 docstring filedest, tmp;
686 tmp = split(kfile, filedest, ';');
687 //TODO: how to deal with relative directories?
688 FileName f(to_utf8(filedest));
690 file = "file:///" + filedest;
696 // try biblatex specific fields, see its manual
697 // 3.13.7 "Electronic Publishing Information"
698 docstring eprinttype = operator[]("eprinttype");
699 docstring eprint = operator[]("eprint");
// map the known eprint types to the URL of the respective service
703 if (eprinttype == "arxiv")
704 url = "https://arxiv.org/abs/" + eprint;
705 if (eprinttype == "jstor")
706 url = "https://www.jstor.org/stable/" + eprint;
707 if (eprinttype == "pubmed")
708 url = "http://www.ncbi.nlm.nih.gov/pubmed/" + eprint;
709 if (eprinttype == "hdl")
710 url = "https://hdl.handle.net/" + eprint;
711 if (eprinttype == "googlebooks")
712 url = "http://books.google.com/books?id=" + eprint;
717 // Here can be handled the bibliography environment. All one could do
718 // here is let LyX scan the entry for URL or HRef insets.
724 docstring parseOptions(docstring const & format, string & optkey,
725 docstring & ifpart, docstring & elsepart);
727 // Calls parseOptions to deal with an embedded option, such as:
728 // {%number%[[, no.~%number%]]}
729 // which must appear at the start of format. ifelsepart gets the
730 // whole of the option, and we return what's left after the option.
731 // We return format if there is an error.
732 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
734 LASSERT(format[0] == '{' && format[1] == '%', return format);
738 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
739 if (format == rest) { // parse error
740 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
743 LASSERT(rest.size() <= format.size(),
744 { ifelsepart = docstring(); return format; });
// the option is whatever parseOptions() consumed from the front of format
745 ifelsepart = format.substr(0, format.size() - rest.size());
750 // Gets a "clause" from a format string, where the clause is
751 // delimited by '[[' and ']]'. Returns what is left after the
752 // clause is removed, and returns format if there is an error.
753 docstring getClause(docstring const & format, docstring & clause)
755 docstring fmt = format;
758 // we'll remove characters from the front of fmt as we process them
760 while (!fmt.empty()) {
// "]]" marks the end of the clause
761 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
766 // check for an embedded option
767 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
769 docstring const rest = parseEmbeddedOption(fmt, part);
771 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
776 } else { // it's just a normal character
785 // Parses an options string, which must appear at the start of the
786 // format parameter. Puts the parsed bits in optkey, ifpart, and
787 // elsepart and returns what's left after the option is removed.
788 // If there's an error, it returns format itself.
// The expected shape is {%key%[[if clause]]} or
// {%key%[[if clause]][[else clause]]}.
789 docstring parseOptions(docstring const & format, string & optkey,
790 docstring & ifpart, docstring & elsepart)
792 LASSERT(format[0] == '{' && format[1] == '%', return format);
// skip the leading "{%"
794 docstring fmt = format.substr(2);
795 size_t pos = fmt.find('%'); // end of key
796 if (pos == string::npos) {
797 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
800 optkey = to_utf8(fmt.substr(0, pos));
801 fmt = fmt.substr(pos + 1);
802 // [[format]] should be next
803 if (fmt[0] != '[' || fmt[1] != '[') {
804 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
808 docstring curfmt = fmt;
809 fmt = getClause(curfmt, ifpart);
811 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
815 if (fmt[0] == '}') // we're done, no else clause
816 return fmt.substr(1);
818 // else part should follow
819 if (fmt[0] != '[' || fmt[1] != '[') {
820 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
825 fmt = getClause(curfmt, elsepart);
827 if (fmt == curfmt || fmt[0] != '}') {
828 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
831 return fmt.substr(1);
838 Bug #9131 revealed an oddity in how we are generating citation information
839 when more than one key is given. We end up building a longer and longer format
840 string as we go, which we then have to re-parse, over and over and over again,
841 rather than generating the information for the individual keys and then putting
842 all of that together. We do that to deal with the way separators work, from what
843 I can tell, but it still feels like a hack. Fixing this would require quite a
844 bit of work, however.
// Expands a citation format string for this entry. %key% items are
// replaced by their values, {%key%[[if]][[else]]} options are resolved
// (recursively, via expandFormat() itself) and {!...!} rich text markers
// are passed on. \p counter is compared against max_passes to stop
// runaway expansion. The option keys "next" and "second" are controlled
// by the \p next and \p second arguments.
846 docstring BibTeXInfo::expandFormat(docstring const & format,
847 BibTeXInfoList const & xrefs, int & counter, Buffer const & buf,
848 CiteItem const & ci, bool next, bool second) const
850 // incorrect use of macros could put us in an infinite loop
851 static int const max_passes = 5000;
852 // the use of overly large keys can lead to performance problems, due
853 // to eventual attempts to convert LaTeX macros to unicode. See bug
854 // #8944. By default, the size is limited to 128 (in CiteItem), but
855 // for specific purposes (such as XHTML export), it needs to be enlarged.
856 // This is perhaps not the best solution, but it will have to do for now.
857 size_t const max_keysize = ci.max_key_size;
858 odocstringstream ret; // return value
860 bool scanning_key = false;
861 bool scanning_rich = false;
863 CiteEngineType const engine_type = buf.params().citeEngineType();
864 docstring fmt = format;
865 // we'll remove characters from the front of fmt as we process them
867 while (!fmt.empty()) {
868 if (counter > max_passes) {
869 LYXERR0("Recursion limit reached while parsing `"
874 char_type thischar = fmt[0];
875 if (thischar == '%') {
876 // beginning or end of key
879 scanning_key = false;
880 // so we replace the key with its value, which may be empty
884 buf.params().documentClass().getCiteMacro(engine_type, key);
885 fmt = from_utf8(val) + fmt.substr(1);
888 } else if (prefixIs(key, "B_")) {
889 // a translatable bit (to the Buffer language)
891 buf.params().documentClass().getCiteMacro(engine_type, key);
892 docstring const trans =
893 translateIfPossible(from_utf8(val), buf.params().language->code());
895 } else if (key[0] == '_') {
896 // a translatable bit (to the GUI language)
898 buf.params().documentClass().getCiteMacro(engine_type, key);
899 docstring const trans =
900 translateIfPossible(from_utf8(val));
// an ordinary key: look up its value
903 docstring const val =
904 getValueForKey(key, buf, ci, xrefs, max_keysize);
906 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
909 ret << from_ascii("{!</span>!}");
917 else if (thischar == '{') {
918 // beginning of option?
920 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
923 if (fmt.size() > 1) {
925 // it is the beginning of an optional format
929 docstring const newfmt =
930 parseOptions(fmt, optkey, ifpart, elsepart);
931 if (newfmt == fmt) // parse error
934 docstring const val =
935 getValueForKey(optkey, buf, ci, xrefs);
936 if (optkey == "next" && next)
937 ret << ifpart; // without expansion
938 else if (optkey == "second" && second) {
940 ret << expandFormat(ifpart, xrefs, newcounter, buf,
942 } else if (!val.empty()) {
944 ret << expandFormat(ifpart, xrefs, newcounter, buf,
946 } else if (!elsepart.empty()) {
948 ret << expandFormat(elsepart, xrefs, newcounter, buf,
951 // fmt will have been shortened for us already
955 // beginning of rich text
956 scanning_rich = true;
958 ret << from_ascii("{!");
962 // we are here if '{' was not followed by % or !.
963 // So it's just a character.
// end of rich text
966 else if (scanning_rich && thischar == '!'
967 && fmt.size() > 1 && fmt[1] == '}') {
969 scanning_rich = false;
971 ret << from_ascii("!}");
974 else if (scanning_key)
975 key += char(thischar);
979 } catch (EncodingException & /* e */) {
980 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
986 LYXERR0("Never found end of key in `" << format << "'!");
990 LYXERR0("Never found end of rich text in `" << format << "'!");
// Returns the formatted bibliography information for this entry, either
// as rich text (ci.richtext) or as plain text. If \p format_in is empty,
// the cite format of the document class for this entry type is used.
// The results are cached in info_ and info_richtext_.
997 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
998 Buffer const & buf, CiteItem const & ci, docstring const & format_in) const
1000 bool const richtext = ci.richtext;
1002 CiteEngineType const engine_type = buf.params().citeEngineType();
1003 DocumentClass const & dc = buf.params().documentClass();
1004 docstring const & format = format_in.empty()?
1005 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)))
1008 if (format != format_) {
1009 // clear caches since format changed
1011 info_richtext_.clear();
// use the cached values, if any
1015 if (!richtext && !info_.empty()) {
1016 info_ = convertLaTeXCommands(processRichtext(info_, false));
1019 if (richtext && !info_richtext_.empty())
1020 return info_richtext_;
1023 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
1029 info_ = expandFormat(format, xrefs, counter, buf,
1032 if (info_.empty()) {
1033 // this probably shouldn't happen
1038 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
1039 return info_richtext_;
1042 info_ = convertLaTeXCommands(processRichtext(info_, false));
// Returns the citation label obtained by expanding \p format for this
// entry. Unless \p next is set, a non-empty label is additionally run
// through processRichtext() and convertLaTeXCommands().
1047 docstring const BibTeXInfo::getLabel(BibTeXInfoList const & xrefs,
1048 Buffer const & buf, docstring const & format,
1049 CiteItem const & ci, bool next, bool second) const
1054 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
1056 if (!loclabel.empty() && !next) {
1057 loclabel = processRichtext(loclabel, ci.richtext);
1058 loclabel = convertLaTeXCommands(loclabel);
// Looks up \p field among the fields of this entry. A function-local
// static empty docstring exists so that a reference can be returned
// even when there is nothing to return from the entry itself.
1065 docstring const & BibTeXInfo::operator[](docstring const & field) const
1067 BibTeXInfo::const_iterator it = find(field);
1070 static docstring const empty_value = docstring();
1075 docstring const & BibTeXInfo::operator[](string const & field) const
1077 return operator[](from_ascii(field));
// Returns the value to be substituted for \p oldkey in a citation format.
// A leading "clean:" is stripped from the key and sets the cleanit flag
// (see xml::cleanAttr() near the end). The key is looked up in this entry
// and in the cross-referenced entries \p xrefs; in addition there is a
// set of special keys that are computed from \p ci, \p buf and the members
// of this entry. The result is finally limited to \p maxsize characters
// via truncateWithEllipsis().
// For the "<prefix>:<field>" keys, the offset passed to substr() must be
// the exact length of the prefix including the colon; otherwise the field
// name keeps trailing prefix characters and the lookup yields nothing.
1081 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
1082 CiteItem const & ci, BibTeXInfoList const & xrefs, size_t maxsize) const
1084 // anything less is pointless
1085 LASSERT(maxsize >= 16, maxsize = 16);
1086 string key = oldkey;
1087 bool cleanit = false;
1088 if (prefixIs(oldkey, "clean:")) {
1089 key = oldkey.substr(6);
1093 docstring ret = operator[](key);
1094 if (ret.empty() && !xrefs.empty()) {
1095 // xr is a (reference to a) BibTeXInfo const *
1096 for (auto const & xr : xrefs) {
1097 if (xr && !(*xr)[key].empty()) {
1104 // some special keys
1105 // FIXME: dialog, textbefore and textafter have nothing to do with this
1106 if (key == "dialog" && ci.context == CiteItem::Dialog)
1107 ret = from_ascii("x"); // any non-empty string will do
1108 else if (key == "export" && ci.context == CiteItem::Export)
1109 ret = from_ascii("x"); // any non-empty string will do
1110 else if (key == "ifstar" && ci.Starred)
1111 ret = from_ascii("x"); // any non-empty string will do
1112 else if (key == "ifqualified" && ci.isQualified)
1113 ret = from_ascii("x"); // any non-empty string will do
1114 else if (key == "entrytype")
1116 else if (prefixIs(key, "ifentrytype:")
1117 && from_ascii(key.substr(12)) == entry_type_)
1118 ret = from_ascii("x"); // any non-empty string will do
1119 else if (key == "key")
1121 else if (key == "label")
1123 else if (key == "modifier" && modifier_ != 0)
1125 else if (key == "numericallabel")
1127 else if (prefixIs(key, "ifmultiple:")) {
1128 // Return whether we have multiple authors
1129 docstring const kind = operator[](from_ascii(key.substr(11)));
1130 if (multipleAuthors(kind))
1131 ret = from_ascii("x"); // any non-empty string will do
1133 else if (prefixIs(key, "abbrvnames:")) {
1134 // Special key to provide abbreviated name list,
1135 // with respect to maxcitenames. Suitable for Bibliography
1137 docstring const kind = operator[](from_ascii(key.substr(11)));
1138 ret = getAuthorList(&buf, kind, false, false, true);
1139 if (ci.forceUpperCase && isLowerCase(ret[0]))
1140 ret[0] = uppercase(ret[0]);
1141 } else if (prefixIs(key, "fullnames:")) {
1142 // Return a full name list. Suitable for Bibliography
1144 docstring const kind = operator[](from_ascii(key.substr(10)));
1145 ret = getAuthorList(&buf, kind, true, false, true);
1146 if (ci.forceUpperCase && isLowerCase(ret[0]))
1147 ret[0] = uppercase(ret[0]);
1148 } else if (prefixIs(key, "forceabbrvnames:")) {
1149 // Special key to provide abbreviated name lists,
1150 // irrespective of maxcitenames. Suitable for Bibliography
// "forceabbrvnames:" is 16 characters long
1152 docstring const kind = operator[](from_ascii(key.substr(16)));
1153 ret = getAuthorList(&buf, kind, false, true, true);
1154 if (ci.forceUpperCase && isLowerCase(ret[0]))
1155 ret[0] = uppercase(ret[0]);
1156 } else if (prefixIs(key, "abbrvbynames:")) {
1157 // Special key to provide abbreviated name list,
1158 // with respect to maxcitenames. Suitable for further names inside a
1159 // bibliography item (such as "ed. by ...")
// "abbrvbynames:" is 13 characters long
1160 docstring const kind = operator[](from_ascii(key.substr(13)));
1161 ret = getAuthorList(&buf, kind, false, false, true, false);
1162 if (ci.forceUpperCase && isLowerCase(ret[0]))
1163 ret[0] = uppercase(ret[0]);
1164 } else if (prefixIs(key, "fullbynames:")) {
1165 // Return a full name list. Suitable for further names inside a
1166 // bibliography item (such as "ed. by ...")
// "fullbynames:" is 12 characters long
1167 docstring const kind = operator[](from_ascii(key.substr(12)));
1168 ret = getAuthorList(&buf, kind, true, false, true, false);
1169 if (ci.forceUpperCase && isLowerCase(ret[0]))
1170 ret[0] = uppercase(ret[0]);
1171 } else if (prefixIs(key, "forceabbrvbynames:")) {
1172 // Special key to provide abbreviated name lists,
1173 // irrespective of maxcitenames. Suitable for further names inside a
1174 // bibliography item (such as "ed. by ...")
// "forceabbrvbynames:" is 18 characters long
1175 docstring const kind = operator[](from_ascii(key.substr(18)));
1176 ret = getAuthorList(&buf, kind, false, true, true, false);
1177 if (ci.forceUpperCase && isLowerCase(ret[0]))
1178 ret[0] = uppercase(ret[0]);
1179 } else if (key == "abbrvciteauthor") {
1180 // Special key to provide abbreviated author or
1181 // editor names (suitable for citation labels),
1182 // with respect to maxcitenames.
1183 ret = getAuthorOrEditorList(&buf, false, false);
1184 if (ci.forceUpperCase && isLowerCase(ret[0]))
1185 ret[0] = uppercase(ret[0]);
1186 } else if (key == "fullciteauthor") {
1187 // Return a full author or editor list (for citation labels)
1188 ret = getAuthorOrEditorList(&buf, true, false);
1189 if (ci.forceUpperCase && isLowerCase(ret[0]))
1190 ret[0] = uppercase(ret[0]);
1191 } else if (key == "forceabbrvciteauthor") {
1192 // Special key to provide abbreviated author or
1193 // editor names (suitable for citation labels),
1194 // irrespective of maxcitenames.
1195 ret = getAuthorOrEditorList(&buf, false, true);
1196 if (ci.forceUpperCase && isLowerCase(ret[0]))
1197 ret[0] = uppercase(ret[0]);
1198 } else if (key == "bibentry") {
1199 // Special key to provide the full bibliography entry: see getInfo()
1200 CiteEngineType const engine_type = buf.params().citeEngineType();
1201 DocumentClass const & dc = buf.params().documentClass();
1202 docstring const & format =
1203 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1205 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1206 } else if (key == "textbefore")
1207 ret = ci.textBefore;
1208 else if (key == "textafter")
1210 else if (key == "curpretext") {
// the same key may occur several times in one citation; num_bib_key_
// tells which occurrence this entry stands for
1211 vector<pair<docstring, docstring>> pres = ci.getPretexts();
1212 vector<pair<docstring, docstring>>::iterator it = pres.begin();
1214 for (; it != pres.end() ; ++it) {
1215 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1220 if ((*it).first == bib_key_)
1223 } else if (key == "curposttext") {
1224 vector<pair<docstring, docstring>> posts = ci.getPosttexts();
1225 vector<pair<docstring, docstring>>::iterator it = posts.begin();
1227 for (; it != posts.end() ; ++it) {
1228 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1233 if ((*it).first == bib_key_)
1236 } else if (key == "year")
1241 ret = xml::cleanAttr(ret);
1243 // make sure it is not too big
1244 support::truncateWithEllipsis(ret, maxsize);
1249 //////////////////////////////////////////////////////////////////////
1253 //////////////////////////////////////////////////////////////////////
1257 // A functor for use with sort, leading to case insensitive sorting
1258 bool compareNoCase(const docstring & a, const docstring & b) {
1259 return compare_no_case(a, b) < 0;
1265 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1267 vector<docstring> result;
1268 if (!data.isBibTeX())
1270 // Legacy crossref field. This is not nestable.
1271 if (!nested && !data["crossref"].empty()) {
1272 docstring const xrefkey = data["crossref"];
1273 result.push_back(xrefkey);
1274 // However, check for nested xdatas
1275 BiblioInfo::const_iterator it = find(xrefkey);
1277 BibTeXInfo const & xref = it->second;
1278 vector<docstring> const nxdata = getXRefs(xref, true);
1279 if (!nxdata.empty())
1280 result.insert(result.end(), nxdata.begin(), nxdata.end());
1283 // Biblatex's xdata field. Infinitely nestable.
1284 // XData field can consist of a comma-separated list of keys
1285 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1286 if (!xdatakeys.empty()) {
1287 for (auto const & xdatakey : xdatakeys) {
1288 result.push_back(xdatakey);
1289 BiblioInfo::const_iterator it = find(xdatakey);
1291 BibTeXInfo const & xdata = it->second;
1292 vector<docstring> const nxdata = getXRefs(xdata, true);
1293 if (!nxdata.empty())
1294 result.insert(result.end(), nxdata.begin(), nxdata.end());
1302 vector<docstring> const BiblioInfo::getKeys() const
1304 vector<docstring> bibkeys;
1305 for (auto const & bi : *this)
1306 bibkeys.push_back(bi.first);
1307 sort(bibkeys.begin(), bibkeys.end(), &compareNoCase);
1312 vector<docstring> const BiblioInfo::getFields() const
1314 vector<docstring> bibfields;
1315 for (auto const & fn : field_names_)
1316 bibfields.push_back(fn);
1317 sort(bibfields.begin(), bibfields.end());
1322 vector<docstring> const BiblioInfo::getEntries() const
1324 vector<docstring> bibentries;
1325 for (auto const & et : entry_types_)
1326 bibentries.push_back(et);
1327 sort(bibentries.begin(), bibentries.end());
1332 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1334 BiblioInfo::const_iterator it = find(key);
1337 BibTeXInfo const & data = it->second;
1338 return data.getAuthorOrEditorList(&buf, false);
1342 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1344 BiblioInfo::const_iterator it = find(key);
1347 BibTeXInfo const & data = it->second;
1348 return data.citeNumber();
1351 void BiblioInfo::getLocators(docstring const & key, docstring & doi, docstring & url, docstring & file) const
1353 BiblioInfo::const_iterator it = find(key);
1356 BibTeXInfo const & data = it->second;
1357 data.getLocators(doi,url,file);
1361 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1363 BiblioInfo::const_iterator it = find(key);
1366 BibTeXInfo const & data = it->second;
1367 docstring year = data.getYear();
1369 // let's try the crossrefs
1370 vector<docstring> const xrefs = getXRefs(data);
1374 for (docstring const & xref : xrefs) {
1375 BiblioInfo::const_iterator const xrefit = find(xref);
1376 if (xrefit == end())
1378 BibTeXInfo const & xref_data = xrefit->second;
1379 year = xref_data.getYear();
1385 if (use_modifier && data.modifier() != 0)
1386 year += data.modifier();
1391 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1393 docstring const year = getYear(key, use_modifier);
1395 return buf.B_("No year");
1400 docstring const BiblioInfo::getInfo(docstring const & key,
1401 Buffer const & buf, CiteItem const & ci, docstring const & format) const
1403 BiblioInfo::const_iterator it = find(key);
1405 return docstring(_("Bibliography entry not found!"));
1406 BibTeXInfo const & data = it->second;
1407 BibTeXInfoList xrefptrs;
1408 for (docstring const & xref : getXRefs(data)) {
1409 BiblioInfo::const_iterator const xrefit = find(xref);
1410 if (xrefit != end())
1411 xrefptrs.push_back(&(xrefit->second));
1413 return data.getInfo(xrefptrs, buf, ci, format);
// Builds the citation label for the given list of keys in the given
// citation style.
//
// keys   citation keys to render; taken by value because the list is
//        modified locally when it has more than 10 items
// buf    buffer supplying the cite engine type and document class
// style  citation style name, used to look up the cite format
// ci     citation item; supplies max_size and is handed to the per-entry
//        getLabel() call
//
// The cite format is threaded through the loop: each entry's getLabel()
// receives the current string in `ret` and its result replaces `ret`.
// The final string is truncated with an ellipsis to max_size.
1417 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1418 Buffer const & buf, string const & style, CiteItem const & ci) const
1420 size_t max_size = ci.max_size;
1421 // shorter makes no sense
// NOTE(review): presumably LASSERT runs its second argument when the
// check fails, i.e. max_size falls back to 16 -- confirm against lassert.h.
1422 LASSERT(max_size >= 16, max_size = 16);
1424 // we can't display more than 10 of these, anyway
1425 // but since we truncate in the middle,
1426 // we need to split into two halves.
1427 bool const too_many_keys = keys.size() > 10;
1428 vector<docstring> lkeys;
1429 if (too_many_keys) {
// Save the last five keys, then append them to keys again.
// NOTE(review): the step between these two statements is not visible
// here; presumably keys is cut down to its head first -- confirm.
1430 lkeys.insert(lkeys.end(), keys.end() - 5, keys.end());
1432 keys.insert(keys.end(), lkeys.begin(), lkeys.end());
1435 CiteEngineType const engine_type = buf.params().citeEngineType();
1436 DocumentClass const & dc = buf.params().documentClass();
1437 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1438 docstring ret = format;
1439 vector<docstring>::const_iterator key = keys.begin();
1440 vector<docstring>::const_iterator ken = keys.end();
// Every key processed so far, the current one included.
1441 vector<docstring> handled_keys;
1442 for (int i = 0; key != ken; ++key, ++i) {
1443 handled_keys.push_back(*key);
1445 for (auto const & k : handled_keys) {
1449 BiblioInfo::const_iterator it = find(*key);
// Placeholder entry that carries only the key; `data` starts out
// referring to it.
1450 BibTeXInfo empty_data;
1451 empty_data.key(*key);
1452 BibTeXInfo & data = empty_data;
1453 vector<BibTeXInfo const *> xrefptrs;
// Resolve the cross-references of data to the entries stored here;
// keys without a stored entry are skipped.
1456 for (docstring const & xref : getXRefs(data)) {
1457 BiblioInfo::const_iterator const xrefit = find(xref);
1458 if (xrefit != end())
1459 xrefptrs.push_back(&(xrefit->second));
// The last two arguments tell the entry whether more keys follow and
// whether this is the second key (i == 1).
1463 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1466 support::truncateWithEllipsis(ret, max_size, true);
1472 bool BiblioInfo::isBibtex(docstring const & key) const
1475 split(key, key1, ',');
1476 BiblioInfo::const_iterator it = find(key1);
1479 return it->second.isBibTeX();
// Produces one (style name, label) pair per citation style.
//
// keys    citation keys to build the label for
// styles  citation styles to render; the result has one slot per style,
//         in the same order
// buf     buffer handed on to getLabel()
// ci      citation item handed on to getLabel()
//
// Each label is what getLabel() returns for keys in the style's name.
1483 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1484 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1485 Buffer const & buf, CiteItem const & ci) const
// Early exit with an empty result.
// NOTE(review): the condition guarding this return is not visible here --
// confirm what exactly is tested for emptiness.
1488 return vector<pair<docstring,docstring>>();
1491 CiteStringMap csm(styles.size());
1492 for (size_t i = 0; i != csm.size(); ++i) {
// The style name doubles as the first member of the pair (converted with
// from_ascii) and as the style argument of getLabel().
1493 style = styles[i].name;
1494 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1501 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1503 bimap_.insert(info.begin(), info.end());
1504 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1505 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1511 // used in xhtml to sort a list of BibTeXInfo objects
1512 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1514 docstring const lauth = lhs->getAuthorOrEditorList();
1515 docstring const rauth = rhs->getAuthorOrEditorList();
1516 docstring const lyear = lhs->getYear();
1517 docstring const ryear = rhs->getYear();
1518 docstring const ltitl = lhs->operator[]("title");
1519 docstring const rtitl = rhs->operator[]("title");
1520 return (lauth < rauth)
1521 || (lauth == rauth && lyear < ryear)
1522 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1528 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1530 cited_entries_.clear();
1531 // We are going to collect all the citation keys used in the document,
1532 // getting them from the TOC.
1533 // FIXME We may want to collect these differently, in the first case,
1534 // so that we might have them in order of appearance.
1535 set<docstring> citekeys;
1536 Toc const & toc = *buf.tocBackend().toc("citation");
1537 for (auto const & t : toc) {
1538 if (t.str().empty())
1540 vector<docstring> const keys = getVectorFromString(t.str());
1541 citekeys.insert(keys.begin(), keys.end());
1543 if (citekeys.empty())
1546 // We have a set of the keys used in this document.
1547 // We will now convert it to a list of the BibTeXInfo objects used in
1549 vector<BibTeXInfo const *> bi;
1550 for (auto const & ck : citekeys) {
1551 BiblioInfo::const_iterator const bt = find(ck);
1552 if (bt == end() || !bt->second.isBibTeX())
1554 bi.push_back(&(bt->second));
1557 sort(bi.begin(), bi.end(), lSorter);
1559 // Now we can write the sorted keys
1560 // b is a BibTeXInfo const *
1561 for (auto const & b : bi)
1562 cited_entries_.push_back(b->key());
// Assigns cite numbers or year modifiers, and then labels, to all cited
// entries of the given buffer.
//
// cited_entries_ is refreshed through collectCitedEntries() first. Two
// passes over it follow: the first sets each entry's cite number (from a
// running counter) or its year modifier; the second sets each entry's
// label, either to its cite number or to "<authors> <year>", with the
// entry's key as fallback when authors or year are empty.
// Which of the two alternatives applies presumably depends on `numbers`
// (cite engine type has ENGINE_TYPE_NUMERICAL set); the branching
// statements themselves are not visible here -- TODO confirm.
1566 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1568 collectCitedEntries(buf);
1569 CiteEngineType const engine_type = buf.params().citeEngineType();
1570 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1574 // used to remember the last one we saw
1575 // we'll be comparing entries to see if we need to add
1576 // modifiers, like "1984a"
1577 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1579 // add letters to years
1580 for (auto const & ce : cited_entries_) {
1581 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1582 // this shouldn't happen, but...
1583 if (biit == bimap_.end())
1584 // ...fail gracefully, anyway.
1586 BibTeXInfo & entry = biit->second;
// Cite numbers come from a pre-incremented counter.
1588 docstring const num = convert<docstring>(++keynumber);
1589 entry.setCiteNumber(num);
1591 // The first test here is checking whether this is the first
1592 // time through the loop. If so, then we do not have anything
1593 // with which to compare.
1594 if (last != bimap_.end()
1595 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1596 // we access the year via getYear() so as to get it from the xref,
1597 // if we need to do so
1598 && getYear(entry.key()) == getYear(last->second.key())) {
// Same authors and same year as the previous entry: the two need
// distinguishing modifiers.
1599 if (modifier == 0) {
1600 // so the last one should have been 'a'
1601 last->second.setModifier('a');
1603 } else if (modifier == 'z')
1610 entry.setModifier(modifier);
1611 // remember the last one
// Second pass: set the labels.
1616 for (auto const & ce : cited_entries_) {
1617 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(ce);
1618 // this shouldn't happen, but...
1619 if (biit == bimap_.end())
1620 // ...fail gracefully, anyway.
1622 BibTeXInfo & entry = biit->second;
1624 entry.label(entry.citeNumber());
1626 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1627 // we do it this way so as to access the xref, if necessary
1628 // note that this also gives us the modifier
1629 docstring const year = getYear(ce, buf, true);
1630 if (!auth.empty() && !year.empty())
1631 entry.label(auth + ' ' + year);
1633 entry.label(entry.key());
1639 //////////////////////////////////////////////////////////////////////
1643 //////////////////////////////////////////////////////////////////////
// Parses a citation command string into a CitationStyle.
//
// command  the citation command as written, possibly starting with an
//          upper-case letter and/or ending in '*'
// params   buffer parameters, used to resolve a cite alias for command
//
// Sets forceUpperCase when command starts with an upper-case letter and
// hasStarredVersion when command ends in '*'. The working name `cmd` is
// the alias if there is one, otherwise command itself; its first letter
// is lower-cased and a trailing '*' is removed in the respective cases.
// NOTE(review): the statements that store cmd in the result and return it
// are not visible here -- confirm which member receives it.
1646 CitationStyle citationStyleFromString(string const & command,
1647 BufferParams const & params)
// An empty command is handled up front, so command[0] and
// command.size() - 1 below are only evaluated for non-empty strings
// (assuming the hidden statement after this test leaves the function).
1650 if (command.empty())
1653 string const alias = params.getCiteAlias(command);
1654 string cmd = alias.empty() ? command : alias;
// The case test looks at the original command, the change is made to cmd.
1655 if (isUpperCase(command[0])) {
1656 cs.forceUpperCase = true;
1657 cmd[0] = lowercase(cmd[0]);
1660 size_t const n = command.size() - 1;
1661 if (command[n] == '*') {
1662 cs.hasStarredVersion = true;
// cmd may be an alias that carries no star, so check before stripping.
1663 if (suffixIs(cmd, '*'))
1664 cmd = cmd.substr(0, cmd.size() - 1);
1672 string citationStyleToString(const CitationStyle & cs, bool const latex)
1674 string cmd = latex ? cs.cmd : cs.name;
1675 if (cs.forceUpperCase)
1676 cmd[0] = uppercase(cmd[0]);
1677 if (cs.hasStarredVersion)
1683 docstring authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs, Buffer const & buf)
1685 // This function closely mimics getAuthorList, but produces DocBook instead of text.
1686 // It has been greatly simplified, as the complete list of authors is always produced. No separators are required,
1687 // as the output has a database-like shape.
1688 // constructName has also been merged within, as it becomes really simple and leads to no copy-paste.
1690 if (authorsString.empty()) {
1694 // Split the input list of authors into individual authors.
1695 vector<docstring> const authors = getAuthors(authorsString);
1697 // Retrieve the "et al." variation.
1698 string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal");
1700 // Output the list of authors.
1701 xs << xml::StartTag("authorgroup");
1704 auto it = authors.cbegin();
1705 auto en = authors.cend();
1706 for (size_t i = 0; it != en; ++it, ++i) {
1707 xs << xml::StartTag("author");
1709 xs << xml::StartTag("personname");
1711 docstring name = *it;
1713 // All authors go in a <personname>. If more structure is known, use it; otherwise (just "et al."), print it as such.
1714 if (name == "others") {
1717 name_parts parts = nameParts(name);
1718 if (! parts.prefix.empty()) {
1719 xs << xml::StartTag("honorific");
1721 xs << xml::EndTag("honorific");
1724 if (! parts.prename.empty()) {
1725 xs << xml::StartTag("firstname");
1726 xs << parts.prename;
1727 xs << xml::EndTag("firstname");
1730 if (! parts.surname.empty()) {
1731 xs << xml::StartTag("surname");
1732 xs << parts.surname;
1733 xs << xml::EndTag("surname");
1736 if (! parts.suffix.empty()) {
1737 xs << xml::StartTag("othername", "role=\"suffix\"");
1739 xs << xml::EndTag("othername");
1744 xs << xml::EndTag("personname");
1746 xs << xml::EndTag("author");
1749 // Could add an affiliation after <personname>, but not stored in BibTeX.
1751 xs << xml::EndTag("authorgroup");