3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
10 * \author Jürgen Spitzmüller
12 * Full author contact details are available in file CREDITS.
17 #include "BiblioInfo.h"
19 #include "BufferParams.h"
20 #include "buffer_funcs.h"
23 #include "InsetIterator.h"
25 #include "output_xhtml.h"
26 #include "Paragraph.h"
27 #include "TextClass.h"
28 #include "TocBackend.h"
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/docstream.h"
33 #include "support/gettext.h"
34 #include "support/lassert.h"
35 #include "support/lstrings.h"
36 #include "support/regex.h"
37 #include "support/textutils.h"
43 using namespace lyx::support;
50 // Remove placeholders from names
// Returns a copy of `input` in which every "$$space!" placeholder has been
// turned back into a blank and every "$$comma!" placeholder into a comma.
51 docstring renormalize(docstring const & input)
53 docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
54 return subst(res, from_ascii("$$comma!"), from_ascii(","));
58 // Split the surname into prefix ("von-part") and family name
// Returns a (prefix, family name) pair. A surname with fewer than two
// blank-separated tokens has no prefix: the result is (empty, sname).
59 pair<docstring, docstring> parseSurname(docstring const & sname)
61 // Split the surname into its tokens
62 vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
63 if (pieces.size() < 2)
64 return make_pair(docstring(), sname);
66 // Now we look for pieces that begin with a lower case letter.
67 // All except for the very last token constitute the "von-part".
69 vector<docstring>::const_iterator it = pieces.begin();
70 vector<docstring>::const_iterator const en = pieces.end();
72 for (; it != en; ++it) {
75 // If this is the last piece, then what we now have is
76 // the family name, notwithstanding the casing.
// Only the first character of each token is inspected.
79 char_type const c = (*it)[0];
80 // If the piece starts with an upper case char, we assume
81 // this is part of the surname.
84 // Nothing of the former, so add this piece to the prename
// NOTE(review): "prename" above presumably means the prefix, i.e. the
// first member of the returned pair -- confirm.
92 // Reconstruct the family name.
93 // Note that if we left the loop because it + 1 == en,
94 // then this will still do the right thing, i.e., make surname
95 // just be the last piece.
// This loop header does not re-initialise `it`, so it continues from
// wherever the first loop stopped.
98 for (; it != en; ++it) {
105 return make_pair(prefix, surname);
117 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
// Blanks and commas inside {...} groups are masked with placeholders first so
// that they are not treated as separators; renormalize() restores them in
// every part that is stored in the result.
118 name_parts nameParts(docstring const & iname)
124 // First we check for groupings (via {...}) and replace blanks and
125 // commas inside groups with temporary placeholders
128 docstring::const_iterator p = iname.begin();
129 while (p != iname.end()) {
130 // count grouping level
135 // generate string with probable placeholders
// gl > 0 means the current character is inside at least one group.
136 if (*p == ' ' && gl > 0)
137 name += from_ascii("$$space!");
138 else if (*p == ',' && gl > 0)
139 name += from_ascii("$$comma!");
145 // Now we look for a comma, and take the last name to be everything
146 // preceding the right-most one, so that we also get the name suffix
148 vector<docstring> pieces = getVectorFromString(name);
149 if (pieces.size() > 1) {
150 // Whether we have a name suffix or not, the prename is
152 res.prename = renormalize(pieces.back());
153 // The family name, conversely, is always the first item.
154 // However, it might contain a prefix (aka "von" part)
155 docstring const sname = pieces.front();
// NOTE(review): parseSurname(sname) is evaluated twice here; the pair could
// be computed once and both members read from it.
156 res.prefix = renormalize(parseSurname(sname).first);
157 res.surname = renormalize(parseSurname(sname).second);
158 // If we have three pieces (the maximum allowed by BibTeX),
159 // the second one is the name suffix.
160 if (pieces.size() > 2)
161 res.suffix = renormalize(pieces.at(1));
165 // OK, so now we want to look for the last name.
166 // Split on spaces, to get various tokens.
167 pieces = getVectorFromString(name, from_ascii(" "));
168 // No space: Only a family name given
169 if (pieces.size() < 2) {
170 res.surname = renormalize(pieces.back());
173 // If we get two pieces, assume "prename surname"
174 if (pieces.size() == 2) {
175 res.prename = renormalize(pieces.front());
176 res.surname = renormalize(pieces.back());
180 // More than 2 pieces: A name prefix (aka "von" part) might be included.
181 // We look for the first piece that begins with a lower case letter
182 // (which is the name prefix, if it is not the last token) or the last token.
184 vector<docstring>::const_iterator it = pieces.begin();
185 vector<docstring>::const_iterator const en = pieces.end();
187 for (; it != en; ++it) {
190 char_type const c = (*it)[0];
191 // If the piece starts with a lower case char, we assume
192 // this is the name prefix and thus prename is complete.
195 // Same if this is the last piece, which is always the surname.
198 // Nothing of the former, so add this piece to the prename
206 // Now reconstruct the family name and strip the prefix.
207 // Note that if we left the loop because it + 1 == en,
208 // then this will still do the right thing, i.e., make surname
209 // just be the last piece.
212 for (; it != en; ++it) {
219 res.prename = renormalize(prename);
// NOTE(review): parseSurname(surname) is evaluated twice here as well.
220 res.prefix = renormalize(parseSurname(surname).first);
221 res.surname = renormalize(parseSurname(surname).second);
// Formats the author string `name` according to `scheme`.
// `scheme` may contain the plain keys %prename%, %surname%, %prefix% and
// %suffix%, plus conditional groups of the form {%key%[[text]]} for the
// prename, suffix and prefix keys (matched by reg1..reg3 below).
226 docstring constructName(docstring const & name, string const & scheme)
228 // re-constructs a name from name parts according
// NOTE(review): nameParts(name) is evaluated four times below; parsing once
// into a local name_parts and reading its members would give the same result.
230 docstring const prename = nameParts(name).prename;
231 docstring const surname = nameParts(name).surname;
232 docstring const prefix = nameParts(name).prefix;
233 docstring const suffix = nameParts(name).suffix;
// Each pattern captures: (1) text before the group, (2) the group opener,
// (3) the conditional text, (4) the group closer, (5) text after the group.
235 static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
236 static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
237 static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
239 // Changing the first parameter of regex_match() may corrupt the
240 // second one. In this case we use the temporary string tmp.
241 if (regex_match(scheme, sub, reg1)) {
243 if (!prename.empty())
247 if (regex_match(res, sub, reg2)) {
248 string tmp = sub.str(1);
251 res = tmp + sub.str(5);
253 if (regex_match(res, sub, reg3)) {
254 string tmp = sub.str(1);
257 res = tmp + sub.str(5);
// With the conditional groups resolved, substitute the remaining plain keys.
259 docstring result = from_ascii(res);
260 result = subst(result, from_ascii("%prename%"), prename);
261 result = subst(result, from_ascii("%surname%"), surname);
262 result = subst(result, from_ascii("%prefix%"), prefix);
263 result = subst(result, from_ascii("%suffix%"), suffix);
// Splits an author-list string into its individual authors.
// The separator is " and " outside of {...} groups; an " and " inside a
// group stays part of the name.
268 vector<docstring> const getAuthors(docstring const & author)
270 // We check for groupings (via {...}) and only consider " and "
271 // outside groups as author separator. This is to account
272 // for cases such as {{Barnes and Noble, Inc.}}, which
273 // need to be treated as one single family name.
274 // We use temporary placeholders in order to differentiate the
275 // diverse " and " cases.
277 // First, we temporarily replace all ampersands. It is rather unusual
278 // in author names, but can happen (consider cases such as "C \& A Corp.").
279 docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
280 // Then, we temporarily make all " and " strings to ampersands in order
281 // to handle them later on a per-char level.
282 iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
283 // Now we traverse through the string and replace the "&" by the proper
284 // output in- and outside groups
287 docstring::const_iterator p = iname.begin();
288 while (p != iname.end()) {
289 // count grouping level
294 // generate string with probable placeholders
297 // Inside groups, we output "and"
298 name += from_ascii("and");
300 // Outside groups, we output a separator
301 name += from_ascii("$$namesep!");
308 // re-insert the literal ampersands
309 name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
311 // Now construct the actual vector
// The blanks that surrounded each " and " are still present, hence the
// delimiter with a blank on either side of the placeholder.
312 return getVectorFromString(name, from_ascii(" $$namesep! "));
// Returns true if getAuthors() splits `author` into more than one name.
316 bool multipleAuthors(docstring const & author)
318 return getAuthors(author).size() > 1;
322 // converts a string containing LaTeX commands into unicode
// The input is consumed from the front, one step per loop iteration, while
// three flags track whether we are inside a command name, inside math, or
// directly after a backslash.
324 docstring convertLaTeXCommands(docstring const & str)
329 bool scanning_cmd = false;
330 bool scanning_math = false;
331 bool escaped = false; // used to catch \$, etc.
332 while (!val.empty()) {
333 char_type const ch = val[0];
335 // if we're scanning math, we output everything until we
336 // find an unescaped $, at which point we break out.
343 scanning_math = false;
349 // if we're scanning a command name, then we just
350 // discard characters until we hit something that
353 if (isAlphaASCII(ch)) {
358 // so we're done with this command.
359 // now we fall through and check this character.
360 scanning_cmd = false;
363 // was the last character a \? If so, then this is something like:
364 // \\ or \$, so we'll just output it. That's probably not always right...
366 // exception: output \, as THIN SPACE
// 0x2009 is the code point of THIN SPACE.
368 ret.push_back(0x2009);
379 scanning_math = true;
383 // Change text mode accents in the form
384 // {\v a} to \v{a} (see #9340).
385 // FIXME: This is a sort of mini-tex2lyx.
386 // Use the real tex2lyx instead!
// The pattern is anchored at the start of what is left of the input:
// '{', a backslash, one accent letter, one whitespace char, one word char, '}'.
387 static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
388 if (lyx::regex_search(to_utf8(val), tma_reg)) {
390 val.replace(2, 1, from_ascii("{"));
394 // Apart from the above, we just ignore braces
395 if (ch == '{' || ch == '}') {
400 // we're going to check things that look like commands, so if
401 // this doesn't, just output it.
408 // ok, could be a command of some sort
409 // let's see if it corresponds to some unicode
410 // unicodesymbols has things in the form: \"{u},
411 // whereas we may see things like: \"u. So we'll
412 // look for that and change it, if necessary.
413 // FIXME: This is a sort of mini-tex2lyx.
414 // Use the real tex2lyx instead!
415 static lyx::regex const reg("^\\\\\\W\\w");
416 if (lyx::regex_search(to_utf8(val), reg)) {
// Insert the closing brace first so that the index used for the opening
// brace is not shifted by the first insertion.
417 val.insert(3, from_ascii("}"));
418 val.insert(2, from_ascii("{"));
422 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
423 Encodings::TEXT_CMD, termination, rem);
424 if (!cnvtd.empty()) {
425 // it did, so we'll take that bit and proceed with what's left
430 // it's a command of some sort
439 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
// The string is consumed from the front; scanning_rich is true while we are
// between a "{!" opener and the matching "!}" closer.
440 docstring processRichtext(docstring const & str, bool richtext)
445 bool scanning_rich = false;
446 while (!val.empty()) {
447 char_type const ch = val[0];
448 if (ch == '{' && val.size() > 1 && val[1] == '!') {
449 // beginning of rich text
450 scanning_rich = true;
// end of rich text: "!}" only counts while a rich-text section is open
454 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
456 scanning_rich = false;
464 // we need to escape '<' and '>'
472 } else if (!scanning_rich /* && !richtext */)
474 // else the character is discarded, which will happen only if
475 // richtext == false and we are scanning rich text
484 //////////////////////////////////////////////////////////////////////
488 //////////////////////////////////////////////////////////////////////
// Constructs an entry with the given citation key and entry type; the entry
// is flagged as a BibTeX entry and its numeric key and cached info start out
// zero/empty.
490 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
491 : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type), info_(),
// Returns the formatted name list for this entry, built by getAuthorList()
// from the "author" field or from the "editor" field.
// NOTE(review): the condition that selects "editor" is not visible here;
// presumably it is used when "author" is empty -- confirm.
497 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
498 bool full, bool forceshort) const
500 docstring author = operator[]("author");
502 author = operator[]("editor");
504 return getAuthorList(buf, author, full, forceshort);
// Formats the name list `author` for display.
//   buf        - may be null; every style-dependent setting below then falls
//                back to a built-in default.
//   full       - when set, the maxnames limit does not apply.
//   forceshort - request the shortened form if there is more than one name.
//   allnames, beginning - NOTE(review): presumably choose between the
//                citation name form and the bibliography ("by") name forms;
//                the deciding conditions are not visible here -- confirm.
// The result is passed through convertLaTeXCommands() before being returned.
508 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
509 docstring const & author, bool const full, bool const forceshort,
510 bool const allnames, bool const beginning) const
512 // Maxnames threshold depends on engine
513 size_t maxnames = buf ?
514 buf->params().documentClass().max_citenames() : 2;
517 docstring const opt = label();
522 docstring const remainder = trim(split(opt, authors, '('));
523 if (remainder.empty())
524 // in this case, we didn't find a "(",
525 // so we don't have author (year)
528 // Natbib syntax is "Jones et al.(1990)Jones, Baker, and Williams"
529 docstring const fullauthors = trim(rsplit(remainder, ')'));
530 if (!fullauthors.empty())
539 // OK, we've got some names. Let's format them.
540 // Try to split the author list
541 vector<docstring> const authors = getAuthors(author);
545 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
546 : ENGINE_TYPE_DEFAULT;
548 // These are defined in the styles
550 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
552 string const namesep =
553 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
555 string const lastnamesep =
556 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
558 string const pairnamesep =
559 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
// Name schemes as understood by constructName().
561 string firstnameform =
562 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
563 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
565 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
566 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
567 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
568 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
570 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
571 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
572 string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
573 : "{%prefix%[[%prefix% ]]}%surname%";
575 // Shorten the list (with et al.) if forceshort is set
576 // and the list can actually be shortened, else if maxcitenames
577 // is passed and full is not set.
578 bool shorten = forceshort && authors.size() > 1;
579 vector<docstring>::const_iterator it = authors.begin();
580 vector<docstring>::const_iterator en = authors.end();
581 for (size_t i = 0; it != en; ++it, ++i) {
582 if (i >= maxnames && !full) {
// The literal author "others" stands for "et al."
586 if (*it == "others") {
587 retval += buf ? buf->B_(etal) : from_ascii(etal);
// Separator before the last name: a dedicated one when there are exactly
// two names, otherwise the "last name" separator.
590 if (i > 0 && i == authors.size() - 1) {
591 if (authors.size() == 2)
592 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
594 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
596 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
598 retval += (i == 0) ? constructName(*it, firstnameform)
599 : constructName(*it, othernameform);
601 retval += constructName(*it, citenameform);
// Shortened form: first name followed by "et al."
605 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
607 retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
610 return convertLaTeXCommands(retval);
// Returns the year of this entry, taken from the "year" field, from
// biblatex's "date" field, or from the label of the form "authors(year)...".
// For a date range, start and end year are joined by U+2013.
614 docstring const BibTeXInfo::getYear() const
617 // first try legacy year field
618 docstring year = operator[]("year");
621 // now try biblatex's date field
622 year = operator[]("date");
623 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
624 // We only want the years.
// yreg captures the four leading digits; ereg captures the four digits
// that follow a '/' (the end of a range).
625 static regex const yreg("[-]?([\\d]{4}).*");
626 static regex const ereg(".*/[-]?([\\d]{4}).*");
628 string const date = to_utf8(year);
629 if (!regex_match(date, sm, yreg))
630 // cannot parse year.
632 year = from_ascii(sm[1]);
633 // check for an endyear
634 if (regex_match(date, sm, ereg))
635 year += char_type(0x2013) + from_ascii(sm[1]);
639 docstring const opt = label();
644 docstring tmp = split(opt, authors, '(');
646 // we don't have author (year)
649 tmp = split(tmp, year, ')');
// Forward declaration: parseEmbeddedOption() and getClause() below call
// parseOptions() before its definition.
656 docstring parseOptions(docstring const & format, string & optkey,
657 docstring & ifpart, docstring & elsepart);
659 // Calls parseOptions to deal with an embedded option, such as:
660 // {%number%[[, no.~%number%]]}
661 // which must appear at the start of format. ifelsepart gets the
662 // whole of the option, and we return what's left after the option.
663 // we return format if there is an error.
664 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
// Precondition: format starts with "{%".
666 LASSERT(format[0] == '{' && format[1] == '%', return format);
670 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
671 if (format == rest) { // parse error
672 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
675 LASSERT(rest.size() <= format.size(),
676 { ifelsepart = docstring(); return format; });
// The option is whatever parseOptions() consumed from the front of format.
677 ifelsepart = format.substr(0, format.size() - rest.size());
682 // Gets a "clause" from a format string, where the clause is
683 // delimited by '[[' and ']]'. Returns what is left after the
684 // clause is removed, and returns format if there is an error.
685 docstring getClause(docstring const & format, docstring & clause)
687 docstring fmt = format;
690 // we'll remove characters from the front of fmt as we
692 while (!fmt.empty()) {
// "]]" marks the end of the clause
693 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
698 // check for an embedded option
699 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
701 docstring const rest = parseEmbeddedOption(fmt, part);
703 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
708 } else { // it's just a normal character
717 // parse an options string, which must appear at the start of the
718 // format parameter. puts the parsed bits in optkey, ifpart, and
719 // elsepart and returns what's left after the option is removed.
720 // if there's an error, it returns format itself.
// Expected shape: {%key%[[if clause]]} or {%key%[[if clause]][[else clause]]}
721 docstring parseOptions(docstring const & format, string & optkey,
722 docstring & ifpart, docstring & elsepart)
724 LASSERT(format[0] == '{' && format[1] == '%', return format);
// Skip the leading "{%"; the key runs up to the next '%'.
726 docstring fmt = format.substr(2);
727 size_t pos = fmt.find('%'); // end of key
728 if (pos == string::npos) {
729 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
732 optkey = to_utf8(fmt.substr(0, pos));
733 fmt = fmt.substr(pos + 1);
734 // [[format]] should be next
735 if (fmt[0] != '[' || fmt[1] != '[') {
736 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
740 docstring curfmt = fmt;
741 fmt = getClause(curfmt, ifpart);
743 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
747 if (fmt[0] == '}') // we're done, no else clause
748 return fmt.substr(1);
750 // else part should follow
751 if (fmt[0] != '[' || fmt[1] != '[') {
752 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
757 fmt = getClause(curfmt, elsepart);
// getClause() returns its input unchanged on error; otherwise the option
// must now be closed by '}'.
759 if (fmt == curfmt || fmt[0] != '}') {
760 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
763 return fmt.substr(1);
770 Bug #9131 revealed an oddity in how we are generating citation information
771 when more than one key is given. We end up building a longer and longer format
772 string as we go, which we then have to re-parse, over and over and over again,
773 rather than generating the information for the individual keys and then putting
774 all of that together. We do that to deal with the way separators work, from what
775 I can tell, but it still feels like a hack. Fixing this would require quite a
776 bit of work, however.
// Expands a citation format string for this entry.
// %key% items are replaced by their values, {%key%[[if]][[else]]} options are
// resolved via parseOptions(), and {!...!} rich-text markers are copied to
// the output. `counter` counts passes so that the max_passes guard below can
// stop a runaway expansion. `next` and `second` control the special option
// keys "next" and "second".
778 docstring BibTeXInfo::expandFormat(docstring const & format,
779 BibTeXInfoList const & xrefs, int & counter, Buffer const & buf,
780 CiteItem const & ci, bool next, bool second) const
782 // incorrect use of macros could put us in an infinite loop
783 static int const max_passes = 5000;
784 // the use of overly large keys can lead to performance problems, due
785 // to eventual attempts to convert LaTeX macros to unicode. See bug
786 // #8944. By default, the size is limited to 128 (in CiteItem), but
787 // for specific purposes (such as XHTML export), it needs to be enlarged
788 // This is perhaps not the best solution, but it will have to do for now.
789 size_t const max_keysize = ci.max_key_size;
790 odocstringstream ret; // return value
792 bool scanning_key = false;
793 bool scanning_rich = false;
795 CiteEngineType const engine_type = buf.params().citeEngineType();
796 docstring fmt = format;
797 // we'll remove characters from the front of fmt as we
799 while (!fmt.empty()) {
800 if (counter > max_passes) {
801 LYXERR0("Recursion limit reached while parsing `"
806 char_type thischar = fmt[0];
807 if (thischar == '%') {
808 // beginning or end of key
811 scanning_key = false;
812 // so we replace the key with its value, which may be empty
// The macro's value is put back at the front of fmt, so it is itself
// scanned on the following iterations.
816 buf.params().documentClass().getCiteMacro(engine_type, key);
817 fmt = from_utf8(val) + fmt.substr(1);
820 } else if (prefixIs(key, "B_")) {
821 // a translatable bit (to the Buffer language)
823 buf.params().documentClass().getCiteMacro(engine_type, key);
824 docstring const trans =
825 translateIfPossible(from_utf8(val), buf.params().language->code());
827 } else if (key[0] == '_') {
828 // a translatable bit (to the GUI language)
830 buf.params().documentClass().getCiteMacro(engine_type, key);
831 docstring const trans =
832 translateIfPossible(from_utf8(val));
// an ordinary key: look up its value for this entry
835 docstring const val =
836 getValueForKey(key, buf, ci, xrefs, max_keysize);
// wrap the value in rich-text markers carrying a per-key span class
838 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
841 ret << from_ascii("{!</span>!}");
849 else if (thischar == '{') {
850 // beginning of option?
852 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
855 if (fmt.size() > 1) {
857 // it is the beginning of an optional format
861 docstring const newfmt =
862 parseOptions(fmt, optkey, ifpart, elsepart);
863 if (newfmt == fmt) // parse error
866 docstring const val =
867 getValueForKey(optkey, buf, ci, xrefs);
868 if (optkey == "next" && next)
869 ret << ifpart; // without expansion
870 else if (optkey == "second" && second) {
872 ret << expandFormat(ifpart, xrefs, newcounter, buf,
874 } else if (!val.empty()) {
876 ret << expandFormat(ifpart, xrefs, newcounter, buf,
878 } else if (!elsepart.empty()) {
880 ret << expandFormat(elsepart, xrefs, newcounter, buf,
883 // fmt will have been shortened for us already
887 // beginning of rich text
888 scanning_rich = true;
890 ret << from_ascii("{!");
894 // we are here if '{' was not followed by % or !.
895 // So it's just a character.
// end of rich text
898 else if (scanning_rich && thischar == '!'
899 && fmt.size() > 1 && fmt[1] == '}') {
901 scanning_rich = false;
903 ret << from_ascii("!}");
906 else if (scanning_key)
907 key += char(thischar);
911 } catch (EncodingException & /* e */) {
912 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
// Diagnostics for a format that ended in the middle of a key or of a
// rich-text section.
918 LYXERR0("Never found end of key in `" << format << "'!");
922 LYXERR0("Never found end of rich text in `" << format << "'!");
// Returns the formatted bibliography entry, in the rich-text or plain-text
// variant depending on ci.richtext. The two variants are cached in
// info_richtext_ and info_; a non-empty cached value is returned as is.
929 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
930 Buffer const & buf, CiteItem const & ci) const
932 bool const richtext = ci.richtext;
934 if (!richtext && !info_.empty())
936 if (richtext && !info_richtext_.empty())
937 return info_richtext_;
940 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
// Otherwise expand the cite format that the document class defines for
// this entry type.
945 CiteEngineType const engine_type = buf.params().citeEngineType();
946 DocumentClass const & dc = buf.params().documentClass();
947 docstring const & format =
948 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
950 info_ = expandFormat(format, xrefs, counter, buf,
954 // this probably shouldn't happen
959 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
960 return info_richtext_;
963 info_ = convertLaTeXCommands(processRichtext(info_, false));
// Expands `format` into a citation label for this entry. Unless the result
// is empty or `next` is set, rich-text markers are processed and LaTeX
// commands converted before the label is returned.
// NOTE(review): xrefs is taken by value here, whereas expandFormat() and
// getInfo() take the same list by const reference.
968 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
969 Buffer const & buf, docstring const & format,
970 CiteItem const & ci, bool next, bool second) const
975 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
977 if (!loclabel.empty() && !next) {
978 loclabel = processRichtext(loclabel, ci.richtext);
979 loclabel = convertLaTeXCommands(loclabel);
// Looks up the value stored for `field`. A function-local static empty
// docstring exists so that a reference can be returned when there is nothing
// to return from the map.
986 docstring const & BibTeXInfo::operator[](docstring const & field) const
988 BibTeXInfo::const_iterator it = find(field);
991 static docstring const empty_value = docstring();
// Convenience overload: converts the ASCII field name to a docstring and
// forwards to the docstring overload.
996 docstring const & BibTeXInfo::operator[](string const & field) const
998 return operator[](from_ascii(field));
// Returns the value that the format key `oldkey` stands for in this entry.
//   oldkey  - a field name or one of the special keys handled below; a
//             leading "clean:" requests html::cleanAttr() on the result.
//   xrefs   - cross-referenced entries that are searched for the field.
//   maxsize - the result is truncated (with ellipsis) to this size; values
//             below 16 are raised to 16.
// For the "<prefix>:<field>" keys the field name is whatever follows the
// prefix, so each substr() offset below equals the length of the prefix
// literal tested on the line above it.
1002 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
1003 CiteItem const & ci, BibTeXInfoList const & xrefs, size_t maxsize) const
1005 // anything less is pointless
1006 LASSERT(maxsize >= 16, maxsize = 16);
1007 string key = oldkey;
1008 bool cleanit = false;
1009 if (prefixIs(oldkey, "clean:")) {
1010 key = oldkey.substr(6);
1014 docstring ret = operator[](key);
1015 if (ret.empty() && !xrefs.empty()) {
1016 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
1017 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
1018 for (; it != en; ++it) {
1019 if (*it && !(**it)[key].empty()) {
1026 // some special keys
1027 // FIXME: dialog, textbefore and textafter have nothing to do with this
1028 if (key == "dialog" && ci.context == CiteItem::Dialog)
1029 ret = from_ascii("x"); // any non-empty string will do
1030 else if (key == "export" && ci.context == CiteItem::Export)
1031 ret = from_ascii("x"); // any non-empty string will do
1032 else if (key == "ifstar" && ci.Starred)
1033 ret = from_ascii("x"); // any non-empty string will do
1034 else if (key == "ifqualified" && ci.isQualified)
1035 ret = from_ascii("x"); // any non-empty string will do
1036 else if (key == "entrytype")
1038 else if (prefixIs(key, "ifentrytype:")
1039 && from_ascii(key.substr(12)) == entry_type_)
1040 ret = from_ascii("x"); // any non-empty string will do
1041 else if (key == "key")
1043 else if (key == "label")
1045 else if (key == "modifier" && modifier_ != 0)
1047 else if (key == "numericallabel")
1049 else if (prefixIs(key, "ifmultiple:")) {
1050 // Return whether we have multiple authors
1051 docstring const kind = operator[](from_ascii(key.substr(11)));
1052 if (multipleAuthors(kind))
1053 ret = from_ascii("x"); // any non-empty string will do
1055 else if (prefixIs(key, "abbrvnames:")) {
1056 // Special key to provide abbreviated name list,
1057 // with respect to maxcitenames. Suitable for Bibliography
1059 docstring const kind = operator[](from_ascii(key.substr(11)));
1060 ret = getAuthorList(&buf, kind, false, false, true);
1061 if (ci.forceUpperCase && isLowerCase(ret[0]))
1062 ret[0] = uppercase(ret[0]);
1063 } else if (prefixIs(key, "fullnames:")) {
1064 // Return a full name list. Suitable for Bibliography
1066 docstring const kind = operator[](from_ascii(key.substr(10)));
1067 ret = getAuthorList(&buf, kind, true, false, true);
1068 if (ci.forceUpperCase && isLowerCase(ret[0]))
1069 ret[0] = uppercase(ret[0]);
1070 } else if (prefixIs(key, "forceabbrvnames:")) {
1071 // Special key to provide abbreviated name lists,
1072 // irrespective of maxcitenames. Suitable for Bibliography
// "forceabbrvnames:" is 16 characters long.
1074 docstring const kind = operator[](from_ascii(key.substr(16)));
1075 ret = getAuthorList(&buf, kind, false, true, true);
1076 if (ci.forceUpperCase && isLowerCase(ret[0]))
1077 ret[0] = uppercase(ret[0]);
1078 } else if (prefixIs(key, "abbrvbynames:")) {
1079 // Special key to provide abbreviated name list,
1080 // with respect to maxcitenames. Suitable for further names inside a
1081 // bibliography item (such as "ed. by ...")
// "abbrvbynames:" is 13 characters long.
1082 docstring const kind = operator[](from_ascii(key.substr(13)));
1083 ret = getAuthorList(&buf, kind, false, false, true, false);
1084 if (ci.forceUpperCase && isLowerCase(ret[0]))
1085 ret[0] = uppercase(ret[0]);
1086 } else if (prefixIs(key, "fullbynames:")) {
1087 // Return a full name list. Suitable for further names inside a
1088 // bibliography item (such as "ed. by ...")
// "fullbynames:" is 12 characters long.
1089 docstring const kind = operator[](from_ascii(key.substr(12)));
1090 ret = getAuthorList(&buf, kind, true, false, true, false);
1091 if (ci.forceUpperCase && isLowerCase(ret[0]))
1092 ret[0] = uppercase(ret[0]);
1093 } else if (prefixIs(key, "forceabbrvbynames:")) {
1094 // Special key to provide abbreviated name lists,
1095 // irrespective of maxcitenames. Suitable for further names inside a
1096 // bibliography item (such as "ed. by ...")
// "forceabbrvbynames:" is 18 characters long.
1097 docstring const kind = operator[](from_ascii(key.substr(18)));
1098 ret = getAuthorList(&buf, kind, false, true, true, false);
1099 if (ci.forceUpperCase && isLowerCase(ret[0]))
1100 ret[0] = uppercase(ret[0]);
1101 } else if (key == "abbrvciteauthor") {
1102 // Special key to provide abbreviated author or
1103 // editor names (suitable for citation labels),
1104 // with respect to maxcitenames.
1105 ret = getAuthorOrEditorList(&buf, false, false);
1106 if (ci.forceUpperCase && isLowerCase(ret[0]))
1107 ret[0] = uppercase(ret[0]);
1108 } else if (key == "fullciteauthor") {
1109 // Return a full author or editor list (for citation labels)
1110 ret = getAuthorOrEditorList(&buf, true, false);
1111 if (ci.forceUpperCase && isLowerCase(ret[0]))
1112 ret[0] = uppercase(ret[0]);
1113 } else if (key == "forceabbrvciteauthor") {
1114 // Special key to provide abbreviated author or
1115 // editor names (suitable for citation labels),
1116 // irrespective of maxcitenames.
1117 ret = getAuthorOrEditorList(&buf, false, true);
1118 if (ci.forceUpperCase && isLowerCase(ret[0]))
1119 ret[0] = uppercase(ret[0]);
1120 } else if (key == "bibentry") {
1121 // Special key to provide the full bibliography entry: see getInfo()
1122 CiteEngineType const engine_type = buf.params().citeEngineType();
1123 DocumentClass const & dc = buf.params().documentClass();
1124 docstring const & format =
1125 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1127 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1128 } else if (key == "textbefore")
1129 ret = ci.textBefore;
1130 else if (key == "textafter")
1132 else if (key == "curpretext") {
// The same key may be cited several times; numkey is compared with
// num_bib_key_ to pick the pretext that belongs to this occurrence.
1133 vector<pair<docstring, docstring>> pres = ci.getPretexts();
1134 vector<pair<docstring, docstring>>::iterator it = pres.begin();
1136 for (; it != pres.end() ; ++it) {
1137 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1142 if ((*it).first == bib_key_)
1145 } else if (key == "curposttext") {
// Same selection as for "curpretext", applied to the posttexts.
1146 vector<pair<docstring, docstring>> posts = ci.getPosttexts();
1147 vector<pair<docstring, docstring>>::iterator it = posts.begin();
1149 for (; it != posts.end() ; ++it) {
1150 if ((*it).first == bib_key_ && numkey == num_bib_key_) {
1155 if ((*it).first == bib_key_)
1158 } else if (key == "year")
1163 ret = html::cleanAttr(ret);
1165 // make sure it is not too big
1166 support::truncateWithEllipsis(ret, maxsize);
1171 //////////////////////////////////////////////////////////////////////
1175 //////////////////////////////////////////////////////////////////////
1179 // A functor for use with sort, leading to case insensitive sorting
// It orders s1 before s2 when compare_no_case(s1, s2) is negative.
// The functor is a plain class: std::binary_function, which only supplied
// typedefs that sort() does not use, is deprecated since C++11 and removed
// in C++17.
1180 class compareNoCase
1183 bool operator()(docstring const & s1, docstring const & s2) const {
1184 return compare_no_case(s1, s2) < 0;
// Collects the keys of all entries that `data` refers to through its
// "crossref" field and its (comma-separated) "xdata" field, followed by the
// keys those entries refer to in turn. `nested` is true for the recursive
// calls; it suppresses the "crossref" lookup, which is not nestable.
1191 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1193 vector<docstring> result;
1194 if (!data.isBibTeX())
1196 // Legacy crossref field. This is not nestable.
1197 if (!nested && !data["crossref"].empty()) {
1198 docstring const xrefkey = data["crossref"];
1199 result.push_back(xrefkey);
1200 // However, check for nested xdatas
1201 BiblioInfo::const_iterator it = find(xrefkey);
1203 BibTeXInfo const & xref = it->second;
1204 vector<docstring> const nxdata = getXRefs(xref, true);
1205 if (!nxdata.empty())
1206 result.insert(result.end(), nxdata.begin(), nxdata.end());
1209 // Biblatex's xdata field. Infinitely nestable.
1210 // XData field can consist of a comma-separated list of keys
1211 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1212 if (!xdatakeys.empty()) {
1213 vector<docstring>::const_iterator xit = xdatakeys.begin();
1214 vector<docstring>::const_iterator xen = xdatakeys.end();
1215 for (; xit != xen; ++xit) {
1216 docstring const xdatakey = *xit;
1217 result.push_back(xdatakey);
1218 BiblioInfo::const_iterator it = find(xdatakey);
1220 BibTeXInfo const & xdata = it->second;
// recurse into the referenced entry and append what it refers to
1221 vector<docstring> const nxdata = getXRefs(xdata, true);
1222 if (!nxdata.empty())
1223 result.insert(result.end(), nxdata.begin(), nxdata.end());
1231 vector<docstring> const BiblioInfo::getKeys() const
1233 vector<docstring> bibkeys;
1234 BiblioInfo::const_iterator it = begin();
1235 for (; it != end(); ++it)
1236 bibkeys.push_back(it->first);
1237 sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
1242 vector<docstring> const BiblioInfo::getFields() const
1244 vector<docstring> bibfields;
1245 set<docstring>::const_iterator it = field_names_.begin();
1246 set<docstring>::const_iterator end = field_names_.end();
1247 for (; it != end; ++it)
1248 bibfields.push_back(*it);
1249 sort(bibfields.begin(), bibfields.end());
1254 vector<docstring> const BiblioInfo::getEntries() const
1256 vector<docstring> bibentries;
1257 set<docstring>::const_iterator it = entry_types_.begin();
1258 set<docstring>::const_iterator end = entry_types_.end();
1259 for (; it != end; ++it)
1260 bibentries.push_back(*it);
1261 sort(bibentries.begin(), bibentries.end());
1266 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1268 BiblioInfo::const_iterator it = find(key);
1271 BibTeXInfo const & data = it->second;
1272 return data.getAuthorOrEditorList(&buf, false);
1276 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1278 BiblioInfo::const_iterator it = find(key);
1281 BibTeXInfo const & data = it->second;
1282 return data.citeNumber();
1286 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1288 BiblioInfo::const_iterator it = find(key);
1291 BibTeXInfo const & data = it->second;
1292 docstring year = data.getYear();
1294 // let's try the crossrefs
1295 vector<docstring> const xrefs = getXRefs(data);
1299 for (docstring const & xref : xrefs) {
1300 BiblioInfo::const_iterator const xrefit = find(xref);
1301 if (xrefit == end())
1303 BibTeXInfo const & xref_data = xrefit->second;
1304 year = xref_data.getYear();
1310 if (use_modifier && data.modifier() != 0)
1311 year += data.modifier();
1316 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1318 docstring const year = getYear(key, use_modifier);
1320 return buf.B_("No year");
1325 docstring const BiblioInfo::getInfo(docstring const & key,
1326 Buffer const & buf, CiteItem const & ci) const
1328 BiblioInfo::const_iterator it = find(key);
1330 return docstring(_("Bibliography entry not found!"));
1331 BibTeXInfo const & data = it->second;
1332 BibTeXInfoList xrefptrs;
1333 for (docstring const & xref : getXRefs(data)) {
1334 BiblioInfo::const_iterator const xrefit = find(xref);
1335 if (xrefit != end())
1336 xrefptrs.push_back(&(xrefit->second));
1338 return data.getInfo(xrefptrs, buf, ci);
// Builds the label for a citation of `keys` in the cite style `style`.
// The format string is taken from the document class of `buf`
// (getCiteFormat() for the buffer's cite engine type) and is passed through
// BibTeXInfo::getLabel() once per key, each call receiving the result of
// the previous call. The outcome is shortened with truncateWithEllipsis()
// to the limit taken from ci.max_size.
// `keys` is taken by value because the list is edited locally when it
// holds more than 10 elements.
1342 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1343 Buffer const & buf, string const & style, CiteItem const & ci) const
1345 size_t max_size = ci.max_size;
1346 // shorter makes no sense
// NOTE(review): the second LASSERT argument assigns 16 to max_size;
// presumably it runs as the recovery action when the check fails --
// confirm in support/lassert.h.
1347 LASSERT(max_size >= 16, max_size = 16);
1349 // we can't display more than 10 of these, anyway
1350 // but since we truncate in the middle,
1351 // we need to split into two halves.
1352 bool const too_many_keys = keys.size() > 10;
1353 vector<docstring> lkeys;
1354 if (too_many_keys) {
// save the last five keys ...
1355 lkeys.insert(lkeys.end(), keys.end() - 5, keys.end());
// ... and append them again at the end of the key list
1357 keys.insert(keys.end(), lkeys.begin(), lkeys.end());
1360 CiteEngineType const engine_type = buf.params().citeEngineType();
1361 DocumentClass const & dc = buf.params().documentClass();
1362 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
// ret starts out as the raw format and is rewritten for every key
1363 docstring ret = format;
1364 vector<docstring>::const_iterator key = keys.begin();
1365 vector<docstring>::const_iterator ken = keys.end();
// every key visited so far, the current one included
1366 vector<docstring> handled_keys;
// i is the position of the current key in the list
1367 for (int i = 0; key != ken; ++key, ++i) {
1368 handled_keys.push_back(*key);
// NOTE(review): `auto const k` copies each docstring per iteration;
// `auto const &` would avoid the copies.
1370 for (auto const k : handled_keys) {
1374 BiblioInfo::const_iterator it = find(*key);
// stand-in entry that only has the key set; `data` refers to it
1375 BibTeXInfo empty_data;
1376 empty_data.key(*key);
1377 BibTeXInfo & data = empty_data;
1378 vector<BibTeXInfo const *> xrefptrs;
// collect pointers to those entries named by getXRefs(data) that are
// present in this BiblioInfo
1381 for (docstring const & xref : getXRefs(data)) {
1382 BiblioInfo::const_iterator const xrefit = find(xref);
1383 if (xrefit != end())
1384 xrefptrs.push_back(&(xrefit->second));
// the last two arguments state whether further keys follow and whether
// this is the second key (i == 1)
1388 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
// NOTE(review): the trailing `true` presumably selects truncation in the
// middle, as the comment further up says -- confirm against
// truncateWithEllipsis().
1391 support::truncateWithEllipsis(ret, max_size, true);
1397 bool BiblioInfo::isBibtex(docstring const & key) const
1400 split(key, key1, ',');
1401 BiblioInfo::const_iterator it = find(key1);
1404 return it->second.isBibTeX();
1408 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1409 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1410 Buffer const & buf, CiteItem const & ci) const
1413 return vector<pair<docstring,docstring>>();
1416 CiteStringMap csm(styles.size());
1417 for (size_t i = 0; i != csm.size(); ++i) {
1418 style = styles[i].name;
1419 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1426 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1428 bimap_.insert(info.begin(), info.end());
1429 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1430 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1436 // used in xhtml to sort a list of BibTeXInfo objects
1437 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1439 docstring const lauth = lhs->getAuthorOrEditorList();
1440 docstring const rauth = rhs->getAuthorOrEditorList();
1441 docstring const lyear = lhs->getYear();
1442 docstring const ryear = rhs->getYear();
1443 docstring const ltitl = lhs->operator[]("title");
1444 docstring const rtitl = rhs->operator[]("title");
1445 return (lauth < rauth)
1446 || (lauth == rauth && lyear < ryear)
1447 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1453 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1455 cited_entries_.clear();
1456 // We are going to collect all the citation keys used in the document,
1457 // getting them from the TOC.
1458 // FIXME We may want to collect these differently, in the first case,
1459 // so that we might have them in order of appearance.
1460 set<docstring> citekeys;
1461 shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1462 Toc::const_iterator it = toc->begin();
1463 Toc::const_iterator const en = toc->end();
1464 for (; it != en; ++it) {
1465 if (it->str().empty())
1467 vector<docstring> const keys = getVectorFromString(it->str());
1468 citekeys.insert(keys.begin(), keys.end());
1470 if (citekeys.empty())
1473 // We have a set of the keys used in this document.
1474 // We will now convert it to a list of the BibTeXInfo objects used in
1476 vector<BibTeXInfo const *> bi;
1477 set<docstring>::const_iterator cit = citekeys.begin();
1478 set<docstring>::const_iterator const cen = citekeys.end();
1479 for (; cit != cen; ++cit) {
1480 BiblioInfo::const_iterator const bt = find(*cit);
1481 if (bt == end() || !bt->second.isBibTeX())
1483 bi.push_back(&(bt->second));
1486 sort(bi.begin(), bi.end(), lSorter);
1488 // Now we can write the sorted keys
1489 vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1490 vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1491 for (; bit != ben; ++bit)
1492 cited_entries_.push_back((*bit)->key());
// Assigns a cite number or a year modifier, and afterwards a label, to the
// entries listed in cited_entries_. That list is rebuilt first by
// collectCitedEntries(buf). Two passes are made over it: the first sets
// numbers/modifiers, the second sets the labels.
1496 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1498 collectCitedEntries(buf);
1499 CiteEngineType const engine_type = buf.params().citeEngineType();
// true when the engine type has the ENGINE_TYPE_NUMERICAL bit set
// NOTE(review): presumably `numbers` selects between the cite-number code
// and the author/year code below -- confirm.
1500 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1504 // used to remember the last one we saw
1505 // we'll be comparing entries to see if we need to add
1506 // modifiers, like "1984a"
1507 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
// first pass: cite numbers and year modifiers
1509 vector<docstring>::const_iterator it = cited_entries_.begin();
1510 vector<docstring>::const_iterator const en = cited_entries_.end();
1511 for (; it != en; ++it) {
1512 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1513 // this shouldn't happen, but...
1514 if (biit == bimap_.end())
1515 // ...fail gracefully, anyway.
1517 BibTeXInfo & entry = biit->second;
// the cite number is the next value of the running counter
1519 docstring const num = convert<docstring>(++keynumber);
1520 entry.setCiteNumber(num);
1522 // The first test here is checking whether this is the first
1523 // time through the loop. If so, then we do not have anything
1524 // with which to compare.
// A modifier is needed when this entry has the same author/editor list
// and the same year as the previously handled one.
1525 if (last != bimap_.end()
1526 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1527 // we access the year via getYear() so as to get it from the xref,
1528 // if we need to do so
1529 && getYear(entry.key()) == getYear(last->second.key())) {
1530 if (modifier == 0) {
1531 // so the last one should have been 'a'
1532 last->second.setModifier('a');
// 'z' gets special treatment (the statement that follows is not shown here)
1534 } else if (modifier == 'z')
1541 entry.setModifier(modifier);
1542 // remember the last one
// second pass: set the labels, restarting at the first cited entry
1547 it = cited_entries_.begin();
1548 for (; it != en; ++it) {
1549 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1550 // this shouldn't happen, but...
1551 if (biit == bimap_.end())
1552 // ...fail gracefully, anyway.
1554 BibTeXInfo & entry = biit->second;
// the label is the cite number assigned in the first pass
1556 entry.label(entry.citeNumber());
1558 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1559 // we do it this way so as to access the xref, if necessary
1560 // note that this also gives us the modifier
1561 docstring const year = getYear(*it, buf, true);
// "<author> <year>" when both parts are available
1562 if (!auth.empty() && !year.empty())
1563 entry.label(auth + ' ' + year);
// NOTE(review): presumably the fallback when author or year is missing:
// the key itself serves as label -- confirm.
1565 entry.label(entry.key());
1571 //////////////////////////////////////////////////////////////////////
1575 //////////////////////////////////////////////////////////////////////
// Derives a CitationStyle from the cite command name `command`.
// If params.getCiteAlias(command) returns a non-empty alias, the alias
// replaces `command` as the working name `cmd`.
// An upper-case first letter of `command` sets forceUpperCase, and the
// first letter of `cmd` is lower-cased. A trailing '*' on `command` sets
// hasStarredVersion, and a trailing '*' on `cmd` is removed.
1578 CitationStyle citationStyleFromString(string const & command,
1579 BufferParams const & params)
// NOTE(review): presumably an empty command returns early here, which the
// command[0] and command.size() - 1 accesses below rely on -- confirm.
1582 if (command.empty())
1585 string const alias = params.getCiteAlias(command);
1586 string cmd = alias.empty() ? command : alias;
// the case test looks at the original command, not at the alias
1587 if (isUpperCase(command[0])) {
1588 cs.forceUpperCase = true;
1589 cmd[0] = lowercase(cmd[0]);
// n is the index of the last character of command
1592 size_t const n = command.size() - 1;
1593 if (command[n] == '*') {
1594 cs.hasStarredVersion = true;
// strip the star from the working name only if it carries one
1595 if (suffixIs(cmd, '*'))
1596 cmd = cmd.substr(0, cmd.size() - 1);
1604 string citationStyleToString(const CitationStyle & cs, bool const latex)
1606 string cmd = latex ? cs.cmd : cs.name;
1607 if (cs.forceUpperCase)
1608 cmd[0] = uppercase(cmd[0]);
1609 if (cs.hasStarredVersion)