3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
10 * \author Jürgen Spitzmüller
12 * Full author contact details are available in file CREDITS.
17 #include "BiblioInfo.h"
19 #include "BufferParams.h"
20 #include "buffer_funcs.h"
23 #include "InsetIterator.h"
25 #include "output_xhtml.h"
26 #include "Paragraph.h"
27 #include "TextClass.h"
28 #include "TocBackend.h"
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/docstream.h"
33 #include "support/gettext.h"
34 #include "support/lassert.h"
35 #include "support/lstrings.h"
36 #include "support/regex.h"
37 #include "support/textutils.h"
43 using namespace lyx::support;
50 // Remove placeholders from names: every "$$space!" is turned back into
// a blank and every "$$comma!" back into a comma.
// \param input  name (or name part) that may contain the placeholders
// \return a copy of input with both placeholders substituted
51 docstring renormalize(docstring const & input)
// restore the protected blanks first ...
53 docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
// ... then the protected commas
54 return subst(res, from_ascii("$$comma!"), from_ascii(","));
58 // Split the surname into prefix ("von-part") and family name
// \param sname  surname string; its tokens are separated by single blanks
// \return a pair (prefix, family name). If sname has fewer than two
//         tokens, the prefix is empty and sname is returned unchanged.
59 pair<docstring, docstring> parseSurname(docstring const & sname)
61 // Split the surname into its tokens
62 vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
// A single token cannot carry a prefix.
63 if (pieces.size() < 2)
64 return make_pair(docstring(), sname);
66 // Now we look for pieces that begin with a lower case letter.
67 // All except for the very last token constitute the "von-part".
69 vector<docstring>::const_iterator it = pieces.begin();
70 vector<docstring>::const_iterator const en = pieces.end();
72 for (; it != en; ++it) {
75 // If this is the last piece, then what we now have is
76 // the family name, notwithstanding the casing.
79 char_type const c = (*it)[0];
80 // If the piece starts with an upper case char, we assume
81 // this is part of the surname.
84 // Nothing of the former, so add this piece to the prename
// NOTE(review): "prename" above presumably means the prefix (this function
// only returns prefix and surname); the statement itself is not visible
// in this excerpt -- confirm.
92 // Reconstruct the family name.
93 // Note that if we left the loop because it + 1 == en,
94 // then this will still do the right thing, i.e., make surname
95 // just be the last piece.
98 for (; it != en; ++it) {
105 return make_pair(prefix, surname);
117 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
// \param iname  the name string to analyse
// \return a name_parts value (res) whose prename, surname, prefix and
//         suffix members are set as far as they could be detected
118 name_parts nameParts(docstring const & iname)
124 // First we check for groupings (via {...}) and replace blanks and
125 // commas inside groups with temporary placeholders
128 docstring::const_iterator p = iname.begin();
129 while (p != iname.end()) {
130 // count grouping level
135 // generate string with probable placeholders
// (gl > 0 means that we are inside at least one group)
136 if (*p == ' ' && gl > 0)
137 name += from_ascii("$$space!");
138 else if (*p == ',' && gl > 0)
139 name += from_ascii("$$comma!");
145 // Now we look for a comma, and take the last name to be everything
146 // preceding the right-most one, so that we also get the name suffix
148 vector<docstring> pieces = getVectorFromString(name);
149 if (pieces.size() > 1) {
150 // Whether we have a name suffix or not, the prename is
// taken from the last piece.
152 res.prename = renormalize(pieces.back());
153 // The family name, conversely, is always the first item.
154 // However, it might contain a prefix (aka "von" part)
155 docstring const sname = pieces.front();
// NOTE(review): parseSurname(sname) is evaluated twice here; a single
// call whose result is stored would give the same values.
156 res.prefix = renormalize(parseSurname(sname).first);
157 res.surname = renormalize(parseSurname(sname).second);
158 // If we have three pieces (the maximum allowed by BibTeX),
159 // the second one is the name suffix.
160 if (pieces.size() > 2)
161 res.suffix = renormalize(pieces.at(1));
165 // OK, so now we want to look for the last name.
166 // Split on spaces, to get various tokens.
167 pieces = getVectorFromString(name, from_ascii(" "));
168 // No space: Only a family name given
169 if (pieces.size() < 2) {
170 res.surname = renormalize(pieces.back());
173 // If we get two pieces, assume "prename surname"
174 if (pieces.size() == 2) {
175 res.prename = renormalize(pieces.front());
176 res.surname = renormalize(pieces.back());
180 // More than 3 pieces: A name prefix (aka "von" part) might be included.
// NOTE(review): the two checks above cover fewer than two and exactly two
// pieces, so this part is presumably reached with three or more pieces.
181 // We look for the first piece that begins with a lower case letter
182 // (which is the name prefix, if it is not the last token) or the last token.
184 vector<docstring>::const_iterator it = pieces.begin();
185 vector<docstring>::const_iterator const en = pieces.end();
187 for (; it != en; ++it) {
190 char_type const c = (*it)[0];
191 // If the piece starts with a lower case char, we assume
192 // this is the name prefix and thus prename is complete.
195 // Same if this is the last piece, which is always the surname.
198 // Nothing of the former, so add this piece to the prename
206 // Now reconstruct the family name and strip the prefix.
207 // Note that if we left the loop because it + 1 == en,
208 // then this will still do the right thing, i.e., make surname
209 // just be the last piece.
212 for (; it != en; ++it) {
219 res.prename = renormalize(prename);
// NOTE(review): parseSurname(surname) is evaluated twice, as above.
220 res.prefix = renormalize(parseSurname(surname).first);
221 res.surname = renormalize(parseSurname(surname).second);
// \param name    the name to be formatted; it is split via nameParts()
// \param scheme  format scheme using the keys %prename%, %surname%,
//                %prefix% and %suffix%
// NOTE(review): scheme is passed by value; a const reference would avoid
// a copy without changing what callers can pass.
226 docstring constructName(docstring const & name, string const scheme)
228 // re-constructs a name from name parts according
// to the given scheme
// NOTE(review): nameParts(name) is evaluated four times below; one call
// whose result is stored would provide the same four values.
230 docstring const prename = nameParts(name).prename;
231 docstring const surname = nameParts(name).surname;
232 docstring const prefix = nameParts(name).prefix;
233 docstring const suffix = nameParts(name).suffix;
// Each regex matches a scheme that contains a construct of the form
// {%key%[[text]]} and captures five groups: what precedes it, the opening
// "{%key%[[", the text between the double brackets, the closing "]]}",
// and what follows.
235 static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
236 static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
237 static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
239 if (regex_match(scheme, sub, reg1)) {
// presumably the bracketed text is kept only if the name part is
// non-empty; the statements are not visible in this excerpt
241 if (!prename.empty())
245 if (regex_match(res, sub, reg2)) {
251 if (regex_match(res, sub, reg3)) {
// Finally replace the plain keys by the actual name parts.
257 docstring result = from_ascii(res);
258 result = subst(result, from_ascii("%prename%"), prename);
259 result = subst(result, from_ascii("%surname%"), surname);
260 result = subst(result, from_ascii("%prefix%"), prefix);
261 result = subst(result, from_ascii("%suffix%"), suffix);
// Splits an author list into the individual names.
// \param author  the complete author string, names separated by " and "
// \return the names, split at each " and " that is outside of {...} groups
266 vector<docstring> const getAuthors(docstring const & author)
268 // We check for groupings (via {...}) and only consider " and "
269 // outside groups as author separator. This is to account
270 // for cases such as {{Barnes and Noble, Inc.}}, which
271 // need to be treated as one single family name.
272 // We use temporary placeholders in order to differentiate the
273 // diverse " and " cases.
275 // First, we temporarily replace all ampersands. It is rather unusual
276 // in author names, but can happen (consider cases such as "C \& A Corp.").
277 docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
278 // Then, we temporarily make all " and " strings to ampersands in order
279 // to handle them later on a per-char level.
280 iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
281 // Now we traverse through the string and replace the "&" by the proper
282 // output in- and outside groups
285 docstring::const_iterator p = iname.begin();
286 while (p != iname.end()) {
287 // count grouping level
292 // generate string with probable placeholders
295 // Inside groups, we output "and"
296 name += from_ascii("and");
298 // Outside groups, we output a separator
299 name += from_ascii("$$namesep!");
306 // re-insert the literal ampersands
307 name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
309 // Now construct the actual vector
// (the blanks that surrounded the "&" are part of the delimiter)
310 return getVectorFromString(name, from_ascii(" $$namesep! "));
// \return true if getAuthors() splits the given string into more than
//         one name.
// NOTE(review): author is passed by value although it is only forwarded
// to getAuthors(), which takes a const reference.
314 bool multipleAuthors(docstring const author)
316 return getAuthors(author).size() > 1;
320 // converts a string containing LaTeX commands into unicode
// \param str  the string to be converted
// The input is consumed from the front of a working copy (val), one
// character or command at a time, and the output is collected in ret.
322 docstring convertLaTeXCommands(docstring const & str)
// scanner state
327 bool scanning_cmd = false;
328 bool scanning_math = false;
329 bool escaped = false; // used to catch \$, etc.
330 while (!val.empty()) {
331 char_type const ch = val[0];
333 // if we're scanning math, we output everything until we
334 // find an unescaped $, at which point we break out.
341 scanning_math = false;
347 // if we're scanning a command name, then we just
348 // discard characters until we hit something that
// is not an ASCII letter (see the isAlphaASCII() test below)
351 if (isAlphaASCII(ch)) {
356 // so we're done with this command.
357 // now we fall through and check this character.
358 scanning_cmd = false;
361 // was the last character a \? If so, then this is something like:
362 // \\ or \$, so we'll just output it. That's probably not always right...
364 // exception: output \, as THIN SPACE
// (0x2009 is the code point of THIN SPACE)
366 ret.push_back(0x2009);
377 scanning_math = true;
381 // Change text mode accents in the form
382 // {\v a} to \v{a} (see #9340).
383 // FIXME: This is a sort of mini-tex2lyx.
384 // Use the real tex2lyx instead!
385 static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
386 if (lyx::regex_search(to_utf8(val), tma_reg)) {
// the whitespace that follows the accent command becomes '{'
388 val.replace(2, 1, from_ascii("{"));
392 // Apart from the above, we just ignore braces
393 if (ch == '{' || ch == '}') {
398 // we're going to check things that look like commands, so if
399 // this doesn't, just output it.
406 // ok, could be a command of some sort
407 // let's see if it corresponds to some unicode
408 // unicodesymbols has things in the form: \"{u},
409 // whereas we may see things like: \"u. So we'll
410 // look for that and change it, if necessary.
411 // FIXME: This is a sort of mini-tex2lyx.
412 // Use the real tex2lyx instead!
413 static lyx::regex const reg("^\\\\\\W\\w");
414 if (lyx::regex_search(to_utf8(val), reg)) {
// insert the closing brace first, so that the position used for the
// opening brace is not shifted
415 val.insert(3, from_ascii("}"));
416 val.insert(2, from_ascii("{"));
420 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
421 Encodings::TEXT_CMD, termination, rem);
422 if (!cnvtd.empty()) {
423 // it did, so we'll take that bit and proceed with what's left
428 // it's a command of some sort
437 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
// \param str       the string to process
// \param richtext  whether rich text output is wanted
// The input is consumed from the front of a working copy (val).
438 docstring processRichtext(docstring const & str, bool richtext)
// true while we are between "{!" and "!}"
443 bool scanning_rich = false;
444 while (!val.empty()) {
445 char_type const ch = val[0];
446 if (ch == '{' && val.size() > 1 && val[1] == '!') {
447 // beginning of rich text
448 scanning_rich = true;
// end of rich text
452 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
454 scanning_rich = false;
462 // we need to escape '<' and '>'
470 } else if (!scanning_rich /* && !richtext */)
472 // else the character is discarded, which will happen only if
473 // richtext == false and we are scanning rich text
482 //////////////////////////////////////////////////////////////////////
486 //////////////////////////////////////////////////////////////////////
// Constructs an entry that is flagged as a BibTeX entry (is_bibtex_ is
// set to true).
// \param key   stored as the bibliography key (bib_key_)
// \param type  stored as the entry type (entry_type_)
488 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
489 : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
// Returns the formatted name list of this entry, built from the "author"
// field or from the "editor" field.
// \param buf, full, forceshort  passed on unchanged to getAuthorList()
495 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
496 bool full, bool forceshort) const
498 docstring author = operator[]("author");
// presumably the editor is only used if there is no author; the
// condition is not visible in this excerpt
500 author = operator[]("editor");
502 return getAuthorList(buf, author, full, forceshort);
// Formats a list of names for display.
// \param buf         may be null; in that case fallback values are used
//                    instead of the settings of the document class
// \param author      the raw name list, see getAuthors()
// \param full        if set, the maxnames limit is not applied
// \param forceshort  request the short form (with "et al.") whenever
//                    there is more than one name
// \param allnames, beginning  their use is not visible in this excerpt;
//                    presumably they select the name forms -- TODO confirm
// \return the formatted list, passed through convertLaTeXCommands()
506 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
507 docstring const & author, bool const full, bool const forceshort,
508 bool const allnames, bool const beginning) const
510 // The maxnames threshold depends on the engine
// (2 is used if there is no buffer)
511 size_t maxnames = buf ?
512 buf->params().documentClass().max_citenames() : 2;
515 docstring const opt = label();
// take the part of the label that precedes the '('
520 docstring const remainder = trim(split(opt, authors, '('));
521 if (remainder.empty())
522 // in this case, we didn't find a "(",
523 // so we don't have author (year)
531 // OK, we've got some names. Let's format them.
532 // Try to split the author list
533 vector<docstring> const authors = getAuthors(author);
537 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
538 : ENGINE_TYPE_DEFAULT;
540 // These are defined in the styles
542 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_etal")
544 string const namesep =
545 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_namesep")
547 string const lastnamesep =
548 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_lastnamesep")
550 string const pairnamesep =
551 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_pairnamesep")
// Name schemes in the syntax understood by constructName(); the string
// literals are the fallbacks used if there is no buffer.
553 string firstnameform =
554 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
555 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
// NOTE(review): the condition guarding this reassignment is not visible
// in this excerpt.
557 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
558 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
559 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
560 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
// NOTE(review): as above, the guarding condition is not visible here.
562 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
563 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
564 string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
565 : "{%prefix%[[%prefix% ]]}%surname%";
567 // Shorten the list (with et al.) if forceshort is set
568 // and the list can actually be shortened, else if maxcitenames
569 // is passed and full is not set.
570 bool shorten = forceshort && authors.size() > 1;
571 vector<docstring>::const_iterator it = authors.begin();
572 vector<docstring>::const_iterator en = authors.end();
573 for (size_t i = 0; it != en; ++it, ++i) {
// too many names for an unabridged list
574 if (i >= maxnames && !full) {
// BibTeX's "and others" yields the literal name "others"
578 if (*it == "others") {
579 retval += buf ? buf->B_(etal) : from_ascii(etal);
// the separator before the last name differs for a list of exactly two
// names
582 if (i > 0 && i == authors.size() - 1) {
583 if (authors.size() == 2)
584 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
586 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
588 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
// the first name may use a different scheme than the following ones
590 retval += (i == 0) ? constructName(*it, firstnameform)
591 : constructName(*it, othernameform);
593 retval += constructName(*it, citenameform);
// shortened form: first name followed by "et al."
597 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
599 retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
602 return convertLaTeXCommands(retval);
// Returns the year of this entry. The "year" field, biblatex's "date"
// field and the label() string are consulted; which branch applies to
// which kind of entry is not visible in this excerpt.
606 docstring const BibTeXInfo::getYear() const
609 // first try legacy year field
610 docstring year = operator[]("year");
613 // now try biblatex's date field
614 year = operator[]("date");
615 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
616 // We only want the years.
// yreg captures the four digits at the start of the string, ereg the four
// digits that follow a '/'; both allow a leading '-'.
617 static regex const yreg("[-]?([\\d]{4}).*");
618 static regex const ereg(".*/[-]?([\\d]{4}).*");
620 string const date = to_utf8(year);
621 if (!regex_match(date, sm, yreg))
622 // cannot parse year.
624 year = from_ascii(sm[1]);
625 // check for an endyear
626 if (regex_match(date, sm, ereg))
// 0x2013 is the code point of EN DASH
627 year += char_type(0x2013) + from_ascii(sm[1]);
631 docstring const opt = label();
// split the label at the '(' ...
636 docstring tmp = split(opt, authors, '(');
638 // we don't have author (year)
// ... and take what precedes the ')' as year
641 tmp = split(tmp, year, ')');
// Forward declaration; parseEmbeddedOption() calls parseOptions() before
// its definition further down.
648 docstring parseOptions(docstring const & format, string & optkey,
649 docstring & ifpart, docstring & elsepart);
651 // Calls parseOptions to deal with an embedded option, such as:
652 // {%number%[[, no.~%number%]]}
653 // which must appear at the start of format. ifelsepart gets the
654 // whole of the option, and we return what's left after the option.
655 // we return format if there is an error.
// \param format      string that starts with "{%"
// \param ifelsepart  output: the text of the complete option
656 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
// precondition: format starts with "{%"
658 LASSERT(format[0] == '{' && format[1] == '%', return format);
662 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
663 if (format == rest) { // parse error
664 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
// rest is what remains of format, so it cannot be longer than format
667 LASSERT(rest.size() <= format.size(),
668 { ifelsepart = docstring(); return format; });
// the option is what was removed from the front of format
669 ifelsepart = format.substr(0, format.size() - rest.size());
674 // Gets a "clause" from a format string, where the clause is
675 // delimited by '[[' and ']]'. Returns what is left after the
676 // clause is removed, and returns format if there is an error.
// \param format  the format string
// \param clause  output parameter for the clause
677 docstring getClause(docstring const & format, docstring & clause)
// working copy
679 docstring fmt = format;
682 // we'll remove characters from the front of fmt as we
// go along
684 while (!fmt.empty()) {
// "]]" ends the clause
685 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
690 // check for an embedded option
691 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
693 docstring const rest = parseEmbeddedOption(fmt, part);
695 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
700 } else { // it's just a normal character
709 // parse an options string, which must appear at the start of the
710 // format parameter. puts the parsed bits in optkey, ifpart, and
711 // elsepart and returns what's left after the option is removed.
712 // if there's an error, it returns format itself.
// The expected form is {%key%[[if clause]]} or
// {%key%[[if clause]][[else clause]]}.
713 docstring parseOptions(docstring const & format, string & optkey,
714 docstring & ifpart, docstring & elsepart)
// precondition: format starts with "{%"
716 LASSERT(format[0] == '{' && format[1] == '%', return format);
// skip the leading "{%"
718 docstring fmt = format.substr(2);
719 size_t pos = fmt.find('%'); // end of key
720 if (pos == string::npos) {
721 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
724 optkey = to_utf8(fmt.substr(0, pos));
// drop the key and the '%' that terminates it
725 fmt = fmt.substr(pos + 1);
726 // [[format]] should be next
// NOTE(review): fmt[1] is accessed here and below without a length check
// being visible in this excerpt (getClause() does test fmt.size() > 1 in
// the same situation) -- confirm that fmt cannot be empty at this point.
727 if (fmt[0] != '[' || fmt[1] != '[') {
728 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
// keep a copy, since getClause() returns its argument on error
732 docstring curfmt = fmt;
733 fmt = getClause(curfmt, ifpart);
735 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
739 if (fmt[0] == '}') // we're done, no else clause
740 return fmt.substr(1);
742 // else part should follow
743 if (fmt[0] != '[' || fmt[1] != '[') {
744 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
749 fmt = getClause(curfmt, elsepart);
// the option has to be closed by '}' now
751 if (fmt == curfmt || fmt[0] != '}') {
752 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
755 return fmt.substr(1);
762 Bug #9131 revealed an oddity in how we are generating citation information
763 when more than one key is given. We end up building a longer and longer format
764 string as we go, which we then have to re-parse, over and over and over again,
765 rather than generating the information for the individual keys and then putting
766 all of that together. We do that to deal with the way separators work, from what
767 I can tell, but it still feels like a hack. Fixing this would require quite a
768 bit of work, however.
// Expands a citation format string for this entry.
// \param format   the format; it may contain keys (%key%), options
//                 ({%key%[[if]][[else]]}) and rich text ({!...!})
// \param xrefs    cross-referenced entries, passed on to getValueForKey()
// \param counter  pass counter, compared with max_passes
// \param buf      supplies the cite engine type, the cite macros and the
//                 language
// \param ci       citation settings, e.g. max_key_size
// \param next, second  enable the if part of the {%next%...} and
//                 {%second%...} options, respectively
// NOTE(review): xrefs is a BibTeXInfoList passed by value; a const
// reference would avoid copying it, also in the recursive calls below.
770 docstring BibTeXInfo::expandFormat(docstring const & format,
771 BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
772 CiteItem const & ci, bool next, bool second) const
774 // incorrect use of macros could put us in an infinite loop
775 static int const max_passes = 5000;
776 // the use of overly large keys can lead to performance problems, due
777 // to eventual attempts to convert LaTeX macros to unicode. See bug
778 // #8944. By default, the size is limited to 128 (in CiteItem), but
779 // for specific purposes (such as XHTML export), it needs to be enlarged
780 // This is perhaps not the best solution, but it will have to do for now.
781 size_t const max_keysize = ci.max_key_size;
782 odocstringstream ret; // return value
// scanner state
784 bool scanning_key = false;
785 bool scanning_rich = false;
787 CiteEngineType const engine_type = buf.params().citeEngineType();
788 docstring fmt = format;
789 // we'll remove characters from the front of fmt as we
// go along
791 while (!fmt.empty()) {
792 if (counter > max_passes) {
793 LYXERR0("Recursion limit reached while parsing `"
798 char_type thischar = fmt[0];
799 if (thischar == '%') {
800 // beginning or end of key
803 scanning_key = false;
804 // so we replace the key with its value, which may be empty
// the value of the macro is put in front of the remaining format, so
// that it is scanned in turn
808 buf.params().documentClass().getCiteMacro(engine_type, key);
809 fmt = from_utf8(val) + fmt.substr(1);
812 } else if (key[0] == '_') {
813 // a translatable bit
815 buf.params().documentClass().getCiteMacro(engine_type, key);
816 docstring const trans =
817 translateIfPossible(from_utf8(val), buf.params().language->code());
// anything else is looked up via getValueForKey()
820 docstring const val =
821 getValueForKey(key, buf, ci, xrefs, max_keysize);
// rich text markers for a <span> whose class is derived from the key
823 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
826 ret << from_ascii("{!</span>!}");
834 else if (thischar == '{') {
835 // beginning of option?
837 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
840 if (fmt.size() > 1) {
842 // it is the beginning of an optional format
846 docstring const newfmt =
847 parseOptions(fmt, optkey, ifpart, elsepart);
848 if (newfmt == fmt) // parse error
851 docstring const val =
852 getValueForKey(optkey, buf, ci, xrefs);
853 if (optkey == "next" && next)
854 ret << ifpart; // without expansion
855 else if (optkey == "second" && second) {
857 ret << expandFormat(ifpart, xrefs, newcounter, buf,
// the key has a value: expand the if part
859 } else if (!val.empty()) {
861 ret << expandFormat(ifpart, xrefs, newcounter, buf,
// no value: expand the else part, if there is one
863 } else if (!elsepart.empty()) {
865 ret << expandFormat(elsepart, xrefs, newcounter, buf,
868 // fmt will have been shortened for us already
872 // beginning of rich text
873 scanning_rich = true;
875 ret << from_ascii("{!");
879 // we are here if '{' was not followed by % or !.
880 // So it's just a character.
// end of rich text
883 else if (scanning_rich && thischar == '!'
884 && fmt.size() > 1 && fmt[1] == '}') {
886 scanning_rich = false;
888 ret << from_ascii("!}");
// inside of a key: collect its name
891 else if (scanning_key)
892 key += char(thischar);
896 } catch (EncodingException & /* e */) {
897 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
// sanity checks at the end of the format
903 LYXERR0("Never found end of key in `" << format << "'!");
907 LYXERR0("Never found end of rich text in `" << format << "'!");
// Returns the formatted bibliography information for this entry. The
// results are cached in info_ (plain) and info_richtext_ (rich text).
// \param xrefs  cross-referenced entries, passed on to expandFormat()
// \param buf    supplies the cite engine type and the document class
// \param ci     citation settings; ci.richtext selects the variant
// NOTE(review): xrefs is a BibTeXInfoList passed by value.
914 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
915 Buffer const & buf, CiteItem const & ci) const
917 bool const richtext = ci.richtext;
// use the cached values, if there are any
919 if (!richtext && !info_.empty())
921 if (richtext && !info_richtext_.empty())
922 return info_richtext_;
925 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
// get the format for this entry type from the document class
930 CiteEngineType const engine_type = buf.params().citeEngineType();
931 DocumentClass const & dc = buf.params().documentClass();
// (the const reference binds to the temporary returned by from_utf8())
932 docstring const & format =
933 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
935 info_ = expandFormat(format, xrefs, counter, buf,
939 // this probably shouldn't happen
// rich text variant
944 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
945 return info_richtext_;
// plain variant
948 info_ = convertLaTeXCommands(processRichtext(info_, false));
// Builds a citation label for this entry by expanding format.
// \param xrefs, buf, ci, next, second  passed on to expandFormat()
// NOTE(review): xrefs is a BibTeXInfoList passed by value.
953 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
954 Buffer const & buf, docstring const & format,
955 CiteItem const & ci, bool next, bool second) const
960 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
// the post-processing is only done if next is not set
962 if (!loclabel.empty() && !next) {
963 loclabel = processRichtext(loclabel, ci.richtext);
964 loclabel = convertLaTeXCommands(loclabel);
// Looks up the value of a field of this entry.
// \param field  the field name
971 docstring const & BibTeXInfo::operator[](docstring const & field) const
973 BibTeXInfo::const_iterator it = find(field);
// a static empty string, since a reference has to be returned;
// presumably used for fields that are not present
976 static docstring const empty_value = docstring();
// Convenience overload: converts field with from_ascii() and forwards to
// the docstring overload.
981 docstring const & BibTeXInfo::operator[](string const & field) const
983 return operator[](from_ascii(field));
// Returns the value that a key of a citation format stands for.
// \param oldkey   the key; a leading "clean:" is stripped off
// \param buf      the buffer, needed for the name lists and for "bibentry"
// \param ci       citation settings and context
// \param xrefs    cross-referenced entries, consulted if this entry has
//                 no value for the key
// \param maxsize  maximal size of the result, see truncateWithEllipsis()
// NOTE(review): xrefs is a BibTeXInfoList passed by value.
987 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
988 CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
990 // anything less is pointless
991 LASSERT(maxsize >= 16, maxsize = 16);
993 bool cleanit = false;
994 if (prefixIs(oldkey, "clean:")) {
// 6 is the length of "clean:"
995 key = oldkey.substr(6);
// first try the field of this entry ...
999 docstring ret = operator[](key);
// ... then those of the cross-referenced entries
1000 if (ret.empty() && !xrefs.empty()) {
1001 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
1002 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
1003 for (; it != en; ++it) {
1004 if (*it && !(**it)[key].empty()) {
1011 // some special keys
1012 // FIXME: dialog, textbefore and textafter have nothing to do with this
1013 if (key == "dialog" && ci.context == CiteItem::Dialog)
1014 ret = from_ascii("x"); // any non-empty string will do
1015 else if (key == "export" && ci.context == CiteItem::Export)
1016 ret = from_ascii("x"); // any non-empty string will do
1017 else if (key == "ifstar" && ci.Starred)
1018 ret = from_ascii("x"); // any non-empty string will do
1019 else if (key == "ifqualified" && ci.isQualified)
1020 ret = from_ascii("x"); // any non-empty string will do
1021 else if (key == "entrytype")
// 12 is the length of "ifentrytype:"
1023 else if (prefixIs(key, "ifentrytype:")
1024 && from_ascii(key.substr(12)) == entry_type_)
1025 ret = from_ascii("x"); // any non-empty string will do
1026 else if (key == "key")
1028 else if (key == "label")
1030 else if (key == "modifier" && modifier_ != 0)
1032 else if (key == "numericallabel")
1034 else if (prefixIs(key, "ifmultiple:")) {
1035 // Return whether we have multiple authors
// 11 is the length of "ifmultiple:"
1036 docstring const kind = operator[](from_ascii(key.substr(11)));
1037 if (multipleAuthors(kind))
1038 ret = from_ascii("x"); // any non-empty string will do
1040 else if (prefixIs(key, "abbrvnames:")) {
1041 // Special key to provide abbreviated name list,
1042 // with respect to maxcitenames. Suitable for Bibliography
// 11 is the length of "abbrvnames:"
1044 docstring const kind = operator[](from_ascii(key.substr(11)));
1045 ret = getAuthorList(&buf, kind, false, false, true);
1046 if (ci.forceUpperCase && isLowerCase(ret[0]))
1047 ret[0] = uppercase(ret[0]);
1048 } else if (prefixIs(key, "fullnames:")) {
1049 // Return a full name list. Suitable for Bibliography
// 10 is the length of "fullnames:"
1051 docstring const kind = operator[](from_ascii(key.substr(10)));
1052 ret = getAuthorList(&buf, kind, true, false, true);
1053 if (ci.forceUpperCase && isLowerCase(ret[0]))
1054 ret[0] = uppercase(ret[0]);
1055 } else if (prefixIs(key, "forceabbrvnames:")) {
1056 // Special key to provide abbreviated name lists,
1057 // irrespective of maxcitenames. Suitable for Bibliography
// NOTE(review): "forceabbrvnames:" has 16 characters, but substr(15) is
// used, so the field name that is looked up starts with ':'. This looks
// like an off-by-one (16 expected) -- confirm.
1059 docstring const kind = operator[](from_ascii(key.substr(15)));
1060 ret = getAuthorList(&buf, kind, false, true, true);
1061 if (ci.forceUpperCase && isLowerCase(ret[0]))
1062 ret[0] = uppercase(ret[0]);
1063 } else if (prefixIs(key, "abbrvbynames:")) {
1064 // Special key to provide abbreviated name list,
1065 // with respect to maxcitenames. Suitable for further names inside a
1066 // bibliography item // (such as "ed. by ...")
// NOTE(review): "abbrvbynames:" has 13 characters, but substr(11) is
// used, so the field name that is looked up starts with "s:" (13
// expected) -- confirm.
1067 docstring const kind = operator[](from_ascii(key.substr(11)));
1068 ret = getAuthorList(&buf, kind, false, false, true, false);
1069 if (ci.forceUpperCase && isLowerCase(ret[0]))
1070 ret[0] = uppercase(ret[0]);
1071 } else if (prefixIs(key, "fullbynames:")) {
1072 // Return a full name list. Suitable for further names inside a
1073 // bibliography item // (such as "ed. by ...")
// NOTE(review): "fullbynames:" has 12 characters, but substr(10) is
// used, so the field name that is looked up starts with "s:" (12
// expected) -- confirm.
1074 docstring const kind = operator[](from_ascii(key.substr(10)));
1075 ret = getAuthorList(&buf, kind, true, false, true, false);
1076 if (ci.forceUpperCase && isLowerCase(ret[0]))
1077 ret[0] = uppercase(ret[0]);
1078 } else if (prefixIs(key, "forceabbrvbynames:")) {
1079 // Special key to provide abbreviated name lists,
1080 // irrespective of maxcitenames. Suitable for further names inside a
1081 // bibliography item // (such as "ed. by ...")
// NOTE(review): "forceabbrvbynames:" has 18 characters, but substr(15) is
// used, so the field name that is looked up starts with "es:" (18
// expected) -- confirm.
1082 docstring const kind = operator[](from_ascii(key.substr(15)));
1083 ret = getAuthorList(&buf, kind, false, true, true, false);
1084 if (ci.forceUpperCase && isLowerCase(ret[0]))
1085 ret[0] = uppercase(ret[0]);
1086 } else if (key == "abbrvciteauthor") {
1087 // Special key to provide abbreviated author or
1088 // editor names (suitable for citation labels),
1089 // with respect to maxcitenames.
1090 ret = getAuthorOrEditorList(&buf, false, false);
1091 if (ci.forceUpperCase && isLowerCase(ret[0]))
1092 ret[0] = uppercase(ret[0]);
1093 } else if (key == "fullciteauthor") {
1094 // Return a full author or editor list (for citation labels)
1095 ret = getAuthorOrEditorList(&buf, true, false);
1096 if (ci.forceUpperCase && isLowerCase(ret[0]))
1097 ret[0] = uppercase(ret[0]);
1098 } else if (key == "forceabbrvciteauthor") {
1099 // Special key to provide abbreviated author or
1100 // editor names (suitable for citation labels),
1101 // irrespective of maxcitenames.
1102 ret = getAuthorOrEditorList(&buf, false, true);
1103 if (ci.forceUpperCase && isLowerCase(ret[0]))
1104 ret[0] = uppercase(ret[0]);
1105 } else if (key == "bibentry") {
1106 // Special key to provide the full bibliography entry: see getInfo()
1107 CiteEngineType const engine_type = buf.params().citeEngineType();
1108 DocumentClass const & dc = buf.params().documentClass();
1109 docstring const & format =
1110 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1112 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1113 } else if (key == "textbefore")
1114 ret = ci.textBefore;
1115 else if (key == "textafter")
1117 else if (key == "curpretext")
1118 ret = ci.getPretexts()[bib_key_];
1119 else if (key == "curposttext")
1120 ret = ci.getPosttexts()[bib_key_];
1121 else if (key == "year")
// presumably only done if the "clean:" prefix was given (cleanit); the
// condition is not visible in this excerpt
1126 ret = html::cleanAttr(ret);
1128 // make sure it is not too big
1129 support::truncateWithEllipsis(ret, maxsize);
1134 //////////////////////////////////////////////////////////////////////
1138 //////////////////////////////////////////////////////////////////////
1142 // A functor for use with sort, leading to case insensitive sorting
// NOTE(review): std::binary_function is deprecated since C++11 and was
// removed in C++17; presumably the base class can simply be dropped,
// since sort() only needs operator() -- confirm that nothing relies on
// the typedefs it provides.
1143 class compareNoCase: public binary_function<docstring, docstring, bool>
// \return true if s1 sorts before s2 according to compare_no_case()
1146 bool operator()(docstring const & s1, docstring const & s2) const {
1147 return compare_no_case(s1, s2) < 0;
// Collects the keys of the entries that data refers to via its
// "crossref" and "xdata" fields, including the keys that the referenced
// entries refer to in turn.
// \param data    the entry whose references are to be collected
// \param nested  set in the recursive calls; the "crossref" field is
//                not considered then
// NOTE(review): no guard against cyclic xdata references is visible in
// this excerpt -- confirm that the recursion terminates for such input.
1154 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1156 vector<docstring> result;
1157 if (!data.isBibTeX())
1159 // Legacy crossref field. This is not nestable.
1160 if (!nested && !data["crossref"].empty()) {
1161 docstring const xrefkey = data["crossref"];
1162 result.push_back(xrefkey);
1163 // However, check for nested xdatas
1164 BiblioInfo::const_iterator it = find(xrefkey);
1166 BibTeXInfo const & xref = it->second;
1167 vector<docstring> const nxdata = getXRefs(xref, true);
1168 if (!nxdata.empty())
1169 result.insert(result.end(), nxdata.begin(), nxdata.end());
1172 // Biblatex's xdata field. Infinitely nestable.
1173 // XData field can consist of a comma-separated list of keys
1174 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1175 if (!xdatakeys.empty()) {
1176 vector<docstring>::const_iterator xit = xdatakeys.begin();
1177 vector<docstring>::const_iterator xen = xdatakeys.end();
1178 for (; xit != xen; ++xit) {
1179 docstring const xdatakey = *xit;
// the key itself comes first, followed by the keys it refers to
1180 result.push_back(xdatakey);
1181 BiblioInfo::const_iterator it = find(xdatakey);
1183 BibTeXInfo const & xdata = it->second;
1184 vector<docstring> const nxdata = getXRefs(xdata, true);
1185 if (!nxdata.empty())
1186 result.insert(result.end(), nxdata.begin(), nxdata.end());
// Collects the keys of all entries, sorted case-insensitively.
1194 vector<docstring> const BiblioInfo::getKeys() const
1196 vector<docstring> bibkeys;
1197 BiblioInfo::const_iterator it = begin();
1198 for (; it != end(); ++it)
1199 bibkeys.push_back(it->first);
// compareNoCase makes the order independent of the case
1200 sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
// Collects the field names stored in field_names_ in a sorted vector.
1205 vector<docstring> const BiblioInfo::getFields() const
1207 vector<docstring> bibfields;
1208 set<docstring>::const_iterator it = field_names_.begin();
1209 set<docstring>::const_iterator end = field_names_.end();
1210 for (; it != end; ++it)
1211 bibfields.push_back(*it);
// NOTE(review): the elements come from a set<docstring>, which is
// iterated in ascending order, so this sort() presumably does not change
// anything -- confirm.
1212 sort(bibfields.begin(), bibfields.end());
// Collects the entry types stored in entry_types_ in a sorted vector.
1217 vector<docstring> const BiblioInfo::getEntries() const
1219 vector<docstring> bibentries;
1220 set<docstring>::const_iterator it = entry_types_.begin();
1221 set<docstring>::const_iterator end = entry_types_.end();
1222 for (; it != end; ++it)
1223 bibentries.push_back(*it);
// NOTE(review): the elements come from a set<docstring>, which is
// iterated in ascending order, so this sort() presumably does not change
// anything -- confirm.
1224 sort(bibentries.begin(), bibentries.end());
// Returns the author or editor list of the entry with the given key.
// \param key  the key of the entry
// \param buf  passed on to BibTeXInfo::getAuthorOrEditorList()
1229 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1231 BiblioInfo::const_iterator it = find(key);
1234 BibTeXInfo const & data = it->second;
// full is false here, so the list is subject to the maxnames limit
1235 return data.getAuthorOrEditorList(&buf, false);
// Look up the entry stored under `key` and return the value of its
// citeNumber() accessor.
//   key  citation key to look up in this BiblioInfo
1239 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1241 BiblioInfo::const_iterator it = find(key);
// `data` aliases the stored entry; no copy of the BibTeXInfo is made.
1244 BibTeXInfo const & data = it->second;
1245 return data.citeNumber();
// Determine the year of the entry stored under `key`.
// The year is first read from the entry itself (getYear()); the entries
// named by getXRefs(data) are then consulted as a further source.
// NOTE(review): presumably the cross-references are only tried when the
// entry's own year is empty -- confirm.
//   key           citation key to look up in this BiblioInfo
//   use_modifier  when true and the entry's modifier() is non-zero, that
//                 modifier is appended to the year
1249 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1251 BiblioInfo::const_iterator it = find(key);
1254 BibTeXInfo const & data = it->second;
1255 docstring year = data.getYear();
1257 // let's try the crossrefs
1258 vector<docstring> const xrefs = getXRefs(data);
// This `it` iterates over the cross-reference keys; it is a different
// variable from the lookup iterator declared at the top.
1262 vector<docstring>::const_iterator it = xrefs.begin();
1263 vector<docstring>::const_iterator en = xrefs.end();
1264 for (; it != en; ++it) {
1265 BiblioInfo::const_iterator const xrefit = find(*it);
// A cross-reference key without an entry in this BiblioInfo cannot
// supply a year.
1266 if (xrefit == end())
1268 BibTeXInfo const & xref_data = xrefit->second;
1269 year = xref_data.getYear();
// The modifier always comes from the entry itself (`data`), never from
// a cross-referenced entry.
1275 if (use_modifier && data.modifier() != 0)
1276 year += data.modifier();
// Buffer-aware variant of getYear(): obtains the year from
// getYear(key, use_modifier) and can return the buffer-translated string
// "No year" (buf.B_) instead.
// NOTE(review): presumably "No year" is returned when the computed year is
// empty -- confirm.
1281 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1283 docstring const year = getYear(key, use_modifier);
1285 return buf.B_("No year");
// Produce the information string for the entry stored under `key` by
// delegating to BibTeXInfo::getInfo(), handing it pointers to the entries
// that `key` cross-references.
//   key  citation key to look up in this BiblioInfo
//   buf  buffer passed through to BibTeXInfo::getInfo()
//   ci   cite item settings passed through to BibTeXInfo::getInfo()
// The translated message "Bibliography entry not found!" is the
// alternative return value (presumably for a key that find() does not
// locate -- confirm).
1290 docstring const BiblioInfo::getInfo(docstring const & key,
1291 Buffer const & buf, CiteItem const & ci) const
1293 BiblioInfo::const_iterator it = find(key);
1295 return docstring(_("Bibliography entry not found!"));
1296 BibTeXInfo const & data = it->second;
// Pointers to the cross-referenced entries; they point into this
// BiblioInfo's own storage (xrefit->second), nothing is copied.
1297 BibTeXInfoList xrefptrs;
1298 vector<docstring> const xrefs = getXRefs(data);
1299 if (!xrefs.empty()) {
// This `it` iterates over the cross-reference keys and is distinct from
// the lookup iterator declared above.
1300 vector<docstring>::const_iterator it = xrefs.begin();
1301 vector<docstring>::const_iterator en = xrefs.end();
1302 for (; it != en; ++it) {
1303 BiblioInfo::const_iterator const xrefit = find(*it);
// Cross-reference keys with no matching entry are silently left out.
1304 if (xrefit != end())
1305 xrefptrs.push_back(&(xrefit->second));
1308 return data.getInfo(xrefptrs, buf, ci);
// Build the citation label for a list of keys in the given cite style.
// The cite format of the document class is used as the starting string
// and is passed through BibTeXInfo::getLabel() once per key, each call
// receiving the result of the previous one.
//   keys   citation keys (taken by value, so the caller's vector is not
//          affected by anything done to it here)
//   buf    buffer supplying the cite engine type and document class
//   style  name of the cite style whose format is requested
//   ci     cite item settings; ci.max_size is the length limit handed to
//          truncateWithEllipsis()
1312 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1313 Buffer const & buf, string const & style, CiteItem const & ci) const
1315 size_t max_size = ci.max_size;
1316 // shorter makes no sense
// NOTE(review): the second LASSERT argument is presumably the recovery
// action run when the check fails, i.e. max_size is raised to 16 --
// confirm against support/lassert.h.
1317 LASSERT(max_size >= 16, max_size = 16);
1319 // we can't display more than 10 of these, anyway
1320 bool const too_many_keys = keys.size() > 10;
1324 CiteEngineType const engine_type = buf.params().citeEngineType();
1325 DocumentClass const & dc = buf.params().documentClass();
// `format` is a const reference bound to the temporary returned by
// from_utf8(); the temporary lives as long as the reference does.
1326 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1327 docstring ret = format;
1328 vector<docstring>::const_iterator key = keys.begin();
1329 vector<docstring>::const_iterator ken = keys.end();
// `i` counts the keys processed so far, starting at 0.
1330 for (int i = 0; key != ken; ++key, ++i) {
1331 BiblioInfo::const_iterator it = find(*key);
// A placeholder entry that carries only the key; `data` starts out
// referring to it.
// NOTE(review): presumably the stored entry is used instead when find()
// succeeds -- confirm.
1332 BibTeXInfo empty_data;
1333 empty_data.key(*key);
1334 BibTeXInfo & data = empty_data;
1335 vector<BibTeXInfo const *> xrefptrs;
1338 vector<docstring> const xrefs = getXRefs(data);
1339 if (!xrefs.empty()) {
// This `it` iterates over the cross-reference keys and is distinct from
// the lookup iterator declared at the top of the loop body.
1340 vector<docstring>::const_iterator it = xrefs.begin();
1341 vector<docstring>::const_iterator en = xrefs.end();
1342 for (; it != en; ++it) {
1343 BiblioInfo::const_iterator const xrefit = find(*it);
// Cross-reference keys with no matching entry are left out.
1344 if (xrefit != end())
1345 xrefptrs.push_back(&(xrefit->second));
// The two trailing flags tell the callee whether another key follows
// this one (key + 1 != ken) and whether this is the second key (i == 1).
1349 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
// NOTE(review): the ellipsis character is presumably appended only when
// too_many_keys is set -- confirm.
1353 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1354 support::truncateWithEllipsis(ret, max_size);
// Report whether the entry addressed by `key` is a BibTeX entry, i.e.
// return isBibTeX() of the entry found under key1.
// `key` is first passed through split() with ',' as the delimiter.
// NOTE(review): presumably split() leaves the part of `key` before the
// first comma in key1, so only the first key of a list is tested --
// confirm against support/lstrings.h.
1359 bool BiblioInfo::isBibtex(docstring const & key) const
1362 split(key, key1, ',');
1363 BiblioInfo::const_iterator it = find(key1);
1366 return it->second.isBibTeX();
// For each citation style in `styles`, pair the style's name with the
// label that getLabel() produces for `keys` in that style.
//   keys    citation keys to build the labels for
//   styles  citation styles to render; the result has one slot per style,
//           in the same order
//   buf     buffer passed through to getLabel()
//   ci      cite item settings passed through to getLabel()
// An empty vector of pairs is the alternative return value (presumably
// for an empty `keys` -- confirm).
1370 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1371 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1372 Buffer const & buf, CiteItem const & ci) const
1375 return vector<pair<docstring,docstring>>();
// Pre-size the result so that slot i corresponds to styles[i].
1378 CiteStringMap csm(styles.size());
1379 for (size_t i = 0; i != csm.size(); ++i) {
1380 style = styles[i].name;
// first: the style name converted with from_ascii(); second: the label.
1381 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
// Merge the contents of `info` into this BiblioInfo: its entries are
// inserted into bimap_, its field names into field_names_ and its entry
// types into entry_types_.
// NOTE(review): if bimap_ is a std::map, range insertion leaves an entry
// that already exists under the same key untouched, so on a key clash the
// entry already held here wins -- confirm that this is intended.
1388 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1390 bimap_.insert(info.begin(), info.end());
1391 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1392 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1398 // used in xhtml to sort a list of BibTeXInfo objects
// Ordering predicate for sort(): returns true when *lhs has to come before
// *rhs. Entries are compared by author-or-editor list first, then by year,
// then by the "title" field, each with docstring's operator<.
// Both pointers are dereferenced unconditionally, so neither may be null.
1399 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1401 docstring const lauth = lhs->getAuthorOrEditorList();
1402 docstring const rauth = rhs->getAuthorOrEditorList();
1403 docstring const lyear = lhs->getYear();
1404 docstring const ryear = rhs->getYear();
1405 docstring const ltitl = lhs->operator[]("title");
1406 docstring const rtitl = rhs->operator[]("title");
// Lexicographic comparison of the triple (author, year, title): a later
// criterion only decides when all earlier ones compare equal.
1407 return (lauth < rauth)
1408 || (lauth == rauth && lyear < ryear)
1409 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
// Rebuild cited_entries_: the keys of the BibTeX entries that are cited in
// `buf`, ordered by lSorter (author-or-editor list, then year, then title).
// The cited keys are read from the buffer's "citation" TOC.
1415 void BiblioInfo::collectCitedEntries(Buffer const & buf)
// Start from scratch; results of an earlier call are discarded.
1417 cited_entries_.clear();
1418 // We are going to collect all the citation keys used in the document,
1419 // getting them from the TOC.
1420 // FIXME We may want to collect these differently, in the first case,
1421 // so that we might have them in order of appearance.
// A set is used, so a key that is cited several times is kept only once.
1422 set<docstring> citekeys;
1423 shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1424 Toc::const_iterator it = toc->begin();
1425 Toc::const_iterator const en = toc->end();
1426 for (; it != en; ++it) {
// TOC items with an empty string contribute no keys.
1427 if (it->str().empty())
// One TOC item may name several keys; getVectorFromString() splits its
// string into the individual keys (default delimiter of that helper).
1429 vector<docstring> const keys = getVectorFromString(it->str());
1430 citekeys.insert(keys.begin(), keys.end());
// Nothing is cited: there is nothing to sort or to store.
1432 if (citekeys.empty())
1435 // We have a set of the keys used in this document.
1436 // We will now convert it to a list of the BibTeXInfo objects used in
// The pointers refer to entries stored in this BiblioInfo (bt->second).
1438 vector<BibTeXInfo const *> bi;
1439 set<docstring>::const_iterator cit = citekeys.begin();
1440 set<docstring>::const_iterator const cen = citekeys.end();
1441 for (; cit != cen; ++cit) {
1442 BiblioInfo::const_iterator const bt = find(*cit);
// Keys without an entry here, and entries that are not BibTeX entries,
// are not collected.
1443 if (bt == end() || !bt->second.isBibTeX())
1445 bi.push_back(&(bt->second));
1448 sort(bi.begin(), bi.end(), lSorter);
1450 // Now we can write the sorted keys
1451 vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1452 vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1453 for (; bit != ben; ++bit)
1454 cited_entries_.push_back((*bit)->key());
// Assign citation numbers, year modifiers and labels to the entries cited
// in `buf`.
// The cited entries are first collected and sorted (collectCitedEntries).
// A first pass over cited_entries_ sets cite numbers and modifiers such as
// the 'a' in "1984a"; a second pass sets each entry's label.
// NOTE(review): presumably `numbers` selects between the numeric branch
// (cite number as label) and the author-year branch (modifiers, "author
// year" label) in both passes -- confirm.
1458 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1460 collectCitedEntries(buf);
1461 CiteEngineType const engine_type = buf.params().citeEngineType();
// True when the ENGINE_TYPE_NUMERICAL bit is set in the engine type.
1462 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1466 // used to remember the last one we saw
1467 // we'll be comparing entries to see if we need to add
1468 // modifiers, like "1984a"
// bimap_.end() serves as the "no previous entry yet" marker.
1469 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
// First pass: cite numbers and modifiers. cited_entries_ is sorted by
// author, year and title, so entries sharing author and year are adjacent.
1471 vector<docstring>::const_iterator it = cited_entries_.begin();
1472 vector<docstring>::const_iterator const en = cited_entries_.end();
1473 for (; it != en; ++it) {
// Non-const lookup directly in bimap_, because the entry is modified.
1474 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1475 // this shouldn't happen, but...
1476 if (biit == bimap_.end())
1477 // ...fail gracefully, anyway.
1479 BibTeXInfo & entry = biit->second;
// Cite numbers are handed out in iteration order; keynumber is
// incremented before it is converted, so each entry gets the next value.
1481 docstring const num = convert<docstring>(++keynumber);
1482 entry.setCiteNumber(num);
1484 // The first test here is checking whether this is the first
1485 // time through the loop. If so, then we do not have anything
1486 // with which to compare.
1487 if (last != bimap_.end()
1488 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1489 // we access the year via getYear() so as to get it from the xref,
1490 // if we need to do so
1491 && getYear(entry.key()) == getYear(last->second.key())) {
// modifier == 0 means the previous entry has no modifier yet: this is
// the first clash for this author/year, so the previous entry is
// retroactively given 'a'.
1492 if (modifier == 0) {
1493 // so the last one should have been 'a'
1494 last->second.setModifier('a');
// 'z' is tested separately from the other modifier values.
// NOTE(review): what follows 'z' is decided by code outside this view --
// confirm.
1496 } else if (modifier == 'z')
1503 entry.setModifier(modifier);
1504 // remember the last one
// Second pass: now that numbers and modifiers are final, set the labels.
1509 it = cited_entries_.begin();
1510 for (; it != en; ++it) {
1511 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1512 // this shouldn't happen, but...
1513 if (biit == bimap_.end())
1514 // ...fail gracefully, anyway.
1516 BibTeXInfo & entry = biit->second;
// Numeric label: the cite number assigned in the first pass.
1518 entry.label(entry.citeNumber());
1520 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1521 // we do it this way so as to access the xref, if necessary
1522 // note that this also gives us the modifier
1523 docstring const year = getYear(*it, buf, true);
// Author-year label, built only when both parts are available...
1524 if (!auth.empty() && !year.empty())
1525 entry.label(auth + ' ' + year);
// ...the entry's key is the other label that can be assigned.
1527 entry.label(entry.key());
1533 //////////////////////////////////////////////////////////////////////
1537 //////////////////////////////////////////////////////////////////////
// Derive the properties of a CitationStyle (`cs`) from a cite command
// name.
//   command  the command as written, e.g. with an upper-case first letter
//            and/or a trailing '*'
//   params   buffer parameters, used to resolve a cite alias for `command`
// An upper-case first character sets cs.forceUpperCase and a trailing '*'
// sets cs.hasStarredVersion. Both markers are removed from the working
// copy `cmd`.
1540 CitationStyle citationStyleFromString(string const & command,
1541 BufferParams const & params)
// An empty command is tested first; command[0] and command.size() - 1
// below rely on `command` being non-empty.
1544 if (command.empty())
// If an alias is registered for the command, work on the alias instead.
1547 string const alias = params.getCiteAlias(command);
1548 string cmd = alias.empty() ? command : alias;
// The case test looks at the original command, while the lower-casing is
// applied to `cmd`, which may be the alias.
1549 if (isUpperCase(command[0])) {
1550 cs.forceUpperCase = true;
1551 cmd[0] = lowercase(cmd[0]);
// n is the index of the last character of `command`.
1554 size_t const n = command.size() - 1;
1555 if (command[n] == '*') {
1556 cs.hasStarredVersion = true;
// The star is detected on the original command; `cmd` is only shortened
// if it ends in '*' itself.
1557 if (suffixIs(cmd, '*'))
1558 cmd = cmd.substr(0, cmd.size() - 1);
1566 string citationStyleToString(const CitationStyle & cs, bool const latex)
1568 string cmd = latex ? cs.cmd : cs.name;
1569 if (cs.forceUpperCase)
1570 cmd[0] = uppercase(cmd[0]);
1571 if (cs.hasStarredVersion)