3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
10 * \author Jürgen Spitzmüller
12 * Full author contact details are available in file CREDITS.
17 #include "BiblioInfo.h"
19 #include "BufferParams.h"
20 #include "buffer_funcs.h"
23 #include "InsetIterator.h"
25 #include "output_xhtml.h"
26 #include "Paragraph.h"
27 #include "TextClass.h"
28 #include "TocBackend.h"
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/docstream.h"
33 #include "support/gettext.h"
34 #include "support/lassert.h"
35 #include "support/lstrings.h"
36 #include "support/regex.h"
37 #include "support/textutils.h"
43 using namespace lyx::support;
50 // Remove placeholders from names
51 docstring renormalize(docstring const & input)
53 docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
54 return subst(res, from_ascii("$$comma!"), from_ascii(","));
58 // Split the surname into prefix ("von-part") and family name
59 pair<docstring, docstring> parseSurname(docstring const & sname)
61 // Split the surname into its tokens
62 vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
63 if (pieces.size() < 2)
64 return make_pair(docstring(), sname);
66 // Now we look for pieces that begin with a lower case letter.
67 // All except for the very last token constitute the "von-part".
69 vector<docstring>::const_iterator it = pieces.begin();
70 vector<docstring>::const_iterator const en = pieces.end();
72 for (; it != en; ++it) {
75 // If this is the last piece, then what we now have is
76 // the family name, notwithstanding the casing.
79 char_type const c = (*it)[0];
80 // If the piece starts with an upper case char, we assume
81 // this is part of the surname.
84 // Nothing of the former, so add this piece to the prename
92 // Reconstruct the family name.
93 // Note that if we left the loop because it + 1 == en,
94 // then this will still do the right thing, i.e., make surname
95 // just be the last piece.
98 for (; it != en; ++it) {
105 return make_pair(prefix, surname);
117 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
// Two input shapes are handled. In the comma-separated form the first item
// is the surname (possibly carrying a "von" prefix), the last item is the
// prename and, with three items, the middle one is the suffix. In the
// space-separated form a single token is the surname, two tokens are
// "prename surname", and longer names are scanned for a lower-case prefix.
// Blanks and commas inside {...} groups are protected by placeholders while
// splitting; renormalize() is applied to every part that is stored.
118 name_parts nameParts(docstring const & iname)
124 // First we check for groupings (via {...}) and replace blanks and
125 // commas inside groups with temporary placeholders
128 docstring::const_iterator p = iname.begin();
129 while (p != iname.end()) {
130 // count grouping level
135 // generate string with probable placeholders
136 if (*p == ' ' && gl > 0)
137 name += from_ascii("$$space!");
138 else if (*p == ',' && gl > 0)
139 name += from_ascii("$$comma!");
145 // Now we look for a comma, and take the last name to be everything
146 // preceding the right-most one, so that we also get the name suffix
148 vector<docstring> pieces = getVectorFromString(name);
149 if (pieces.size() > 1) {
150 // Whether we have a name suffix or not, the prename is
152 res.prename = renormalize(pieces.back());
153 // The family name, conversely, is always the first item.
154 // However, it might contain a prefix (aka "von" part)
155 docstring const sname = pieces.front();
// Split the surname only once and reuse both halves.
pair<docstring, docstring> const sname_parts = parseSurname(sname);
156 res.prefix = renormalize(sname_parts.first);
157 res.surname = renormalize(sname_parts.second);
158 // If we have three pieces (the maximum allowed by BibTeX),
159 // the second one is the name suffix.
160 if (pieces.size() > 2)
161 res.suffix = renormalize(pieces.at(1));
165 // OK, so now we want to look for the last name.
166 // Split on spaces, to get various tokens.
167 pieces = getVectorFromString(name, from_ascii(" "));
168 // No space: Only a family name given
169 if (pieces.size() < 2) {
170 res.surname = renormalize(pieces.back());
173 // If we get two pieces, assume "prename surname"
174 if (pieces.size() == 2) {
175 res.prename = renormalize(pieces.front());
176 res.surname = renormalize(pieces.back());
180 // Three or more pieces: A name prefix (aka "von" part) might be included.
181 // We look for the first piece that begins with a lower case letter
182 // (which is the name prefix, if it is not the last token) or the last token.
184 vector<docstring>::const_iterator it = pieces.begin();
185 vector<docstring>::const_iterator const en = pieces.end();
187 for (; it != en; ++it) {
190 char_type const c = (*it)[0];
191 // If the piece starts with a lower case char, we assume
192 // this is the name prefix and thus prename is complete.
195 // Same if this is the last piece, which is always the surname.
198 // Nothing of the former, so add this piece to the prename
206 // Now reconstruct the family name and strip the prefix.
207 // Note that if we left the loop because it + 1 == en,
208 // then this will still do the right thing, i.e., make surname
209 // just be the last piece.
212 for (; it != en; ++it) {
219 res.prename = renormalize(prename);
// Split the reconstructed surname only once and reuse both halves.
pair<docstring, docstring> const surname_parts = parseSurname(surname);
220 res.prefix = renormalize(surname_parts.first);
221 res.surname = renormalize(surname_parts.second);
// Formats the author-type string `name` according to `scheme`.
// The name is split with nameParts(). Conditional groups of the form
// {%prename%[[...]]}, {%suffix%[[...]]} and {%prefix%[[...]]} in the scheme
// are recognised by reg1..reg3 first; afterwards the plain placeholders
// %prename%, %surname%, %prefix% and %suffix% are replaced by the
// corresponding name parts.
226 docstring constructName(docstring const & name, string const scheme)
228 // re-constructs a name from name parts according
// Parse the name only once; all four parts come from the same result.
name_parts const parts = nameParts(name);
230 docstring const prename = parts.prename;
231 docstring const surname = parts.surname;
232 docstring const prefix = parts.prefix;
233 docstring const suffix = parts.suffix;
235 static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
236 static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
237 static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
239 if (regex_match(scheme, sub, reg1)) {
241 if (!prename.empty())
245 if (regex_match(res, sub, reg2)) {
251 if (regex_match(res, sub, reg3)) {
257 docstring result = from_ascii(res);
258 result = subst(result, from_ascii("%prename%"), prename);
259 result = subst(result, from_ascii("%surname%"), surname);
260 result = subst(result, from_ascii("%prefix%"), prefix);
261 result = subst(result, from_ascii("%suffix%"), suffix);
266 vector<docstring> const getAuthors(docstring const & author)
268 // We check for groupings (via {...}) and only consider " and "
269 // outside groups as author separator. This is to account
270 // for cases such as {{Barnes and Noble, Inc.}}, which
271 // need to be treated as one single family name.
272 // We use temporary placeholders in order to differentiate the
273 // diverse " and " cases.
275 // First, we temporarily replace all ampersands. It is rather unusual
276 // in author names, but can happen (consider cases such as "C \& A Corp.").
277 docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
278 // Then, we temporarily make all " and " strings to ampersands in order
279 // to handle them later on a per-char level.
280 iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
281 // Now we traverse through the string and replace the "&" by the proper
282 // output in- and outside groups
285 docstring::const_iterator p = iname.begin();
286 while (p != iname.end()) {
287 // count grouping level
292 // generate string with probable placeholders
295 // Inside groups, we output "and"
296 name += from_ascii("and");
298 // Outside groups, we output a separator
299 name += from_ascii("$$namesep!");
306 // re-insert the literal ampersands
307 name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
309 // Now construct the actual vector
310 return getVectorFromString(name, from_ascii(" $$namesep! "));
314 bool multipleAuthors(docstring const author)
316 return getAuthors(author).size() > 1;
320 // converts a string containing LaTeX commands into unicode
322 docstring convertLaTeXCommands(docstring const & str)
327 bool scanning_cmd = false;
328 bool scanning_math = false;
329 bool escaped = false; // used to catch \$, etc.
330 while (!val.empty()) {
331 char_type const ch = val[0];
333 // if we're scanning math, we output everything until we
334 // find an unescaped $, at which point we break out.
341 scanning_math = false;
347 // if we're scanning a command name, then we just
348 // discard characters until we hit something that
351 if (isAlphaASCII(ch)) {
356 // so we're done with this command.
357 // now we fall through and check this character.
358 scanning_cmd = false;
361 // was the last character a \? If so, then this is something like:
362 // \\ or \$, so we'll just output it. That's probably not always right...
364 // exception: output \, as THIN SPACE
366 ret.push_back(0x2009);
377 scanning_math = true;
381 // we just ignore braces
382 if (ch == '{' || ch == '}') {
387 // we're going to check things that look like commands, so if
388 // this doesn't, just output it.
395 // ok, could be a command of some sort
396 // let's see if it corresponds to some unicode
397 // unicodesymbols has things in the form: \"{u},
398 // whereas we may see things like: \"u. So we'll
399 // look for that and change it, if necessary.
400 // FIXME: This is a sort of mini-tex2lyx.
401 // Use the real tex2lyx instead!
402 static lyx::regex const reg("^\\\\\\W\\w");
403 if (lyx::regex_search(to_utf8(val), reg)) {
404 val.insert(3, from_ascii("}"));
405 val.insert(2, from_ascii("{"));
409 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
410 Encodings::TEXT_CMD, termination, rem);
411 if (!cnvtd.empty()) {
412 // it did, so we'll take that bit and proceed with what's left
417 // it's a command of some sort
426 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
427 docstring processRichtext(docstring const & str, bool richtext)
432 bool scanning_rich = false;
433 while (!val.empty()) {
434 char_type const ch = val[0];
435 if (ch == '{' && val.size() > 1 && val[1] == '!') {
436 // beginning of rich text
437 scanning_rich = true;
441 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
443 scanning_rich = false;
451 // we need to escape '<' and '>'
459 } else if (!scanning_rich /* && !richtext */)
461 // else the character is discarded, which will happen only if
462 // richtext == false and we are scanning rich text
471 //////////////////////////////////////////////////////////////////////
475 //////////////////////////////////////////////////////////////////////
477 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
478 : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
484 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
485 bool full, bool forceshort) const
487 docstring author = operator[]("author");
489 author = operator[]("editor");
491 return getAuthorList(buf, author, full, forceshort);
495 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
496 docstring const & author, bool const full, bool const forceshort,
497 bool const allnames, bool const beginning) const
499 // The maxnames threshold depends on the engine
500 size_t maxnames = buf ?
501 buf->params().documentClass().max_citenames() : 2;
504 docstring const opt = label();
509 docstring const remainder = trim(split(opt, authors, '('));
510 if (remainder.empty())
511 // in this case, we didn't find a "(",
512 // so we don't have author (year)
520 // OK, we've got some names. Let's format them.
521 // Try to split the author list
522 vector<docstring> const authors = getAuthors(author);
526 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
527 : ENGINE_TYPE_DEFAULT;
529 // These are defined in the styles
531 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_etal")
533 string const namesep =
534 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_namesep")
536 string const lastnamesep =
537 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_lastnamesep")
539 string const pairnamesep =
540 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_pairnamesep")
542 string firstnameform =
543 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
544 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
546 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
547 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
548 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
549 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
551 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
552 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
553 string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
554 : "{%prefix%[[%prefix% ]]}%surname%";
556 // Shorten the list (with et al.) if forceshort is set
557 // and the list can actually be shortened, else if maxcitenames
558 // is passed and full is not set.
559 bool shorten = forceshort && authors.size() > 1;
560 vector<docstring>::const_iterator it = authors.begin();
561 vector<docstring>::const_iterator en = authors.end();
562 for (size_t i = 0; it != en; ++it, ++i) {
563 if (i >= maxnames && !full) {
567 if (*it == "others") {
568 retval += buf ? buf->B_(etal) : from_ascii(etal);
571 if (i > 0 && i == authors.size() - 1) {
572 if (authors.size() == 2)
573 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
575 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
577 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
579 retval += (i == 0) ? constructName(*it, firstnameform)
580 : constructName(*it, othernameform);
582 retval += constructName(*it, citenameform);
586 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
588 retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
591 return convertLaTeXCommands(retval);
595 docstring const BibTeXInfo::getYear() const
598 // first try legacy year field
599 docstring year = operator[]("year");
602 // now try biblatex's date field
603 year = operator[]("date");
604 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
605 // We only want the years.
606 static regex const yreg("[-]?([\\d]{4}).*");
607 static regex const ereg(".*/[-]?([\\d]{4}).*");
609 string const date = to_utf8(year);
610 if (!regex_match(date, sm, yreg))
611 // cannot parse year.
613 year = from_ascii(sm[1]);
614 // check for an endyear
615 if (regex_match(date, sm, ereg))
616 year += char_type(0x2013) + from_ascii(sm[1]);
620 docstring const opt = label();
625 docstring tmp = split(opt, authors, '(');
627 // we don't have author (year)
630 tmp = split(tmp, year, ')');
637 docstring parseOptions(docstring const & format, string & optkey,
638 docstring & ifpart, docstring & elsepart);
640 // Calls parseOptions to deal with an embedded option, such as:
641 // {%number%[[, no.~%number%]]}
642 // which must appear at the start of format. ifelsepart gets the
643 // whole of the option, and we return what's left after the option.
644 // we return format if there is an error.
645 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
647 LASSERT(format[0] == '{' && format[1] == '%', return format);
651 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
652 if (format == rest) { // parse error
653 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
656 LASSERT(rest.size() <= format.size(),
657 { ifelsepart = docstring(); return format; });
658 ifelsepart = format.substr(0, format.size() - rest.size());
663 // Gets a "clause" from a format string, where the clause is
664 // delimited by '[[' and ']]'. Returns what is left after the
665 // clause is removed, and returns format if there is an error.
666 docstring getClause(docstring const & format, docstring & clause)
668 docstring fmt = format;
671 // we'll remove characters from the front of fmt as we
673 while (!fmt.empty()) {
674 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
679 // check for an embedded option
680 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
682 docstring const rest = parseEmbeddedOption(fmt, part);
684 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
689 } else { // it's just a normal character
698 // parse an options string, which must appear at the start of the
699 // format parameter. puts the parsed bits in optkey, ifpart, and
700 // elsepart and returns what's left after the option is removed.
701 // if there's an error, it returns format itself.
702 docstring parseOptions(docstring const & format, string & optkey,
703 docstring & ifpart, docstring & elsepart)
705 LASSERT(format[0] == '{' && format[1] == '%', return format);
707 docstring fmt = format.substr(2);
708 size_t pos = fmt.find('%'); // end of key
709 if (pos == string::npos) {
710 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
713 optkey = to_utf8(fmt.substr(0, pos));
714 fmt = fmt.substr(pos + 1);
715 // [[format]] should be next
716 if (fmt[0] != '[' || fmt[1] != '[') {
717 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
721 docstring curfmt = fmt;
722 fmt = getClause(curfmt, ifpart);
724 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
728 if (fmt[0] == '}') // we're done, no else clause
729 return fmt.substr(1);
731 // else part should follow
732 if (fmt[0] != '[' || fmt[1] != '[') {
733 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
738 fmt = getClause(curfmt, elsepart);
740 if (fmt == curfmt || fmt[0] != '}') {
741 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
744 return fmt.substr(1);
751 Bug #9131 revealed an oddity in how we are generating citation information
752 when more than one key is given. We end up building a longer and longer format
753 string as we go, which we then have to re-parse, over and over and over again,
754 rather than generating the information for the individual keys and then putting
755 all of that together. We do that to deal with the way separators work, from what
756 I can tell, but it still feels like a hack. Fixing this would require quite a
757 bit of work, however.
759 docstring BibTeXInfo::expandFormat(docstring const & format,
760 BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
761 CiteItem const & ci, bool next, bool second) const
763 // incorrect use of macros could put us in an infinite loop
764 static int const max_passes = 5000;
765 // the use of overly large keys can lead to performance problems, due
766 // to eventual attempts to convert LaTeX macros to unicode. See bug
767 // #8944. By default, the size is limited to 128 (in CiteItem), but
768 // for specific purposes (such as XHTML export), it needs to be enlarged
769 // This is perhaps not the best solution, but it will have to do for now.
770 size_t const max_keysize = ci.max_key_size;
771 odocstringstream ret; // return value
773 bool scanning_key = false;
774 bool scanning_rich = false;
776 CiteEngineType const engine_type = buf.params().citeEngineType();
777 docstring fmt = format;
778 // we'll remove characters from the front of fmt as we
780 while (!fmt.empty()) {
781 if (counter > max_passes) {
782 LYXERR0("Recursion limit reached while parsing `"
787 char_type thischar = fmt[0];
788 if (thischar == '%') {
789 // beginning or end of key
792 scanning_key = false;
793 // so we replace the key with its value, which may be empty
797 buf.params().documentClass().getCiteMacro(engine_type, key);
798 fmt = from_utf8(val) + fmt.substr(1);
801 } else if (key[0] == '_') {
802 // a translatable bit
804 buf.params().documentClass().getCiteMacro(engine_type, key);
805 docstring const trans =
806 translateIfPossible(from_utf8(val), buf.params().language->code());
809 docstring const val =
810 getValueForKey(key, buf, ci, xrefs, max_keysize);
812 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
815 ret << from_ascii("{!</span>!}");
823 else if (thischar == '{') {
824 // beginning of option?
826 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
829 if (fmt.size() > 1) {
831 // it is the beginning of an optional format
835 docstring const newfmt =
836 parseOptions(fmt, optkey, ifpart, elsepart);
837 if (newfmt == fmt) // parse error
840 docstring const val =
841 getValueForKey(optkey, buf, ci, xrefs);
842 if (optkey == "next" && next)
843 ret << ifpart; // without expansion
844 else if (optkey == "second" && second) {
846 ret << expandFormat(ifpart, xrefs, newcounter, buf,
848 } else if (!val.empty()) {
850 ret << expandFormat(ifpart, xrefs, newcounter, buf,
852 } else if (!elsepart.empty()) {
854 ret << expandFormat(elsepart, xrefs, newcounter, buf,
857 // fmt will have been shortened for us already
861 // beginning of rich text
862 scanning_rich = true;
864 ret << from_ascii("{!");
868 // we are here if '{' was not followed by % or !.
869 // So it's just a character.
872 else if (scanning_rich && thischar == '!'
873 && fmt.size() > 1 && fmt[1] == '}') {
875 scanning_rich = false;
877 ret << from_ascii("!}");
880 else if (scanning_key)
881 key += char(thischar);
885 } catch (EncodingException & /* e */) {
886 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
892 LYXERR0("Never found end of key in `" << format << "'!");
896 LYXERR0("Never found end of rich text in `" << format << "'!");
903 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
904 Buffer const & buf, CiteItem const & ci) const
906 bool const richtext = ci.richtext;
908 if (!richtext && !info_.empty())
910 if (richtext && !info_richtext_.empty())
911 return info_richtext_;
914 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
919 CiteEngineType const engine_type = buf.params().citeEngineType();
920 DocumentClass const & dc = buf.params().documentClass();
921 docstring const & format =
922 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
924 info_ = expandFormat(format, xrefs, counter, buf,
928 // this probably shouldn't happen
933 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
934 return info_richtext_;
937 info_ = convertLaTeXCommands(processRichtext(info_, false));
942 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
943 Buffer const & buf, docstring const & format,
944 CiteItem const & ci, bool next, bool second) const
949 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
951 if (!loclabel.empty() && !next) {
952 loclabel = processRichtext(loclabel, ci.richtext);
953 loclabel = convertLaTeXCommands(loclabel);
960 docstring const & BibTeXInfo::operator[](docstring const & field) const
962 BibTeXInfo::const_iterator it = find(field);
965 static docstring const empty_value = docstring();
970 docstring const & BibTeXInfo::operator[](string const & field) const
972 return operator[](from_ascii(field));
// Returns the value that the key `oldkey` stands for in this entry.
// An optional "clean:" prefix is stripped from the key first. The key is
// looked up as a field of this entry and, if that is empty, in the entries
// passed in `xrefs`. A number of special keys (e.g. "entrytype", "label",
// "abbrvnames:<field>", "fullciteauthor", "bibentry", "textbefore") are
// understood as well. For the "...names:<field>" keys the field name is
// whatever follows the colon, so each substr() offset must equal the full
// length of the prefix tested by prefixIs(), colon included.
// `maxsize` is expected to be at least 16; the result is finally passed
// through support::truncateWithEllipsis(ret, maxsize).
976 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
977 CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
979 // anything less is pointless
980 LASSERT(maxsize >= 16, maxsize = 16);
982 bool cleanit = false;
983 if (prefixIs(oldkey, "clean:")) {
984 key = oldkey.substr(6);
988 docstring ret = operator[](key);
989 if (ret.empty() && !xrefs.empty()) {
990 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
991 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
992 for (; it != en; ++it) {
993 if (*it && !(**it)[key].empty()) {
1000 // some special keys
1001 // FIXME: dialog, textbefore and textafter have nothing to do with this
1002 if (key == "dialog" && ci.context == CiteItem::Dialog)
1003 ret = from_ascii("x"); // any non-empty string will do
1004 else if (key == "export" && ci.context == CiteItem::Export)
1005 ret = from_ascii("x"); // any non-empty string will do
1006 else if (key == "ifstar" && ci.Starred)
1007 ret = from_ascii("x"); // any non-empty string will do
1008 else if (key == "ifqualified" && ci.isQualified)
1009 ret = from_ascii("x"); // any non-empty string will do
1010 else if (key == "entrytype")
1012 else if (prefixIs(key, "ifentrytype:")
1013 && from_ascii(key.substr(12)) == entry_type_)
1014 ret = from_ascii("x"); // any non-empty string will do
1015 else if (key == "key")
1017 else if (key == "label")
1019 else if (key == "modifier" && modifier_ != 0)
1021 else if (key == "numericallabel")
1023 else if (prefixIs(key, "ifmultiple:")) {
1024 // Return whether we have multiple authors
1025 docstring const kind = operator[](from_ascii(key.substr(11)));
1026 if (multipleAuthors(kind))
1027 ret = from_ascii("x"); // any non-empty string will do
1029 else if (prefixIs(key, "abbrvnames:")) {
1030 // Special key to provide abbreviated name list,
1031 // with respect to maxcitenames. Suitable for Bibliography
1033 docstring const kind = operator[](from_ascii(key.substr(11)));
1034 ret = getAuthorList(&buf, kind, false, false, true);
1035 if (ci.forceUpperCase && isLowerCase(ret[0]))
1036 ret[0] = uppercase(ret[0]);
1037 } else if (prefixIs(key, "fullnames:")) {
1038 // Return a full name list. Suitable for Bibliography
1040 docstring const kind = operator[](from_ascii(key.substr(10)));
1041 ret = getAuthorList(&buf, kind, true, false, true);
1042 if (ci.forceUpperCase && isLowerCase(ret[0]))
1043 ret[0] = uppercase(ret[0]);
1044 } else if (prefixIs(key, "forceabbrvnames:")) {
1045 // Special key to provide abbreviated name lists,
1046 // irrespective of maxcitenames. Suitable for Bibliography
// "forceabbrvnames:" is 16 characters long; skip all of it.
1048 docstring const kind = operator[](from_ascii(key.substr(16)));
1049 ret = getAuthorList(&buf, kind, false, true, true);
1050 if (ci.forceUpperCase && isLowerCase(ret[0]))
1051 ret[0] = uppercase(ret[0]);
1052 } else if (prefixIs(key, "abbrvbynames:")) {
1053 // Special key to provide abbreviated name list,
1054 // with respect to maxcitenames. Suitable for further names inside a
1055 // bibliography item (such as "ed. by ...")
// "abbrvbynames:" is 13 characters long; skip all of it.
1056 docstring const kind = operator[](from_ascii(key.substr(13)));
1057 ret = getAuthorList(&buf, kind, false, false, true, false);
1058 if (ci.forceUpperCase && isLowerCase(ret[0]))
1059 ret[0] = uppercase(ret[0]);
1060 } else if (prefixIs(key, "fullbynames:")) {
1061 // Return a full name list. Suitable for further names inside a
1062 // bibliography item (such as "ed. by ...")
// "fullbynames:" is 12 characters long; skip all of it.
1063 docstring const kind = operator[](from_ascii(key.substr(12)));
1064 ret = getAuthorList(&buf, kind, true, false, true, false);
1065 if (ci.forceUpperCase && isLowerCase(ret[0]))
1066 ret[0] = uppercase(ret[0]);
1067 } else if (prefixIs(key, "forceabbrvbynames:")) {
1068 // Special key to provide abbreviated name lists,
1069 // irrespective of maxcitenames. Suitable for further names inside a
1070 // bibliography item (such as "ed. by ...")
// "forceabbrvbynames:" is 18 characters long; skip all of it.
1071 docstring const kind = operator[](from_ascii(key.substr(18)));
1072 ret = getAuthorList(&buf, kind, false, true, true, false);
1073 if (ci.forceUpperCase && isLowerCase(ret[0]))
1074 ret[0] = uppercase(ret[0]);
1075 } else if (key == "abbrvciteauthor") {
1076 // Special key to provide abbreviated author or
1077 // editor names (suitable for citation labels),
1078 // with respect to maxcitenames.
1079 ret = getAuthorOrEditorList(&buf, false, false);
1080 if (ci.forceUpperCase && isLowerCase(ret[0]))
1081 ret[0] = uppercase(ret[0]);
1082 } else if (key == "fullciteauthor") {
1083 // Return a full author or editor list (for citation labels)
1084 ret = getAuthorOrEditorList(&buf, true, false);
1085 if (ci.forceUpperCase && isLowerCase(ret[0]))
1086 ret[0] = uppercase(ret[0]);
1087 } else if (key == "forceabbrvciteauthor") {
1088 // Special key to provide abbreviated author or
1089 // editor names (suitable for citation labels),
1090 // irrespective of maxcitenames.
1091 ret = getAuthorOrEditorList(&buf, false, true);
1092 if (ci.forceUpperCase && isLowerCase(ret[0]))
1093 ret[0] = uppercase(ret[0]);
1094 } else if (key == "bibentry") {
1095 // Special key to provide the full bibliography entry: see getInfo()
1096 CiteEngineType const engine_type = buf.params().citeEngineType();
1097 DocumentClass const & dc = buf.params().documentClass();
1098 docstring const & format =
1099 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1101 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1102 } else if (key == "textbefore")
1103 ret = ci.textBefore;
1104 else if (key == "textafter")
1106 else if (key == "curpretext")
1107 ret = ci.getPretexts()[bib_key_];
1108 else if (key == "curposttext")
1109 ret = ci.getPosttexts()[bib_key_];
1110 else if (key == "year")
1115 ret = html::cleanAttr(ret);
1117 // make sure it is not too big
1118 support::truncateWithEllipsis(ret, maxsize);
1123 //////////////////////////////////////////////////////////////////////
1127 //////////////////////////////////////////////////////////////////////
1131 // A functor for use with sort, leading to case insensitive sorting
1132 class compareNoCase: public binary_function<docstring, docstring, bool>
1135 bool operator()(docstring const & s1, docstring const & s2) const {
1136 return compare_no_case(s1, s2) < 0;
1143 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1145 vector<docstring> result;
1146 if (!data.isBibTeX())
1148 // Legacy crossref field. This is not nestable.
1149 if (!nested && !data["crossref"].empty()) {
1150 docstring const xrefkey = data["crossref"];
1151 result.push_back(xrefkey);
1152 // However, check for nested xdatas
1153 BiblioInfo::const_iterator it = find(xrefkey);
1155 BibTeXInfo const & xref = it->second;
1156 vector<docstring> const nxdata = getXRefs(xref, true);
1157 if (!nxdata.empty())
1158 result.insert(result.end(), nxdata.begin(), nxdata.end());
1161 // Biblatex's xdata field. Infinitely nestable.
1162 // XData field can consist of a comma-separated list of keys
1163 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1164 if (!xdatakeys.empty()) {
1165 vector<docstring>::const_iterator xit = xdatakeys.begin();
1166 vector<docstring>::const_iterator xen = xdatakeys.end();
1167 for (; xit != xen; ++xit) {
1168 docstring const xdatakey = *xit;
1169 result.push_back(xdatakey);
1170 BiblioInfo::const_iterator it = find(xdatakey);
1172 BibTeXInfo const & xdata = it->second;
1173 vector<docstring> const nxdata = getXRefs(xdata, true);
1174 if (!nxdata.empty())
1175 result.insert(result.end(), nxdata.begin(), nxdata.end());
1183 vector<docstring> const BiblioInfo::getKeys() const
1185 vector<docstring> bibkeys;
1186 BiblioInfo::const_iterator it = begin();
1187 for (; it != end(); ++it)
1188 bibkeys.push_back(it->first);
1189 sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
1194 vector<docstring> const BiblioInfo::getFields() const
1196 vector<docstring> bibfields;
1197 set<docstring>::const_iterator it = field_names_.begin();
1198 set<docstring>::const_iterator end = field_names_.end();
1199 for (; it != end; ++it)
1200 bibfields.push_back(*it);
1201 sort(bibfields.begin(), bibfields.end());
1206 vector<docstring> const BiblioInfo::getEntries() const
1208 vector<docstring> bibentries;
1209 set<docstring>::const_iterator it = entry_types_.begin();
1210 set<docstring>::const_iterator end = entry_types_.end();
1211 for (; it != end; ++it)
1212 bibentries.push_back(*it);
1213 sort(bibentries.begin(), bibentries.end());
1218 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1220 BiblioInfo::const_iterator it = find(key);
1223 BibTeXInfo const & data = it->second;
1224 return data.getAuthorOrEditorList(&buf, false);
1228 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1230 BiblioInfo::const_iterator it = find(key);
1233 BibTeXInfo const & data = it->second;
1234 return data.citeNumber();
// Determines the year string for the entry stored under key.
// The starting value is the entry's own getYear(). The keys delivered by
// getXRefs(data) are then looked up in this container, and getYear() of a
// cross-referenced entry can replace the value. If use_modifier is true and
// data.modifier() is non-zero, the modifier is appended to the year.
// NOTE(review): presumably the cross-references are consulted only when the
// entry's own year is empty, and the walk stops at the first usable year --
// confirm.
1238 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1240 BiblioInfo::const_iterator it = find(key);
1243 BibTeXInfo const & data = it->second;
1244 docstring year = data.getYear();
1246 // let's try the crossrefs
1247 vector<docstring> const xrefs = getXRefs(data);
// This iterator walks the cross-reference keys; it shadows the outer 'it'
// that refers to the entry itself.
1251 vector<docstring>::const_iterator it = xrefs.begin();
1252 vector<docstring>::const_iterator en = xrefs.end();
1253 for (; it != en; ++it) {
1254 BiblioInfo::const_iterator const xrefit = find(*it);
// A cross-reference key without an entry in this container cannot supply
// a year (presumably it is skipped -- confirm).
1255 if (xrefit == end())
1257 BibTeXInfo const & xref_data = xrefit->second;
1258 year = xref_data.getYear();
// The modifier is taken from the entry itself (data), not from a
// cross-referenced entry.
1264 if (use_modifier && data.modifier() != 0)
1265 year += data.modifier();
// Overload of getYear() that additionally takes the buffer.
// The year is obtained from getYear(key, use_modifier); the function can also
// return buf.B_("No year").
// NOTE(review): presumably the "No year" string is returned only when the
// year obtained above is empty -- confirm.
1270 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1272 docstring const year = getYear(key, use_modifier);
1274 return buf.B_("No year");
// Returns the information string for the entry stored under key.
// The keys delivered by getXRefs(data) are looked up in this container and
// pointers to the entries that exist are collected; the result is whatever
// BibTeXInfo::getInfo(xrefptrs, buf, ci) produces for the entry.
// NOTE(review): the "Bibliography entry not found!" message is presumably
// returned when find(key) yields end() -- confirm.
1279 docstring const BiblioInfo::getInfo(docstring const & key,
1280 Buffer const & buf, CiteItem const & ci) const
1282 BiblioInfo::const_iterator it = find(key);
1284 return docstring(_("Bibliography entry not found!"));
1285 BibTeXInfo const & data = it->second;
1286 BibTeXInfoList xrefptrs;
1287 vector<docstring> const xrefs = getXRefs(data);
1288 if (!xrefs.empty()) {
// This inner 'it' walks the cross-reference keys and shadows the lookup
// result declared above.
1289 vector<docstring>::const_iterator it = xrefs.begin();
1290 vector<docstring>::const_iterator en = xrefs.end();
1291 for (; it != en; ++it) {
1292 BiblioInfo::const_iterator const xrefit = find(*it);
// Cross-reference keys that have no entry in this container are left out.
1293 if (xrefit != end())
1294 xrefptrs.push_back(&(xrefit->second));
1297 return data.getInfo(xrefptrs, buf, ci);
// Produces the citation label for a list of keys.
// The format string comes from dc.getCiteFormat(engine_type, style, false,
// "cite") of the buffer's document class. The label is built incrementally:
// ret starts as that format string and is passed through
// BibTeXInfo::getLabel() once per key. Finally the result is shortened with
// support::truncateWithEllipsis(ret, max_size).
// keys is taken by value, so the function works on its own copy of the list.
1301 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1302 Buffer const & buf, string const & style, CiteItem const & ci) const
1304 size_t max_size = ci.max_size;
1305 // shorter makes no sense
// NOTE(review): the second LASSERT argument presumably runs when the
// condition fails, which would raise max_size to 16 -- confirm against
// support/lassert.h.
1306 LASSERT(max_size >= 16, max_size = 16);
1308 // we can't display more than 10 of these, anyway
1309 bool const too_many_keys = keys.size() > 10;
1313 CiteEngineType const engine_type = buf.params().citeEngineType();
1314 DocumentClass const & dc = buf.params().documentClass();
// NOTE(review): format is a const reference bound to the result of
// from_utf8(); if that result is returned by value, the temporary lives as
// long as the reference -- confirm.
1315 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1316 docstring ret = format;
1317 vector<docstring>::const_iterator key = keys.begin();
1318 vector<docstring>::const_iterator ken = keys.end();
// i counts the keys processed so far, starting at 0.
1319 for (int i = 0; key != ken; ++key, ++i) {
1320 BiblioInfo::const_iterator it = find(*key);
// Placeholder entry that carries only the key.
// NOTE(review): data refers to this placeholder; presumably it is filled
// from the entry found above when the key is known -- confirm.
1321 BibTeXInfo empty_data;
1322 empty_data.key(*key);
1323 BibTeXInfo & data = empty_data;
1324 vector<BibTeXInfo const *> xrefptrs;
1327 vector<docstring> const xrefs = getXRefs(data);
1328 if (!xrefs.empty()) {
// This inner 'it' walks the cross-reference keys and shadows the lookup
// result declared at the top of the loop body.
1329 vector<docstring>::const_iterator it = xrefs.begin();
1330 vector<docstring>::const_iterator en = xrefs.end();
1331 for (; it != en; ++it) {
1332 BiblioInfo::const_iterator const xrefit = find(*it);
// Only cross-references that exist in this container are handed on.
1333 if (xrefit != end())
1334 xrefptrs.push_back(&(xrefit->second));
// 'key + 1 != ken' is true while further keys follow the current one.
// NOTE(review): the last argument (i == 1, true only for the second key)
// has a meaning defined by BibTeXInfo::getLabel -- confirm.
1338 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
// 0x2026 is the Unicode code point of the horizontal ellipsis character.
// NOTE(review): presumably appended only when too_many_keys is set -- confirm.
1342 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1343 support::truncateWithEllipsis(ret, max_size);
// Reports isBibTeX() of the entry selected by key.
// key is first passed through split(key, key1, ','), and the lookup is done
// with key1 rather than with key itself.
// NOTE(review): split() presumably stores the part of key before the first
// ',' in key1, so that only the first key of a comma-separated list is
// examined -- confirm against support/lstrings.h. Handling of an unknown key
// (find() yielding end()) is presumably done before the dereference -- confirm.
1348 bool BiblioInfo::isBibtex(docstring const & key) const
1351 split(key, key1, ',');
1352 BiblioInfo::const_iterator it = find(key1);
1355 return it->second.isBibTeX();
// Produces one label per citation style for the given keys.
// The result has styles.size() elements; element i is
// getLabel(keys, buf, styles[i].name, ci).
// NOTE(review): the early return of an empty vector is presumably taken for
// an empty input (which one is not shown here) -- confirm.
1359 vector<docstring> const BiblioInfo::getCiteStrings(
1360 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1361 Buffer const & buf, CiteItem const & ci) const
1364 return vector<docstring>();
// The vector is sized up front so each slot can be assigned by index.
1367 vector<docstring> vec(styles.size());
1368 for (size_t i = 0; i != vec.size(); ++i) {
// vec and styles have the same size, so i is a valid index into both.
1369 style = styles[i].name;
1370 vec[i] = getLabel(keys, buf, style, ci);
// Adds the contents of another BiblioInfo to this one: its entries go into
// bimap_, its field names into field_names_ and its entry types into
// entry_types_.
// NOTE(review): bimap_ is used as a map<docstring, BibTeXInfo> elsewhere in
// this file; range insertion into a std::map keeps an existing element when
// the key is already present, so entries of 'info' would not overwrite
// entries already stored here -- confirm this is the intended behaviour.
1377 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1379 bimap_.insert(info.begin(), info.end());
1380 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1381 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1387 // used in xhtml to sort a list of BibTeXInfo objects
// Ordering predicate: returns true when *lhs is to be placed before *rhs.
// Entries are compared by author/editor list first, then by year, then by
// the "title" field; a later criterion is consulted only when all earlier
// ones compare equal. Both pointers are dereferenced without a null check.
1388 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1390 docstring const lauth = lhs->getAuthorOrEditorList();
1391 docstring const rauth = rhs->getAuthorOrEditorList();
// The year is taken from the entry itself via BibTeXInfo::getYear().
1392 docstring const lyear = lhs->getYear();
1393 docstring const ryear = rhs->getYear();
1394 docstring const ltitl = lhs->operator[]("title");
1395 docstring const rtitl = rhs->operator[]("title");
// Lexicographic comparison over (author/editor list, year, title).
1396 return (lauth < rauth)
1397 || (lauth == rauth && lyear < ryear)
1398 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
// Rebuilds cited_entries_ from the citations recorded in the buffer's
// "citation" TOC.
// Steps: (1) gather the distinct citation keys from the TOC items,
// (2) keep the keys that have an entry in this container for which
// isBibTeX() is true, (3) sort those entries with lSorter, and
// (4) store their keys, in that order, in cited_entries_.
1404 void BiblioInfo::collectCitedEntries(Buffer const & buf)
// Start from an empty list on every call.
1406 cited_entries_.clear();
1407 // We are going to collect all the citation keys used in the document,
1408 // getting them from the TOC.
1409 // FIXME We may want to collect these differently, in the first case,
1410 // so that we might have them in order of appearance.
// A set is used, so a key cited several times is stored only once.
1411 set<docstring> citekeys;
1412 shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1413 Toc::const_iterator it = toc->begin();
1414 Toc::const_iterator const en = toc->end();
1415 for (; it != en; ++it) {
// TOC items with an empty string carry no keys (presumably skipped -- confirm).
1416 if (it->str().empty())
// One TOC item may hold several keys; getVectorFromString() splits them
// (presumably at commas, its default delimiter -- confirm).
1418 vector<docstring> const keys = getVectorFromString(it->str());
1419 citekeys.insert(keys.begin(), keys.end());
// Nothing is cited (presumably the function returns here -- confirm).
1421 if (citekeys.empty())
1424 // We have a set of the keys used in this document.
1425 // We will now convert it to a list of the BibTeXInfo objects used in
// The vector holds pointers to entries owned by this container.
1427 vector<BibTeXInfo const *> bi;
1428 set<docstring>::const_iterator cit = citekeys.begin();
1429 set<docstring>::const_iterator const cen = citekeys.end();
1430 for (; cit != cen; ++cit) {
1431 BiblioInfo::const_iterator const bt = find(*cit);
// Keys that are unknown here, or whose entry is not a BibTeX one, are not
// added to the list (presumably skipped -- confirm).
1432 if (bt == end() || !bt->second.isBibTeX())
1434 bi.push_back(&(bt->second));
1437 sort(bi.begin(), bi.end(), lSorter);
1439 // Now we can write the sorted keys
1440 vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1441 vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1442 for (; bit != ben; ++bit)
1443 cited_entries_.push_back((*bit)->key());
// Assigns citation numbers, year modifiers and labels to the cited entries.
// cited_entries_ is first refreshed by collectCitedEntries(buf). Two passes
// over that list follow:
//   pass 1 sets a cite number and/or a modifier (as in "1984a") on entries,
//   pass 2 sets each entry's label.
// The entries are modified in place through bimap_.
// NOTE(review): which of the number/modifier and number/author-year branches
// runs presumably depends on 'numbers' -- confirm.
1447 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1449 collectCitedEntries(buf);
1450 CiteEngineType const engine_type = buf.params().citeEngineType();
// True when the engine type has the ENGINE_TYPE_NUMERICAL bit set.
1451 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1455 // used to remember the last one we saw
1456 // we'll be comparing entries to see if we need to add
1457 // modifiers, like "1984a"
// bimap_.end() serves as the "nothing seen yet" marker.
1458 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
// Pass 1. The comparison with 'last' only looks at the directly preceding
// entry, so it relies on the order of cited_entries_ (established by the
// sort in collectCitedEntries()).
1460 vector<docstring>::const_iterator it = cited_entries_.begin();
1461 vector<docstring>::const_iterator const en = cited_entries_.end();
1462 for (; it != en; ++it) {
1463 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1464 // this shouldn't happen, but...
1465 if (biit == bimap_.end())
1466 // ...fail gracefully, anyway.
1468 BibTeXInfo & entry = biit->second;
// The counter is incremented before use and converted to a docstring.
1470 docstring const num = convert<docstring>(++keynumber);
1471 entry.setCiteNumber(num);
1473 // The first test here is checking whether this is the first
1474 // time through the loop. If so, then we do not have anything
1475 // with which to compare.
1476 if (last != bimap_.end()
1477 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1478 // we access the year via getYear() so as to get it from the xref,
1479 // if we need to do so
1480 && getYear(entry.key()) == getYear(last->second.key())) {
// modifier == 0 means the previous entry has not been given a letter yet.
1481 if (modifier == 0) {
1482 // so the last one should have been 'a'
1483 last->second.setModifier('a');
// NOTE(review): 'z' is the last available letter; what happens when it is
// reached is not documented here -- confirm.
1485 } else if (modifier == 'z')
1492 entry.setModifier(modifier);
1493 // remember the last one
// Pass 2: 'it' is rewound; 'en' is reused from pass 1.
1498 it = cited_entries_.begin();
1499 for (; it != en; ++it) {
1500 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1501 // this shouldn't happen, but...
1502 if (biit == bimap_.end())
1503 // ...fail gracefully, anyway.
1505 BibTeXInfo & entry = biit->second;
// Label taken from the cite number assigned in pass 1.
1507 entry.label(entry.citeNumber());
1509 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1510 // we do it this way so as to access the xref, if necessary
1511 // note that this also gives us the modifier
1512 docstring const year = getYear(*it, buf, true);
// Author-year label when both parts are available ...
1513 if (!auth.empty() && !year.empty())
1514 entry.label(auth + ' ' + year);
// ... and the entry's key as the alternative label.
1516 entry.label(entry.key());
1522 //////////////////////////////////////////////////////////////////////
1526 //////////////////////////////////////////////////////////////////////
// Derives a CitationStyle from a citation command name.
// The command is mapped through params.getCiteAlias(); the alias is used
// when it is non-empty, otherwise the command itself. A leading upper-case
// letter in command sets cs.forceUpperCase, and a trailing '*' in command
// sets cs.hasStarredVersion.
// NOTE(review): presumably the remaining cmd is stored in cs and cs is
// returned -- confirm.
1529 CitationStyle citationStyleFromString(string const & command,
1530 BufferParams const & params)
// NOTE(review): command[0] and command[n] below require a non-empty
// command; presumably this test leads to an early return -- confirm.
1533 if (command.empty())
1536 string const alias = params.getCiteAlias(command);
1537 string cmd = alias.empty() ? command : alias;
// The case test looks at the original command, whereas the lower-casing is
// applied to cmd, which may be the alias.
1538 if (isUpperCase(command[0])) {
1539 cs.forceUpperCase = true;
1540 cmd[0] = lowercase(cmd[0]);
// n is the index of the last character of command.
1543 size_t const n = command.size() - 1;
1544 if (command[n] == '*') {
1545 cs.hasStarredVersion = true;
// The '*' is removed from cmd only if cmd itself ends with one.
1546 if (suffixIs(cmd, '*'))
1547 cmd = cmd.substr(0, cmd.size() - 1);
1555 string citationStyleToString(const CitationStyle & cs, bool const latex)
1557 string cmd = latex ? cs.cmd : cs.name;
1558 if (cs.forceUpperCase)
1559 cmd[0] = uppercase(cmd[0]);
1560 if (cs.hasStarredVersion)