3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
10 * \author Jürgen Spitzmüller
12 * Full author contact details are available in file CREDITS.
17 #include "BiblioInfo.h"
19 #include "BufferParams.h"
20 #include "buffer_funcs.h"
23 #include "InsetIterator.h"
25 #include "output_xhtml.h"
26 #include "Paragraph.h"
27 #include "TextClass.h"
28 #include "TocBackend.h"
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/docstream.h"
33 #include "support/gettext.h"
34 #include "support/lassert.h"
35 #include "support/lstrings.h"
36 #include "support/regex.h"
37 #include "support/textutils.h"
43 using namespace lyx::support;
50 // Remove placeholders from names
// Returns a copy of input in which every "$$space!" placeholder has been
// replaced by a blank and every "$$comma!" placeholder by a comma.
51 docstring renormalize(docstring const & input)
53 docstring res = subst(input, from_ascii("$$space!"), from_ascii(" "));
54 return subst(res, from_ascii("$$comma!"), from_ascii(","));
58 // Split the surname into prefix ("von-part") and family name
// Returns a pair whose first member is the prefix and whose second member
// is the family name.
59 pair<docstring, docstring> parseSurname(docstring const & sname)
61 // Split the surname into its tokens
62 vector<docstring> pieces = getVectorFromString(sname, from_ascii(" "));
// Fewer than two tokens: there is no prefix, the input is the family name.
63 if (pieces.size() < 2)
64 return make_pair(docstring(), sname);
66 // Now we look for pieces that begin with a lower case letter.
67 // All except for the very last token constitute the "von-part".
69 vector<docstring>::const_iterator it = pieces.begin();
70 vector<docstring>::const_iterator const en = pieces.end();
72 for (; it != en; ++it) {
75 // If this is the last piece, then what we now have is
76 // the family name, notwithstanding the casing.
79 char_type const c = (*it)[0];
80 // If the piece starts with an upper case char, we assume
81 // this is part of the surname.
84 // Nothing of the former, so add this piece to the prefix
// NOTE(review): the comment above used to say "prename"; the statement it
// describes is not visible in this excerpt, confirm it appends to prefix.
92 // Reconstruct the family name.
93 // Note that if we left the loop because it + 1 == en,
94 // then this will still do the right thing, i.e., make surname
95 // just be the last piece.
// The iterator is deliberately not reset: this loop continues from the
// position at which the first loop stopped.
98 for (; it != en; ++it) {
105 return make_pair(prefix, surname);
117 // gets the name parts (prename, surname, prefix, suffix) from an author-type string
// Two input shapes are handled: comma-separated forms (surname first,
// prename last, optional suffix in between) and space-separated forms
// (prename tokens first, surname last). Blanks and commas inside {...}
// groups are shielded by placeholders while splitting and restored with
// renormalize() when the parts are stored.
118 name_parts nameParts(docstring const & iname)
124 // First we check for groupings (via {...}) and replace blanks and
125 // commas inside groups with temporary placeholders
128 docstring::const_iterator p = iname.begin();
129 while (p != iname.end()) {
130 // count grouping level
135 // generate string with probable placeholders
// gl > 0 means we are currently inside at least one group.
136 if (*p == ' ' && gl > 0)
137 name += from_ascii("$$space!");
138 else if (*p == ',' && gl > 0)
139 name += from_ascii("$$comma!");
145 // Now we look for a comma, and take the last name to be everything
146 // preceding the right-most one, so that we also get the name suffix
148 vector<docstring> pieces = getVectorFromString(name);
149 if (pieces.size() > 1) {
150 // Whether we have a name suffix or not, the prename is the last piece.
152 res.prename = renormalize(pieces.back());
153 // The family name, conversely, is always the first item.
154 // However, it might contain a prefix (aka "von" part)
155 docstring const sname = pieces.front();
// NOTE(review): parseSurname(sname) is evaluated twice with the same
// argument; the pair could be computed once and reused.
156 res.prefix = renormalize(parseSurname(sname).first);
157 res.surname = renormalize(parseSurname(sname).second);
158 // If we have three pieces (the maximum allowed by BibTeX),
159 // the second one is the name suffix.
160 if (pieces.size() > 2)
161 res.suffix = renormalize(pieces.at(1));
165 // OK, so now we want to look for the last name.
166 // Split on spaces, to get various tokens.
167 pieces = getVectorFromString(name, from_ascii(" "));
168 // No space: Only a family name given
// NOTE(review): pieces.back() is undefined on an empty vector; confirm
// that getVectorFromString cannot return an empty result at this point.
169 if (pieces.size() < 2) {
170 res.surname = renormalize(pieces.back());
173 // If we get two pieces, assume "prename surname"
174 if (pieces.size() == 2) {
175 res.prename = renormalize(pieces.front());
176 res.surname = renormalize(pieces.back());
180 // Remaining case: A name prefix (aka "von" part) might be included.
181 // We look for the first piece that begins with a lower case letter
182 // (which is the name prefix, if it is not the last token) or the last token.
// NOTE(review): the comment above used to read "More than 3 pieces", but
// the size checks above only cover sizes below 2 and exactly 2.
184 vector<docstring>::const_iterator it = pieces.begin();
185 vector<docstring>::const_iterator const en = pieces.end();
187 for (; it != en; ++it) {
190 char_type const c = (*it)[0];
191 // If the piece starts with a lower case char, we assume
192 // this is the name prefix and thus prename is complete.
195 // Same if this is the last piece, which is always the surname.
198 // Nothing of the former, so add this piece to the prename
206 // Now reconstruct the family name and strip the prefix.
207 // Note that if we left the loop because it + 1 == en,
208 // then this will still do the right thing, i.e., make surname
209 // just be the last piece.
// The iterator is not reset: this loop continues where the first stopped.
212 for (; it != en; ++it) {
219 res.prename = renormalize(prename);
// NOTE(review): parseSurname(surname) is evaluated twice here as well.
220 res.prefix = renormalize(parseSurname(surname).first);
221 res.surname = renormalize(parseSurname(surname).second);
// Formats a single name according to a scheme string. The scheme may use
// the placeholders %prename%, %surname%, %prefix% and %suffix%, and
// optional sections of the form {%key%[[text]]} for the keys prename,
// suffix and prefix (these are the shapes the three regexes below match).
226 docstring constructName(docstring const & name, string const scheme)
228 // re-constructs a name from name parts according to the scheme
// NOTE(review): nameParts(name) is evaluated four times; parsing the name
// once and reading the four members would give the same values.
230 docstring const prename = nameParts(name).prename;
231 docstring const surname = nameParts(name).surname;
232 docstring const prefix = nameParts(name).prefix;
233 docstring const suffix = nameParts(name).suffix;
// Capture groups of each regex: 1 = text before the optional section,
// 2 = opening "{%key%[[", 3 = section content, 4 = closing "]]}",
// 5 = text after the section.
235 static regex const reg1("(.*)(\\{%prename%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
236 static regex const reg2("(.*)(\\{%suffix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
237 static regex const reg3("(.*)(\\{%prefix%\\[\\[)([^\\]]+)(\\]\\]\\})(.*)");
239 // Changing the first parameter of regex_match() may corrupt the
240 // second one. In this case we use the temporary string tmp.
241 if (regex_match(scheme, sub, reg1)) {
243 if (!prename.empty())
247 if (regex_match(res, sub, reg2)) {
248 string tmp = sub.str(1);
251 res = tmp + sub.str(5);
253 if (regex_match(res, sub, reg3)) {
254 string tmp = sub.str(1);
257 res = tmp + sub.str(5);
// Finally substitute the plain placeholders with the parsed name parts.
259 docstring result = from_ascii(res);
260 result = subst(result, from_ascii("%prename%"), prename);
261 result = subst(result, from_ascii("%surname%"), surname);
262 result = subst(result, from_ascii("%prefix%"), prefix);
263 result = subst(result, from_ascii("%suffix%"), suffix);
// Splits an author-type string into a vector with one element per name.
268 vector<docstring> const getAuthors(docstring const & author)
270 // We check for groupings (via {...}) and only consider " and "
271 // outside groups as author separator. This is to account
272 // for cases such as {{Barnes and Noble, Inc.}}, which
273 // need to be treated as one single family name.
274 // We use temporary placeholders in order to differentiate the
275 // diverse " and " cases.
277 // First, we temporarily replace all ampersands. It is rather unusual
278 // in author names, but can happen (consider cases such as "C \& A Corp.").
279 docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
280 // Then, we temporarily turn all " and " strings into ampersands in order
281 // to handle them later on a per-char level.
282 iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
283 // Now we traverse through the string and replace the "&" by the proper
284 // output in- and outside groups
287 docstring::const_iterator p = iname.begin();
288 while (p != iname.end()) {
289 // count grouping level
294 // generate string with probable placeholders
297 // Inside groups, we output "and"
298 name += from_ascii("and");
300 // Outside groups, we output a separator
301 name += from_ascii("$$namesep!");
308 // re-insert the literal ampersands
309 name = subst(name, from_ascii("$$amp!"), from_ascii("&"));
311 // Now construct the actual vector
// The delimiter includes the blanks that surrounded the original " and ".
312 return getVectorFromString(name, from_ascii(" $$namesep! "));
// Returns true if getAuthors() splits the given string into more than
// one name.
// NOTE(review): author is taken by value although it is only read here.
316 bool multipleAuthors(docstring const author)
318 return getAuthors(author).size() > 1;
322 // converts a string containing LaTeX commands into unicode
// The input is consumed from the front one step at a time; three flags
// track whether we are inside a command name, inside math, or directly
// after a backslash.
324 docstring convertLaTeXCommands(docstring const & str)
329 bool scanning_cmd = false;
330 bool scanning_math = false;
331 bool escaped = false; // used to catch \$, etc.
332 while (!val.empty()) {
333 char_type const ch = val[0];
335 // if we're scanning math, we output everything until we
336 // find an unescaped $, at which point we break out.
343 scanning_math = false;
349 // if we're scanning a command name, then we just
350 // discard characters until we hit something that is not an ASCII letter.
353 if (isAlphaASCII(ch)) {
358 // so we're done with this command.
359 // now we fall through and check this character.
360 scanning_cmd = false;
363 // was the last character a \? If so, then this is something like:
364 // \\ or \$, so we'll just output it. That's probably not always right...
366 // exception: output \, as THIN SPACE
// 0x2009 is the Unicode code point of THIN SPACE.
368 ret.push_back(0x2009);
379 scanning_math = true;
383 // Change text mode accents in the form
384 // {\v a} to \v{a} (see #9340).
385 // FIXME: This is a sort of mini-tex2lyx.
386 // Use the real tex2lyx instead!
// The pattern is anchored at the start of val: an opening brace, a
// backslash, one accent letter, one whitespace char, one word char and a
// closing brace.
387 static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
388 if (lyx::regex_search(to_utf8(val), tma_reg)) {
390 val.replace(2, 1, from_ascii("{"));
394 // Apart from the above, we just ignore braces
395 if (ch == '{' || ch == '}') {
400 // we're going to check things that look like commands, so if
401 // this doesn't, just output it.
408 // ok, could be a command of some sort
409 // let's see if it corresponds to some unicode
410 // unicodesymbols has things in the form: \"{u},
411 // whereas we may see things like: \"u. So we'll
412 // look for that and change it, if necessary.
413 // FIXME: This is a sort of mini-tex2lyx.
414 // Use the real tex2lyx instead!
415 static lyx::regex const reg("^\\\\\\W\\w");
416 if (lyx::regex_search(to_utf8(val), reg)) {
// The closing brace is inserted first so that the index used for the
// opening brace is not shifted by the insertion.
417 val.insert(3, from_ascii("}"));
418 val.insert(2, from_ascii("{"));
422 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
423 Encodings::TEXT_CMD, termination, rem);
424 if (!cnvtd.empty()) {
425 // it did, so we'll take that bit and proceed with what's left
430 // it's a command of some sort
439 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
// The input is scanned from the front; scanning_rich is true while we are
// between an opening and a closing rich text marker.
440 docstring processRichtext(docstring const & str, bool richtext)
445 bool scanning_rich = false;
446 while (!val.empty()) {
447 char_type const ch = val[0];
// The size check guards the look-ahead at val[1].
448 if (ch == '{' && val.size() > 1 && val[1] == '!') {
449 // beginning of rich text
450 scanning_rich = true;
// A closing marker only counts while we are inside rich text.
454 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
456 scanning_rich = false;
464 // we need to escape '<' and '>'
472 } else if (!scanning_rich /* && !richtext */)
474 // else the character is discarded, which will happen only if
475 // richtext == false and we are scanning rich text
484 //////////////////////////////////////////////////////////////////////
488 //////////////////////////////////////////////////////////////////////
// Constructs an entry from a citation key and an entry type. The entry is
// flagged as a BibTeX entry (is_bibtex_ is set to true) and info_ starts
// out value-initialized.
// NOTE(review): the remainder of the initializer list and the constructor
// body are not visible in this excerpt.
490 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
491 : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
// Returns the formatted name list built by getAuthorList() from the
// "author" field, or from the "editor" field as a fallback. buf, full and
// forceshort are passed through to getAuthorList() unchanged.
497 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
498 bool full, bool forceshort) const
500 docstring author = operator[]("author");
// NOTE(review): the condition guarding this fallback is not visible here;
// presumably it applies when the author field is empty, confirm.
502 author = operator[]("editor");
504 return getAuthorList(buf, author, full, forceshort);
// Builds a formatted list of names from an author-type string.
//   buf        may be null; every buffer-dependent setting below then
//              falls back to a built-in default.
//   author     the raw name list, split with getAuthors().
//   full       when false, names at index maxnames and beyond take the
//              shortening path in the loop below.
//   forceshort requests shortening whenever there is more than one name.
//   allnames, beginning
//              NOTE(review): the statements that read these two flags are
//              not visible in this excerpt; presumably they choose between
//              the full name forms and the cite name form, and between the
//              first/other and the "by" name forms, confirm.
// The result is passed through convertLaTeXCommands() before returning.
508 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
509 docstring const & author, bool const full, bool const forceshort,
510 bool const allnames, bool const beginning) const
512 // The maxnames threshold depends on the engine
513 size_t maxnames = buf ?
514 buf->params().documentClass().max_citenames() : 2;
517 docstring const opt = label();
// split() stores the part of the label before the first '(' in authors
// and returns what follows it.
522 docstring const remainder = trim(split(opt, authors, '('));
523 if (remainder.empty())
524 // in this case, we didn't find a "(",
525 // so we don't have author (year)
533 // OK, we've got some names. Let's format them.
534 // Try to split the author list
535 vector<docstring> const authors = getAuthors(author);
539 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
540 : ENGINE_TYPE_DEFAULT;
542 // These are defined in the styles
544 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_etal")
546 string const namesep =
547 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_namesep")
549 string const lastnamesep =
550 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_lastnamesep")
552 string const pairnamesep =
553 buf ? buf->params().documentClass().getCiteMacro(engine_type, "B_pairnamesep")
// Name schemes in the syntax understood by constructName().
555 string firstnameform =
556 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
557 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
// NOTE(review): the condition guarding this reassignment is not visible.
559 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
560 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
561 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
562 : "{%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}{%prename%[[, %prename%]]}";
// NOTE(review): the condition guarding this reassignment is not visible.
564 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
565 : "%prename% {%prefix%[[%prefix% ]]}%surname%{%suffix%[[, %suffix%]]}";
566 string citenameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!citenameform")
567 : "{%prefix%[[%prefix% ]]}%surname%";
569 // Shorten the list (with et al.) if forceshort is set
570 // and the list can actually be shortened, else if maxcitenames
571 // is passed and full is not set.
572 bool shorten = forceshort && authors.size() > 1;
573 vector<docstring>::const_iterator it = authors.begin();
574 vector<docstring>::const_iterator en = authors.end();
// i counts the names processed so far, in step with the iterator.
575 for (size_t i = 0; it != en; ++it, ++i) {
576 if (i >= maxnames && !full) {
// A literal "others" entry is rendered as the et al. string.
580 if (*it == "others") {
581 retval += buf ? buf->B_(etal) : from_ascii(etal);
// Separator before the last name: a list of exactly two names uses
// pairnamesep, longer lists use lastnamesep.
584 if (i > 0 && i == authors.size() - 1) {
585 if (authors.size() == 2)
586 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
588 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
590 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
// The first name uses firstnameform, every later one othernameform.
592 retval += (i == 0) ? constructName(*it, firstnameform)
593 : constructName(*it, othernameform);
595 retval += constructName(*it, citenameform);
// Shortened output: only the first name, followed by the et al. string.
599 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
601 retval = constructName(authors[0], citenameform) + (buf ? buf->B_(etal) : from_ascii(etal));
604 return convertLaTeXCommands(retval);
// Returns the year of this entry. The "year" field is consulted first,
// then the "date" field; the tail of the function derives a year from
// label() by splitting at '(' and ')'.
// NOTE(review): the conditions that select between these paths are not
// visible in this excerpt.
608 docstring const BibTeXInfo::getYear() const
611 // first try legacy year field
612 docstring year = operator[]("year");
615 // now try biblatex's date field
616 year = operator[]("date");
617 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
618 // We only want the years.
// yreg captures the four digits of the start year, ereg the four digits
// that follow the slash (the end year).
619 static regex const yreg("[-]?([\\d]{4}).*");
620 static regex const ereg(".*/[-]?([\\d]{4}).*");
622 string const date = to_utf8(year);
623 if (!regex_match(date, sm, yreg))
624 // cannot parse year.
626 year = from_ascii(sm[1]);
627 // check for an endyear
// 0x2013 is the Unicode code point of EN DASH, used as range separator.
628 if (regex_match(date, sm, ereg))
629 year += char_type(0x2013) + from_ascii(sm[1]);
633 docstring const opt = label();
// split() stores the text before the first '(' in authors and returns
// what follows it.
638 docstring tmp = split(opt, authors, '(');
640 // we don't have author (year)
// The text up to the first ')' becomes the year.
643 tmp = split(tmp, year, ')');
// Forward declaration; parseOptions is defined further down in this file.
650 docstring parseOptions(docstring const & format, string & optkey,
651 docstring & ifpart, docstring & elsepart);
653 // Calls parseOptions to deal with an embedded option, such as:
654 // {%number%[[, no.~%number%]]}
655 // which must appear at the start of format. ifelsepart gets the
656 // whole of the option, and we return what's left after the option.
657 // we return format if there is an error.
658 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
// Precondition: format starts with the two characters "{%".
660 LASSERT(format[0] == '{' && format[1] == '%', return format);
664 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
// parseOptions signals failure by returning its input unchanged.
665 if (format == rest) { // parse error
666 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
// Guards the size subtraction in the substr() call below.
669 LASSERT(rest.size() <= format.size(),
670 { ifelsepart = docstring(); return format; });
// The option text is the leading part of format that parseOptions consumed.
671 ifelsepart = format.substr(0, format.size() - rest.size());
676 // Gets a "clause" from a format string, where the clause is
677 // delimited by '[[' and ']]'. Returns what is left after the
678 // clause is removed, and returns format if there is an error.
// The clause text is handed back through the clause out-parameter.
679 docstring getClause(docstring const & format, docstring & clause)
681 docstring fmt = format;
684 // we'll remove characters from the front of fmt as we process them
686 while (!fmt.empty()) {
// Closing delimiter: two consecutive ']' characters.
687 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
692 // check for an embedded option
693 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
695 docstring const rest = parseEmbeddedOption(fmt, part);
697 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
702 } else { // it's just a normal character
711 // parse an options string, which must appear at the start of the
712 // format parameter. puts the parsed bits in optkey, ifpart, and
713 // elsepart and returns what's left after the option is removed.
714 // if there's an error, it returns format itself.
// Expected shape, as checked below: {%key%[[if clause]]} optionally with a
// second [[else clause]] before the closing brace.
715 docstring parseOptions(docstring const & format, string & optkey,
716 docstring & ifpart, docstring & elsepart)
// Precondition: format starts with the two characters "{%".
718 LASSERT(format[0] == '{' && format[1] == '%', return format);
// Skip the leading "{%".
720 docstring fmt = format.substr(2);
721 size_t pos = fmt.find('%'); // end of key
722 if (pos == string::npos) {
723 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
726 optkey = to_utf8(fmt.substr(0, pos));
// Drop the key together with its terminating '%'.
727 fmt = fmt.substr(pos + 1);
728 // [[format]] should be next
729 if (fmt[0] != '[' || fmt[1] != '[') {
730 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
734 docstring curfmt = fmt;
735 fmt = getClause(curfmt, ifpart);
737 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
741 if (fmt[0] == '}') // we're done, no else clause
742 return fmt.substr(1);
744 // else part should follow
745 if (fmt[0] != '[' || fmt[1] != '[') {
746 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
751 fmt = getClause(curfmt, elsepart);
// getClause returns its input unchanged on error; otherwise the option
// must end with a closing brace right after the else clause.
753 if (fmt == curfmt || fmt[0] != '}') {
754 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
// Skip the closing brace and return the rest.
757 return fmt.substr(1);
764 Bug #9131 revealed an oddity in how we are generating citation information
765 when more than one key is given. We end up building a longer and longer format
766 string as we go, which we then have to re-parse, over and over and over again,
767 rather than generating the information for the individual keys and then putting
768 all of that together. We do that to deal with the way separators work, from what
769 I can tell, but it still feels like a hack. Fixing this would require quite a
770 bit of work, however.
// Expands a citation format string for this entry. The format is consumed
// from the front: %key% sequences are replaced by macro definitions,
// translations or field values, {%key%[[if]][[else]]} options are
// evaluated through parseOptions(), and the rich text markers are copied
// to the output. counter is compared against max_passes to stop runaway
// expansion. next and second drive the "next" and "second" option keys.
// NOTE(review): xrefs is taken by value; whether a copy is intended
// cannot be told from this excerpt.
772 docstring BibTeXInfo::expandFormat(docstring const & format,
773 BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
774 CiteItem const & ci, bool next, bool second) const
776 // incorrect use of macros could put us in an infinite loop
777 static int const max_passes = 5000;
778 // the use of overly large keys can lead to performance problems, due
779 // to eventual attempts to convert LaTeX macros to unicode. See bug
780 // #8944. By default, the size is limited to 128 (in CiteItem), but
781 // for specific purposes (such as XHTML export), it needs to be enlarged
782 // This is perhaps not the best solution, but it will have to do for now.
783 size_t const max_keysize = ci.max_key_size;
784 odocstringstream ret; // return value
786 bool scanning_key = false;
787 bool scanning_rich = false;
789 CiteEngineType const engine_type = buf.params().citeEngineType();
790 docstring fmt = format;
791 // we'll remove characters from the front of fmt as we process them
793 while (!fmt.empty()) {
794 if (counter > max_passes) {
795 LYXERR0("Recursion limit reached while parsing `"
800 char_type thischar = fmt[0];
801 if (thischar == '%') {
802 // beginning or end of key
805 scanning_key = false;
806 // so we replace the key with its value, which may be empty
810 buf.params().documentClass().getCiteMacro(engine_type, key);
// The macro definition is pushed back onto the front of fmt, so it is
// itself scanned for keys on the following iterations.
811 fmt = from_utf8(val) + fmt.substr(1);
814 } else if (prefixIs(key, "B_")) {
815 // a translatable bit (to the Buffer language)
817 buf.params().documentClass().getCiteMacro(engine_type, key);
818 docstring const trans =
819 translateIfPossible(from_utf8(val), buf.params().language->code());
821 } else if (key[0] == '_') {
822 // a translatable bit (to the GUI language)
824 buf.params().documentClass().getCiteMacro(engine_type, key);
825 docstring const trans =
826 translateIfPossible(from_utf8(val));
// Any other key is resolved to a value with getValueForKey().
829 docstring const val =
830 getValueForKey(key, buf, ci, xrefs, max_keysize);
// The span tags are wrapped in rich text markers.
832 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
835 ret << from_ascii("{!</span>!}");
843 else if (thischar == '{') {
844 // beginning of option?
846 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
849 if (fmt.size() > 1) {
851 // it is the beginning of an optional format
855 docstring const newfmt =
856 parseOptions(fmt, optkey, ifpart, elsepart);
857 if (newfmt == fmt) // parse error
860 docstring const val =
861 getValueForKey(optkey, buf, ci, xrefs);
862 if (optkey == "next" && next)
863 ret << ifpart; // without expansion
864 else if (optkey == "second" && second) {
866 ret << expandFormat(ifpart, xrefs, newcounter, buf,
// A non-empty value selects the if part, otherwise the else part, if any.
868 } else if (!val.empty()) {
870 ret << expandFormat(ifpart, xrefs, newcounter, buf,
872 } else if (!elsepart.empty()) {
874 ret << expandFormat(elsepart, xrefs, newcounter, buf,
877 // fmt will have been shortened for us already
881 // beginning of rich text
882 scanning_rich = true;
884 ret << from_ascii("{!");
888 // we are here if '{' was not followed by % or !.
889 // So it's just a character.
892 else if (scanning_rich && thischar == '!'
893 && fmt.size() > 1 && fmt[1] == '}') {
895 scanning_rich = false;
897 ret << from_ascii("!}");
900 else if (scanning_key)
901 key += char(thischar);
905 } catch (EncodingException & /* e */) {
906 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
// Diagnostics for input that ended in the middle of a key or of rich text.
912 LYXERR0("Never found end of key in `" << format << "'!");
916 LYXERR0("Never found end of rich text in `" << format << "'!");
// Returns the formatted bibliography information for this entry. Two
// cached results are kept, info_ and info_richtext_, and ci.richtext
// selects which of them is used.
// NOTE(review): xrefs is taken by value; whether a copy is intended
// cannot be told from this excerpt.
923 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
924 Buffer const & buf, CiteItem const & ci) const
926 bool const richtext = ci.richtext;
// A non-empty cached string counts as already computed.
928 if (!richtext && !info_.empty())
930 if (richtext && !info_richtext_.empty())
931 return info_richtext_;
934 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
939 CiteEngineType const engine_type = buf.params().citeEngineType();
940 DocumentClass const & dc = buf.params().documentClass();
// The format string is looked up by cite engine type and entry type.
941 docstring const & format =
942 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
944 info_ = expandFormat(format, xrefs, counter, buf,
948 // this probably shouldn't happen
// Rich text: markers are processed with richtext = true before the LaTeX
// commands are converted.
953 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
954 return info_richtext_;
// Plain text: the same pipeline with richtext = false.
957 info_ = convertLaTeXCommands(processRichtext(info_, false));
// Expands format into a citation label for this entry through
// expandFormat(). Unless next is set, a non-empty result is then run
// through processRichtext() and convertLaTeXCommands().
962 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
963 Buffer const & buf, docstring const & format,
964 CiteItem const & ci, bool next, bool second) const
969 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
// NOTE(review): post-processing is skipped when next is true, presumably
// because the label is processed again at a later stage, confirm.
971 if (!loclabel.empty() && !next) {
972 loclabel = processRichtext(loclabel, ci.richtext);
973 loclabel = convertLaTeXCommands(loclabel);
// Read-only field access by name. The result is a reference, so a static
// empty string is kept around for the case where there is nothing else
// to refer to.
// NOTE(review): the statements that pick between the found value and
// empty_value are not visible in this excerpt.
980 docstring const & BibTeXInfo::operator[](docstring const & field) const
982 BibTeXInfo::const_iterator it = find(field);
985 static docstring const empty_value = docstring();
// Convenience overload: converts the field name with from_ascii() and
// forwards to the docstring overload.
990 docstring const & BibTeXInfo::operator[](string const & field) const
992 return operator[](from_ascii(field));
// Resolves a format key to its value for this entry. The key is first
// looked up as a field of this entry, then in the cross-referenced
// entries, and finally matched against a list of special keys. A "clean:"
// prefix on the key is stripped before the lookup. The result is limited
// to maxsize characters by truncateWithEllipsis().
// NOTE(review): xrefs is taken by value; whether a copy is intended
// cannot be told from this excerpt.
996 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
997 CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
999 // anything less is pointless
1000 LASSERT(maxsize >= 16, maxsize = 16);
1001 string key = oldkey;
1002 bool cleanit = false;
1003 if (prefixIs(oldkey, "clean:")) {
// 6 is the length of "clean:".
1004 key = oldkey.substr(6);
1008 docstring ret = operator[](key);
// Not found in this entry: try the cross-referenced entries in order.
1009 if (ret.empty() && !xrefs.empty()) {
1010 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
1011 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
1012 for (; it != en; ++it) {
1013 if (*it && !(**it)[key].empty()) {
1020 // some special keys
1021 // FIXME: dialog, textbefore and textafter have nothing to do with this
1022 if (key == "dialog" && ci.context == CiteItem::Dialog)
1023 ret = from_ascii("x"); // any non-empty string will do
1024 else if (key == "export" && ci.context == CiteItem::Export)
1025 ret = from_ascii("x"); // any non-empty string will do
1026 else if (key == "ifstar" && ci.Starred)
1027 ret = from_ascii("x"); // any non-empty string will do
1028 else if (key == "ifqualified" && ci.isQualified)
1029 ret = from_ascii("x"); // any non-empty string will do
1030 else if (key == "entrytype")
// 12 is the length of "ifentrytype:".
1032 else if (prefixIs(key, "ifentrytype:")
1033 && from_ascii(key.substr(12)) == entry_type_)
1034 ret = from_ascii("x"); // any non-empty string will do
1035 else if (key == "key")
1037 else if (key == "label")
1039 else if (key == "modifier" && modifier_ != 0)
1041 else if (key == "numericallabel")
1043 else if (prefixIs(key, "ifmultiple:")) {
1044 // Return whether we have multiple authors
// 11 is the length of "ifmultiple:".
1045 docstring const kind = operator[](from_ascii(key.substr(11)));
1046 if (multipleAuthors(kind))
1047 ret = from_ascii("x"); // any non-empty string will do
1049 else if (prefixIs(key, "abbrvnames:")) {
1050 // Special key to provide abbreviated name list,
1051 // with respect to maxcitenames. Suitable for Bibliography
// 11 is the length of "abbrvnames:".
1053 docstring const kind = operator[](from_ascii(key.substr(11)));
1054 ret = getAuthorList(&buf, kind, false, false, true);
1055 if (ci.forceUpperCase && isLowerCase(ret[0]))
1056 ret[0] = uppercase(ret[0]);
1057 } else if (prefixIs(key, "fullnames:")) {
1058 // Return a full name list. Suitable for Bibliography
// 10 is the length of "fullnames:".
1060 docstring const kind = operator[](from_ascii(key.substr(10)));
1061 ret = getAuthorList(&buf, kind, true, false, true);
1062 if (ci.forceUpperCase && isLowerCase(ret[0]))
1063 ret[0] = uppercase(ret[0]);
1064 } else if (prefixIs(key, "forceabbrvnames:")) {
1065 // Special key to provide abbreviated name lists,
1066 // irrespective of maxcitenames. Suitable for Bibliography
// NOTE(review): "forceabbrvnames:" has 16 characters but substr(15) is
// used, so the field name looked up still starts with ':'. This looks
// like an off-by-one, confirm and use 16.
1068 docstring const kind = operator[](from_ascii(key.substr(15)));
1069 ret = getAuthorList(&buf, kind, false, true, true);
1070 if (ci.forceUpperCase && isLowerCase(ret[0]))
1071 ret[0] = uppercase(ret[0]);
1072 } else if (prefixIs(key, "abbrvbynames:")) {
1073 // Special key to provide abbreviated name list,
1074 // with respect to maxcitenames. Suitable for further names inside a
1075 // bibliography item // (such as "ed. by ...")
// NOTE(review): "abbrvbynames:" has 13 characters but substr(11) is used,
// so the field name looked up still starts with "s:". This looks like a
// copy of the offset for "abbrvnames:", confirm and use 13.
1076 docstring const kind = operator[](from_ascii(key.substr(11)));
1077 ret = getAuthorList(&buf, kind, false, false, true, false);
1078 if (ci.forceUpperCase && isLowerCase(ret[0]))
1079 ret[0] = uppercase(ret[0]);
1080 } else if (prefixIs(key, "fullbynames:")) {
1081 // Return a full name list. Suitable for further names inside a
1082 // bibliography item // (such as "ed. by ...")
// NOTE(review): "fullbynames:" has 12 characters but substr(10) is used,
// so the field name looked up still starts with "s:". Confirm and use 12.
1083 docstring const kind = operator[](from_ascii(key.substr(10)));
1084 ret = getAuthorList(&buf, kind, true, false, true, false);
1085 if (ci.forceUpperCase && isLowerCase(ret[0]))
1086 ret[0] = uppercase(ret[0]);
1087 } else if (prefixIs(key, "forceabbrvbynames:")) {
1088 // Special key to provide abbreviated name lists,
1089 // irrespective of maxcitenames. Suitable for further names inside a
1090 // bibliography item // (such as "ed. by ...")
// NOTE(review): "forceabbrvbynames:" has 18 characters but substr(15) is
// used, so the field name looked up still starts with "es:". Confirm and
// use 18.
1091 docstring const kind = operator[](from_ascii(key.substr(15)));
1092 ret = getAuthorList(&buf, kind, false, true, true, false);
1093 if (ci.forceUpperCase && isLowerCase(ret[0]))
1094 ret[0] = uppercase(ret[0]);
1095 } else if (key == "abbrvciteauthor") {
1096 // Special key to provide abbreviated author or
1097 // editor names (suitable for citation labels),
1098 // with respect to maxcitenames.
1099 ret = getAuthorOrEditorList(&buf, false, false);
1100 if (ci.forceUpperCase && isLowerCase(ret[0]))
1101 ret[0] = uppercase(ret[0]);
1102 } else if (key == "fullciteauthor") {
1103 // Return a full author or editor list (for citation labels)
1104 ret = getAuthorOrEditorList(&buf, true, false);
1105 if (ci.forceUpperCase && isLowerCase(ret[0]))
1106 ret[0] = uppercase(ret[0]);
1107 } else if (key == "forceabbrvciteauthor") {
1108 // Special key to provide abbreviated author or
1109 // editor names (suitable for citation labels),
1110 // irrespective of maxcitenames.
1111 ret = getAuthorOrEditorList(&buf, false, true);
1112 if (ci.forceUpperCase && isLowerCase(ret[0]))
1113 ret[0] = uppercase(ret[0]);
1114 } else if (key == "bibentry") {
1115 // Special key to provide the full bibliography entry: see getInfo()
1116 CiteEngineType const engine_type = buf.params().citeEngineType();
1117 DocumentClass const & dc = buf.params().documentClass();
1118 docstring const & format =
1119 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
1121 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
1122 } else if (key == "textbefore")
1123 ret = ci.textBefore;
1124 else if (key == "textafter")
1126 else if (key == "curpretext")
// The pre- and post-texts are looked up by this entry's citation key.
1127 ret = ci.getPretexts()[bib_key_];
1128 else if (key == "curposttext")
1129 ret = ci.getPosttexts()[bib_key_];
1130 else if (key == "year")
// NOTE(review): the guard on this call is not visible here; presumably it
// only runs when the key carried the "clean:" prefix (cleanit), confirm.
1135 ret = html::cleanAttr(ret);
1137 // make sure it is not too big
1138 support::truncateWithEllipsis(ret, maxsize);
1143 //////////////////////////////////////////////////////////////////////
1147 //////////////////////////////////////////////////////////////////////
1151 // A functor for use with sort, leading to case insensitive sorting
// NOTE(review): std::binary_function was deprecated in C++11 and removed
// in C++17; the base class has to go if this file is built as C++17.
1152 class compareNoCase: public binary_function<docstring, docstring, bool>
// Returns true if s1 orders before s2 according to compare_no_case().
1155 bool operator()(docstring const & s1, docstring const & s2) const {
1156 return compare_no_case(s1, s2) < 0;
// Collects the keys of all entries that data refers to through its
// "crossref" and "xdata" fields, following xdata references recursively.
// nested is true on recursive calls; the crossref field is then skipped.
// NOTE(review): no guard against cyclic xdata references is visible in
// this excerpt, confirm that cycles cannot cause unbounded recursion.
1163 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
1165 vector<docstring> result;
1166 if (!data.isBibTeX())
1168 // Legacy crossref field. This is not nestable.
1169 if (!nested && !data["crossref"].empty()) {
1170 docstring const xrefkey = data["crossref"];
1171 result.push_back(xrefkey);
1172 // However, check for nested xdatas
1173 BiblioInfo::const_iterator it = find(xrefkey);
1175 BibTeXInfo const & xref = it->second;
1176 vector<docstring> const nxdata = getXRefs(xref, true);
1177 if (!nxdata.empty())
1178 result.insert(result.end(), nxdata.begin(), nxdata.end());
1181 // Biblatex's xdata field. Infinitely nestable.
1182 // XData field can consist of a comma-separated list of keys
1183 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
1184 if (!xdatakeys.empty()) {
1185 vector<docstring>::const_iterator xit = xdatakeys.begin();
1186 vector<docstring>::const_iterator xen = xdatakeys.end();
1187 for (; xit != xen; ++xit) {
1188 docstring const xdatakey = *xit;
// Each key is recorded first, followed by the keys it refers to in turn.
1189 result.push_back(xdatakey);
1190 BiblioInfo::const_iterator it = find(xdatakey);
1192 BibTeXInfo const & xdata = it->second;
1193 vector<docstring> const nxdata = getXRefs(xdata, true);
1194 if (!nxdata.empty())
1195 result.insert(result.end(), nxdata.begin(), nxdata.end());
// Collects the keys of all entries and sorts them case-insensitively
// with the compareNoCase functor.
1203 vector<docstring> const BiblioInfo::getKeys() const
1205 vector<docstring> bibkeys;
1206 BiblioInfo::const_iterator it = begin();
1207 for (; it != end(); ++it)
1208 bibkeys.push_back(it->first);
1209 sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
// Copies the known field names (field_names_) into a vector and sorts it.
1214 vector<docstring> const BiblioInfo::getFields() const
1216 vector<docstring> bibfields;
1217 set<docstring>::const_iterator it = field_names_.begin();
1218 set<docstring>::const_iterator end = field_names_.end();
1219 for (; it != end; ++it)
1220 bibfields.push_back(*it);
// NOTE(review): the elements come from a set<docstring> iterator, so they
// presumably already arrive in sorted order and this sort is redundant.
1221 sort(bibfields.begin(), bibfields.end());
// Copies the known entry types (entry_types_) into a vector and sorts it.
1226 vector<docstring> const BiblioInfo::getEntries() const
1228 vector<docstring> bibentries;
1229 set<docstring>::const_iterator it = entry_types_.begin();
1230 set<docstring>::const_iterator end = entry_types_.end();
1231 for (; it != end; ++it)
1232 bibentries.push_back(*it);
// NOTE(review): the elements come from a set<docstring> iterator, so they
// presumably already arrive in sorted order and this sort is redundant.
1233 sort(bibentries.begin(), bibentries.end());
// Looks up \p key in this container and returns what the entry's own
// getAuthorOrEditorList(&buf, false) produces.
// NOTE(review): original lines 1241-1242 are missing from this excerpt;
// presumably they handle a key that is not found (it == end()) before
// it->second is accessed -- confirm against the complete file.
1238 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1240 BiblioInfo::const_iterator it = find(key);
1243 BibTeXInfo const & data = it->second;
1244 return data.getAuthorOrEditorList(&buf, false);
// Looks up \p key in this container and returns the entry's citeNumber().
// NOTE(review): original lines 1251-1252 are missing from this excerpt;
// presumably they handle a key that is not found (it == end()) before
// it->second is accessed -- confirm against the complete file.
1248 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1250 BiblioInfo::const_iterator it = find(key);
1253 BibTeXInfo const & data = it->second;
1254 return data.citeNumber();
// Determines the year for the entry stored under \p key. The entry's own
// getYear() is read first; the entries named by getXRefs() can supply the
// year instead. When \p use_modifier is set and the entry has a non-zero
// modifier(), that modifier is appended to the year.
// NOTE(review): several original lines are missing from this excerpt (the
// embedded numbering skips e.g. 1261-1262, 1265, 1268-1270, 1274 and
// 1277-1281). Presumably they contain the not-found handling, the test that
// decides when the cross-references are consulted, the `continue` after the
// xrefit check, the loop exit once a year was found and the final return --
// confirm against the complete file.
1258 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1260 BiblioInfo::const_iterator it = find(key);
1263 BibTeXInfo const & data = it->second;
1264 docstring year = data.getYear();
1266 // let's try the crossrefs
1267 vector<docstring> const xrefs = getXRefs(data);
1271 for (docstring const & xref : xrefs) {
1272 BiblioInfo::const_iterator const xrefit = find(xref);
// A referenced key need not exist in this container.
1273 if (xrefit == end())
1275 BibTeXInfo const & xref_data = xrefit->second;
1276 year = xref_data.getYear();
// The modifier always comes from the entry itself, not from a
// cross-referenced entry.
1282 if (use_modifier && data.modifier() != 0)
1283 year += data.modifier();
// Buffer-aware variant of getYear(): obtains the year through
// getYear(key, use_modifier) and can return the string buf.B_("No year")
// in its place.
// NOTE(review): the condition on original line 1291 and the remaining
// return are missing from this excerpt; presumably "No year" is returned
// when the computed year is empty and the year itself otherwise -- confirm
// against the complete file.
1288 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1290 docstring const year = getYear(key, use_modifier);
1292 return buf.B_("No year");
1297 docstring const BiblioInfo::getInfo(docstring const & key,
1298 Buffer const & buf, CiteItem const & ci) const
1300 BiblioInfo::const_iterator it = find(key);
1302 return docstring(_("Bibliography entry not found!"));
1303 BibTeXInfo const & data = it->second;
1304 BibTeXInfoList xrefptrs;
1305 vector<docstring> const xrefs = getXRefs(data);
1306 for (docstring const & xref : getXRefs(data)) {
1307 BiblioInfo::const_iterator const xrefit = find(xref);
1308 if (xrefit != end())
1309 xrefptrs.push_back(&(xrefit->second));
1311 return data.getInfo(xrefptrs, buf, ci);
// Builds the citation label for \p keys. The starting text is the cite
// format that the document class supplies for the buffer's cite engine type
// and \p style; it is then passed through BibTeXInfo::getLabel() once per
// key, each call receiving the result of the previous one.
// \p ci provides max_size, the length to which the label is truncated.
// NOTE(review): several original lines are missing from this excerpt (the
// embedded numbering skips e.g. 1324-1326, 1339-1340 and 1348-1350).
// Presumably they limit keys to ten elements when too_many_keys is set,
// copy the entry found by find() into data when it exists, make the
// ellipsis conditional on too_many_keys and return ret -- confirm against
// the complete file.
1315 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1316 Buffer const & buf, string const & style, CiteItem const & ci) const
1318 size_t max_size = ci.max_size;
1319 // shorter makes no sense
// NOTE(review): presumably LASSERT evaluates its second argument when the
// condition fails, raising max_size to 16 -- confirm in support/lassert.h.
1320 LASSERT(max_size >= 16, max_size = 16);
1322 // we can't display more than 10 of these, anyway
1323 bool const too_many_keys = keys.size() > 10;
1327 CiteEngineType const engine_type = buf.params().citeEngineType();
1328 DocumentClass const & dc = buf.params().documentClass();
1329 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1330 docstring ret = format;
1331 vector<docstring>::const_iterator key = keys.begin();
1332 vector<docstring>::const_iterator ken = keys.end();
1333 for (int i = 0; key != ken; ++key, ++i) {
1334 BiblioInfo::const_iterator it = find(*key);
// Stand-in entry that carries only the key; data refers to it.
1335 BibTeXInfo empty_data;
1336 empty_data.key(*key);
1337 BibTeXInfo & data = empty_data;
1338 vector<BibTeXInfo const *> xrefptrs;
// Collect pointers to the cross-referenced entries that exist in this
// container.
1341 for (docstring const & xref : getXRefs(data)) {
1342 BiblioInfo::const_iterator const xrefit = find(xref);
1343 if (xrefit != end())
1344 xrefptrs.push_back(&(xrefit->second));
// The last two arguments tell the entry whether another key follows it
// and whether it is the key at index 1.
1347 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1351 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1352 support::truncateWithEllipsis(ret, max_size);
// Reports whether the entry looked up for \p key is a BibTeX entry, as
// answered by BibTeXInfo::isBibTeX(). The lookup uses key1, which split()
// fills from \p key using ',' as the delimiter.
// NOTE(review): presumably key1 receives the part of \p key before the
// first comma, i.e. only the first key of a list is examined -- confirm in
// support/lstrings.h.
// NOTE(review): the declaration of key1 and original lines 1362-1363 are
// missing from this excerpt; presumably the latter return early when the
// key is not found -- confirm against the complete file.
1357 bool BiblioInfo::isBibtex(docstring const & key) const
1360 split(key, key1, ',');
1361 BiblioInfo::const_iterator it = find(key1);
1364 return it->second.isBibTeX();
// Produces one (style name, label) pair per element of \p styles. The
// result is sized to styles.size() up front and slot i is filled with the
// name of styles[i] and the label that getLabel() yields for \p keys with
// that style.
// NOTE(review): several original lines are missing from this excerpt (the
// embedded numbering skips e.g. 1371-1372, 1374-1375 and 1380 onwards).
// Presumably they guard the empty-vector return with a check on \p keys,
// declare `style` and return csm -- confirm against the complete file.
1368 BiblioInfo::CiteStringMap const BiblioInfo::getCiteStrings(
1369 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1370 Buffer const & buf, CiteItem const & ci) const
1373 return vector<pair<docstring,docstring>>();
1376 CiteStringMap csm(styles.size());
1377 for (size_t i = 0; i != csm.size(); ++i) {
1378 style = styles[i].name;
1379 csm[i] = make_pair(from_ascii(style), getLabel(keys, buf, style, ci));
1386 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
1388 bimap_.insert(info.begin(), info.end());
1389 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1390 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1396 // used in xhtml to sort a list of BibTeXInfo objects
1397 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1399 docstring const lauth = lhs->getAuthorOrEditorList();
1400 docstring const rauth = rhs->getAuthorOrEditorList();
1401 docstring const lyear = lhs->getYear();
1402 docstring const ryear = rhs->getYear();
1403 docstring const ltitl = lhs->operator[]("title");
1404 docstring const rtitl = rhs->operator[]("title");
1405 return (lauth < rauth)
1406 || (lauth == rauth && lyear < ryear)
1407 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
// Rebuilds cited_entries_ from the citations recorded in the "citation" TOC
// of \p buf. The keys found there are gathered into a set, the ones that
// resolve to BibTeX entries in this container are sorted with lSorter, and
// their keys are stored in cited_entries_ in that order.
// NOTE(review): several original lines are missing from this excerpt (the
// embedded numbering skips e.g. 1426, 1431-1432 and 1442). Presumably they
// hold the `continue` statements belonging to the two skip conditions and
// the early return for an empty key set -- confirm against the complete
// file.
1413 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1415 cited_entries_.clear();
1416 // We are going to collect all the citation keys used in the document,
1417 // getting them from the TOC.
1418 // FIXME We may want to collect these differently, in the first case,
1419 // so that we might have them in order of appearance.
1420 set<docstring> citekeys;
1421 shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1422 Toc::const_iterator it = toc->begin();
1423 Toc::const_iterator const en = toc->end();
1424 for (; it != en; ++it) {
1425 if (it->str().empty())
// A TOC item's string can name several keys; all of them go into the set,
// which also removes duplicates.
1427 vector<docstring> const keys = getVectorFromString(it->str());
1428 citekeys.insert(keys.begin(), keys.end());
1430 if (citekeys.empty())
1433 // We have a set of the keys used in this document.
1434 // We will now convert it to a list of the BibTeXInfo objects used in
1436 vector<BibTeXInfo const *> bi;
1437 set<docstring>::const_iterator cit = citekeys.begin();
1438 set<docstring>::const_iterator const cen = citekeys.end();
1439 for (; cit != cen; ++cit) {
1440 BiblioInfo::const_iterator const bt = find(*cit);
// Keys that are unknown here, or that do not belong to a BibTeX entry, are
// singled out by this test.
1441 if (bt == end() || !bt->second.isBibTeX())
1443 bi.push_back(&(bt->second));
1446 sort(bi.begin(), bi.end(), lSorter);
1448 // Now we can write the sorted keys
1449 vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1450 vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1451 for (; bit != ben; ++bit)
1452 cited_entries_.push_back((*bit)->key());
// Assigns citation numbers, year modifiers and labels to the entries cited
// in \p buf. cited_entries_ is refreshed through collectCitedEntries() and
// then traversed twice: the first loop gives every entry a running cite
// number and compares it with the previously handled entry to decide on a
// modifier; the second loop sets each entry's label.
// NOTE(review): many original lines are missing from this excerpt (the
// embedded numbering skips e.g. 1461-1463, 1476, 1478, 1493, 1495-1500,
// 1503-1506, 1513, 1515, 1517, 1524 and 1526 onwards). Presumably they
// declare keynumber and modifier, hold the `continue` statements of the
// "fail gracefully" checks, advance or reset the modifier, update `last`,
// and select between the numeric and the author-year label based on
// `numbers` -- confirm against the complete file.
1456 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1458 collectCitedEntries(buf);
1459 CiteEngineType const engine_type = buf.params().citeEngineType();
1460 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1464 // used to remember the last one we saw
1465 // we'll be comparing entries to see if we need to add
1466 // modifiers, like "1984a"
1467 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1469 vector<docstring>::const_iterator it = cited_entries_.begin();
1470 vector<docstring>::const_iterator const en = cited_entries_.end();
1471 for (; it != en; ++it) {
1472 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1473 // this shouldn't happen, but...
1474 if (biit == bimap_.end())
1475 // ...fail gracefully, anyway.
1477 BibTeXInfo & entry = biit->second;
// Cite numbers follow the order of cited_entries_; keynumber is incremented
// before it is converted.
1479 docstring const num = convert<docstring>(++keynumber);
1480 entry.setCiteNumber(num);
1482 // The first test here is checking whether this is the first
1483 // time through the loop. If so, then we do not have anything
1484 // with which to compare.
1485 if (last != bimap_.end()
1486 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1487 // we access the year via getYear() so as to get it from the xref,
1488 // if we need to do so
1489 && getYear(entry.key()) == getYear(last->second.key())) {
1490 if (modifier == 0) {
1491 // so the last one should have been 'a'
1492 last->second.setModifier('a');
1494 } else if (modifier == 'z')
1501 entry.setModifier(modifier);
1502 // remember the last one
// Second traversal: the labels are set only after all cite numbers and
// modifiers are in place.
1507 it = cited_entries_.begin();
1508 for (; it != en; ++it) {
1509 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1510 // this shouldn't happen, but...
1511 if (biit == bimap_.end())
1512 // ...fail gracefully, anyway.
1514 BibTeXInfo & entry = biit->second;
1516 entry.label(entry.citeNumber());
1518 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1519 // we do it this way so as to access the xref, if necessary
1520 // note that this also gives us the modifier
1521 docstring const year = getYear(*it, buf, true);
1522 if (!auth.empty() && !year.empty())
1523 entry.label(auth + ' ' + year);
1525 entry.label(entry.key());
1531 //////////////////////////////////////////////////////////////////////
1535 //////////////////////////////////////////////////////////////////////
// Derives a CitationStyle from the citation command name \p command.
// \p params is asked for an alias of the command; the alias, when there is
// one, replaces the command as the working name cmd. An upper-case first
// character in \p command sets forceUpperCase and lower-cases the first
// character of cmd; a trailing '*' in \p command sets hasStarredVersion and
// a trailing '*' is stripped from cmd.
// NOTE(review): several original lines are missing from this excerpt (the
// embedded numbering skips e.g. 1540-1541, 1543-1544 and 1557 onwards).
// Presumably they declare cs, return it early for an empty command, store
// cmd in it and return it at the end -- confirm against the complete file.
1538 CitationStyle citationStyleFromString(string const & command,
1539 BufferParams const & params)
1542 if (command.empty())
1545 string const alias = params.getCiteAlias(command);
1546 string cmd = alias.empty() ? command : alias;
// The case test looks at the command as given, the case change is applied
// to the (possibly aliased) working name.
1547 if (isUpperCase(command[0])) {
1548 cs.forceUpperCase = true;
1549 cmd[0] = lowercase(cmd[0]);
// n is the index of the last character of command.
1552 size_t const n = command.size() - 1;
1553 if (command[n] == '*') {
1554 cs.hasStarredVersion = true;
// The alias need not end in '*' even when the command does.
1555 if (suffixIs(cmd, '*'))
1556 cmd = cmd.substr(0, cmd.size() - 1);
1564 string citationStyleToString(const CitationStyle & cs, bool const latex)
1566 string cmd = latex ? cs.cmd : cs.name;
1567 if (cs.forceUpperCase)
1568 cmd[0] = uppercase(cmd[0]);
1569 if (cs.hasStarredVersion)