3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
10 * \author Jürgen Spitzmüller
12 * Full author contact details are available in file CREDITS.
17 #include "BiblioInfo.h"
19 #include "BufferParams.h"
20 #include "buffer_funcs.h"
23 #include "InsetIterator.h"
25 #include "output_xhtml.h"
26 #include "Paragraph.h"
27 #include "TextClass.h"
28 #include "TocBackend.h"
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/docstream.h"
33 #include "support/gettext.h"
34 #include "support/lassert.h"
35 #include "support/lstrings.h"
36 #include "support/regex.h"
37 #include "support/textutils.h"
43 using namespace lyx::support;
// Splits an author-type string into its two name parts. Every visible return
// builds the pair as (prename, family name).
50 // gets the "prename" and "family name" from an author-type string
51 pair<docstring, docstring> nameParts(docstring const & name)
// NOTE(review): the guard in front of this return is not visible in this
// view; presumably it handles an empty name -- confirm.
54 return make_pair(docstring(), docstring());
56 // first we split the name with getVectorFromString() using its default
57 // delimiter (presumably a comma -- confirm against support/lstrings.h).
58 vector<docstring> pieces = getVectorFromString(name);
59 if (pieces.size() > 1)
60 // whether we have a jr. part or not, the family name is always the
61 // first item and the prename the last one (hence the reversed order)
62 return make_pair(pieces.back(), pieces.front());
64 // OK, so now we want to look for the last name. We're going to
65 // include the "von" part. This isn't perfect.
66 // Split on spaces, to get various tokens.
67 pieces = getVectorFromString(name, from_ascii(" "));
68 // unusual not to have a space, but could happen: the whole string is
// then returned as the family name, with an empty prename
69 if (pieces.size() < 2)
70 return make_pair(from_ascii(""), name);
71 // If we get two, assume the last one is the last name
72 if (pieces.size() == 2)
73 return make_pair(pieces.front(), pieces.back());
75 // Now we look for the first token that begins with
76 // a lower case letter or an opening group {.
78 vector<docstring>::const_iterator it = pieces.begin();
79 vector<docstring>::const_iterator const en = pieces.end();
81 for (; it != en; ++it) {
84 char_type const c = (*it)[0];
85 if (isLower(c) || c == '{')
87 // if this is the last time through the loop, then
88 // what we now have is the last name, so we do not want
89 // to add that to the prename.
92 // add this piece to the prename
100 // reconstruct the family name
101 // note that if we left the loop because it + 1 == en,
102 // then this will still do the right thing, i.e., make surname
103 // just be the last piece.
// this second loop continues from wherever the first one left `it`
106 for (; it != en; ++it) {
113 return make_pair(prename, surname);
// Builds a display name from `name` by substituting the placeholders
// "%prename%" and "%surname%" in `scheme` with the two parts that
// nameParts() returns for `name`.
117 docstring constructName(docstring const & name, string const scheme)
119 // re-constructs a name from name parts according
// NOTE(review): nameParts(name) is evaluated twice below, once per pair
// member; a single call stored in a local would give the same values.
121 docstring const prename = nameParts(name).first;
122 docstring const surname = nameParts(name).second;
// the scheme is converted with from_ascii(), so it is expected to be ASCII
123 docstring result = from_ascii(scheme);
124 result = subst(result, from_ascii("%prename%"), prename);
125 result = subst(result, from_ascii("%surname%"), surname);
// Returns true when splitting `author` on the separator " and " yields
// more than one piece, i.e. when the string names several authors.
130 bool multipleAuthors(docstring const author)
132 vector<docstring> const authors =
133 getVectorFromString(author, from_ascii(" and "));
134 return authors.size() > 1;
138 // converts a string containing LaTeX commands into unicode
140 docstring convertLaTeXCommands(docstring const & str)
145 bool scanning_cmd = false;
146 bool scanning_math = false;
147 bool escaped = false; // used to catch \$, etc.
148 while (!val.empty()) {
149 char_type const ch = val[0];
151 // if we're scanning math, we output everything until we
152 // find an unescaped $, at which point we break out.
159 scanning_math = false;
165 // if we're scanning a command name, then we just
166 // discard characters until we hit something that
169 if (isAlphaASCII(ch)) {
174 // so we're done with this command.
175 // now we fall through and check this character.
176 scanning_cmd = false;
179 // was the last character a \? If so, then this is something like:
180 // \\ or \$, so we'll just output it. That's probably not always right...
182 // exception: output \, as THIN SPACE
184 ret.push_back(0x2009);
195 scanning_math = true;
199 // we just ignore braces
200 if (ch == '{' || ch == '}') {
205 // we're going to check things that look like commands, so if
206 // this doesn't, just output it.
213 // ok, could be a command of some sort
214 // let's see if it corresponds to some unicode
215 // unicodesymbols has things in the form: \"{u},
216 // whereas we may see things like: \"u. So we'll
217 // look for that and change it, if necessary.
218 // FIXME: This is a sort of mini-tex2lyx.
219 // Use the real tex2lyx instead!
220 static lyx::regex const reg("^\\\\\\W\\w");
221 if (lyx::regex_search(to_utf8(val), reg)) {
222 val.insert(3, from_ascii("}"));
223 val.insert(2, from_ascii("{"));
227 docstring const cnvtd = Encodings::fromLaTeXCommand(val,
228 Encodings::TEXT_CMD, termination, rem);
229 if (!cnvtd.empty()) {
230 // it did, so we'll take that bit and proceed with what's left
235 // it's a command of some sort
244 // Escape '<' and '>' and remove richtext markers (e.g. {!this is richtext!}) from a string.
245 docstring processRichtext(docstring const & str, bool richtext)
250 bool scanning_rich = false;
251 while (!val.empty()) {
252 char_type const ch = val[0];
253 if (ch == '{' && val.size() > 1 && val[1] == '!') {
254 // beginning of rich text
255 scanning_rich = true;
259 if (scanning_rich && ch == '!' && val.size() > 1 && val[1] == '}') {
261 scanning_rich = false;
269 // we need to escape '<' and '>'
277 } else if (!scanning_rich /* && !richtext */)
279 // else the character is discarded, which will happen only if
280 // richtext == false and we are scanning rich text
289 //////////////////////////////////////////////////////////////////////
293 //////////////////////////////////////////////////////////////////////
295 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
296 : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
302 docstring const BibTeXInfo::getAuthorOrEditorList(Buffer const * buf,
303 bool full, bool forceshort) const
305 docstring author = operator[]("author");
307 author = operator[]("editor");
309 return getAuthorList(buf, author, full, forceshort);
313 docstring const BibTeXInfo::getAuthorList(Buffer const * buf,
314 docstring const & author, bool const full, bool const forceshort,
315 bool const allnames, bool const beginning) const
317 // The maxnames threshold depends on the engine
318 size_t maxnames = buf ?
319 buf->params().documentClass().max_citenames() : 2;
322 docstring const opt = label();
327 docstring const remainder = trim(split(opt, authors, '('));
328 if (remainder.empty())
329 // in this case, we didn't find a "(",
330 // so we don't have author (year)
338 // FIXME Move this to a separate routine that can
339 // be called from elsewhere.
341 // OK, we've got some names. Let's format them.
342 // Try to split the author list on " and "
343 vector<docstring> const authors =
344 getVectorFromString(author, from_ascii(" and "));
348 CiteEngineType const engine_type = buf ? buf->params().citeEngineType()
349 : ENGINE_TYPE_DEFAULT;
351 // These are defined in the styles
353 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_etal")
355 string const namesep =
356 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_namesep")
358 string const lastnamesep =
359 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_lastnamesep")
361 string const pairnamesep =
362 buf ? buf->params().documentClass().getCiteMacro(engine_type, "_pairnamesep")
364 string firstnameform =
365 buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstnameform")
366 : "%surname%, %prename%";
368 firstnameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!firstbynameform")
369 : "%prename% %surname%";
370 string othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!othernameform")
371 : "%surname%, %prename%";
373 othernameform = buf ? buf->params().documentClass().getCiteMacro(engine_type, "!otherbynameform")
374 : "%prename% %surname%";
376 // Shorten the list (with et al.) if forceshort is set
377 // and the list can actually be shortened, else if maxcitenames
378 // is passed and full is not set.
379 bool shorten = forceshort && authors.size() > 1;
380 vector<docstring>::const_iterator it = authors.begin();
381 vector<docstring>::const_iterator en = authors.end();
382 for (size_t i = 0; it != en; ++it, ++i) {
383 if (i >= maxnames && !full) {
387 if (*it == "others") {
388 retval += buf ? buf->B_(etal) : from_ascii(etal);
391 if (i > 0 && i == authors.size() - 1) {
392 if (authors.size() == 2)
393 retval += buf ? buf->B_(pairnamesep) : from_ascii(pairnamesep);
395 retval += buf ? buf->B_(lastnamesep) : from_ascii(lastnamesep);
397 retval += buf ? buf->B_(namesep) : from_ascii(namesep);
399 retval += (i == 0) ? constructName(*it, firstnameform)
400 : constructName(*it, othernameform);
402 retval += nameParts(*it).second;
406 retval = constructName(authors[0], firstnameform) + (buf ? buf->B_(etal) : from_ascii(etal));
408 retval = nameParts(authors[0]).second + (buf ? buf->B_(etal) : from_ascii(etal));
411 return convertLaTeXCommands(retval);
415 docstring const BibTeXInfo::getYear() const
418 // first try legacy year field
419 docstring year = operator[]("year");
422 // now try biblatex's date field
423 year = operator[]("date");
424 // Format is [-]YYYY-MM-DD*/[-]YYYY-MM-DD*
425 // We only want the years.
426 static regex const yreg("[-]?([\\d]{4}).*");
427 static regex const ereg(".*/[-]?([\\d]{4}).*");
429 string const date = to_utf8(year);
430 if (!regex_match(date, sm, yreg))
431 // cannot parse year.
433 year = from_ascii(sm[1]);
434 // check for an endyear
435 if (regex_match(date, sm, ereg))
436 year += char_type(0x2013) + from_ascii(sm[1]);
440 docstring const opt = label();
445 docstring tmp = split(opt, authors, '(');
447 // we don't have author (year)
450 tmp = split(tmp, year, ')');
457 docstring parseOptions(docstring const & format, string & optkey,
458 docstring & ifpart, docstring & elsepart);
460 // Calls parseOptions to deal with an embedded option, such as:
461 // {%number%[[, no.~%number%]]}
462 // which must appear at the start of format. ifelsepart gets the
463 // whole of the option, and we return what's left after the option.
464 // we return format if there is an error.
465 docstring parseEmbeddedOption(docstring const & format, docstring & ifelsepart)
467 LASSERT(format[0] == '{' && format[1] == '%', return format);
471 docstring const rest = parseOptions(format, optkey, ifpart, elsepart);
472 if (format == rest) { // parse error
473 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
476 LASSERT(rest.size() <= format.size(),
477 { ifelsepart = docstring(); return format; });
478 ifelsepart = format.substr(0, format.size() - rest.size());
483 // Gets a "clause" from a format string, where the clause is
484 // delimited by '[[' and ']]'. Returns what is left after the
485 // clause is removed, and returns format if there is an error.
486 docstring getClause(docstring const & format, docstring & clause)
488 docstring fmt = format;
491 // we'll remove characters from the front of fmt as we
493 while (!fmt.empty()) {
494 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
499 // check for an embedded option
500 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
502 docstring const rest = parseEmbeddedOption(fmt, part);
504 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
509 } else { // it's just a normal character
518 // parse an options string, which must appear at the start of the
519 // format parameter. puts the parsed bits in optkey, ifpart, and
520 // elsepart and returns what's left after the option is removed.
521 // if there's an error, it returns format itself.
522 docstring parseOptions(docstring const & format, string & optkey,
523 docstring & ifpart, docstring & elsepart)
525 LASSERT(format[0] == '{' && format[1] == '%', return format);
527 docstring fmt = format.substr(2);
528 size_t pos = fmt.find('%'); // end of key
529 if (pos == string::npos) {
530 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
533 optkey = to_utf8(fmt.substr(0, pos));
534 fmt = fmt.substr(pos + 1);
535 // [[format]] should be next
536 if (fmt[0] != '[' || fmt[1] != '[') {
537 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
541 docstring curfmt = fmt;
542 fmt = getClause(curfmt, ifpart);
544 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
548 if (fmt[0] == '}') // we're done, no else clause
549 return fmt.substr(1);
551 // else part should follow
552 if (fmt[0] != '[' || fmt[1] != '[') {
553 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
558 fmt = getClause(curfmt, elsepart);
560 if (fmt == curfmt || fmt[0] != '}') {
561 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
564 return fmt.substr(1);
571 Bug #9131 revealed an oddity in how we are generating citation information
572 when more than one key is given. We end up building a longer and longer format
573 string as we go, which we then have to re-parse, over and over and over again,
574 rather than generating the information for the individual keys and then putting
575 all of that together. We do that to deal with the way separators work, from what
576 I can tell, but it still feels like a hack. Fixing this would require quite a
577 bit of work, however.
579 docstring BibTeXInfo::expandFormat(docstring const & format,
580 BibTeXInfoList const xrefs, int & counter, Buffer const & buf,
581 CiteItem const & ci, bool next, bool second) const
583 // incorrect use of macros could put us in an infinite loop
584 static int const max_passes = 5000;
585 // the use of overly large keys can lead to performance problems, due
586 // to eventual attempts to convert LaTeX macros to unicode. See bug
587 // #8944. By default, the size is limited to 128 (in CiteItem), but
588 // for specific purposes (such as XHTML export), it needs to be enlarged
589 // This is perhaps not the best solution, but it will have to do for now.
590 size_t const max_keysize = ci.max_key_size;
591 odocstringstream ret; // return value
593 bool scanning_key = false;
594 bool scanning_rich = false;
596 CiteEngineType const engine_type = buf.params().citeEngineType();
597 docstring fmt = format;
598 // we'll remove characters from the front of fmt as we
600 while (!fmt.empty()) {
601 if (counter > max_passes) {
602 LYXERR0("Recursion limit reached while parsing `"
607 char_type thischar = fmt[0];
608 if (thischar == '%') {
609 // beginning or end of key
612 scanning_key = false;
613 // so we replace the key with its value, which may be empty
617 buf.params().documentClass().getCiteMacro(engine_type, key);
618 fmt = from_utf8(val) + fmt.substr(1);
621 } else if (key[0] == '_') {
622 // a translatable bit
624 buf.params().documentClass().getCiteMacro(engine_type, key);
625 docstring const trans =
626 translateIfPossible(from_utf8(val), buf.params().language->code());
629 docstring const val =
630 getValueForKey(key, buf, ci, xrefs, max_keysize);
632 ret << from_ascii("{!<span class=\"bib-" + key + "\">!}");
635 ret << from_ascii("{!</span>!}");
643 else if (thischar == '{') {
644 // beginning of option?
646 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
649 if (fmt.size() > 1) {
651 // it is the beginning of an optional format
655 docstring const newfmt =
656 parseOptions(fmt, optkey, ifpart, elsepart);
657 if (newfmt == fmt) // parse error
660 docstring const val =
661 getValueForKey(optkey, buf, ci, xrefs);
662 if (optkey == "next" && next)
663 ret << ifpart; // without expansion
664 else if (optkey == "second" && second) {
666 ret << expandFormat(ifpart, xrefs, newcounter, buf,
668 } else if (!val.empty()) {
670 ret << expandFormat(ifpart, xrefs, newcounter, buf,
672 } else if (!elsepart.empty()) {
674 ret << expandFormat(elsepart, xrefs, newcounter, buf,
677 // fmt will have been shortened for us already
681 // beginning of rich text
682 scanning_rich = true;
684 ret << from_ascii("{!");
688 // we are here if '{' was not followed by % or !.
689 // So it's just a character.
692 else if (scanning_rich && thischar == '!'
693 && fmt.size() > 1 && fmt[1] == '}') {
695 scanning_rich = false;
697 ret << from_ascii("!}");
700 else if (scanning_key)
701 key += char(thischar);
705 } catch (EncodingException & /* e */) {
706 LYXERR0("Uncodable character '" << docstring(1, thischar) << " in citation label!");
712 LYXERR0("Never found end of key in `" << format << "'!");
716 LYXERR0("Never found end of rich text in `" << format << "'!");
723 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const xrefs,
724 Buffer const & buf, CiteItem const & ci) const
726 bool const richtext = ci.richtext;
728 if (!richtext && !info_.empty())
730 if (richtext && !info_richtext_.empty())
731 return info_richtext_;
734 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
739 CiteEngineType const engine_type = buf.params().citeEngineType();
740 DocumentClass const & dc = buf.params().documentClass();
741 docstring const & format =
742 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
744 info_ = expandFormat(format, xrefs, counter, buf,
748 // this probably shouldn't happen
753 info_richtext_ = convertLaTeXCommands(processRichtext(info_, true));
754 return info_richtext_;
757 info_ = convertLaTeXCommands(processRichtext(info_, false));
762 docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
763 Buffer const & buf, docstring const & format,
764 CiteItem const & ci, bool next, bool second) const
769 loclabel = expandFormat(format, xrefs, counter, buf, ci, next, second);
771 if (!loclabel.empty() && !next) {
772 loclabel = processRichtext(loclabel, ci.richtext);
773 loclabel = convertLaTeXCommands(loclabel);
780 docstring const & BibTeXInfo::operator[](docstring const & field) const
782 BibTeXInfo::const_iterator it = find(field);
785 static docstring const empty_value = docstring();
790 docstring const & BibTeXInfo::operator[](string const & field) const
792 return operator[](from_ascii(field));
// Returns the value that the format key `oldkey` stands for.
//
// A key starting with "clean:" is looked up without that prefix. The key is
// first tried as a field of this entry; if that gives nothing, the non-null
// entries in `xrefs` are consulted. A chain of special keys follows: flags
// (which yield the dummy value "x"), name lists built with getAuthorList() /
// getAuthorOrEditorList(), the expanded "bibentry", and the pre/post texts
// taken from `ci`. The result is finally cut down to `maxsize` characters
// with an ellipsis.
//
// For the "...names:<field>" keys the field name is whatever follows the
// prefix, so each substr() offset must equal the length of the prefix that
// was just tested.
796 docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
797 CiteItem const & ci, BibTeXInfoList const xrefs, size_t maxsize) const
799 // anything less is pointless
800 LASSERT(maxsize >= 16, maxsize = 16);
802 bool cleanit = false;
803 if (prefixIs(oldkey, "clean:")) {
// strip the 6-character "clean:" prefix
804 key = oldkey.substr(6);
808 docstring ret = operator[](key);
// nothing in this entry: fall back to the cross-referenced entries
809 if (ret.empty() && !xrefs.empty()) {
810 vector<BibTeXInfo const *>::const_iterator it = xrefs.begin();
811 vector<BibTeXInfo const *>::const_iterator en = xrefs.end();
812 for (; it != en; ++it) {
// null pointers in xrefs are skipped
813 if (*it && !(**it)[key].empty()) {
821 // FIXME: dialog, textbefore and textafter have nothing to do with this
822 if (key == "dialog" && ci.context == CiteItem::Dialog)
823 ret = from_ascii("x"); // any non-empty string will do
824 else if (key == "export" && ci.context == CiteItem::Export)
825 ret = from_ascii("x"); // any non-empty string will do
826 else if (key == "ifstar" && ci.Starred)
827 ret = from_ascii("x"); // any non-empty string will do
828 else if (key == "ifqualified" && ci.isQualified)
829 ret = from_ascii("x"); // any non-empty string will do
830 else if (key == "entrytype")
// "ifentrytype:" is 12 characters long; the rest is the type to compare
832 else if (prefixIs(key, "ifentrytype:")
833 && from_ascii(key.substr(12)) == entry_type_)
834 ret = from_ascii("x"); // any non-empty string will do
835 else if (key == "key")
837 else if (key == "label")
839 else if (key == "modifier" && modifier_ != 0)
841 else if (key == "numericallabel")
843 else if (prefixIs(key, "ifmultiple:")) {
844 // Return whether we have multiple authors
// ("ifmultiple:" is 11 characters long)
845 docstring const kind = operator[](from_ascii(key.substr(11)));
846 if (multipleAuthors(kind))
847 ret = from_ascii("x"); // any non-empty string will do
849 else if (prefixIs(key, "abbrvnames:")) {
850 // Special key to provide abbreviated name list,
851 // with respect to maxcitenames. Suitable for Bibliography
// ("abbrvnames:" is 11 characters long)
853 docstring const kind = operator[](from_ascii(key.substr(11)));
854 ret = getAuthorList(&buf, kind, false, false, true);
855 if (ci.forceUpperCase && isLowerCase(ret[0]))
856 ret[0] = uppercase(ret[0]);
857 } else if (prefixIs(key, "fullnames:")) {
858 // Return a full name list. Suitable for Bibliography
// ("fullnames:" is 10 characters long)
860 docstring const kind = operator[](from_ascii(key.substr(10)));
861 ret = getAuthorList(&buf, kind, true, false, true);
862 if (ci.forceUpperCase && isLowerCase(ret[0]))
863 ret[0] = uppercase(ret[0]);
864 } else if (prefixIs(key, "forceabbrvnames:")) {
865 // Special key to provide abbreviated name lists,
866 // irrespective of maxcitenames. Suitable for Bibliography
// ("forceabbrvnames:" is 16 characters long; an offset of 15 would
// leave the ':' in front of the field name)
868 docstring const kind = operator[](from_ascii(key.substr(16)));
869 ret = getAuthorList(&buf, kind, false, true, true);
870 if (ci.forceUpperCase && isLowerCase(ret[0]))
871 ret[0] = uppercase(ret[0]);
872 } else if (prefixIs(key, "abbrvbynames:")) {
873 // Special key to provide abbreviated name list,
874 // with respect to maxcitenames. Suitable for further names inside a
875 // bibliography item (such as "ed. by ...")
// ("abbrvbynames:" is 13 characters long)
876 docstring const kind = operator[](from_ascii(key.substr(13)));
877 ret = getAuthorList(&buf, kind, false, false, true, false);
878 if (ci.forceUpperCase && isLowerCase(ret[0]))
879 ret[0] = uppercase(ret[0]);
880 } else if (prefixIs(key, "fullbynames:")) {
881 // Return a full name list. Suitable for further names inside a
882 // bibliography item (such as "ed. by ...")
// ("fullbynames:" is 12 characters long)
883 docstring const kind = operator[](from_ascii(key.substr(12)));
884 ret = getAuthorList(&buf, kind, true, false, true, false);
885 if (ci.forceUpperCase && isLowerCase(ret[0]))
886 ret[0] = uppercase(ret[0]);
887 } else if (prefixIs(key, "forceabbrvbynames:")) {
888 // Special key to provide abbreviated name lists,
889 // irrespective of maxcitenames. Suitable for further names inside a
890 // bibliography item (such as "ed. by ...")
// ("forceabbrvbynames:" is 18 characters long)
891 docstring const kind = operator[](from_ascii(key.substr(18)));
892 ret = getAuthorList(&buf, kind, false, true, true, false);
893 if (ci.forceUpperCase && isLowerCase(ret[0]))
894 ret[0] = uppercase(ret[0]);
895 } else if (key == "abbrvciteauthor") {
896 // Special key to provide abbreviated author or
897 // editor names (suitable for citation labels),
898 // with respect to maxcitenames.
899 ret = getAuthorOrEditorList(&buf, false, false);
900 if (ci.forceUpperCase && isLowerCase(ret[0]))
901 ret[0] = uppercase(ret[0]);
902 } else if (key == "fullciteauthor") {
903 // Return a full author or editor list (for citation labels)
904 ret = getAuthorOrEditorList(&buf, true, false);
905 if (ci.forceUpperCase && isLowerCase(ret[0]))
906 ret[0] = uppercase(ret[0]);
907 } else if (key == "forceabbrvciteauthor") {
908 // Special key to provide abbreviated author or
909 // editor names (suitable for citation labels),
910 // irrespective of maxcitenames.
911 ret = getAuthorOrEditorList(&buf, false, true);
912 if (ci.forceUpperCase && isLowerCase(ret[0]))
913 ret[0] = uppercase(ret[0]);
914 } else if (key == "bibentry") {
915 // Special key to provide the full bibliography entry: see getInfo()
916 CiteEngineType const engine_type = buf.params().citeEngineType();
917 DocumentClass const & dc = buf.params().documentClass();
918 docstring const & format =
919 from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_), false));
921 ret = expandFormat(format, xrefs, counter, buf, ci, false, false);
922 } else if (key == "textbefore")
924 else if (key == "textafter")
// the texts for the current entry are looked up by its bib key
926 else if (key == "curpretext")
927 ret = ci.getPretexts()[bib_key_];
928 else if (key == "curposttext")
929 ret = ci.getPosttexts()[bib_key_];
930 else if (key == "year")
// NOTE(review): the condition guarding this call is not visible in this
// view; presumably it is the `cleanit` flag set for "clean:" keys -- confirm.
935 ret = html::cleanAttr(ret);
937 // make sure it is not too big
938 support::truncateWithEllipsis(ret, maxsize);
943 //////////////////////////////////////////////////////////////////////
947 //////////////////////////////////////////////////////////////////////
951 // A functor for use with sort, leading to case insensitive sorting.
// It orders two docstrings by the sign of compare_no_case(). The former
// binary_function base class has been dropped: std::binary_function was
// deprecated in C++11 and removed in C++17, and sort() only needs the
// call operator.
952 class compareNoCase
955 bool operator()(docstring const & s1, docstring const & s2) const {
956 return compare_no_case(s1, s2) < 0;
963 vector<docstring> const BiblioInfo::getXRefs(BibTeXInfo const & data, bool const nested) const
965 vector<docstring> result;
966 if (!data.isBibTeX())
968 // Legacy crossref field. This is not nestable.
969 if (!nested && !data["crossref"].empty()) {
970 docstring const xrefkey = data["crossref"];
971 result.push_back(xrefkey);
972 // However, check for nested xdatas
973 BiblioInfo::const_iterator it = find(xrefkey);
975 BibTeXInfo const & xref = it->second;
976 vector<docstring> const nxdata = getXRefs(xref, true);
978 result.insert(result.end(), nxdata.begin(), nxdata.end());
981 // Biblatex's xdata field. Infinitely nestable.
982 // XData field can consist of a comma-separated list of keys
983 vector<docstring> const xdatakeys = getVectorFromString(data["xdata"]);
984 if (!xdatakeys.empty()) {
985 vector<docstring>::const_iterator xit = xdatakeys.begin();
986 vector<docstring>::const_iterator xen = xdatakeys.end();
987 for (; xit != xen; ++xit) {
988 docstring const xdatakey = *xit;
989 result.push_back(xdatakey);
990 BiblioInfo::const_iterator it = find(xdatakey);
992 BibTeXInfo const & xdata = it->second;
993 vector<docstring> const nxdata = getXRefs(xdata, true);
995 result.insert(result.end(), nxdata.begin(), nxdata.end());
// Collects the key (the `first` member) of every entry in this BiblioInfo
// and sorts the keys case-insensitively with compareNoCase.
1003 vector<docstring> const BiblioInfo::getKeys() const
1005 vector<docstring> bibkeys;
1006 BiblioInfo::const_iterator it = begin();
1007 for (; it != end(); ++it)
1008 bibkeys.push_back(it->first);
1009 sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
// Copies every element of field_names_ into a vector and sorts that vector
// with the default docstring ordering.
1014 vector<docstring> const BiblioInfo::getFields() const
1016 vector<docstring> bibfields;
1017 set<docstring>::const_iterator it = field_names_.begin();
1018 set<docstring>::const_iterator end = field_names_.end();
1019 for (; it != end; ++it)
1020 bibfields.push_back(*it);
// NOTE(review): the iterator type suggests field_names_ is a set<docstring>;
// if it uses the default comparator the elements already arrive in sorted
// order and this sort is redundant -- confirm against the header.
1021 sort(bibfields.begin(), bibfields.end());
// Copies every element of entry_types_ into a vector and sorts that vector
// with the default docstring ordering.
1026 vector<docstring> const BiblioInfo::getEntries() const
1028 vector<docstring> bibentries;
1029 set<docstring>::const_iterator it = entry_types_.begin();
1030 set<docstring>::const_iterator end = entry_types_.end();
1031 for (; it != end; ++it)
1032 bibentries.push_back(*it);
// NOTE(review): the iterator type suggests entry_types_ is a set<docstring>;
// if it uses the default comparator the elements already arrive in sorted
// order and this sort is redundant -- confirm against the header.
1033 sort(bibentries.begin(), bibentries.end());
1038 docstring const BiblioInfo::getAuthorOrEditorList(docstring const & key, Buffer const & buf) const
1040 BiblioInfo::const_iterator it = find(key);
1043 BibTeXInfo const & data = it->second;
1044 return data.getAuthorOrEditorList(&buf, false);
1048 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
1050 BiblioInfo::const_iterator it = find(key);
1053 BibTeXInfo const & data = it->second;
1054 return data.citeNumber();
1058 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
1060 BiblioInfo::const_iterator it = find(key);
1063 BibTeXInfo const & data = it->second;
1064 docstring year = data.getYear();
1066 // let's try the crossrefs
1067 vector<docstring> const xrefs = getXRefs(data);
1071 vector<docstring>::const_iterator it = xrefs.begin();
1072 vector<docstring>::const_iterator en = xrefs.end();
1073 for (; it != en; ++it) {
1074 BiblioInfo::const_iterator const xrefit = find(*it);
1075 if (xrefit == end())
1077 BibTeXInfo const & xref_data = xrefit->second;
1078 year = xref_data.getYear();
1084 if (use_modifier && data.modifier() != 0)
1085 year += data.modifier();
1090 docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, bool use_modifier) const
1092 docstring const year = getYear(key, use_modifier);
1094 return buf.B_("No year");
1099 docstring const BiblioInfo::getInfo(docstring const & key,
1100 Buffer const & buf, CiteItem const & ci) const
1102 BiblioInfo::const_iterator it = find(key);
1104 return docstring(_("Bibliography entry not found!"));
1105 BibTeXInfo const & data = it->second;
1106 BibTeXInfoList xrefptrs;
1107 vector<docstring> const xrefs = getXRefs(data);
1108 if (!xrefs.empty()) {
1109 vector<docstring>::const_iterator it = xrefs.begin();
1110 vector<docstring>::const_iterator en = xrefs.end();
1111 for (; it != en; ++it) {
1112 BiblioInfo::const_iterator const xrefit = find(*it);
1113 if (xrefit != end())
1114 xrefptrs.push_back(&(xrefit->second));
1117 return data.getInfo(xrefptrs, buf, ci);
1121 docstring const BiblioInfo::getLabel(vector<docstring> keys,
1122 Buffer const & buf, string const & style, CiteItem const & ci) const
1124 size_t max_size = ci.max_size;
1125 // shorter makes no sense
1126 LASSERT(max_size >= 16, max_size = 16);
1128 // we can't display more than 10 of these, anyway
1129 bool const too_many_keys = keys.size() > 10;
1133 CiteEngineType const engine_type = buf.params().citeEngineType();
1134 DocumentClass const & dc = buf.params().documentClass();
1135 docstring const & format = from_utf8(dc.getCiteFormat(engine_type, style, false, "cite"));
1136 docstring ret = format;
1137 vector<docstring>::const_iterator key = keys.begin();
1138 vector<docstring>::const_iterator ken = keys.end();
1139 for (int i = 0; key != ken; ++key, ++i) {
1140 BiblioInfo::const_iterator it = find(*key);
1141 BibTeXInfo empty_data;
1142 empty_data.key(*key);
1143 BibTeXInfo & data = empty_data;
1144 vector<BibTeXInfo const *> xrefptrs;
1147 vector<docstring> const xrefs = getXRefs(data);
1148 if (!xrefs.empty()) {
1149 vector<docstring>::const_iterator it = xrefs.begin();
1150 vector<docstring>::const_iterator en = xrefs.end();
1151 for (; it != en; ++it) {
1152 BiblioInfo::const_iterator const xrefit = find(*it);
1153 if (xrefit != end())
1154 xrefptrs.push_back(&(xrefit->second));
1158 ret = data.getLabel(xrefptrs, buf, ret, ci, key + 1 != ken, i == 1);
1162 ret.push_back(0x2026);//HORIZONTAL ELLIPSIS
1163 support::truncateWithEllipsis(ret, max_size);
1168 bool BiblioInfo::isBibtex(docstring const & key) const
1171 split(key, key1, ',');
1172 BiblioInfo::const_iterator it = find(key1);
1175 return it->second.isBibTeX();
1179 vector<docstring> const BiblioInfo::getCiteStrings(
1180 vector<docstring> const & keys, vector<CitationStyle> const & styles,
1181 Buffer const & buf, CiteItem const & ci) const
1184 return vector<docstring>();
1187 vector<docstring> vec(styles.size());
1188 for (size_t i = 0; i != vec.size(); ++i) {
1189 style = styles[i].name;
1190 vec[i] = getLabel(keys, buf, style, ci);
// Adds the contents of `info` to this object: its entries go into bimap_,
// its field names into field_names_ and its entry types into entry_types_.
1197 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
// NOTE(review): bimap_ is used as a map<docstring, BibTeXInfo> elsewhere in
// this file; a range insert into a std::map leaves entries whose key is
// already present untouched, so existing data would win -- confirm intent.
1199 bimap_.insert(info.begin(), info.end());
1200 field_names_.insert(info.field_names_.begin(), info.field_names_.end());
1201 entry_types_.insert(info.entry_types_.begin(), info.entry_types_.end());
1207 // used in xhtml to sort a list of BibTeXInfo objects
// "Less than" comparison for two entries: they are ordered by their
// author-or-editor list, then by year, then by the "title" field.
// Both pointers are dereferenced unconditionally, so neither may be null.
1208 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
1210 docstring const lauth = lhs->getAuthorOrEditorList();
1211 docstring const rauth = rhs->getAuthorOrEditorList();
1212 docstring const lyear = lhs->getYear();
1213 docstring const ryear = rhs->getYear();
1214 docstring const ltitl = lhs->operator[]("title");
1215 docstring const rtitl = rhs->operator[]("title");
// lexicographic comparison of the (author, year, title) triple
1216 return (lauth < rauth)
1217 || (lauth == rauth && lyear < ryear)
1218 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
1224 void BiblioInfo::collectCitedEntries(Buffer const & buf)
1226 cited_entries_.clear();
1227 // We are going to collect all the citation keys used in the document,
1228 // getting them from the TOC.
1229 // FIXME We may want to collect these differently, in the first case,
1230 // so that we might have them in order of appearance.
1231 set<docstring> citekeys;
1232 shared_ptr<Toc const> toc = buf.tocBackend().toc("citation");
1233 Toc::const_iterator it = toc->begin();
1234 Toc::const_iterator const en = toc->end();
1235 for (; it != en; ++it) {
1236 if (it->str().empty())
1238 vector<docstring> const keys = getVectorFromString(it->str());
1239 citekeys.insert(keys.begin(), keys.end());
1241 if (citekeys.empty())
1244 // We have a set of the keys used in this document.
1245 // We will now convert it to a list of the BibTeXInfo objects used in
1247 vector<BibTeXInfo const *> bi;
1248 set<docstring>::const_iterator cit = citekeys.begin();
1249 set<docstring>::const_iterator const cen = citekeys.end();
1250 for (; cit != cen; ++cit) {
1251 BiblioInfo::const_iterator const bt = find(*cit);
1252 if (bt == end() || !bt->second.isBibTeX())
1254 bi.push_back(&(bt->second));
1257 sort(bi.begin(), bi.end(), lSorter);
1259 // Now we can write the sorted keys
1260 vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
1261 vector<BibTeXInfo const *>::const_iterator ben = bi.end();
1262 for (; bit != ben; ++bit)
1263 cited_entries_.push_back((*bit)->key());
1267 void BiblioInfo::makeCitationLabels(Buffer const & buf)
1269 collectCitedEntries(buf);
1270 CiteEngineType const engine_type = buf.params().citeEngineType();
1271 bool const numbers = (engine_type & ENGINE_TYPE_NUMERICAL);
1275 // used to remember the last one we saw
1276 // we'll be comparing entries to see if we need to add
1277 // modifiers, like "1984a"
1278 map<docstring, BibTeXInfo>::iterator last = bimap_.end();
1280 vector<docstring>::const_iterator it = cited_entries_.begin();
1281 vector<docstring>::const_iterator const en = cited_entries_.end();
1282 for (; it != en; ++it) {
1283 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1284 // this shouldn't happen, but...
1285 if (biit == bimap_.end())
1286 // ...fail gracefully, anyway.
1288 BibTeXInfo & entry = biit->second;
1290 docstring const num = convert<docstring>(++keynumber);
1291 entry.setCiteNumber(num);
1293 // The first test here is checking whether this is the first
1294 // time through the loop. If so, then we do not have anything
1295 // with which to compare.
1296 if (last != bimap_.end()
1297 && entry.getAuthorOrEditorList() == last->second.getAuthorOrEditorList()
1298 // we access the year via getYear() so as to get it from the xref,
1299 // if we need to do so
1300 && getYear(entry.key()) == getYear(last->second.key())) {
1301 if (modifier == 0) {
1302 // so the last one should have been 'a'
1303 last->second.setModifier('a');
1305 } else if (modifier == 'z')
1312 entry.setModifier(modifier);
1313 // remember the last one
1318 it = cited_entries_.begin();
1319 for (; it != en; ++it) {
1320 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
1321 // this shouldn't happen, but...
1322 if (biit == bimap_.end())
1323 // ...fail gracefully, anyway.
1325 BibTeXInfo & entry = biit->second;
1327 entry.label(entry.citeNumber());
1329 docstring const auth = entry.getAuthorOrEditorList(&buf, false);
1330 // we do it this way so as to access the xref, if necessary
1331 // note that this also gives us the modifier
1332 docstring const year = getYear(*it, buf, true);
1333 if (!auth.empty() && !year.empty())
1334 entry.label(auth + ' ' + year);
1336 entry.label(entry.key());
1342 //////////////////////////////////////////////////////////////////////
1346 //////////////////////////////////////////////////////////////////////
// Derives a CitationStyle from the cite command name `command`.
// `params` supplies an optional alias for the command via getCiteAlias().
1349 CitationStyle citationStyleFromString(string const & command,
1350 BufferParams const & params)
// an empty command is handled up front, so command[0] below is a real character
1353 if (command.empty())
// work on the alias when one is registered, otherwise on the command itself
1356 string const alias = params.getCiteAlias(command);
1357 string cmd = alias.empty() ? command : alias;
// a capitalised command requests upper-casing; the working name gets its
// first letter lower-cased
1358 if (isUpperCase(command[0])) {
1359 cs.forceUpperCase = true;
1360 cmd[0] = lowercase(cmd[0]);
// a trailing '*' on the command marks the starred version; the star is
// removed from the working name if it ends in one
1363 size_t const n = command.size() - 1;
1364 if (command[n] == '*') {
1365 cs.hasStarredVersion = true;
1366 if (suffixIs(cmd, '*'))
1367 cmd = cmd.substr(0, cmd.size() - 1);
1375 string citationStyleToString(const CitationStyle & cs, bool const latex)
1377 string cmd = latex ? cs.cmd : cs.name;
1378 if (cs.forceUpperCase)
1379 cmd[0] = uppercase(cmd[0]);
1380 if (cs.hasStarredVersion)