3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
10 * Full author contact details are available in file CREDITS.
15 #include "BiblioInfo.h"
17 #include "BufferParams.h"
18 #include "buffer_funcs.h"
20 #include "InsetIterator.h"
21 #include "Paragraph.h"
22 #include "TextClass.h"
23 #include "TocBackend.h"
25 #include "insets/Inset.h"
26 #include "insets/InsetBibitem.h"
27 #include "insets/InsetBibtex.h"
28 #include "insets/InsetInclude.h"
30 #include "support/convert.h"
31 #include "support/debug.h"
32 #include "support/docstream.h"
33 #include "support/gettext.h"
34 #include "support/lassert.h"
35 #include "support/lstrings.h"
36 #include "support/textutils.h"
38 #include "boost/regex.hpp"
43 using namespace lyx::support;
50 // gets the "family name" from an author-type string
// `name` is the author-type string to analyse. Two strategies are visible:
//  - if `name` contains a comma, the text before the right-most comma is
//    passed through ltrim() and returned;
//  - otherwise `name` is split on single spaces and the tokens are scanned.
// NOTE(review): not every statement of this function is visible in this
// view (e.g. the statements guarded by some of the `if`s and the loop
// bodies), so the remarks here describe only what is shown.
51 docstring familyName(docstring const & name)
56 // first we look for a comma, and take the last name to be everything
57 // preceding the right-most one, so that we also get the "jr" part.
58 docstring::size_type idx = name.rfind(',');
59 if (idx != docstring::npos)
60 return ltrim(name.substr(0, idx));
62 // OK, so now we want to look for the last name. We're going to
63 // include the "von" part. This isn't perfect.
64 // Split on spaces, to get various tokens.
65 vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
66 // If we only get two, assume the last one is the last name
67 if (pieces.size() <= 2)
70 // Now we look for the first token that begins with a lower case letter.
71 vector<docstring>::const_iterator it = pieces.begin();
72 vector<docstring>::const_iterator en = pieces.end();
73 for (; it != en; ++it) {
74 if ((*it).size() == 0)
// Presumably empty tokens are skipped by the check above, which would make
// the [0] access below safe -- the guarded statement is not visible here.
76 char_type const c = (*it)[0];
// `it == en` here means the scan reached the end of `pieces`.
81 if (it == en) // we never found a "von"
84 // reconstruct what we need to return
// This loop has no init clause, so it continues from the current position
// of `it` rather than from the first token.
87 for (; it != en; ++it) {
97 // converts a string containing LaTeX commands into unicode
// The visible code is a small state machine driven by three flags
// (scanning_cmd, scanning_math, escaped) that inspects the first character
// of `val` on each step and appends output to `ret`.
// NOTE(review): the declarations of `val`, `ret` and `rem`, the enclosing
// loop, and most branch bodies are not visible in this view; `val` is
// presumably a working copy of `str` -- confirm.
99 docstring convertLaTeXCommands(docstring const & str)
104 bool scanning_cmd = false;
105 bool scanning_math = false;
106 bool escaped = false; // used to catch \$, etc.
108 char_type const ch = val[0];
110 // if we're scanning math, we output everything until we
111 // find an unescaped $, at which point we break out.
118 scanning_math = false;
124 // if we're scanning a command name, then we just
125 // discard characters until we hit something that
128 if (isAlphaASCII(ch)) {
133 // so we're done with this command.
134 // now we fall through and check this character.
135 scanning_cmd = false;
138 // was the last character a \? If so, then this is something like:
139 // \\ or \$, so we'll just output it. That's probably not always right...
141 // exception: output \, as THIN SPACE
// 0x2009 is the code point pushed for the thin space mentioned above.
143 ret.push_back(0x2009);
154 scanning_math = true;
158 // we just ignore braces
159 if (ch == '{' || ch == '}') {
164 // we're going to check things that look like commands, so if
165 // this doesn't, just output it.
172 // ok, could be a command of some sort
173 // let's see if it corresponds to some unicode
174 // unicodesymbols has things in the form: \"{u},
175 // whereas we may see things like: \"u. So we'll
176 // look for that and change it, if necessary.
// After C++ unescaping the pattern is: start of string, one backslash, one
// non-word character, one word character. The regex object is a function
// static, so it is constructed once and reused on later calls.
177 static boost::regex const reg("^\\\\\\W\\w");
178 if (boost::regex_search(to_utf8(val), reg)) {
// The closing brace is inserted first (at index 3) so that index 2 still
// refers to the original third character when the opening brace goes in.
179 val.insert(3, from_ascii("}"));
180 val.insert(2, from_ascii("{"));
// `rem` is passed alongside `val`; presumably it receives whatever part of
// `val` was not consumed by the conversion -- confirm against
// Encodings::fromLaTeXCommand.
183 docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem,
184 Encodings::TEXT_CMD);
185 if (!cnvtd.empty()) {
186 // it did, so we'll take that bit and proceed with what's left
191 // it's a command of some sort
202 //////////////////////////////////////////////////////////////////////
206 //////////////////////////////////////////////////////////////////////
// Constructs an entry from its key and entry type: is_bibtex_ is set to
// true, bib_key_ and entry_type_ are copied from the arguments, and info_
// is value-initialized.
// NOTE(review): the initializer list continues after the trailing comma;
// the remaining members are not visible in this view.
208 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
209 : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
// Builds a short author string for this entry.
// Two paths are visible:
//  - one works from label(), splitting it at '(' (the "author (year)" form
//    referred to in the comments below);
//  - the other takes the "author" field (or "editor" when "author" is
//    empty), runs it through convertLaTeXCommands(), splits the result on
//    " and " and formats the family names:
//      exactly two names -> "%1$s and %2$s"
//      more than two     -> "%1$s et al."
//      otherwise         -> family name of the first list element
// NOTE(review): the condition that selects between the two paths, and the
// handling of an empty author/editor, are not visible in this view.
214 docstring const BibTeXInfo::getAbbreviatedAuthor() const
217 docstring const opt = label();
// Presumably split() stores the text before '(' in `authors` and returns
// the rest, which is then trimmed -- confirm against split().
222 docstring const remainder = trim(split(opt, authors, '('));
223 if (remainder.empty())
224 // in this case, we didn't find a "(",
225 // so we don't have author (year)
230 docstring author = convertLaTeXCommands(operator[]("author"));
231 if (author.empty()) {
232 author = convertLaTeXCommands(operator[]("editor"));
237 // FIXME Move this to a separate routine that can
238 // be called from elsewhere.
240 // OK, we've got some names. Let's format them.
241 // Try to split the author list on " and "
242 vector<docstring> const authors =
243 getVectorFromString(author, from_ascii(" and "));
245 if (authors.size() == 2)
246 return bformat(_("%1$s and %2$s"),
247 familyName(authors[0]), familyName(authors[1]));
249 if (authors.size() > 2)
250 return bformat(_("%1$s et al."), familyName(authors[0]));
// NOTE(review): authors[0] is indexed unconditionally here, which assumes
// the split produced at least one element -- confirm that an empty author
// string cannot reach this point.
252 return familyName(authors[0]);
// Returns the year for this entry.
// Two paths are visible: one returns the "year" field directly; the other
// works from label(), splitting first at '(' and then at ')'.
// NOTE(review): the condition selecting the path, the declarations of
// `authors` and `year`, and the final return are not visible in this view.
256 docstring const BibTeXInfo::getYear() const
259 return operator[]("year");
261 docstring const opt = label();
// `tmp` receives what split() returns for the '(' delimiter.
266 docstring tmp = split(opt, authors, '(');
268 // we don't have author (year)
// Presumably this leaves the text before ')' in `year` -- confirm against
// split().
271 tmp = split(tmp, year, ')');
// Returns the value of this entry's "crossref" field.
// NOTE(review): the lines between the signature and the return are not
// visible in this view, so an earlier exit may exist.
276 docstring const BibTeXInfo::getXRef() const
280 return operator[]("crossref");
// Forward declaration: parseEmbeddedOption() below calls parseOptions()
// before the point in this file where parseOptions() is defined.
285 string parseOptions(string const & format, string & optkey,
286 string & ifpart, string & elsepart);
288 // Calls parseOptions to deal with an embedded option, such as:
289 // {%number%[[, no.~%number%]]}
290 // which must appear at the start of format. ifelsepart gets the
291 // whole of the option, and we return what's left after the option.
292 // we return format if there is an error.
293 string parseEmbeddedOption(string const & format, string & ifelsepart)
// Precondition check: format must begin with "{%". LASSERT is given
// `return format` as its second argument, presumably the action taken when
// the check fails -- confirm against the macro definition.
295 LASSERT(format[0] == '{' && format[1] == '%', return format);
// optkey, ifpart and elsepart are locals declared on lines not visible
// here; only the returned remainder is used below.
299 string const rest = parseOptions(format, optkey, ifpart, elsepart);
// parseOptions returns its input unchanged on error, hence the equality test.
300 if (format == rest) { // parse error
301 LYXERR0("ERROR! Couldn't parse `" << format <<"'.");
// The option text is taken as the leading part of `format` whose length is
// the difference between the two sizes; the assertion protects that
// subtraction from underflow.
304 LASSERT(rest.size() <= format.size(), /* */);
305 ifelsepart = format.substr(0, format.size() - rest.size());
310 // Gets a "clause" from a format string, where the clause is
311 // delimited by '[[' and ']]'. Returns what is left after the
312 // clause is removed, and returns format if there is an error.
// `clause` is an out-parameter for the extracted text.
// NOTE(review): the declaration of `fmt`, the enclosing loop and the branch
// bodies are not visible in this view.
313 string getClause(string const & format, string & clause)
318 // we'll remove characters from the front of fmt as we
321 if (fmt[0] == ']' && fmt.size() > 1 && fmt[1] == ']') {
// ^ "]]" at the front of fmt; the size test precedes the fmt[1] access.
326 // check for an embedded option
327 if (fmt[0] == '{' && fmt.size() > 1 && fmt[1] == '%') {
// "{%" at the front of fmt starts a nested option, which is handed to
// parseEmbeddedOption(); `part` receives the option text.
329 string const rest = parseEmbeddedOption(fmt, part);
331 LYXERR0("ERROR! Couldn't parse embedded option in `" << format <<"'.");
336 } else { // it's just a normal character
345 // parse an options string, which must appear at the start of the
346 // format parameter. puts the parsed bits in optkey, ifpart, and
347 // elsepart and returns what's left after the option is removed.
348 // if there's an error, it returns format itself.
// Shape accepted by the visible checks:
//   {%key%[[if-clause]]}   or   {%key%[[if-clause]][[else-clause]]}
349 string parseOptions(string const & format, string & optkey,
350 string & ifpart, string & elsepart)
352 LASSERT(format[0] == '{' && format[1] == '%', return format);
// Drop the leading "{%"; the key runs up to the next '%'.
354 string fmt = format.substr(2);
355 size_t pos = fmt.find('%'); // end of key
356 if (pos == string::npos) {
357 LYXERR0("Error parsing `" << format <<"'. Can't find end of key.");
360 optkey = fmt.substr(0,pos);
// Skip past the key and its terminating '%'.
361 fmt = fmt.substr(pos + 1);
362 // [[format]] should be next
363 if (fmt[0] != '[' || fmt[1] != '[') {
364 LYXERR0("Error parsing `" << format <<"'. Can't find '[[' after key.");
// NOTE(review): `curfmt` is declared on a line not visible here. Judging by
// the comparison with `fmt` further down, it holds the text handed to
// getClause() so that an unchanged (failed) result can be detected --
// confirm.
369 fmt = getClause(curfmt, ifpart);
371 LYXERR0("Error parsing `" << format <<"'. Couldn't get if clause.");
375 if (fmt[0] == '}') // we're done, no else clause
376 return fmt.substr(1);
378 // else part should follow
379 if (fmt[0] != '[' || fmt[1] != '[') {
380 LYXERR0("Error parsing `" << format <<"'. Can't find else clause.");
385 fmt = getClause(curfmt, elsepart);
// Error if getClause() returned its input, or if the option is not closed
// by '}'.
387 if (fmt == curfmt || fmt[0] != '}') {
388 LYXERR0("Error parsing `" << format <<"'. Can't find end of option.");
// Success: everything after the closing '}' is returned.
391 return fmt.substr(1);
// Expands a citation format string into text for this entry.
//   format   - the format string to expand
//   xref     - entry passed on to getValueForKey(); may be null (see the
//              null test in getValueForKey())
//   richtext - when false, characters seen while scanning_rich is set are
//              discarded (see the last branch of the chain below)
// Constructs recognised by the visible code:
//   %key%  - replaced by getValueForKey(key, xref)
//   {%...  - an option, parsed by parseOptions(); the if-part or else-part
//            is expanded by a recursive call
//   !}     - ends a rich-text span
// NOTE(review): the declarations of `fmt` and `key`, the enclosing loop and
// several branch bodies (including the test that starts a rich-text span)
// are not visible in this view.
397 docstring BibTeXInfo::expandFormat(string const & format,
398 BibTeXInfo const * const xref, bool richtext) const
400 docstring ret; // return value
402 bool scanning_key = false;
403 bool scanning_rich = false;
406 // we'll remove characters from the front of fmt as we
409 char_type thischar = fmt[0];
410 if (thischar == '%') {
411 // beginning or end of key
414 scanning_key = false;
415 // so we replace the key with its value, which may be empty
416 docstring const val = getValueForKey(key, xref);
424 else if (thischar == '{') {
425 // beginning of option?
427 LYXERR0("ERROR: Found `{' when scanning key in `" << format << "'.");
430 if (fmt.size() > 1) {
432 // it is the beginning of an optional format
436 string const newfmt =
437 parseOptions(fmt, optkey, ifpart, elsepart);
// parseOptions() returns its input unchanged when it cannot parse it.
438 if (newfmt == fmt) // parse error
441 docstring const val = getValueForKey(optkey, xref);
// The chosen clause is itself a format string, so it is expanded
// recursively with the same xref and richtext settings.
443 ret += expandFormat(ifpart, xref, richtext);
444 else if (!elsepart.empty())
445 ret += expandFormat(elsepart, xref, richtext);
446 // fmt will have been shortened for us already
450 // beginning of rich text
451 scanning_rich = true;
456 // we are here if the '{' was at the end of the format. hmm.
459 else if (scanning_rich && thischar == '!'
460 && fmt.size() > 1 && fmt[1] == '}') {
462 scanning_rich = false;
466 else if (scanning_key)
// Characters between the two '%' delimiters accumulate into `key`.
467 key += char(thischar);
468 else if (richtext || !scanning_rich)
470 // else the character is discarded, which will happen only if
471 // richtext == false and we are scanning rich text
// The two messages below report a key or rich-text span left open.
475 LYXERR0("Never found end of key in `" << format << "'!");
479 LYXERR0("Never found end of rich text in `" << format << "'!");
// Produces the formatted description of this entry and returns a reference
// to the member info_, in which it is stored.
// Visible steps: look up a "ref" field; obtain the cite format for this
// entry type from the buffer's document class; expand it with
// expandFormat(); run the result through convertLaTeXCommands().
// NOTE(review): info_ is assigned inside a const member function, so it is
// presumably declared mutable. The conditions around these steps (including
// what is done with the "ref" lookup) are not visible in this view.
486 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref,
487 Buffer const & buf, bool richtext) const
493 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
498 DocumentClass const & dc = buf.params().documentClass();
499 string const & format = dc.getCiteFormat(to_utf8(entry_type_));
500 info_ = expandFormat(format, xref, richtext);
503 info_ = convertLaTeXCommands(info_);
// Looks up `field` in this entry and returns a reference to a docstring.
// `empty_value` is a function-local static, so a reference to it stays
// valid after the call returns; presumably it is what is returned when the
// field is absent -- the return statements are not visible in this view.
508 docstring const & BibTeXInfo::operator[](docstring const & field) const
510 BibTeXInfo::const_iterator it = find(field);
513 static docstring const empty_value = docstring();
// Convenience overload for narrow-string field names: converts `field`
// with from_ascii() and forwards to the docstring overload.
518 docstring const & BibTeXInfo::operator[](string const & field) const
520 return operator[](from_ascii(field));
// Returns the value stored for `key`, with `xref` as a possible second
// source.
// The visible condition is true when this entry's own value is non-empty
// or when no xref was supplied (null pointer).
// NOTE(review): the statements that follow the condition are not visible;
// presumably `ret` is returned in that case and `xref` is consulted
// otherwise -- confirm.
524 docstring BibTeXInfo::getValueForKey(string const & key,
525 BibTeXInfo const * const xref) const
527 docstring const ret = operator[](key);
528 if (!ret.empty() || !xref)
534 //////////////////////////////////////////////////////////////////////
538 //////////////////////////////////////////////////////////////////////
541 // A functor for use with sort, leading to case insensitive sorting
// operator() returns true when compare_no_case() reports s1 as ordering
// before s2 (negative result), i.e. it acts as a "less than" predicate.
// NOTE(review): if the base class is std::binary_function, be aware that
// it was deprecated in C++11 and removed in C++17; std::sort does not need
// the base class.
542 class compareNoCase: public binary_function<docstring, docstring, bool>
545 bool operator()(docstring const & s1, docstring const & s2) const {
546 return compare_no_case(s1, s2) < 0;
// Collects the key (`first`) of every element in this container into a
// vector and sorts it with the compareNoCase predicate.
// NOTE(review): the return statement is not visible in this view.
552 vector<docstring> const BiblioInfo::getKeys() const
554 vector<docstring> bibkeys;
555 BiblioInfo::const_iterator it = begin();
556 for (; it != end(); ++it)
557 bibkeys.push_back(it->first);
558 sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
// Copies every element of field_names_ into a vector and sorts it with the
// default ordering.
// NOTE(review): if field_names_ is a set<docstring> with the default
// comparator (as the iterator type suggests), its elements are already
// visited in ascending order and the sort changes nothing -- confirm.
// The return statement is not visible in this view.
563 vector<docstring> const BiblioInfo::getFields() const
565 vector<docstring> bibfields;
566 set<docstring>::const_iterator it = field_names_.begin();
567 set<docstring>::const_iterator end = field_names_.end();
568 for (; it != end; ++it)
569 bibfields.push_back(*it);
570 sort(bibfields.begin(), bibfields.end());
// Copies every element of entry_types_ into a vector and sorts it with the
// default ordering.
// NOTE(review): if entry_types_ is a set<docstring> with the default
// comparator (as the iterator type suggests), its elements are already
// visited in ascending order and the sort changes nothing -- confirm.
// The return statement is not visible in this view.
575 vector<docstring> const BiblioInfo::getEntries() const
577 vector<docstring> bibentries;
578 set<docstring>::const_iterator it = entry_types_.begin();
579 set<docstring>::const_iterator end = entry_types_.end();
580 for (; it != end; ++it)
581 bibentries.push_back(*it);
582 sort(bibentries.begin(), bibentries.end());
// Looks up `key` and returns the abbreviated author string of the entry
// found, as computed by BibTeXInfo::getAbbreviatedAuthor().
// NOTE(review): the handling of a failed lookup (it == end()) is on lines
// not visible in this view.
587 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
589 BiblioInfo::const_iterator it = find(key);
592 BibTeXInfo const & data = it->second;
593 return data.getAbbreviatedAuthor();
// Looks up `key` and returns the citeNumber() of the entry found.
// NOTE(review): the handling of a failed lookup (it == end()) is on lines
// not visible in this view.
597 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
599 BiblioInfo::const_iterator it = find(key);
602 BibTeXInfo const & data = it->second;
603 return data.citeNumber();
// Returns the year for the entry stored under `key`.
// Visible logic: take the entry's own getYear(); on the fallback path, look
// up the entry named by getXRef() and use its year instead; the translated
// string "No year" is returned on two failure paths. When `use_modifier`
// is set and the entry's modifier() is non-zero, the modifier is appended.
// NOTE(review): the conditions guarding the fallback and the two
// "No year" returns, the handling of an unknown `key`, and the final
// return are on lines not visible in this view.
607 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
609 BiblioInfo::const_iterator it = find(key);
612 BibTeXInfo const & data = it->second;
613 docstring year = data.getYear();
615 // let's try the crossref
616 docstring const xref = data.getXRef();
618 return _("No year"); // no luck
619 BiblioInfo::const_iterator const xrefit = find(xref);
621 return _("No year"); // no luck again
622 BibTeXInfo const & xref_data = xrefit->second;
623 year = xref_data.getYear();
625 if (use_modifier && data.modifier() != 0)
626 year += data.modifier();
// Returns the formatted description of the entry stored under `key`.
// The entry's cross-reference key is looked up in this container; when the
// visible assignment is reached, a pointer to the cross-referenced entry is
// passed to BibTeXInfo::getInfo(), otherwise a null pointer is passed.
// NOTE(review): the handling of an unknown `key` and the conditions
// guarding the cross-reference lookup are on lines not visible here.
631 docstring const BiblioInfo::getInfo(docstring const & key,
632 Buffer const & buf, bool richtext) const
634 BiblioInfo::const_iterator it = find(key);
637 BibTeXInfo const & data = it->second;
638 BibTeXInfo const * xrefptr = 0;
639 docstring const xref = data.getXRef();
641 BiblioInfo::const_iterator const xrefit = find(xref);
643 xrefptr = &(xrefit->second);
645 return data.getInfo(xrefptr, buf, richtext);
// Reports isBibTeX() for the entry stored under `key`.
// NOTE(review): the handling of a failed lookup (it == end()) is on lines
// not visible in this view.
649 bool BiblioInfo::isBibtex(docstring const & key) const
651 BiblioInfo::const_iterator it = find(key);
654 return it->second.isBibTeX();
// Dispatches on the buffer's cite engine: ENGINE_BASIC and
// ENGINE_NATBIB_NUMERICAL use getNumericalStrings(); every other engine
// uses getAuthorYearStrings().
659 vector<docstring> const BiblioInfo::getCiteStrings(
660 docstring const & key, Buffer const & buf) const
662 CiteEngine const engine = buf.params().citeEngine();
663 if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
664 return getNumericalStrings(key, buf);
666 return getAuthorYearStrings(key, buf);
// Builds one display string per cite style available for the buffer's cite
// engine, in the numerical flavour (strings built around the literal token
// "#ID").
// An empty vector is returned when the abbreviated author or the year for
// `key` is empty, and on one earlier path whose condition is not visible.
// NOTE(review): the switch labels that select each string, the declaration
// of `str`, the store into `vec` and the final return are on lines not
// visible in this view.
670 vector<docstring> const BiblioInfo::getNumericalStrings(
671 docstring const & key, Buffer const & buf) const
674 return vector<docstring>();
676 docstring const author = getAbbreviatedAuthor(key);
677 docstring const year = getYear(key);
678 if (author.empty() || year.empty())
679 return vector<docstring>();
681 vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
// The result has exactly one slot per style.
683 vector<docstring> vec(styles.size());
684 for (size_t i = 0; i != vec.size(); ++i) {
690 str = from_ascii("[#ID]");
694 str = _("Add to bibliography only.");
698 str = author + " [#ID]";
702 str = author + " #ID";
706 str = from_ascii("#ID");
718 str = '(' + year + ')';
// Builds one display string per cite style available for the buffer's cite
// engine, in the author-year flavour (strings combining the abbreviated
// author and the year).
// An empty vector is returned when the abbreviated author or the year for
// `key` is empty, and on one earlier path whose condition is not visible.
// NOTE(review): the switch labels that select each string, the declaration
// of `str`, the store into `vec` and the final return are on lines not
// visible in this view.
729 vector<docstring> const BiblioInfo::getAuthorYearStrings(
730 docstring const & key, Buffer const & buf) const
733 return vector<docstring>();
735 docstring const author = getAbbreviatedAuthor(key);
736 docstring const year = getYear(key);
737 if (author.empty() || year.empty())
738 return vector<docstring>();
740 vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
// The result has exactly one slot per style.
742 vector<docstring> vec(styles.size());
743 for (size_t i = 0; i != vec.size(); ++i) {
748 // jurabib only: Author/Annotator
749 // (i.e. the "before" field, 2nd opt arg)
750 str = author + "/<" + _("before") + '>';
754 str = _("Add to bibliography only.");
758 str = author + " (" + year + ')';
762 str = '(' + author + ", " + year + ')';
766 str = author + ' ' + year ;
770 str = author + ", " + year ;
782 str = '(' + year + ')';
// Inserts every element of `info` into bimap_.
// NOTE(review): bimap_ is used as a map<docstring, BibTeXInfo> elsewhere in
// this file; if it is a std::map, range insert leaves entries whose key is
// already present untouched. Any further statements of this function are
// not visible in this view.
791 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
793 bimap_.insert(info.begin(), info.end());
798 // used in xhtml to sort a list of BibTeXInfo objects
// "Less than" predicate over entries: orders by abbreviated author, then by
// year, then by the "title" field, each compared with operator< on
// docstring. Both pointers are dereferenced unconditionally.
799 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
801 docstring const lauth = lhs->getAbbreviatedAuthor();
802 docstring const rauth = rhs->getAbbreviatedAuthor();
803 docstring const lyear = lhs->getYear();
804 docstring const ryear = rhs->getYear();
805 docstring const ltitl = lhs->operator[]("title");
806 docstring const rtitl = rhs->operator[]("title");
// Lexicographic comparison of the (author, year, title) triple.
807 return (lauth < rauth)
808 || (lauth == rauth && lyear < ryear)
809 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
// Rebuilds cited_entries_: the keys of the entries cited in `buf`, ordered
// by lSorter.
// Steps visible below:
//   1. clear cited_entries_;
//   2. gather the distinct citation keys from the "citation" TOC;
//   3. map each key to its BibTeXInfo object;
//   4. sort those objects with lSorter;
//   5. append the key of each sorted object to cited_entries_.
// NOTE(review): the statements guarded by the three `if`s below are not
// visible in this view; presumably they skip the item or return early.
814 void BiblioInfo::collectCitedEntries(Buffer const & buf)
816 cited_entries_.clear();
817 // We are going to collect all the citation keys used in the document,
818 // getting them from the TOC.
819 // FIXME We may want to collect these differently, in the first case,
820 // so that we might have them in order of appearance.
821 set<docstring> citekeys;
822 Toc const & toc = buf.tocBackend().toc("citation");
823 Toc::const_iterator it = toc.begin();
824 Toc::const_iterator const en = toc.end();
825 for (; it != en; ++it) {
826 if (it->str().empty())
// One TOC item may carry several keys; getVectorFromString() is called with
// its default delimiter to split them.
828 vector<docstring> const keys = getVectorFromString(it->str());
829 citekeys.insert(keys.begin(), keys.end());
831 if (citekeys.empty())
834 // We have a set of the keys used in this document.
835 // We will now convert it to a list of the BibTeXInfo objects used in
837 vector<BibTeXInfo const *> bi;
838 set<docstring>::const_iterator cit = citekeys.begin();
839 set<docstring>::const_iterator const cen = citekeys.end();
840 for (; cit != cen; ++cit) {
841 BiblioInfo::const_iterator const bt = find(*cit);
// The condition below matches keys that are unknown or whose entry is not
// a BibTeX entry.
842 if (bt == end() || !bt->second.isBibTeX())
// The stored pointers refer to elements of this container.
844 bi.push_back(&(bt->second));
847 sort(bi.begin(), bi.end(), lSorter);
849 // Now we can write the sorted keys
850 vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
851 vector<BibTeXInfo const *>::const_iterator ben = bi.end();
852 for (; bit != ben; ++bit)
853 cited_entries_.push_back((*bit)->key());
// Assigns citation labels to the cited entries of `buf`.
// Visible logic: refresh cited_entries_ via collectCitedEntries(); then,
// for each cited key found in bimap_, either set a running cite number
// (incremented before use) or compare the entry with the previously seen
// one and set a letter modifier when abbreviated author and year match.
// NOTE(review): the declarations of `keynumber`, `modifier` and the flag
// computed from `engine`, the branch that chooses between numbering and
// modifiers, and the assignment to `last` are on lines not visible here.
857 void BiblioInfo::makeCitationLabels(Buffer const & buf)
859 collectCitedEntries(buf);
860 CiteEngine const engine = buf.params().citeEngine();
862 (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL);
866 // used to remember the last one we saw
867 // we'll be comparing entries to see if we need to add
868 // modifiers, like "1984a"
// NOTE(review): `last` is declared without an initial value. The
// begin() test at the head of the condition further down keeps it from
// being dereferenced on the first iteration; if the first key were skipped
// by the lookup-failure branch, `last` might still be unassigned on a later
// iteration -- confirm against the hidden lines.
869 map<docstring, BibTeXInfo>::iterator last;
871 vector<docstring>::const_iterator it = cited_entries_.begin();
872 vector<docstring>::const_iterator const en = cited_entries_.end();
873 for (; it != en; ++it) {
874 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
875 // this shouldn't happen, but...
876 if (biit == bimap_.end())
877 // ...fail gracefully, anyway.
879 BibTeXInfo & entry = biit->second;
881 docstring const num = convert<docstring>(++keynumber);
882 entry.setCiteNumber(num);
884 if (it != cited_entries_.begin()
885 && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
886 // we access the year via getYear() so as to get it from the xref,
887 // if we need to do so
888 && getYear(entry.key()) == getYear(last->second.key())) {
890 // so the last one should have been 'a'
891 last->second.setModifier('a');
893 } else if (modifier == 'z')
900 entry.setModifier(modifier);
901 // remember the last one
908 //////////////////////////////////////////////////////////////////////
912 //////////////////////////////////////////////////////////////////////
// Citation command names. citeCommands and citeStylesArray each list nine
// items in the same order; citationStyleFromString() uses a position found
// in citeCommands to index citeStylesArray, so the two tables must stay in
// step.
917 char const * const citeCommands[] = {
918 "cite", "citet", "citep", "citealt", "citealp",
919 "citeauthor", "citeyear", "citeyearpar", "nocite" };
// Element count: total array size divided by the size of one element.
921 unsigned int const nCiteCommands =
922 sizeof(citeCommands) / sizeof(char *);
// Style constants, parallel to citeCommands.
924 CiteStyle const citeStylesArray[] = {
925 CITE, CITET, CITEP, CITEALT, CITEALP,
926 CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
928 unsigned int const nCiteStyles =
929 sizeof(citeStylesArray) / sizeof(CiteStyle);
// Styles searched by citationStyleToString() on one of its two checks;
// going by the name, presumably those that have a "full" variant --
// confirm.
931 CiteStyle const citeStylesFull[] = {
932 CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
934 unsigned int const nCiteStylesFull =
935 sizeof(citeStylesFull) / sizeof(CiteStyle);
// Styles searched by citationStyleToString() when s.forceUpperCase is set.
// The contents are currently identical to citeStylesFull.
937 CiteStyle const citeStylesUCase[] = {
938 CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
940 unsigned int const nCiteStylesUCase =
941 sizeof(citeStylesUCase) / sizeof(CiteStyle);
// Parses a citation command name into a CitationStyle.
// Visible logic: work on a copy of `command`; on one path set
// s.forceUpperCase; strip a trailing '*' unless the command is exactly
// "cite"; look the remaining name up in citeCommands and take the style at
// the same position in citeStylesArray.
// NOTE(review): the declaration of `s`, the condition for forceUpperCase,
// what is done inside the '*' branch besides stripping, the handling of a
// name that is not found, and the return are on lines not visible here.
946 CitationStyle citationStyleFromString(string const & command)
952 string cmd = command;
954 s.forceUpperCase = true;
// NOTE(review): for an empty `cmd` this subtraction would wrap around;
// presumably an empty command is rejected before this point -- confirm.
958 size_t const n = cmd.size() - 1;
959 if (cmd != "cite" && cmd[n] == '*') {
961 cmd = cmd.substr(0, n);
// Linear search over the command table; `last` is the one-past-the-end
// pointer.
964 char const * const * const last = citeCommands + nCiteCommands;
965 char const * const * const ptr = find(citeCommands, last, cmd);
// Position of the match within citeCommands, reused as the index into the
// parallel citeStylesArray.
968 size_t idx = ptr - citeCommands;
969 s.style = citeStylesArray[idx];
// Converts a CitationStyle back into a command name.
// The base name is read from citeCommands using s.style directly as the
// index, which relies on the CiteStyle values matching the table positions
// -- confirm against the enum definition.
// Two membership tests follow, one against citeStylesFull and, when
// s.forceUpperCase is set, one against citeStylesUCase.
// NOTE(review): what is done to `cite` when a test succeeds, the condition
// enclosing the first test, and the return are on lines not visible here.
// `last` is declared twice below, so the two declarations are presumably in
// different scopes.
975 string citationStyleToString(const CitationStyle & s)
977 string cite = citeCommands[s.style];
979 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
980 if (std::find(citeStylesFull, last, s.style) != last)
984 if (s.forceUpperCase) {
985 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
986 if (std::find(citeStylesUCase, last, s.style) != last)
993 vector<CiteStyle> citeStyles(CiteEngine engine)
995 unsigned int nStyles = 0;
996 unsigned int start = 0;
1003 case ENGINE_NATBIB_AUTHORYEAR:
1004 case ENGINE_NATBIB_NUMERICAL:
1005 nStyles = nCiteStyles - 1;
1008 case ENGINE_JURABIB:
1009 nStyles = nCiteStyles;
1014 vector<CiteStyle> styles(nStyles);
1017 for (; i != styles.size(); ++i, ++j)
1018 styles[i] = citeStylesArray[j];