3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
10 * Full author contact details are available in file CREDITS.
15 #include "BiblioInfo.h"
17 #include "BufferParams.h"
18 #include "buffer_funcs.h"
20 #include "InsetIterator.h"
21 #include "Paragraph.h"
22 #include "TocBackend.h"
24 #include "insets/Inset.h"
25 #include "insets/InsetBibitem.h"
26 #include "insets/InsetBibtex.h"
27 #include "insets/InsetInclude.h"
29 #include "support/convert.h"
30 #include "support/docstream.h"
31 #include "support/gettext.h"
32 #include "support/lassert.h"
33 #include "support/lstrings.h"
34 #include "support/textutils.h"
36 #include "boost/regex.hpp"
41 using namespace lyx::support;
// Returns the "family name" part of a single author-type string.
// Visible logic: when the name contains a comma, the text before the
// right-most comma (left-trimmed) is returned; otherwise the name is split
// on single spaces and the tokens are scanned for a "von" part.
// NOTE(review): this listing omits several lines of the body (including the
// returns of the later branches), so only the visible steps are described.
48 // gets the "family name" from an author-type string
49 docstring familyName(docstring const & name)
54 // first we look for a comma, and take the last name to be everything
55 // preceding the right-most one, so that we also get the "jr" part.
56 docstring::size_type idx = name.rfind(',');
57 if (idx != docstring::npos)
58 return ltrim(name.substr(0, idx));
60 // OK, so now we want to look for the last name. We're going to
61 // include the "von" part. This isn't perfect.
62 // Split on spaces, to get various tokens.
63 vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
64 // If we only get two, assume the last one is the last name
// The test below is "<= 2", so zero- and one-token results take this
// branch as well.
65 if (pieces.size() <= 2)
68 // Now we look for the first token that begins with a lower case letter.
69 vector<docstring>::const_iterator it = pieces.begin();
70 vector<docstring>::const_iterator en = pieces.end();
71 for (; it != en; ++it) {
// Empty tokens are checked for before the first character is read.
72 if ((*it).size() == 0)
74 char_type const c = (*it)[0];
// Reaching the end iterator means the scan above ran over every token.
79 if (it == en) // we never found a "von"
82 // reconstruct what we need to return
// Continues from the token at which the previous loop stopped.
85 for (; it != en; ++it) {
// Converts a string that may contain LaTeX commands into unicode text.
// The visible code keeps three state flags (inside a command name, inside
// math, previous character was an escaping backslash), looks at the first
// character of the remaining input, and asks Encodings::fromLaTeXCommand()
// to translate anything that looks like a text command.
// NOTE(review): many lines of the body are missing from this listing; the
// comments below describe only the statements that are visible.
95 // converts a string containing LaTeX commands into unicode
97 docstring convertLaTeXCommands(docstring const & str)
102 bool scanning_cmd = false;
103 bool scanning_math = false;
104 bool escaped = false; // used to catch \$, etc.
106 char_type const ch = val[0];
108 // if we're scanning math, we output everything until we
109 // find an unescaped $, at which point we break out.
116 scanning_math = false;
122 // if we're scanning a command name, then we just
123 // discard characters until we hit something that
126 if (isAlphaASCII(ch)) {
131 // so we're done with this command.
132 // now we fall through and check this character.
133 scanning_cmd = false;
136 // was the last character a \? If so, then this is something like:
137 // \\ or \$, so we'll just output it. That's probably not always right...
139 // exception: output \, as THIN SPACE
// 0x2009 is the code point U+2009 (THIN SPACE).
141 ret.push_back(0x2009);
152 scanning_math = true;
156 // we just ignore braces
157 if (ch == '{' || ch == '}') {
162 // we're going to check things that look like commands, so if
163 // this doesn't, just output it.
170 // ok, could be a command of some sort
171 // let's see if it corresponds to some unicode
172 // unicodesymbols has things in the form: \"{u},
173 // whereas we may see things like: \"u. So we'll
174 // look for that and change it, if necessary.
// The pattern matches, at the start of val, a backslash followed by one
// non-word character and then one word character.
175 static boost::regex const reg("^\\\\\\W\\w");
176 if (boost::regex_search(to_utf8(val), reg)) {
// The closing brace goes in first (at index 3) so that the index used for
// the opening brace (2) is not shifted: \"u becomes \"{u}.
177 val.insert(3, from_ascii("}"));
178 val.insert(2, from_ascii("{"));
// Only text-mode commands are requested (Encodings::TEXT_CMD); rem is
// passed alongside val to the converter.
181 docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem,
182 Encodings::TEXT_CMD);
// An empty result is treated as "no conversion available".
183 if (!cnvtd.empty()) {
184 // it did, so we'll take that bit and proceed with what's left
189 // it's a command of some sort
200 //////////////////////////////////////////////////////////////////////
204 //////////////////////////////////////////////////////////////////////
// Constructs an entry from its citation key and entry type. The visible
// part of the initializer list sets is_bibtex_ to true, stores the key and
// type, and default-initialises info_.
// NOTE(review): the rest of the initializer list and the constructor body
// are not visible in this listing.
206 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
207 : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(),
// Returns a short author string for this entry.
// Two paths are visible: one parses label() by splitting it at '(', the
// other formats the "author" field (falling back to "editor") as
// "A", "A and B" or "A et al." using familyName().
// NOTE(review): the condition choosing between the two paths and several
// return statements are not visible in this listing.
212 docstring const BibTeXInfo::getAbbreviatedAuthor() const
215 docstring const opt = label();
// split() stores the text before '(' in authors; what follows is trimmed
// and kept as remainder.
220 docstring const remainder = trim(split(opt, authors, '('));
221 if (remainder.empty())
222 // in this case, we didn't find a "(",
223 // so we don't have author (year)
228 docstring author = convertLaTeXCommands(operator[]("author"));
// No author: try the editor field instead.
229 if (author.empty()) {
230 author = convertLaTeXCommands(operator[]("editor"));
235 // OK, we've got some names. Let's format them.
236 // Try to split the author list on " and "
237 vector<docstring> const authors =
238 getVectorFromString(author, from_ascii(" and "));
// Exactly two names: both family names joined by a translatable "and".
240 if (authors.size() == 2)
241 return bformat(_("%1$s and %2$s"),
242 familyName(authors[0]), familyName(authors[1]));
// More than two names: first family name followed by "et al.".
244 if (authors.size() > 2)
245 return bformat(_("%1$s et al."), familyName(authors[0]));
// Remaining case: the first (presumably only) name.
// NOTE(review): authors[0] assumes the vector is non-empty; any guard for
// that is not visible here.
247 return familyName(authors[0]);
// Returns the year of this entry.
// Visible paths: return the "year" field directly, or extract the text
// between '(' and ')' from label().
// NOTE(review): the condition selecting the path and the final return are
// not visible in this listing.
251 docstring const BibTeXInfo::getYear() const
254 return operator[]("year");
256 docstring const opt = label();
// tmp receives what follows the first '('; the part before it goes to
// authors.
261 docstring tmp = split(opt, authors, '(');
263 // we don't have author (year)
// The text up to ')' is stored in year.
266 tmp = split(tmp, year, ')');
// Returns the value of this entry's "crossref" field.
// NOTE(review): any lines preceding the return are not visible here.
271 docstring const BibTeXInfo::getXRef() const
275 return operator[]("crossref");
// Builds a one-line description of this entry from its fields; xref is
// passed to getValueForKey() for most lookups so that a cross-referenced
// entry can be consulted.
// Visible pieces: author (or editor), year, title, a location (pages or
// chapter), a medium (journal, publisher, school or institution) and the
// volume are collected, written to a stream, right-trimmed and stored in
// info_ after LaTeX-command conversion.
// NOTE(review): many conditions and the return statements are not visible
// in this listing.
279 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
// Looks up a field named "ref" on this entry first.
285 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
291 // This could be made a lot better using the entry_type_
292 // field to customize the output based upon entry type.
294 // Search for all possible "required" fields
295 docstring author = getValueForKey("author", xref);
297 author = getValueForKey("editor", xref);
299 docstring year = getValueForKey("year", xref);
300 docstring title = getValueForKey("title", xref);
// Location inside the document: pages first, chapter as fallback.
301 docstring docLoc = getValueForKey("pages", xref);
302 if (docLoc.empty()) {
303 docLoc = getValueForKey("chapter", xref);
305 docLoc = _("Ch. ") + docLoc;
307 docLoc = _("pp. ") + docLoc;
// Publication medium: journal, then publisher, school, institution.
310 docstring media = getValueForKey("journal", xref);
312 media = getValueForKey("publisher", xref);
314 media = getValueForKey("school", xref);
// NOTE(review): unlike the lookups above, this call does not pass xref.
// Confirm whether that is intentional.
316 media = getValueForKey("institution");
319 docstring volume = getValueForKey("volume", xref);
321 odocstringstream result;
323 result << author << ", ";
327 result << ", " << media;
329 result << " (" << year << ")";
331 result << ", " << docLoc;
333 docstring const result_str = rtrim(result.str());
// Only a non-empty result is converted and stored in info_.
334 if (!result_str.empty()) {
335 info_ = convertLaTeXCommands(result_str);
339 // This should never happen (or at least be very unusual!)
// Function-local static: the function returns a reference, so a fallback
// value needs a lifetime beyond the call.
340 static docstring e = docstring();
// Read-only field access by name: looks the field up with find().
// A function-local static empty string is defined, presumably returned
// when the field is absent.
// NOTE(review): the branch and returns are not visible in this listing.
345 docstring const & BibTeXInfo::operator[](docstring const & field) const
347 BibTeXInfo::const_iterator it = find(field);
350 static docstring const empty_value = docstring();
// Convenience overload: converts the field name with from_ascii() and
// forwards to the docstring overload of operator[].
355 docstring const & BibTeXInfo::operator[](string const & field) const
357 return operator[](from_ascii(field));
// Fetches the value stored under key on this entry. The visible test
// separates two cases: this entry has a non-empty value or no xref was
// supplied, versus an empty value with an xref available.
// NOTE(review): the statements executed in either case are not visible
// here; presumably the xref entry is consulted in the second case.
361 docstring BibTeXInfo::getValueForKey(string const & key,
362 BibTeXInfo const * const xref) const
364 docstring const ret = operator[](key);
365 if (!ret.empty() || !xref)
371 //////////////////////////////////////////////////////////////////////
375 //////////////////////////////////////////////////////////////////////
// Strict-weak-ordering predicate on docstrings: s1 orders before s2 when
// compare_no_case(s1, s2) is negative.
378 // A functor for use with sort, leading to case insensitive sorting
379 class compareNoCase: public binary_function<docstring, docstring, bool>
382 bool operator()(docstring const & s1, docstring const & s2) const {
383 return compare_no_case(s1, s2) < 0;
// Collects the key (it->first) of every entry in this BiblioInfo and sorts
// the keys with compareNoCase.
// NOTE(review): the return statement is not visible in this listing.
389 vector<docstring> const BiblioInfo::getKeys() const
391 vector<docstring> bibkeys;
392 BiblioInfo::const_iterator it = begin();
393 for (; it != end(); ++it)
394 bibkeys.push_back(it->first);
395 sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
// Copies every name in field_names_ into a vector and sorts it with the
// default ordering.
// NOTE(review): the return statement is not visible in this listing.
400 vector<docstring> const BiblioInfo::getFields() const
402 vector<docstring> bibfields;
403 set<docstring>::const_iterator it = field_names_.begin();
404 set<docstring>::const_iterator end = field_names_.end();
405 for (; it != end; ++it)
406 bibfields.push_back(*it);
407 sort(bibfields.begin(), bibfields.end());
// Copies every name in entry_types_ into a vector and sorts it with the
// default ordering.
// NOTE(review): the return statement is not visible in this listing.
412 vector<docstring> const BiblioInfo::getEntries() const
414 vector<docstring> bibentries;
415 set<docstring>::const_iterator it = entry_types_.begin();
416 set<docstring>::const_iterator end = entry_types_.end();
417 for (; it != end; ++it)
418 bibentries.push_back(*it);
419 sort(bibentries.begin(), bibentries.end());
// Looks up key and returns the abbreviated author of the matching entry.
// NOTE(review): the handling of a key that is not found is not visible in
// this listing.
424 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
426 BiblioInfo::const_iterator it = find(key);
429 BibTeXInfo const & data = it->second;
430 return data.getAbbreviatedAuthor();
// Looks up key and returns the cite number stored on the matching entry.
// NOTE(review): the handling of a key that is not found is not visible in
// this listing.
434 docstring const BiblioInfo::getCiteNumber(docstring const & key) const
436 BiblioInfo::const_iterator it = find(key);
439 BibTeXInfo const & data = it->second;
440 return data.citeNumber();
// Returns the year for the entry stored under key. The visible code reads
// the entry's own year, has a fallback path through the entry named by its
// crossref field, returns the translatable string "No year" on two failure
// paths, and appends the entry's modifier when use_modifier is set and the
// modifier is non-zero.
// NOTE(review): the conditions guarding these steps and the final return
// are not visible in this listing.
444 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
446 BiblioInfo::const_iterator it = find(key);
449 BibTeXInfo const & data = it->second;
450 docstring year = data.getYear();
452 // let's try the crossref
453 docstring const xref = data.getXRef();
455 return _("No year"); // no luck
456 BiblioInfo::const_iterator const xrefit = find(xref);
458 return _("No year"); // no luck again
459 BibTeXInfo const & xref_data = xrefit->second;
460 year = xref_data.getYear();
// A modifier of 0 means "none"; otherwise it is appended to the year.
462 if (use_modifier && data.modifier() != 0)
463 year += data.modifier();
// Returns the description of the entry stored under key, as produced by
// BibTeXInfo::getInfo(). The entry's crossref is looked up in this
// BiblioInfo, and a pointer to the cross-referenced entry is passed along;
// the pointer starts out null.
// NOTE(review): the checks around both find() calls are not visible here.
468 docstring const BiblioInfo::getInfo(docstring const & key) const
470 BiblioInfo::const_iterator it = find(key);
473 BibTeXInfo const & data = it->second;
474 BibTeXInfo const * xrefptr = 0;
475 docstring const xref = data.getXRef();
477 BiblioInfo::const_iterator const xrefit = find(xref);
479 xrefptr = &(xrefit->second);
481 return data.getInfo(xrefptr);
// Looks up key and reports the matching entry's isBibTeX() flag.
// NOTE(review): the handling of a key that is not found is not visible in
// this listing.
485 bool BiblioInfo::isBibtex(docstring const & key) const
487 BiblioInfo::const_iterator it = find(key);
490 return it->second.isBibTeX();
// Returns the citation strings for key, choosing the format from the
// buffer's cite engine: ENGINE_BASIC and ENGINE_NATBIB_NUMERICAL use
// getNumericalStrings(), everything else uses getAuthorYearStrings().
495 vector<docstring> const BiblioInfo::getCiteStrings(
496 docstring const & key, Buffer const & buf) const
498 CiteEngine const engine = buf.params().citeEngine();
499 if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
500 return getNumericalStrings(key, buf);
502 return getAuthorYearStrings(key, buf);
// Builds one display string per cite style of the buffer's cite engine,
// for numerical citation. "#ID" appears in the strings as a literal
// placeholder.
// An empty vector is returned when the abbreviated author or the year is
// empty, and on one earlier path whose condition is not visible.
// NOTE(review): the switch over the styles and the return of vec are not
// visible in this listing; only the assigned strings are.
506 vector<docstring> const BiblioInfo::getNumericalStrings(
507 docstring const & key, Buffer const & buf) const
510 return vector<docstring>();
512 docstring const author = getAbbreviatedAuthor(key);
513 docstring const year = getYear(key);
514 if (author.empty() || year.empty())
515 return vector<docstring>();
517 vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
// One slot per style returned by citeStyles().
519 vector<docstring> vec(styles.size());
520 for (size_t i = 0; i != vec.size(); ++i) {
526 str = from_ascii("[#ID]");
530 str = _("Add to bibliography only.");
534 str = author + " [#ID]";
538 str = author + " #ID";
542 str = from_ascii("#ID");
554 str = '(' + year + ')';
// Builds one display string per cite style of the buffer's cite engine,
// for author-year citation, combining the abbreviated author and the year
// in different punctuation patterns.
// An empty vector is returned when the abbreviated author or the year is
// empty, and on one earlier path whose condition is not visible.
// NOTE(review): the switch over the styles and the return of vec are not
// visible in this listing; only the assigned strings are.
565 vector<docstring> const BiblioInfo::getAuthorYearStrings(
566 docstring const & key, Buffer const & buf) const
569 return vector<docstring>();
571 docstring const author = getAbbreviatedAuthor(key);
572 docstring const year = getYear(key);
573 if (author.empty() || year.empty())
574 return vector<docstring>();
576 vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
// One slot per style returned by citeStyles().
578 vector<docstring> vec(styles.size());
579 for (size_t i = 0; i != vec.size(); ++i) {
584 // jurabib only: Author/Annotator
585 // (i.e. the "before" field, 2nd opt arg)
586 str = author + "/<" + _("before") + '>';
590 str = _("Add to bibliography only.");
594 str = author + " (" + year + ')';
598 str = '(' + author + ", " + year + ')';
602 str = author + ' ' + year ;
606 str = author + ", " + year ;
618 str = '(' + year + ')';
// Inserts every entry of info into bimap_.
// NOTE(review): if bimap_ is a std::map, range insert() keeps existing
// entries whose key is already present rather than replacing them; confirm
// that this is the intended merge policy.
// NOTE(review): any further statements of the body are not visible here.
627 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
629 bimap_.insert(info.begin(), info.end());
// Ordering predicate for BibTeXInfo pointers: compares abbreviated author
// first, then year, then the "title" field, each with docstring operator<.
// Returns true when lhs orders strictly before rhs.
634 // used in xhtml to sort a list of BibTeXInfo objects
635 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
637 docstring const lauth = lhs->getAbbreviatedAuthor();
638 docstring const rauth = rhs->getAbbreviatedAuthor();
639 docstring const lyear = lhs->getYear();
640 docstring const ryear = rhs->getYear();
641 docstring const ltitl = lhs->operator[]("title");
642 docstring const rtitl = rhs->operator[]("title");
// Lexicographic comparison over (author, year, title).
643 return (lauth < rauth)
644 || (lauth == rauth && lyear < ryear)
645 || (lauth == rauth && lyear == ryear && ltitl < rtitl);
// Rebuilds cited_entries_: gathers the citation keys found in the buffer's
// "citation" TOC, maps them to the BibTeXInfo entries known to this
// BiblioInfo, sorts those with lSorter and stores their keys in that order.
// NOTE(review): a few lines of the body (e.g. the statements following the
// bare if-conditions) are not visible in this listing.
650 void BiblioInfo::collectCitedEntries(Buffer const & buf)
652 cited_entries_.clear();
653 // We are going to collect all the citation keys used in the document,
654 // getting them from the TOC.
655 // FIXME We may want to collect these differently, in the first case,
656 // so that we might have them in order of appearance.
// A set is used, so each key is kept only once.
657 set<docstring> citekeys;
658 Toc const & toc = buf.tocBackend().toc("citation");
659 Toc::const_iterator it = toc.begin();
660 Toc::const_iterator const en = toc.end();
661 for (; it != en; ++it) {
662 if (it->str().empty())
// A TOC item's string may hold several keys; it is split with the default
// delimiter of getVectorFromString().
664 vector<docstring> const keys = getVectorFromString(it->str());
665 citekeys.insert(keys.begin(), keys.end());
667 if (citekeys.empty())
670 // We have a set of the keys used in this document.
671 // We will now convert it to a list of the BibTeXInfo objects used in
673 vector<BibTeXInfo const *> bi;
674 set<docstring>::const_iterator cit = citekeys.begin();
675 set<docstring>::const_iterator const cen = citekeys.end();
676 for (; cit != cen; ++cit) {
677 BiblioInfo::const_iterator const bt = find(*cit);
// Keys with no matching entry, or whose entry is not a BibTeX one, are
// tested for here.
678 if (bt == end() || !bt->second.isBibTeX())
// The stored pointers refer to entries owned by this BiblioInfo.
680 bi.push_back(&(bt->second));
683 sort(bi.begin(), bi.end(), lSorter);
685 // Now we can write the sorted keys
686 vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
687 vector<BibTeXInfo const *>::const_iterator ben = bi.end();
688 for (; bit != ben; ++bit)
689 cited_entries_.push_back((*bit)->key());
// Assigns citation labels to the cited entries of buf. After refreshing
// cited_entries_ via collectCitedEntries(), the visible code walks the
// entries in order, sets a running cite number on each, and compares each
// entry with the previous one (same abbreviated author and same year) to
// decide on a disambiguating modifier such as the 'a' in "1984a".
// NOTE(review): many lines of the body are missing from this listing (the
// declarations of keynumber and modifier, and most of the branch bodies),
// so only the visible statements are described.
693 void BiblioInfo::makeCitationLabels(Buffer const & buf)
695 collectCitedEntries(buf);
696 CiteEngine const engine = buf.params().citeEngine();
// Same engine test as in getCiteStrings(): basic and natbib-numerical.
698 (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL);
702 // used to remember the last one we saw
703 // we'll be comparing entries to see if we need to add
704 // modifiers, like "1984a"
705 map<docstring, BibTeXInfo>::iterator last;
707 vector<docstring>::const_iterator it = cited_entries_.begin();
708 vector<docstring>::const_iterator const en = cited_entries_.end();
709 for (; it != en; ++it) {
// Non-const lookup: the entry is modified below.
710 map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
711 // this shouldn't happen, but...
712 if (biit == bimap_.end())
713 // ...fail gracefully, anyway.
715 BibTeXInfo & entry = biit->second;
// The counter is pre-incremented before conversion to a docstring.
717 docstring const num = convert<docstring>(++keynumber);
718 entry.setCiteNumber(num);
// "last" is only dereferenced when this is not the first cited entry.
720 if (it != cited_entries_.begin()
721 && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
722 // we access the year via getYear() so as to get it from the xref,
723 // if we need to do so
724 && getYear(entry.key()) == getYear(last->second.key())) {
726 // so the last one should have been 'a'
727 last->second.setModifier('a');
729 } else if (modifier == 'z')
736 entry.setModifier(modifier);
737 // remember the last one
744 //////////////////////////////////////////////////////////////////////
748 //////////////////////////////////////////////////////////////////////
// Lookup tables for citation commands and styles.
// citeCommands and citeStylesArray are parallel: code in this file uses an
// index found in one to read the other, and indexes citeCommands with a
// CiteStyle value, so the two lists must be kept in the same order.
753 char const * const citeCommands[] = {
754 "cite", "citet", "citep", "citealt", "citealp",
755 "citeauthor", "citeyear", "citeyearpar", "nocite" };
// Number of elements in citeCommands.
757 unsigned int const nCiteCommands =
758 sizeof(citeCommands) / sizeof(char *);
760 CiteStyle const citeStylesArray[] = {
761 CITE, CITET, CITEP, CITEALT, CITEALP,
762 CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
// Number of elements in citeStylesArray.
764 unsigned int const nCiteStyles =
765 sizeof(citeStylesArray) / sizeof(CiteStyle);
// Styles checked by citationStyleToString() before the forceUpperCase
// test; presumably those that accept the starred "full" form.
767 CiteStyle const citeStylesFull[] = {
768 CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
770 unsigned int const nCiteStylesFull =
771 sizeof(citeStylesFull) / sizeof(CiteStyle);
// Styles checked by citationStyleToString() when forceUpperCase is set.
773 CiteStyle const citeStylesUCase[] = {
774 CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
776 unsigned int const nCiteStylesUCase =
777 sizeof(citeStylesUCase) / sizeof(CiteStyle);
// Parses a citation command name into a CitationStyle. The visible code
// works on a copy of the command, can set forceUpperCase, strips a
// trailing '*' from anything other than "cite", then looks the remaining
// name up in citeCommands and maps its index to citeStylesArray.
// NOTE(review): the declaration of s, the conditions around several of
// these steps and the return are not visible in this listing.
782 CitationStyle citationStyleFromString(string const & command)
788 string cmd = command;
790 s.forceUpperCase = true;
// NOTE(review): cmd.size() - 1 wraps around for an empty cmd; any guard
// against that is not visible here.
794 size_t const n = cmd.size() - 1;
795 if (cmd != "cite" && cmd[n] == '*') {
// Drop the trailing '*'.
797 cmd = cmd.substr(0, n);
800 char const * const * const last = citeCommands + nCiteCommands;
801 char const * const * const ptr = find(citeCommands, last, cmd);
// Position in citeCommands doubles as the index into citeStylesArray.
804 size_t idx = ptr - citeCommands;
805 s.style = citeStylesArray[idx];
// Converts a CitationStyle back into its command name. The base name is
// read from citeCommands using s.style as index; membership of the style
// in citeStylesFull and, when forceUpperCase is set, in citeStylesUCase is
// then tested.
// NOTE(review): what is done to the name when those tests succeed, and the
// return, are not visible in this listing.
811 string citationStyleToString(const CitationStyle & s)
813 string cite = citeCommands[s.style];
815 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
816 if (std::find(citeStylesFull, last, s.style) != last)
820 if (s.forceUpperCase) {
821 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
822 if (std::find(citeStylesUCase, last, s.style) != last)
// Returns the list of cite styles available for the given engine, copied
// from a contiguous run of citeStylesArray. For the two natbib engines the
// count is nCiteStyles - 1; another case, not identifiable from this
// listing, uses nCiteStyles.
// NOTE(review): the switch header, the remaining cases, the initial values
// of i and j, and the return are not visible in this listing.
829 vector<CiteStyle> citeStyles(CiteEngine engine)
831 unsigned int nStyles = 0;
832 unsigned int start = 0;
839 case ENGINE_NATBIB_AUTHORYEAR:
840 case ENGINE_NATBIB_NUMERICAL:
841 nStyles = nCiteStyles - 1;
845 nStyles = nCiteStyles;
850 vector<CiteStyle> styles(nStyles);
// i walks the output vector while j walks citeStylesArray in step.
853 for (; i != styles.size(); ++i, ++j)
854 styles[i] = citeStylesArray[j];