3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Angus Leeming
10 * Full author contact details are available in file CREDITS.
15 #include "BiblioInfo.h"
17 #include "BufferParams.h"
18 #include "buffer_funcs.h"
20 #include "InsetIterator.h"
21 #include "Paragraph.h"
22 #include "TocBackend.h"
24 #include "insets/Inset.h"
25 #include "insets/InsetBibitem.h"
26 #include "insets/InsetBibtex.h"
27 #include "insets/InsetInclude.h"
29 #include "support/docstream.h"
30 #include "support/gettext.h"
31 #include "support/lassert.h"
32 #include "support/lstrings.h"
33 #include "support/textutils.h"
35 #include "boost/regex.hpp"
40 using namespace lyx::support;
// familyName() reduces one author name to its family-name part.
//  - "Family, Given" form: everything before the right-most comma is
//    returned, left-trimmed, so a trailing "jr" part stays attached.
//  - Otherwise the name is split on single spaces and the tokens are scanned
//    for the first one that starts with a lower-case letter (the "von" part).
// NOTE(review): this listing omits some lines of the body (braces and a few
// short statements); the notes below cover only the statements that are
// visible here.
47 // gets the "family name" from an author-type string
48 docstring familyName(docstring const & name)
53 // first we look for a comma, and take the last name to be everything
54 // preceding the right-most one, so that we also get the "jr" part.
55 docstring::size_type idx = name.rfind(',');
56 if (idx != docstring::npos)
57 return ltrim(name.substr(0, idx));
59 // OK, so now we want to look for the last name. We're going to
60 // include the "von" part. This isn't perfect.
61 // Split on spaces, to get various tokens.
62 vector<docstring> pieces = getVectorFromString(name, from_ascii(" "));
63 // If we only get two, assume the last one is the last name
// NOTE(review): "<= 2" also matches zero or one token; the statement taken in
// that case is not visible here -- confirm it is safe for an empty vector.
64 if (pieces.size() <= 2)
67 // Now we look for the first token that begins with a lower case letter.
68 vector<docstring>::const_iterator it = pieces.begin();
69 vector<docstring>::const_iterator en = pieces.end();
70 for (; it != en; ++it) {
// Empty tokens are tested for before (*it)[0] is read below; what happens to
// them is not visible here (presumably they are skipped).
71 if ((*it).size() == 0)
73 char_type const c = (*it)[0];
78 if (it == en) // we never found a "von"
81 // reconstruct what we need to return
// This second loop does not reset `it`, so it continues from the token the
// first loop stopped at and runs to the end of the token list.
84 for (; it != en; ++it) {
// convertLaTeXCommands() takes a string that may contain LaTeX markup and
// produces a plain docstring for display. The visible code tracks three
// flags while it examines the leading character of `val`:
//   scanning_cmd  - currently inside a command name,
//   scanning_math - currently inside $...$ math,
//   escaped       - the previous character was a backslash.
// NOTE(review): most of the control flow (loop header, branches, returns) is
// not visible in this listing; the comments describe visible statements only.
94 // converts a string containing LaTeX commands into unicode
96 docstring convertLaTeXCommands(docstring const & str)
101 bool scanning_cmd = false;
102 bool scanning_math = false;
103 bool escaped = false; // used to catch \$, etc.
105 char_type const ch = val[0];
107 // if we're scanning math, we output everything until we
108 // find an unescaped $, at which point we break out.
115 scanning_math = false;
121 // if we're scanning a command name, then we just
122 // discard characters until we hit something that
125 if (isAlphaASCII(ch)) {
130 // so we're done with this command.
131 // now we fall through and check this character.
132 scanning_cmd = false;
135 // was the last character a \? If so, then this is something like:
136 // \\ or \$, so we'll just output it. That's probably not always right...
138 // exception: output \, as THIN SPACE
// 0x2009 is the Unicode code point U+2009 THIN SPACE.
140 ret.push_back(0x2009);
151 scanning_math = true;
155 // we just ignore braces
156 if (ch == '{' || ch == '}') {
161 // we're going to check things that look like commands, so if
162 // this doesn't, just output it.
169 // ok, could be a command of some sort
170 // let's see if it corresponds to some unicode
171 // unicodesymbols has things in the form: \"{u},
172 // whereas we may see things like: \"u. So we'll
173 // look for that and change it, if necessary.
// The pattern matches, at the start of the text, a backslash followed by one
// non-word character and then one word character (for example an accent
// command applied to a bare letter).
174 static boost::regex const reg("^\\\\\\W\\w");
175 if (boost::regex_search(to_utf8(val), reg)) {
// The closing brace is inserted first (at index 3) so that index 2 still
// refers to the position just before the letter when the opening brace is
// inserted.
176 val.insert(3, from_ascii("}"));
177 val.insert(2, from_ascii("{"));
// NOTE(review): `rem` presumably receives the part of `val` that was not
// consumed by the conversion -- confirm against Encodings::fromLaTeXCommand.
180 docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem,
181 Encodings::TEXT_CMD);
182 if (!cnvtd.empty()) {
183 // it did, so we'll take that bit and proceed with what's left
188 // it's a command of some sort
199 //////////////////////////////////////////////////////////////////////
203 //////////////////////////////////////////////////////////////////////
// Constructs an entry from its citation key and entry type. The entry is
// flagged as a BibTeX entry (is_bibtex_ = true) and info_ starts out
// default-constructed.
205 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
206 : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
// Returns true when count(field) reports exactly one value for `field`.
// NOTE(review): count() is not defined in this listing; presumably it is the
// count of the underlying field map -- confirm against the class declaration.
210 bool BibTeXInfo::hasField(docstring const & field) const
212 return count(field) == 1;
// Builds a short author string for this entry.
// Visible behaviour:
//  - one path reads label() and splits it at the first '(' (the condition
//    selecting that path is not visible in this listing);
//  - otherwise the "author" field is used, falling back to "editor" when it
//    is empty, both passed through convertLaTeXCommands();
//  - the names are split on " and ": two names give "A and B", more than two
//    give "A et al.", and the remaining case returns the family name of the
//    first element.
216 docstring const BibTeXInfo::getAbbreviatedAuthor() const
219 docstring const opt = label();
224 split(opt, authors, '(');
228 docstring author = convertLaTeXCommands(operator[]("author"));
229 if (author.empty()) {
230 author = convertLaTeXCommands(operator[]("editor"));
235 // OK, we've got some names. Let's format them.
236 // Try to split the author list on " and "
237 vector<docstring> const authors =
238 getVectorFromString(author, from_ascii(" and "));
240 if (authors.size() == 2)
241 return bformat(_("%1$s and %2$s"),
242 familyName(authors[0]), familyName(authors[1]));
244 if (authors.size() > 2)
245 return bformat(_("%1$s et al."), familyName(authors[0]));
// NOTE(review): this line is reached when the size is neither 2 nor greater
// than 2; if getVectorFromString can return an empty vector, authors[0] is
// out of range -- confirm an earlier guard (not visible here) rules that out.
247 return familyName(authors[0]);
// Returns the year of this entry.
// One path returns the "year" field directly. The other reads label(), takes
// the text after the first '(' and then the part of that text before the
// first ')'. The condition choosing between the two paths and the final
// return are not visible in this listing.
251 docstring const BibTeXInfo::getYear() const
254 return operator[]("year");
256 docstring const opt = label();
261 docstring const tmp = split(opt, authors, '(');
263 split(tmp, year, ')');
// Returns the value of the "crossref" field (an empty docstring when the
// field is absent, as provided by operator[]).
// NOTE(review): lines between the signature and this return are not visible
// here; there may be an additional guard.
268 docstring const BibTeXInfo::getXRef() const
272 return operator[]("crossref");
// Builds a one-line description of this entry and returns a reference to it.
// `xref`, when non-null, is the cross-referenced entry that getValueForKey()
// may consult for fields missing here.
// The visible code collects author (or editor), year, title, a location
// (pages, else chapter), a medium (journal, publisher, school, institution)
// and volume, writes them to a stream, trims trailing whitespace, converts
// LaTeX commands and stores the result in info_.
// NOTE(review): several conditions and returns are not visible in this
// listing; comments below refer to visible statements only.
276 docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
// NOTE(review): the "ref" field is looked up here; presumably this serves
// entries that are not BibTeX entries -- confirm against the hidden guard.
282 BibTeXInfo::const_iterator it = find(from_ascii("ref"));
288 // This could be made a lot better using the entry_type_
289 // field to customize the output based upon entry type.
291 // Search for all possible "required" fields
292 docstring author = getValueForKey("author", xref);
294 author = getValueForKey("editor", xref);
296 docstring year = getValueForKey("year", xref);
297 docstring title = getValueForKey("title", xref);
298 docstring docLoc = getValueForKey("pages", xref);
299 if (docLoc.empty()) {
300 docLoc = getValueForKey("chapter", xref);
302 docLoc = _("Ch. ") + docLoc;
304 docLoc = _("pp. ") + docLoc;
307 docstring media = getValueForKey("journal", xref);
309 media = getValueForKey("publisher", xref);
311 media = getValueForKey("school", xref);
// NOTE(review): unlike the journal/publisher/school lookups above, this call
// does not pass xref, so it uses whatever default the declaration gives that
// parameter. This looks like an oversight -- confirm.
313 media = getValueForKey("institution");
316 docstring volume = getValueForKey("volume", xref);
318 odocstringstream result;
320 result << author << ", ";
324 result << ", " << media;
326 result << " (" << year << ")";
328 result << ", " << docLoc;
330 docstring const result_str = rtrim(result.str());
331 if (!result_str.empty()) {
332 info_ = convertLaTeXCommands(result_str);
336 // This should never happen (or at least be very unusual!)
// The function returns a reference, so the empty fallback is a static object
// that outlives the call.
337 static docstring e = docstring();
// Looks up `field` with find() and returns a reference to a value.
// A static empty docstring exists so that a valid reference can be returned
// when there is nothing to return from the lookup; the test on `it` and the
// return statements are not visible in this listing.
342 docstring const & BibTeXInfo::operator[](docstring const & field) const
344 BibTeXInfo::const_iterator it = find(field);
347 static docstring const empty_value = docstring();
// Convenience overload: converts the std::string field name with
// from_ascii() and forwards to the docstring overload.
352 docstring const & BibTeXInfo::operator[](string const & field) const
354 return operator[](from_ascii(field));
// Fetches the value stored under `key` in this entry.
// When that value is non-empty, or when no cross-referenced entry `xref` was
// supplied, the first branch is taken. What each branch returns is not
// visible in this listing (presumably the local value, else the value from
// xref -- confirm).
358 docstring BibTeXInfo::getValueForKey(string const & key,
359 BibTeXInfo const * const xref) const
361 docstring const ret = operator[](key);
362 if (!ret.empty() || !xref)
368 //////////////////////////////////////////////////////////////////////
372 //////////////////////////////////////////////////////////////////////
375 // A functor for use with sort, leading to case insensitive sorting
376 class compareNoCase: public binary_function<docstring, docstring, bool>
379 bool operator()(docstring const & s1, docstring const & s2) const {
380 return compare_no_case(s1, s2) < 0;
// Collects the key of every entry in this BiblioInfo (it->first of each
// element) and sorts the keys case-insensitively with compareNoCase.
// The return statement is not visible in this listing.
386 vector<docstring> const BiblioInfo::getKeys() const
388 vector<docstring> bibkeys;
389 BiblioInfo::const_iterator it = begin();
390 for (; it != end(); ++it)
391 bibkeys.push_back(it->first);
392 sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
// Copies the contents of the field_names_ set into a vector and sorts it
// with the default ordering. The return statement is not visible in this
// listing.
397 vector<docstring> const BiblioInfo::getFields() const
399 vector<docstring> bibfields;
400 set<docstring>::const_iterator it = field_names_.begin();
401 set<docstring>::const_iterator end = field_names_.end();
402 for (; it != end; ++it)
403 bibfields.push_back(*it);
404 sort(bibfields.begin(), bibfields.end());
// Copies the contents of the entry_types_ set into a vector and sorts it
// with the default ordering. The return statement is not visible in this
// listing.
409 vector<docstring> const BiblioInfo::getEntries() const
411 vector<docstring> bibentries;
412 set<docstring>::const_iterator it = entry_types_.begin();
413 set<docstring>::const_iterator end = entry_types_.end();
414 for (; it != end; ++it)
415 bibentries.push_back(*it);
416 sort(bibentries.begin(), bibentries.end());
// Looks up the entry stored under `key` and returns its abbreviated author
// string.
// NOTE(review): the handling of a key that is not found sits on lines not
// visible in this listing.
421 docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
423 BiblioInfo::const_iterator it = find(key);
426 BibTeXInfo const & data = it->second;
427 return data.getAbbreviatedAuthor();
// Returns the year for the entry stored under `key`.
// The entry's own year is read first; the visible code then falls back to
// the entry named by its crossref field, returning the translated string
// "No year" on two paths whose conditions are not visible in this listing.
431 docstring const BiblioInfo::getYear(docstring const & key) const
433 BiblioInfo::const_iterator it = find(key);
436 BibTeXInfo const & data = it->second;
437 docstring year = data.getYear();
440 // let's try the crossref
441 docstring const xref = data.getXRef();
443 return _("No year"); // no luck
444 BiblioInfo::const_iterator const xrefit = find(xref);
446 return _("No year"); // no luck again
447 BibTeXInfo const & xref_data = xrefit->second;
// NOTE(review): nothing visible substitutes "No year" when the
// cross-referenced entry itself has an empty year.
448 return xref_data.getYear();
// NOTE(review): the statement directly above returns unconditionally, so this
// final return looks unreachable -- confirm and remove.
449 return data.getYear();
// Returns the descriptive string for the entry stored under `key`.
// If the entry names a cross-reference and that key can be resolved, a
// pointer to the cross-referenced entry is passed to BibTeXInfo::getInfo();
// otherwise a null pointer is passed. The not-found handling for `key` and
// the conditions around the crossref lookup are not visible in this listing.
453 docstring const BiblioInfo::getInfo(docstring const & key) const
455 BiblioInfo::const_iterator it = find(key);
458 BibTeXInfo const & data = it->second;
459 BibTeXInfo const * xrefptr = 0;
460 docstring const xref = data.getXRef();
462 BiblioInfo::const_iterator const xrefit = find(xref);
464 xrefptr = &(xrefit->second);
466 return data.getInfo(xrefptr);
470 vector<docstring> const BiblioInfo::getCiteStrings(
471 docstring const & key, Buffer const & buf) const
473 CiteEngine const engine = buf.params().citeEngine();
474 if (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL)
475 return getNumericalStrings(key, buf);
477 return getAuthorYearStrings(key, buf);
// Builds one preview string per citation style for numerical cite engines.
// An empty vector is returned when the abbreviated author or the year for
// `key` is empty (and on one earlier path whose condition is not visible in
// this listing). The result has one slot per style reported by citeStyles()
// for the buffer's engine; "#ID" is written as a literal placeholder.
// NOTE(review): the switch that maps each style to its string is not visible
// here, so which assignment belongs to which style cannot be stated.
481 vector<docstring> const BiblioInfo::getNumericalStrings(
482 docstring const & key, Buffer const & buf) const
485 return vector<docstring>();
487 docstring const author = getAbbreviatedAuthor(key);
488 docstring const year = getYear(key);
489 if (author.empty() || year.empty())
490 return vector<docstring>();
492 vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
494 vector<docstring> vec(styles.size());
495 for (size_t i = 0; i != vec.size(); ++i) {
501 str = from_ascii("[#ID]");
505 str = _("Add to bibliography only.");
509 str = author + " [#ID]";
513 str = author + " #ID";
517 str = from_ascii("#ID");
529 str = '(' + year + ')';
// Builds one preview string per citation style for author-year cite engines.
// An empty vector is returned when the abbreviated author or the year for
// `key` is empty (and on one earlier path whose condition is not visible in
// this listing). The result has one slot per style reported by citeStyles()
// for the buffer's engine.
// NOTE(review): the switch that maps each style to its string is not visible
// here, so which assignment belongs to which style cannot be stated.
540 vector<docstring> const BiblioInfo::getAuthorYearStrings(
541 docstring const & key, Buffer const & buf) const
544 return vector<docstring>();
546 docstring const author = getAbbreviatedAuthor(key);
547 docstring const year = getYear(key);
548 if (author.empty() || year.empty())
549 return vector<docstring>();
551 vector<CiteStyle> const & styles = citeStyles(buf.params().citeEngine());
553 vector<docstring> vec(styles.size());
554 for (size_t i = 0; i != vec.size(); ++i) {
559 // jurabib only: Author/Annotator
560 // (i.e. the "before" field, 2nd opt arg)
561 str = author + "/<" + _("before") + '>';
565 str = _("Add to bibliography only.");
569 str = author + " (" + year + ')';
573 str = '(' + author + ", " + year + ')';
577 str = author + ' ' + year ;
581 str = author + ", " + year ;
593 str = '(' + year + ')';
// Inserts every entry of `info` into this object's bimap_.
// NOTE(review): if bimap_ is a std::map, insert() keeps the existing entry
// when a key is already present -- confirm the container type.
602 void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
604 bimap_.insert(info.begin(), info.end());
609 // used in xhtml to sort a list of BibTeXInfo objects
610 bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
612 return lhs->getAbbreviatedAuthor() < rhs->getAbbreviatedAuthor();
// Rebuilds cited_entries_ as the list of keys cited in `buf`, ordered by
// lSorter (abbreviated author).
// Steps visible here:
//  1. clear the previous result;
//  2. read every item of the "citation" TOC, split its string into keys and
//     collect them in a set (which removes duplicates);
//  3. resolve each key to a pointer to the stored BibTeXInfo, testing for
//     keys that are unknown or not BibTeX entries;
//  4. sort the pointers and append each entry's key to cited_entries_.
// NOTE(review): the statements executed for empty TOC strings, an empty key
// set and unresolved keys are not visible in this listing (presumably
// continue/return).
617 void BiblioInfo::collectCitedEntries(Buffer const & buf)
619 cited_entries_.clear();
620 // We are going to collect all the citation keys used in the document,
621 // getting them from the TOC.
622 // FIXME We may want to collect these differently, in the first case,
623 // so that we might have them in order of appearance.
624 set<docstring> citekeys;
625 Toc const & toc = buf.tocBackend().toc("citation");
626 Toc::const_iterator it = toc.begin();
627 Toc::const_iterator const en = toc.end();
628 for (; it != en; ++it) {
629 if (it->str().empty())
// getVectorFromString is called with its default delimiter here.
631 vector<docstring> const keys = getVectorFromString(it->str());
632 citekeys.insert(keys.begin(), keys.end());
634 if (citekeys.empty())
637 // We have a set of the keys used in this document.
638 // We will now convert it to a list of the BibTeXInfo objects used in
640 vector<BibTeXInfo const *> bi;
641 set<docstring>::const_iterator cit = citekeys.begin();
642 set<docstring>::const_iterator const cen = citekeys.end();
643 for (; cit != cen; ++cit) {
644 BiblioInfo::const_iterator const bt = find(*cit);
645 if (bt == end() || !bt->second.isBibTeX())
// The pointer refers to the value stored in this BiblioInfo; no copy is made.
647 bi.push_back(&(bt->second));
650 sort(bi.begin(), bi.end(), lSorter);
652 // Now we can write the sorted keys
653 vector<BibTeXInfo const *>::const_iterator bit = bi.begin();
654 vector<BibTeXInfo const *>::const_iterator ben = bi.end();
655 for (; bit != ben; ++bit)
656 cited_entries_.push_back((*bit)->key());
660 //////////////////////////////////////////////////////////////////////
664 //////////////////////////////////////////////////////////////////////
// Lookup tables for citation commands and styles.
// citeCommands and citeStylesArray are parallel arrays: the index at which a
// command name is found in citeCommands is used to pick the style from
// citeStylesArray, so the two must list their nine entries in the same order.
// Each n... constant is the element count of the array declared just before
// it, computed with sizeof.
669 char const * const citeCommands[] = {
670 "cite", "citet", "citep", "citealt", "citealp",
671 "citeauthor", "citeyear", "citeyearpar", "nocite" };
673 unsigned int const nCiteCommands =
674 sizeof(citeCommands) / sizeof(char *);
676 CiteStyle const citeStylesArray[] = {
677 CITE, CITET, CITEP, CITEALT, CITEALP,
678 CITEAUTHOR, CITEYEAR, CITEYEARPAR, NOCITE };
680 unsigned int const nCiteStyles =
681 sizeof(citeStylesArray) / sizeof(CiteStyle);
// Styles checked when producing the "full" form of a command.
683 CiteStyle const citeStylesFull[] = {
684 CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
686 unsigned int const nCiteStylesFull =
687 sizeof(citeStylesFull) / sizeof(CiteStyle);
// Styles checked when forceUpperCase is set; currently the same list as
// citeStylesFull.
689 CiteStyle const citeStylesUCase[] = {
690 CITET, CITEP, CITEALT, CITEALP, CITEAUTHOR };
692 unsigned int const nCiteStylesUCase =
693 sizeof(citeStylesUCase) / sizeof(CiteStyle);
// Parses a citation command name into a CitationStyle.
// Visible behaviour: works on a copy of `command`; on one path sets
// forceUpperCase; strips a trailing '*' unless the command is exactly
// "cite"; then searches citeCommands for the remaining name and uses the
// index found to select the style from citeStylesArray.
// NOTE(review): the conditions around forceUpperCase, what else happens when
// the '*' is stripped, and the handling of an unknown command are not
// visible in this listing.
698 CitationStyle citationStyleFromString(string const & command)
704 string cmd = command;
706 s.forceUpperCase = true;
// NOTE(review): for an empty cmd, size() - 1 wraps around to the largest
// size_t value and cmd[n] below would be out of range -- confirm an earlier
// guard (not visible here) excludes empty input.
710 size_t const n = cmd.size() - 1;
711 if (cmd != "cite" && cmd[n] == '*') {
713 cmd = cmd.substr(0, n);
716 char const * const * const last = citeCommands + nCiteCommands;
// find() compares each C string in the table with the std::string cmd.
717 char const * const * const ptr = find(citeCommands, last, cmd);
720 size_t idx = ptr - citeCommands;
721 s.style = citeStylesArray[idx];
// Converts a CitationStyle back into a command name.
// The base name is taken from citeCommands using s.style as the index, so
// this relies on the CiteStyle enumerator values matching the positions in
// that table (NOTE(review): confirm against the enum declaration).
// The style is then looked up in citeStylesFull and, when forceUpperCase is
// set, in citeStylesUCase; the modifications applied to the name on a match
// are not visible in this listing.
727 string citationStyleToString(const CitationStyle & s)
729 string cite = citeCommands[s.style];
731 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
732 if (std::find(citeStylesFull, last, s.style) != last)
736 if (s.forceUpperCase) {
737 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
738 if (std::find(citeStylesUCase, last, s.style) != last)
// Returns the citation styles available for `engine` as a vector copied
// from citeStylesArray.
// Visible behaviour: the two natbib engines use nCiteStyles - 1 styles and
// another case uses all nCiteStyles; the result vector is sized to nStyles
// and filled from citeStylesArray.
// NOTE(review): the remaining cases, the values given to `start`, and the
// initialisation of i and j are not visible in this listing.
745 vector<CiteStyle> citeStyles(CiteEngine engine)
747 unsigned int nStyles = 0;
748 unsigned int start = 0;
755 case ENGINE_NATBIB_AUTHORYEAR:
756 case ENGINE_NATBIB_NUMERICAL:
757 nStyles = nCiteStyles - 1;
761 nStyles = nCiteStyles;
766 vector<CiteStyle> styles(nStyles);
769 for (; i != styles.size(); ++i, ++j)
770 styles[i] = citeStylesArray[j];