Remove the test for "tableofcontents", since that is the only thing this

[lyx.git] / src / BiblioInfo.cpp
diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp

index 6118dde26d4ceff2f92c47b7c735f5936fb48d3e..81234b89d873aea7bc1e8630cd4468b043307d4d 100644 (file)
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@@ -19,12 +19,14 @@
  #include "Encoding.h"
  #include "InsetIterator.h"
  #include "Paragraph.h"
+#include "TocBackend.h"
  
  #include "insets/Inset.h"
  #include "insets/InsetBibitem.h"
  #include "insets/InsetBibtex.h"
  #include "insets/InsetInclude.h"
  
+#include "support/convert.h"
  #include "support/docstream.h"
  #include "support/gettext.h"
  #include "support/lassert.h"
@@ -33,55 +35,17 @@
  
  #include "boost/regex.hpp"
  
+#include <set>
+
  using namespace std;
  using namespace lyx::support;
  
  
  namespace lyx {
  
-//////////////////////////////////////////////////////////////////////
-//
-// BibTeXInfo
-//
-//////////////////////////////////////////////////////////////////////
-
-BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
-       : is_bibtex_(true), bib_key_(key), entry_type_(type), info_()
-{}
-
-
-bool BibTeXInfo::hasField(docstring const & field) const
-{
-       return count(field) == 1;
-}
-
-
-docstring const & BibTeXInfo::operator[](docstring const & field) const
-{
-       BibTeXInfo::const_iterator it = find(field);
-       if (it != end())
-               return it->second;
-       static docstring const empty_value = docstring();
-       return empty_value;
-}
-       
-       
-docstring const & BibTeXInfo::operator[](string const & field) const
-{
-       return operator[](from_ascii(field));
-}
-
-
-docstring BibTeXInfo::getValueForKey(string const & key, 
-               BibTeXInfo const * const xref) const
-{
-       docstring const ret = operator[](key);
-       if (!ret.empty() || !xref)
-               return ret;
-       return (*xref)[key];
-}
-
+namespace {
  
+// gets the "family name" from an author-type string
  docstring familyName(docstring const & name)
  {
         if (name.empty())
@@ -128,10 +92,127 @@ docstring familyName(docstring const & name)
         return retval;
  }
  
+// Converts a string containing LaTeX commands into unicode for display.
+// Braces are dropped, $...$ is copied through as it is, and commands are converted where possible.
+docstring convertLaTeXCommands(docstring const & str)
+{
+       docstring val = str; // the part still to be processed; consumed from the front
+       docstring ret; // the converted text built up so far
+
+       bool scanning_cmd = false; // we are skipping the name of an unconverted command
+       bool scanning_math = false; // we are between a $ and its unescaped closing $
+       bool escaped = false; // used to catch \$, etc.: the previous character was a backslash
+       while (val.size()) {
+               char_type const ch = val[0];
+
+               // if we're scanning math, we output everything, the closing $
+               // included, and leave math mode once we find an unescaped $.
+               if (scanning_math) {
+                       if (escaped)
+                               escaped = false;
+                       else if (ch == '\\')
+                               escaped = true;
+                       else if (ch == '$') 
+                               scanning_math = false;
+                       ret += ch;
+                       val = val.substr(1);
+                       continue;
+               }
+
+               // if we're scanning a command name, then we just
+               // discard characters until we hit something that
+               // isn't alpha.
+               if (scanning_cmd) {
+                       if (isAlphaASCII(ch)) {
+                               val = val.substr(1);
+                               escaped = false;
+                               continue;
+                       }
+                       // so we're done with this command.
+                       // now we fall through and check this character.
+                       scanning_cmd = false;
+               }
+
+               // was the last character a \? If so, then this is something like:
+               // \\ or \$, so we'll just output it. That's probably not always right...
+               if (escaped) {
+                       // exception: output \, as THIN SPACE
+                       if (ch == ',')
+                               ret.push_back(0x2009);
+                       else
+                               ret += ch;
+                       val = val.substr(1);
+                       escaped = false;
+                       continue;
+               }
+
+               if (ch == '$') {
+                       ret += ch;
+                       val = val.substr(1);
+                       scanning_math = true;
+                       continue;
+               }
+
+               // we just ignore braces
+               if (ch == '{' || ch == '}') {
+                       val = val.substr(1);
+                       continue;
+               }
+
+               // we're going to check things that look like commands, so if
+               // this doesn't, just output it.
+               if (ch != '\\') {
+                       ret += ch;
+                       val = val.substr(1);
+                       continue;
+               }
+
+               // ok, could be a command of some sort
+               // let's see if it corresponds to some unicode
+               // unicodesymbols has things in the form: \"{u},
+               // whereas we may see things like: \"u. So we'll
+               // look for that and change it, if necessary.
+               static boost::regex const reg("^\\\\\\W\\w"); // a backslash, a non-word character, then a word character
+               if (boost::regex_search(to_utf8(val), reg)) {
+                       val.insert(3, from_ascii("}")); // the later position first, so that index 2 is still right
+                       val.insert(2, from_ascii("{"));
+               }
+               docstring rem; // receives what is left of val after the converted command
+               docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem,
+                                                       Encodings::TEXT_CMD);
+               if (!cnvtd.empty()) {
+                       // it did, so we'll take that bit and proceed with what's left
+                       ret += cnvtd;
+                       val = rem;
+                       continue;
+               }
+               // it's a command of some sort
+               scanning_cmd = true;
+               escaped = true; // so that a non-letter right after the backslash is output as it is
+               val = val.substr(1); // drop the backslash itself
+       }
+       return ret;
+}
+
+} // anon namespace
+
+
+//////////////////////////////////////////////////////////////////////
+//
+// BibTeXInfo
+//
+//////////////////////////////////////////////////////////////////////
+
+BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
+       : is_bibtex_(true), bib_key_(key), entry_type_(type), info_(), // an entry built this way is flagged as BibTeX
+         modifier_(0) // 0: no modifier; BiblioInfo::getYear() appends it only when nonzero
+{}
+
+
  docstring const BibTeXInfo::getAbbreviatedAuthor() const
  {
         if (!is_bibtex_) {
-               docstring const opt = trim(operator[]("label"));
+               docstring const opt = label();
                 if (opt.empty())
                         return docstring();
  
@@ -140,9 +221,9 @@ docstring const BibTeXInfo::getAbbreviatedAuthor() const
                 return authors;
         }
  
-       docstring author = operator[]("author");
+       docstring author = convertLaTeXCommands(operator[]("author"));
         if (author.empty()) {
-               author = operator[]("editor");
+               author = convertLaTeXCommands(operator[]("editor"));
                 if (author.empty())
                         return bib_key_;
         }
@@ -168,7 +249,7 @@ docstring const BibTeXInfo::getYear() const
         if (is_bibtex_) 
                 return operator[]("year");
  
-       docstring const opt = trim(operator[]("label"));
+       docstring const opt = label();
         if (opt.empty())
                 return docstring();
  
@@ -188,106 +269,6 @@ docstring const BibTeXInfo::getXRef() const
  }
  
  
-namespace {
-
-       docstring convertLaTeXCommands(docstring const & str)
-       {
-               docstring val = str;
-               docstring ret;
-       
-               bool scanning_cmd = false;
-               bool scanning_math = false;
-               bool escaped = false; // used to catch \$, etc.
-               while (val.size()) {
-                       char_type const ch = val[0];
-
-                       // if we're scanning math, we output everything until we
-                       // find an unescaped $, at which point we break out.
-                       if (scanning_math) {
-                               if (escaped)
-                                       escaped = false;
-                               else if (ch == '\\')
-                                       escaped = true;
-                               else if (ch == '$') 
-                                       scanning_math = false;
-                               ret += ch;
-                               val = val.substr(1);
-                               continue;
-                       }
-
-                       // if we're scanning a command name, then we just
-                       // discard characters until we hit something that
-                       // isn't alpha.
-                       if (scanning_cmd) {
-                               if (isAlphaASCII(ch)) {
-                                       val = val.substr(1);
-                                       escaped = false;
-                                       continue;
-                               }
-                               // so we're done with this command.
-                               // now we fall through and check this character.
-                               scanning_cmd = false;
-                       }
-
-                       // was the last character a \? If so, then this is something like: \\,
-                       // or \$, so we'll just output it. That's probably not always right...
-                       if (escaped) {
-                               ret += ch;
-                               val = val.substr(1);
-                               escaped = false;
-                               continue;
-                       }
-
-                       if (ch == '$') {
-                               ret += ch;
-                               val = val.substr(1);
-                               scanning_math = true;
-                               continue;
-                       }
-
-                       // we just ignore braces
-                       if (ch == '{' || ch == '}') {
-                               val = val.substr(1);
-                               continue;
-                       }
-
-                       // we're going to check things that look like commands, so if
-                       // this doesn't, just output it.
-                       if (ch != '\\') {
-                               ret += ch;
-                               val = val.substr(1);
-                               continue;
-                       }
-
-                       // ok, could be a command of some sort
-                       // let's see if it corresponds to some unicode
-                       // unicodesymbols has things in the form: \"{u},
-                       // whereas we may see things like: \"u. So we'll
-                       // look for that and change it, if necessary.
-                       static boost::regex const reg("^\\\\\\W\\w");
-                       if (boost::regex_search(to_utf8(val), reg)) {
-                               val.insert(3, from_ascii("}"));
-                               val.insert(2, from_ascii("{"));
-                       }
-                       docstring rem;
-                       docstring const cnvtd = Encodings::fromLaTeXCommand(val, rem);
-                       if (!cnvtd.empty()) {
-                               // it did, so we'll take that bit and proceed with what's left
-                               ret += cnvtd;
-                               val = rem;
-                               continue;
-                       }
-                       // it's a command of some sort
-                       scanning_cmd = true;
-                       escaped = true;
-                       val = val.substr(1);
-               }
-               return ret;
-       }
-
-} // anon namespace
-
-
  docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
  {
         if (!info_.empty())
@@ -354,6 +335,32 @@ docstring const & BibTeXInfo::getInfo(BibTeXInfo const * const xref) const
  }
  
  
+// returns the value stored for field, or a shared empty string if there is none
+docstring const & BibTeXInfo::operator[](docstring const & field) const
+{
+       // static, since we hand out a reference to it
+       static docstring const empty_value = docstring();
+       BibTeXInfo::const_iterator const pos = find(field);
+       return pos == end() ? empty_value : pos->second;
+}
+       
+       
+docstring const & BibTeXInfo::operator[](string const & field) const
+{
+       return (*this)[from_ascii(field)]; // forwards to the docstring overload
+}
+
+
+docstring BibTeXInfo::getValueForKey(string const & key, BibTeXInfo const * const xref) const
+{
+       docstring const & own = operator[](key);
+       // we fall back on the cross-referenced entry only if we have nothing ourselves
+       if (own.empty() && xref)
+               return (*xref)[key];
+       return own;
+}
+
+
  //////////////////////////////////////////////////////////////////////
  //
  // BiblioInfo
@@ -417,25 +424,37 @@ docstring const BiblioInfo::getAbbreviatedAuthor(docstring const & key) const
  }
  
  
-docstring const BiblioInfo::getYear(docstring const & key) const
+docstring const BiblioInfo::getCiteNumber(docstring const & key) const
+{
+       BiblioInfo::const_iterator const entry = find(key);
+       // a key we do not know gets an empty string
+       if (entry != end())
+               return entry->second.citeNumber();
+       return docstring();
+}
+
+
+docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
  {
         BiblioInfo::const_iterator it = find(key);
         if (it == end())
                 return docstring();
         BibTeXInfo const & data = it->second;
         docstring year = data.getYear();
-       if (!year.empty())
-               return year;
-       // let's try the crossref
-       docstring const xref = data.getXRef();
-       if (xref.empty())
-               return _("No year"); // no luck
-       BiblioInfo::const_iterator const xrefit = find(xref);
-       if (xrefit == end())
-               return _("No year"); // no luck again
-       BibTeXInfo const & xref_data = xrefit->second;
-       return xref_data.getYear();
-       return data.getYear();
+       if (year.empty()) {
+               // let's try the crossref
+               docstring const xref = data.getXRef();
+               if (xref.empty())
+                       return _("No year"); // no luck
+               BiblioInfo::const_iterator const xrefit = find(xref);
+               if (xrefit == end())
+                       return _("No year"); // no luck again
+               BibTeXInfo const & xref_data = xrefit->second;
+               year = xref_data.getYear();
+       }
+       if (use_modifier && data.modifier() != 0)
+               year += data.modifier();
+       return year;
  }
  
  
@@ -594,6 +613,117 @@ void BiblioInfo::mergeBiblioInfo(BiblioInfo const & info)
  }
  
  
+namespace {
+       // used in xhtml to sort a list of BibTeXInfo objects by author, then year, then title
+       bool lSorter(BibTeXInfo const * lhs, BibTeXInfo const * rhs)
+       {
+               docstring const lauth = lhs->getAbbreviatedAuthor();
+               docstring const rauth = rhs->getAbbreviatedAuthor();
+               if (lauth != rauth)
+                       return lauth < rauth;
+               docstring const lyear = lhs->getYear();
+               docstring const ryear = rhs->getYear();
+               if (lyear != ryear)
+                       return lyear < ryear;
+               return (*lhs)["title"] < (*rhs)["title"];
+       }
+}
+
+
+// Fills cited_entries_ with the keys of the BibTeX entries cited in buf,
+// in the order that lSorter defines.
+void BiblioInfo::collectCitedEntries(Buffer const & buf)
+{
+       cited_entries_.clear();
+
+       // First, gather all the citation keys used in the document from
+       // the "citation" TOC; as this is a set, each key is kept only once.
+       // FIXME We may want to collect these differently, in the first case,
+       // so that we might have them in order of appearance.
+       Toc const & cite_toc = buf.tocBackend().toc("citation");
+       set<docstring> used_keys;
+       Toc::const_iterator ti = cite_toc.begin();
+       for (; ti != cite_toc.end(); ++ti) {
+               if (!ti->str().empty()) {
+                       vector<docstring> const item_keys = getVectorFromString(ti->str());
+                       used_keys.insert(item_keys.begin(), item_keys.end());
+               }
+       }
+       if (used_keys.empty())
+               return;
+
+       // Next, find the BibTeXInfo object for each of those keys. Keys we do
+       // not know, and entries that are not BibTeX entries, are left out.
+       vector<BibTeXInfo const *> cited;
+       set<docstring>::const_iterator ki = used_keys.begin();
+       for (; ki != used_keys.end(); ++ki) {
+               BiblioInfo::const_iterator const found = find(*ki);
+               if (found != end() && found->second.isBibTeX())
+                       cited.push_back(&(found->second));
+       }
+       // Then put them in the order lSorter defines...
+       sort(cited.begin(), cited.end(), lSorter);
+
+       // ...and finally store their keys in that order.
+       vector<BibTeXInfo const *>::const_iterator ci = cited.begin();
+       while (ci != cited.end()) {
+               cited_entries_.push_back((*ci)->key());
+               ++ci;
+       }
+}
+
+
+// Labels each cited entry of buf: a running number if the cite engine is ENGINE_BASIC or
+// ENGINE_NATBIB_NUMERICAL, otherwise a modifier where author and year match the previous entry.
+void BiblioInfo::makeCitationLabels(Buffer const & buf)
+{
+       collectCitedEntries(buf);
+       CiteEngine const engine = buf.params().citeEngine();
+       bool const numbers = 
+               (engine == ENGINE_BASIC || engine == ENGINE_NATBIB_NUMERICAL);
+
+       int keynumber = 0;
+       char modifier = 0;
+       // the last entry we labelled, or bimap_.end() if there is none yet;
+       // we compare entries with it to see if we need modifiers, like "1984a"
+       map<docstring, BibTeXInfo>::iterator last = bimap_.end();
+
+       vector<docstring>::const_iterator it = cited_entries_.begin();
+       vector<docstring>::const_iterator const en = cited_entries_.end();
+       for (; it != en; ++it) {
+               map<docstring, BibTeXInfo>::iterator const biit = bimap_.find(*it);
+               // this shouldn't happen, but fail gracefully, anyway.
+               if (biit == bimap_.end())
+                       continue;
+               BibTeXInfo & entry = biit->second;
+               if (numbers) {
+                       docstring const num = convert<docstring>(++keynumber);
+                       entry.setCiteNumber(num);
+               } else {
+                       // test last itself rather than the loop position: if the
+                       // first key was skipped above, last is not yet set here
+                       if (last != bimap_.end()
+                           && entry.getAbbreviatedAuthor() == last->second.getAbbreviatedAuthor()
+                           // getYear() will get the year from the xref, if need be
+                           && getYear(entry.key()) == getYear(last->second.key())) {
+                               if (modifier == 0) {
+                                       // so the last one should have been 'a'
+                                       last->second.setModifier('a');
+                                       modifier = 'b';
+                               } else if (modifier == 'z')
+                                       modifier = 'A';
+                               else
+                                       modifier++; // NOTE(review): nothing stops this at 'Z'
+                       } else {
+                               modifier = 0;
+                       }
+                       entry.setModifier(modifier);
+                       last = biit; // remember the last one
+               }
+       }
+}
+
+
  //////////////////////////////////////////////////////////////////////
  //
  // CitationStyle
@@ -666,13 +796,13 @@ string citationStyleToString(const CitationStyle & s)
         string cite = citeCommands[s.style];
         if (s.full) {
                 CiteStyle const * last = citeStylesFull + nCiteStylesFull;
-               if (find(citeStylesFull, last, s.style) != last)
+               if (std::find(citeStylesFull, last, s.style) != last)
                         cite += '*';
         }
  
         if (s.forceUpperCase) {
                 CiteStyle const * last = citeStylesUCase + nCiteStylesUCase;
-               if (find(citeStylesUCase, last, s.style) != last)
+               if (std::find(citeStylesUCase, last, s.style) != last)
                         cite[0] = 'C';
         }