Update Win installer for new dictionary links. Untested.

[lyx.git] / src / BiblioInfo.cpp
diff --git a/src/BiblioInfo.cpp b/src/BiblioInfo.cpp

index 50d8abfd22babb64f6d64bf8cbdc4460c4e29011..253fb3759cbb7d50a30d9d6c701dd62330c18530 100644 (file)
--- a/src/BiblioInfo.cpp
+++ b/src/BiblioInfo.cpp
@@ -5,7 +5,7 @@
   *
   * \author Angus Leeming
   * \author Herbert Voß
- * \author Richard Heck
+ * \author Richard Kimberly Heck
   * \author Julien Rioux
   * \author Jürgen Spitzmüller
   *
@@ -15,28 +15,27 @@
  #include <config.h>
  
  #include "BiblioInfo.h"
+
  #include "Buffer.h"
  #include "BufferParams.h"
-#include "buffer_funcs.h"
  #include "Citation.h"
  #include "Encoding.h"
-#include "InsetIterator.h"
  #include "Language.h"
-#include "output_xhtml.h"
-#include "Paragraph.h"
  #include "TextClass.h"
  #include "TocBackend.h"
+#include "xml.h"
  
  #include "support/convert.h"
  #include "support/debug.h"
  #include "support/docstream.h"
+#include "support/FileName.h"
  #include "support/gettext.h"
  #include "support/lassert.h"
  #include "support/lstrings.h"
-#include "support/regex.h"
  #include "support/textutils.h"
  
  #include <map>
+#include <regex>
  #include <set>
  
  using namespace std;
@@ -278,8 +277,14 @@ vector<docstring> const getAuthors(docstring const & author)
         // in author names, but can happen (consider cases such as "C \& A Corp.").
         docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
         // Then, we temporarily make all " and " strings to ampersands in order
-       // to handle them later on a per-char level.
-       iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
+       // to handle them later on a per-char level. Note that arbitrary casing
+       // ("And", "AND", "aNd", ...) is allowed in bibtex (#10465).
+       static regex const and_reg("(.* )([aA][nN][dD])( .*)");
+       smatch sub;
+       string res = to_utf8(iname);
+       while (regex_match(res, sub, and_reg))
+               res = sub.str(1) + "&" + sub.str(3);
+       iname = from_utf8(res);
         // Now we traverse through the string and replace the "&" by the proper
         // output in- and outside groups
         docstring name;
@@ -328,6 +333,7 @@ docstring convertLaTeXCommands(docstring const & str)
  
         bool scanning_cmd = false;
         bool scanning_math = false;
+       bool is_section = false;
         bool escaped = false; // used to catch \$, etc.
         while (!val.empty()) {
                 char_type const ch = val[0];
@@ -350,13 +356,24 @@ docstring convertLaTeXCommands(docstring const & str)
                 // discard characters until we hit something that
                 // isn't alpha.
                 if (scanning_cmd) {
+                       if (!is_section && ch == 'S') {
+                               is_section = true;
+                               val = val.substr(1);
+                               continue;
+                       }
                         if (isAlphaASCII(ch)) {
+                               is_section = false;
                                 val = val.substr(1);
                                 escaped = false;
                                 continue;
+                       } else if (is_section) {
+                               ret.push_back(0x00a7);
+                               is_section = false;
+                               continue;
                         }
                         // so we're done with this command.
                         // now we fall through and check this character.
+                       is_section = false;
                         scanning_cmd = false;
                 }
  
@@ -373,6 +390,12 @@ docstring convertLaTeXCommands(docstring const & str)
                         continue;
                 }
  
+               if (ch == '~') {
+                       ret += char_type(0x00a0);
+                       val = val.substr(1);
+                       continue;
+               }
+
                 if (ch == '$') {
                         ret += ch;
                         val = val.substr(1);
@@ -384,8 +407,8 @@ docstring convertLaTeXCommands(docstring const & str)
                 // {\v a} to \v{a} (see #9340).
                 // FIXME: This is a sort of mini-tex2lyx.
                 //        Use the real tex2lyx instead!
-               static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
-               if (lyx::regex_search(to_utf8(val), tma_reg)) {
+               static regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
+               if (regex_search(to_utf8(val), tma_reg)) {
                         val = val.substr(1);
                         val.replace(2, 1, from_ascii("{"));
                         continue;
@@ -412,8 +435,8 @@ docstring convertLaTeXCommands(docstring const & str)
                 // look for that and change it, if necessary.
                 // FIXME: This is a sort of mini-tex2lyx.
                 //        Use the real tex2lyx instead!
-               static lyx::regex const reg("^\\\\\\W\\w");
-               if (lyx::regex_search(to_utf8(val), reg)) {
+               static regex const reg("^\\\\\\W\\w");
+               if (regex_search(to_utf8(val), reg)) {
                         val.insert(3, from_ascii("}"));
                         val.insert(2, from_ascii("{"));
                 }
@@ -488,8 +511,8 @@ docstring processRichtext(docstring const & str, bool richtext)
  //////////////////////////////////////////////////////////////////////
  
  BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
-       : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type), info_(),
-         modifier_(0)
+       : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type),
+         info_(), format_(), modifier_(0)
  {}
  
  
@@ -651,6 +674,97 @@ docstring const BibTeXInfo::getYear() const
  }
  
  
+// Retrieve this entry's locators into the out-parameters doi, url and file.
+// Only BibTeX entries are processed; otherwise the arguments stay untouched.
+// If none of the listed files exists, file keeps the raw "file" field value.
+void BibTeXInfo::getLocators(docstring & doi, docstring & url, docstring & file) const
+{
+       if (is_bibtex_) {
+               // get "doi" entry from citation record
+               doi = operator[]("doi");
+               if (!doi.empty() && !prefixIs(doi,from_ascii("http")))
+                       doi = "https://doi.org/" + doi;
+               // get "url" entry from citation record
+               url = operator[]("url");
+               // get "file" entry from citation record
+               file = operator[]("file");
+
+               // Jabref case, "file" field has a format (depending on exporter):
+               // Description:Location:Filetype;Description:Location:Filetype...
+               // or simply:
+               // Location;Location;...
+               // We will strip out the locations and return an \n-separated list
+               if (!file.empty()) {
+                       docstring filelist;
+                       vector<docstring> files = getVectorFromString(file, from_ascii(";"));
+                       for (auto const & f : files) {
+                               // first try if we have Description:Location:Filetype
+                               docstring ret, filedest, tmp;
+                               ret = split(f, tmp, ':');
+                               tmp = split(ret, filedest, ':');
+                               if (filedest.empty())
+                                       // we haven't, so use the whole string
+                                       filedest = f;
+                               // TODO howto deal with relative directories?
+                               FileName fn(to_utf8(filedest));
+                               if (fn.exists()) {
+                                       if (!filelist.empty())
+                                               filelist += '\n';
+                                       filelist += "file:///" + filedest;
+                               }
+                       }
+                       if (!filelist.empty())
+                               file = filelist;
+               }
+
+               // kbibtex case, "localfile" field with format:
+               // file1.pdf;file2.pdf
+               // We will strip out the locations and return an \n-separated list
+               docstring kfile;
+               if (file.empty())
+                       kfile = operator[]("localfile");
+               if (!kfile.empty()) {
+                       docstring filelist;
+                       vector<docstring> files = getVectorFromString(kfile, from_ascii(";"));
+                       for (auto const & f : files) {
+                               // TODO howto deal with relative directories?
+                               FileName fn(to_utf8(f));
+                               if (fn.exists()) {
+                                       if (!filelist.empty())
+                                               filelist += '\n';
+                                       // append: plain assignment would drop all files but the last
+                                       filelist += "file:///" + f;
+                               }
+                       }
+                       if (!filelist.empty())
+                               file = filelist;
+               }
+
+               if (!url.empty())
+                       return;
+               // no explicit url: try biblatex specific fields, see its manual
+               // 3.13.7 "Electronic Publishing Information"
+               docstring eprinttype = operator[]("eprinttype");
+               docstring eprint = operator[]("eprint");
+               if (eprint.empty())
+                       return;
+               if (eprinttype == "arxiv")
+                       url = "https://arxiv.org/abs/" + eprint;
+               if (eprinttype == "jstor")
+                       url = "https://www.jstor.org/stable/" + eprint;
+               if (eprinttype == "pubmed")
+                       url = "http://www.ncbi.nlm.nih.gov/pubmed/" + eprint;
+               if (eprinttype == "hdl")
+                       url = "https://hdl.handle.net/" + eprint;
+               if (eprinttype == "googlebooks")
+                       url = "http://books.google.com/books?id=" + eprint;
+               return;
+       }
+       // Here can be handled the bibliography environment. All one could do
+       // here is let LyX scan the entry for URL or HRef insets.
+}
+
+
  namespace {
  
  docstring parseOptions(docstring const & format, string & optkey,
@@ -927,12 +1041,27 @@ docstring BibTeXInfo::expandFormat(docstring const & format,
  
  
  docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
-       Buffer const & buf, CiteItem const & ci) const
+       Buffer const & buf, CiteItem const & ci, docstring const & format_in) const
  {
         bool const richtext = ci.richtext;
  
-       if (!richtext && !info_.empty())
+       CiteEngineType const engine_type = buf.params().citeEngineType();
+       DocumentClass const & dc = buf.params().documentClass();
+       docstring const & format = format_in.empty()? 
+                               from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)))
+                             : format_in;
+
+       if (format != format_) {
+               // clear caches since format changed
+               info_.clear();
+               info_richtext_.clear();
+               format_ = format;
+       }
+
+       if (!richtext && !info_.empty()) {
+               info_ = convertLaTeXCommands(processRichtext(info_, false));
                 return info_;
+       }
         if (richtext && !info_richtext_.empty())
                 return info_richtext_;
  
@@ -942,10 +1071,6 @@ docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
                 return info_;
         }
  
-       CiteEngineType const engine_type = buf.params().citeEngineType();
-       DocumentClass const & dc = buf.params().documentClass();
-       docstring const & format =
-               from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
         int counter = 0;
         info_ = expandFormat(format, xrefs, counter, buf,
                 ci, false, false);
@@ -965,7 +1090,7 @@ docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
  }
  
  
-docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
+docstring const BibTeXInfo::getLabel(BibTeXInfoList const & xrefs,
         Buffer const & buf, docstring const & format,
         CiteItem const & ci, bool next, bool second) const
  {
@@ -1012,16 +1137,10 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
         }
  
         docstring ret = operator[](key);
-       if (ret.empty() && !xrefs.empty()) {
-               // xr is a (reference to a) BibTeXInfo const *
-               for (auto const & xr : xrefs) {
-                       if (xr && !(*xr)[key].empty()) {
-                               ret = (*xr)[key];
-                               break;
-                       }
-               }
-       }
         if (ret.empty()) {
+               docstring subtype;
+               if (contains(key, ':'))
+                       subtype = from_ascii(token(key, ':', 1));
                 // some special keys
                 // FIXME: dialog, textbefore and textafter have nothing to do with this
                 if (key == "dialog" && ci.context == CiteItem::Dialog)
@@ -1047,7 +1166,7 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                         ret = cite_number_;
                 else if (prefixIs(key, "ifmultiple:")) {
                         // Return whether we have multiple authors
-                       docstring const kind = operator[](from_ascii(key.substr(11)));
+                       docstring const kind = operator[](subtype);
                         if (multipleAuthors(kind))
                                 ret = from_ascii("x"); // any non-empty string will do
                 }
@@ -1055,14 +1174,14 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                         // Special key to provide abbreviated name list,
                         // with respect to maxcitenames. Suitable for Bibliography
                         // beginnings.
-                       docstring const kind = operator[](from_ascii(key.substr(11)));
+                       docstring const kind = operator[](subtype);
                         ret = getAuthorList(&buf, kind, false, false, true);
                         if (ci.forceUpperCase && isLowerCase(ret[0]))
                                 ret[0] = uppercase(ret[0]);
                 } else if (prefixIs(key, "fullnames:")) {
                         // Return a full name list. Suitable for Bibliography
                         // beginnings.
-                       docstring const kind = operator[](from_ascii(key.substr(10)));
+                       docstring const kind = operator[](subtype);
                         ret = getAuthorList(&buf, kind, true, false, true);
                         if (ci.forceUpperCase && isLowerCase(ret[0]))
                                 ret[0] = uppercase(ret[0]);
@@ -1070,7 +1189,7 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                         // Special key to provide abbreviated name lists,
                         // irrespective of maxcitenames. Suitable for Bibliography
                         // beginnings.
-                       docstring const kind = operator[](from_ascii(key.substr(15)));
+                       docstring const kind = operator[](subtype);
                         ret = getAuthorList(&buf, kind, false, true, true);
                         if (ci.forceUpperCase && isLowerCase(ret[0]))
                                 ret[0] = uppercase(ret[0]);
@@ -1078,14 +1197,14 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                         // Special key to provide abbreviated name list,
                         // with respect to maxcitenames. Suitable for further names inside a
                         // bibliography item // (such as "ed. by ...")
-                       docstring const kind = operator[](from_ascii(key.substr(11)));
+                       docstring const kind = operator[](subtype);
                         ret = getAuthorList(&buf, kind, false, false, true, false);
                         if (ci.forceUpperCase && isLowerCase(ret[0]))
                                 ret[0] = uppercase(ret[0]);
                 } else if (prefixIs(key, "fullbynames:")) {
                         // Return a full name list. Suitable for further names inside a
                         // bibliography item // (such as "ed. by ...")
-                       docstring const kind = operator[](from_ascii(key.substr(10)));
+                       docstring const kind = operator[](subtype);
                         ret = getAuthorList(&buf, kind, true, false, true, false);
                         if (ci.forceUpperCase && isLowerCase(ret[0]))
                                 ret[0] = uppercase(ret[0]);
@@ -1093,7 +1212,7 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                         // Special key to provide abbreviated name lists,
                         // irrespective of maxcitenames. Suitable for further names inside a
                         // bibliography item // (such as "ed. by ...")
-                       docstring const kind = operator[](from_ascii(key.substr(15)));
+                       docstring const kind = operator[](subtype);
                         ret = getAuthorList(&buf, kind, false, true, true, false);
                         if (ci.forceUpperCase && isLowerCase(ret[0]))
                                 ret[0] = uppercase(ret[0]);
@@ -1158,8 +1277,36 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                         ret = getYear();
         }
  
+       // If we have no result, check in the cross-ref'ed entries
+       if (ret.empty() && !xrefs.empty()) {
+               bool const biblatex =
+                       buf.params().documentClass().citeFramework() == "biblatex";
+               // xr is a (reference to a) BibTeXInfo const *
+               for (auto const & xr : xrefs) {
+                       if (!xr)
+                               continue;
+                       // use empty BibTeXInfoList to avoid loops
+                       BibTeXInfoList xr_dummy;
+                       ret = xr->getValueForKey(oldkey, buf, ci, xr_dummy, maxsize);
+                       if (!ret.empty())
+                               // success!
+                               break;
+                       // in biblatex, cross-ref'ed titles are mapped
+                       // to booktitle. Same for subtitle etc.
+                       if (biblatex && prefixIs(key, "book"))
+                               ret = (*xr)[key.substr(4)];
+                       // likewise, author is mapped onto bookauthor
+                       else if (biblatex && contains(key, ":bookauthor"))
+                               ret = xr->getValueForKey(subst(key, "bookauthor", "author"),
+                                                        buf, ci, xr_dummy, maxsize);
+                       if (!ret.empty())
+                               // success!
+                               break;
+               }
+       }
+
         if (cleanit)
-               ret = html::cleanAttr(ret);
+               ret = xml::cleanAttr(ret);
  
         // make sure it is not too big
         support::truncateWithEllipsis(ret, maxsize);
@@ -1176,13 +1323,9 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
  namespace {
  
  // A functor for use with sort, leading to case insensitive sorting
-class compareNoCase: public binary_function<docstring, docstring, bool>
-{
-public:
-       bool operator()(docstring const & s1, docstring const & s2) const {
-               return compare_no_case(s1, s2) < 0;
-       }
-};
+bool compareNoCase(docstring const & lhs, docstring const & rhs) {
+       return 0 > compare_no_case(lhs, rhs);
+}
  
  } // namespace
  
@@ -1229,7 +1372,7 @@ vector<docstring> const BiblioInfo::getKeys() const
         vector<docstring> bibkeys;
         for (auto const & bi : *this)
                 bibkeys.push_back(bi.first);
-       sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
+       sort(bibkeys.begin(), bibkeys.end(), &compareNoCase);
         return bibkeys;
  }
  
@@ -1273,6 +1416,15 @@ docstring const BiblioInfo::getCiteNumber(docstring const & key) const
         return data.citeNumber();
  }
  
+void BiblioInfo::getLocators(docstring const & key, docstring & doi, docstring & url, docstring & file) const
+{
+       // Forward to the getLocators() of the entry stored under key; when the
+       // key is unknown, doi, url and file are left exactly as they were passed.
+       auto const entry = find(key);
+       if (entry != end())
+               entry->second.getLocators(doi, url, file);
+}
+
  
  docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
  {
@@ -1314,11 +1466,11 @@ docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, b
  
  
  docstring const BiblioInfo::getInfo(docstring const & key,
-       Buffer const & buf, CiteItem const & ci) const
+       Buffer const & buf, CiteItem const & ci, docstring const & format) const
  {
         BiblioInfo::const_iterator it = find(key);
         if (it == end())
-               return docstring(_("Bibliography entry not found!"));
+               return _("Bibliography entry not found!");
         BibTeXInfo const & data = it->second;
         BibTeXInfoList xrefptrs;
         for (docstring const & xref : getXRefs(data)) {
@@ -1326,7 +1478,7 @@ docstring const BiblioInfo::getInfo(docstring const & key,
                 if (xrefit != end())
                         xrefptrs.push_back(&(xrefit->second));
         }
-       return data.getInfo(xrefptrs, buf, ci);
+       return data.getInfo(xrefptrs, buf, ci, format);
  }
  
  
@@ -1449,7 +1601,7 @@ void BiblioInfo::collectCitedEntries(Buffer const & buf)
         // FIXME We may want to collect these differently, in the first case,
         // so that we might have them in order of appearance.
         set<docstring> citekeys;
-       Toc const toc = *buf.tocBackend().toc("citation");
+       Toc const & toc = *buf.tocBackend().toc("citation");
         for (auto const & t : toc) {
                 if (t.str().empty())
                         continue;
@@ -1595,4 +1747,87 @@ string citationStyleToString(const CitationStyle & cs, bool const latex)
         return cmd;
  }
  
+
+void authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs, Buffer const & buf,
+                                 const std::string type)
+{
+       // DocBook counterpart of getAuthorList: every author is always written, as
+       // a structured <personname> inside one <authorgroup>, so no separators are
+       // needed. What constructName would do is carried out inline below.
+
+       // Only an empty type, "author" and "book" are accepted contributions.
+       bool const known_type = type.empty() || type == "author" || type == "book";
+       if (!known_type) {
+               LYXERR0("ERROR! Unexpected author contribution `" << type <<"'.");
+               return;
+       }
+
+       // Nothing is written at all for an empty author field.
+       if (authorsString.empty())
+               return;
+
+       // Break the author field up into the individual names.
+       vector<docstring> const authors = getAuthors(authorsString);
+
+       // The cite engine's "_etal" macro, written (via buf.B_) for the name "others".
+       string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal");
+
+       // The wrapper element and its attributes are the same for every author.
+       bool const plain_author = type.empty() || type == "author";
+       const std::string tag = plain_author ? "author" : "othercredit";
+       const std::string attr = (type == "book") ? R"(class="other" otherclass="bookauthor")" : "";
+
+       // Output the list of authors.
+       xs << xml::StartTag("authorgroup");
+       xs << xml::CR();
+
+       for (docstring const & name : authors) {
+               xs << xml::StartTag(tag, attr);
+               xs << xml::CR();
+               xs << xml::StartTag("personname");
+               xs << xml::CR();
+
+               if (name == "others") {
+                       // No further structure is known for "et al.": print it as such.
+                       xs << buf.B_(etal);
+               } else {
+                       // Each name part that is present goes into its own element.
+                       name_parts np = nameParts(name);
+                       if (! np.prefix.empty()) {
+                               xs << xml::StartTag("honorific");
+                               xs << np.prefix;
+                               xs << xml::EndTag("honorific");
+                               xs << xml::CR();
+                       }
+                       if (! np.prename.empty()) {
+                               xs << xml::StartTag("firstname");
+                               xs << np.prename;
+                               xs << xml::EndTag("firstname");
+                               xs << xml::CR();
+                       }
+                       if (! np.surname.empty()) {
+                               xs << xml::StartTag("surname");
+                               xs << np.surname;
+                               xs << xml::EndTag("surname");
+                               xs << xml::CR();
+                       }
+                       if (! np.suffix.empty()) {
+                               xs << xml::StartTag("othername", "role=\"suffix\"");
+                               xs << np.suffix;
+                               xs << xml::EndTag("othername");
+                               xs << xml::CR();
+                       }
+               }
+
+               xs << xml::EndTag("personname");
+               xs << xml::CR();
+               xs << xml::EndTag(tag);
+               xs << xml::CR();
+
+               // Could add an affiliation after <personname>, but not stored in BibTeX.
+       }
+       xs << xml::EndTag("authorgroup");
+       xs << xml::CR();
+}
+
  } // namespace lyx