]> git.lyx.org Git - lyx.git/blobdiff - src/BiblioInfo.cpp
Update Win installer for new dictionary links. Untested.
[lyx.git] / src / BiblioInfo.cpp
index 50d8abfd22babb64f6d64bf8cbdc4460c4e29011..253fb3759cbb7d50a30d9d6c701dd62330c18530 100644 (file)
@@ -5,7 +5,7 @@
  *
  * \author Angus Leeming
  * \author Herbert Voß
- * \author Richard Heck
+ * \author Richard Kimberly Heck
  * \author Julien Rioux
  * \author Jürgen Spitzmüller
  *
 #include <config.h>
 
 #include "BiblioInfo.h"
+
 #include "Buffer.h"
 #include "BufferParams.h"
-#include "buffer_funcs.h"
 #include "Citation.h"
 #include "Encoding.h"
-#include "InsetIterator.h"
 #include "Language.h"
-#include "output_xhtml.h"
-#include "Paragraph.h"
 #include "TextClass.h"
 #include "TocBackend.h"
+#include "xml.h"
 
 #include "support/convert.h"
 #include "support/debug.h"
 #include "support/docstream.h"
+#include "support/FileName.h"
 #include "support/gettext.h"
 #include "support/lassert.h"
 #include "support/lstrings.h"
-#include "support/regex.h"
 #include "support/textutils.h"
 
 #include <map>
+#include <regex>
 #include <set>
 
 using namespace std;
@@ -278,8 +277,14 @@ vector<docstring> const getAuthors(docstring const & author)
        // in author names, but can happen (consider cases such as "C \& A Corp.").
        docstring iname = subst(author, from_ascii("&"), from_ascii("$$amp!"));
        // Then, we temporarily make all " and " strings to ampersands in order
-       // to handle them later on a per-char level.
-       iname = subst(iname, from_ascii(" and "), from_ascii(" & "));
+       // to handle them later on a per-char level. Note that arbitrary casing
+       // ("And", "AND", "aNd", ...) is allowed in bibtex (#10465).
+       static regex const and_reg("(.* )([aA][nN][dD])( .*)");
+       smatch sub;
+       string res = to_utf8(iname);
+       while (regex_match(res, sub, and_reg))
+               res = sub.str(1) + "&" + sub.str(3);
+       iname = from_utf8(res);
        // Now we traverse through the string and replace the "&" by the proper
        // output in- and outside groups
        docstring name;
@@ -328,6 +333,7 @@ docstring convertLaTeXCommands(docstring const & str)
 
        bool scanning_cmd = false;
        bool scanning_math = false;
+       bool is_section = false;
        bool escaped = false; // used to catch \$, etc.
        while (!val.empty()) {
                char_type const ch = val[0];
@@ -350,13 +356,24 @@ docstring convertLaTeXCommands(docstring const & str)
                // discard characters until we hit something that
                // isn't alpha.
                if (scanning_cmd) {
+                       if (!is_section && ch == 'S') {
+                               is_section = true;
+                               val = val.substr(1);
+                               continue;
+                       }
                        if (isAlphaASCII(ch)) {
+                               is_section = false;
                                val = val.substr(1);
                                escaped = false;
                                continue;
+                       } else if (is_section) {
+                               ret.push_back(0x00a7);
+                               is_section = false;
+                               continue;
                        }
                        // so we're done with this command.
                        // now we fall through and check this character.
+                       is_section = false;
                        scanning_cmd = false;
                }
 
@@ -373,6 +390,12 @@ docstring convertLaTeXCommands(docstring const & str)
                        continue;
                }
 
+               if (ch == '~') {
+                       ret += char_type(0x00a0);
+                       val = val.substr(1);
+                       continue;
+               }
+
                if (ch == '$') {
                        ret += ch;
                        val = val.substr(1);
@@ -384,8 +407,8 @@ docstring convertLaTeXCommands(docstring const & str)
                // {\v a} to \v{a} (see #9340).
                // FIXME: This is a sort of mini-tex2lyx.
                //        Use the real tex2lyx instead!
-               static lyx::regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
-               if (lyx::regex_search(to_utf8(val), tma_reg)) {
+               static regex const tma_reg("^\\{\\\\[bcCdfGhHkrtuUv]\\s\\w\\}");
+               if (regex_search(to_utf8(val), tma_reg)) {
                        val = val.substr(1);
                        val.replace(2, 1, from_ascii("{"));
                        continue;
@@ -412,8 +435,8 @@ docstring convertLaTeXCommands(docstring const & str)
                // look for that and change it, if necessary.
                // FIXME: This is a sort of mini-tex2lyx.
                //        Use the real tex2lyx instead!
-               static lyx::regex const reg("^\\\\\\W\\w");
-               if (lyx::regex_search(to_utf8(val), reg)) {
+               static regex const reg("^\\\\\\W\\w");
+               if (regex_search(to_utf8(val), reg)) {
                        val.insert(3, from_ascii("}"));
                        val.insert(2, from_ascii("{"));
                }
@@ -488,8 +511,8 @@ docstring processRichtext(docstring const & str, bool richtext)
 //////////////////////////////////////////////////////////////////////
 
 BibTeXInfo::BibTeXInfo(docstring const & key, docstring const & type)
-       : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type), info_(),
-         modifier_(0)
+       : is_bibtex_(true), bib_key_(key), num_bib_key_(0), entry_type_(type),
+         info_(), format_(), modifier_(0)
 {}
 
 
@@ -651,6 +674,97 @@ docstring const BibTeXInfo::getYear() const
 }
 
 
+void BibTeXInfo::getLocators(docstring & doi, docstring & url, docstring & file) const
+{
+       if (is_bibtex_) {
+               // get "doi" entry from citation record
+               doi = operator[]("doi");
+               if (!doi.empty() && !prefixIs(doi,from_ascii("http")))
+                       doi = "https://doi.org/" + doi;
+               // get "url" entry from citation record
+               url = operator[]("url");
+               // get "file" entry from citation record
+               file = operator[]("file");
+
+               // Jabref case, "file" field has a format (depending on exporter):
+               // Description:Location:Filetype;Description:Location:Filetype...
+               // or simply:
+               // Location;Location;...
+               // We will strip out the locations and return an \n-separated list
+               if (!file.empty()) {
+                       docstring filelist;
+                       vector<docstring> files = getVectorFromString(file, from_ascii(";"));
+                       for (auto const & f : files) {
+                               // first try if we have Description:Location:Filetype
+                               docstring ret, filedest, tmp;
+                               ret = split(f, tmp, ':');
+                               tmp = split(ret, filedest, ':');
+                               if (filedest.empty())
+                                       // we haven't, so use the whole string
+                                       filedest = f;
+                               // TODO howto deal with relative directories?
+                               FileName fn(to_utf8(filedest));
+                               if (fn.exists()) {
+                                       if (!filelist.empty())
+                                               filelist += '\n';
+                                       filelist += "file:///" + filedest;
+                               }
+                       }
+                       if (!filelist.empty())
+                               file = filelist;
+               }
+
+               // kbibtex case, "localfile" field with format:
+               // file1.pdf;file2.pdf
+               // We will strip out the locations and return an \n-separated list
+               docstring kfile;
+               if (file.empty())
+                       kfile = operator[]("localfile");
+               if (!kfile.empty()) {
+                       docstring filelist;
+                       vector<docstring> files = getVectorFromString(kfile, from_ascii(";"));
+                       for (auto const & f : files) {
+                               // TODO howto deal with relative directories?
+                               FileName fn(to_utf8(f));
+                               if (fn.exists()) {
+                                       if (!filelist.empty())
+                                               filelist += '\n';
+                                       filelist = "file:///" + f;
+                               }
+                       }
+                       if (!filelist.empty())
+                               file = filelist;
+               }
+
+               if (!url.empty())
+                       return;
+
+               // try biblatex specific fields, see its manual
+               // 3.13.7 "Electronic Publishing Informationl"
+               docstring eprinttype = operator[]("eprinttype");
+               docstring eprint = operator[]("eprint");
+               if (eprint.empty())
+                       return;
+
+               if (eprinttype == "arxiv")
+                       url = "https://arxiv.org/abs/" + eprint;
+               if (eprinttype == "jstor")
+                       url = "https://www.jstor.org/stable/" + eprint;
+               if (eprinttype == "pubmed")
+                       url = "http://www.ncbi.nlm.nih.gov/pubmed/" + eprint;
+               if (eprinttype == "hdl")
+                       url = "https://hdl.handle.net/" + eprint;
+               if (eprinttype == "googlebooks")
+                       url = "http://books.google.com/books?id=" + eprint;
+
+               return;
+       }
+
+       // Here can be handled the bibliography environment. All one could do
+       // here is let LyX scan the entry for URL or HRef insets.
+}
+
+
 namespace {
 
 docstring parseOptions(docstring const & format, string & optkey,
@@ -927,12 +1041,27 @@ docstring BibTeXInfo::expandFormat(docstring const & format,
 
 
 docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
-       Buffer const & buf, CiteItem const & ci) const
+       Buffer const & buf, CiteItem const & ci, docstring const & format_in) const
 {
        bool const richtext = ci.richtext;
 
-       if (!richtext && !info_.empty())
+       CiteEngineType const engine_type = buf.params().citeEngineType();
+       DocumentClass const & dc = buf.params().documentClass();
+       docstring const & format = format_in.empty()? 
+                               from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)))
+                             : format_in;
+
+       if (format != format_) {
+               // clear caches since format changed
+               info_.clear();
+               info_richtext_.clear();
+               format_ = format;
+       }
+
+       if (!richtext && !info_.empty()) {
+               info_ = convertLaTeXCommands(processRichtext(info_, false));
                return info_;
+       }
        if (richtext && !info_richtext_.empty())
                return info_richtext_;
 
@@ -942,10 +1071,6 @@ docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
                return info_;
        }
 
-       CiteEngineType const engine_type = buf.params().citeEngineType();
-       DocumentClass const & dc = buf.params().documentClass();
-       docstring const & format =
-               from_utf8(dc.getCiteFormat(engine_type, to_utf8(entry_type_)));
        int counter = 0;
        info_ = expandFormat(format, xrefs, counter, buf,
                ci, false, false);
@@ -965,7 +1090,7 @@ docstring const & BibTeXInfo::getInfo(BibTeXInfoList const & xrefs,
 }
 
 
-docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
+docstring const BibTeXInfo::getLabel(BibTeXInfoList const xrefs,
        Buffer const & buf, docstring const & format,
        CiteItem const & ci, bool next, bool second) const
 {
@@ -1012,16 +1137,10 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
        }
 
        docstring ret = operator[](key);
-       if (ret.empty() && !xrefs.empty()) {
-               // xr is a (reference to a) BibTeXInfo const *
-               for (auto const & xr : xrefs) {
-                       if (xr && !(*xr)[key].empty()) {
-                               ret = (*xr)[key];
-                               break;
-                       }
-               }
-       }
        if (ret.empty()) {
+               docstring subtype;
+               if (contains(key, ':'))
+                       subtype = from_ascii(token(key, ':', 1));
                // some special keys
                // FIXME: dialog, textbefore and textafter have nothing to do with this
                if (key == "dialog" && ci.context == CiteItem::Dialog)
@@ -1047,7 +1166,7 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                        ret = cite_number_;
                else if (prefixIs(key, "ifmultiple:")) {
                        // Return whether we have multiple authors
-                       docstring const kind = operator[](from_ascii(key.substr(11)));
+                       docstring const kind = operator[](subtype);
                        if (multipleAuthors(kind))
                                ret = from_ascii("x"); // any non-empty string will do
                }
@@ -1055,14 +1174,14 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                        // Special key to provide abbreviated name list,
                        // with respect to maxcitenames. Suitable for Bibliography
                        // beginnings.
-                       docstring const kind = operator[](from_ascii(key.substr(11)));
+                       docstring const kind = operator[](subtype);
                        ret = getAuthorList(&buf, kind, false, false, true);
                        if (ci.forceUpperCase && isLowerCase(ret[0]))
                                ret[0] = uppercase(ret[0]);
                } else if (prefixIs(key, "fullnames:")) {
                        // Return a full name list. Suitable for Bibliography
                        // beginnings.
-                       docstring const kind = operator[](from_ascii(key.substr(10)));
+                       docstring const kind = operator[](subtype);
                        ret = getAuthorList(&buf, kind, true, false, true);
                        if (ci.forceUpperCase && isLowerCase(ret[0]))
                                ret[0] = uppercase(ret[0]);
@@ -1070,7 +1189,7 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                        // Special key to provide abbreviated name lists,
                        // irrespective of maxcitenames. Suitable for Bibliography
                        // beginnings.
-                       docstring const kind = operator[](from_ascii(key.substr(15)));
+                       docstring const kind = operator[](subtype);
                        ret = getAuthorList(&buf, kind, false, true, true);
                        if (ci.forceUpperCase && isLowerCase(ret[0]))
                                ret[0] = uppercase(ret[0]);
@@ -1078,14 +1197,14 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                        // Special key to provide abbreviated name list,
                        // with respect to maxcitenames. Suitable for further names inside a
                        // bibliography item // (such as "ed. by ...")
-                       docstring const kind = operator[](from_ascii(key.substr(11)));
+                       docstring const kind = operator[](subtype);
                        ret = getAuthorList(&buf, kind, false, false, true, false);
                        if (ci.forceUpperCase && isLowerCase(ret[0]))
                                ret[0] = uppercase(ret[0]);
                } else if (prefixIs(key, "fullbynames:")) {
                        // Return a full name list. Suitable for further names inside a
                        // bibliography item // (such as "ed. by ...")
-                       docstring const kind = operator[](from_ascii(key.substr(10)));
+                       docstring const kind = operator[](subtype);
                        ret = getAuthorList(&buf, kind, true, false, true, false);
                        if (ci.forceUpperCase && isLowerCase(ret[0]))
                                ret[0] = uppercase(ret[0]);
@@ -1093,7 +1212,7 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                        // Special key to provide abbreviated name lists,
                        // irrespective of maxcitenames. Suitable for further names inside a
                        // bibliography item // (such as "ed. by ...")
-                       docstring const kind = operator[](from_ascii(key.substr(15)));
+                       docstring const kind = operator[](subtype);
                        ret = getAuthorList(&buf, kind, false, true, true, false);
                        if (ci.forceUpperCase && isLowerCase(ret[0]))
                                ret[0] = uppercase(ret[0]);
@@ -1158,8 +1277,36 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
                        ret = getYear();
        }
 
+       // If we have no result, check in the cross-ref'ed entries
+       if (ret.empty() && !xrefs.empty()) {
+               bool const biblatex =
+                       buf.params().documentClass().citeFramework() == "biblatex";
+               // xr is a (reference to a) BibTeXInfo const *
+               for (auto const & xr : xrefs) {
+                       if (!xr)
+                               continue;
+                       // use empty BibTeXInfoList to avoid loops
+                       BibTeXInfoList xr_dummy;
+                       ret = xr->getValueForKey(oldkey, buf, ci, xr_dummy, maxsize);
+                       if (!ret.empty())
+                               // success!
+                               break;
+                       // in biblatex, cross-ref'ed titles are mapped
+                       // to booktitle. Same for subtitle etc.
+                       if (biblatex && prefixIs(key, "book"))
+                               ret = (*xr)[key.substr(4)];
+                       // likewise, author is maped onto bookauthor
+                       else if (biblatex && contains(key, ":bookauthor"))
+                               ret = xr->getValueForKey(subst(key, "bookauthor", "author"),
+                                                        buf, ci, xr_dummy, maxsize);
+                       if (!ret.empty())
+                               // success!
+                               break;
+               }
+       }
+
        if (cleanit)
-               ret = html::cleanAttr(ret);
+               ret = xml::cleanAttr(ret);
 
        // make sure it is not too big
        support::truncateWithEllipsis(ret, maxsize);
@@ -1176,13 +1323,9 @@ docstring BibTeXInfo::getValueForKey(string const & oldkey, Buffer const & buf,
 namespace {
 
 // A functor for use with sort, leading to case insensitive sorting
-class compareNoCase: public binary_function<docstring, docstring, bool>
-{
-public:
-       bool operator()(docstring const & s1, docstring const & s2) const {
-               return compare_no_case(s1, s2) < 0;
-       }
-};
+bool compareNoCase(const docstring & a, const docstring & b) {
+       return compare_no_case(a, b) < 0;
+}
 
 } // namespace
 
@@ -1229,7 +1372,7 @@ vector<docstring> const BiblioInfo::getKeys() const
        vector<docstring> bibkeys;
        for (auto const & bi : *this)
                bibkeys.push_back(bi.first);
-       sort(bibkeys.begin(), bibkeys.end(), compareNoCase());
+       sort(bibkeys.begin(), bibkeys.end(), &compareNoCase);
        return bibkeys;
 }
 
@@ -1273,6 +1416,15 @@ docstring const BiblioInfo::getCiteNumber(docstring const & key) const
        return data.citeNumber();
 }
 
+void BiblioInfo::getLocators(docstring const & key, docstring & doi, docstring & url, docstring & file) const
+{
+       BiblioInfo::const_iterator it = find(key);
+        if (it == end())
+               return;
+       BibTeXInfo const & data = it->second;
+       data.getLocators(doi,url,file);
+}
+
 
 docstring const BiblioInfo::getYear(docstring const & key, bool use_modifier) const
 {
@@ -1314,11 +1466,11 @@ docstring const BiblioInfo::getYear(docstring const & key, Buffer const & buf, b
 
 
 docstring const BiblioInfo::getInfo(docstring const & key,
-       Buffer const & buf, CiteItem const & ci) const
+       Buffer const & buf, CiteItem const & ci, docstring const & format) const
 {
        BiblioInfo::const_iterator it = find(key);
        if (it == end())
-               return docstring(_("Bibliography entry not found!"));
+               return _("Bibliography entry not found!");
        BibTeXInfo const & data = it->second;
        BibTeXInfoList xrefptrs;
        for (docstring const & xref : getXRefs(data)) {
@@ -1326,7 +1478,7 @@ docstring const BiblioInfo::getInfo(docstring const & key,
                if (xrefit != end())
                        xrefptrs.push_back(&(xrefit->second));
        }
-       return data.getInfo(xrefptrs, buf, ci);
+       return data.getInfo(xrefptrs, buf, ci, format);
 }
 
 
@@ -1449,7 +1601,7 @@ void BiblioInfo::collectCitedEntries(Buffer const & buf)
        // FIXME We may want to collect these differently, in the first case,
        // so that we might have them in order of appearance.
        set<docstring> citekeys;
-       Toc const toc = *buf.tocBackend().toc("citation");
+       Toc const toc = *buf.tocBackend().toc("citation");
        for (auto const & t : toc) {
                if (t.str().empty())
                        continue;
@@ -1595,4 +1747,87 @@ string citationStyleToString(const CitationStyle & cs, bool const latex)
        return cmd;
 }
 
+
+void authorsToDocBookAuthorGroup(docstring const & authorsString, XMLStream & xs, Buffer const & buf,
+                                 const std::string type)
+{
+       // This function closely mimics getAuthorList, but produces DocBook instead of text.
+       // It has been greatly simplified, as the complete list of authors is always produced. No separators are required,
+       // as the output has a database-like shape.
+       // constructName has also been merged within, as it becomes really simple and leads to no copy-paste.
+
+       if (! type.empty() && (type != "author" && type != "book")) {
+               LYXERR0("ERROR! Unexpected author contribution `" << type <<"'.");
+               return;
+       }
+
+       if (authorsString.empty()) {
+               return;
+       }
+
+       // Split the input list of authors into individual authors.
+       vector<docstring> const authors = getAuthors(authorsString);
+
+       // Retrieve the "et al." variation.
+       string const etal = buf.params().documentClass().getCiteMacro(buf.params().citeEngineType(), "_etal");
+
+       // Output the list of authors.
+       xs << xml::StartTag("authorgroup");
+       xs << xml::CR();
+
+       auto it = authors.cbegin();
+       auto en = authors.cend();
+       for (size_t i = 0; it != en; ++it, ++i) {
+               const std::string tag = (type.empty() || type == "author") ? "author" : "othercredit";
+               const std::string attr = (type == "book") ? R"(class="other" otherclass="bookauthor")" : "";
+
+               xs << xml::StartTag(tag, attr);
+               xs << xml::CR();
+               xs << xml::StartTag("personname");
+               xs << xml::CR();
+               const docstring name = *it;
+
+               // All authors go in a <personname>. If more structure is known, use it; otherwise (just "et al."),
+               // print it as such.
+               if (name == "others") {
+                       xs << buf.B_(etal);
+               } else {
+                       name_parts parts = nameParts(name);
+                       if (! parts.prefix.empty()) {
+                               xs << xml::StartTag("honorific");
+                               xs << parts.prefix;
+                               xs << xml::EndTag("honorific");
+                               xs << xml::CR();
+                       }
+                       if (! parts.prename.empty()) {
+                               xs << xml::StartTag("firstname");
+                               xs << parts.prename;
+                               xs << xml::EndTag("firstname");
+                               xs << xml::CR();
+                       }
+                       if (! parts.surname.empty()) {
+                               xs << xml::StartTag("surname");
+                               xs << parts.surname;
+                               xs << xml::EndTag("surname");
+                               xs << xml::CR();
+                       }
+                       if (! parts.suffix.empty()) {
+                               xs << xml::StartTag("othername", "role=\"suffix\"");
+                               xs << parts.suffix;
+                               xs << xml::EndTag("othername");
+                               xs << xml::CR();
+                       }
+               }
+
+               xs << xml::EndTag("personname");
+               xs << xml::CR();
+               xs << xml::EndTag(tag);
+               xs << xml::CR();
+
+               // Could add an affiliation after <personname>, but not stored in BibTeX.
+       }
+       xs << xml::EndTag("authorgroup");
+       xs << xml::CR();
+}
+
 } // namespace lyx