New attempt on #9906: allow following hyperlinks via context menu.

diff --git a/src/insets/InsetBibtex.cpp b/src/insets/InsetBibtex.cpp
index c4bf8721c1f5960eb22daffaab9836cddc18c6ba..876adea54b1770ad15a140a3a8d50eb2890edff5 100644
@@ -27,7 +27,7 @@
 #include "FuncStatus.h"
 #include "LaTeXFeatures.h"
 #include "output_latex.h"
-#include "output_xhtml.h"
+#include "xml.h"
 #include "OutputParams.h"
 #include "PDFOptions.h"
 #include "texstream.h"
@@ -43,6 +43,7 @@
 #include "support/ExceptionMessage.h"
 #include "support/FileNameList.h"
 #include "support/filetools.h"
+#include "support/regex.h"
 #include "support/gettext.h"
 #include "support/lstrings.h"
 #include "support/os.h"
 #include "support/textutils.h"
 
 #include <limits>
+#include <map>
+#include <utility>
+
+#include <iostream>
 
 using namespace std;
 using namespace lyx::support;
@@ -85,7 +90,7 @@ void InsetBibtex::doDispatch(Cursor & cur, FuncRequest & cmd)
        switch (cmd.action()) {
 
        case LFUN_INSET_EDIT:
-               editDatabases();
+               editDatabases(cmd.argument());
                break;
 
        case LFUN_INSET_MODIFY: {
@@ -132,15 +137,15 @@ bool InsetBibtex::getStatus(Cursor & cur, FuncRequest const & cmd,
 }
 
 
-void InsetBibtex::editDatabases() const
+void InsetBibtex::editDatabases(docstring const & db) const
 {
        vector<docstring> bibfilelist = getVectorFromString(getParam("bibfiles"));
 
        if (bibfilelist.empty())
                return;
 
-       int nr_databases = bibfilelist.size();
-       if (nr_databases > 1) {
+       size_t nr_databases = bibfilelist.size();
+       if (nr_databases > 1 && db.empty()) {
                        docstring const engine = usingBiblatex() ? _("Biblatex") : _("BibTeX");
                        docstring message = bformat(_("The %1$s[[BibTeX/Biblatex]] inset includes %2$s databases.\n"
                                                       "If you proceed, all of them will be opened."),
@@ -155,6 +160,8 @@ void InsetBibtex::editDatabases() const
        vector<docstring>::const_iterator it = bibfilelist.begin();
        vector<docstring>::const_iterator en = bibfilelist.end();
        for (; it != en; ++it) {
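+               // If a specific database was requested (e.g. by following a
+               // hyperlink from the context menu), only open that one.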
+               if (!db.empty() && db != *it)
+                       continue;
                FileName const bibfile = buffer().getBibfilePath(*it);
                theFormats().edit(buffer(), bibfile,
                     theFormats().getFormatFromFile(bibfile));
@@ -264,6 +271,15 @@ void InsetBibtex::latex(otexstream & os, OutputParams const & runparams) const
            && buffer().params().multibib == "child")
                return;
 
+       if (runparams.inDeletedInset) {
+               // We cannot strike out bibliographies,
+               // so we just output a note.
+               os << "\\textbf{"
+                  << buffer().B_("[BIBLIOGRAPHY DELETED!]")
+                  << "}";
+               return;
+       }
+
        string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
        string bibtotoc;
        if (prefixIs(style, "bibtotoc")) {
@@ -361,7 +377,8 @@ void InsetBibtex::latex(otexstream & os, OutputParams const & runparams) const
                           << "\\end{btSect}\n";
                }
                // bibtotoc option
-               if (!bibtotoc.empty() && !buffer().masterParams().useBibtopic()) {
+               if (!bibtotoc.empty() && !buffer().masterParams().useBibtopic()
+                   && !buffer().masterParams().documentClass().bibInToc()) {
                        // set label for hyperref, see http://www.lyx.org/trac/ticket/6470
                        if (buffer().masterParams().pdfoptions().use_hyperref)
                                        os << "\\phantomsection";
@@ -441,7 +458,7 @@ namespace {
                keepCase
        };
 
-       /// remove whitespace characters, read characer sequence
+       /// remove whitespace characters, read character sequence
        /// not containing whitespace characters or characters in
        /// delimChars, and remove further whitespace characters.
        ///
@@ -498,7 +515,7 @@ namespace {
        /// read subsequent bibtex values that are delimited with a #-character.
        /// Concatenate all parts and replace names with the associated string in
        /// the variable strings.
-       /// @return true if reading was successfull (all single parts were delimited
+       /// @return true if reading was successful (all single parts were delimited
        /// correctly)
        bool readValue(docstring & val, ifdocstream & ifs, const VarMap & strings) {
 
@@ -676,8 +693,11 @@ void InsetBibtex::parseBibTeXFiles(FileNameList & checkedFiles) const
                        // record that we check this.
                        checkedFiles.push_back(bibfile);
                string encoding = buffer().masterParams().encoding().iconvName();
-               string const ienc = to_ascii(params()["encoding"]);
-               if (!ienc.empty() && ienc != "default" && encodings.fromLyXName(ienc))
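+               // Prefer a per-file encoding recorded for this database; fall back
+               // to the inset-wide encoding setting if none (or "general") is given.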
+               string ienc = buffer().masterParams().bibFileEncoding(to_utf8(bf));
+               if (ienc.empty() || ienc == "general")
+                       ienc = to_ascii(params()["encoding"]);
+
+               if (!ienc.empty() && ienc != "auto-legacy-plain" && ienc != "auto-legacy" && encodings.fromLyXName(ienc))
                        encoding = encodings.fromLyXName(ienc)->iconvName();
                ifdocstream ifs(bibfile.toFilesystemEncoding().c_str(),
                        ios_base::in, encoding);
@@ -893,7 +913,7 @@ void InsetBibtex::validate(LaTeXFeatures & features) const
 }
 
 
-void InsetBibtex::updateBuffer(ParIterator const &, UpdateType)
+void InsetBibtex::updateBuffer(ParIterator const &, UpdateType, bool const /*deleted*/)
 {
        buffer().registerBibfiles(getBibFiles());
        // record encoding of bib files for biblatex
@@ -946,9 +966,12 @@ void InsetBibtex::addToToc(DocIterator const & cpit, bool output_active,
                return;
 
        docstring const str = getRefLabel();
-       TocBuilder & b = backend.builder("tableofcontents");
-       b.pushItem(cpit, str, output_active);
-       b.pop();
+       shared_ptr<Toc> toc = backend.toc("tableofcontents");
+       // Assign to the appropriate TOC depth: 1 if the class provides chapters, 2 otherwise
+       int const item_depth =
+               (buffer().masterParams().documentClass().hasLaTeXLayout("chapter")) 
+                       ? 1 : 2;
+       toc->push_back(TocItem(cpit, item_depth, str, output_active));
 }
 
 
@@ -990,18 +1013,18 @@ int InsetBibtex::plaintext(odocstringstream & os,
                refoutput += "[" + entry.label() + "] ";
                // FIXME Right now, we are calling BibInfo::getInfo on the key,
                // which will give us all the cross-referenced info. But for every
-               // entry, so there's a lot of repitition. This should be fixed.
+               // entry, so there's a lot of repetition. This should be fixed.
                refoutput += bibinfo.getInfo(entry.key(), buffer(), ci) + "\n\n";
        }
        os << refoutput;
-       return refoutput.size();
+       return int(refoutput.size());
 }
 
 
 // FIXME
 // docstring InsetBibtex::entriesAsXHTML(vector<docstring> const & entries)
 // And then here just: entriesAsXHTML(buffer().masterBibInfo().citedEntries())
-docstring InsetBibtex::xhtml(XHTMLStream & xs, OutputParams const &) const
+docstring InsetBibtex::xhtml(XMLStream & xs, OutputParams const &) const
 {
        BiblioInfo const & bibinfo = buffer().masterBibInfo();
        bool const all_entries = getParam("btprint") == "btPrintAll";
@@ -1016,10 +1039,10 @@ docstring InsetBibtex::xhtml(XHTMLStream & xs, OutputParams const &) const
        ci.richtext = true;
        ci.max_key_size = UINT_MAX;
 
-       xs << html::StartTag("h2", "class='bibtex'")
+       xs << xml::StartTag("h2", "class='bibtex'")
                << reflabel
-               << html::EndTag("h2")
-               << html::StartTag("div", "class='bibtex'");
+               << xml::EndTag("h2")
+               << xml::StartTag("div", "class='bibtex'");
 
        // Now we loop over the entries
        vector<docstring>::const_iterator vit = cites.begin();
@@ -1031,31 +1054,365 @@ docstring InsetBibtex::xhtml(XHTMLStream & xs, OutputParams const &) const
 
                BibTeXInfo const & entry = biit->second;
                string const attr = "class='bibtexentry' id='LyXCite-"
-                   + to_utf8(html::cleanAttr(entry.key())) + "'";
-               xs << html::StartTag("div", attr);
+                   + to_utf8(xml::cleanAttr(entry.key())) + "'";
+               xs << xml::StartTag("div", attr);
 
                // don't print labels if we're outputting all entries
                if (!all_entries) {
-                       xs << html::StartTag("span", "class='bibtexlabel'")
+                       xs << xml::StartTag("span", "class='bibtexlabel'")
                                << entry.label()
-                               << html::EndTag("span");
+                               << xml::EndTag("span");
                }
 
                // FIXME Right now, we are calling BibInfo::getInfo on the key,
                // which will give us all the cross-referenced info. But for every
-               // entry, so there's a lot of repitition. This should be fixed.
-               xs << html::StartTag("span", "class='bibtexinfo'")
-                  << XHTMLStream::ESCAPE_AND
+               // entry, so there's a lot of repetition. This should be fixed.
+               xs << xml::StartTag("span", "class='bibtexinfo'")
+                  << XMLStream::ESCAPE_AND
                   << bibinfo.getInfo(entry.key(), buffer(), ci)
-                  << html::EndTag("span")
-                  << html::EndTag("div")
-                  << html::CR();
+                  << xml::EndTag("span")
+                  << xml::EndTag("div")
+                  << xml::CR();
        }
-       xs << html::EndTag("div");
+       xs << xml::EndTag("div");
        return docstring();
 }
 
 
+void InsetBibtex::docbook(XMLStream & xs, OutputParams const &) const
+{
+       BiblioInfo const & bibinfo = buffer().masterBibInfo();
+       bool const all_entries = getParam("btprint") == "btPrintAll";
+       vector<docstring> const & cites =
+                       all_entries ? bibinfo.getKeys() : bibinfo.citedEntries();
+
+       docstring const reflabel = buffer().B_("References");
+
+       // Tell BiblioInfo our purpose (i.e. generate HTML rich text).
+       CiteItem ci;
+       ci.context = CiteItem::Export;
+       ci.richtext = true;
+       ci.max_key_size = UINT_MAX;
+
+       // Header for bibliography (title required).
+       xs << xml::StartTag("bibliography");
+       xs << xml::CR();
+       xs << xml::StartTag("title");
+       xs << reflabel;
+       xs << xml::EndTag("title") << xml::CR();
+
+       // Translation between keys in each entry and DocBook tags.
+       // IDs for publications; list: http://tdg.docbook.org/tdg/5.2/biblioid.html.
+       vector<pair<string, string>> biblioId = { // <bibtex, docbook>
+               make_pair("doi", "doi"),
+               make_pair("isbn", "isbn"),
+               make_pair("issn", "issn"),
+               make_pair("isrn", "isrn"),
+               make_pair("istc", "istc"),
+               make_pair("lccn", "libraryofcongress"),
+               make_pair("number", "pubsnumber"),
+               make_pair("url", "uri")
+       };
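+       // e.g. a BibTeX "doi" field is emitted below as <biblioid class="doi">...</biblioid>.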
+       // Relations between documents.
+       vector<pair<string, string>> relations = { // <bibtex, docbook biblioset relation>
+               make_pair("journal", "journal"),
+               make_pair("booktitle", "book"),
+               make_pair("series", "series")
+       };
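+       // e.g. a BibTeX "journal" field becomes a <biblioset relation="journal"> whose <title> is the journal name.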
+       // Various things that do not fit DocBook.
+       vector<string> misc = { "language", "school", "note" };
+
+       // Store the mapping between BibTeX and DocBook.
+       map<string, string> toDocBookTag;
+       toDocBookTag["fullnames:author"] = "SPECIFIC"; // No direct translation to DocBook: <authorgroup>.
+       toDocBookTag["publisher"] = "SPECIFIC"; // No direct translation to DocBook: <publisher>.
+       toDocBookTag["address"] = "SPECIFIC"; // No direct translation to DocBook: <publisher>.
+       toDocBookTag["editor"] = "editor";
+       toDocBookTag["institution"] = "SPECIFIC"; // No direct translation to DocBook: <org>.
+
+       toDocBookTag["title"] = "title";
+       toDocBookTag["volume"] = "volumenum";
+       toDocBookTag["edition"] = "edition";
+       toDocBookTag["pages"] = "artpagenums";
+
+       toDocBookTag["abstract"] = "SPECIFIC"; // No direct translation to DocBook: <abstract>.
+       toDocBookTag["keywords"] = "SPECIFIC"; // No direct translation to DocBook: <keywordset>.
+       toDocBookTag["year"] = "SPECIFIC"; // No direct translation to DocBook: <pubdate>.
+       toDocBookTag["month"] = "SPECIFIC"; // No direct translation to DocBook: <pubdate>.
+
+       toDocBookTag["journal"] = "SPECIFIC"; // No direct translation to DocBook: <biblioset>.
+       toDocBookTag["booktitle"] = "SPECIFIC"; // No direct translation to DocBook: <biblioset>.
+       toDocBookTag["series"] = "SPECIFIC"; // No direct translation to DocBook: <biblioset>.
+
+       for (auto const & id: biblioId)
+           toDocBookTag[id.first] = "SPECIFIC"; // No direct translation to DocBook: <biblioid>.
+       for (auto const & id: relations)
+           toDocBookTag[id.first] = "SPECIFIC"; // No direct translation to DocBook: <biblioset>.
+       for (auto const & id: misc)
+           toDocBookTag[id] = "SPECIFIC"; // No direct translation to DocBook: <bibliomisc>.
+
+       // Loop over the entries. If there are no entries, add a comment to say so.
+       auto vit = cites.begin();
+       auto ven = cites.end();
+
+       if (vit == ven) {
+               xs << XMLStream::ESCAPE_NONE << "<!-- No entry in the bibliography. -->";
+       }
+
+       for (; vit != ven; ++vit) {
+               BiblioInfo::const_iterator const biit = bibinfo.find(*vit);
+               if (biit == bibinfo.end())
+                       continue;
+
+               BibTeXInfo const & entry = biit->second;
+               string const attr = "xml:id=\"" + to_utf8(xml::cleanID(entry.key())) + "\"";
+               xs << xml::StartTag("biblioentry", attr);
+               xs << xml::CR();
+
+               // FIXME Right now, we are calling BibInfo::getInfo on the key,
+               // which will give us all the cross-referenced info. But for every
+               // entry, so there's a lot of repetition. This should be fixed.
+
+               // Parse the results of getInfo and emit the corresponding DocBook tags. Interesting pieces have the form
+               // "<span class="bib-STH">STH</span>"; the rest of the text may be discarded.
+               // We could have written a DocBook version of expandFormat (that parses a citation into HTML), but it implements
+               // some kind of recursion. Still, a (static) conversion step between the citation format and DocBook would have
+               // been required. All in all, both approaches would have been similar, but this parsing allows relying
+               // on existing building blocks.
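+               // For instance (illustrative values), a fragment such as
+               //   <span class="bib-title">Some title</span> <span class="bib-year">2021</span>
+               // directly yields a <title> element, while "year" is delayed and later emitted as <pubdate>.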
+
+               string html = to_utf8(bibinfo.getInfo(entry.key(), buffer(), ci));
+               regex tagRegex("<span class=\"bib-([^\"]*)\">([^<]*)</span>");
+               smatch match;
+               auto tagIt = lyx::sregex_iterator(html.cbegin(), html.cend(), tagRegex, regex_constants::match_default);
+               auto tagEnd = lyx::sregex_iterator();
+               map<string, string> delayedTags;
+
+               // Read all tags from HTML and convert those that have a 1:1 matching.
+               while (tagIt != tagEnd) {
+                       string tag = tagIt->str(); // regex_match cannot work with temporary strings.
+                       ++tagIt;
+
+                       if (regex_match(tag, match, tagRegex)) {
+                               if (toDocBookTag[match[1]] == "SPECIFIC") {
+                                       delayedTags[match[1]] = match[2];
+                               } else {
+                                       xs << xml::StartTag(toDocBookTag[match[1]]);
+                                       xs << from_utf8(match[2].str());
+                                       xs << xml::EndTag(toDocBookTag[match[1]]);
+                               }
+                       } else {
+                               LYXERR0("The BibTeX field " << match[1].str() << " is unknown.");
+                               xs << XMLStream::ESCAPE_NONE << from_utf8("<!-- Output Error: The BibTeX field " + match[1].str() + " is unknown -->\n");
+                       }
+               }
+
+               // Type of document (book, journal paper, etc.).
+               xs << xml::StartTag("bibliomisc", "role=\"type\"");
+               xs << entry.entryType();
+               xs << xml::EndTag("bibliomisc");
+               xs << xml::CR();
+
+               // Handle tags that have complex transformations.
+               if (! delayedTags.empty()) {
+                       unsigned long remainingTags = delayedTags.size(); // Used as a workaround: with GCC 7, when erasing the
+                       // elements one by one, some of them may still show up later on (even though they were deleted previously).
+                       auto hasTag = [&delayedTags](string key) { return delayedTags.find(key) != delayedTags.end(); };
+                       auto getTag = [&delayedTags](string key) { return from_utf8(delayedTags[key]); };
+                       auto eraseTag = [&delayedTags, &remainingTags](string key) {
+                               remainingTags -= 1;
+                               delayedTags.erase(key);
+                       };
+
+                       // Notes on order of checks.
+                       // - address goes with publisher if there is one, so check this first. Otherwise, the address goes with
+                       //   the entry without other details.
+
+                       // <publisher>
+                       if (hasTag("publisher")) {
+                               xs << xml::StartTag("publisher");
+                               xs << xml::CR();
+                               xs << xml::StartTag("publishername");
+                               xs << getTag("publisher");
+                               xs << xml::EndTag("publishername");
+                               xs << xml::CR();
+
+                               if (hasTag("address")) {
+                                       xs << xml::StartTag("address");
+                                       xs << getTag("address");
+                                       xs << xml::EndTag("address");
+                                       eraseTag("address");
+                               }
+
+                               xs << xml::EndTag("publisher");
+                               xs << xml::CR();
+                               eraseTag("publisher");
+                       }
+
+                       if (hasTag("address")) {
+                               xs << xml::StartTag("address");
+                               xs << getTag("address");
+                               xs << xml::EndTag("address");
+                               eraseTag("address");
+                       }
+
+                       // <keywordset>
+                       if (hasTag("keywords")) {
+                               // Split the keywords on comma.
+                               docstring keywordSet = getTag("keywords");
+                               vector<docstring> keywords;
+                               if (keywordSet.find(from_utf8(",")) == string::npos) {
+                                       keywords = { keywordSet };
+                               } else {
+                                       size_t pos = 0;
+                                       while ((pos = keywordSet.find(from_utf8(","))) != string::npos) {
+                                               keywords.push_back(keywordSet.substr(0, pos));
+                                               keywordSet.erase(0, pos + 1);
+                                       }
+                                       keywords.push_back(keywordSet);
+                               }
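+                               // e.g. "a, b, c" is split into "a", " b" and " c";
+                               // leading whitespace is trimmed in the loop below.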
+
+                               xs << xml::StartTag("keywordset") << xml::CR();
+                               for (auto & kw: keywords) {
+                                       kw.erase(kw.begin(), std::find_if(kw.begin(), kw.end(),
+                                                                         [](int c) {return !std::isspace(c);}));
+                                       xs << xml::StartTag("keyword");
+                                       xs << kw;
+                                       xs << xml::EndTag("keyword");
+                                       xs << xml::CR();
+                               }
+                               xs << xml::EndTag("keywordset") << xml::CR();
+                               eraseTag("keywords");
+                       }
+
+                       // <copyright>
+                       // Example: http://tdg.docbook.org/tdg/5.1/biblioset.html
+                       if (hasTag("year")) {
+                               docstring value = getTag("year");
+                               eraseTag("year");
+
+                               // Follow xsd:gYearMonth format (http://books.xmlschemata.org/relaxng/ch19-77135.html).
+                               if (hasTag("month")) {
+                                       value += "-" + getTag("month");
+                                       eraseTag("month");
+                               }
+
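+                               // e.g. year "2004" and month "08" produce <pubdate>2004-08</pubdate>.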
+                               xs << xml::StartTag("pubdate");
+                               xs << value;
+                               xs << xml::EndTag("pubdate");
+                               xs << xml::CR();
+                       }
+
+                       // <institution>
+                       if (hasTag("institution")) {
+                               xs << xml::StartTag("org");
+                               xs << xml::CR();
+                               xs << xml::StartTag("orgname");
+                               xs << getTag("institution");
+                               xs << xml::EndTag("orgname");
+                               xs << xml::CR();
+                               xs << xml::EndTag("org");
+                               xs << xml::CR();
+                               eraseTag("institution");
+                       }
+
+                       // <biblioset>
+                       // Example: http://tdg.docbook.org/tdg/5.1/biblioset.html
+                       for (auto const & id: relations) {
+                               if (hasTag(id.first)) {
+                                       xs << xml::StartTag("biblioset", "relation=\"" + id.second + "\"");
+                                       xs << xml::CR();
+                                       xs << xml::StartTag("title");
+                                       xs << getTag(id.first);
+                                       xs << xml::EndTag("title");
+                                       xs << xml::CR();
+                                       xs << xml::EndTag("biblioset");
+                                       xs << xml::CR();
+                                       eraseTag(id.first);
+                               }
+                       }
+
+                       // <authorgroup>
+                       // Example: http://tdg.docbook.org/tdg/5.1/authorgroup.html
+                       if (hasTag("fullnames:author")) {
+                               // Perform full parsing of the BibTeX string, dealing with the many corner cases that might
+                               // be encountered.
+                               authorsToDocBookAuthorGroup(getTag("fullnames:author"), xs, buffer());
+                               eraseTag("fullnames:author");
+                       }
+
+                       // <abstract>
+                       if (hasTag("abstract")) {
+                               // Split the paragraphs on new line.
+                               docstring abstract = getTag("abstract");
+                               vector<docstring> paragraphs;
+                               if (abstract.find(from_utf8("\n")) == string::npos) {
+                                       paragraphs = { abstract };
+                               } else {
+                                       size_t pos = 0;
+                                       while ((pos = abstract.find(from_utf8("\n"))) != string::npos) {
+                                               paragraphs.push_back(abstract.substr(0, pos));
+                                               abstract.erase(0, pos + 1);
+                                       }
+                                       paragraphs.push_back(abstract);
+                               }
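+                               // e.g. an abstract with two newline-separated paragraphs yields two
+                               // <para> elements; empty paragraphs are skipped below.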
+
+                               xs << xml::StartTag("abstract");
+                               xs << xml::CR();
+                               for (auto const & para: paragraphs) {
+                                       if (para.empty())
+                                               continue;
+                                       xs << xml::StartTag("para");
+                                       xs << para;
+                                       xs << xml::EndTag("para");
+                               }
+                               xs << xml::CR();
+                               xs << xml::EndTag("abstract");
+                               xs << xml::CR();
+                               eraseTag("abstract");
+                       }
+
+                       // <biblioid>
+                       for (auto const & id: biblioId) {
+                               if (hasTag(id.first)) {
+                                       xs << xml::StartTag("biblioid", "class=\"" + id.second + "\"");
+                                       xs << getTag(id.first);
+                                       xs << xml::EndTag("biblioid");
+                                       xs << xml::CR();
+                                       eraseTag(id.first);
+                               }
+                       }
+
+                       // <bibliomisc>
+                       for (auto const & id: misc) {
+                               if (hasTag(id)) {
+                                       xs << xml::StartTag("bibliomisc", "role=\"" + id + "\"");
+                                       xs << getTag(id);
+                                       xs << xml::EndTag("bibliomisc");
+                                       xs << xml::CR();
+                                       eraseTag(id);
+                               }
+                       }
+
+                       // After all tags are processed, check for errors.
+                       if (remainingTags > 0) {
+                               LYXERR0("Some delayed tags were not handled.");
+                               xs << XMLStream::ESCAPE_NONE << from_utf8("<!-- Output Error: some delayed tags were not handled.\n");
+                               for (auto const & item: delayedTags) {
+                                       xs << from_utf8(" " + item.first + ": " + item.second + "\n");
+                               }
+                               xs << XMLStream::ESCAPE_NONE << from_utf8(" -->\n");
+                       }
+               }
+
+               xs << xml::EndTag("biblioentry");
+               xs << xml::CR();
+       }
+
+       // Footer for bibliography.
+       xs << xml::EndTag("bibliography");
+}
+
+
 void InsetBibtex::write(ostream & os) const
 {
        params().Write(os, &buffer());