X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Finsets%2FInsetBibtex.cpp;h=876adea54b1770ad15a140a3a8d50eb2890edff5;hb=48b1e8a0aca2f3f3faa8f1f800568e47792ba9a0;hp=e2b88ca1a9673ebfc09912dc3999be4d79f66ec3;hpb=d94b708a0fec6ade04fc9d09bd86afd73e72fac7;p=lyx.git

diff --git a/src/insets/InsetBibtex.cpp b/src/insets/InsetBibtex.cpp
index e2b88ca1a9..876adea54b 100644
--- a/src/insets/InsetBibtex.cpp
+++ b/src/insets/InsetBibtex.cpp
@@ -27,7 +27,7 @@
 #include "FuncStatus.h"
 #include "LaTeXFeatures.h"
 #include "output_latex.h"
-#include "output_xhtml.h"
+#include "xml.h"
 #include "OutputParams.h"
 #include "PDFOptions.h"
 #include "texstream.h"
@@ -43,6 +43,7 @@
 #include "support/ExceptionMessage.h"
 #include "support/FileNameList.h"
 #include "support/filetools.h"
+#include "support/regex.h"
 #include "support/gettext.h"
 #include "support/lstrings.h"
 #include "support/os.h"
@@ -50,6 +51,10 @@
 #include "support/textutils.h"
 
 #include 
+#include 
+#include 
+
+#include 
 
 using namespace std;
 using namespace lyx::support;
@@ -266,6 +271,15 @@ void InsetBibtex::latex(otexstream & os, OutputParams const & runparams) const
 	    && buffer().params().multibib == "child")
 		return;
 
+	if (runparams.inDeletedInset) {
+		// We cannot strike out bibliographies,
+		// so we just output a note.
+		os << "\\textbf{"
+		   << buffer().B_("[BIBLIOGRAPHY DELETED!]")
+		   << "}";
+		return;
+	}
+
 	string style = to_utf8(getParam("options")); // maybe empty! and with bibtotoc
 	string bibtotoc;
 	if (prefixIs(style, "bibtotoc")) {
@@ -444,7 +458,7 @@ namespace {
 		keepCase
 	};
 
-	/// remove whitespace characters, read characer sequence
+	/// remove whitespace characters, read character sequence
 	/// not containing whitespace characters or characters in
 	/// delimChars, and remove further whitespace characters.
 	///
@@ -501,7 +515,7 @@ namespace {
 	/// read subsequent bibtex values that are delimited with a #-character.
 	/// Concatenate all parts and replace names with the associated string in
 	/// the variable strings.
-	/// @return true if reading was successfull (all single parts were delimited
+	/// @return true if reading was successful (all single parts were delimited
 	/// correctly)
 	bool readValue(docstring & val, ifdocstream & ifs, const VarMap & strings)
 	{
@@ -952,9 +966,12 @@ void InsetBibtex::addToToc(DocIterator const & cpit, bool output_active,
 		return;
 
 	docstring const str = getRefLabel();
-	TocBuilder & b = backend.builder("tableofcontents");
-	b.pushItem(cpit, str, output_active);
-	b.pop();
+	shared_ptr<Toc> toc = backend.toc("tableofcontents");
+	// Assign to appropriate level
+	int const item_depth =
+		(buffer().masterParams().documentClass().hasLaTeXLayout("chapter"))
+		? 1 : 2;
+	toc->push_back(TocItem(cpit, item_depth, str, output_active));
 }
 
 
@@ -996,7 +1013,7 @@ int InsetBibtex::plaintext(odocstringstream & os,
 		refoutput += "[" + entry.label() + "] ";
 		// FIXME Right now, we are calling BibInfo::getInfo on the key,
 		// which will give us all the cross-referenced info. But for every
-		// entry, so there's a lot of repitition. This should be fixed.
+		// entry, so there's a lot of repetition. This should be fixed.
 		refoutput += bibinfo.getInfo(entry.key(), buffer(), ci) + "\n\n";
 	}
 	os << refoutput;
@@ -1007,7 +1024,7 @@ int InsetBibtex::plaintext(odocstringstream & os,
 // FIXME
 // docstring InsetBibtex::entriesAsXHTML(vector<docstring> const & entries)
 // And then here just: entriesAsXHTML(buffer().masterBibInfo().citedEntries())
-docstring InsetBibtex::xhtml(XHTMLStream & xs, OutputParams const &) const
+docstring InsetBibtex::xhtml(XMLStream & xs, OutputParams const &) const
 {
 	BiblioInfo const & bibinfo = buffer().masterBibInfo();
 	bool const all_entries = getParam("btprint") == "btPrintAll";
@@ -1022,10 +1039,10 @@ docstring InsetBibtex::xhtml(XHTMLStream & xs, OutputParams const &) const
 	ci.richtext = true;
 	ci.max_key_size = UINT_MAX;
 
-	xs << html::StartTag("h2", "class='bibtex'")
+	xs << xml::StartTag("h2", "class='bibtex'")
 	   << reflabel
-	   << html::EndTag("h2")
-	   << html::StartTag("div", "class='bibtex'");
+	   << xml::EndTag("h2")
+	   << xml::StartTag("div", "class='bibtex'");
 
 	// Now we loop over the entries
 	vector<docstring>::const_iterator vit = cites.begin();
@@ -1037,31 +1054,365 @@ docstring InsetBibtex::xhtml(XHTMLStream & xs, OutputParams const &) const
 		BibTeXInfo const & entry = biit->second;
 		string const attr = "class='bibtexentry' id='LyXCite-"
-			+ to_utf8(html::cleanAttr(entry.key())) + "'";
-		xs << html::StartTag("div", attr);
+			+ to_utf8(xml::cleanAttr(entry.key())) + "'";
+		xs << xml::StartTag("div", attr);
 
 		// don't print labels if we're outputting all entries
 		if (!all_entries) {
-			xs << html::StartTag("span", "class='bibtexlabel'")
+			xs << xml::StartTag("span", "class='bibtexlabel'")
 				<< entry.label()
-				<< html::EndTag("span");
+				<< xml::EndTag("span");
 		}
 
 		// FIXME Right now, we are calling BibInfo::getInfo on the key,
 		// which will give us all the cross-referenced info. But for every
-		// entry, so there's a lot of repitition. This should be fixed.
-		xs << html::StartTag("span", "class='bibtexinfo'")
-		   << XHTMLStream::ESCAPE_AND
+		// entry, so there's a lot of repetition. This should be fixed.
+		xs << xml::StartTag("span", "class='bibtexinfo'")
+		   << XMLStream::ESCAPE_AND
 		   << bibinfo.getInfo(entry.key(), buffer(), ci)
-		   << html::EndTag("span")
-		   << html::EndTag("div")
-		   << html::CR();
+		   << xml::EndTag("span")
+		   << xml::EndTag("div")
+		   << xml::CR();
 	}
-	xs << html::EndTag("div");
+	xs << xml::EndTag("div");
 	return docstring();
 }
 
 
+void InsetBibtex::docbook(XMLStream & xs, OutputParams const &) const
+{
+	BiblioInfo const & bibinfo = buffer().masterBibInfo();
+	bool const all_entries = getParam("btprint") == "btPrintAll";
+	vector<docstring> const & cites =
+		all_entries ? bibinfo.getKeys() : bibinfo.citedEntries();
+
+	docstring const reflabel = buffer().B_("References");
+
+	// Tell BiblioInfo our purpose (i.e. generate HTML rich text).
+	CiteItem ci;
+	ci.context = CiteItem::Export;
+	ci.richtext = true;
+	ci.max_key_size = UINT_MAX;
+
+	// Header for bibliography (title required).
+	xs << xml::StartTag("bibliography");
+	xs << xml::CR();
+	xs << xml::StartTag("title");
+	xs << reflabel;
+	xs << xml::EndTag("title") << xml::CR();
+
+	// Translation between keys in each entry and DocBook tags.
+	// IDs for publications; list: http://tdg.docbook.org/tdg/5.2/biblioid.html.
+	vector<pair<string, string>> biblioId = { //
+		make_pair("doi", "doi"),
+		make_pair("isbn", "isbn"),
+		make_pair("issn", "issn"),
+		make_pair("isrn", "isrn"),
+		make_pair("istc", "istc"),
+		make_pair("lccn", "libraryofcongress"),
+		make_pair("number", "pubsnumber"),
+		make_pair("url", "uri")
+	};
+	// Relations between documents.
+	vector<pair<string, string>> relations = { //
+		make_pair("journal", "journal"),
+		make_pair("booktitle", "book"),
+		make_pair("series", "series")
+	};
+	// Various things that do not fit DocBook.
+	vector<string> misc = { "language", "school", "note" };
+
+	// Store the mapping between BibTeX and DocBook.
+	map<string, string> toDocBookTag;
+	toDocBookTag["fullnames:author"] = "SPECIFIC"; // No direct translation to DocBook: <authorgroup>.
+	toDocBookTag["publisher"] = "SPECIFIC"; // No direct translation to DocBook: <publisher>.
+	toDocBookTag["address"] = "SPECIFIC"; // No direct translation to DocBook: <address>.
+	toDocBookTag["editor"] = "editor";
+	toDocBookTag["institution"] = "SPECIFIC"; // No direct translation to DocBook: <org>.
+
+	toDocBookTag["title"] = "title";
+	toDocBookTag["volume"] = "volumenum";
+	toDocBookTag["edition"] = "edition";
+	toDocBookTag["pages"] = "artpagenums";
+
+	toDocBookTag["abstract"] = "SPECIFIC"; // No direct translation to DocBook: <abstract>.
+	toDocBookTag["keywords"] = "SPECIFIC"; // No direct translation to DocBook: <keywordset>.
+	toDocBookTag["year"] = "SPECIFIC"; // No direct translation to DocBook: <pubdate>.
+	toDocBookTag["month"] = "SPECIFIC"; // No direct translation to DocBook: <pubdate>.
+
+	toDocBookTag["journal"] = "SPECIFIC"; // No direct translation to DocBook: <biblioset>.
+	toDocBookTag["booktitle"] = "SPECIFIC"; // No direct translation to DocBook: <biblioset>.
+	toDocBookTag["series"] = "SPECIFIC"; // No direct translation to DocBook: <biblioset>.
+
+	for (auto const & id: biblioId)
+		toDocBookTag[id.first] = "SPECIFIC"; // No direct translation to DocBook: <biblioid>.
+	for (auto const & id: relations)
+		toDocBookTag[id.first] = "SPECIFIC"; // No direct translation to DocBook: <biblioset>.
+	for (auto const & id: misc)
+		toDocBookTag[id] = "SPECIFIC"; // No direct translation to DocBook: <bibliomisc>.
+
+	// Loop over the entries. If there are no entries, add a comment to say so.
+	auto vit = cites.begin();
+	auto ven = cites.end();
+
+	if (vit == ven) {
+		xs << XMLStream::ESCAPE_NONE << "<!-- No entry to show. -->";
+	}
+
+	for (; vit != ven; ++vit) {
+		BiblioInfo::const_iterator const biit = bibinfo.find(*vit);
+		if (biit == bibinfo.end())
+			continue;
+
+		BibTeXInfo const & entry = biit->second;
+		string const attr = "xml:id=\"" + to_utf8(xml::cleanID(entry.key())) + "\"";
+		xs << xml::StartTag("biblioentry", attr);
+		xs << xml::CR();
+
+		// FIXME Right now, we are calling BibInfo::getInfo on the key,
+		// which will give us all the cross-referenced info. But for every
+		// entry, so there's a lot of repetition. This should be fixed.
+
+		// Parse the results of getInfo and emit the corresponding DocBook tags. Interesting pieces have the form
+		// "<span class="bib-STH">STH</span>", the rest of the text may be discarded.
+		// Could have written a DocBook version of expandFormat (that parses a citation into HTML), but it implements
+		// some kind of recursion. Still, a (static) conversion step between the citation format and DocBook would have
+		// been required. All in all, both approaches would have been similar, but this parsing allows relying
+		// on existing building blocks.
+
+		string html = to_utf8(bibinfo.getInfo(entry.key(), buffer(), ci));
+		regex tagRegex("<span class=\"bib-([^\"]*)\">([^<]*)</span>");
+		smatch match;
+		auto tagIt = lyx::sregex_iterator(html.cbegin(), html.cend(), tagRegex, regex_constants::match_default);
+		auto tagEnd = lyx::sregex_iterator();
+		map<string, string> delayedTags;
+
+		// Read all tags from HTML and convert those that have a 1:1 matching.
+		while (tagIt != tagEnd) {
+			string tag = tagIt->str(); // regex_match cannot work with temporary strings.
+			++tagIt;
+
+			if (regex_match(tag, match, tagRegex)) {
+				if (toDocBookTag[match[1]] == "SPECIFIC") {
+					delayedTags[match[1]] = match[2];
+				} else {
+					xs << xml::StartTag(toDocBookTag[match[1]]);
+					xs << from_utf8(match[2].str());
+					xs << xml::EndTag(toDocBookTag[match[1]]);
+				}
+			} else {
+				LYXERR0("The BibTeX field " << match[1].str() << " is unknown.");
+				xs << XMLStream::ESCAPE_NONE << from_utf8("<!-- Output Error: The BibTeX field " + match[1].str() + " is unknown. -->\n");
+			}
+		}
+
+		// Type of document (book, journal paper, etc.).
+		xs << xml::StartTag("bibliomisc", "role=\"type\"");
+		xs << entry.entryType();
+		xs << xml::EndTag("bibliomisc");
+		xs << xml::CR();
+
+		// Handle tags that have complex transformations.
+		if (! delayedTags.empty()) {
+			unsigned long remainingTags = delayedTags.size(); // Used as a workaround. With GCC 7, when erasing all
+			// elements one by one, some elements may still pop in later on (even though they were deleted previously).
+			auto hasTag = [&delayedTags](string key) { return delayedTags.find(key) != delayedTags.end(); };
+			auto getTag = [&delayedTags](string key) { return from_utf8(delayedTags[key]); };
+			auto eraseTag = [&delayedTags, &remainingTags](string key) {
+				remainingTags -= 1;
+				delayedTags.erase(key);
+			};
+
+			// Notes on order of checks.
+			// - address goes with publisher if there is one, so check this first. Otherwise, the address goes with
+			//   the entry without other details.
+
+			// <publisher>
+			if (hasTag("publisher")) {
+				xs << xml::StartTag("publisher");
+				xs << xml::CR();
+				xs << xml::StartTag("publishername");
+				xs << getTag("publisher");
+				xs << xml::EndTag("publishername");
+				xs << xml::CR();
+
+				if (hasTag("address")) {
+					xs << xml::StartTag("address");
+					xs << getTag("address");
+					xs << xml::EndTag("address");
+					eraseTag("address");
+				}
+
+				xs << xml::EndTag("publisher");
+				xs << xml::CR();
+				eraseTag("publisher");
+			}
+
+			if (hasTag("address")) {
+				xs << xml::StartTag("address");
+				xs << getTag("address");
+				xs << xml::EndTag("address");
+				eraseTag("address");
+			}
+
+			// <keywordset>
+			if (hasTag("keywords")) {
+				// Split the keywords on comma.
+				docstring keywordSet = getTag("keywords");
+				vector<docstring> keywords;
+				if (keywordSet.find(from_utf8(",")) == string::npos) {
+					keywords = { keywordSet };
+				} else {
+					size_t pos = 0;
+					while ((pos = keywordSet.find(from_utf8(","))) != string::npos) {
+						keywords.push_back(keywordSet.substr(0, pos));
+						keywordSet.erase(0, pos + 1);
+					}
+					keywords.push_back(keywordSet);
+				}
+
+				xs << xml::StartTag("keywordset") << xml::CR();
+				for (auto & kw: keywords) {
+					kw.erase(kw.begin(), std::find_if(kw.begin(), kw.end(),
+						[](int c) {return !std::isspace(c);}));
+					xs << xml::StartTag("keyword");
+					xs << kw;
+					xs << xml::EndTag("keyword");
+					xs << xml::CR();
+				}
+				xs << xml::EndTag("keywordset") << xml::CR();
+				eraseTag("keywords");
+			}
+
+			// <pubdate>
+			// Example: http://tdg.docbook.org/tdg/5.1/biblioset.html
+			if (hasTag("year")) {
+				docstring value = getTag("year");
+				eraseTag("year");
+
+				// Follow xsd:gYearMonth format (http://books.xmlschemata.org/relaxng/ch19-77135.html).
+ if (hasTag("month")) { + value += "-" + getTag("month"); + eraseTag("month"); + } + + xs << xml::StartTag("pubdate"); + xs << value; + xs << xml::EndTag("pubdate"); + xs << xml::CR(); + } + + // + if (hasTag("institution")) { + xs << xml::StartTag("org"); + xs << xml::CR(); + xs << xml::StartTag("orgname"); + xs << getTag("institution"); + xs << xml::EndTag("orgname"); + xs << xml::CR(); + xs << xml::EndTag("org"); + xs << xml::CR(); + eraseTag("institution"); + } + + // + // Example: http://tdg.docbook.org/tdg/5.1/biblioset.html + for (auto const & id: relations) { + if (hasTag(id.first)) { + xs << xml::StartTag("biblioset", "relation=\"" + id.second + "\""); + xs << xml::CR(); + xs << xml::StartTag("title"); + xs << getTag(id.first); + xs << xml::EndTag("title"); + xs << xml::CR(); + xs << xml::EndTag("biblioset"); + xs << xml::CR(); + eraseTag(id.first); + } + } + + // + // Example: http://tdg.docbook.org/tdg/5.1/authorgroup.html + if (hasTag("fullnames:author")) { + // Perform full parsing of the BibTeX string, dealing with the many corner cases that might + // be encountered. + authorsToDocBookAuthorGroup(getTag("fullnames:author"), xs, buffer()); + eraseTag("fullnames:author"); + } + + // + if (hasTag("abstract")) { + // Split the paragraphs on new line. + docstring abstract = getTag("abstract"); + vector paragraphs; + if (abstract.find(from_utf8("\n")) == string::npos) { + paragraphs = { abstract }; + } else { + size_t pos = 0; + while ((pos = abstract.find(from_utf8(","))) != string::npos) { + paragraphs.push_back(abstract.substr(0, pos)); + abstract.erase(0, pos + 1); + } + paragraphs.push_back(abstract); + } + + xs << xml::StartTag("abstract"); + xs << xml::CR(); + for (auto const & para: paragraphs) { + if (para.empty()) + continue; + xs << xml::StartTag("para"); + xs << para; + xs << xml::EndTag("para"); + } + xs << xml::CR(); + xs << xml::EndTag("abstract"); + xs << xml::CR(); + eraseTag("abstract"); + } + + // + for (auto const & id: biblioId) { + if (hasTag(id.first)) { + xs << xml::StartTag("biblioid", "class=\"" + id.second + "\""); + xs << getTag(id.first); + xs << xml::EndTag("biblioid"); + xs << xml::CR(); + eraseTag(id.first); + } + } + + // + for (auto const & id: misc) { + if (hasTag(id)) { + xs << xml::StartTag("bibliomisc", "role=\"" + id + "\""); + xs << getTag(id); + xs << xml::EndTag("bibliomisc"); + xs << xml::CR(); + eraseTag(id); + } + } + + // After all tags are processed, check for errors. + if (remainingTags > 0) { + LYXERR0("Still delayed tags not yet handled."); + xs << XMLStream::ESCAPE_NONE << from_utf8("\n"); + } + } + + xs << xml::EndTag("biblioentry"); + xs << xml::CR(); + } + + // Footer for bibliography. + xs << xml::EndTag("bibliography"); +} + + void InsetBibtex::write(ostream & os) const { params().Write(os, &buffer());