DocBook: escape IDs for InsetRef.

[lyx.git] / src / insets / InsetIndex.cpp
diff --git a/src/insets/InsetIndex.cpp b/src/insets/InsetIndex.cpp

index 05da188678e31f0008f13186c8c9f0aad9a9e45b..83b3be186ec34f71ace9f7bffdd1bcf425a3239c 100644 (file)
--- a/src/insets/InsetIndex.cpp
+++ b/src/insets/InsetIndex.cpp
@@ -27,7 +27,7 @@
  #include "Lexer.h"
  #include "output_latex.h"
  #include "output_xhtml.h"
-#include "sgml.h"
+#include "xml.h"
  #include "texstream.h"
  #include "TextClass.h"
  #include "TocBackend.h"
@@ -40,8 +40,11 @@
  #include "frontends/alert.h"
  
  #include <algorithm>
+#include <set>
  #include <ostream>
  
+#include <QThreadStorage>
+
  using namespace std;
  using namespace lyx::support;
  
@@ -55,17 +58,19 @@ namespace lyx {
  
  
  InsetIndex::InsetIndex(Buffer * buf, InsetIndexParams const & params)
-       : InsetCollapsible(buf), params_(params)
+        : InsetCollapsible(buf), params_(params)
  {}
  
  
-void InsetIndex::latex(otexstream & os, OutputParams const & runparams_in) const
+void InsetIndex::latex(otexstream & ios, OutputParams const & runparams_in) const
  {
         OutputParams runparams(runparams_in);
         runparams.inIndexEntry = true;
  
+       otexstringstream os;
+
         if (buffer().masterBuffer()->params().use_indices && !params_.index.empty()
-           && params_.index != "idx") {
+               && params_.index != "idx") {
                 os << "\\sindex[";
                 os << escape(params_.index);
                 os << "]{";
@@ -74,14 +79,19 @@ void InsetIndex::latex(otexstream & os, OutputParams const & runparams_in) const
                 os << '{';
         }
  
-       // get contents of InsetText as LaTeX and plaintext
         odocstringstream ourlatex;
-       // FIXME: do Tex/Row correspondence (I don't currently understand what is
-       // being generated from latexstr below)
         otexstream ots(ourlatex);
         InsetText::latex(ots, runparams);
+       if (runparams.for_search) {
+               // No special handling is needed if we are only searching for some patterns.
+               os << ourlatex.str() << "}";
+               return;
+       }
+       // get contents of InsetText as LaTeX and plaintext
         odocstringstream ourplain;
         InsetText::plaintext(ourplain, runparams);
+       // FIXME: do Tex/Row correspondence (I don't currently understand what is
+       // being generated from latexstr below)
         docstring latexstr = ourlatex.str();
         docstring plainstr = ourplain.str();
  
@@ -105,12 +115,12 @@ void InsetIndex::latex(otexstream & os, OutputParams const & runparams_in) const
                         LYXERR0("The `|' separator was not found in the plaintext version!");
         }
  
-       // Separate the entires and subentries, i.e., split on "!"
+       // Separate the entries and subentries, i.e., split on "!"
         // FIXME This would do the wrong thing with escaped ! characters
         std::vector<docstring> const levels =
-               getVectorFromString(latexstr, from_ascii("!"), true);
+                       getVectorFromString(latexstr, from_ascii("!"), true);
         std::vector<docstring> const levels_plain =
-               getVectorFromString(plainstr, from_ascii("!"), true);
+                       getVectorFromString(plainstr, from_ascii("!"), true);
  
         vector<docstring>::const_iterator it = levels.begin();
         vector<docstring>::const_iterator end = levels.end();
@@ -131,27 +141,27 @@ void InsetIndex::latex(otexstream & os, OutputParams const & runparams_in) const
                 if (contains(*it, '\\') && !contains(*it, '@')) {
                         // Plaintext might return nothing (e.g. for ERTs)
                         docstring const spart =
-                               (it2 < levels_plain.end() && !(*it2).empty())
-                               ? *it2 : *it;
+                                       (it2 < levels_plain.end() && !(*it2).empty())
+                                       ? *it2 : *it;
                         // Now we need to validate that all characters in
                         // the sorting part are representable in the current
                         // encoding. If not try the LaTeX macro which might
                         // or might not be a good choice, and issue a warning.
                         pair<docstring, docstring> spart_latexed =
-                               runparams.encoding->latexString(spart, runparams.dryrun);
+                                       runparams.encoding->latexString(spart, runparams.dryrun);
                         if (!spart_latexed.second.empty())
-                                       LYXERR0("Uncodable character in index entry. Sorting might be wrong!");
+                               LYXERR0("Uncodable character in index entry. Sorting might be wrong!");
                         if (spart != spart_latexed.first && !runparams.dryrun) {
                                 // FIXME: warning should be passed to the error dialog
                                 frontend::Alert::warning(_("Index sorting failed"),
-                               bformat(_("LyX's automatic index sorting algorithm faced\n"
-                                 "problems with the entry '%1$s'.\n"
-                                 "Please specify the sorting of this entry manually, as\n"
-                                 "explained in the User Guide."), spart));
+                                                                                bformat(_("LyX's automatic index sorting algorithm faced\n"
+                                                                                                                  "problems with the entry '%1$s'.\n"
+                                                                                                                  "Please specify the sorting of this entry manually, as\n"
+                                                                                                                  "explained in the User Guide."), spart));
                         }
                         // remove remaining \'s for the sorting part
                         docstring const ppart =
-                               subst(spart_latexed.first, from_ascii("\\"), docstring());
+                                       subst(spart_latexed.first, from_ascii("\\"), docstring());
                         os << ppart;
                         os << '@';
                 }
@@ -165,25 +175,201 @@ void InsetIndex::latex(otexstream & os, OutputParams const & runparams_in) const
                 os << "|" << cmd;
         }
         os << '}';
+
+       // In macros with moving arguments, such as \section,
+       // we store the index and output it after the macro (#2154)
+       if (runparams_in.postpone_fragile_stuff)
+               runparams_in.post_macro += os.str();
+       else
+               ios << os.release();
  }
  
  
-int InsetIndex::docbook(odocstream & os, OutputParams const & runparams) const
+void InsetIndex::docbook(XMLStream & xs, OutputParams const & runparams) const
  {
-       os << "<indexterm><primary>";
-       int const i = InsetText::docbook(os, runparams);
-       os << "</primary></indexterm>";
-       return i;
+       // Get the content of the inset as LaTeX, as some things may be encoded as ERT (like {}).
+       odocstringstream odss;
+       otexstream ots(odss);
+       InsetText::latex(ots, runparams);
+       docstring latexString = trim(odss.str());
+
+       // Check whether there are unsupported things.
+       if (latexString.find(from_utf8("@")) != latexString.npos) {
+               docstring error = from_utf8("Unsupported feature: an index entry contains an @. "
+                                                                       "Complete entry: \"") + latexString + from_utf8("\"");
+               LYXERR0(error);
+               xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
+       }
+
+       // Handle several indices.
+       docstring indexType = from_utf8("");
+       if (buffer().masterBuffer()->params().use_indices) {
+               indexType += " type=\"" + params_.index + "\"";
+       }
+
+       // Split the string into its main constituents: terms, and command (see, see also, range).
+       size_t positionVerticalBar = latexString.find(from_ascii("|")); // What comes before | is (sub)(sub)entries.
+       docstring indexTerms = latexString.substr(0, positionVerticalBar);
+       docstring command = latexString.substr(positionVerticalBar + 1);
+
+       // Handle primary, secondary, and tertiary terms (entries, subentries, and subsubentries, for LaTeX).
+       vector<docstring> terms = getVectorFromString(indexTerms, from_ascii("!"), false);
+
+       // Handle ranges. Happily, |( and |) can only be at the end of the string! However, the | may also introduce other commands (see, seealso), so the whole entry is searched.
+       bool hasStartRange = latexString.find(from_ascii("|(")) != latexString.npos;
+       bool hasEndRange = latexString.find(from_ascii("|)")) != latexString.npos;
+       if (hasStartRange || hasEndRange) {
+               // Remove the ranges from the command if they do not appear at the beginning.
+               size_t index = 0;
+               while ((index = command.find(from_utf8("|("), index)) != std::string::npos)
+                       command.erase(index, 1);
+               index = 0;
+               while ((index = command.find(from_utf8("|)"), index)) != std::string::npos)
+                       command.erase(index, 1);
+
+               // Remove the ranges when they are the only vertical bar in the complete string.
+               if (command[0] == '(' || command[0] == ')')
+                       command.erase(0, 1);
+       }
+
+       // Handle see and seealso. As "see" is a prefix of "seealso", the order of the comparisons is important.
+       // Both commands are mutually exclusive!
+       docstring see = from_utf8("");
+       vector<docstring> seeAlsoes;
+       if (command.substr(0, 3) == "see") {
+               // Unescape brackets.
+               size_t index = 0;
+               while ((index = command.find(from_utf8("\\{"), index)) != std::string::npos)
+                       command.erase(index, 1);
+               index = 0;
+               while ((index = command.find(from_utf8("\\}"), index)) != std::string::npos)
+                       command.erase(index, 1);
+
+               // Retrieve the part between braces, i.e. the entries referenced by see/seealso (the command itself is removed further below).
+               size_t positionOpeningBracket = command.find(from_ascii("{"));
+               size_t positionClosingBracket = command.find(from_ascii("}"));
+               docstring list = command.substr(positionOpeningBracket + 1, positionClosingBracket - positionOpeningBracket - 1);
+
+               // Parse the list of referenced entries (or a single one for see).
+               if (command.substr(0, 7) == "seealso") {
+                       seeAlsoes = getVectorFromString(list, from_ascii(","), false);
+               } else {
+                       see = list;
+
+                       if (see.find(from_ascii(",")) != see.npos) {
+                               docstring error = from_utf8("Several index terms found as \"see\"! Only one is acceptable. "
+                                                                                       "Complete entry: \"") + latexString + from_utf8("\"");
+                               LYXERR0(error);
+                               xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
+                       }
+               }
+
+               // Remove the complete see/seealso from the commands, in case there is something else to parse.
+               command = command.substr(positionClosingBracket + 1);
+       }
+
+       // Some parts of the strings are not parsed, as they do not have anything matching in DocBook: things like
+       // formatting the entry or the page number, other strings for sorting. https://wiki.lyx.org/Tips/Indexing
+       // If there are such things in the index entry, then this code may miserably fail. For example, for "Peter|(textbf",
+       // no range will be detected.
+       // TODO: Could handle formatting as significance="preferred"?
+
+    // Write all of this down.
+       if (terms.empty() && !hasEndRange) {
+               docstring error = from_utf8("No index term found! Complete entry: \"") + latexString + from_utf8("\"");
+               LYXERR0(error);
+               xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
+       } else {
+               // Generate the attributes for ranges. It is based on the terms that are indexed, but the ID must be unique
+               // to this indexing area (xml::cleanID does not guarantee this: for each call with the same arguments,
+               // the same legal ID is produced; here, as the input would be the same, the output must be, by design).
+               // Hence the thread-local storage, as the numbers must strictly be unique, and thus cannot be shared across
+               // a paragraph (making the solution used for HTML worthless). This solution is very similar to the one used in
+               // xml::cleanID.
+               docstring attrs = indexType;
+               if (hasStartRange || hasEndRange) {
+                       // Append an ID if uniqueness is not guaranteed across the document.
+                       static QThreadStorage<set<docstring>> tKnownTermLists;
+                       static QThreadStorage<int> tID;
+
+                       set<docstring> & knownTermLists = tKnownTermLists.localData();
+                       int & ID = tID.localData();
+
+                       if (!tID.hasLocalData()) {
+                               tID.localData() = 0;
+                       }
+
+                       // Modify the index terms to add the unique ID if needed.
+                       docstring newIndexTerms = indexTerms;
+                       if (knownTermLists.find(indexTerms) != knownTermLists.end()) {
+                               newIndexTerms += from_ascii(string("-") + to_string(ID));
+
+                               // Only increment for the end of range, so that the same number is used for the start of range.
+                               if (hasEndRange) {
+                                       ID++;
+                               }
+                       }
+
+                       // Term list not yet known: add it to the set only AFTER the end of range.
+                       if (knownTermLists.find(indexTerms) == knownTermLists.end() && hasEndRange) {
+                               knownTermLists.insert(indexTerms);
+                       }
+
+                       // Generate the attributes.
+                       docstring id = xml::cleanID(newIndexTerms);
+                       if (hasStartRange) {
+                               attrs += " class=\"startofrange\" xml:id=\"" + id + "\"";
+                       } else {
+                               attrs += " class=\"endofrange\" startref=\"" + id + "\"";
+                       }
+               }
+
+               // Handle the index terms (including the specific index for this entry).
+               xs << xml::StartTag("indexterm", attrs);
+               if (terms.size() > 0) { // hasEndRange has no content.
+                       xs << xml::StartTag("primary");
+                       xs << terms[0];
+                       xs << xml::EndTag("primary");
+               }
+               if (terms.size() > 1) {
+                       xs << xml::StartTag("secondary");
+                       xs << terms[1];
+                       xs << xml::EndTag("secondary");
+               }
+               if (terms.size() > 2) {
+                       xs << xml::StartTag("tertiary");
+                       xs << terms[2];
+                       xs << xml::EndTag("tertiary");
+               }
+
+               // Handle see and see also.
+               if (!see.empty()) {
+                       xs << xml::StartTag("see");
+                       xs << see;
+                       xs << xml::EndTag("see");
+               }
+
+               if (!seeAlsoes.empty()) {
+                       for (auto & entry : seeAlsoes) {
+                               xs << xml::StartTag("seealso");
+                               xs << entry;
+                               xs << xml::EndTag("seealso");
+                       }
+               }
+
+               // Close the entry.
+               xs << xml::EndTag("indexterm");
+       }
  }
  
  
-docstring InsetIndex::xhtml(XHTMLStream & xs, OutputParams const &) const
+docstring InsetIndex::xhtml(XMLStream & xs, OutputParams const &) const
  {
         // we just print an anchor, taking the paragraph ID from
         // our own interior paragraph, which doesn't get printed
         std::string const magic = paragraphs().front().magicLabel();
         std::string const attr = "id='" + magic + "'";
-       xs << html::CompTag("a", attr);
+       xs << xml::CompTag("a", attr);
         return docstring();
  }
  
@@ -562,7 +748,7 @@ bool InsetPrintIndex::getStatus(Cursor & cur, FuncRequest const & cmd,
  }
  
  
-void InsetPrintIndex::updateBuffer(ParIterator const &, UpdateType)
+void InsetPrintIndex::updateBuffer(ParIterator const &, UpdateType, bool const /*deleted*/)
  {
         Index const * index =
                 buffer().masterParams().indiceslist().findShortcut(getParam("type"));
@@ -693,7 +879,7 @@ bool operator<(IndexEntry const & lhs, IndexEntry const & rhs)
  } // namespace
  
  
-docstring InsetPrintIndex::xhtml(XHTMLStream &, OutputParams const & op) const
+docstring InsetPrintIndex::xhtml(XMLStream &, OutputParams const & op) const
  {
         BufferParams const & bp = buffer().masterBuffer()->params();
  
@@ -733,14 +919,14 @@ docstring InsetPrintIndex::xhtml(XHTMLStream &, OutputParams const & op) const
         // that's how we deal with the fact that we're probably inside a standard
         // paragraph, and we don't want to be.
         odocstringstream ods;
-       XHTMLStream xs(ods);
+       XMLStream xs(ods);
  
-       xs << html::StartTag("div", tocattr);
-       xs << html::StartTag(lay.htmltag(), lay.htmlattr())
+       xs << xml::StartTag("div", tocattr);
+       xs << xml::StartTag(lay.htmltag(), lay.htmlattr())
                  << translateIfPossible(from_ascii("Index"),
                                   op.local_font->language()->lang())
-                << html::EndTag(lay.htmltag());
-       xs << html::StartTag("ul", "class='main'");
+                << xml::EndTag(lay.htmltag());
+       xs << xml::StartTag("ul", "class='main'");
         Font const dummy;
  
         vector<IndexEntry>::const_iterator eit = entries.begin();
@@ -760,11 +946,11 @@ docstring InsetPrintIndex::xhtml(XHTMLStream &, OutputParams const & op) const
                                 // close last entry or entries, depending.
                                 if (level == 3) {
                                         // close this sub-sub-entry
-                                       xs << html::EndTag("li") << html::CR();
+                                       xs << xml::EndTag("li") << xml::CR();
                                         // is this another sub-sub-entry within the same sub-entry?
                                         if (!eit->same_sub(last)) {
                                                 // close this level
-                                               xs << html::EndTag("ul") << html::CR();
+                                               xs << xml::EndTag("ul") << xml::CR();
                                                 level = 2;
                                         }
                                 }
@@ -775,11 +961,11 @@ docstring InsetPrintIndex::xhtml(XHTMLStream &, OutputParams const & op) const
                                 // sub-entry. In that case, we do not want to close anything.
                                 if (level == 2 && !eit->same_sub(last)) {
                                         // close sub-entry
-                                       xs << html::EndTag("li") << html::CR();
+                                       xs << xml::EndTag("li") << xml::CR();
                                         // is this another sub-entry with the same main entry?
                                         if (!eit->same_main(last)) {
                                                 // close this level
-                                               xs << html::EndTag("ul") << html::CR();
+                                               xs << xml::EndTag("ul") << xml::CR();
                                                 level = 1;
                                         }
                                 }
@@ -788,7 +974,7 @@ docstring InsetPrintIndex::xhtml(XHTMLStream &, OutputParams const & op) const
                                 // close the entry.
                                 if (level == 1 && !eit->same_main(last)) {
                                         // close entry
-                                       xs << html::EndTag("li") << html::CR();
+                                       xs << xml::EndTag("li") << xml::CR();
                                 }
                         }
  
@@ -798,7 +984,7 @@ docstring InsetPrintIndex::xhtml(XHTMLStream &, OutputParams const & op) const
                         // We need to use our own stream, since we will have to
                         // modify what we get back.
                         odocstringstream ent;
-                       XHTMLStream entstream(ent);
+                       XMLStream entstream(ent);
                         OutputParams ours = op;
                         ours.for_toc = true;
                         par.simpleLyXHTMLOnePar(buffer(), entstream, ours, dummy);
@@ -816,8 +1002,8 @@ docstring InsetPrintIndex::xhtml(XHTMLStream &, OutputParams const & op) const
  
                         if (level == 3) {
                                 // another subsubentry
-                               xs << html::StartTag("li", "class='subsubentry'")
-                                  << XHTMLStream::ESCAPE_NONE << subsub;
+                               xs << xml::StartTag("li", "class='subsubentry'")
+                                  << XMLStream::ESCAPE_NONE << subsub;
                         } else if (level == 2) {
                                 // there are two ways we can be here:
                                 // (i) we can actually be inside a sub-entry already and be about
@@ -830,14 +1016,14 @@ docstring InsetPrintIndex::xhtml(XHTMLStream &, OutputParams const & op) const
                                 // note that in this case, too, though, the sub-entry might already
                                 // have a sub-sub-entry.
                                 if (eit->sub != last.sub)
-                                       xs << html::StartTag("li", "class='subentry'")
-                                          << XHTMLStream::ESCAPE_NONE << sub;
+                                       xs << xml::StartTag("li", "class='subentry'")
+                                          << XMLStream::ESCAPE_NONE << sub;
                                 if (!subsub.empty()) {
                                         // it's actually a subsubentry, so we need to start that list
-                                       xs << html::CR()
-                                          << html::StartTag("ul", "class='subsubentry'")
-                                          << html::StartTag("li", "class='subsubentry'")
-                                          << XHTMLStream::ESCAPE_NONE << subsub;
+                                       xs << xml::CR()
+                                          << xml::StartTag("ul", "class='subsubentry'")
+                                          << xml::StartTag("li", "class='subsubentry'")
+                                          << XMLStream::ESCAPE_NONE << subsub;
                                         level = 3;
                                 }
                         } else {
@@ -852,20 +1038,20 @@ docstring InsetPrintIndex::xhtml(XHTMLStream &, OutputParams const & op) const
                                 // note that in this case, too, though, the main entry might already
                                 // have a sub-entry, or even a sub-sub-entry.
                                 if (eit->main != last.main)
-                                       xs << html::StartTag("li", "class='main'") << main;
+                                       xs << xml::StartTag("li", "class='main'") << main;
                                 if (!sub.empty()) {
                                         // there's a sub-entry, too
-                                       xs << html::CR()
-                                          << html::StartTag("ul", "class='subentry'")
-                                          << html::StartTag("li", "class='subentry'")
-                                          << XHTMLStream::ESCAPE_NONE << sub;
+                                       xs << xml::CR()
+                                          << xml::StartTag("ul", "class='subentry'")
+                                          << xml::StartTag("li", "class='subentry'")
+                                          << XMLStream::ESCAPE_NONE << sub;
                                         level = 2;
                                         if (!subsub.empty()) {
                                                 // and a sub-sub-entry
-                                               xs << html::CR()
-                                                  << html::StartTag("ul", "class='subsubentry'")
-                                                  << html::StartTag("li", "class='subsubentry'")
-                                                  << XHTMLStream::ESCAPE_NONE << subsub;
+                                               xs << xml::CR()
+                                                  << xml::StartTag("ul", "class='subsubentry'")
+                                                  << xml::StartTag("li", "class='subsubentry'")
+                                                  << XMLStream::ESCAPE_NONE << subsub;
                                                 level = 3;
                                         }
                                 }
@@ -874,16 +1060,16 @@ docstring InsetPrintIndex::xhtml(XHTMLStream &, OutputParams const & op) const
                 // finally, then, we can output the index link itself
                 string const parattr = "href='#" + par.magicLabel() + "'";
                 xs << (entry_number == 0 ? ":" : ",");
-               xs << " " << html::StartTag("a", parattr)
-                  << ++entry_number << html::EndTag("a");
+               xs << " " << xml::StartTag("a", parattr)
+                  << ++entry_number << xml::EndTag("a");
                 last = *eit;
         }
         // now we have to close all the open levels
         while (level > 0) {
-               xs << html::EndTag("li") << html::EndTag("ul") << html::CR();
+               xs << xml::EndTag("li") << xml::EndTag("ul") << xml::CR();
                 --level;
         }
-       xs << html::EndTag("div") << html::CR();
+       xs << xml::EndTag("div") << xml::CR();
         return ods.str();
  }