X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Finsets%2FInsetIndex.cpp;h=e510e7330b1c2d9024cbbdf89f0169ea9e2b45c2;hb=f352a375;hp=70a16f6e3c2e01c2877b371ab0887dcfee508ff0;hpb=45479198f4caead0d29a12787603e98d7bfe9845;p=lyx.git diff --git a/src/insets/InsetIndex.cpp b/src/insets/InsetIndex.cpp index 70a16f6e3c..e510e7330b 100644 --- a/src/insets/InsetIndex.cpp +++ b/src/insets/InsetIndex.cpp @@ -11,36 +11,107 @@ #include #include "InsetIndex.h" +#include "InsetIndexMacro.h" #include "Buffer.h" #include "BufferParams.h" #include "BufferView.h" #include "ColorSet.h" +#include "Cursor.h" #include "DispatchResult.h" #include "Encoding.h" +#include "ErrorList.h" #include "FuncRequest.h" #include "FuncStatus.h" #include "IndicesList.h" +#include "InsetList.h" +#include "Language.h" +#include "LaTeX.h" #include "LaTeXFeatures.h" #include "Lexer.h" -#include "MetricsInfo.h" -#include "sgml.h" +#include "output_latex.h" +#include "output_xhtml.h" +#include "xml.h" +#include "texstream.h" +#include "TextClass.h" #include "TocBackend.h" #include "support/debug.h" #include "support/docstream.h" +#include "support/FileName.h" #include "support/gettext.h" #include "support/lstrings.h" +#include "support/Translator.h" #include "frontends/alert.h" -#include +#include +#include +#include + +#include using namespace std; using namespace lyx::support; namespace lyx { +namespace { + +typedef Translator PageRangeTranslator; +typedef Translator PageRangeTranslatorLoc; + +PageRangeTranslator const init_insetindexpagerangetranslator() +{ + PageRangeTranslator translator("none", InsetIndexParams::None); + translator.addPair("start", InsetIndexParams::Start); + translator.addPair("end", InsetIndexParams::End); + return translator; +} + +PageRangeTranslator const init_insetindexpagerangetranslator_latex() +{ + PageRangeTranslator translator("", InsetIndexParams::None); + translator.addPair("(", InsetIndexParams::Start); + translator.addPair(")", InsetIndexParams::End); + return translator; +} + + +PageRangeTranslatorLoc const init_insetindexpagerangetranslator_loc() +{ + PageRangeTranslatorLoc translator(docstring(), InsetIndexParams::None); + translator.addPair(_("Starts page range"), InsetIndexParams::Start); + translator.addPair(_("Ends page range"), InsetIndexParams::End); + return translator; +} + + +PageRangeTranslator const & insetindexpagerangetranslator() +{ + static PageRangeTranslator const prtranslator = + init_insetindexpagerangetranslator(); + return prtranslator; +} + + +PageRangeTranslatorLoc const & insetindexpagerangetranslator_loc() +{ + static PageRangeTranslatorLoc const translator = + init_insetindexpagerangetranslator_loc(); + return translator; +} + + +PageRangeTranslator const & insetindexpagerangetranslator_latex() +{ + static PageRangeTranslator const lttranslator = + init_insetindexpagerangetranslator_latex(); + return lttranslator; +} + +} // namespace anon + ///////////////////////////////////////////////////////////////////// // // InsetIndex @@ -49,133 +120,431 @@ namespace lyx { InsetIndex::InsetIndex(Buffer * buf, InsetIndexParams const & params) - : InsetCollapsable(buf), params_(params) + : InsetCollapsible(buf), params_(params) {} -int InsetIndex::latex(odocstream & os, - OutputParams const & runparams_in) const +void InsetIndex::latex(otexstream & ios, OutputParams const & runparams_in) const { OutputParams runparams(runparams_in); runparams.inIndexEntry = true; + otexstringstream os; + if (buffer().masterBuffer()->params().use_indices && !params_.index.empty() - && params_.index != "idx") { + && params_.index != "idx") { os << "\\sindex["; - os << params_.index; + os << escape(params_.index); os << "]{"; } else { os << "\\index"; os << '{'; } - int i = 0; - // get contents of InsetText as LaTeX and plaintext + // Get the LaTeX output from InsetText. We need to deconstruct this later + // in order to check if we need to generate a sorting key odocstringstream ourlatex; - InsetText::latex(ourlatex, runparams); - odocstringstream ourplain; - InsetText::plaintext(ourplain, runparams); - docstring latexstr = ourlatex.str(); - docstring plainstr = ourplain.str(); - - // this will get what follows | if anything does - docstring cmd; - - // check for the | separator - // FIXME This would go wrong on an escaped "|", but - // how far do we want to go here? - size_t pos = latexstr.find(from_ascii("|")); - if (pos != docstring::npos) { - // put the bit after "|" into cmd... - cmd = latexstr.substr(pos + 1); - // ...and erase that stuff from latexstr - latexstr = latexstr.erase(pos); - // ...and similarly from plainstr - size_t ppos = plainstr.find(from_ascii("|")); - if (ppos < plainstr.size()) - plainstr.erase(ppos); - else - LYXERR0("The `|' separator was not found in the plaintext version!"); - } - - // Separate the entires and subentries, i.e., split on "!" - // FIXME This would do the wrong thing with escaped ! characters - std::vector const levels = - getVectorFromString(latexstr, from_ascii("!"), true); - std::vector const levels_plain = - getVectorFromString(plainstr, from_ascii("!"), true); - - vector::const_iterator it = levels.begin(); - vector::const_iterator end = levels.end(); - vector::const_iterator it2 = levels_plain.begin(); - bool first = true; - for (; it != end; ++it) { - // write the separator except the first time - if (!first) - os << '!'; - else - first = false; - - // correctly sort macros and formatted strings - // if we do find a command, prepend a plain text - // version of the content to get sorting right, - // e.g. \index{LyX@\LyX}, \index{text@\textbf{text}} - // Don't do that if the user entered '@' himself, though. - if (contains(*it, '\\') && !contains(*it, '@')) { - // Plaintext might return nothing (e.g. for ERTs) - docstring const spart = - (it2 < levels_plain.end() && !(*it2).empty()) - ? *it2 : *it; - // Now we need to validate that all characters in - // the sorting part are representable in the current - // encoding. If not try the LaTeX macro which might - // or might not be a good choice, and issue a warning. - docstring spart2; - for (size_t n = 0; n < spart.size(); ++n) { - try { - spart2 += runparams.encoding->latexChar(spart[n]); - } catch (EncodingException & /* e */) { - LYXERR0("Uncodable character in index entry. Sorting might be wrong!"); - } + otexstream ots(ourlatex); + InsetText::latex(ots, runparams); + if (runparams.for_search != OutputParams::NoSearch) { + // No need for special handling, if we are only searching for some patterns + os << ourlatex.str() << "}"; + return; + } + + if (hasSortKey()) { + getSortkey(os, runparams); + os << "@"; + os << ourlatex.str(); + getSubentries(os, runparams); + if (hasSeeRef()) { + os << "|"; + os << insetindexpagerangetranslator_latex().find(params_.range); + getSeeRefs(os, runparams); + } + } else { + // We check whether we need a sort key. + // If so, we use the plaintext version + odocstringstream ourplain; + InsetText::plaintext(ourplain, runparams); + + // These are the LaTeX and plaintext representations + docstring latexstr = ourlatex.str(); + docstring plainstr = ourplain.str(); + + // This will get what follows | if anything does, + // the command (e.g., see, textbf) for pagination + // formatting + docstring cmd; + + if (hasSeeRef()) { + odocstringstream seeref; + otexstream otsee(seeref); + getSeeRefs(otsee, runparams); + cmd = seeref.str(); + } else if (!params_.pagefmt.empty() && params_.pagefmt != "default") { + cmd = from_utf8(params_.pagefmt); + } else { + // Check for the | separator to strip the cmd. + // This goes wrong on an escaped "|", but as the escape + // character can be changed in style files, we cannot + // prevent that. + size_t pos = latexstr.find(from_ascii("|")); + if (pos != docstring::npos) { + // Put the bit after "|" into cmd... + cmd = latexstr.substr(pos + 1); + // ...and erase that stuff from latexstr + latexstr = latexstr.erase(pos); + // ...as well as from plainstr + size_t ppos = plainstr.find(from_ascii("|")); + if (ppos < plainstr.size()) + plainstr.erase(ppos); + else + LYXERR0("The `|' separator was not found in the plaintext version!"); } - if (spart != spart2 && !runparams.dryrun) { - // FIXME: warning should be passed to the error dialog - frontend::Alert::warning(_("Index sorting failed"), - bformat(_("LyX's automatic index sorting algorithm faced\n" - "problems with the entry '%1$s'.\n" - "Please specify the sorting of this entry manually, as\n" - "explained in the User Guide."), spart)); + } + + odocstringstream subentries; + otexstream otsub(subentries); + getSubentries(otsub, runparams); + if (subentries.str().empty()) { + // Separate the entries and subentries, i.e., split on "!". + // This goes wrong on an escaped "!", but as the escape + // character can be changed in style files, we cannot + // prevent that. + std::vector const levels = + getVectorFromString(latexstr, from_ascii("!"), true); + std::vector const levels_plain = + getVectorFromString(plainstr, from_ascii("!"), true); + + vector::const_iterator it = levels.begin(); + vector::const_iterator end = levels.end(); + vector::const_iterator it2 = levels_plain.begin(); + bool first = true; + for (; it != end; ++it) { + // The separator needs to be put back when + // writing the levels, except for the first level + if (!first) + os << '!'; + else + first = false; + + // Now here comes the reason for this whole procedure: + // We try to correctly sort macros and formatted strings. + // If we find a command, prepend a plain text + // version of the content to get sorting right, + // e.g. \index{LyX@\LyX}, \index{text@\textbf{text}}. + // We do this on all levels. + // We don't do it if the level already contains a '@', though. + // Plaintext might return nothing (e.g. for ERTs). + // In that case, we use LaTeX. + docstring const spart = (levels_plain.empty() || (*it2).empty()) ? *it : *it2; + processLatexSorting(os, runparams, *it, spart); + if (it2 < levels_plain.end()) + ++it2; } - // remove remaining \'s for the sorting part - docstring const ppart = - subst(spart2, from_ascii("\\"), docstring()); - os << ppart; - os << '@'; - } - docstring const tpart = *it; - os << tpart; - if (it2 < levels_plain.end()) - ++it2; - } - // write the bit that followed "|" - if (!cmd.empty()) - os << "|" << cmd; + } else { + processLatexSorting(os, runparams, latexstr, plainstr); + os << subentries.str(); + } + + // At last, re-insert the command, separated by "|" + if (!cmd.empty()) { + os << "|" + << insetindexpagerangetranslator_latex().find(params_.range) + << cmd; + } + } os << '}'; - return i; + + // In macros with moving arguments, such as \section, + // we store the index and output it after the macro (#2154) + if (runparams_in.postpone_fragile_stuff) + runparams_in.post_macro += os.str(); + else + ios << os.release(); } -int InsetIndex::docbook(odocstream & os, OutputParams const & runparams) const +void InsetIndex::processLatexSorting(otexstream & os, OutputParams const & runparams, + docstring const latex, docstring const spart) const { - os << ""; - int const i = InsetText::docbook(os, runparams); - os << ""; - return i; + if (contains(latex, '\\') && !contains(latex, '@')) { + // Now we need to validate that all characters in + // the sorting part are representable in the current + // encoding. If not try the LaTeX macro which might + // or might not be a good choice, and issue a warning. + pair spart_latexed = + runparams.encoding->latexString(spart, runparams.dryrun); + if (!spart_latexed.second.empty()) + LYXERR0("Uncodable character in index entry. Sorting might be wrong!"); + if (spart != spart_latexed.first && !runparams.dryrun) { + TeXErrors terr; + ErrorList & errorList = buffer().errorList("Export"); + docstring const s = bformat(_("LyX's automatic index sorting algorithm faced " + "problems with the entry '%1$s'.\n" + "Please specify the sorting of this entry manually, as " + "explained in the User Guide."), spart); + Paragraph const & par = buffer().paragraphs().front(); + errorList.push_back(ErrorItem(_("Index sorting failed"), s, + {par.id(), 0}, {par.id(), -1})); + buffer().bufferErrors(terr, errorList); + } + // Remove remaining \'s from the sort key + docstring ppart = subst(spart_latexed.first, from_ascii("\\"), docstring()); + // Plain quotes need to be escaped, however (#10649), as this + // is the default escape character + ppart = subst(ppart, from_ascii("\""), from_ascii("\\\"")); + + // Now insert the sortkey, separated by '@'. + os << ppart; + os << '@'; + } + // Insert the actual level text + os << latex; } -docstring InsetIndex::xhtml(XHTMLStream &, OutputParams const &) const +void InsetIndex::docbook(XMLStream & xs, OutputParams const & runparams) const { + // Two ways of processing this inset are implemented: + // - the legacy one, based on parsing the raw LaTeX (before LyX 2.4) -- unlikely to be deprecated + // - the modern one, based on precise insets for indexing features + // Like the LaTeX implementation, consider the user chooses either of those options. + + // Get the content of the inset as LaTeX, as some things may be encoded as ERT (like {}). + // TODO: if there is an ERT within the index term, its conversion should be tried, in case it becomes useful; + // otherwise, ERTs should become comments. For now, they are just copied as-is, which is barely satisfactory. + odocstringstream odss; + otexstream ots(odss); + InsetText::latex(ots, runparams); + docstring latexString = trim(odss.str()); + + // Check whether there are unsupported things. @ is supported, but only for sorting, without specific formatting. + if (latexString.find(from_utf8("@\\")) != lyx::docstring::npos) { + docstring error = from_utf8("Unsupported feature: an index entry contains an @\\. " + "Complete entry: \"") + latexString + from_utf8("\""); + LYXERR0(error); + xs << XMLStream::ESCAPE_NONE << (from_utf8("\n")); + } + + // Handle several indices (indicated in the inset instead of the raw latexString). + docstring indexType = from_utf8(""); + if (buffer().masterBuffer()->params().use_indices) { + indexType += " type=\"" + params_.index + "\""; + } + + // Split the string into its main constituents: terms, and command (see, see also, range). + size_t positionVerticalBar = latexString.find(from_ascii("|")); // What comes before | is (sub)(sub)entries. + docstring indexTerms = latexString.substr(0, positionVerticalBar); + docstring command; + if (positionVerticalBar != lyx::docstring::npos) { + command = latexString.substr(positionVerticalBar + 1); + } + + // Handle sorting issues, with @. + docstring sortAs; + if (hasSortKey()) { + sortAs = getSortkeyAsText(runparams); + // indexTerms may contain a sort key if the user has both the inset and the manual key. + } else { + vector sortingElements = getVectorFromString(indexTerms, from_ascii("@"), false); + if (sortingElements.size() == 2) { + sortAs = sortingElements[0]; + indexTerms = sortingElements[1]; + } + } + + // Handle primary, secondary, and tertiary terms (entries, subentries, and subsubentries, for LaTeX). + vector terms; + if (const vector potential_terms = getSubentriesAsText(runparams); !potential_terms.empty()) { + terms = potential_terms; + // The main term is not present in the vector, as it's not a subentry. The main index term is inserted raw in + // the index inset. Considering that the user either uses the new or the legacy mechanism, the main term is the + // full string within this inset (i.e. without the subinsets). + terms.insert(terms.begin(), latexString); + } else { + terms = getVectorFromString(indexTerms, from_ascii("!"), false); + } + + // Handle ranges. Happily, in the raw LaTeX mode, (| and |) can only be at the end of the string! + bool hasInsetRange = params_.range != InsetIndexParams::PageRange::None; + bool hasStartRange = params_.range == InsetIndexParams::PageRange::Start || + latexString.find(from_ascii("|(")) != lyx::docstring::npos; + bool hasEndRange = params_.range == InsetIndexParams::PageRange::End || + latexString.find(from_ascii("|)")) != lyx::docstring::npos; + + if (hasInsetRange) { + // Remove the ranges from the command if they do not appear at the beginning. + size_t index = 0; + while ((index = command.find(from_utf8("|("), index)) != std::string::npos) + command.erase(index, 1); + index = 0; + while ((index = command.find(from_utf8("|)"), index)) != std::string::npos) + command.erase(index, 1); + + // Remove the ranges when they are the only vertical bar in the complete string. + if (command[0] == '(' || command[0] == ')') + command.erase(0, 1); + } + + // Handle see and seealso. As "see" is a prefix of "seealso", the order of the comparisons is important. + // Both commands are mutually exclusive! + docstring see = getSeeAsText(runparams); + vector seeAlsoes = getSeeAlsoesAsText(runparams); + + if (see.empty() && seeAlsoes.empty() && command.substr(0, 3) == "see") { + // Unescape brackets. + size_t index = 0; + while ((index = command.find(from_utf8("\\{"), index)) != std::string::npos) + command.erase(index, 1); + index = 0; + while ((index = command.find(from_utf8("\\}"), index)) != std::string::npos) + command.erase(index, 1); + + // Retrieve the part between brackets, and remove the complete seealso. + size_t positionOpeningBracket = command.find(from_ascii("{")); + size_t positionClosingBracket = command.find(from_ascii("}")); + docstring list = command.substr(positionOpeningBracket + 1, positionClosingBracket - positionOpeningBracket - 1); + + // Parse the list of referenced entries (or a single one for see). + if (command.substr(0, 7) == "seealso") { + seeAlsoes = getVectorFromString(list, from_ascii(","), false); + } else { + see = list; + + if (see.find(from_ascii(",")) != std::string::npos) { + docstring error = from_utf8("Several index terms found as \"see\"! Only one is acceptable. " + "Complete entry: \"") + latexString + from_utf8("\""); + LYXERR0(error); + xs << XMLStream::ESCAPE_NONE << (from_utf8("\n")); + } + } + + // Remove the complete see/seealso from the commands, in case there is something else to parse. + command = command.substr(positionClosingBracket + 1); + } + + // Some parts of the strings are not parsed, as they do not have anything matching in DocBook: things like + // formatting the entry or the page number, other strings for sorting. https://wiki.lyx.org/Tips/Indexing + // If there are such things in the index entry, then this code may miserably fail. For example, for "Peter|(textbf", + // no range will be detected. + // TODO: Could handle formatting as significance="preferred"? + if (!command.empty()) { + docstring error = from_utf8("Unsupported feature: an index entry contains a | with an unsupported command, ") + + command + from_utf8(". ") + from_utf8("Complete entry: \"") + latexString + from_utf8("\""); + LYXERR0(error); + xs << XMLStream::ESCAPE_NONE << (from_utf8("\n")); + } + + // Write all of this down. + if (terms.empty() && !hasEndRange) { + docstring error = from_utf8("No index term found! Complete entry: \"") + latexString + from_utf8("\""); + LYXERR0(error); + xs << XMLStream::ESCAPE_NONE << (from_utf8("\n")); + } else { + // Generate the attributes for ranges. It is based on the terms that are indexed, but the ID must be unique + // to this indexing area (xml::cleanID does not guarantee this: for each call with the same arguments, + // the same legal ID is produced; here, as the input would be the same, the output must be, by design). + // Hence the thread-local storage, as the numbers must strictly be unique, and thus cannot be shared across + // a paragraph (making the solution used for HTML worthless). This solution is very similar to the one used in + // xml::cleanID. + // indexType can only be used for singular and startofrange types! + docstring attrs; + if (!hasStartRange && !hasEndRange) { + attrs = indexType; + } else { + // Append an ID if uniqueness is not guaranteed across the document. + static QThreadStorage> tKnownTermLists; + static QThreadStorage tID; + + set &knownTermLists = tKnownTermLists.localData(); + int &ID = tID.localData(); + + if (!tID.hasLocalData()) { + tID.localData() = 0; + } + + // Modify the index terms to add the unique ID if needed. + docstring newIndexTerms = indexTerms; + if (knownTermLists.find(indexTerms) != knownTermLists.end()) { + newIndexTerms += from_ascii(string("-") + to_string(ID)); + + // Only increment for the end of range, so that the same number is used for the start of range. + if (hasEndRange) { + ID++; + } + } + + // Term list not yet known: add it to the set AFTER the end of range. After + if (knownTermLists.find(indexTerms) == knownTermLists.end() && hasEndRange) { + knownTermLists.insert(indexTerms); + } + + // Generate the attributes. + docstring id = xml::cleanID(newIndexTerms); + if (hasStartRange) { + attrs = indexType + " class=\"startofrange\" xml:id=\"" + id + "\""; + } else { + attrs = " class=\"endofrange\" startref=\"" + id + "\""; + } + } + + // Handle the index terms (including the specific index for this entry). + if (hasEndRange) { + xs << xml::CompTag("indexterm", attrs); + } else { + xs << xml::StartTag("indexterm", attrs); + if (!terms.empty()) { // hasEndRange has no content. + docstring attr; + if (!sortAs.empty()) { + attr = from_utf8("sortas='") + sortAs + from_utf8("'"); + } + + xs << xml::StartTag("primary", attr); + xs << terms[0]; + xs << xml::EndTag("primary"); + } + if (terms.size() > 1) { + xs << xml::StartTag("secondary"); + xs << terms[1]; + xs << xml::EndTag("secondary"); + } + if (terms.size() > 2) { + xs << xml::StartTag("tertiary"); + xs << terms[2]; + xs << xml::EndTag("tertiary"); + } + + // Handle see and see also. + if (!see.empty()) { + xs << xml::StartTag("see"); + xs << see; + xs << xml::EndTag("see"); + } + + if (!seeAlsoes.empty()) { + for (auto &entry : seeAlsoes) { + xs << xml::StartTag("seealso"); + xs << entry; + xs << xml::EndTag("seealso"); + } + } + + // Close the entry. + xs << xml::EndTag("indexterm"); + } + } +} + + +docstring InsetIndex::xhtml(XMLStream & xs, OutputParams const &) const +{ + // we just print an anchor, taking the paragraph ID from + // our own interior paragraph, which doesn't get printed + std::string const magic = paragraphs().front().magicLabel(); + std::string const attr = "id='" + magic + "'"; + xs << xml::CompTag("a", attr); return docstring(); } @@ -190,16 +559,23 @@ bool InsetIndex::showInsetDialog(BufferView * bv) const void InsetIndex::doDispatch(Cursor & cur, FuncRequest & cmd) { - switch (cmd.action) { + switch (cmd.action()) { case LFUN_INSET_MODIFY: { if (cmd.getArg(0) == "changetype") { + cur.recordUndoInset(this); params_.index = from_utf8(cmd.getArg(1)); break; } InsetIndexParams params; InsetIndex::string2params(to_utf8(cmd.argument()), params); + cur.recordUndoInset(this); params_.index = params.index; + params_.range = params.range; + params_.pagefmt = params.pagefmt; + // what we really want here is a TOC update, but that means + // a full buffer update + cur.forceBufferUpdate(); break; } @@ -208,7 +584,7 @@ void InsetIndex::doDispatch(Cursor & cur, FuncRequest & cmd) break; default: - InsetCollapsable::doDispatch(cur, cmd); + InsetCollapsible::doDispatch(cur, cmd); break; } } @@ -217,7 +593,7 @@ void InsetIndex::doDispatch(Cursor & cur, FuncRequest & cmd) bool InsetIndex::getStatus(Cursor & cur, FuncRequest const & cmd, FuncStatus & flag) const { - switch (cmd.action) { + switch (cmd.action()) { case LFUN_INSET_MODIFY: if (cmd.getArg(0) == "changetype") { @@ -230,39 +606,261 @@ bool InsetIndex::getStatus(Cursor & cur, FuncRequest const & cmd, from_utf8(cmd.getArg(1)) == params_.index); return true; } - flag.setEnabled(true); - return true; + return InsetCollapsible::getStatus(cur, cmd, flag); case LFUN_INSET_DIALOG_UPDATE: { Buffer const & realbuffer = *buffer().masterBuffer(); flag.setEnabled(realbuffer.params().use_indices); return true; } + + case LFUN_INDEXMACRO_INSERT: + return macrosPossible(cmd.getArg(0)); default: - return InsetCollapsable::getStatus(cur, cmd, flag); + return InsetCollapsible::getStatus(cur, cmd, flag); } } -docstring const InsetIndex::buttonLabel(BufferView const & bv) const +void InsetIndex::getSortkey(otexstream & os, OutputParams const & runparams) const { - docstring s = _("Idx"); - if (decoration() == InsetLayout::CLASSIC) - return isOpen(bv) ? s : getNewLabel(s); - else - return getNewLabel(s); + Paragraph const & par = paragraphs().front(); + InsetList::const_iterator it = par.insetList().begin(); + for (; it != par.insetList().end(); ++it) { + Inset & inset = *it->inset; + if (inset.lyxCode() == INDEXMACRO_SORTKEY_CODE) { + InsetIndexMacro const & iim = + static_cast(inset); + iim.getLatex(os, runparams); + return; + } + } +} + + +docstring InsetIndex::getSortkeyAsText(OutputParams const & runparams) const +{ + Paragraph const & par = paragraphs().front(); + InsetList::const_iterator it = par.insetList().begin(); + for (; it != par.insetList().end(); ++it) { + Inset & inset = *it->inset; + if (inset.lyxCode() == INDEXMACRO_SORTKEY_CODE) { + otexstringstream os; + InsetIndexMacro const & iim = + static_cast(inset); + iim.getLatex(os, runparams); + return os.str(); + } + } + return from_ascii(""); +} + + +void InsetIndex::getSubentries(otexstream & os, OutputParams const & runparams) const +{ + Paragraph const & par = paragraphs().front(); + InsetList::const_iterator it = par.insetList().begin(); + int i = 0; + for (; it != par.insetList().end(); ++it) { + Inset & inset = *it->inset; + if (inset.lyxCode() == INDEXMACRO_CODE) { + InsetIndexMacro const & iim = + static_cast(inset); + if (iim.params().type == InsetIndexMacroParams::Subindex) { + ++i; + if (i > 2) + return; + os << "!"; + iim.getLatex(os, runparams); + } + } + } +} + + +std::vector InsetIndex::getSubentriesAsText(OutputParams const & runparams) const +{ + std::vector subentries; + + Paragraph const & par = paragraphs().front(); + InsetList::const_iterator it = par.insetList().begin(); + int i = 0; + for (; it != par.insetList().end(); ++it) { + Inset & inset = *it->inset; + if (inset.lyxCode() == INDEXMACRO_CODE) { + InsetIndexMacro const & iim = + static_cast(inset); + if (iim.params().type == InsetIndexMacroParams::Subindex) { + ++i; + if (i > 2) + break; + + otexstringstream os; + iim.getLatex(os, runparams); + subentries.emplace_back(os.str()); + } + } + } + + return subentries; +} + + +docstring InsetIndex::getMainSubentryAsText(OutputParams const & runparams) const +{ + otexstringstream os; + InsetText::latex(os, runparams); + return os.str(); +} + + +void InsetIndex::getSeeRefs(otexstream & os, OutputParams const & runparams) const +{ + Paragraph const & par = paragraphs().front(); + InsetList::const_iterator it = par.insetList().begin(); + for (; it != par.insetList().end(); ++it) { + Inset & inset = *it->inset; + if (inset.lyxCode() == INDEXMACRO_CODE) { + InsetIndexMacro const & iim = + static_cast(inset); + if (iim.params().type == InsetIndexMacroParams::See + || iim.params().type == InsetIndexMacroParams::Seealso) { + iim.getLatex(os, runparams); + return; + } + } + } +} + + +docstring InsetIndex::getSeeAsText(OutputParams const & runparams) const +{ + Paragraph const & par = paragraphs().front(); + InsetList::const_iterator it = par.insetList().begin(); + for (; it != par.insetList().end(); ++it) { + Inset & inset = *it->inset; + if (inset.lyxCode() == INDEXMACRO_CODE) { + InsetIndexMacro const & iim = + static_cast(inset); + if (iim.params().type == InsetIndexMacroParams::See) { + otexstringstream os; + iim.getLatex(os, runparams); + return os.str(); + } + } + } + return from_ascii(""); +} + + +std::vector InsetIndex::getSeeAlsoesAsText(OutputParams const & runparams) const +{ + std::vector seeAlsoes; + + Paragraph const & par = paragraphs().front(); + InsetList::const_iterator it = par.insetList().begin(); + for (; it != par.insetList().end(); ++it) { + Inset & inset = *it->inset; + if (inset.lyxCode() == INDEXMACRO_CODE) { + InsetIndexMacro const & iim = + static_cast(inset); + if (iim.params().type == InsetIndexMacroParams::Seealso) { + otexstringstream os; + iim.getLatex(os, runparams); + seeAlsoes.emplace_back(os.str()); + } + } + } + + return seeAlsoes; +} + + +namespace { + +bool hasInsetWithCode(const InsetIndex * const inset_index, const InsetCode code, + const std::set types = {}) +{ + Paragraph const & par = inset_index->paragraphs().front(); + InsetList::const_iterator it = par.insetList().begin(); + for (; it != par.insetList().end(); ++it) { + Inset & inset = *it->inset; + if (inset.lyxCode() == code) { + if (types.empty()) + return true; + + LASSERT(code == INDEXMACRO_CODE, return false); + InsetIndexMacro const & iim = + static_cast(inset); + if (types.find(iim.params().type) != types.end()) + return true; + } + } + return false; +} + +} // namespace + + +bool InsetIndex::hasSubentries() const +{ + return hasInsetWithCode(this, INDEXMACRO_CODE, {InsetIndexMacroParams::Subindex}); +} + + +bool InsetIndex::hasSeeRef() const +{ + return hasInsetWithCode(this, INDEXMACRO_CODE, {InsetIndexMacroParams::See, InsetIndexMacroParams::Seealso}); +} + + +bool InsetIndex::hasSortKey() const +{ + return hasInsetWithCode(this, INDEXMACRO_SORTKEY_CODE); +} + + +bool InsetIndex::macrosPossible(string const type) const +{ + if (type != "see" && type != "seealso" + && type != "sortkey" && type != "subindex") + return false; + + Paragraph const & par = paragraphs().front(); + InsetList::const_iterator it = par.insetList().begin(); + int subidxs = 0; + for (; it != par.insetList().end(); ++it) { + Inset & inset = *it->inset; + if (type == "sortkey" && inset.lyxCode() == INDEXMACRO_SORTKEY_CODE) + return false; + if (inset.lyxCode() == INDEXMACRO_CODE) { + InsetIndexMacro const & iim = static_cast(inset); + if ((type == "see" || type == "seealso") + && (iim.params().type == InsetIndexMacroParams::See + || iim.params().type == InsetIndexMacroParams::Seealso)) + return false; + if (type == "subindex" + && iim.params().type == InsetIndexMacroParams::Subindex) { + ++subidxs; + if (subidxs > 1) + return false; + } + } + } + return true; } ColorCode InsetIndex::labelColor() const { if (params_.index.empty() || params_.index == from_ascii("idx")) - return InsetCollapsable::labelColor(); + return InsetCollapsible::labelColor(); // FIXME UNICODE - ColorCode c = lcolor.getFromLyXName(to_utf8(params_.index)); + ColorCode c = lcolor.getFromLyXName(to_utf8(params_.index) + + "@" + buffer().fileName().absFileName()); if (c == Color_none) - c = InsetCollapsable::labelColor(); + c = InsetCollapsible::labelColor(); return c; } @@ -282,26 +880,64 @@ docstring InsetIndex::toolTip(BufferView const &, int, int) const tip += ")"; } tip += ": "; - OutputParams rp(&buffer().params().encoding()); - odocstringstream ods; - InsetText::plaintext(ods, rp); - tip += ods.str(); - return wrapParas(tip); + docstring res = toolTipText(tip); + if (!insetindexpagerangetranslator_loc().find(params_.range).empty()) + res += "\n" + insetindexpagerangetranslator_loc().find(params_.range); + if (!params_.pagefmt.empty() && params_.pagefmt != "default") { + res += "\n" + _("Pagination format:") + " "; + if (params_.pagefmt == "textbf") + res += _("bold"); + else if (params_.pagefmt == "textit") + res += _("italic"); + else if (params_.pagefmt == "emph") + res += _("emphasized"); + else + res += from_utf8(params_.pagefmt); + } + return res; +} + + +docstring const InsetIndex::buttonLabel(BufferView const & bv) const +{ + InsetLayout const & il = getLayout(); + docstring label = translateIfPossible(il.labelstring()); + + if (buffer().params().use_indices && !params_.index.empty()) { + Buffer const & realbuffer = *buffer().masterBuffer(); + IndicesList const & indiceslist = realbuffer.params().indiceslist(); + label += " ("; + Index const * index = indiceslist.findShortcut(params_.index); + if (!index) + label += _("unknown type!"); + else + label += index->index(); + label += ")"; + } + + docstring res; + if (!il.contentaslabel() || geometry(bv) != ButtonOnly) + res = label; + else + res = getNewLabel(label); + if (!insetindexpagerangetranslator_latex().find(params_.range).empty()) + res += " " + from_ascii(insetindexpagerangetranslator_latex().find(params_.range)); + return res; } void InsetIndex::write(ostream & os) const { - os << to_utf8(name()); + os << to_utf8(layoutName()); params_.write(os); - InsetCollapsable::write(os); + InsetCollapsible::write(os); } void InsetIndex::read(Lexer & lex) { params_.read(lex); - InsetCollapsable::read(lex); + InsetCollapsible::read(lex); } @@ -329,14 +965,22 @@ void InsetIndex::string2params(string const & in, InsetIndexParams & params) } -void InsetIndex::addToToc(DocIterator const & cpit) +void InsetIndex::addToToc(DocIterator const & cpit, bool output_active, + UpdateType utype, TocBackend & backend) const { DocIterator pit = cpit; - pit.push_back(CursorSlice(*this)); - docstring const item = text().asString(0, 1, AS_STR_LABEL | AS_STR_INSETS); - buffer().tocBackend().toc("index").push_back(TocItem(pit, 0, item)); + pit.push_back(CursorSlice(const_cast(*this))); + docstring str; + string type = "index"; + if (buffer().masterBuffer()->params().use_indices) + type += ":" + to_utf8(params_.index); + // this is unlikely to be terribly long + text().forOutliner(str, INT_MAX); + TocBuilder & b = backend.builder(type); + b.pushItem(pit, str, output_active); // Proceed with the rest of the inset. - InsetCollapsable::addToToc(cpit); + InsetCollapsible::addToToc(cpit, output_active, utype, backend); + b.pop(); } @@ -346,21 +990,43 @@ void InsetIndex::validate(LaTeXFeatures & features) const && !params_.index.empty() && params_.index != "idx") features.require("splitidx"); + InsetCollapsible::validate(features); } -docstring InsetIndex::contextMenu(BufferView const &, int, int) const +string InsetIndex::contextMenuName() const { - return from_ascii("context-index"); + return "context-index"; +} + + +string InsetIndex::contextMenu(BufferView const & bv, int x, int y) const +{ + // We override the implementation of InsetCollapsible, + // because we have eytra entries. + string owncm = "context-edit-index;"; + return owncm + InsetCollapsible::contextMenu(bv, x, y); } bool InsetIndex::hasSettings() const { - return buffer().masterBuffer()->params().use_indices; + return true; } +bool InsetIndex::insetAllowed(InsetCode code) const +{ + switch (code) { + case INDEXMACRO_CODE: + case INDEXMACRO_SORTKEY_CODE: + return true; + case INDEX_CODE: + return false; + default: + return InsetCollapsible::insetAllowed(code); + } +} ///////////////////////////////////////////////////////////////////// @@ -378,6 +1044,12 @@ void InsetIndexParams::write(ostream & os) const else os << "idx"; os << '\n'; + os << "range " + << insetindexpagerangetranslator().find(range) + << '\n'; + os << "pageformat " + << pagefmt + << '\n'; } @@ -387,6 +1059,16 @@ void InsetIndexParams::read(Lexer & lex) index = lex.getDocString(); else index = from_ascii("idx"); + if (lex.checkFor("range")) { + string st = lex.getString(); + if (lex.eatLine()) { + st = lex.getString(); + range = insetindexpagerangetranslator().find(lex.getString()); + } + } + if (lex.checkFor("pageformat") && lex.eatLine()) { + pagefmt = lex.getString(); + } } @@ -397,7 +1079,7 @@ void InsetIndexParams::read(Lexer & lex) /////////////////////////////////////////////////////////////////////// InsetPrintIndex::InsetPrintIndex(Buffer * buf, InsetCommandParams const & p) - : InsetCommand(buf, p, "index_print") + : InsetCommand(buf, p) {} @@ -405,8 +1087,11 @@ ParamInfo const & InsetPrintIndex::findInfo(string const & /* cmdName */) { static ParamInfo param_info_; if (param_info_.empty()) { - param_info_.add("type", ParamInfo::LATEX_OPTIONAL); - param_info_.add("name", ParamInfo::LATEX_REQUIRED); + param_info_.add("type", ParamInfo::LATEX_OPTIONAL, + ParamInfo::HANDLING_ESCAPE); + param_info_.add("name", ParamInfo::LATEX_OPTIONAL, + ParamInfo::HANDLING_LATEXIFY); + param_info_.add("literal", ParamInfo::LYX_INTERNAL); } return param_info_; } @@ -425,7 +1110,7 @@ docstring InsetPrintIndex::screenLabel() const Index const * index = indiceslist.findShortcut(getParam("type")); if (!index && !printall) return _("Unknown index type!"); - docstring res = printall ? _("All indices") : index->index(); + docstring res = printall ? _("All indexes") : index->index(); if (!multind) res += " (" + _("non-active") + ")"; else if (contains(getCmdName(), "printsubindex")) @@ -443,34 +1128,36 @@ bool InsetPrintIndex::isCompatibleCommand(string const & s) void InsetPrintIndex::doDispatch(Cursor & cur, FuncRequest & cmd) { - switch (cmd.action) { + switch (cmd.action()) { case LFUN_INSET_MODIFY: { if (cmd.argument() == from_ascii("toggle-subindex")) { - string cmd = getCmdName(); - if (contains(cmd, "printindex")) - cmd = subst(cmd, "printindex", "printsubindex"); + string scmd = getCmdName(); + if (contains(scmd, "printindex")) + scmd = subst(scmd, "printindex", "printsubindex"); else - cmd = subst(cmd, "printsubindex", "printindex"); - setCmdName(cmd); + scmd = subst(scmd, "printsubindex", "printindex"); + cur.recordUndo(); + setCmdName(scmd); break; } else if (cmd.argument() == from_ascii("check-printindex*")) { - string cmd = getCmdName(); - if (suffixIs(cmd, '*')) + string scmd = getCmdName(); + if (suffixIs(scmd, '*')) break; - cmd += '*'; + scmd += '*'; + cur.recordUndo(); setParam("type", docstring()); - setCmdName(cmd); + setCmdName(scmd); break; } InsetCommandParams p(INDEX_PRINT_CODE); // FIXME UNICODE - InsetCommand::string2params("index_print", - to_utf8(cmd.argument()), p); + InsetCommand::string2params(to_utf8(cmd.argument()), p); if (p.getCmdName().empty()) { - cur.noUpdate(); + cur.noScreenUpdate(); break; } + cur.recordUndo(); setParams(p); break; } @@ -485,7 +1172,7 @@ void InsetPrintIndex::doDispatch(Cursor & cur, FuncRequest & cmd) bool InsetPrintIndex::getStatus(Cursor & cur, FuncRequest const & cmd, FuncStatus & status) const { - switch (cmd.action) { + switch (cmd.action()) { case LFUN_INSET_MODIFY: { if (cmd.argument() == from_ascii("toggle-subindex")) { @@ -499,8 +1186,7 @@ bool InsetPrintIndex::getStatus(Cursor & cur, FuncRequest const & cmd, } if (cmd.getArg(0) == "index_print" && cmd.getArg(1) == "CommandInset") { InsetCommandParams p(INDEX_PRINT_CODE); - InsetCommand::string2params("index_print", - to_utf8(cmd.argument()), p); + InsetCommand::string2params(to_utf8(cmd.argument()), p); if (suffixIs(p.getCmdName(), '*')) { status.setEnabled(true); status.setOnOff(false); @@ -516,7 +1202,7 @@ bool InsetPrintIndex::getStatus(Cursor & cur, FuncRequest const & cmd, } else return InsetCommand::getStatus(cur, cmd, status); } - + case LFUN_INSET_DIALOG_UPDATE: { status.setEnabled(buffer().masterBuffer()->params().use_indices); return true; @@ -528,15 +1214,24 @@ bool InsetPrintIndex::getStatus(Cursor & cur, FuncRequest const & cmd, } -int InsetPrintIndex::latex(odocstream & os, OutputParams const &) const +void InsetPrintIndex::updateBuffer(ParIterator const &, UpdateType, bool const /*deleted*/) +{ + Index const * index = + buffer().masterParams().indiceslist().findShortcut(getParam("type")); + if (index) + setParam("name", index->index()); +} + + +void InsetPrintIndex::latex(otexstream & os, OutputParams const & runparams_in) const { if (!buffer().masterBuffer()->params().use_indices) { if (getParam("type") == from_ascii("idx")) - os << "\\printindex{}"; - return 0; + os << "\\printindex" << termcmd; + return; } - os << getCommand(); - return 0; + OutputParams runparams = runparams_in; + os << getCommand(runparams); } @@ -545,13 +1240,14 @@ void InsetPrintIndex::validate(LaTeXFeatures & features) const features.require("makeidx"); if (buffer().masterBuffer()->params().use_indices) features.require("splitidx"); + InsetCommand::validate(features); } -docstring InsetPrintIndex::contextMenu(BufferView const &, int, int) const +string InsetPrintIndex::contextMenuName() const { return buffer().masterBuffer()->params().use_indices ? - from_ascii("context-indexprint") : docstring(); + "context-indexprint" : string(); } @@ -560,9 +1256,473 @@ bool InsetPrintIndex::hasSettings() const return buffer().masterBuffer()->params().use_indices; } -docstring InsetPrintIndex::xhtml(odocstream &, OutputParams const &) const + +class IndexEntry { - return docstring(); +public: + /// Builds an entry for the index. + IndexEntry(const InsetIndex * inset, OutputParams const * runparams) : inset_(inset), runparams_(runparams) + { + LASSERT(runparams, return); + + // Convert the inset as text. The resulting text usually only contains an XHTML anchor () and text. + odocstringstream entry; + OutputParams ours = *runparams; + ours.for_toc = false; + inset_->plaintext(entry, ours); + entry_ = entry.str(); + + // Determine in which index this entry belongs to. + if (inset_->buffer().masterBuffer()->params().use_indices) { + index_ = inset_->params_.index; + } + + // Attempt parsing the inset. + if (isModern()) + parseAsModern(); + else + parseAsLegacy(); + } + + /// When parsing this entry, some errors may be found; they are reported as a single string. + // It is up to the caller to send this string to LYXERR and the output file, as needed. + const docstring & output_error() const + { + return output_error_; + } + + void output_error(XMLStream xs) const + { + LYXERR0(output_error()); + xs << XMLStream::ESCAPE_NONE << (from_utf8("\n")); + } + + +private: + bool isModern() + { + std::cout << to_utf8(entry_) << std::endl; + + // If a modern parameter is present, this is definitely a modern index inset. Similarly, if it contains the + // usual LaTeX symbols (!|@), then it is definitely a legacy index inset. Otherwise, if it has features of + // neither, it is both: consider this is a modern inset, to trigger the least complex code. Mixing both types + // is not allowed (i.e. behaviour is undefined). + const bool is_definitely_modern = inset_->hasSortKey() || inset_->hasSeeRef() || inset_->hasSubentries() + || inset_->params_.range != InsetIndexParams::PageRange::None; + const bool is_definitely_legacy = entry_.find('@') != std::string::npos + || entry_.find('|') != std::string::npos || entry_.find('!') != std::string::npos; + + if (is_definitely_legacy && is_definitely_modern) + output_error_ += from_utf8("Mix of index properties and raw LaTeX index commands is unsupported. "); + + // Truth table: + // - is_definitely_modern == true: + // - is_definitely_legacy == true: error (return whatever) + // - is_definitely_legacy == false: return modern + // - is_definitely_modern == false: + // - is_definitely_legacy == true: return legacy + // - is_definitely_legacy == false: return modern + return !is_definitely_legacy; + } + + void parseAsModern() + { + LASSERT(runparams_, return); + + if (inset_->hasSortKey()) { + sort_as_ = inset_->getSortkeyAsText(*runparams_); + } + + terms_ = inset_->getSubentriesAsText(*runparams_); + // The main term is not present in the vector, as it's not a subentry. The main index term is inserted raw in + // the index inset. Considering that the user either uses the new or the legacy mechanism, the main term is the + // full string within this inset (i.e. without the subinsets). + terms_.insert(terms_.begin(), inset_->getMainSubentryAsText(*runparams_)); + + has_start_range_ = inset_->params_.range == InsetIndexParams::PageRange::Start; + has_end_range_ = inset_->params_.range == InsetIndexParams::PageRange::End; + + see_ = inset_->getSeeAsText(*runparams_); + see_alsoes_ = inset_->getSeeAlsoesAsText(*runparams_); + } + + void parseAsLegacy() { + // Determine if some features are known not to be supported. For now, this is only formatting like + // \index{alpha@\textbf{alpha}} or \index{alpha@$\alpha$}. + // @ is supported, but only for sorting, without specific formatting. + if (entry_.find(from_utf8("@\\")) != lyx::docstring::npos) { + output_error_ += from_utf8("Unsupported feature: an index entry contains an @\\. " + "Complete entry: \"") + entry_ + from_utf8("\". "); + } + if (entry_.find(from_utf8("@$")) != lyx::docstring::npos) { + output_error_ += from_utf8("Unsupported feature: an index entry contains an @$. " + "Complete entry: \"") + entry_ + from_utf8("\". "); + } + + // Split the string into its main constituents: terms, and command (see, see also, range). + size_t positionVerticalBar = entry_.find(from_ascii("|")); // What comes before | is (sub)(sub)entries. + docstring indexTerms = entry_.substr(0, positionVerticalBar); + docstring command; + if (positionVerticalBar != lyx::docstring::npos) { + command = entry_.substr(positionVerticalBar + 1); + } + + // Handle sorting issues, with @. + vector sortingElements = getVectorFromString(indexTerms, from_ascii("@"), false); + if (sortingElements.size() == 2) { + sort_as_ = sortingElements[0]; + indexTerms = sortingElements[1]; + } + + // Handle entries, subentries, and subsubentries. + terms_ = getVectorFromString(indexTerms, from_ascii("!"), false); + + // Handle ranges. Happily, (| and |) can only be at the end of the string! + has_start_range_ = entry_.find(from_ascii("|(")) != lyx::docstring::npos; + has_end_range_ = entry_.find(from_ascii("|)")) != lyx::docstring::npos; + + // - Remove the ranges from the command if they do not appear at the beginning. + size_t range_index = 0; + while ((range_index = command.find(from_utf8("|("), range_index)) != std::string::npos) + command.erase(range_index, 1); + range_index = 0; + while ((range_index = command.find(from_utf8("|)"), range_index)) != std::string::npos) + command.erase(range_index, 1); + + // - Remove the ranges when they are the only vertical bar in the complete string. + if (command[0] == '(' || command[0] == ')') + command.erase(0, 1); + + // Handle see and seealso. As "see" is a prefix of "seealso", the order of the comparisons is important. + // Both commands are mutually exclusive! + if (command.substr(0, 3) == "see") { + // Unescape brackets. + size_t index_argument_begin = 0; + while ((index_argument_begin = command.find(from_utf8("\\{"), index_argument_begin)) != std::string::npos) + command.erase(index_argument_begin, 1); + size_t index_argument_end = 0; + while ((index_argument_end = command.find(from_utf8("\\}"), index_argument_end)) != std::string::npos) + command.erase(index_argument_end, 1); + + // Retrieve the part between brackets, and remove the complete seealso. + size_t position_opening_bracket = command.find(from_ascii("{")); + size_t position_closing_bracket = command.find(from_ascii("}")); + docstring argument = command.substr(position_opening_bracket + 1, + position_closing_bracket - position_opening_bracket - 1); + + // Parse the argument of referenced entries (or a single one for see). + if (command.substr(0, 7) == "seealso") { + see_alsoes_ = getVectorFromString(argument, from_ascii(","), false); + } else { + see_ = argument; + + if (see_.find(from_ascii(",")) != std::string::npos) { + output_error_ += from_utf8("Several index_argument_end terms found as \"see\"! Only one is " + "acceptable. Complete entry: \"") + entry_ + from_utf8("\". "); + } + } + + // Remove the complete see/seealso from the commands, in case there is something else to parse. + command = command.substr(position_closing_bracket + 1); + } + + // Some parts of the strings are not parsed, as they do not have anything matching in DocBook or XHTML: + // things like formatting the entry or the page number, other strings for sorting. + // https://wiki.lyx.org/Tips/Indexing + // If there are such things in the index entry, then this code may miserably fail. For example, for + // "Peter|(textbf", no range will be detected. + if (!command.empty()) { + output_error_ += from_utf8("Unsupported feature: an index entry contains a | with an unsupported command, ") + + command + from_utf8(". Complete entry: \"") + entry_ + from_utf8("\". "); + } + } + +public: + int level() const { + return terms_.size(); + } + + const std::vector& terms() const { + return terms_; + } + + std::vector& terms() { + return terms_; + } + + const InsetIndex* inset() const { + return inset_; + } + +private: + // Input inset. These should only be used when parsing the inset (either parseAsModern or parseAsLegacy, called in + // the constructor). + const InsetIndex * inset_; + OutputParams const * runparams_; + docstring entry_; + docstring index_; // Useful when there are multiple indices in the same document. + + // Errors, concatenated as a single string, available as soon as parsing is done, const afterwards (i.e. once + // constructor is done). + docstring output_error_; + + // Parsed index entry. + std::vector terms_; // Up to three entries, in general. + docstring sort_as_; + docstring command_; + bool has_start_range_; + bool has_end_range_; + docstring see_; + vector see_alsoes_; + + // Operators used for sorting entries (alphabetical order). + friend bool operator<(IndexEntry const & lhs, IndexEntry const & rhs); +}; + +bool operator<(IndexEntry const & lhs, IndexEntry const & rhs) +{ + if (lhs.terms_.empty()) + return false; + + for (int i = 0; i < min(rhs.terms_.size(), lhs.terms_.size()); ++i) { + int comp = compare_no_case(lhs.terms_[i], rhs.terms_[i]); + if (comp != 0) + return comp < 0; + } + return false; +} + + +namespace { +std::string generateCssClassAtDepth(unsigned depth) { + std::string css_class = "entry"; + + while (depth > 0) { + depth -= 1; + css_class.insert(0, "sub"); + } + + return css_class; +} + +struct IndexNode { + std::vector entries; + std::vector children; +}; + +docstring termAtLevel(const IndexNode* node, unsigned depth) +{ + // The typical entry has a depth of 1 to 3: the call stack would then be at most 4 (due to the root node). This + // function could be made constant time by copying the term in each node, but that would make data duplication that + // may fall out of sync; the performance benefit would probably be negligible. + if (!node->entries.empty()) { + LASSERT(node->entries.begin()->terms().size() >= depth + 1, return from_ascii("")); + return node->entries.begin()->terms()[depth]; + } + + if (!node->children.empty()) { + return termAtLevel(*node->children.begin(), depth); + } + + LASSERT(false, return from_ascii("")); +} + +void insertIntoNode(const IndexEntry& entry, IndexNode* node, unsigned depth = 0) +{ + // depth == 0 is for the root, not yet the index, hence the increase when going to vector size. + for (IndexNode* child : node->children) { + if (entry.terms()[depth] == termAtLevel(child, depth)) { + if (depth + 1 == entry.terms().size()) { // == child.entries.begin()->terms().size() + // All term entries match: it's an entry. + child->entries.emplace_back(entry); + return; + } else { + insertIntoNode(entry, child, depth + 1); + return; + } + } + } + + // Out of the loop: no matching child found, create a new (possibly nested) child for this entry. Due to the + // possibility of nestedness, only insert the current entry when the right level is reached. This is needed if the + // first entry for a word has several levels that never appeared. + // In particular, this case is called for the first entry. + IndexNode* new_node = node; + do { + new_node->children.emplace_back(new IndexNode{{}, {}}); + new_node = new_node->children.back(); + depth += 1; + } while (depth + 1 <= entry.terms().size()); // depth == 0: root node, no text associated. + new_node->entries.emplace_back(entry); +} + +IndexNode* buildIndexTree(vector& entries) +{ + // Sort the entries, first on the main entry, then the subentry, then the subsubentry, + // thanks to the implementation of operator<. + // If this operation is not performed, the algorithm below is no more correct (and ensuring that it works with + // unsorted entries would make its complexity blow up). + stable_sort(entries.begin(), entries.end()); + + // Cook the index into a nice tree data structure: entries at a given level in the index as a node, with subentries + // as children. + auto* index_root = new IndexNode{{}, {}}; + for (const IndexEntry& entry : entries) { + insertIntoNode(entry, index_root); + } + + return index_root; +} + +void outputIndexPage(XMLStream & xs, const IndexNode* root_node, unsigned depth = 0) +{ + LASSERT(root_node->entries.size() + root_node->children.size() > 0, return); + + xs << xml::StartTag("li", "class='" + generateCssClassAtDepth(depth) + "'"); + xs << xml::CR(); + xs << XMLStream::ESCAPE_NONE << termAtLevel(root_node, depth); + // By tree assumption, all the entries at this node have the same set of terms. + + if (!root_node->entries.empty()) { + xs << XMLStream::ESCAPE_NONE << " — "; + unsigned entry_number = 1; + + for (unsigned i = 0; i < root_node->entries.size(); ++i) { + const IndexEntry &entry = root_node->entries[i]; + + std::string const link_attr = "href='#" + entry.inset()->paragraphs()[0].magicLabel() + "'"; + xs << xml::StartTag("a", link_attr); + xs << from_ascii(std::to_string(entry_number)); + xs << xml::EndTag("a"); + + if (i < root_node->entries.size() - 1) { + xs << ", "; + } + entry_number += 1; + } + } + + if (!root_node->entries.empty() && !root_node->children.empty()) { + xs << xml::CR(); + } + + if (!root_node->children.empty()) { + xs << xml::StartTag("ul", "class='" + generateCssClassAtDepth(depth) + "'"); + xs << xml::CR(); + + for (const IndexNode* child : root_node->children) { + outputIndexPage(xs, child, depth + 1); + } + + xs << xml::EndTag("ul"); + xs << xml::CR(); + } + + xs << xml::EndTag("li"); + xs << xml::CR(); +} + +// Only useful for debugging. +void printTree(const IndexNode* root_node, unsigned depth = 0) +{ + static const std::string pattern = " "; + std::string prefix; + for (unsigned i = 0; i < depth; ++i) { + prefix += pattern; + } + const std::string prefix_long = prefix + pattern + pattern; + + docstring term_at_level; + if (depth == 0) { + // The root has no term. + std::cout << "" << std::endl; + } else { + LASSERT(depth - 1 <= 10, return); // Check for overflows. + term_at_level = termAtLevel(root_node, depth - 1); + std::cout << prefix << to_utf8(term_at_level) << " (x " << std::to_string(root_node->entries.size()) << ")" + << std::endl; + } + + for (const IndexEntry& entry : root_node->entries) { + if (entry.terms().size() != depth) { + std::cout << prefix_long << "ERROR: an entry doesn't have the same number of terms" << std::endl; + } + if (depth > 0 && entry.terms()[depth - 1] != term_at_level) { + std::cout << prefix_long << "ERROR: an entry doesn't have the right term at depth " << std::to_string(depth) + << std::endl; + } + } + + for (const IndexNode* node : root_node->children) { + printTree(node, depth + 1); + } +} +} + + +docstring InsetPrintIndex::xhtml(XMLStream &, OutputParams const & op) const +{ + BufferParams const & bp = buffer().masterBuffer()->params(); + + // we do not presently support multiple indices, so we refuse to print + // anything but the main index, so as not to generate multiple indices. + // NOTE Multiple index support would require some work. The reason + // is that the TOC does not know about multiple indices. Either it would + // need to be told about them (not a bad idea), or else the index entries + // would need to be collected differently, say, during validation. + if (bp.use_indices && getParam("type") != from_ascii("idx")) + return docstring(); + + shared_ptr toc = buffer().tocBackend().toc("index"); + if (toc->empty()) + return docstring(); + + // Collect the index entries in a form we can use them. + vector entries; + for (const TocItem& item : *toc) { + if (item.isOutput()) + entries.emplace_back(IndexEntry{static_cast(&(item.dit().inset())), &op}); + } + + // If all the index entries are in notes or not displayed, get out sooner. + if (entries.empty()) + return docstring(); + + const IndexNode* index_root = buildIndexTree(entries); +#if 0 + printTree(index_root); +#endif + + // Start generating the XHTML index. + Layout const & lay = bp.documentClass().htmlTOCLayout(); + string const & tocclass = lay.defaultCSSClass(); + string const tocattr = "class='index " + tocclass + "'"; + + // we'll use our own stream, because we are going to defer everything. + // that's how we deal with the fact that we're probably inside a standard + // paragraph, and we don't want to be. + odocstringstream ods; + XMLStream xs(ods); + + xs << xml::StartTag("div", tocattr); + xs << xml::CR(); + xs << xml::StartTag(lay.htmltag(), lay.htmlattr()); + xs << translateIfPossible(from_ascii("Index"), op.local_font->language()->lang()); + xs << xml::EndTag(lay.htmltag()); + xs << xml::CR(); + xs << xml::StartTag("ul", "class='main'"); + xs << xml::CR(); + + LASSERT(index_root->entries.empty(), return docstring()); // No index entry should have zero terms. + for (const IndexNode* node : index_root->children) { + outputIndexPage(xs, node); + } + + xs << xml::EndTag("ul"); + xs << xml::CR(); + xs << xml::EndTag("div"); + + return ods.str(); } } // namespace lyx