src/insets/InsetIndex.cpp

   1 /**
   2  * \file InsetIndex.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author Jürgen Spitzmüller
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11 #include <config.h>
  12
  13 #include "InsetIndex.h"
  14 #include "InsetIndexMacro.h"
  15
  16 #include "Buffer.h"
  17 #include "BufferParams.h"
  18 #include "BufferView.h"
  19 #include "ColorSet.h"
  20 #include "Cursor.h"
  21 #include "DispatchResult.h"
  22 #include "Encoding.h"
  23 #include "ErrorList.h"
  24 #include "FuncRequest.h"
  25 #include "FuncStatus.h"
  26 #include "IndicesList.h"
  27 #include "InsetList.h"
  28 #include "Language.h"
  29 #include "LaTeX.h"
  30 #include "LaTeXFeatures.h"
  31 #include "Lexer.h"
  32 #include "output_latex.h"
  33 #include "output_xhtml.h"
  34 #include "xml.h"
  35 #include "texstream.h"
  36 #include "TextClass.h"
  37 #include "TocBackend.h"
  38
  39 #include "support/debug.h"
  40 #include "support/docstream.h"
  41 #include "support/FileName.h"
  42 #include "support/gettext.h"
  43 #include "support/lstrings.h"
  44 #include "support/Translator.h"
  45
  46 #include "frontends/alert.h"
  47
  48 #include <algorithm>
  49 #include <set>
  50 #include <iostream>
  51
  52 #include <QThreadStorage>
  53
  54 using namespace std;
  55 using namespace lyx::support;
  56
  57 namespace lyx {
  58
  59 namespace {
  60
  61 typedef Translator<string, InsetIndexParams::PageRange> PageRangeTranslator;
  62 typedef Translator<docstring, InsetIndexParams::PageRange> PageRangeTranslatorLoc;
  63
  64 PageRangeTranslator const init_insetindexpagerangetranslator()
  65 {
  66         PageRangeTranslator translator("none", InsetIndexParams::None);
  67         translator.addPair("start", InsetIndexParams::Start);
  68         translator.addPair("end", InsetIndexParams::End);
  69         return translator;
  70 }
  71
  72 PageRangeTranslator const init_insetindexpagerangetranslator_latex()
  73 {
  74         PageRangeTranslator translator("", InsetIndexParams::None);
  75         translator.addPair("(", InsetIndexParams::Start);
  76         translator.addPair(")", InsetIndexParams::End);
  77         return translator;
  78 }
  79
  80
  81 PageRangeTranslatorLoc const init_insetindexpagerangetranslator_loc()
  82 {
  83         PageRangeTranslatorLoc translator(docstring(), InsetIndexParams::None);
  84         translator.addPair(_("Starts page range"), InsetIndexParams::Start);
  85         translator.addPair(_("Ends page range"), InsetIndexParams::End);
  86         return translator;
  87 }
  88
  89
  90 PageRangeTranslator const & insetindexpagerangetranslator()
  91 {
  92         static PageRangeTranslator const prtranslator =
  93                         init_insetindexpagerangetranslator();
  94         return prtranslator;
  95 }
  96
  97
  98 PageRangeTranslatorLoc const & insetindexpagerangetranslator_loc()
  99 {
 100         static PageRangeTranslatorLoc const translator =
 101                         init_insetindexpagerangetranslator_loc();
 102         return translator;
 103 }
 104
 105
 106 PageRangeTranslator const & insetindexpagerangetranslator_latex()
 107 {
 108         static PageRangeTranslator const lttranslator =
 109                         init_insetindexpagerangetranslator_latex();
 110         return lttranslator;
 111 }
 112
 113 } // namespace anon
 114
 115 /////////////////////////////////////////////////////////////////////
 116 //
 117 // InsetIndex
 118 //
 119 ///////////////////////////////////////////////////////////////////////
 120
 121
 122 InsetIndex::InsetIndex(Buffer * buf, InsetIndexParams const & params)
 123         : InsetCollapsible(buf), params_(params)
 124 {}
 125
 126
 127 void InsetIndex::latex(otexstream & ios, OutputParams const & runparams_in) const
 128 {
 129         OutputParams runparams(runparams_in);
 130         runparams.inIndexEntry = true;
 131
 132         otexstringstream os;
 133
 134         if (buffer().masterBuffer()->params().use_indices && !params_.index.empty()
 135                 && params_.index != "idx") {
 136                 os << "\\sindex[";
 137                 os << escape(params_.index);
 138                 os << "]{";
 139         } else {
 140                 os << "\\index";
 141                 os << '{';
 142         }
 143
 144         // Get the LaTeX output from InsetText. We need to deconstruct this later
 145         // in order to check if we need to generate a sorting key
 146         odocstringstream ourlatex;
 147         otexstream ots(ourlatex);
 148         InsetText::latex(ots, runparams);
 149         if (runparams.find_effective()) {
 150                 // No need for special handling, if we are only searching for some patterns
 151                 os << ourlatex.str() << "}";
 152                 return;
 153         }
 154
 155         if (hasSortKey()) {
 156                 getSortkey(os, runparams);
 157                 os << "@";
 158                 os << ourlatex.str();
 159                 getSubentries(os, runparams);
 160                 if (hasSeeRef()) {
 161                         os << "|";
 162                         os << insetindexpagerangetranslator_latex().find(params_.range);
 163                         getSeeRefs(os, runparams);
 164                 }
 165         } else {
 166                 // We check whether we need a sort key.
 167                 // If so, we use the plaintext version
 168                 odocstringstream ourplain;
 169                 InsetText::plaintext(ourplain, runparams);
 170
 171                 // These are the LaTeX and plaintext representations
 172                 docstring latexstr = ourlatex.str();
 173                 docstring plainstr = ourplain.str();
 174
 175                 // This will get what follows | if anything does,
 176                 // the command (e.g., see, textbf) for pagination
 177                 // formatting
 178                 docstring cmd;
 179
 180                 if (hasSeeRef()) {
 181                         odocstringstream seeref;
 182                         otexstream otsee(seeref);
 183                         getSeeRefs(otsee, runparams);
 184                         cmd = seeref.str();
 185                 } else if (!params_.pagefmt.empty() && params_.pagefmt != "default") {
 186                         cmd = from_utf8(params_.pagefmt);
 187                 } else {
 188                         // Check for the | separator to strip the cmd.
 189                         // This goes wrong on an escaped "|", but as the escape
 190                         // character can be changed in style files, we cannot
 191                         // prevent that.
 192                         size_t pos = latexstr.find(from_ascii("|"));
 193                         if (pos != docstring::npos) {
 194                                 // Put the bit after "|" into cmd...
 195                                 cmd = latexstr.substr(pos + 1);
 196                                 // ...and erase that stuff from latexstr
 197                                 latexstr = latexstr.erase(pos);
 198                                 // ...as well as from plainstr
 199                                 size_t ppos = plainstr.find(from_ascii("|"));
 200                                 if (ppos < plainstr.size())
 201                                         plainstr.erase(ppos);
 202                                 else
 203                                         LYXERR0("The `|' separator was not found in the plaintext version!");
 204                         }
 205                 }
 206
 207                 odocstringstream subentries;
 208                 otexstream otsub(subentries);
 209                 getSubentries(otsub, runparams);
 210                 if (subentries.str().empty()) {
 211                         // Separate the entries and subentries, i.e., split on "!".
 212                         // This goes wrong on an escaped "!", but as the escape
 213                         // character can be changed in style files, we cannot
 214                         // prevent that.
 215                         std::vector<docstring> const levels =
 216                                         getVectorFromString(latexstr, from_ascii("!"), true);
 217                         std::vector<docstring> const levels_plain =
 218                                         getVectorFromString(plainstr, from_ascii("!"), true);
 219
 220                         vector<docstring>::const_iterator it = levels.begin();
 221                         vector<docstring>::const_iterator end = levels.end();
 222                         vector<docstring>::const_iterator it2 = levels_plain.begin();
 223                         bool first = true;
 224                         for (; it != end; ++it) {
 225                                 // The separator needs to be put back when
 226                                 // writing the levels, except for the first level
 227                                 if (!first)
 228                                         os << '!';
 229                                 else
 230                                         first = false;
 231
 232                                 // Now here comes the reason for this whole procedure:
 233                                 // We try to correctly sort macros and formatted strings.
 234                                 // If we find a command, prepend a plain text
 235                                 // version of the content to get sorting right,
 236                                 // e.g. \index{LyX@\LyX}, \index{text@\textbf{text}}.
 237                                 // We do this on all levels.
 238                                 // We don't do it if the level already contains a '@', though.
 239                                 // Plaintext might return nothing (e.g. for ERTs).
 240                                 // In that case, we use LaTeX.
 241                                 docstring const spart = (levels_plain.empty() || (*it2).empty()) ? *it : *it2;
 242                                 processLatexSorting(os, runparams, *it, spart);
 243                                 if (it2 < levels_plain.end())
 244                                         ++it2;
 245                         }
 246                 } else {
 247                         processLatexSorting(os, runparams, latexstr, plainstr);
 248                         os << subentries.str();
 249                 }
 250
 251                 // At last, re-insert the command, separated by "|"
 252                 if (!cmd.empty()) {
 253                         os << "|"
 254                            << insetindexpagerangetranslator_latex().find(params_.range)
 255                            << cmd;
 256                 }
 257         }
 258         os << '}';
 259
 260         // In macros with moving arguments, such as \section,
 261         // we store the index and output it after the macro (#2154)
 262         if (runparams_in.postpone_fragile_stuff)
 263                 runparams_in.post_macro += os.str();
 264         else
 265                 ios << os.release();
 266 }
 267
 268
 269 void InsetIndex::processLatexSorting(otexstream & os, OutputParams const & runparams,
 270                                 docstring const latex, docstring const spart) const
 271 {
 272         if (contains(latex, '\\') && !contains(latex, '@')) {
 273                 // Now we need to validate that all characters in
 274                 // the sorting part are representable in the current
 275                 // encoding. If not try the LaTeX macro which might
 276                 // or might not be a good choice, and issue a warning.
 277                 pair<docstring, docstring> spart_latexed =
 278                                 runparams.encoding->latexString(spart, runparams.dryrun);
 279                 if (!spart_latexed.second.empty())
 280                         LYXERR0("Uncodable character in index entry. Sorting might be wrong!");
 281                 if (spart != spart_latexed.first && !runparams.dryrun) {
 282                         TeXErrors terr;
 283                         ErrorList & errorList = buffer().errorList("Export");
 284                         docstring const s = bformat(_("LyX's automatic index sorting algorithm faced "
 285                                                       "problems with the entry '%1$s'.\n"
 286                                                       "Please specify the sorting of this entry manually, as "
 287                                                       "explained in the User Guide."), spart);
 288                         Paragraph const & par = buffer().paragraphs().front();
 289                         errorList.push_back(ErrorItem(_("Index sorting failed"), s,
 290                                                       {par.id(), 0}, {par.id(), -1}));
 291                         buffer().bufferErrors(terr, errorList);
 292                 }
 293                 // Remove remaining \'s from the sort key
 294                 docstring ppart = subst(spart_latexed.first, from_ascii("\\"), docstring());
 295                 // Plain quotes need to be escaped, however (#10649), as this
 296                 // is the default escape character
 297                 ppart = subst(ppart, from_ascii("\""), from_ascii("\\\""));
 298
 299                 // Now insert the sortkey, separated by '@'.
 300                 os << ppart;
 301                 os << '@';
 302         }
 303         // Insert the actual level text
 304         os << latex;
 305 }
 306
 307
 308 void InsetIndex::docbook(XMLStream & xs, OutputParams const & runparams) const
 309 {
 310         // Two ways of processing this inset are implemented:
 311         // - the legacy one, based on parsing the raw LaTeX (before LyX 2.4) -- unlikely to be deprecated
 312         // - the modern one, based on precise insets for indexing features
 313         // Like the LaTeX implementation, consider the user chooses either of those options.
 314
 315         // Get the content of the inset as LaTeX, as some things may be encoded as ERT (like {}).
 316         // TODO: if there is an ERT within the index term, its conversion should be tried, in case it becomes useful;
 317         //  otherwise, ERTs should become comments. For now, they are just copied as-is, which is barely satisfactory.
 318         odocstringstream odss;
 319         otexstream ots(odss);
 320         InsetText::latex(ots, runparams);
 321         docstring latexString = trim(odss.str());
 322
 323         // Handle several indices (indicated in the inset instead of the raw latexString).
 324         docstring indexType = from_utf8("");
 325         if (buffer().masterBuffer()->params().use_indices) {
 326                 indexType += " type=\"" + params_.index + "\"";
 327         }
 328
 329         // Split the string into its main constituents: terms, and command (see, see also, range).
 330         size_t positionVerticalBar = latexString.find(from_ascii("|")); // What comes before | is (sub)(sub)entries.
 331         docstring indexTerms = latexString.substr(0, positionVerticalBar);
 332         docstring command;
 333         if (positionVerticalBar != lyx::docstring::npos) {
 334                 command = latexString.substr(positionVerticalBar + 1);
 335         }
 336
 337         // Handle sorting issues, with @.
 338         docstring sortAs;
 339         if (hasSortKey()) {
 340                 sortAs = getSortkeyAsText(runparams);
 341                 // indexTerms may contain a sort key if the user has both the inset and the manual key.
 342         } else {
 343                 vector<docstring> sortingElements = getVectorFromString(indexTerms, from_ascii("@"), false);
 344                 if (sortingElements.size() == 2) {
 345                         sortAs = sortingElements[0];
 346                         indexTerms = sortingElements[1];
 347                 }
 348         }
 349
 350         // Handle primary, secondary, and tertiary terms (entries, subentries, and subsubentries, for LaTeX).
 351         vector<docstring> terms;
 352         if (const vector<docstring> potential_terms = getSubentriesAsText(runparams); !potential_terms.empty()) {
 353                 terms = potential_terms;
 354                 // The main term is not present in the vector, as it's not a subentry. The main index term is inserted raw in
 355                 // the index inset. Considering that the user either uses the new or the legacy mechanism, the main term is the
 356                 // full string within this inset (i.e. without the subinsets).
 357                 terms.insert(terms.begin(), latexString);
 358         } else {
 359                 terms = getVectorFromString(indexTerms, from_ascii("!"), false);
 360         }
 361
 362         // Handle ranges. Happily, in the raw LaTeX mode, (| and |) can only be at the end of the string!
 363         const bool hasInsetRange = params_.range != InsetIndexParams::PageRange::None;
 364         const bool hasStartRange = params_.range == InsetIndexParams::PageRange::Start ||
 365                         latexString.find(from_ascii("|(")) != lyx::docstring::npos;
 366         const bool hasEndRange = params_.range == InsetIndexParams::PageRange::End ||
 367                         latexString.find(from_ascii("|)")) != lyx::docstring::npos;
 368
 369         if (hasInsetRange) {
 370                 // Remove the ranges from the command if they do not appear at the beginning.
 371                 size_t index = 0;
 372                 while ((index = command.find(from_utf8("|("), index)) != std::string::npos)
 373                         command.erase(index, 1);
 374                 index = 0;
 375                 while ((index = command.find(from_utf8("|)"), index)) != std::string::npos)
 376                         command.erase(index, 1);
 377
 378                 // Remove the ranges when they are the only vertical bar in the complete string.
 379                 if (command[0] == '(' || command[0] == ')')
 380                         command.erase(0, 1);
 381         }
 382
 383         // Handle see and seealso. As "see" is a prefix of "seealso", the order of the comparisons is important.
 384         // Both commands are mutually exclusive!
 385         docstring see = getSeeAsText(runparams);
 386         vector<docstring> seeAlsoes = getSeeAlsoesAsText(runparams);
 387
 388         if (see.empty() && seeAlsoes.empty() && command.substr(0, 3) == "see") {
 389                 // Unescape brackets.
 390                 size_t index = 0;
 391                 while ((index = command.find(from_utf8("\\{"), index)) != std::string::npos)
 392                         command.erase(index, 1);
 393                 index = 0;
 394                 while ((index = command.find(from_utf8("\\}"), index)) != std::string::npos)
 395                         command.erase(index, 1);
 396
 397                 // Retrieve the part between brackets, and remove the complete seealso.
 398                 size_t positionOpeningBracket = command.find(from_ascii("{"));
 399                 size_t positionClosingBracket = command.find(from_ascii("}"));
 400                 docstring list = command.substr(positionOpeningBracket + 1, positionClosingBracket - positionOpeningBracket - 1);
 401
 402                 // Parse the list of referenced entries (or a single one for see).
 403                 if (command.substr(0, 7) == "seealso") {
 404                         seeAlsoes = getVectorFromString(list, from_ascii(","), false);
 405                 } else {
 406                         see = list;
 407
 408                         if (see.find(from_ascii(",")) != std::string::npos) {
 409                                 docstring error = from_utf8("Several index terms found as \"see\"! Only one is acceptable. "
 410                                                                                         "Complete entry: \"") + latexString + from_utf8("\"");
 411                                 LYXERR0(error);
 412                                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
 413                         }
 414                 }
 415
 416                 // Remove the complete see/seealso from the commands, in case there is something else to parse.
 417                 command = command.substr(positionClosingBracket + 1);
 418         }
 419
 420         // Some parts of the strings are not parsed, as they do not have anything matching in DocBook: things like
 421         // formatting the entry or the page number, other strings for sorting. https://wiki.lyx.org/Tips/Indexing
 422         // If there are such things in the index entry, then this code may miserably fail. For example, for "Peter|(textbf",
 423         // no range will be detected.
 424         // TODO: Could handle formatting as significance="preferred"?
 425         if (!command.empty()) {
 426                 docstring error = from_utf8("Unsupported feature: an index entry contains a | with an unsupported command, ")
 427                                           + command + from_utf8(". ") + from_utf8("Complete entry: \"") + latexString + from_utf8("\"");
 428                 LYXERR0(error);
 429                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
 430         }
 431
 432     // Write all of this down.
 433         if (terms.empty() && !hasEndRange) {
 434                 docstring error = from_utf8("No index term found! Complete entry: \"") + latexString + from_utf8("\"");
 435                 LYXERR0(error);
 436                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
 437         } else {
 438                 // Generate the attributes for ranges. It is based on the terms that are indexed, but the ID must be unique
 439                 // to this indexing area (xml::cleanID does not guarantee this: for each call with the same arguments,
 440                 // the same legal ID is produced; here, as the input would be the same, the output must be, by design).
 441                 // Hence the thread-local storage, as the numbers must strictly be unique, and thus cannot be shared across
 442                 // a paragraph (making the solution used for HTML worthless). This solution is very similar to the one used in
 443                 // xml::cleanID.
 444                 // indexType can only be used for singular and startofrange types!
 445                 docstring attrs;
 446                 if (!hasStartRange && !hasEndRange) {
 447                         attrs = indexType;
 448                 } else {
 449                         // Append an ID if uniqueness is not guaranteed across the document.
 450                         static QThreadStorage<set<docstring>> tKnownTermLists;
 451                         static QThreadStorage<int> tID;
 452
 453                         set<docstring> &knownTermLists = tKnownTermLists.localData();
 454                         int &ID = tID.localData();
 455
 456                         if (!tID.hasLocalData()) {
 457                                 tID.localData() = 0;
 458                         }
 459
 460                         // Modify the index terms to add the unique ID if needed.
 461                         docstring newIndexTerms = indexTerms;
 462                         if (knownTermLists.find(indexTerms) != knownTermLists.end()) {
 463                                 newIndexTerms += from_ascii(string("-") + to_string(ID));
 464
 465                                 // Only increment for the end of range, so that the same number is used for the start of range.
 466                                 if (hasEndRange) {
 467                                         ID++;
 468                                 }
 469                         }
 470
 471                         // Term list not yet known: add it to the set AFTER the end of range. After
 472                         if (knownTermLists.find(indexTerms) == knownTermLists.end() && hasEndRange) {
 473                                 knownTermLists.insert(indexTerms);
 474                         }
 475
 476                         // Generate the attributes.
 477                         docstring id = xml::cleanID(newIndexTerms);
 478                         if (hasStartRange) {
 479                                 attrs = indexType + " class=\"startofrange\" xml:id=\"" + id + "\"";
 480                         } else {
 481                                 attrs = " class=\"endofrange\" startref=\"" + id + "\"";
 482                         }
 483                 }
 484
 485                 // Handle the index terms (including the specific index for this entry).
 486                 if (hasEndRange) {
 487                         xs << xml::CompTag("indexterm", attrs);
 488                 } else {
 489                         xs << xml::StartTag("indexterm", attrs);
 490                         if (!terms.empty()) { // hasEndRange has no content.
 491                                 docstring attr;
 492                                 if (!sortAs.empty()) {
 493                                         attr = from_utf8("sortas='") + sortAs + from_utf8("'");
 494                                 }
 495
 496                                 xs << xml::StartTag("primary", attr);
 497                                 xs << terms[0];
 498                                 xs << xml::EndTag("primary");
 499                         }
 500                         if (terms.size() > 1) {
 501                                 xs << xml::StartTag("secondary");
 502                                 xs << terms[1];
 503                                 xs << xml::EndTag("secondary");
 504                         }
 505                         if (terms.size() > 2) {
 506                                 xs << xml::StartTag("tertiary");
 507                                 xs << terms[2];
 508                                 xs << xml::EndTag("tertiary");
 509                         }
 510
 511                         // Handle see and see also.
 512                         if (!see.empty()) {
 513                                 xs << xml::StartTag("see");
 514                                 xs << see;
 515                                 xs << xml::EndTag("see");
 516                         }
 517
 518                         if (!seeAlsoes.empty()) {
 519                                 for (auto &entry : seeAlsoes) {
 520                                         xs << xml::StartTag("seealso");
 521                                         xs << entry;
 522                                         xs << xml::EndTag("seealso");
 523                                 }
 524                         }
 525
 526                         // Close the entry.
 527                         xs << xml::EndTag("indexterm");
 528                 }
 529         }
 530 }
 531
 532
 533 docstring InsetIndex::xhtml(XMLStream & xs, OutputParams const &) const
 534 {
 535         // we just print an anchor, taking the paragraph ID from
 536         // our own interior paragraph, which doesn't get printed
 537         std::string const magic = paragraphs().front().magicLabel();
 538         std::string const attr = "id='" + magic + "'";
 539         xs << xml::CompTag("a", attr);
 540         return docstring();
 541 }
 542
 543
 544 bool InsetIndex::showInsetDialog(BufferView * bv) const
 545 {
 546         bv->showDialog("index", params2string(params_),
 547                         const_cast<InsetIndex *>(this));
 548         return true;
 549 }
 550
 551
 552 void InsetIndex::doDispatch(Cursor & cur, FuncRequest & cmd)
 553 {
 554         switch (cmd.action()) {
 555
 556         case LFUN_INSET_MODIFY: {
 557                 if (cmd.getArg(0) == "changetype") {
 558                         cur.recordUndoInset(this);
 559                         params_.index = from_utf8(cmd.getArg(1));
 560                         break;
 561                 }
 562                 InsetIndexParams params;
 563                 InsetIndex::string2params(to_utf8(cmd.argument()), params);
 564                 cur.recordUndoInset(this);
 565                 params_.index = params.index;
 566                 params_.range = params.range;
 567                 params_.pagefmt = params.pagefmt;
 568                 // what we really want here is a TOC update, but that means
 569                 // a full buffer update
 570                 cur.forceBufferUpdate();
 571                 break;
 572         }
 573
 574         case LFUN_INSET_DIALOG_UPDATE:
 575                 cur.bv().updateDialog("index", params2string(params_));
 576                 break;
 577
 578         default:
 579                 InsetCollapsible::doDispatch(cur, cmd);
 580                 break;
 581         }
 582 }
 583
 584
 585 bool InsetIndex::getStatus(Cursor & cur, FuncRequest const & cmd,
 586                 FuncStatus & flag) const
 587 {
 588         switch (cmd.action()) {
 589
 590         case LFUN_INSET_MODIFY:
 591                 if (cmd.getArg(0) == "changetype") {
 592                         docstring const newtype = from_utf8(cmd.getArg(1));
 593                         Buffer const & realbuffer = *buffer().masterBuffer();
 594                         IndicesList const & indiceslist = realbuffer.params().indiceslist();
 595                         Index const * index = indiceslist.findShortcut(newtype);
 596                         flag.setEnabled(index != 0);
 597                         flag.setOnOff(
 598                                 from_utf8(cmd.getArg(1)) == params_.index);
 599                         return true;
 600                 }
 601                 return InsetCollapsible::getStatus(cur, cmd, flag);
 602
 603         case LFUN_INSET_DIALOG_UPDATE: {
 604                 Buffer const & realbuffer = *buffer().masterBuffer();
 605                 flag.setEnabled(realbuffer.params().use_indices);
 606                 return true;
 607         }
 608
 609         case LFUN_INDEXMACRO_INSERT:
 610                 return macrosPossible(cmd.getArg(0));
 611
 612         default:
 613                 return InsetCollapsible::getStatus(cur, cmd, flag);
 614         }
 615 }
 616
 617
 618 void InsetIndex::getSortkey(otexstream & os, OutputParams const & runparams) const
 619 {
 620         Paragraph const & par = paragraphs().front();
 621         InsetList::const_iterator it = par.insetList().begin();
 622         for (; it != par.insetList().end(); ++it) {
 623                 Inset & inset = *it->inset;
 624                 if (inset.lyxCode() == INDEXMACRO_SORTKEY_CODE) {
 625                         InsetIndexMacro const & iim =
 626                                 static_cast<InsetIndexMacro const &>(inset);
 627                         iim.getLatex(os, runparams);
 628                         return;
 629                 }
 630         }
 631 }
 632
 633
 634 docstring InsetIndex::getSortkeyAsText(OutputParams const & runparams) const
 635 {
 636         Paragraph const & par = paragraphs().front();
 637         InsetList::const_iterator it = par.insetList().begin();
 638         for (; it != par.insetList().end(); ++it) {
 639                 Inset & inset = *it->inset;
 640                 if (inset.lyxCode() == INDEXMACRO_SORTKEY_CODE) {
 641                         otexstringstream os;
 642                         InsetIndexMacro const & iim =
 643                                 static_cast<InsetIndexMacro const &>(inset);
 644                         iim.getLatex(os, runparams);
 645                         return os.str();
 646                 }
 647         }
 648         return from_ascii("");
 649 }
 650
 651
 652 void InsetIndex::getSubentries(otexstream & os, OutputParams const & runparams) const
 653 {
 654         Paragraph const & par = paragraphs().front();
 655         InsetList::const_iterator it = par.insetList().begin();
 656         int i = 0;
 657         for (; it != par.insetList().end(); ++it) {
 658                 Inset & inset = *it->inset;
 659                 if (inset.lyxCode() == INDEXMACRO_CODE) {
 660                         InsetIndexMacro const & iim =
 661                                 static_cast<InsetIndexMacro const &>(inset);
 662                         if (iim.params().type == InsetIndexMacroParams::Subindex) {
 663                                 ++i;
 664                                 if (i > 2)
 665                                         return;
 666                                 os << "!";
 667                                 iim.getLatex(os, runparams);
 668                         }
 669                 }
 670         }
 671 }
 672
 673
 674 std::vector<docstring> InsetIndex::getSubentriesAsText(OutputParams const & runparams) const
 675 {
 676         std::vector<docstring> subentries;
 677
 678         Paragraph const & par = paragraphs().front();
 679         InsetList::const_iterator it = par.insetList().begin();
 680         int i = 0;
 681         for (; it != par.insetList().end(); ++it) {
 682                 Inset & inset = *it->inset;
 683                 if (inset.lyxCode() == INDEXMACRO_CODE) {
 684                         InsetIndexMacro const & iim =
 685                                 static_cast<InsetIndexMacro const &>(inset);
 686                         if (iim.params().type == InsetIndexMacroParams::Subindex) {
 687                                 ++i;
 688                                 if (i > 2)
 689                                         break;
 690
 691                                 otexstringstream os;
 692                                 iim.getLatex(os, runparams);
 693                                 subentries.emplace_back(os.str());
 694                         }
 695                 }
 696         }
 697
 698         return subentries;
 699 }
 700
 701
 702 docstring InsetIndex::getMainSubentryAsText(OutputParams const & runparams) const
 703 {
 704         otexstringstream os;
 705         InsetText::latex(os, runparams);
 706         return os.str();
 707 }
 708
 709
 710 void InsetIndex::getSeeRefs(otexstream & os, OutputParams const & runparams) const
 711 {
 712         Paragraph const & par = paragraphs().front();
 713         InsetList::const_iterator it = par.insetList().begin();
 714         for (; it != par.insetList().end(); ++it) {
 715                 Inset & inset = *it->inset;
 716                 if (inset.lyxCode() == INDEXMACRO_CODE) {
 717                         InsetIndexMacro const & iim =
 718                                 static_cast<InsetIndexMacro const &>(inset);
 719                         if (iim.params().type == InsetIndexMacroParams::See
 720                             || iim.params().type == InsetIndexMacroParams::Seealso) {
 721                                 iim.getLatex(os, runparams);
 722                                 return;
 723                         }
 724                 }
 725         }
 726 }
 727
 728
 729 docstring InsetIndex::getSeeAsText(OutputParams const & runparams) const
 730 {
 731         Paragraph const & par = paragraphs().front();
 732         InsetList::const_iterator it = par.insetList().begin();
 733         for (; it != par.insetList().end(); ++it) {
 734                 Inset & inset = *it->inset;
 735                 if (inset.lyxCode() == INDEXMACRO_CODE) {
 736                         InsetIndexMacro const & iim =
 737                                 static_cast<InsetIndexMacro const &>(inset);
 738                         if (iim.params().type == InsetIndexMacroParams::See) {
 739                                 otexstringstream os;
 740                                 iim.getLatex(os, runparams);
 741                                 return os.str();
 742                         }
 743                 }
 744         }
 745         return from_ascii("");
 746 }
 747
 748
 749 std::vector<docstring> InsetIndex::getSeeAlsoesAsText(OutputParams const & runparams) const
 750 {
 751         std::vector<docstring> seeAlsoes;
 752
 753         Paragraph const & par = paragraphs().front();
 754         InsetList::const_iterator it = par.insetList().begin();
 755         for (; it != par.insetList().end(); ++it) {
 756                 Inset & inset = *it->inset;
 757                 if (inset.lyxCode() == INDEXMACRO_CODE) {
 758                         InsetIndexMacro const & iim =
 759                                 static_cast<InsetIndexMacro const &>(inset);
 760                         if (iim.params().type == InsetIndexMacroParams::Seealso) {
 761                                 otexstringstream os;
 762                                 iim.getLatex(os, runparams);
 763                                 seeAlsoes.emplace_back(os.str());
 764                         }
 765                 }
 766         }
 767
 768         return seeAlsoes;
 769 }
 770
 771
 772 namespace {
 773
 774 bool hasInsetWithCode(const InsetIndex * const inset_index, const InsetCode code,
 775                                           const std::set<InsetIndexMacroParams::Type> types = {})
 776 {
 777         Paragraph const & par = inset_index->paragraphs().front();
 778         InsetList::const_iterator it = par.insetList().begin();
 779         for (; it != par.insetList().end(); ++it) {
 780                 Inset & inset = *it->inset;
 781                 if (inset.lyxCode() == code) {
 782                         if (types.empty())
 783                                 return true;
 784
 785                         LASSERT(code == INDEXMACRO_CODE, return false);
 786                         InsetIndexMacro const & iim =
 787                                         static_cast<InsetIndexMacro const &>(inset);
 788                         if (types.find(iim.params().type) != types.end())
 789                                 return true;
 790                 }
 791         }
 792         return false;
 793 }
 794
 795 } // namespace
 796
 797
 798 bool InsetIndex::hasSubentries() const
 799 {
 800         return hasInsetWithCode(this, INDEXMACRO_CODE, {InsetIndexMacroParams::Subindex});
 801 }
 802
 803
 804 bool InsetIndex::hasSeeRef() const
 805 {
 806         return hasInsetWithCode(this, INDEXMACRO_CODE, {InsetIndexMacroParams::See, InsetIndexMacroParams::Seealso});
 807 }
 808
 809
 810 bool InsetIndex::hasSortKey() const
 811 {
 812         return hasInsetWithCode(this, INDEXMACRO_SORTKEY_CODE);
 813 }
 814
 815
 816 bool InsetIndex::macrosPossible(string const type) const
 817 {
 818         if (type != "see" && type != "seealso"
 819             && type != "sortkey" && type != "subindex")
 820                 return false;
 821
 822         Paragraph const & par = paragraphs().front();
 823         InsetList::const_iterator it = par.insetList().begin();
 824         int subidxs = 0;
 825         for (; it != par.insetList().end(); ++it) {
 826                 Inset & inset = *it->inset;
 827                 if (type == "sortkey" && inset.lyxCode() == INDEXMACRO_SORTKEY_CODE)
 828                         return false;
 829                 if (inset.lyxCode() == INDEXMACRO_CODE) {
 830                         InsetIndexMacro const & iim = static_cast<InsetIndexMacro const &>(inset);
 831                         if ((type == "see" || type == "seealso")
 832                              && (iim.params().type == InsetIndexMacroParams::See
 833                                  || iim.params().type == InsetIndexMacroParams::Seealso))
 834                                 return false;
 835                         if (type == "subindex"
 836                              && iim.params().type == InsetIndexMacroParams::Subindex) {
 837                                 ++subidxs;
 838                                 if (subidxs > 1)
 839                                         return false;
 840                         }
 841                 }
 842         }
 843         return true;
 844 }
 845
 846
 847 ColorCode InsetIndex::labelColor() const
 848 {
 849         if (params_.index.empty() || params_.index == from_ascii("idx"))
 850                 return InsetCollapsible::labelColor();
 851         // FIXME UNICODE
 852         ColorCode c = lcolor.getFromLyXName(to_utf8(params_.index)
 853                                             + "@" + buffer().fileName().absFileName());
 854         if (c == Color_none)
 855                 c = InsetCollapsible::labelColor();
 856         return c;
 857 }
 858
 859
 860 docstring InsetIndex::toolTip(BufferView const &, int, int) const
 861 {
 862         docstring tip = _("Index Entry");
 863         if (buffer().params().use_indices && !params_.index.empty()) {
 864                 Buffer const & realbuffer = *buffer().masterBuffer();
 865                 IndicesList const & indiceslist = realbuffer.params().indiceslist();
 866                 tip += " (";
 867                 Index const * index = indiceslist.findShortcut(params_.index);
 868                 if (!index)
 869                         tip += _("unknown type!");
 870                 else
 871                         tip += index->index();
 872                 tip += ")";
 873         }
 874         tip += ": ";
 875         docstring res = toolTipText(tip);
 876         if (!insetindexpagerangetranslator_loc().find(params_.range).empty())
 877                 res += "\n" + insetindexpagerangetranslator_loc().find(params_.range);
 878         if (!params_.pagefmt.empty() && params_.pagefmt != "default") {
 879                 res += "\n" + _("Pagination format:") + " ";
 880                 if (params_.pagefmt == "textbf")
 881                         res += _("bold");
 882                 else if (params_.pagefmt == "textit")
 883                         res += _("italic");
 884                 else if (params_.pagefmt == "emph")
 885                         res += _("emphasized");
 886                 else
 887                         res += from_utf8(params_.pagefmt);
 888         }
 889         return res;
 890 }
 891
 892
 893 docstring const InsetIndex::buttonLabel(BufferView const & bv) const
 894 {
 895         InsetLayout const & il = getLayout();
 896         docstring label = translateIfPossible(il.labelstring());
 897
 898         if (buffer().params().use_indices && !params_.index.empty()) {
 899                 Buffer const & realbuffer = *buffer().masterBuffer();
 900                 IndicesList const & indiceslist = realbuffer.params().indiceslist();
 901                 label += " (";
 902                 Index const * index = indiceslist.findShortcut(params_.index);
 903                 if (!index)
 904                         label += _("unknown type!");
 905                 else
 906                         label += index->index();
 907                 label += ")";
 908         }
 909
 910         docstring res;
 911         if (!il.contentaslabel() || geometry(bv) != ButtonOnly)
 912                 res = label;
 913         else
 914                 res = getNewLabel(label);
 915         if (!insetindexpagerangetranslator_latex().find(params_.range).empty())
 916                 res += " " + from_ascii(insetindexpagerangetranslator_latex().find(params_.range));
 917         return res;
 918 }
 919
 920
 921 void InsetIndex::write(ostream & os) const
 922 {
 923         os << to_utf8(layoutName());
 924         params_.write(os);
 925         InsetCollapsible::write(os);
 926 }
 927
 928
 929 void InsetIndex::read(Lexer & lex)
 930 {
 931         params_.read(lex);
 932         InsetCollapsible::read(lex);
 933 }
 934
 935
 936 string InsetIndex::params2string(InsetIndexParams const & params)
 937 {
 938         ostringstream data;
 939         data << "index";
 940         params.write(data);
 941         return data.str();
 942 }
 943
 944
 945 void InsetIndex::string2params(string const & in, InsetIndexParams & params)
 946 {
 947         params = InsetIndexParams();
 948         if (in.empty())
 949                 return;
 950
 951         istringstream data(in);
 952         Lexer lex;
 953         lex.setStream(data);
 954         lex.setContext("InsetIndex::string2params");
 955         lex >> "index";
 956         params.read(lex);
 957 }
 958
 959
 960 void InsetIndex::addToToc(DocIterator const & cpit, bool output_active,
 961                                                   UpdateType utype, TocBackend & backend) const
 962 {
 963         DocIterator pit = cpit;
 964         pit.push_back(CursorSlice(const_cast<InsetIndex &>(*this)));
 965         docstring str;
 966         string type = "index";
 967         if (buffer().masterBuffer()->params().use_indices)
 968                 type += ":" + to_utf8(params_.index);
 969         // this is unlikely to be terribly long
 970         text().forOutliner(str, INT_MAX);
 971         TocBuilder & b = backend.builder(type);
 972         b.pushItem(pit, str, output_active);
 973         // Proceed with the rest of the inset.
 974         InsetCollapsible::addToToc(cpit, output_active, utype, backend);
 975         b.pop();
 976 }
 977
 978
 979 void InsetIndex::validate(LaTeXFeatures & features) const
 980 {
 981         if (buffer().masterBuffer()->params().use_indices
 982             && !params_.index.empty()
 983             && params_.index != "idx")
 984                 features.require("splitidx");
 985         InsetCollapsible::validate(features);
 986 }
 987
 988
 989 string InsetIndex::contextMenuName() const
 990 {
 991         return "context-index";
 992 }
 993
 994
 995 string InsetIndex::contextMenu(BufferView const & bv, int x, int y) const
 996 {
 997         // We override the implementation of InsetCollapsible,
 998         // because we have eytra entries.
 999         string owncm = "context-edit-index;";
1000         return owncm + InsetCollapsible::contextMenu(bv, x, y);
1001 }
1002
1003
1004 bool InsetIndex::hasSettings() const
1005 {
1006         return true;
1007 }
1008
1009
1010 bool InsetIndex::insetAllowed(InsetCode code) const
1011 {
1012         switch (code) {
1013         case INDEXMACRO_CODE:
1014         case INDEXMACRO_SORTKEY_CODE:
1015                 return true;
1016         case INDEX_CODE:
1017                 return false;
1018         default:
1019                 return InsetCollapsible::insetAllowed(code);
1020         }
1021 }
1022
1023
1024 /////////////////////////////////////////////////////////////////////
1025 //
1026 // InsetIndexParams
1027 //
1028 ///////////////////////////////////////////////////////////////////////
1029
1030
1031 void InsetIndexParams::write(ostream & os) const
1032 {
1033         os << ' ';
1034         if (!index.empty())
1035                 os << to_utf8(index);
1036         else
1037                 os << "idx";
1038         os << '\n';
1039         os << "range "
1040            << insetindexpagerangetranslator().find(range)
1041            << '\n';
1042         os << "pageformat "
1043            << pagefmt
1044            << '\n';
1045 }
1046
1047
1048 void InsetIndexParams::read(Lexer & lex)
1049 {
1050         if (lex.eatLine())
1051                 index = lex.getDocString();
1052         else
1053                 index = from_ascii("idx");
1054         if (lex.checkFor("range")) {
1055                 string st = lex.getString();
1056                 if (lex.eatLine()) {
1057                         st = lex.getString();
1058                         range = insetindexpagerangetranslator().find(lex.getString());
1059                 }
1060         }
1061         if (lex.checkFor("pageformat") && lex.eatLine()) {
1062                 pagefmt = lex.getString();
1063         }
1064 }
1065
1066
1067 /////////////////////////////////////////////////////////////////////
1068 //
1069 // InsetPrintIndex
1070 //
1071 ///////////////////////////////////////////////////////////////////////
1072
/// Constructs the inset by forwarding \p buf and \p p unchanged to the
/// InsetCommand base class; no further initialization is done here.
InsetPrintIndex::InsetPrintIndex(Buffer * buf, InsetCommandParams const & p)
        : InsetCommand(buf, p)
{}
1076
1077
1078 ParamInfo const & InsetPrintIndex::findInfo(string const & /* cmdName */)
1079 {
1080         static ParamInfo param_info_;
1081         if (param_info_.empty()) {
1082                 param_info_.add("type", ParamInfo::LATEX_OPTIONAL,
1083                                 ParamInfo::HANDLING_ESCAPE);
1084                 param_info_.add("name", ParamInfo::LATEX_OPTIONAL,
1085                                 ParamInfo::HANDLING_LATEXIFY);
1086                 param_info_.add("literal", ParamInfo::LYX_INTERNAL);
1087         }
1088         return param_info_;
1089 }
1090
1091
1092 docstring InsetPrintIndex::screenLabel() const
1093 {
1094         bool const printall = suffixIs(getCmdName(), '*');
1095         bool const multind = buffer().masterBuffer()->params().use_indices;
1096         if ((!multind
1097              && getParam("type") == from_ascii("idx"))
1098             || (getParam("type").empty() && !printall))
1099                 return _("Index");
1100         Buffer const & realbuffer = *buffer().masterBuffer();
1101         IndicesList const & indiceslist = realbuffer.params().indiceslist();
1102         Index const * index = indiceslist.findShortcut(getParam("type"));
1103         if (!index && !printall)
1104                 return _("Unknown index type!");
1105         docstring res = printall ? _("All indexes") : index->index();
1106         if (!multind)
1107                 res += " (" + _("non-active") + ")";
1108         else if (contains(getCmdName(), "printsubindex"))
1109                 res += " (" + _("subindex") + ")";
1110         return res;
1111 }
1112
1113
1114 bool InsetPrintIndex::isCompatibleCommand(string const & s)
1115 {
1116         return s == "printindex" || s == "printsubindex"
1117                 || s == "printindex*" || s == "printsubindex*";
1118 }
1119
1120
1121 void InsetPrintIndex::doDispatch(Cursor & cur, FuncRequest & cmd)
1122 {
1123         switch (cmd.action()) {
1124
1125         case LFUN_INSET_MODIFY: {
1126                 if (cmd.argument() == from_ascii("toggle-subindex")) {
1127                         string scmd = getCmdName();
1128                         if (contains(scmd, "printindex"))
1129                                 scmd = subst(scmd, "printindex", "printsubindex");
1130                         else
1131                                 scmd = subst(scmd, "printsubindex", "printindex");
1132                         cur.recordUndo();
1133                         setCmdName(scmd);
1134                         break;
1135                 } else if (cmd.argument() == from_ascii("check-printindex*")) {
1136                         string scmd = getCmdName();
1137                         if (suffixIs(scmd, '*'))
1138                                 break;
1139                         scmd += '*';
1140                         cur.recordUndo();
1141                         setParam("type", docstring());
1142                         setCmdName(scmd);
1143                         break;
1144                 }
1145                 InsetCommandParams p(INDEX_PRINT_CODE);
1146                 // FIXME UNICODE
1147                 InsetCommand::string2params(to_utf8(cmd.argument()), p);
1148                 if (p.getCmdName().empty()) {
1149                         cur.noScreenUpdate();
1150                         break;
1151                 }
1152                 cur.recordUndo();
1153                 setParams(p);
1154                 break;
1155         }
1156
1157         default:
1158                 InsetCommand::doDispatch(cur, cmd);
1159                 break;
1160         }
1161 }
1162
1163
1164 bool InsetPrintIndex::getStatus(Cursor & cur, FuncRequest const & cmd,
1165         FuncStatus & status) const
1166 {
1167         switch (cmd.action()) {
1168
1169         case LFUN_INSET_MODIFY: {
1170                 if (cmd.argument() == from_ascii("toggle-subindex")) {
1171                         status.setEnabled(buffer().masterBuffer()->params().use_indices);
1172                         status.setOnOff(contains(getCmdName(), "printsubindex"));
1173                         return true;
1174                 } else if (cmd.argument() == from_ascii("check-printindex*")) {
1175                         status.setEnabled(buffer().masterBuffer()->params().use_indices);
1176                         status.setOnOff(suffixIs(getCmdName(), '*'));
1177                         return true;
1178                 } if (cmd.getArg(0) == "index_print"
1179                     && cmd.getArg(1) == "CommandInset") {
1180                         InsetCommandParams p(INDEX_PRINT_CODE);
1181                         InsetCommand::string2params(to_utf8(cmd.argument()), p);
1182                         if (suffixIs(p.getCmdName(), '*')) {
1183                                 status.setEnabled(true);
1184                                 status.setOnOff(false);
1185                                 return true;
1186                         }
1187                         Buffer const & realbuffer = *buffer().masterBuffer();
1188                         IndicesList const & indiceslist =
1189                                 realbuffer.params().indiceslist();
1190                         Index const * index = indiceslist.findShortcut(p["type"]);
1191                         status.setEnabled(index != 0);
1192                         status.setOnOff(p["type"] == getParam("type"));
1193                         return true;
1194                 } else
1195                         return InsetCommand::getStatus(cur, cmd, status);
1196         }
1197
1198         case LFUN_INSET_DIALOG_UPDATE: {
1199                 status.setEnabled(buffer().masterBuffer()->params().use_indices);
1200                 return true;
1201         }
1202
1203         default:
1204                 return InsetCommand::getStatus(cur, cmd, status);
1205         }
1206 }
1207
1208
1209 void InsetPrintIndex::updateBuffer(ParIterator const &, UpdateType, bool const /*deleted*/)
1210 {
1211         Index const * index =
1212                 buffer().masterParams().indiceslist().findShortcut(getParam("type"));
1213         if (index)
1214                 setParam("name", index->index());
1215 }
1216
1217
1218 void InsetPrintIndex::latex(otexstream & os, OutputParams const & runparams_in) const
1219 {
1220         if (!buffer().masterBuffer()->params().use_indices) {
1221                 if (getParam("type") == from_ascii("idx"))
1222                         os << "\\printindex" << termcmd;
1223                 return;
1224         }
1225         OutputParams runparams = runparams_in;
1226         os << getCommand(runparams);
1227 }
1228
1229
1230 void InsetPrintIndex::validate(LaTeXFeatures & features) const
1231 {
1232         features.require("makeidx");
1233         if (buffer().masterBuffer()->params().use_indices)
1234                 features.require("splitidx");
1235         InsetCommand::validate(features);
1236 }
1237
1238
1239 string InsetPrintIndex::contextMenuName() const
1240 {
1241         return buffer().masterBuffer()->params().use_indices ?
1242                 "context-indexprint" : string();
1243 }
1244
1245
1246 bool InsetPrintIndex::hasSettings() const
1247 {
1248         return buffer().masterBuffer()->params().use_indices;
1249 }
1250
1251
1252 class IndexEntry
1253 {
1254 public:
1255         /// Builds an entry for the index.
1256         IndexEntry(const InsetIndex * inset, OutputParams const * runparams) : inset_(inset), runparams_(runparams)
1257         {
1258                 LASSERT(runparams, return);
1259
1260                 // Convert the inset as text. The resulting text usually only contains an XHTML anchor (<a id='...'/>) and text.
1261                 odocstringstream entry;
1262                 OutputParams ours = *runparams;
1263                 ours.for_toc = false;
1264                 inset_->plaintext(entry, ours);
1265                 entry_ = entry.str();
1266
1267                 // Determine in which index this entry belongs to.
1268                 if (inset_->buffer().masterBuffer()->params().use_indices) {
1269                         index_ = inset_->params_.index;
1270                 }
1271
1272                 // Attempt parsing the inset.
1273                 if (isModern())
1274                         parseAsModern();
1275                 else
1276                         parseAsLegacy();
1277         }
1278
        /// Returns the errors accumulated while parsing this entry, as one
        /// string. The string is empty if nothing was reported.
        // It is up to the caller to send this string to LYXERR and the output
        // file, as needed; the overload taking an XMLStream does both.
        const docstring & output_error() const
        {
                return output_error_;
        }
1285
1286         void output_error(XMLStream xs) const
1287         {
1288                 LYXERR0(output_error());
1289                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + output_error() + from_utf8(" -->\n"));
1290         }
1291
1292
1293 private:
1294         bool isModern()
1295         {
1296                 std::cout << to_utf8(entry_) << std::endl;
1297
1298                 // If a modern parameter is present, this is definitely a modern index inset. Similarly, if it contains the
1299                 // usual LaTeX symbols (!|@), then it is definitely a legacy index inset. Otherwise, if it has features of
1300                 // neither, it is both: consider this is a modern inset, to trigger the least complex code. Mixing both types
1301                 // is not allowed (i.e. behaviour is undefined).
1302                 const bool is_definitely_modern = inset_->hasSortKey() || inset_->hasSeeRef() || inset_->hasSubentries()
1303                                             || inset_->params_.range != InsetIndexParams::PageRange::None;
1304                 const bool is_definitely_legacy = entry_.find('@') != std::string::npos
1305                                 || entry_.find('|') != std::string::npos || entry_.find('!') != std::string::npos;
1306
1307                 if (is_definitely_legacy && is_definitely_modern)
1308                         output_error_ += from_utf8("Mix of index properties and raw LaTeX index commands is unsupported. ");
1309
1310                 // Truth table:
1311                 // - is_definitely_modern == true:
1312                 //   - is_definitely_legacy == true: error (return whatever)
1313                 //   - is_definitely_legacy == false: return modern
1314                 // - is_definitely_modern == false:
1315                 //   - is_definitely_legacy == true: return legacy
1316                 //   - is_definitely_legacy == false: return modern
1317                 return !is_definitely_legacy;
1318         }
1319
1320         void parseAsModern()
1321         {
1322                 LASSERT(runparams_, return);
1323
1324                 if (inset_->hasSortKey()) {
1325                         sort_as_ = inset_->getSortkeyAsText(*runparams_);
1326                 }
1327
1328                 terms_ = inset_->getSubentriesAsText(*runparams_);
1329                 // The main term is not present in the vector, as it's not a subentry. The main index term is inserted raw in
1330                 // the index inset. Considering that the user either uses the new or the legacy mechanism, the main term is the
1331                 // full string within this inset (i.e. without the subinsets).
1332                 terms_.insert(terms_.begin(), inset_->getMainSubentryAsText(*runparams_));
1333
1334                 has_start_range_ = inset_->params_.range == InsetIndexParams::PageRange::Start;
1335                 has_end_range_ = inset_->params_.range == InsetIndexParams::PageRange::End;
1336
1337                 see_ = inset_->getSeeAsText(*runparams_);
1338                 see_alsoes_ = inset_->getSeeAlsoesAsText(*runparams_);
1339         }
1340
1341         void parseAsLegacy() {
1342                 // Determine if some features are known not to be supported. For now, this is only formatting like
1343                 // \index{alpha@\textbf{alpha}} or \index{alpha@$\alpha$}.
1344                 // @ is supported, but only for sorting, without specific formatting.
1345                 if (entry_.find(from_utf8("@\\")) != lyx::docstring::npos) {
1346                         output_error_ += from_utf8("Unsupported feature: an index entry contains an @\\. "
1347                                                    "Complete entry: \"") + entry_ + from_utf8("\". ");
1348                 }
1349                 if (entry_.find(from_utf8("@$")) != lyx::docstring::npos) {
1350                         output_error_ += from_utf8("Unsupported feature: an index entry contains an @$. "
1351                                                    "Complete entry: \"") + entry_ + from_utf8("\". ");
1352                 }
1353
1354                 // Split the string into its main constituents: terms, and command (see, see also, range).
1355                 size_t positionVerticalBar = entry_.find(from_ascii("|")); // What comes before | is (sub)(sub)entries.
1356                 docstring indexTerms = entry_.substr(0, positionVerticalBar);
1357                 docstring command;
1358                 if (positionVerticalBar != lyx::docstring::npos) {
1359                         command = entry_.substr(positionVerticalBar + 1);
1360                 }
1361
1362                 // Handle sorting issues, with @.
1363                 vector<docstring> sortingElements = getVectorFromString(indexTerms, from_ascii("@"), false);
1364                 if (sortingElements.size() == 2) {
1365                         sort_as_ = sortingElements[0];
1366                         indexTerms = sortingElements[1];
1367                 }
1368
1369                 // Handle entries, subentries, and subsubentries.
1370                 terms_ = getVectorFromString(indexTerms, from_ascii("!"), false);
1371
1372                 // Handle ranges. Happily, (| and |) can only be at the end of the string!
1373                 has_start_range_ = entry_.find(from_ascii("|(")) != lyx::docstring::npos;
1374                 has_end_range_ = entry_.find(from_ascii("|)")) != lyx::docstring::npos;
1375
1376                 // - Remove the ranges from the command if they do not appear at the beginning.
1377                 size_t range_index = 0;
1378                 while ((range_index = command.find(from_utf8("|("), range_index)) != std::string::npos)
1379                         command.erase(range_index, 1);
1380                 range_index = 0;
1381                 while ((range_index = command.find(from_utf8("|)"), range_index)) != std::string::npos)
1382                         command.erase(range_index, 1);
1383
1384                 // - Remove the ranges when they are the only vertical bar in the complete string.
1385                 if (command[0] == '(' || command[0] == ')')
1386                         command.erase(0, 1);
1387
1388                 // Handle see and seealso. As "see" is a prefix of "seealso", the order of the comparisons is important.
1389                 // Both commands are mutually exclusive!
1390                 if (command.substr(0, 3) == "see") {
1391                         // Unescape brackets.
1392                         size_t index_argument_begin = 0;
1393                         while ((index_argument_begin = command.find(from_utf8("\\{"), index_argument_begin)) != std::string::npos)
1394                                 command.erase(index_argument_begin, 1);
1395                         size_t index_argument_end = 0;
1396                         while ((index_argument_end = command.find(from_utf8("\\}"), index_argument_end)) != std::string::npos)
1397                                 command.erase(index_argument_end, 1);
1398
1399                         // Retrieve the part between brackets, and remove the complete seealso.
1400                         size_t position_opening_bracket = command.find(from_ascii("{"));
1401                         size_t position_closing_bracket = command.find(from_ascii("}"));
1402                         docstring argument = command.substr(position_opening_bracket + 1,
1403                                                                                                 position_closing_bracket - position_opening_bracket - 1);
1404
1405                         // Parse the argument of referenced entries (or a single one for see).
1406                         if (command.substr(0, 7) == "seealso") {
1407                                 see_alsoes_ = getVectorFromString(argument, from_ascii(","), false);
1408                         } else {
1409                                 see_ = argument;
1410
1411                                 if (see_.find(from_ascii(",")) != std::string::npos) {
1412                                         output_error_ += from_utf8("Several index_argument_end terms found as \"see\"! Only one is "
1413                                                                    "acceptable. Complete entry: \"") + entry_ + from_utf8("\". ");
1414                                 }
1415                         }
1416
1417                         // Remove the complete see/seealso from the commands, in case there is something else to parse.
1418                         command = command.substr(position_closing_bracket + 1);
1419                 }
1420
1421                 // Some parts of the strings are not parsed, as they do not have anything matching in DocBook or XHTML:
1422                 // things like formatting the entry or the page number, other strings for sorting.
1423                 // https://wiki.lyx.org/Tips/Indexing
1424                 // If there are such things in the index entry, then this code may miserably fail. For example, for
1425                 // "Peter|(textbf", no range will be detected.
1426                 if (!command.empty()) {
1427                         output_error_ += from_utf8("Unsupported feature: an index entry contains a | with an unsupported command, ")
1428                                          + command + from_utf8(". Complete entry: \"") + entry_ + from_utf8("\". ");
1429                 }
1430         }
1431
1432 public:
1433         int level() const {
1434                 return terms_.size();
1435         }
1436
1437         const std::vector<docstring>& terms() const {
1438                 return terms_;
1439         }
1440
1441         std::vector<docstring>& terms() {
1442                 return terms_;
1443         }
1444
1445         const InsetIndex* inset() const {
1446                 return inset_;
1447         }
1448
1449 private:
1450         // Input inset. These should only be used when parsing the inset (either parseAsModern or parseAsLegacy, called in
1451         // the constructor).
1452         const InsetIndex * inset_;
1453         OutputParams const * runparams_;
1454         docstring entry_;
1455         docstring index_; // Useful when there are multiple indices in the same document.
1456
1457         // Errors, concatenated as a single string, available as soon as parsing is done, const afterwards (i.e. once
1458         // constructor is done).
1459         docstring output_error_;
1460
1461         // Parsed index entry.
1462         std::vector<docstring> terms_; // Up to three entries, in general.
1463         docstring sort_as_;
1464         docstring command_;
1465         bool has_start_range_;
1466         bool has_end_range_;
1467         docstring see_;
1468         vector<docstring> see_alsoes_;
1469
1470         // Operators used for sorting entries (alphabetical order).
1471         friend bool operator<(IndexEntry const & lhs, IndexEntry const & rhs);
1472 };
1473
1474 bool operator<(IndexEntry const & lhs, IndexEntry const & rhs)
1475 {
1476         if (lhs.terms_.empty())
1477                 return false;
1478
1479         for (int i = 0; i < min(rhs.terms_.size(), lhs.terms_.size()); ++i) {
1480                 int comp = compare_no_case(lhs.terms_[i], rhs.terms_[i]);
1481                 if (comp != 0)
1482                         return comp < 0;
1483         }
1484         return false;
1485 }
1486
1487
1488 namespace {
/// Builds the CSS class name for an index item at the given depth: "entry" prefixed by as many "sub" as \p depth
/// (0 gives "entry", 1 gives "subentry", 2 gives "subsubentry", and so on).
std::string generateCssClassAtDepth(unsigned depth) {
        std::string css_class;
        for (unsigned level = 0; level < depth; ++level)
                css_class += "sub";
        css_class += "entry";
        return css_class;
}
1499
1500 struct IndexNode {
1501         std::vector<IndexEntry> entries;
1502         std::vector<IndexNode*> children;
1503 };
1504
1505 docstring termAtLevel(const IndexNode* node, unsigned depth)
1506 {
1507         // The typical entry has a depth of 1 to 3: the call stack would then be at most 4 (due to the root node). This
1508         // function could be made constant time by copying the term in each node, but that would make data duplication that
1509         // may fall out of sync; the performance benefit would probably be negligible.
1510         if (!node->entries.empty()) {
1511                 LASSERT(node->entries.begin()->terms().size() >= depth + 1, return from_ascii(""));
1512                 return node->entries.begin()->terms()[depth];
1513         }
1514
1515         if (!node->children.empty()) {
1516                 return termAtLevel(*node->children.begin(), depth);
1517         }
1518
1519         LASSERT(false, return from_ascii(""));
1520 }
1521
1522 void insertIntoNode(const IndexEntry& entry, IndexNode* node, unsigned depth = 0)
1523 {
1524         // depth == 0 is for the root, not yet the index, hence the increase when going to vector size.
1525         for (IndexNode* child : node->children) {
1526                 if (entry.terms()[depth] == termAtLevel(child, depth)) {
1527                         if (depth + 1 == entry.terms().size()) { // == child.entries.begin()->terms().size()
1528                                 // All term entries match: it's an entry.
1529                                 child->entries.emplace_back(entry);
1530                                 return;
1531                         } else {
1532                                 insertIntoNode(entry, child, depth + 1);
1533                                 return;
1534                         }
1535                 }
1536         }
1537
1538         // Out of the loop: no matching child found, create a new (possibly nested) child for this entry. Due to the
1539         // possibility of nestedness, only insert the current entry when the right level is reached. This is needed if the
1540         // first entry for a word has several levels that never appeared.
1541         // In particular, this case is called for the first entry.
1542         IndexNode* new_node = node;
1543         do {
1544                 new_node->children.emplace_back(new IndexNode{{}, {}});
1545                 new_node = new_node->children.back();
1546                 depth += 1;
1547         } while (depth + 1 <= entry.terms().size()); // depth == 0: root node, no text associated.
1548         new_node->entries.emplace_back(entry);
1549 }
1550
1551 IndexNode* buildIndexTree(vector<IndexEntry>& entries)
1552 {
1553         // Sort the entries, first on the main entry, then the subentry, then the subsubentry,
1554         // thanks to the implementation of operator<.
1555         // If this operation is not performed, the algorithm below is no more correct (and ensuring that it works with
1556         // unsorted entries would make its complexity blow up).
1557         stable_sort(entries.begin(), entries.end());
1558
1559         // Cook the index into a nice tree data structure: entries at a given level in the index as a node, with subentries
1560         // as children.
1561         auto* index_root = new IndexNode{{}, {}};
1562         for (const IndexEntry& entry : entries) {
1563                 insertIntoNode(entry, index_root);
1564         }
1565
1566         return index_root;
1567 }
1568
1569 void outputIndexPage(XMLStream & xs, const IndexNode* root_node, unsigned depth = 0) // NOLINT(misc-no-recursion)
1570 {
1571         LASSERT(root_node->entries.size() + root_node->children.size() > 0, return);
1572
1573         xs << xml::StartTag("li", "class='" + generateCssClassAtDepth(depth) + "'");
1574         xs << xml::CR();
1575         xs << XMLStream::ESCAPE_NONE << termAtLevel(root_node, depth);
1576         // By tree assumption, all the entries at this node have the same set of terms.
1577
1578         if (!root_node->entries.empty()) {
1579                 xs << XMLStream::ESCAPE_NONE << " &#8212; "; // Em dash, i.e. long (---).
1580                 unsigned entry_number = 1;
1581
1582                 auto writeLinkToEntry = [&xs](const IndexEntry &entry, unsigned entry_number) {
1583                         std::string const link_attr = "href='#" + entry.inset()->paragraphs()[0].magicLabel() + "'";
1584                         xs << xml::StartTag("a", link_attr);
1585                         xs << from_ascii(std::to_string(entry_number));
1586                         xs << xml::EndTag("a");
1587                 };
1588
1589                 for (unsigned i = 0; i < root_node->entries.size(); ++i) {
1590                         const IndexEntry &entry = root_node->entries[i];
1591
1592                         switch (entry.inset()->params().range) {
1593                                 case InsetIndexParams::PageRange::None:
1594                                         writeLinkToEntry(entry, entry_number);
1595                                         break;
1596                                 case InsetIndexParams::PageRange::Start: {
1597                                         // Try to find the end of the range, if it is just after. Otherwise, the output will be slightly
1598                                         // scrambled, but understandable. Doing better would mean implementing more of the indexing logic here
1599                                         // and more complex indexing here (skipping the end is not just incrementing i). Worst case output:
1600                                         //     1--, 2, --3
1601                                         const bool nextEntryIsEnd = i + 1 < root_node->entries.size() &&
1602                                                                     root_node->entries[i + 1].inset()->params().range ==
1603                                                                     InsetIndexParams::PageRange::End;
1604                                         // No need to check if both entries are for the same terms: they are in the same IndexNode.
1605
1606                                         writeLinkToEntry(entry, entry_number);
1607                                         xs << XMLStream::ESCAPE_NONE << " &#8211; "; // En dash, i.e. semi-long (--).
1608
1609                                         if (nextEntryIsEnd) {
1610                                                 // Skip the next entry in the loop, write it right now, after the dash.
1611                                                 entry_number += 1;
1612                                                 i += 1;
1613                                                 writeLinkToEntry(root_node->entries[i], entry_number);
1614                                         }
1615                                 }
1616                                         break;
1617                                 case InsetIndexParams::PageRange::End:
1618                                         // This range end was not caught by the range start, do it now to avoid losing content.
1619                                         xs << XMLStream::ESCAPE_NONE << " &#8211; "; // En dash, i.e. semi-long (--).
1620                                         writeLinkToEntry(root_node->entries[i], entry_number);
1621                         }
1622
1623                         if (i < root_node->entries.size() - 1) {
1624                                 xs << ", ";
1625                         }
1626                         entry_number += 1;
1627                 }
1628         }
1629
1630         if (!root_node->entries.empty() && !root_node->children.empty()) {
1631                 xs << xml::CR();
1632         }
1633
1634         if (!root_node->children.empty()) {
1635                 xs << xml::StartTag("ul", "class='" + generateCssClassAtDepth(depth) + "'");
1636                 xs << xml::CR();
1637
1638                 for (const IndexNode* child : root_node->children) {
1639                         outputIndexPage(xs, child, depth + 1);
1640                 }
1641
1642                 xs << xml::EndTag("ul");
1643                 xs << xml::CR();
1644         }
1645
1646         xs << xml::EndTag("li");
1647         xs << xml::CR();
1648 }
1649
1650 // Only useful for debugging.
1651 void printTree(const IndexNode* root_node, unsigned depth = 0)
1652 {
1653         static const std::string pattern = "    ";
1654         std::string prefix;
1655         for (unsigned i = 0; i < depth; ++i) {
1656                 prefix += pattern;
1657         }
1658         const std::string prefix_long = prefix + pattern + pattern;
1659
1660         docstring term_at_level;
1661         if (depth == 0) {
1662                 // The root has no term.
1663                 std::cout << "<ROOT>" << std::endl;
1664         } else {
1665                 LASSERT(depth - 1 <= 10, return); // Check for overflows.
1666                 term_at_level = termAtLevel(root_node, depth - 1);
1667                 std::cout << prefix << to_utf8(term_at_level) << " (x " << std::to_string(root_node->entries.size()) << ")"
1668                           << std::endl;
1669         }
1670
1671         for (const IndexEntry& entry : root_node->entries) {
1672                 if (entry.terms().size() != depth) {
1673                         std::cout << prefix_long << "ERROR: an entry doesn't have the same number of terms" << std::endl;
1674                 }
1675                 if (depth > 0 && entry.terms()[depth - 1] != term_at_level) {
1676                         std::cout << prefix_long << "ERROR: an entry doesn't have the right term at depth " << std::to_string(depth)
1677                                 << std::endl;
1678                 }
1679         }
1680
1681         for (const IndexNode* node : root_node->children) {
1682                 printTree(node, depth + 1);
1683         }
1684 }
1685 }
1686
1687
1688 docstring InsetPrintIndex::xhtml(XMLStream &, OutputParams const & op) const
1689 {
1690         BufferParams const & bp = buffer().masterBuffer()->params();
1691
1692         shared_ptr<Toc const> toc = buffer().tocBackend().toc("index");
1693         if (toc->empty())
1694                 return docstring();
1695
1696         // Collect the index entries in a form we can use them.
1697         vector<IndexEntry> entries;
1698         const docstring & indexType = params().getParamOr("type", from_ascii("idx"));
1699         for (const TocItem& item : *toc) {
1700                 const auto* inset = static_cast<const InsetIndex*>(&(item.dit().inset()));
1701                 if (item.isOutput() && inset->params().index == indexType)
1702                         entries.emplace_back(IndexEntry{inset, &op});
1703         }
1704
1705         // If all the index entries are in notes or not displayed, get out sooner.
1706         if (entries.empty())
1707                 return docstring();
1708
1709         const IndexNode* index_root = buildIndexTree(entries);
1710 #if 0
1711         printTree(index_root);
1712 #endif
1713
1714         // Start generating the XHTML index.
1715         Layout const & lay = bp.documentClass().htmlTOCLayout();
1716         string const & tocclass = lay.defaultCSSClass();
1717         string const tocattr = "class='index " + tocclass + "'";
1718         docstring const indexName = params().getParamOr("name", from_ascii("Index"));
1719
1720         // we'll use our own stream, because we are going to defer everything.
1721         // that's how we deal with the fact that we're probably inside a standard
1722         // paragraph, and we don't want to be.
1723         odocstringstream ods;
1724         XMLStream xs(ods);
1725
1726         xs << xml::StartTag("div", tocattr);
1727         xs << xml::CR();
1728         xs << xml::StartTag(lay.htmltag(), lay.htmlattr());
1729         xs << translateIfPossible(indexName, op.local_font->language()->lang());
1730         xs << xml::EndTag(lay.htmltag());
1731         xs << xml::CR();
1732         xs << xml::StartTag("ul", "class='main'");
1733         xs << xml::CR();
1734
1735         LASSERT(index_root->entries.empty(), return docstring()); // No index entry should have zero terms.
1736         for (const IndexNode* node : index_root->children) {
1737                 outputIndexPage(xs, node);
1738         }
1739
1740         xs << xml::EndTag("ul");
1741         xs << xml::CR();
1742         xs << xml::EndTag("div");
1743
1744         return ods.str();
1745 }
1746
1747 } // namespace lyx