]> git.lyx.org Git - features.git/blob - src/insets/InsetIndex.cpp
13a7b4d1dff1eb677f4a694df4eebfccfb699b8b
[features.git] / src / insets / InsetIndex.cpp
1 /**
2  * \file InsetIndex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Jürgen Spitzmüller
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11 #include <config.h>
12
13 #include "InsetIndex.h"
14 #include "InsetIndexMacro.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "BufferView.h"
19 #include "ColorSet.h"
20 #include "Cursor.h"
21 #include "DispatchResult.h"
22 #include "Encoding.h"
23 #include "ErrorList.h"
24 #include "FuncRequest.h"
25 #include "FuncStatus.h"
26 #include "IndicesList.h"
27 #include "InsetList.h"
28 #include "Language.h"
29 #include "LaTeX.h"
30 #include "LaTeXFeatures.h"
31 #include "Lexer.h"
32 #include "LyX.h"
33 #include "output_latex.h"
34 #include "output_xhtml.h"
35 #include "xml.h"
36 #include "texstream.h"
37 #include "TextClass.h"
38 #include "TocBackend.h"
39
40 #include "support/debug.h"
41 #include "support/docstream.h"
42 #include "support/FileName.h"
43 #include "support/gettext.h"
44 #include "support/lstrings.h"
45 #include "support/Translator.h"
46
47 #include "frontends/alert.h"
48
49 #include <algorithm>
50 #include <set>
51 #include <iostream>
52
53 #include <QThreadStorage>
54
55 using namespace std;
56 using namespace lyx::support;
57
58 // Uncomment to enable InsetIndex-specific debugging mode: the tree for the index will be printed to std::cout.
59 // #define LYX_INSET_INDEX_DEBUG
60
61 namespace lyx {
62
63 namespace {
64
65 typedef Translator<string, InsetIndexParams::PageRange> PageRangeTranslator;
66 typedef Translator<docstring, InsetIndexParams::PageRange> PageRangeTranslatorLoc;
67
68 PageRangeTranslator const init_insetindexpagerangetranslator()
69 {
70         PageRangeTranslator translator("none", InsetIndexParams::None);
71         translator.addPair("start", InsetIndexParams::Start);
72         translator.addPair("end", InsetIndexParams::End);
73         return translator;
74 }
75
76 PageRangeTranslator const init_insetindexpagerangetranslator_latex()
77 {
78         PageRangeTranslator translator("", InsetIndexParams::None);
79         translator.addPair("(", InsetIndexParams::Start);
80         translator.addPair(")", InsetIndexParams::End);
81         return translator;
82 }
83
84
85 PageRangeTranslatorLoc const init_insetindexpagerangetranslator_loc()
86 {
87         PageRangeTranslatorLoc translator(docstring(), InsetIndexParams::None);
88         translator.addPair(_("Starts page range"), InsetIndexParams::Start);
89         translator.addPair(_("Ends page range"), InsetIndexParams::End);
90         return translator;
91 }
92
93
94 PageRangeTranslator const & insetindexpagerangetranslator()
95 {
96         static PageRangeTranslator const prtranslator =
97                         init_insetindexpagerangetranslator();
98         return prtranslator;
99 }
100
101
102 PageRangeTranslatorLoc const & insetindexpagerangetranslator_loc()
103 {
104         static PageRangeTranslatorLoc const translator =
105                         init_insetindexpagerangetranslator_loc();
106         return translator;
107 }
108
109
110 PageRangeTranslator const & insetindexpagerangetranslator_latex()
111 {
112         static PageRangeTranslator const lttranslator =
113                         init_insetindexpagerangetranslator_latex();
114         return lttranslator;
115 }
116
117 } // namespace anon
118
119 /////////////////////////////////////////////////////////////////////
120 //
121 // InsetIndex
122 //
123 ///////////////////////////////////////////////////////////////////////
124
125
126 InsetIndex::InsetIndex(Buffer * buf, InsetIndexParams const & params)
127         : InsetCollapsible(buf), params_(params)
128 {}
129
130
131 void InsetIndex::latex(otexstream & ios, OutputParams const & runparams_in) const
132 {
133         OutputParams runparams(runparams_in);
134         runparams.inIndexEntry = true;
135
136         otexstringstream os;
137
138         if (buffer().masterBuffer()->params().use_indices && !params_.index.empty()
139                 && params_.index != "idx") {
140                 os << "\\sindex[";
141                 os << escape(params_.index);
142                 os << "]{";
143         } else {
144                 os << "\\index";
145                 os << '{';
146         }
147
148         // Get the LaTeX output from InsetText. We need to deconstruct this later
149         // in order to check if we need to generate a sorting key
150         odocstringstream ourlatex;
151         otexstream ots(ourlatex);
152         InsetText::latex(ots, runparams);
153         if (runparams.find_effective()) {
154                 // No need for special handling, if we are only searching for some patterns
155                 os << ourlatex.str() << "}";
156                 return;
157         }
158
159         if (hasSortKey()) {
160                 getSortkey(os, runparams);
161                 os << "@";
162                 os << ourlatex.str();
163                 getSubentries(os, runparams);
164                 if (hasSeeRef()) {
165                         os << "|";
166                         os << insetindexpagerangetranslator_latex().find(params_.range);
167                         getSeeRefs(os, runparams);
168                 } else if (!params_.pagefmt.empty() && params_.pagefmt != "default") {
169                         os << "|";
170                         os << insetindexpagerangetranslator_latex().find(params_.range);
171                         os << from_utf8(params_.pagefmt);
172                 }
173         } else {
174                 // We check whether we need a sort key.
175                 // If so, we use the plaintext version
176                 odocstringstream ourplain;
177                 InsetText::plaintext(ourplain, runparams);
178
179                 // These are the LaTeX and plaintext representations
180                 docstring latexstr = ourlatex.str();
181                 docstring plainstr = ourplain.str();
182         
183                 // This will get what follows | if anything does,
184                 // the command (e.g., see, textbf) for pagination
185                 // formatting
186                 docstring cmd;
187
188                 if (hasSeeRef()) {
189                         odocstringstream seeref;
190                         otexstream otsee(seeref);
191                         getSeeRefs(otsee, runparams);
192                         cmd = seeref.str();
193                 } else if (!params_.pagefmt.empty() && params_.pagefmt != "default") {
194                         cmd = from_utf8(params_.pagefmt);
195                 } else {
196                         // Check for the | separator to strip the cmd.
197                         // This goes wrong on an escaped "|", but as the escape
198                         // character can be changed in style files, we cannot
199                         // prevent that.
200                         size_t pos = latexstr.find(from_ascii("|"));
201                         if (pos != docstring::npos) {
202                                 // Put the bit after "|" into cmd...
203                                 cmd = latexstr.substr(pos + 1);
204                                 // ...and erase that stuff from latexstr
205                                 latexstr = latexstr.erase(pos);
206                                 // ...as well as from plainstr
207                                 size_t ppos = plainstr.find(from_ascii("|"));
208                                 if (ppos < plainstr.size())
209                                         plainstr.erase(ppos);
210                                 else
211                                         LYXERR0("The `|' separator was not found in the plaintext version!");
212                         }
213                 }
214
215                 odocstringstream subentries;
216                 otexstream otsub(subentries);
217                 getSubentries(otsub, runparams);
218                 if (subentries.str().empty()) {
219                         // Separate the entries and subentries, i.e., split on "!".
220                         // This goes wrong on an escaped "!", but as the escape
221                         // character can be changed in style files, we cannot
222                         // prevent that.
223                         std::vector<docstring> const levels =
224                                         getVectorFromString(latexstr, from_ascii("!"), true);
225                         std::vector<docstring> const levels_plain =
226                                         getVectorFromString(plainstr, from_ascii("!"), true);
227                 
228                         vector<docstring>::const_iterator it = levels.begin();
229                         vector<docstring>::const_iterator end = levels.end();
230                         vector<docstring>::const_iterator it2 = levels_plain.begin();
231                         bool first = true;
232                         for (; it != end; ++it) {
233                                 // The separator needs to be put back when
234                                 // writing the levels, except for the first level
235                                 if (!first)
236                                         os << '!';
237                                 else
238                                         first = false;
239                 
240                                 // Now here comes the reason for this whole procedure:
241                                 // We try to correctly sort macros and formatted strings.
242                                 // If we find a command, prepend a plain text
243                                 // version of the content to get sorting right,
244                                 // e.g. \index{LyX@\LyX}, \index{text@\textbf{text}}.
245                                 // We do this on all levels.
246                                 // We don't do it if the level already contains a '@', though.
247                                 // Plaintext might return nothing (e.g. for ERTs).
248                                 // In that case, we use LaTeX.
249                                 docstring const spart = (levels_plain.empty() || (*it2).empty()) ? *it : *it2;
250                                 processLatexSorting(os, runparams, *it, spart);
251                                 if (it2 < levels_plain.end())
252                                         ++it2;
253                         }
254                 } else {
255                         processLatexSorting(os, runparams, latexstr, plainstr);
256                         os << subentries.str();
257                 }
258
259                 // At last, re-insert the command, separated by "|"
260                 if (!cmd.empty()) {
261                         os << "|"
262                            << insetindexpagerangetranslator_latex().find(params_.range)
263                            << cmd;
264                 }
265         }
266         os << '}';
267
268         // In macros with moving arguments, such as \section,
269         // we store the index and output it after the macro (#2154)
270         if (runparams_in.postpone_fragile_stuff)
271                 runparams_in.post_macro += os.str();
272         else
273                 ios << os.release();
274 }
275
276
277 void InsetIndex::processLatexSorting(otexstream & os, OutputParams const & runparams,
278                                 docstring const latex, docstring const spart) const
279 {
280         if (contains(latex, '\\') && !contains(latex, '@')) {
281                 // Now we need to validate that all characters in
282                 // the sorting part are representable in the current
283                 // encoding. If not try the LaTeX macro which might
284                 // or might not be a good choice, and issue a warning.
285                 pair<docstring, docstring> spart_latexed =
286                                 runparams.encoding->latexString(spart, runparams.dryrun);
287                 if (!spart_latexed.second.empty())
288                         LYXERR0("Uncodable character in index entry. Sorting might be wrong!");
289                 if (spart != spart_latexed.first && !runparams.dryrun) {
290                         TeXErrors terr;
291                         ErrorList & errorList = buffer().errorList("Export");
292                         docstring const s = bformat(_("LyX's automatic index sorting algorithm faced "
293                                                       "problems with the entry '%1$s'.\n"
294                                                       "Please specify the sorting of this entry manually, as "
295                                                       "explained in the User Guide."), spart);
296                         Paragraph const & par = buffer().paragraphs().front();
297                         errorList.push_back(ErrorItem(_("Index sorting failed"), s,
298                                                       {par.id(), 0}, {par.id(), -1}));
299                         buffer().bufferErrors(terr, errorList);
300                 }
301                 // Remove remaining \'s from the sort key
302                 docstring ppart = subst(spart_latexed.first, from_ascii("\\"), docstring());
303                 // Plain quotes need to be escaped, however (#10649), as this
304                 // is the default escape character
305                 ppart = subst(ppart, from_ascii("\""), from_ascii("\\\""));
306
307                 // Now insert the sortkey, separated by '@'.
308                 os << ppart;
309                 os << '@';
310         }
311         // Insert the actual level text
312         os << latex;
313 }
314
315
316 void InsetIndex::docbook(XMLStream & xs, OutputParams const & runparams) const
317 {
318         // Two ways of processing this inset are implemented:
319         // - the legacy one, based on parsing the raw LaTeX (before LyX 2.4) -- unlikely to be deprecated
320         // - the modern one, based on precise insets for indexing features
321         // Like the LaTeX implementation, consider the user chooses either of those options.
322
323         // Get the content of the inset as LaTeX, as some things may be encoded as ERT (like {}).
324         // TODO: if there is an ERT within the index term, its conversion should be tried, in case it becomes useful;
325         //  otherwise, ERTs should become comments. For now, they are just copied as-is, which is barely satisfactory.
326         odocstringstream odss;
327         otexstream ots(odss);
328         InsetText::latex(ots, runparams);
329         docstring latexString = trim(odss.str());
330
331         // Handle several indices (indicated in the inset instead of the raw latexString).
332         docstring indexType = from_utf8("");
333         if (buffer().masterBuffer()->params().use_indices) {
334                 indexType += " type=\"" + params_.index + "\"";
335         }
336
337         // Split the string into its main constituents: terms, and command (see, see also, range).
338         size_t positionVerticalBar = latexString.find(from_ascii("|")); // What comes before | is (sub)(sub)entries.
339         docstring indexTerms = latexString.substr(0, positionVerticalBar);
340         docstring command;
341         if (positionVerticalBar != lyx::docstring::npos) {
342                 command = latexString.substr(positionVerticalBar + 1);
343         }
344
345         // Handle sorting issues, with @.
346         docstring sortAs;
347         if (hasSortKey()) {
348                 sortAs = getSortkeyAsText(runparams);
349                 // indexTerms may contain a sort key if the user has both the inset and the manual key.
350         } else {
351                 vector<docstring> sortingElements = getVectorFromString(indexTerms, from_ascii("@"), false);
352                 if (sortingElements.size() == 2) {
353                         sortAs = sortingElements[0];
354                         indexTerms = sortingElements[1];
355                 }
356         }
357
358         // Handle primary, secondary, and tertiary terms (entries, subentries, and subsubentries, for LaTeX).
359         vector<docstring> terms;
360         if (const vector<docstring> potential_terms = getSubentriesAsText(runparams); !potential_terms.empty()) {
361                 terms = potential_terms;
362                 // The main term is not present in the vector, as it's not a subentry. The main index term is inserted raw in
363                 // the index inset. Considering that the user either uses the new or the legacy mechanism, the main term is the
364                 // full string within this inset (i.e. without the subinsets).
365                 terms.insert(terms.begin(), latexString);
366         } else {
367                 terms = getVectorFromString(indexTerms, from_ascii("!"), false);
368         }
369
370         // Handle ranges. Happily, in the raw LaTeX mode, (| and |) can only be at the end of the string!
371         const bool hasInsetRange = params_.range != InsetIndexParams::PageRange::None;
372         const bool hasStartRange = params_.range == InsetIndexParams::PageRange::Start ||
373                         latexString.find(from_ascii("|(")) != lyx::docstring::npos;
374         const bool hasEndRange = params_.range == InsetIndexParams::PageRange::End ||
375                         latexString.find(from_ascii("|)")) != lyx::docstring::npos;
376
377         if (hasInsetRange) {
378                 // Remove the ranges from the command if they do not appear at the beginning.
379                 size_t index = 0;
380                 while ((index = command.find(from_utf8("|("), index)) != std::string::npos)
381                         command.erase(index, 1);
382                 index = 0;
383                 while ((index = command.find(from_utf8("|)"), index)) != std::string::npos)
384                         command.erase(index, 1);
385
386                 // Remove the ranges when they are the only vertical bar in the complete string.
387                 if (command[0] == '(' || command[0] == ')')
388                         command.erase(0, 1);
389         }
390
391         // Handle see and seealso. As "see" is a prefix of "seealso", the order of the comparisons is important.
392         // Both commands are mutually exclusive!
393         docstring see = getSeeAsText(runparams);
394         vector<docstring> seeAlsoes = getSeeAlsoesAsText(runparams);
395
396         if (see.empty() && seeAlsoes.empty() && command.substr(0, 3) == "see") {
397                 // Unescape brackets.
398                 size_t index = 0;
399                 while ((index = command.find(from_utf8("\\{"), index)) != std::string::npos)
400                         command.erase(index, 1);
401                 index = 0;
402                 while ((index = command.find(from_utf8("\\}"), index)) != std::string::npos)
403                         command.erase(index, 1);
404
405                 // Retrieve the part between brackets, and remove the complete seealso.
406                 size_t positionOpeningBracket = command.find(from_ascii("{"));
407                 size_t positionClosingBracket = command.find(from_ascii("}"));
408                 docstring list = command.substr(positionOpeningBracket + 1, positionClosingBracket - positionOpeningBracket - 1);
409
410                 // Parse the list of referenced entries (or a single one for see).
411                 if (command.substr(0, 7) == "seealso") {
412                         seeAlsoes = getVectorFromString(list, from_ascii(","), false);
413                 } else {
414                         see = list;
415
416                         if (see.find(from_ascii(",")) != std::string::npos) {
417                                 docstring error = from_utf8("Several index terms found as \"see\"! Only one is acceptable. "
418                                                                                         "Complete entry: \"") + latexString + from_utf8("\"");
419                                 LYXERR0(error);
420                                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
421                         }
422                 }
423
424                 // Remove the complete see/seealso from the commands, in case there is something else to parse.
425                 command = command.substr(positionClosingBracket + 1);
426         }
427
428         // Some parts of the strings are not parsed, as they do not have anything matching in DocBook: things like
429         // formatting the entry or the page number, other strings for sorting. https://wiki.lyx.org/Tips/Indexing
430         // If there are such things in the index entry, then this code may miserably fail. For example, for "Peter|(textbf",
431         // no range will be detected.
432         // TODO: Could handle formatting as significance="preferred"?
433         if (!command.empty()) {
434                 docstring error = from_utf8("Unsupported feature: an index entry contains a | with an unsupported command, ")
435                                           + command + from_utf8(". ") + from_utf8("Complete entry: \"") + latexString + from_utf8("\"");
436                 LYXERR0(error);
437                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
438         }
439
440         // Write all of this down.
441         if (terms.empty() && !hasEndRange) {
442                 docstring error = from_utf8("No index term found! Complete entry: \"") + latexString + from_utf8("\"");
443                 LYXERR0(error);
444                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
445         } else {
446                 // Generate the attributes for ranges. It is based on the terms that are indexed, but the ID must be unique
447                 // to this indexing area (xml::cleanID does not guarantee this: for each call with the same arguments,
448                 // the same legal ID is produced; here, as the input would be the same, the output must be, by design).
449                 // Hence the thread-local storage, as the numbers must strictly be unique, and thus cannot be shared across
450                 // a paragraph (making the solution used for HTML worthless). This solution is very similar to the one used in
451                 // xml::cleanID.
452                 // indexType can only be used for singular and startofrange types!
453                 docstring attrs;
454                 if (!hasStartRange && !hasEndRange) {
455                         attrs = indexType;
456                 } else {
457                         // Append an ID if uniqueness is not guaranteed across the document.
458                         static QThreadStorage<set<docstring>> tKnownTermLists;
459                         static QThreadStorage<int> tID;
460
461                         set<docstring> &knownTermLists = tKnownTermLists.localData();
462                         int &ID = tID.localData();
463
464                         if (!tID.hasLocalData()) {
465                                 tID.localData() = 0;
466                         }
467
468                         // Modify the index terms to add the unique ID if needed.
469                         docstring newIndexTerms = indexTerms;
470                         if (knownTermLists.find(indexTerms) != knownTermLists.end()) {
471                                 newIndexTerms += from_ascii(string("-") + to_string(ID));
472
473                                 // Only increment for the end of range, so that the same number is used for the start of range.
474                                 if (hasEndRange) {
475                                         ID++;
476                                 }
477                         }
478
479                         // Term list not yet known: add it to the set AFTER the end of range. After
480                         if (knownTermLists.find(indexTerms) == knownTermLists.end() && hasEndRange) {
481                                 knownTermLists.insert(indexTerms);
482                         }
483
484                         // Generate the attributes.
485                         docstring id = xml::cleanID(newIndexTerms);
486                         if (hasStartRange) {
487                                 attrs = indexType + " class=\"startofrange\" xml:id=\"" + id + "\"";
488                         } else {
489                                 attrs = " class=\"endofrange\" startref=\"" + id + "\"";
490                         }
491                 }
492
493                 // Handle the index terms (including the specific index for this entry).
494                 if (hasEndRange) {
495                         xs << xml::CompTag("indexterm", attrs);
496                 } else {
497                         xs << xml::StartTag("indexterm", attrs);
498                         if (!terms.empty()) { // hasEndRange has no content.
499                                 docstring attr;
500                                 if (!sortAs.empty()) {
501                                         attr = from_utf8("sortas='") + sortAs + from_utf8("'");
502                                 }
503
504                                 xs << xml::StartTag("primary", attr);
505                                 xs << terms[0];
506                                 xs << xml::EndTag("primary");
507                         }
508                         if (terms.size() > 1) {
509                                 xs << xml::StartTag("secondary");
510                                 xs << terms[1];
511                                 xs << xml::EndTag("secondary");
512                         }
513                         if (terms.size() > 2) {
514                                 xs << xml::StartTag("tertiary");
515                                 xs << terms[2];
516                                 xs << xml::EndTag("tertiary");
517                         }
518
519                         // Handle see and see also.
520                         if (!see.empty()) {
521                                 xs << xml::StartTag("see");
522                                 xs << see;
523                                 xs << xml::EndTag("see");
524                         }
525
526                         if (!seeAlsoes.empty()) {
527                                 for (auto &entry : seeAlsoes) {
528                                         xs << xml::StartTag("seealso");
529                                         xs << entry;
530                                         xs << xml::EndTag("seealso");
531                                 }
532                         }
533
534                         // Close the entry.
535                         xs << xml::EndTag("indexterm");
536                 }
537         }
538 }
539
540
541 docstring InsetIndex::xhtml(XMLStream & xs, OutputParams const &) const
542 {
543         // we just print an anchor, taking the paragraph ID from
544         // our own interior paragraph, which doesn't get printed
545         std::string const magic = paragraphs().front().magicLabel();
546         std::string const attr = "id='" + magic + "'";
547         xs << xml::CompTag("a", attr);
548         return docstring();
549 }
550
551
552 bool InsetIndex::showInsetDialog(BufferView * bv) const
553 {
554         bv->showDialog("index", params2string(params_),
555                         const_cast<InsetIndex *>(this));
556         return true;
557 }
558
559
560 void InsetIndex::doDispatch(Cursor & cur, FuncRequest & cmd)
561 {
562         switch (cmd.action()) {
563
564         case LFUN_INSET_MODIFY: {
565                 if (cmd.getArg(0) == "changetype") {
566                         cur.recordUndoInset(this);
567                         params_.index = from_utf8(cmd.getArg(1));
568                         break;
569                 }
570                 InsetIndexParams params;
571                 InsetIndex::string2params(to_utf8(cmd.argument()), params);
572                 cur.recordUndoInset(this);
573                 params_.index = params.index;
574                 params_.range = params.range;
575                 params_.pagefmt = params.pagefmt;
576                 // what we really want here is a TOC update, but that means
577                 // a full buffer update
578                 cur.forceBufferUpdate();
579                 break;
580         }
581
582         case LFUN_INSET_DIALOG_UPDATE:
583                 cur.bv().updateDialog("index", params2string(params_));
584                 break;
585
586         case LFUN_PARAGRAPH_BREAK: {
587                 // Since this inset in single-par anyway, let's use
588                 // return to enter subentries
589                 FuncRequest fr(LFUN_INDEXMACRO_INSERT, "subentry");
590                 lyx::dispatch(fr);
591                 break;
592         }
593
594         default:
595                 InsetCollapsible::doDispatch(cur, cmd);
596                 break;
597         }
598 }
599
600
601 bool InsetIndex::getStatus(Cursor & cur, FuncRequest const & cmd,
602                 FuncStatus & flag) const
603 {
604         switch (cmd.action()) {
605
606         case LFUN_INSET_MODIFY:
607                 if (cmd.getArg(0) == "changetype") {
608                         docstring const newtype = from_utf8(cmd.getArg(1));
609                         Buffer const & realbuffer = *buffer().masterBuffer();
610                         IndicesList const & indiceslist = realbuffer.params().indiceslist();
611                         Index const * index = indiceslist.findShortcut(newtype);
612                         flag.setEnabled(index != 0);
613                         flag.setOnOff(
614                                 from_utf8(cmd.getArg(1)) == params_.index);
615                         return true;
616                 }
617                 return InsetCollapsible::getStatus(cur, cmd, flag);
618
619         case LFUN_INSET_DIALOG_UPDATE: {
620                 Buffer const & realbuffer = *buffer().masterBuffer();
621                 flag.setEnabled(realbuffer.params().use_indices);
622                 return true;
623         }
624         
625         case LFUN_PARAGRAPH_BREAK:
626                 return macrosPossible("subentry");
627         
628         case LFUN_INDEXMACRO_INSERT:
629                 return macrosPossible(cmd.getArg(0));
630
631         default:
632                 return InsetCollapsible::getStatus(cur, cmd, flag);
633         }
634 }
635
636
637 void InsetIndex::getSortkey(otexstream & os, OutputParams const & runparams) const
638 {
639         Paragraph const & par = paragraphs().front();
640         InsetList::const_iterator it = par.insetList().begin();
641         for (; it != par.insetList().end(); ++it) {
642                 Inset & inset = *it->inset;
643                 if (inset.lyxCode() == INDEXMACRO_SORTKEY_CODE) {
644                         InsetIndexMacro const & iim =
645                                 static_cast<InsetIndexMacro const &>(inset);
646                         iim.getLatex(os, runparams);
647                         return;
648                 }
649         }
650 }
651
652
653 docstring InsetIndex::getSortkeyAsText(OutputParams const & runparams) const
654 {
655         Paragraph const & par = paragraphs().front();
656         InsetList::const_iterator it = par.insetList().begin();
657         for (; it != par.insetList().end(); ++it) {
658                 Inset & inset = *it->inset;
659                 if (inset.lyxCode() == INDEXMACRO_SORTKEY_CODE) {
660                         otexstringstream os;
661                         InsetIndexMacro const & iim =
662                                 static_cast<InsetIndexMacro const &>(inset);
663                         iim.getLatex(os, runparams);
664                         return os.str();
665                 }
666         }
667         return from_ascii("");
668 }
669
670
671 void InsetIndex::getSubentries(otexstream & os, OutputParams const & runparams) const
672 {
673         Paragraph const & par = paragraphs().front();
674         InsetList::const_iterator it = par.insetList().begin();
675         int i = 0;
676         for (; it != par.insetList().end(); ++it) {
677                 Inset & inset = *it->inset;
678                 if (inset.lyxCode() == INDEXMACRO_CODE) {
679                         InsetIndexMacro const & iim =
680                                 static_cast<InsetIndexMacro const &>(inset);
681                         if (iim.params().type == InsetIndexMacroParams::Subentry) {
682                                 ++i;
683                                 if (i > 2)
684                                         return;
685                                 os << "!";
686                                 iim.getLatex(os, runparams);
687                         }
688                 }
689         }
690 }
691
692
693 std::vector<docstring> InsetIndex::getSubentriesAsText(OutputParams const & runparams,
694                                                        bool const asLabel) const
695 {
696         std::vector<docstring> subentries;
697
698         Paragraph const & par = paragraphs().front();
699         InsetList::const_iterator it = par.insetList().begin();
700         int i = 0;
701         for (; it != par.insetList().end(); ++it) {
702                 Inset & inset = *it->inset;
703                 if (inset.lyxCode() == INDEXMACRO_CODE) {
704                         InsetIndexMacro const & iim =
705                                 static_cast<InsetIndexMacro const &>(inset);
706                         if (iim.params().type == InsetIndexMacroParams::Subentry) {
707                                 ++i;
708                                 if (i > 2)
709                                         break;
710                                 if (asLabel) {
711                                         docstring const l;
712                                         docstring const sl = iim.getNewLabel(l);
713                                         subentries.emplace_back(sl);
714                                 } else {
715                                         otexstringstream os;
716                                         iim.getLatex(os, runparams);
717                                         subentries.emplace_back(os.str());
718                                 }
719                         }
720                 }
721         }
722
723         return subentries;
724 }
725
726
727 docstring InsetIndex::getMainSubentryAsText(OutputParams const & runparams) const
728 {
729         otexstringstream os;
730         InsetText::latex(os, runparams);
731         return os.str();
732 }
733
734
735 void InsetIndex::getSeeRefs(otexstream & os, OutputParams const & runparams) const
736 {
737         Paragraph const & par = paragraphs().front();
738         InsetList::const_iterator it = par.insetList().begin();
739         for (; it != par.insetList().end(); ++it) {
740                 Inset & inset = *it->inset;
741                 if (inset.lyxCode() == INDEXMACRO_CODE) {
742                         InsetIndexMacro const & iim =
743                                 static_cast<InsetIndexMacro const &>(inset);
744                         if (iim.params().type == InsetIndexMacroParams::See
745                             || iim.params().type == InsetIndexMacroParams::Seealso) {
746                                 iim.getLatex(os, runparams);
747                                 return;
748                         }
749                 }
750         }
751 }
752
753
754 docstring InsetIndex::getSeeAsText(OutputParams const & runparams) const
755 {
756         Paragraph const & par = paragraphs().front();
757         InsetList::const_iterator it = par.insetList().begin();
758         for (; it != par.insetList().end(); ++it) {
759                 Inset & inset = *it->inset;
760                 if (inset.lyxCode() == INDEXMACRO_CODE) {
761                         InsetIndexMacro const & iim =
762                                 static_cast<InsetIndexMacro const &>(inset);
763                         if (iim.params().type == InsetIndexMacroParams::See) {
764                                 otexstringstream os;
765                                 iim.getLatex(os, runparams);
766                                 return os.str();
767                         }
768                 }
769         }
770         return from_ascii("");
771 }
772
773
774 std::vector<docstring> InsetIndex::getSeeAlsoesAsText(OutputParams const & runparams) const
775 {
776         std::vector<docstring> seeAlsoes;
777
778         Paragraph const & par = paragraphs().front();
779         InsetList::const_iterator it = par.insetList().begin();
780         for (; it != par.insetList().end(); ++it) {
781                 Inset & inset = *it->inset;
782                 if (inset.lyxCode() == INDEXMACRO_CODE) {
783                         InsetIndexMacro const & iim =
784                                 static_cast<InsetIndexMacro const &>(inset);
785                         if (iim.params().type == InsetIndexMacroParams::Seealso) {
786                                 otexstringstream os;
787                                 iim.getLatex(os, runparams);
788                                 seeAlsoes.emplace_back(os.str());
789                         }
790                 }
791         }
792
793         return seeAlsoes;
794 }
795
796
797 namespace {
798
799 bool hasInsetWithCode(const InsetIndex * const inset_index, const InsetCode code,
800                                           const std::set<InsetIndexMacroParams::Type> types = {})
801 {
802         Paragraph const & par = inset_index->paragraphs().front();
803         InsetList::const_iterator it = par.insetList().begin();
804         for (; it != par.insetList().end(); ++it) {
805                 Inset & inset = *it->inset;
806                 if (inset.lyxCode() == code) {
807                         if (types.empty())
808                                 return true;
809
810                         LASSERT(code == INDEXMACRO_CODE, return false);
811                         InsetIndexMacro const & iim =
812                                         static_cast<InsetIndexMacro const &>(inset);
813                         if (types.find(iim.params().type) != types.end())
814                                 return true;
815                 }
816         }
817         return false;
818 }
819
820 } // namespace
821
822
823 bool InsetIndex::hasSubentries() const
824 {
825         return hasInsetWithCode(this, INDEXMACRO_CODE, {InsetIndexMacroParams::Subentry});
826 }
827
828
829 bool InsetIndex::hasSeeRef() const
830 {
831         return hasInsetWithCode(this, INDEXMACRO_CODE, {InsetIndexMacroParams::See, InsetIndexMacroParams::Seealso});
832 }
833
834
835 bool InsetIndex::hasSortKey() const
836 {
837         return hasInsetWithCode(this, INDEXMACRO_SORTKEY_CODE);
838 }
839
840
841 bool InsetIndex::macrosPossible(string const type) const
842 {
843         if (type != "see" && type != "seealso"
844             && type != "sortkey" && type != "subentry")
845                 return false;
846
847         Paragraph const & par = paragraphs().front();
848         InsetList::const_iterator it = par.insetList().begin();
849         int subidxs = 0;
850         for (; it != par.insetList().end(); ++it) {
851                 Inset & inset = *it->inset;
852                 if (type == "sortkey" && inset.lyxCode() == INDEXMACRO_SORTKEY_CODE)
853                         return false;
854                 if (inset.lyxCode() == INDEXMACRO_CODE) {
855                         InsetIndexMacro const & iim = static_cast<InsetIndexMacro const &>(inset);
856                         if ((type == "see" || type == "seealso")
857                              && (iim.params().type == InsetIndexMacroParams::See
858                                  || iim.params().type == InsetIndexMacroParams::Seealso))
859                                 return false;
860                         if (type == "subentry"
861                              && iim.params().type == InsetIndexMacroParams::Subentry) {
862                                 ++subidxs;
863                                 if (subidxs > 1)
864                                         return false;
865                         }
866                 }
867         }
868         return true;
869 }
870
871
872 ColorCode InsetIndex::labelColor() const
873 {
874         if (params_.index.empty() || params_.index == from_ascii("idx"))
875                 return InsetCollapsible::labelColor();
876         // FIXME UNICODE
877         ColorCode c = lcolor.getFromLyXName(to_utf8(params_.index)
878                                             + "@" + buffer().fileName().absFileName());
879         if (c == Color_none)
880                 c = InsetCollapsible::labelColor();
881         return c;
882 }
883
884
885 docstring InsetIndex::toolTip(BufferView const &, int, int) const
886 {
887         docstring tip = _("Index Entry");
888         if (buffer().params().use_indices && !params_.index.empty()) {
889                 Buffer const & realbuffer = *buffer().masterBuffer();
890                 IndicesList const & indiceslist = realbuffer.params().indiceslist();
891                 tip += " (";
892                 Index const * index = indiceslist.findShortcut(params_.index);
893                 if (!index)
894                         tip += _("unknown type!");
895                 else
896                         tip += index->index();
897                 tip += ")";
898         }
899         tip += ": ";
900         docstring res = toolTipText(tip);
901         if (!insetindexpagerangetranslator_loc().find(params_.range).empty())
902                 res += "\n" + insetindexpagerangetranslator_loc().find(params_.range);
903         if (!params_.pagefmt.empty() && params_.pagefmt != "default") {
904                 res += "\n" + _("Pagination format:") + " ";
905                 if (params_.pagefmt == "textbf")
906                         res += _("bold");
907                 else if (params_.pagefmt == "textit")
908                         res += _("italic");
909                 else if (params_.pagefmt == "emph")
910                         res += _("emphasized");
911                 else
912                         res += from_utf8(params_.pagefmt);
913         }
914         return res;
915 }
916
917
918 docstring const InsetIndex::buttonLabel(BufferView const & bv) const
919 {
920         InsetLayout const & il = getLayout();
921         docstring label = translateIfPossible(il.labelstring());
922
923         if (buffer().params().use_indices && !params_.index.empty()) {
924                 Buffer const & realbuffer = *buffer().masterBuffer();
925                 IndicesList const & indiceslist = realbuffer.params().indiceslist();
926                 label += " (";
927                 Index const * index = indiceslist.findShortcut(params_.index);
928                 if (!index)
929                         label += _("unknown type!");
930                 else
931                         label += index->index();
932                 label += ")";
933         }
934
935         docstring res;
936         if (!il.contentaslabel() || geometry(bv) != ButtonOnly)
937                 res = label;
938         else {
939                 res = getNewLabel(label);
940                 OutputParams const rp(0);
941                 vector<docstring> sublbls = getSubentriesAsText(rp, true);
942                 for (auto const & sublbl : sublbls) {
943                         res += " " + docstring(1, char_type(0x2023));// TRIANGULAR BULLET
944                         res += " " + sublbl;
945                 }
946         }
947         if (!insetindexpagerangetranslator_latex().find(params_.range).empty())
948                 res += " " + from_ascii(insetindexpagerangetranslator_latex().find(params_.range));
949         return res;
950 }
951
952
953 void InsetIndex::write(ostream & os) const
954 {
955         os << to_utf8(layoutName());
956         params_.write(os);
957         InsetCollapsible::write(os);
958 }
959
960
961 void InsetIndex::read(Lexer & lex)
962 {
963         params_.read(lex);
964         InsetCollapsible::read(lex);
965 }
966
967
968 string InsetIndex::params2string(InsetIndexParams const & params)
969 {
970         ostringstream data;
971         data << "index";
972         params.write(data);
973         return data.str();
974 }
975
976
977 void InsetIndex::string2params(string const & in, InsetIndexParams & params)
978 {
979         params = InsetIndexParams();
980         if (in.empty())
981                 return;
982
983         istringstream data(in);
984         Lexer lex;
985         lex.setStream(data);
986         lex.setContext("InsetIndex::string2params");
987         lex >> "index";
988         params.read(lex);
989 }
990
991
992 void InsetIndex::addToToc(DocIterator const & cpit, bool output_active,
993                                                   UpdateType utype, TocBackend & backend) const
994 {
995         DocIterator pit = cpit;
996         pit.push_back(CursorSlice(const_cast<InsetIndex &>(*this)));
997         docstring str;
998         InsetLayout const & il = getLayout();
999         docstring label = translateIfPossible(il.labelstring());
1000         if (!il.contentaslabel())
1001                 str = label;
1002         else {
1003                 str = getNewLabel(label);
1004                 OutputParams const rp(0);
1005                 vector<docstring> sublbls = getSubentriesAsText(rp, true);
1006                 for (auto const & sublbl : sublbls) {
1007                         str += " " + docstring(1, char_type(0x2023));// TRIANGULAR BULLET
1008                         str += " " + sublbl;
1009                 }
1010         }
1011         string type = "index";
1012         if (buffer().masterBuffer()->params().use_indices)
1013                 type += ":" + to_utf8(params_.index);
1014         TocBuilder & b = backend.builder(type);
1015         b.pushItem(pit, str, output_active);
1016         // Proceed with the rest of the inset.
1017         InsetCollapsible::addToToc(cpit, output_active, utype, backend);
1018         b.pop();
1019 }
1020
1021
1022 void InsetIndex::validate(LaTeXFeatures & features) const
1023 {
1024         if (buffer().masterBuffer()->params().use_indices
1025             && !params_.index.empty()
1026             && params_.index != "idx")
1027                 features.require("splitidx");
1028         InsetCollapsible::validate(features);
1029 }
1030
1031
1032 string InsetIndex::contextMenuName() const
1033 {
1034         return "context-index";
1035 }
1036
1037
1038 string InsetIndex::contextMenu(BufferView const & bv, int x, int y) const
1039 {
1040         // We override the implementation of InsetCollapsible,
1041         // because we have eytra entries.
1042         string owncm = "context-edit-index;";
1043         return owncm + InsetCollapsible::contextMenu(bv, x, y);
1044 }
1045
1046
1047 bool InsetIndex::hasSettings() const
1048 {
1049         return true;
1050 }
1051
1052
1053 bool InsetIndex::insetAllowed(InsetCode code) const
1054 {
1055         switch (code) {
1056         case INDEXMACRO_CODE:
1057         case INDEXMACRO_SORTKEY_CODE:
1058                 return true;
1059         case INDEX_CODE:
1060                 return false;
1061         default:
1062                 return InsetCollapsible::insetAllowed(code);
1063         }
1064 }
1065
1066
1067 /////////////////////////////////////////////////////////////////////
1068 //
1069 // InsetIndexParams
1070 //
1071 ///////////////////////////////////////////////////////////////////////
1072
1073
1074 void InsetIndexParams::write(ostream & os) const
1075 {
1076         os << ' ';
1077         if (!index.empty())
1078                 os << to_utf8(index);
1079         else
1080                 os << "idx";
1081         os << '\n';
1082         os << "range "
1083            << insetindexpagerangetranslator().find(range)
1084            << '\n';
1085         os << "pageformat "
1086            << pagefmt
1087            << '\n';
1088 }
1089
1090
1091 void InsetIndexParams::read(Lexer & lex)
1092 {
1093         if (lex.eatLine())
1094                 index = lex.getDocString();
1095         else
1096                 index = from_ascii("idx");
1097         if (lex.checkFor("range")) {
1098                 string st = lex.getString();
1099                 if (lex.eatLine()) {
1100                         st = lex.getString();
1101                         range = insetindexpagerangetranslator().find(lex.getString());
1102                 }
1103         }
1104         if (lex.checkFor("pageformat") && lex.eatLine()) {
1105                 pagefmt = lex.getString();
1106         }
1107 }
1108
1109
1110 /////////////////////////////////////////////////////////////////////
1111 //
1112 // InsetPrintIndex
1113 //
1114 ///////////////////////////////////////////////////////////////////////
1115
1116 InsetPrintIndex::InsetPrintIndex(Buffer * buf, InsetCommandParams const & p)
1117         : InsetCommand(buf, p)
1118 {}
1119
1120
1121 ParamInfo const & InsetPrintIndex::findInfo(string const & /* cmdName */)
1122 {
1123         static ParamInfo param_info_;
1124         if (param_info_.empty()) {
1125                 param_info_.add("type", ParamInfo::LATEX_OPTIONAL,
1126                                 ParamInfo::HANDLING_ESCAPE);
1127                 param_info_.add("name", ParamInfo::LATEX_OPTIONAL,
1128                                 ParamInfo::HANDLING_LATEXIFY);
1129                 param_info_.add("literal", ParamInfo::LYX_INTERNAL);
1130         }
1131         return param_info_;
1132 }
1133
1134
1135 docstring InsetPrintIndex::screenLabel() const
1136 {
1137         bool const printall = suffixIs(getCmdName(), '*');
1138         bool const multind = buffer().masterBuffer()->params().use_indices;
1139         if ((!multind
1140              && getParam("type") == from_ascii("idx"))
1141             || (getParam("type").empty() && !printall))
1142                 return _("Index");
1143         Buffer const & realbuffer = *buffer().masterBuffer();
1144         IndicesList const & indiceslist = realbuffer.params().indiceslist();
1145         Index const * index = indiceslist.findShortcut(getParam("type"));
1146         if (!index && !printall)
1147                 return _("Unknown index type!");
1148         docstring res = printall ? _("All indexes") : index->index();
1149         if (!multind)
1150                 res += " (" + _("non-active") + ")";
1151         else if (contains(getCmdName(), "printsubindex"))
1152                 res += " (" + _("subindex") + ")";
1153         return res;
1154 }
1155
1156
1157 bool InsetPrintIndex::isCompatibleCommand(string const & s)
1158 {
1159         return s == "printindex" || s == "printsubindex"
1160                 || s == "printindex*" || s == "printsubindex*";
1161 }
1162
1163
1164 void InsetPrintIndex::doDispatch(Cursor & cur, FuncRequest & cmd)
1165 {
1166         switch (cmd.action()) {
1167
1168         case LFUN_INSET_MODIFY: {
1169                 if (cmd.argument() == from_ascii("toggle-subindex")) {
1170                         string scmd = getCmdName();
1171                         if (contains(scmd, "printindex"))
1172                                 scmd = subst(scmd, "printindex", "printsubindex");
1173                         else
1174                                 scmd = subst(scmd, "printsubindex", "printindex");
1175                         cur.recordUndo();
1176                         setCmdName(scmd);
1177                         break;
1178                 } else if (cmd.argument() == from_ascii("check-printindex*")) {
1179                         string scmd = getCmdName();
1180                         if (suffixIs(scmd, '*'))
1181                                 break;
1182                         scmd += '*';
1183                         cur.recordUndo();
1184                         setParam("type", docstring());
1185                         setCmdName(scmd);
1186                         break;
1187                 }
1188                 InsetCommandParams p(INDEX_PRINT_CODE);
1189                 // FIXME UNICODE
1190                 InsetCommand::string2params(to_utf8(cmd.argument()), p);
1191                 if (p.getCmdName().empty()) {
1192                         cur.noScreenUpdate();
1193                         break;
1194                 }
1195                 cur.recordUndo();
1196                 setParams(p);
1197                 break;
1198         }
1199
1200         default:
1201                 InsetCommand::doDispatch(cur, cmd);
1202                 break;
1203         }
1204 }
1205
1206
1207 bool InsetPrintIndex::getStatus(Cursor & cur, FuncRequest const & cmd,
1208         FuncStatus & status) const
1209 {
1210         switch (cmd.action()) {
1211
1212         case LFUN_INSET_MODIFY: {
1213                 if (cmd.argument() == from_ascii("toggle-subindex")) {
1214                         status.setEnabled(buffer().masterBuffer()->params().use_indices);
1215                         status.setOnOff(contains(getCmdName(), "printsubindex"));
1216                         return true;
1217                 } else if (cmd.argument() == from_ascii("check-printindex*")) {
1218                         status.setEnabled(buffer().masterBuffer()->params().use_indices);
1219                         status.setOnOff(suffixIs(getCmdName(), '*'));
1220                         return true;
1221                 } if (cmd.getArg(0) == "index_print"
1222                     && cmd.getArg(1) == "CommandInset") {
1223                         InsetCommandParams p(INDEX_PRINT_CODE);
1224                         InsetCommand::string2params(to_utf8(cmd.argument()), p);
1225                         if (suffixIs(p.getCmdName(), '*')) {
1226                                 status.setEnabled(true);
1227                                 status.setOnOff(false);
1228                                 return true;
1229                         }
1230                         Buffer const & realbuffer = *buffer().masterBuffer();
1231                         IndicesList const & indiceslist =
1232                                 realbuffer.params().indiceslist();
1233                         Index const * index = indiceslist.findShortcut(p["type"]);
1234                         status.setEnabled(index != 0);
1235                         status.setOnOff(p["type"] == getParam("type"));
1236                         return true;
1237                 } else
1238                         return InsetCommand::getStatus(cur, cmd, status);
1239         }
1240
1241         case LFUN_INSET_DIALOG_UPDATE: {
1242                 status.setEnabled(buffer().masterBuffer()->params().use_indices);
1243                 return true;
1244         }
1245
1246         default:
1247                 return InsetCommand::getStatus(cur, cmd, status);
1248         }
1249 }
1250
1251
1252 void InsetPrintIndex::updateBuffer(ParIterator const &, UpdateType, bool const /*deleted*/)
1253 {
1254         Index const * index =
1255                 buffer().masterParams().indiceslist().findShortcut(getParam("type"));
1256         if (index)
1257                 setParam("name", index->index());
1258 }
1259
1260
1261 void InsetPrintIndex::latex(otexstream & os, OutputParams const & runparams_in) const
1262 {
1263         if (!buffer().masterBuffer()->params().use_indices) {
1264                 if (getParam("type") == from_ascii("idx"))
1265                         os << "\\printindex" << termcmd;
1266                 return;
1267         }
1268         OutputParams runparams = runparams_in;
1269         os << getCommand(runparams);
1270 }
1271
1272
1273 void InsetPrintIndex::validate(LaTeXFeatures & features) const
1274 {
1275         features.require("makeidx");
1276         if (buffer().masterBuffer()->params().use_indices)
1277                 features.require("splitidx");
1278         InsetCommand::validate(features);
1279 }
1280
1281
1282 string InsetPrintIndex::contextMenuName() const
1283 {
1284         return buffer().masterBuffer()->params().use_indices ?
1285                 "context-indexprint" : string();
1286 }
1287
1288
1289 bool InsetPrintIndex::hasSettings() const
1290 {
1291         return buffer().masterBuffer()->params().use_indices;
1292 }
1293
1294
1295 class IndexEntry
1296 {
1297 public:
1298         /// Builds an entry for the index.
1299         IndexEntry(const InsetIndex * inset, OutputParams const * runparams) : inset_(inset), runparams_(runparams)
1300         {
1301                 LASSERT(runparams, return);
1302
1303                 // Convert the inset as text. The resulting text usually only contains an XHTML anchor (<a id='...'/>) and text.
1304                 odocstringstream entry;
1305                 OutputParams ours = *runparams;
1306                 ours.for_toc = false;
1307                 inset_->plaintext(entry, ours);
1308                 entry_ = entry.str();
1309
1310                 // Determine in which index this entry belongs to.
1311                 if (inset_->buffer().masterBuffer()->params().use_indices) {
1312                         index_ = inset_->params_.index;
1313                 }
1314
1315                 // Attempt parsing the inset.
1316                 if (isModern())
1317                         parseAsModern();
1318                 else
1319                         parseAsLegacy();
1320         }
1321
1322         /// When parsing this entry, some errors may be found; they are reported as a single string.
1323         // It is up to the caller to send this string to LYXERR and the output file, as needed.
1324         const docstring & output_error() const
1325         {
1326                 return output_error_;
1327         }
1328
1329         void output_error(XMLStream xs) const
1330         {
1331                 LYXERR0(output_error());
1332                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + output_error() + from_utf8(" -->\n"));
1333         }
1334
1335
1336 private:
1337         bool isModern()
1338         {
1339                 std::cout << to_utf8(entry_) << std::endl;
1340
1341                 // If a modern parameter is present, this is definitely a modern index inset. Similarly, if it contains the
1342                 // usual LaTeX symbols (!|@), then it is definitely a legacy index inset. Otherwise, if it has features of
1343                 // neither, it is both: consider this is a modern inset, to trigger the least complex code. Mixing both types
1344                 // is not allowed (i.e. behaviour is undefined).
1345                 const bool is_definitely_modern = inset_->hasSortKey() || inset_->hasSeeRef() || inset_->hasSubentries()
1346                                             || inset_->params_.range != InsetIndexParams::PageRange::None;
1347                 const bool is_definitely_legacy = entry_.find('@') != std::string::npos
1348                                 || entry_.find('|') != std::string::npos || entry_.find('!') != std::string::npos;
1349
1350                 if (is_definitely_legacy && is_definitely_modern)
1351                         output_error_ += from_utf8("Mix of index properties and raw LaTeX index commands is unsupported. ");
1352
1353                 // Truth table:
1354                 // - is_definitely_modern == true:
1355                 //   - is_definitely_legacy == true: error (return whatever)
1356                 //   - is_definitely_legacy == false: return modern
1357                 // - is_definitely_modern == false:
1358                 //   - is_definitely_legacy == true: return legacy
1359                 //   - is_definitely_legacy == false: return modern
1360                 return !is_definitely_legacy;
1361         }
1362
1363         void parseAsModern()
1364         {
1365                 LASSERT(runparams_, return);
1366
1367                 if (inset_->hasSortKey()) {
1368                         sort_as_ = inset_->getSortkeyAsText(*runparams_);
1369                 }
1370
1371                 terms_ = inset_->getSubentriesAsText(*runparams_);
1372                 // The main term is not present in the vector, as it's not a subentry. The main index term is inserted raw in
1373                 // the index inset. Considering that the user either uses the new or the legacy mechanism, the main term is the
1374                 // full string within this inset (i.e. without the subinsets).
1375                 terms_.insert(terms_.begin(), inset_->getMainSubentryAsText(*runparams_));
1376
1377                 has_start_range_ = inset_->params_.range == InsetIndexParams::PageRange::Start;
1378                 has_end_range_ = inset_->params_.range == InsetIndexParams::PageRange::End;
1379
1380                 see_ = inset_->getSeeAsText(*runparams_);
1381                 see_alsoes_ = inset_->getSeeAlsoesAsText(*runparams_);
1382         }
1383
1384         void parseAsLegacy() {
1385                 // Determine if some features are known not to be supported. For now, this is only formatting like
1386                 // \index{alpha@\textbf{alpha}} or \index{alpha@$\alpha$}.
1387                 // @ is supported, but only for sorting, without specific formatting.
1388                 if (entry_.find(from_utf8("@\\")) != lyx::docstring::npos) {
1389                         output_error_ += from_utf8("Unsupported feature: an index entry contains an @\\. "
1390                                                    "Complete entry: \"") + entry_ + from_utf8("\". ");
1391                 }
1392                 if (entry_.find(from_utf8("@$")) != lyx::docstring::npos) {
1393                         output_error_ += from_utf8("Unsupported feature: an index entry contains an @$. "
1394                                                    "Complete entry: \"") + entry_ + from_utf8("\". ");
1395                 }
1396
1397                 // Split the string into its main constituents: terms, and command (see, see also, range).
1398                 size_t positionVerticalBar = entry_.find(from_ascii("|")); // What comes before | is (sub)(sub)entries.
1399                 docstring indexTerms = entry_.substr(0, positionVerticalBar);
1400                 docstring command;
1401                 if (positionVerticalBar != lyx::docstring::npos) {
1402                         command = entry_.substr(positionVerticalBar + 1);
1403                 }
1404
1405                 // Handle sorting issues, with @.
1406                 vector<docstring> sortingElements = getVectorFromString(indexTerms, from_ascii("@"), false);
1407                 if (sortingElements.size() == 2) {
1408                         sort_as_ = sortingElements[0];
1409                         indexTerms = sortingElements[1];
1410                 }
1411
1412                 // Handle entries, subentries, and subsubentries.
1413                 terms_ = getVectorFromString(indexTerms, from_ascii("!"), false);
1414
1415                 // Handle ranges. Happily, (| and |) can only be at the end of the string!
1416                 has_start_range_ = entry_.find(from_ascii("|(")) != lyx::docstring::npos;
1417                 has_end_range_ = entry_.find(from_ascii("|)")) != lyx::docstring::npos;
1418
1419                 // - Remove the ranges from the command if they do not appear at the beginning.
1420                 size_t range_index = 0;
1421                 while ((range_index = command.find(from_utf8("|("), range_index)) != std::string::npos)
1422                         command.erase(range_index, 1);
1423                 range_index = 0;
1424                 while ((range_index = command.find(from_utf8("|)"), range_index)) != std::string::npos)
1425                         command.erase(range_index, 1);
1426
1427                 // - Remove the ranges when they are the only vertical bar in the complete string.
1428                 if (command[0] == '(' || command[0] == ')')
1429                         command.erase(0, 1);
1430
1431                 // Handle see and seealso. As "see" is a prefix of "seealso", the order of the comparisons is important.
1432                 // Both commands are mutually exclusive!
1433                 if (command.substr(0, 3) == "see") {
1434                         // Unescape brackets.
1435                         size_t index_argument_begin = 0;
1436                         while ((index_argument_begin = command.find(from_utf8("\\{"), index_argument_begin)) != std::string::npos)
1437                                 command.erase(index_argument_begin, 1);
1438                         size_t index_argument_end = 0;
1439                         while ((index_argument_end = command.find(from_utf8("\\}"), index_argument_end)) != std::string::npos)
1440                                 command.erase(index_argument_end, 1);
1441
1442                         // Retrieve the part between brackets, and remove the complete seealso.
1443                         size_t position_opening_bracket = command.find(from_ascii("{"));
1444                         size_t position_closing_bracket = command.find(from_ascii("}"));
1445                         docstring argument = command.substr(position_opening_bracket + 1,
1446                                                                                                 position_closing_bracket - position_opening_bracket - 1);
1447
1448                         // Parse the argument of referenced entries (or a single one for see).
1449                         if (command.substr(0, 7) == "seealso") {
1450                                 see_alsoes_ = getVectorFromString(argument, from_ascii(","), false);
1451                         } else {
1452                                 see_ = argument;
1453
1454                                 if (see_.find(from_ascii(",")) != std::string::npos) {
1455                                         output_error_ += from_utf8("Several index_argument_end terms found as \"see\"! Only one is "
1456                                                                    "acceptable. Complete entry: \"") + entry_ + from_utf8("\". ");
1457                                 }
1458                         }
1459
1460                         // Remove the complete see/seealso from the commands, in case there is something else to parse.
1461                         command = command.substr(position_closing_bracket + 1);
1462                 }
1463
1464                 // Some parts of the strings are not parsed, as they do not have anything matching in DocBook or XHTML:
1465                 // things like formatting the entry or the page number, other strings for sorting.
1466                 // https://wiki.lyx.org/Tips/Indexing
1467                 // If there are such things in the index entry, then this code may miserably fail. For example, for
1468                 // "Peter|(textbf", no range will be detected.
1469                 if (!command.empty()) {
1470                         output_error_ += from_utf8("Unsupported feature: an index entry contains a | with an unsupported command, ")
1471                                          + command + from_utf8(". Complete entry: \"") + entry_ + from_utf8("\". ");
1472                 }
1473         }
1474
1475 public:
1476         int level() const {
1477                 return terms_.size();
1478         }
1479
1480         const std::vector<docstring>& terms() const {
1481                 return terms_;
1482         }
1483
1484         std::vector<docstring>& terms() {
1485                 return terms_;
1486         }
1487
1488         const InsetIndex* inset() const {
1489                 return inset_;
1490         }
1491
private:
        // Input inset. These should only be used when parsing the inset (either parseAsModern or parseAsLegacy, called in
        // the constructor).
        const InsetIndex * inset_;
        OutputParams const * runparams_;
        // Raw text of the entry, as consumed by the legacy parser (which looks for '|', '@' and '!' in it).
        docstring entry_;
        docstring index_; // Useful when there are multiple indices in the same document.

        // Errors, concatenated as a single string, available as soon as parsing is done, const afterwards (i.e. once
        // constructor is done).
        docstring output_error_;

        // Parsed index entry.
        // Terms of the entry: the legacy parser splits the text before '|' on '!'.
        std::vector<docstring> terms_; // Up to three entries, in general.
        // Sorting key: in the legacy parser, the part before '@' when exactly one '@' splits the terms.
        docstring sort_as_;
        // NOTE(review): the visible part of the legacy parser works on a local `command` and never assigns this
        // member -- confirm where (or whether) it is set.
        docstring command_;
        // In the legacy parser: whether the raw entry contains "|(" (start of a range).
        bool has_start_range_;
        // In the legacy parser: whether the raw entry contains "|)" (end of a range).
        bool has_end_range_;
        // Argument of a "see" command (a single referenced entry).
        docstring see_;
        // Arguments of a "seealso" command, split on ','.
        vector<docstring> see_alsoes_;

        // Operators used for sorting entries (alphabetical order).
        friend bool operator<(IndexEntry const & lhs, IndexEntry const & rhs);
1515 };
1516
1517 bool operator<(IndexEntry const & lhs, IndexEntry const & rhs)
1518 {
1519         if (lhs.terms_.empty())
1520                 return false;
1521
1522         for (unsigned i = 0; i < min(rhs.terms_.size(), lhs.terms_.size()); ++i) {
1523                 int comp = compare_no_case(lhs.terms_[i], rhs.terms_[i]);
1524                 if (comp != 0)
1525                         return comp < 0;
1526         }
1527         return false;
1528 }
1529
1530
1531 namespace {
// Builds the CSS class for an index item at the given depth: "entry" prefixed by one "sub" per level
// (0 -> "entry", 1 -> "subentry", 2 -> "subsubentry", ...).
std::string generateCssClassAtDepth(unsigned depth) {
        std::string css_class;

        for (unsigned level = 0; level < depth; ++level)
                css_class += "sub";
        css_class += "entry";

        return css_class;
}
1542
// One node of the index tree: all the index entries sharing the same terms, plus the nodes for their subentries.
// The root of the tree has no term of its own and is not expected to hold entries.
struct IndexNode {
        // Entries whose terms all lead to this node (possibly none, when the node only exists for its children).
        std::vector<IndexEntry> entries;
        // Nodes one level deeper. They are raw pointers to nodes allocated with `new` (see insertIntoNode); this
        // struct has no destructor, so whoever owns the root is responsible for releasing the whole tree.
        std::vector<IndexNode*> children;
};
1547
1548 docstring termAtLevel(const IndexNode* node, unsigned depth)
1549 {
1550         // The typical entry has a depth of 1 to 3: the call stack would then be at most 4 (due to the root node). This
1551         // function could be made constant time by copying the term in each node, but that would make data duplication that
1552         // may fall out of sync; the performance benefit would probably be negligible.
1553         if (!node->entries.empty()) {
1554                 LASSERT(node->entries.begin()->terms().size() >= depth + 1, return from_ascii(""));
1555                 return node->entries.begin()->terms()[depth];
1556         }
1557
1558         if (!node->children.empty()) {
1559                 return termAtLevel(*node->children.begin(), depth);
1560         }
1561
1562         LASSERT(false, return from_ascii(""));
1563 }
1564
1565 void insertIntoNode(const IndexEntry& entry, IndexNode* node, unsigned depth = 0)
1566 {
1567         // depth == 0 is for the root, not yet the index, hence the increase when going to vector size.
1568         for (IndexNode* child : node->children) {
1569                 if (entry.terms()[depth] == termAtLevel(child, depth)) {
1570                         if (depth + 1 == entry.terms().size()) { // == child.entries.begin()->terms().size()
1571                                 // All term entries match: it's an entry.
1572                                 child->entries.emplace_back(entry);
1573                                 return;
1574                         } else {
1575                                 insertIntoNode(entry, child, depth + 1);
1576                                 return;
1577                         }
1578                 }
1579         }
1580
1581         // Out of the loop: no matching child found, create a new (possibly nested) child for this entry. Due to the
1582         // possibility of nestedness, only insert the current entry when the right level is reached. This is needed if the
1583         // first entry for a word has several levels that never appeared.
1584         // In particular, this case is called for the first entry.
1585         IndexNode* new_node = node;
1586         do {
1587                 new_node->children.emplace_back(new IndexNode{{}, {}});
1588                 new_node = new_node->children.back();
1589                 depth += 1;
1590         } while (depth + 1 <= entry.terms().size()); // depth == 0: root node, no text associated.
1591         new_node->entries.emplace_back(entry);
1592 }
1593
1594 IndexNode* buildIndexTree(vector<IndexEntry>& entries)
1595 {
1596         // Sort the entries, first on the main entry, then the subentry, then the subsubentry,
1597         // thanks to the implementation of operator<.
1598         // If this operation is not performed, the algorithm below is no more correct (and ensuring that it works with
1599         // unsorted entries would make its complexity blow up).
1600         stable_sort(entries.begin(), entries.end());
1601
1602         // Cook the index into a nice tree data structure: entries at a given level in the index as a node, with subentries
1603         // as children.
1604         auto* index_root = new IndexNode{{}, {}};
1605         for (const IndexEntry& entry : entries) {
1606                 insertIntoNode(entry, index_root);
1607         }
1608
1609         return index_root;
1610 }
1611
1612 void outputIndexPage(XMLStream & xs, const IndexNode* root_node, unsigned depth = 0) // NOLINT(misc-no-recursion)
1613 {
1614         LASSERT(root_node->entries.size() + root_node->children.size() > 0, return);
1615
1616         xs << xml::StartTag("li", "class='" + generateCssClassAtDepth(depth) + "'");
1617         xs << xml::CR();
1618         xs << XMLStream::ESCAPE_NONE << termAtLevel(root_node, depth);
1619         // By tree assumption, all the entries at this node have the same set of terms.
1620
1621         if (!root_node->entries.empty()) {
1622                 xs << XMLStream::ESCAPE_NONE << " &#8212; "; // Em dash, i.e. long (---).
1623                 unsigned entry_number = 1;
1624
1625                 auto writeLinkToEntry = [&xs](const IndexEntry &entry, unsigned entry_number) {
1626                         std::string const link_attr = "href='#" + entry.inset()->paragraphs()[0].magicLabel() + "'";
1627                         xs << xml::StartTag("a", link_attr);
1628                         xs << from_ascii(std::to_string(entry_number));
1629                         xs << xml::EndTag("a");
1630                 };
1631
1632                 for (unsigned i = 0; i < root_node->entries.size(); ++i) {
1633                         const IndexEntry &entry = root_node->entries[i];
1634
1635                         switch (entry.inset()->params().range) {
1636                                 case InsetIndexParams::PageRange::None:
1637                                         writeLinkToEntry(entry, entry_number);
1638                                         break;
1639                                 case InsetIndexParams::PageRange::Start: {
1640                                         // Try to find the end of the range, if it is just after. Otherwise, the output will be slightly
1641                                         // scrambled, but understandable. Doing better would mean implementing more of the indexing logic here
1642                                         // and more complex indexing here (skipping the end is not just incrementing i). Worst case output:
1643                                         //     1--, 2, --3
1644                                         const bool nextEntryIsEnd = i + 1 < root_node->entries.size() &&
1645                                                                     root_node->entries[i + 1].inset()->params().range ==
1646                                                                     InsetIndexParams::PageRange::End;
1647                                         // No need to check if both entries are for the same terms: they are in the same IndexNode.
1648
1649                                         writeLinkToEntry(entry, entry_number);
1650                                         xs << XMLStream::ESCAPE_NONE << " &#8211; "; // En dash, i.e. semi-long (--).
1651
1652                                         if (nextEntryIsEnd) {
1653                                                 // Skip the next entry in the loop, write it right now, after the dash.
1654                                                 entry_number += 1;
1655                                                 i += 1;
1656                                                 writeLinkToEntry(root_node->entries[i], entry_number);
1657                                         }
1658                                 }
1659                                         break;
1660                                 case InsetIndexParams::PageRange::End:
1661                                         // This range end was not caught by the range start, do it now to avoid losing content.
1662                                         xs << XMLStream::ESCAPE_NONE << " &#8211; "; // En dash, i.e. semi-long (--).
1663                                         writeLinkToEntry(root_node->entries[i], entry_number);
1664                         }
1665
1666                         if (i < root_node->entries.size() - 1) {
1667                                 xs << ", ";
1668                         }
1669                         entry_number += 1;
1670                 }
1671         }
1672
1673         if (!root_node->entries.empty() && !root_node->children.empty()) {
1674                 xs << xml::CR();
1675         }
1676
1677         if (!root_node->children.empty()) {
1678                 xs << xml::StartTag("ul", "class='" + generateCssClassAtDepth(depth) + "'");
1679                 xs << xml::CR();
1680
1681                 for (const IndexNode* child : root_node->children) {
1682                         outputIndexPage(xs, child, depth + 1);
1683                 }
1684
1685                 xs << xml::EndTag("ul");
1686                 xs << xml::CR();
1687         }
1688
1689         xs << xml::EndTag("li");
1690         xs << xml::CR();
1691 }
1692
1693 #ifdef LYX_INSET_INDEX_DEBUG
1694 void printTree(const IndexNode* root_node, unsigned depth = 0)
1695 {
1696         static const std::string pattern = "    ";
1697         std::string prefix;
1698         for (unsigned i = 0; i < depth; ++i) {
1699                 prefix += pattern;
1700         }
1701         const std::string prefix_long = prefix + pattern + pattern;
1702
1703         docstring term_at_level;
1704         if (depth == 0) {
1705                 // The root has no term.
1706                 std::cout << "<ROOT>" << std::endl;
1707         } else {
1708                 LASSERT(depth - 1 <= 10, return); // Check for overflows.
1709                 term_at_level = termAtLevel(root_node, depth - 1);
1710                 std::cout << prefix << to_utf8(term_at_level) << " (x " << std::to_string(root_node->entries.size()) << ")"
1711                           << std::endl;
1712         }
1713
1714         for (const IndexEntry& entry : root_node->entries) {
1715                 if (entry.terms().size() != depth) {
1716                         std::cout << prefix_long << "ERROR: an entry doesn't have the same number of terms" << std::endl;
1717                 }
1718                 if (depth > 0 && entry.terms()[depth - 1] != term_at_level) {
1719                         std::cout << prefix_long << "ERROR: an entry doesn't have the right term at depth " << std::to_string(depth)
1720                                 << std::endl;
1721                 }
1722         }
1723
1724         for (const IndexNode* node : root_node->children) {
1725                 printTree(node, depth + 1);
1726         }
1727 }
1728 #endif // LYX_INSET_INDEX_DEBUG
1729 }
1730
1731
1732 docstring InsetPrintIndex::xhtml(XMLStream &, OutputParams const & op) const
1733 {
1734         BufferParams const & bp = buffer().masterBuffer()->params();
1735
1736         shared_ptr<Toc const> toc = buffer().tocBackend().toc("index");
1737         if (toc->empty())
1738                 return docstring();
1739
1740         // Collect the index entries in a form we can use them.
1741         vector<IndexEntry> entries;
1742         const docstring & indexType = params().getParamOr("type", from_ascii("idx"));
1743         for (const TocItem& item : *toc) {
1744                 const auto* inset = static_cast<const InsetIndex*>(&(item.dit().inset()));
1745                 if (item.isOutput() && inset->params().index == indexType)
1746                         entries.emplace_back(IndexEntry{inset, &op});
1747         }
1748
1749         // If all the index entries are in notes or not displayed, get out sooner.
1750         if (entries.empty())
1751                 return docstring();
1752
1753         const IndexNode* index_root = buildIndexTree(entries);
1754 #ifdef LYX_INSET_INDEX_DEBUG
1755         printTree(index_root);
1756 #endif
1757
1758         // Start generating the XHTML index.
1759         Layout const & lay = bp.documentClass().htmlTOCLayout();
1760         string const & tocclass = lay.defaultCSSClass();
1761         string const tocattr = "class='index " + tocclass + "'";
1762         docstring const indexName = params().getParamOr("name", from_ascii("Index"));
1763
1764         // we'll use our own stream, because we are going to defer everything.
1765         // that's how we deal with the fact that we're probably inside a standard
1766         // paragraph, and we don't want to be.
1767         odocstringstream ods;
1768         XMLStream xs(ods);
1769
1770         xs << xml::StartTag("div", tocattr);
1771         xs << xml::CR();
1772         xs << xml::StartTag(lay.htmltag(), lay.htmlattr());
1773         xs << translateIfPossible(indexName, op.local_font->language()->lang());
1774         xs << xml::EndTag(lay.htmltag());
1775         xs << xml::CR();
1776         xs << xml::StartTag("ul", "class='main'");
1777         xs << xml::CR();
1778
1779         LASSERT(index_root->entries.empty(), return docstring()); // No index entry should have zero terms.
1780         for (const IndexNode* node : index_root->children) {
1781                 outputIndexPage(xs, node);
1782         }
1783
1784         xs << xml::EndTag("ul");
1785         xs << xml::CR();
1786         xs << xml::EndTag("div");
1787
1788         return ods.str();
1789 }
1790
1791 } // namespace lyx