]> git.lyx.org Git - lyx.git/blob - src/insets/InsetIndex.cpp
InsetIndex: revamp IndexEntry to handle both legacy and modern index insets; simplify...
[lyx.git] / src / insets / InsetIndex.cpp
1 /**
2  * \file InsetIndex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Jürgen Spitzmüller
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11 #include <config.h>
12
13 #include "InsetIndex.h"
14 #include "InsetIndexMacro.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "BufferView.h"
19 #include "ColorSet.h"
20 #include "Cursor.h"
21 #include "DispatchResult.h"
22 #include "Encoding.h"
23 #include "ErrorList.h"
24 #include "FuncRequest.h"
25 #include "FuncStatus.h"
26 #include "IndicesList.h"
27 #include "InsetList.h"
28 #include "Language.h"
29 #include "LaTeX.h"
30 #include "LaTeXFeatures.h"
31 #include "Lexer.h"
32 #include "output_latex.h"
33 #include "output_xhtml.h"
34 #include "xml.h"
35 #include "texstream.h"
36 #include "TextClass.h"
37 #include "TocBackend.h"
38
39 #include "support/debug.h"
40 #include "support/docstream.h"
41 #include "support/FileName.h"
42 #include "support/gettext.h"
43 #include "support/lstrings.h"
44 #include "support/Translator.h"
45
46 #include "frontends/alert.h"
47
48 #include <algorithm>
49 #include <set>
50 #include <iostream>
51
52 #include <QThreadStorage>
53
54 using namespace std;
55 using namespace lyx::support;
56
57 namespace lyx {
58
59 namespace {
60
61 typedef Translator<string, InsetIndexParams::PageRange> PageRangeTranslator;
62 typedef Translator<docstring, InsetIndexParams::PageRange> PageRangeTranslatorLoc;
63
64 PageRangeTranslator const init_insetindexpagerangetranslator()
65 {
66         PageRangeTranslator translator("none", InsetIndexParams::None);
67         translator.addPair("start", InsetIndexParams::Start);
68         translator.addPair("end", InsetIndexParams::End);
69         return translator;
70 }
71
72 PageRangeTranslator const init_insetindexpagerangetranslator_latex()
73 {
74         PageRangeTranslator translator("", InsetIndexParams::None);
75         translator.addPair("(", InsetIndexParams::Start);
76         translator.addPair(")", InsetIndexParams::End);
77         return translator;
78 }
79
80
81 PageRangeTranslatorLoc const init_insetindexpagerangetranslator_loc()
82 {
83         PageRangeTranslatorLoc translator(docstring(), InsetIndexParams::None);
84         translator.addPair(_("Starts page range"), InsetIndexParams::Start);
85         translator.addPair(_("Ends page range"), InsetIndexParams::End);
86         return translator;
87 }
88
89
90 PageRangeTranslator const & insetindexpagerangetranslator()
91 {
92         static PageRangeTranslator const prtranslator =
93                         init_insetindexpagerangetranslator();
94         return prtranslator;
95 }
96
97
98 PageRangeTranslatorLoc const & insetindexpagerangetranslator_loc()
99 {
100         static PageRangeTranslatorLoc const translator =
101                         init_insetindexpagerangetranslator_loc();
102         return translator;
103 }
104
105
106 PageRangeTranslator const & insetindexpagerangetranslator_latex()
107 {
108         static PageRangeTranslator const lttranslator =
109                         init_insetindexpagerangetranslator_latex();
110         return lttranslator;
111 }
112
113 } // namespace anon
114
115 /////////////////////////////////////////////////////////////////////
116 //
117 // InsetIndex
118 //
119 ///////////////////////////////////////////////////////////////////////
120
121
122 InsetIndex::InsetIndex(Buffer * buf, InsetIndexParams const & params)
123         : InsetCollapsible(buf), params_(params)
124 {}
125
126
127 void InsetIndex::latex(otexstream & ios, OutputParams const & runparams_in) const
128 {
129         OutputParams runparams(runparams_in);
130         runparams.inIndexEntry = true;
131
132         otexstringstream os;
133
134         if (buffer().masterBuffer()->params().use_indices && !params_.index.empty()
135                 && params_.index != "idx") {
136                 os << "\\sindex[";
137                 os << escape(params_.index);
138                 os << "]{";
139         } else {
140                 os << "\\index";
141                 os << '{';
142         }
143
144         // Get the LaTeX output from InsetText. We need to deconstruct this later
145         // in order to check if we need to generate a sorting key
146         odocstringstream ourlatex;
147         otexstream ots(ourlatex);
148         InsetText::latex(ots, runparams);
149         if (runparams.for_search != OutputParams::NoSearch) {
150                 // No need for special handling, if we are only searching for some patterns
151                 os << ourlatex.str() << "}";
152                 return;
153         }
154
155         if (hasSortKey()) {
156                 getSortkey(os, runparams);
157                 os << "@";
158                 os << ourlatex.str();
159                 getSubentries(os, runparams);
160                 if (hasSeeRef()) {
161                         os << "|";
162                         os << insetindexpagerangetranslator_latex().find(params_.range);
163                         getSeeRefs(os, runparams);
164                 }
165         } else {
166                 // We check whether we need a sort key.
167                 // If so, we use the plaintext version
168                 odocstringstream ourplain;
169                 InsetText::plaintext(ourplain, runparams);
170
171                 // These are the LaTeX and plaintext representations
172                 docstring latexstr = ourlatex.str();
173                 docstring plainstr = ourplain.str();
174         
175                 // This will get what follows | if anything does,
176                 // the command (e.g., see, textbf) for pagination
177                 // formatting
178                 docstring cmd;
179
180                 if (hasSeeRef()) {
181                         odocstringstream seeref;
182                         otexstream otsee(seeref);
183                         getSeeRefs(otsee, runparams);
184                         cmd = seeref.str();
185                 } else if (!params_.pagefmt.empty() && params_.pagefmt != "default") {
186                         cmd = from_utf8(params_.pagefmt);
187                 } else {
188                         // Check for the | separator to strip the cmd.
189                         // This goes wrong on an escaped "|", but as the escape
190                         // character can be changed in style files, we cannot
191                         // prevent that.
192                         size_t pos = latexstr.find(from_ascii("|"));
193                         if (pos != docstring::npos) {
194                                 // Put the bit after "|" into cmd...
195                                 cmd = latexstr.substr(pos + 1);
196                                 // ...and erase that stuff from latexstr
197                                 latexstr = latexstr.erase(pos);
198                                 // ...as well as from plainstr
199                                 size_t ppos = plainstr.find(from_ascii("|"));
200                                 if (ppos < plainstr.size())
201                                         plainstr.erase(ppos);
202                                 else
203                                         LYXERR0("The `|' separator was not found in the plaintext version!");
204                         }
205                 }
206
207                 odocstringstream subentries;
208                 otexstream otsub(subentries);
209                 getSubentries(otsub, runparams);
210                 if (subentries.str().empty()) {
211                         // Separate the entries and subentries, i.e., split on "!".
212                         // This goes wrong on an escaped "!", but as the escape
213                         // character can be changed in style files, we cannot
214                         // prevent that.
215                         std::vector<docstring> const levels =
216                                         getVectorFromString(latexstr, from_ascii("!"), true);
217                         std::vector<docstring> const levels_plain =
218                                         getVectorFromString(plainstr, from_ascii("!"), true);
219                 
220                         vector<docstring>::const_iterator it = levels.begin();
221                         vector<docstring>::const_iterator end = levels.end();
222                         vector<docstring>::const_iterator it2 = levels_plain.begin();
223                         bool first = true;
224                         for (; it != end; ++it) {
225                                 // The separator needs to be put back when
226                                 // writing the levels, except for the first level
227                                 if (!first)
228                                         os << '!';
229                                 else
230                                         first = false;
231                 
232                                 // Now here comes the reason for this whole procedure:
233                                 // We try to correctly sort macros and formatted strings.
234                                 // If we find a command, prepend a plain text
235                                 // version of the content to get sorting right,
236                                 // e.g. \index{LyX@\LyX}, \index{text@\textbf{text}}.
237                                 // We do this on all levels.
238                                 // We don't do it if the level already contains a '@', though.
239                                 // Plaintext might return nothing (e.g. for ERTs).
240                                 // In that case, we use LaTeX.
241                                 docstring const spart = (levels_plain.empty() || (*it2).empty()) ? *it : *it2;
242                                 processLatexSorting(os, runparams, *it, spart);
243                                 if (it2 < levels_plain.end())
244                                         ++it2;
245                         }
246                 } else {
247                         processLatexSorting(os, runparams, latexstr, plainstr);
248                         os << subentries.str();
249                 }
250
251                 // At last, re-insert the command, separated by "|"
252                 if (!cmd.empty()) {
253                         os << "|"
254                            << insetindexpagerangetranslator_latex().find(params_.range)
255                            << cmd;
256                 }
257         }
258         os << '}';
259
260         // In macros with moving arguments, such as \section,
261         // we store the index and output it after the macro (#2154)
262         if (runparams_in.postpone_fragile_stuff)
263                 runparams_in.post_macro += os.str();
264         else
265                 ios << os.release();
266 }
267
268
269 void InsetIndex::processLatexSorting(otexstream & os, OutputParams const & runparams,
270                                 docstring const latex, docstring const spart) const
271 {
272         if (contains(latex, '\\') && !contains(latex, '@')) {
273                 // Now we need to validate that all characters in
274                 // the sorting part are representable in the current
275                 // encoding. If not try the LaTeX macro which might
276                 // or might not be a good choice, and issue a warning.
277                 pair<docstring, docstring> spart_latexed =
278                                 runparams.encoding->latexString(spart, runparams.dryrun);
279                 if (!spart_latexed.second.empty())
280                         LYXERR0("Uncodable character in index entry. Sorting might be wrong!");
281                 if (spart != spart_latexed.first && !runparams.dryrun) {
282                         TeXErrors terr;
283                         ErrorList & errorList = buffer().errorList("Export");
284                         docstring const s = bformat(_("LyX's automatic index sorting algorithm faced "
285                                                       "problems with the entry '%1$s'.\n"
286                                                       "Please specify the sorting of this entry manually, as "
287                                                       "explained in the User Guide."), spart);
288                         Paragraph const & par = buffer().paragraphs().front();
289                         errorList.push_back(ErrorItem(_("Index sorting failed"), s,
290                                                       {par.id(), 0}, {par.id(), -1}));
291                         buffer().bufferErrors(terr, errorList);
292                 }
293                 // Remove remaining \'s from the sort key
294                 docstring ppart = subst(spart_latexed.first, from_ascii("\\"), docstring());
295                 // Plain quotes need to be escaped, however (#10649), as this
296                 // is the default escape character
297                 ppart = subst(ppart, from_ascii("\""), from_ascii("\\\""));
298
299                 // Now insert the sortkey, separated by '@'.
300                 os << ppart;
301                 os << '@';
302         }
303         // Insert the actual level text
304         os << latex;
305 }
306
307
308 void InsetIndex::docbook(XMLStream & xs, OutputParams const & runparams) const
309 {
310         // Two ways of processing this inset are implemented:
311         // - the legacy one, based on parsing the raw LaTeX (before LyX 2.4) -- unlikely to be deprecated
312         // - the modern one, based on precise insets for indexing features
313         // Like the LaTeX implementation, consider the user chooses either of those options.
314
315         // Get the content of the inset as LaTeX, as some things may be encoded as ERT (like {}).
316         // TODO: if there is an ERT within the index term, its conversion should be tried, in case it becomes useful;
317         //  otherwise, ERTs should become comments. For now, they are just copied as-is, which is barely satisfactory.
318         odocstringstream odss;
319         otexstream ots(odss);
320         InsetText::latex(ots, runparams);
321         docstring latexString = trim(odss.str());
322
323         // Check whether there are unsupported things. @ is supported, but only for sorting, without specific formatting.
324         if (latexString.find(from_utf8("@\\")) != lyx::docstring::npos) {
325                 docstring error = from_utf8("Unsupported feature: an index entry contains an @\\. "
326                                                                         "Complete entry: \"") + latexString + from_utf8("\"");
327                 LYXERR0(error);
328                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
329         }
330
331         // Handle several indices (indicated in the inset instead of the raw latexString).
332         docstring indexType = from_utf8("");
333         if (buffer().masterBuffer()->params().use_indices) {
334                 indexType += " type=\"" + params_.index + "\"";
335         }
336
337         // Split the string into its main constituents: terms, and command (see, see also, range).
338         size_t positionVerticalBar = latexString.find(from_ascii("|")); // What comes before | is (sub)(sub)entries.
339         docstring indexTerms = latexString.substr(0, positionVerticalBar);
340         docstring command;
341         if (positionVerticalBar != lyx::docstring::npos) {
342                 command = latexString.substr(positionVerticalBar + 1);
343         }
344
345         // Handle sorting issues, with @.
346         docstring sortAs;
347         if (hasSortKey()) {
348                 sortAs = getSortkeyAsText(runparams);
349                 // indexTerms may contain a sort key if the user has both the inset and the manual key.
350         } else {
351                 vector<docstring> sortingElements = getVectorFromString(indexTerms, from_ascii("@"), false);
352                 if (sortingElements.size() == 2) {
353                         sortAs = sortingElements[0];
354                         indexTerms = sortingElements[1];
355                 }
356         }
357
358         // Handle primary, secondary, and tertiary terms (entries, subentries, and subsubentries, for LaTeX).
359         vector<docstring> terms;
360         if (const vector<docstring> potential_terms = getSubentriesAsText(runparams); !potential_terms.empty()) {
361                 terms = potential_terms;
362                 // The main term is not present in the vector, as it's not a subentry. The main index term is inserted raw in
363                 // the index inset. Considering that the user either uses the new or the legacy mechanism, the main term is the
364                 // full string within this inset (i.e. without the subinsets).
365                 terms.insert(terms.begin(), latexString);
366         } else {
367                 terms = getVectorFromString(indexTerms, from_ascii("!"), false);
368         }
369
370         // Handle ranges. Happily, in the raw LaTeX mode, (| and |) can only be at the end of the string!
371         bool hasInsetRange = params_.range != InsetIndexParams::PageRange::None;
372         bool hasStartRange = params_.range == InsetIndexParams::PageRange::Start ||
373                         latexString.find(from_ascii("|(")) != lyx::docstring::npos;
374         bool hasEndRange = params_.range == InsetIndexParams::PageRange::End ||
375                         latexString.find(from_ascii("|)")) != lyx::docstring::npos;
376
377         if (hasInsetRange) {
378                 // Remove the ranges from the command if they do not appear at the beginning.
379                 size_t index = 0;
380                 while ((index = command.find(from_utf8("|("), index)) != std::string::npos)
381                         command.erase(index, 1);
382                 index = 0;
383                 while ((index = command.find(from_utf8("|)"), index)) != std::string::npos)
384                         command.erase(index, 1);
385
386                 // Remove the ranges when they are the only vertical bar in the complete string.
387                 if (command[0] == '(' || command[0] == ')')
388                         command.erase(0, 1);
389         }
390
391         // Handle see and seealso. As "see" is a prefix of "seealso", the order of the comparisons is important.
392         // Both commands are mutually exclusive!
393         docstring see = getSeeAsText(runparams);
394         vector<docstring> seeAlsoes = getSeeAlsoesAsText(runparams);
395
396         if (see.empty() && seeAlsoes.empty() && command.substr(0, 3) == "see") {
397                 // Unescape brackets.
398                 size_t index = 0;
399                 while ((index = command.find(from_utf8("\\{"), index)) != std::string::npos)
400                         command.erase(index, 1);
401                 index = 0;
402                 while ((index = command.find(from_utf8("\\}"), index)) != std::string::npos)
403                         command.erase(index, 1);
404
405                 // Retrieve the part between brackets, and remove the complete seealso.
406                 size_t positionOpeningBracket = command.find(from_ascii("{"));
407                 size_t positionClosingBracket = command.find(from_ascii("}"));
408                 docstring list = command.substr(positionOpeningBracket + 1, positionClosingBracket - positionOpeningBracket - 1);
409
410                 // Parse the list of referenced entries (or a single one for see).
411                 if (command.substr(0, 7) == "seealso") {
412                         seeAlsoes = getVectorFromString(list, from_ascii(","), false);
413                 } else {
414                         see = list;
415
416                         if (see.find(from_ascii(",")) != std::string::npos) {
417                                 docstring error = from_utf8("Several index terms found as \"see\"! Only one is acceptable. "
418                                                                                         "Complete entry: \"") + latexString + from_utf8("\"");
419                                 LYXERR0(error);
420                                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
421                         }
422                 }
423
424                 // Remove the complete see/seealso from the commands, in case there is something else to parse.
425                 command = command.substr(positionClosingBracket + 1);
426         }
427
428         // Some parts of the strings are not parsed, as they do not have anything matching in DocBook: things like
429         // formatting the entry or the page number, other strings for sorting. https://wiki.lyx.org/Tips/Indexing
430         // If there are such things in the index entry, then this code may miserably fail. For example, for "Peter|(textbf",
431         // no range will be detected.
432         // TODO: Could handle formatting as significance="preferred"?
433         if (!command.empty()) {
434                 docstring error = from_utf8("Unsupported feature: an index entry contains a | with an unsupported command, ")
435                                           + command + from_utf8(". ") + from_utf8("Complete entry: \"") + latexString + from_utf8("\"");
436                 LYXERR0(error);
437                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
438         }
439
440     // Write all of this down.
441         if (terms.empty() && !hasEndRange) {
442                 docstring error = from_utf8("No index term found! Complete entry: \"") + latexString + from_utf8("\"");
443                 LYXERR0(error);
444                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
445         } else {
446                 // Generate the attributes for ranges. It is based on the terms that are indexed, but the ID must be unique
447                 // to this indexing area (xml::cleanID does not guarantee this: for each call with the same arguments,
448                 // the same legal ID is produced; here, as the input would be the same, the output must be, by design).
449                 // Hence the thread-local storage, as the numbers must strictly be unique, and thus cannot be shared across
450                 // a paragraph (making the solution used for HTML worthless). This solution is very similar to the one used in
451                 // xml::cleanID.
452                 // indexType can only be used for singular and startofrange types!
453                 docstring attrs;
454                 if (!hasStartRange && !hasEndRange) {
455                         attrs = indexType;
456                 } else {
457                         // Append an ID if uniqueness is not guaranteed across the document.
458                         static QThreadStorage<set<docstring>> tKnownTermLists;
459                         static QThreadStorage<int> tID;
460
461                         set<docstring> &knownTermLists = tKnownTermLists.localData();
462                         int &ID = tID.localData();
463
464                         if (!tID.hasLocalData()) {
465                                 tID.localData() = 0;
466                         }
467
468                         // Modify the index terms to add the unique ID if needed.
469                         docstring newIndexTerms = indexTerms;
470                         if (knownTermLists.find(indexTerms) != knownTermLists.end()) {
471                                 newIndexTerms += from_ascii(string("-") + to_string(ID));
472
473                                 // Only increment for the end of range, so that the same number is used for the start of range.
474                                 if (hasEndRange) {
475                                         ID++;
476                                 }
477                         }
478
479                         // Term list not yet known: add it to the set AFTER the end of range. After
480                         if (knownTermLists.find(indexTerms) == knownTermLists.end() && hasEndRange) {
481                                 knownTermLists.insert(indexTerms);
482                         }
483
484                         // Generate the attributes.
485                         docstring id = xml::cleanID(newIndexTerms);
486                         if (hasStartRange) {
487                                 attrs = indexType + " class=\"startofrange\" xml:id=\"" + id + "\"";
488                         } else {
489                                 attrs = " class=\"endofrange\" startref=\"" + id + "\"";
490                         }
491                 }
492
493                 // Handle the index terms (including the specific index for this entry).
494                 if (hasEndRange) {
495                         xs << xml::CompTag("indexterm", attrs);
496                 } else {
497                         xs << xml::StartTag("indexterm", attrs);
498                         if (!terms.empty()) { // hasEndRange has no content.
499                                 docstring attr;
500                                 if (!sortAs.empty()) {
501                                         attr = from_utf8("sortas='") + sortAs + from_utf8("'");
502                                 }
503
504                                 xs << xml::StartTag("primary", attr);
505                                 xs << terms[0];
506                                 xs << xml::EndTag("primary");
507                         }
508                         if (terms.size() > 1) {
509                                 xs << xml::StartTag("secondary");
510                                 xs << terms[1];
511                                 xs << xml::EndTag("secondary");
512                         }
513                         if (terms.size() > 2) {
514                                 xs << xml::StartTag("tertiary");
515                                 xs << terms[2];
516                                 xs << xml::EndTag("tertiary");
517                         }
518
519                         // Handle see and see also.
520                         if (!see.empty()) {
521                                 xs << xml::StartTag("see");
522                                 xs << see;
523                                 xs << xml::EndTag("see");
524                         }
525
526                         if (!seeAlsoes.empty()) {
527                                 for (auto &entry : seeAlsoes) {
528                                         xs << xml::StartTag("seealso");
529                                         xs << entry;
530                                         xs << xml::EndTag("seealso");
531                                 }
532                         }
533
534                         // Close the entry.
535                         xs << xml::EndTag("indexterm");
536                 }
537         }
538 }
539
540
541 docstring InsetIndex::xhtml(XMLStream & xs, OutputParams const &) const
542 {
543         // we just print an anchor, taking the paragraph ID from
544         // our own interior paragraph, which doesn't get printed
545         std::string const magic = paragraphs().front().magicLabel();
546         std::string const attr = "id='" + magic + "'";
547         xs << xml::CompTag("a", attr);
548         return docstring();
549 }
550
551
552 bool InsetIndex::showInsetDialog(BufferView * bv) const
553 {
554         bv->showDialog("index", params2string(params_),
555                         const_cast<InsetIndex *>(this));
556         return true;
557 }
558
559
560 void InsetIndex::doDispatch(Cursor & cur, FuncRequest & cmd)
561 {
562         switch (cmd.action()) {
563
564         case LFUN_INSET_MODIFY: {
565                 if (cmd.getArg(0) == "changetype") {
566                         cur.recordUndoInset(this);
567                         params_.index = from_utf8(cmd.getArg(1));
568                         break;
569                 }
570                 InsetIndexParams params;
571                 InsetIndex::string2params(to_utf8(cmd.argument()), params);
572                 cur.recordUndoInset(this);
573                 params_.index = params.index;
574                 params_.range = params.range;
575                 params_.pagefmt = params.pagefmt;
576                 // what we really want here is a TOC update, but that means
577                 // a full buffer update
578                 cur.forceBufferUpdate();
579                 break;
580         }
581
582         case LFUN_INSET_DIALOG_UPDATE:
583                 cur.bv().updateDialog("index", params2string(params_));
584                 break;
585
586         default:
587                 InsetCollapsible::doDispatch(cur, cmd);
588                 break;
589         }
590 }
591
592
593 bool InsetIndex::getStatus(Cursor & cur, FuncRequest const & cmd,
594                 FuncStatus & flag) const
595 {
596         switch (cmd.action()) {
597
598         case LFUN_INSET_MODIFY:
599                 if (cmd.getArg(0) == "changetype") {
600                         docstring const newtype = from_utf8(cmd.getArg(1));
601                         Buffer const & realbuffer = *buffer().masterBuffer();
602                         IndicesList const & indiceslist = realbuffer.params().indiceslist();
603                         Index const * index = indiceslist.findShortcut(newtype);
604                         flag.setEnabled(index != 0);
605                         flag.setOnOff(
606                                 from_utf8(cmd.getArg(1)) == params_.index);
607                         return true;
608                 }
609                 return InsetCollapsible::getStatus(cur, cmd, flag);
610
611         case LFUN_INSET_DIALOG_UPDATE: {
612                 Buffer const & realbuffer = *buffer().masterBuffer();
613                 flag.setEnabled(realbuffer.params().use_indices);
614                 return true;
615         }
616         
617         case LFUN_INDEXMACRO_INSERT:
618                 return macrosPossible(cmd.getArg(0));
619
620         default:
621                 return InsetCollapsible::getStatus(cur, cmd, flag);
622         }
623 }
624
625
626 void InsetIndex::getSortkey(otexstream & os, OutputParams const & runparams) const
627 {
628         Paragraph const & par = paragraphs().front();
629         InsetList::const_iterator it = par.insetList().begin();
630         for (; it != par.insetList().end(); ++it) {
631                 Inset & inset = *it->inset;
632                 if (inset.lyxCode() == INDEXMACRO_SORTKEY_CODE) {
633                         InsetIndexMacro const & iim =
634                                 static_cast<InsetIndexMacro const &>(inset);
635                         iim.getLatex(os, runparams);
636                         return;
637                 }
638         }
639 }
640
641
642 docstring InsetIndex::getSortkeyAsText(OutputParams const & runparams) const
643 {
644         Paragraph const & par = paragraphs().front();
645         InsetList::const_iterator it = par.insetList().begin();
646         for (; it != par.insetList().end(); ++it) {
647                 Inset & inset = *it->inset;
648                 if (inset.lyxCode() == INDEXMACRO_SORTKEY_CODE) {
649                         otexstringstream os;
650                         InsetIndexMacro const & iim =
651                                 static_cast<InsetIndexMacro const &>(inset);
652                         iim.getLatex(os, runparams);
653                         return os.str();
654                 }
655         }
656         return from_ascii("");
657 }
658
659
660 void InsetIndex::getSubentries(otexstream & os, OutputParams const & runparams) const
661 {
662         Paragraph const & par = paragraphs().front();
663         InsetList::const_iterator it = par.insetList().begin();
664         int i = 0;
665         for (; it != par.insetList().end(); ++it) {
666                 Inset & inset = *it->inset;
667                 if (inset.lyxCode() == INDEXMACRO_CODE) {
668                         InsetIndexMacro const & iim =
669                                 static_cast<InsetIndexMacro const &>(inset);
670                         if (iim.params().type == InsetIndexMacroParams::Subindex) {
671                                 ++i;
672                                 if (i > 2)
673                                         return;
674                                 os << "!";
675                                 iim.getLatex(os, runparams);
676                         }
677                 }
678         }
679 }
680
681
682 std::vector<docstring> InsetIndex::getSubentriesAsText(OutputParams const & runparams) const
683 {
684         std::vector<docstring> subentries;
685
686         Paragraph const & par = paragraphs().front();
687         InsetList::const_iterator it = par.insetList().begin();
688         int i = 0;
689         for (; it != par.insetList().end(); ++it) {
690                 Inset & inset = *it->inset;
691                 if (inset.lyxCode() == INDEXMACRO_CODE) {
692                         InsetIndexMacro const & iim =
693                                 static_cast<InsetIndexMacro const &>(inset);
694                         if (iim.params().type == InsetIndexMacroParams::Subindex) {
695                                 ++i;
696                                 if (i > 2)
697                                         break;
698
699                                 otexstringstream os;
700                                 iim.getLatex(os, runparams);
701                                 subentries.emplace_back(os.str());
702                         }
703                 }
704         }
705
706         return subentries;
707 }
708
709
710 docstring InsetIndex::getMainSubentryAsText(OutputParams const & runparams) const
711 {
712         otexstringstream os;
713         InsetText::latex(os, runparams);
714         return os.str();
715 }
716
717
718 void InsetIndex::getSeeRefs(otexstream & os, OutputParams const & runparams) const
719 {
720         Paragraph const & par = paragraphs().front();
721         InsetList::const_iterator it = par.insetList().begin();
722         for (; it != par.insetList().end(); ++it) {
723                 Inset & inset = *it->inset;
724                 if (inset.lyxCode() == INDEXMACRO_CODE) {
725                         InsetIndexMacro const & iim =
726                                 static_cast<InsetIndexMacro const &>(inset);
727                         if (iim.params().type == InsetIndexMacroParams::See
728                             || iim.params().type == InsetIndexMacroParams::Seealso) {
729                                 iim.getLatex(os, runparams);
730                                 return;
731                         }
732                 }
733         }
734 }
735
736
737 docstring InsetIndex::getSeeAsText(OutputParams const & runparams) const
738 {
739         Paragraph const & par = paragraphs().front();
740         InsetList::const_iterator it = par.insetList().begin();
741         for (; it != par.insetList().end(); ++it) {
742                 Inset & inset = *it->inset;
743                 if (inset.lyxCode() == INDEXMACRO_CODE) {
744                         InsetIndexMacro const & iim =
745                                 static_cast<InsetIndexMacro const &>(inset);
746                         if (iim.params().type == InsetIndexMacroParams::See) {
747                                 otexstringstream os;
748                                 iim.getLatex(os, runparams);
749                                 return os.str();
750                         }
751                 }
752         }
753         return from_ascii("");
754 }
755
756
757 std::vector<docstring> InsetIndex::getSeeAlsoesAsText(OutputParams const & runparams) const
758 {
759         std::vector<docstring> seeAlsoes;
760
761         Paragraph const & par = paragraphs().front();
762         InsetList::const_iterator it = par.insetList().begin();
763         for (; it != par.insetList().end(); ++it) {
764                 Inset & inset = *it->inset;
765                 if (inset.lyxCode() == INDEXMACRO_CODE) {
766                         InsetIndexMacro const & iim =
767                                 static_cast<InsetIndexMacro const &>(inset);
768                         if (iim.params().type == InsetIndexMacroParams::Seealso) {
769                                 otexstringstream os;
770                                 iim.getLatex(os, runparams);
771                                 seeAlsoes.emplace_back(os.str());
772                         }
773                 }
774         }
775
776         return seeAlsoes;
777 }
778
779
780 namespace {
781
782 bool hasInsetWithCode(const InsetIndex * const inset_index, const InsetCode code,
783                                           const std::set<InsetIndexMacroParams::Type> types = {})
784 {
785         Paragraph const & par = inset_index->paragraphs().front();
786         InsetList::const_iterator it = par.insetList().begin();
787         for (; it != par.insetList().end(); ++it) {
788                 Inset & inset = *it->inset;
789                 if (inset.lyxCode() == code) {
790                         if (types.empty())
791                                 return true;
792
793                         LASSERT(code == INDEXMACRO_CODE, return false);
794                         InsetIndexMacro const & iim =
795                                         static_cast<InsetIndexMacro const &>(inset);
796                         if (types.find(iim.params().type) != types.end())
797                                 return true;
798                 }
799         }
800         return false;
801 }
802
803 } // namespace
804
805
806 bool InsetIndex::hasSubentries() const
807 {
808         return hasInsetWithCode(this, INDEXMACRO_CODE, {InsetIndexMacroParams::Subindex});
809 }
810
811
812 bool InsetIndex::hasSeeRef() const
813 {
814         return hasInsetWithCode(this, INDEXMACRO_CODE, {InsetIndexMacroParams::See, InsetIndexMacroParams::Seealso});
815 }
816
817
818 bool InsetIndex::hasSortKey() const
819 {
820         return hasInsetWithCode(this, INDEXMACRO_SORTKEY_CODE);
821 }
822
823
824 bool InsetIndex::macrosPossible(string const type) const
825 {
826         if (type != "see" && type != "seealso"
827             && type != "sortkey" && type != "subindex")
828                 return false;
829
830         Paragraph const & par = paragraphs().front();
831         InsetList::const_iterator it = par.insetList().begin();
832         int subidxs = 0;
833         for (; it != par.insetList().end(); ++it) {
834                 Inset & inset = *it->inset;
835                 if (type == "sortkey" && inset.lyxCode() == INDEXMACRO_SORTKEY_CODE)
836                         return false;
837                 if (inset.lyxCode() == INDEXMACRO_CODE) {
838                         InsetIndexMacro const & iim = static_cast<InsetIndexMacro const &>(inset);
839                         if ((type == "see" || type == "seealso")
840                              && (iim.params().type == InsetIndexMacroParams::See
841                                  || iim.params().type == InsetIndexMacroParams::Seealso))
842                                 return false;
843                         if (type == "subindex"
844                              && iim.params().type == InsetIndexMacroParams::Subindex) {
845                                 ++subidxs;
846                                 if (subidxs > 1)
847                                         return false;
848                         }
849                 }
850         }
851         return true;
852 }
853
854
855 ColorCode InsetIndex::labelColor() const
856 {
857         if (params_.index.empty() || params_.index == from_ascii("idx"))
858                 return InsetCollapsible::labelColor();
859         // FIXME UNICODE
860         ColorCode c = lcolor.getFromLyXName(to_utf8(params_.index)
861                                             + "@" + buffer().fileName().absFileName());
862         if (c == Color_none)
863                 c = InsetCollapsible::labelColor();
864         return c;
865 }
866
867
868 docstring InsetIndex::toolTip(BufferView const &, int, int) const
869 {
870         docstring tip = _("Index Entry");
871         if (buffer().params().use_indices && !params_.index.empty()) {
872                 Buffer const & realbuffer = *buffer().masterBuffer();
873                 IndicesList const & indiceslist = realbuffer.params().indiceslist();
874                 tip += " (";
875                 Index const * index = indiceslist.findShortcut(params_.index);
876                 if (!index)
877                         tip += _("unknown type!");
878                 else
879                         tip += index->index();
880                 tip += ")";
881         }
882         tip += ": ";
883         docstring res = toolTipText(tip);
884         if (!insetindexpagerangetranslator_loc().find(params_.range).empty())
885                 res += "\n" + insetindexpagerangetranslator_loc().find(params_.range);
886         if (!params_.pagefmt.empty() && params_.pagefmt != "default") {
887                 res += "\n" + _("Pagination format:") + " ";
888                 if (params_.pagefmt == "textbf")
889                         res += _("bold");
890                 else if (params_.pagefmt == "textit")
891                         res += _("italic");
892                 else if (params_.pagefmt == "emph")
893                         res += _("emphasized");
894                 else
895                         res += from_utf8(params_.pagefmt);
896         }
897         return res;
898 }
899
900
901 docstring const InsetIndex::buttonLabel(BufferView const & bv) const
902 {
903         InsetLayout const & il = getLayout();
904         docstring label = translateIfPossible(il.labelstring());
905
906         if (buffer().params().use_indices && !params_.index.empty()) {
907                 Buffer const & realbuffer = *buffer().masterBuffer();
908                 IndicesList const & indiceslist = realbuffer.params().indiceslist();
909                 label += " (";
910                 Index const * index = indiceslist.findShortcut(params_.index);
911                 if (!index)
912                         label += _("unknown type!");
913                 else
914                         label += index->index();
915                 label += ")";
916         }
917
918         docstring res;
919         if (!il.contentaslabel() || geometry(bv) != ButtonOnly)
920                 res = label;
921         else
922                 res = getNewLabel(label);
923         if (!insetindexpagerangetranslator_latex().find(params_.range).empty())
924                 res += " " + from_ascii(insetindexpagerangetranslator_latex().find(params_.range));
925         return res;
926 }
927
928
929 void InsetIndex::write(ostream & os) const
930 {
931         os << to_utf8(layoutName());
932         params_.write(os);
933         InsetCollapsible::write(os);
934 }
935
936
937 void InsetIndex::read(Lexer & lex)
938 {
939         params_.read(lex);
940         InsetCollapsible::read(lex);
941 }
942
943
944 string InsetIndex::params2string(InsetIndexParams const & params)
945 {
946         ostringstream data;
947         data << "index";
948         params.write(data);
949         return data.str();
950 }
951
952
953 void InsetIndex::string2params(string const & in, InsetIndexParams & params)
954 {
955         params = InsetIndexParams();
956         if (in.empty())
957                 return;
958
959         istringstream data(in);
960         Lexer lex;
961         lex.setStream(data);
962         lex.setContext("InsetIndex::string2params");
963         lex >> "index";
964         params.read(lex);
965 }
966
967
968 void InsetIndex::addToToc(DocIterator const & cpit, bool output_active,
969                                                   UpdateType utype, TocBackend & backend) const
970 {
971         DocIterator pit = cpit;
972         pit.push_back(CursorSlice(const_cast<InsetIndex &>(*this)));
973         docstring str;
974         string type = "index";
975         if (buffer().masterBuffer()->params().use_indices)
976                 type += ":" + to_utf8(params_.index);
977         // this is unlikely to be terribly long
978         text().forOutliner(str, INT_MAX);
979         TocBuilder & b = backend.builder(type);
980         b.pushItem(pit, str, output_active);
981         // Proceed with the rest of the inset.
982         InsetCollapsible::addToToc(cpit, output_active, utype, backend);
983         b.pop();
984 }
985
986
987 void InsetIndex::validate(LaTeXFeatures & features) const
988 {
989         if (buffer().masterBuffer()->params().use_indices
990             && !params_.index.empty()
991             && params_.index != "idx")
992                 features.require("splitidx");
993         InsetCollapsible::validate(features);
994 }
995
996
997 string InsetIndex::contextMenuName() const
998 {
999         return "context-index";
1000 }
1001
1002
1003 string InsetIndex::contextMenu(BufferView const & bv, int x, int y) const
1004 {
1005         // We override the implementation of InsetCollapsible,
1006         // because we have eytra entries.
1007         string owncm = "context-edit-index;";
1008         return owncm + InsetCollapsible::contextMenu(bv, x, y);
1009 }
1010
1011
1012 bool InsetIndex::hasSettings() const
1013 {
1014         return true;
1015 }
1016
1017
1018 bool InsetIndex::insetAllowed(InsetCode code) const
1019 {
1020         switch (code) {
1021         case INDEXMACRO_CODE:
1022         case INDEXMACRO_SORTKEY_CODE:
1023                 return true;
1024         case INDEX_CODE:
1025                 return false;
1026         default:
1027                 return InsetCollapsible::insetAllowed(code);
1028         }
1029 }
1030
1031
1032 /////////////////////////////////////////////////////////////////////
1033 //
1034 // InsetIndexParams
1035 //
1036 ///////////////////////////////////////////////////////////////////////
1037
1038
1039 void InsetIndexParams::write(ostream & os) const
1040 {
1041         os << ' ';
1042         if (!index.empty())
1043                 os << to_utf8(index);
1044         else
1045                 os << "idx";
1046         os << '\n';
1047         os << "range "
1048            << insetindexpagerangetranslator().find(range)
1049            << '\n';
1050         os << "pageformat "
1051            << pagefmt
1052            << '\n';
1053 }
1054
1055
1056 void InsetIndexParams::read(Lexer & lex)
1057 {
1058         if (lex.eatLine())
1059                 index = lex.getDocString();
1060         else
1061                 index = from_ascii("idx");
1062         if (lex.checkFor("range")) {
1063                 string st = lex.getString();
1064                 if (lex.eatLine()) {
1065                         st = lex.getString();
1066                         range = insetindexpagerangetranslator().find(lex.getString());
1067                 }
1068         }
1069         if (lex.checkFor("pageformat") && lex.eatLine()) {
1070                 pagefmt = lex.getString();
1071         }
1072 }
1073
1074
1075 /////////////////////////////////////////////////////////////////////
1076 //
1077 // InsetPrintIndex
1078 //
1079 ///////////////////////////////////////////////////////////////////////
1080
1081 InsetPrintIndex::InsetPrintIndex(Buffer * buf, InsetCommandParams const & p)
1082         : InsetCommand(buf, p)
1083 {}
1084
1085
1086 ParamInfo const & InsetPrintIndex::findInfo(string const & /* cmdName */)
1087 {
1088         static ParamInfo param_info_;
1089         if (param_info_.empty()) {
1090                 param_info_.add("type", ParamInfo::LATEX_OPTIONAL,
1091                                 ParamInfo::HANDLING_ESCAPE);
1092                 param_info_.add("name", ParamInfo::LATEX_OPTIONAL,
1093                                 ParamInfo::HANDLING_LATEXIFY);
1094                 param_info_.add("literal", ParamInfo::LYX_INTERNAL);
1095         }
1096         return param_info_;
1097 }
1098
1099
1100 docstring InsetPrintIndex::screenLabel() const
1101 {
1102         bool const printall = suffixIs(getCmdName(), '*');
1103         bool const multind = buffer().masterBuffer()->params().use_indices;
1104         if ((!multind
1105              && getParam("type") == from_ascii("idx"))
1106             || (getParam("type").empty() && !printall))
1107                 return _("Index");
1108         Buffer const & realbuffer = *buffer().masterBuffer();
1109         IndicesList const & indiceslist = realbuffer.params().indiceslist();
1110         Index const * index = indiceslist.findShortcut(getParam("type"));
1111         if (!index && !printall)
1112                 return _("Unknown index type!");
1113         docstring res = printall ? _("All indexes") : index->index();
1114         if (!multind)
1115                 res += " (" + _("non-active") + ")";
1116         else if (contains(getCmdName(), "printsubindex"))
1117                 res += " (" + _("subindex") + ")";
1118         return res;
1119 }
1120
1121
1122 bool InsetPrintIndex::isCompatibleCommand(string const & s)
1123 {
1124         return s == "printindex" || s == "printsubindex"
1125                 || s == "printindex*" || s == "printsubindex*";
1126 }
1127
1128
1129 void InsetPrintIndex::doDispatch(Cursor & cur, FuncRequest & cmd)
1130 {
1131         switch (cmd.action()) {
1132
1133         case LFUN_INSET_MODIFY: {
1134                 if (cmd.argument() == from_ascii("toggle-subindex")) {
1135                         string scmd = getCmdName();
1136                         if (contains(scmd, "printindex"))
1137                                 scmd = subst(scmd, "printindex", "printsubindex");
1138                         else
1139                                 scmd = subst(scmd, "printsubindex", "printindex");
1140                         cur.recordUndo();
1141                         setCmdName(scmd);
1142                         break;
1143                 } else if (cmd.argument() == from_ascii("check-printindex*")) {
1144                         string scmd = getCmdName();
1145                         if (suffixIs(scmd, '*'))
1146                                 break;
1147                         scmd += '*';
1148                         cur.recordUndo();
1149                         setParam("type", docstring());
1150                         setCmdName(scmd);
1151                         break;
1152                 }
1153                 InsetCommandParams p(INDEX_PRINT_CODE);
1154                 // FIXME UNICODE
1155                 InsetCommand::string2params(to_utf8(cmd.argument()), p);
1156                 if (p.getCmdName().empty()) {
1157                         cur.noScreenUpdate();
1158                         break;
1159                 }
1160                 cur.recordUndo();
1161                 setParams(p);
1162                 break;
1163         }
1164
1165         default:
1166                 InsetCommand::doDispatch(cur, cmd);
1167                 break;
1168         }
1169 }
1170
1171
1172 bool InsetPrintIndex::getStatus(Cursor & cur, FuncRequest const & cmd,
1173         FuncStatus & status) const
1174 {
1175         switch (cmd.action()) {
1176
1177         case LFUN_INSET_MODIFY: {
1178                 if (cmd.argument() == from_ascii("toggle-subindex")) {
1179                         status.setEnabled(buffer().masterBuffer()->params().use_indices);
1180                         status.setOnOff(contains(getCmdName(), "printsubindex"));
1181                         return true;
1182                 } else if (cmd.argument() == from_ascii("check-printindex*")) {
1183                         status.setEnabled(buffer().masterBuffer()->params().use_indices);
1184                         status.setOnOff(suffixIs(getCmdName(), '*'));
1185                         return true;
1186                 } if (cmd.getArg(0) == "index_print"
1187                     && cmd.getArg(1) == "CommandInset") {
1188                         InsetCommandParams p(INDEX_PRINT_CODE);
1189                         InsetCommand::string2params(to_utf8(cmd.argument()), p);
1190                         if (suffixIs(p.getCmdName(), '*')) {
1191                                 status.setEnabled(true);
1192                                 status.setOnOff(false);
1193                                 return true;
1194                         }
1195                         Buffer const & realbuffer = *buffer().masterBuffer();
1196                         IndicesList const & indiceslist =
1197                                 realbuffer.params().indiceslist();
1198                         Index const * index = indiceslist.findShortcut(p["type"]);
1199                         status.setEnabled(index != 0);
1200                         status.setOnOff(p["type"] == getParam("type"));
1201                         return true;
1202                 } else
1203                         return InsetCommand::getStatus(cur, cmd, status);
1204         }
1205
1206         case LFUN_INSET_DIALOG_UPDATE: {
1207                 status.setEnabled(buffer().masterBuffer()->params().use_indices);
1208                 return true;
1209         }
1210
1211         default:
1212                 return InsetCommand::getStatus(cur, cmd, status);
1213         }
1214 }
1215
1216
1217 void InsetPrintIndex::updateBuffer(ParIterator const &, UpdateType, bool const /*deleted*/)
1218 {
1219         Index const * index =
1220                 buffer().masterParams().indiceslist().findShortcut(getParam("type"));
1221         if (index)
1222                 setParam("name", index->index());
1223 }
1224
1225
1226 void InsetPrintIndex::latex(otexstream & os, OutputParams const & runparams_in) const
1227 {
1228         if (!buffer().masterBuffer()->params().use_indices) {
1229                 if (getParam("type") == from_ascii("idx"))
1230                         os << "\\printindex" << termcmd;
1231                 return;
1232         }
1233         OutputParams runparams = runparams_in;
1234         os << getCommand(runparams);
1235 }
1236
1237
1238 void InsetPrintIndex::validate(LaTeXFeatures & features) const
1239 {
1240         features.require("makeidx");
1241         if (buffer().masterBuffer()->params().use_indices)
1242                 features.require("splitidx");
1243         InsetCommand::validate(features);
1244 }
1245
1246
1247 string InsetPrintIndex::contextMenuName() const
1248 {
1249         return buffer().masterBuffer()->params().use_indices ?
1250                 "context-indexprint" : string();
1251 }
1252
1253
1254 bool InsetPrintIndex::hasSettings() const
1255 {
1256         return buffer().masterBuffer()->params().use_indices;
1257 }
1258
1259
1260 class IndexEntry
1261 {
1262 public:
1263         /// Builds an entry for the index.
1264         IndexEntry(const InsetIndex * inset, OutputParams const * runparams) : inset_(inset), runparams_(runparams)
1265         {
1266                 LASSERT(runparams, return);
1267
1268                 // Convert the inset as text. The resulting text usually only contains an XHTML anchor (<a id='...'/>) and text.
1269                 odocstringstream entry;
1270                 OutputParams ours = *runparams;
1271                 ours.for_toc = false;
1272                 inset_->plaintext(entry, ours);
1273                 entry_ = entry.str();
1274
1275                 // Determine in which index this entry belongs to.
1276                 if (inset_->buffer().masterBuffer()->params().use_indices) {
1277                         index_ = inset_->params_.index;
1278                 }
1279
1280                 // Attempt parsing the inset.
1281                 if (isModern())
1282                         parseAsModern();
1283                 else
1284                         parseAsLegacy();
1285         }
1286
1287         /// When parsing this entry, some errors may be found; they are reported as a single string.
1288         // It is up to the caller to send this string to LYXERR and the output file, as needed.
1289         const docstring & output_error() const
1290         {
1291                 return output_error_;
1292         }
1293
1294         void output_error(XMLStream xs) const
1295         {
1296                 LYXERR0(output_error());
1297                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + output_error() + from_utf8(" -->\n"));
1298         }
1299
1300
1301 private:
1302         bool isModern()
1303         {
1304                 std::cout << to_utf8(entry_) << std::endl;
1305
1306                 // If a modern parameter is present, this is definitely a modern index inset. Similarly, if it contains the
1307                 // usual LaTeX symbols (!|@), then it is definitely a legacy index inset. Otherwise, if it has features of
1308                 // neither, it is both: consider this is a modern inset, to trigger the least complex code. Mixing both types
1309                 // is not allowed (i.e. behaviour is undefined).
1310                 const bool is_definitely_modern = inset_->hasSortKey() || inset_->hasSeeRef() || inset_->hasSubentries()
1311                                             || inset_->params_.range != InsetIndexParams::PageRange::None;
1312                 const bool is_definitely_legacy = entry_.find('@') != std::string::npos
1313                                 || entry_.find('|') != std::string::npos || entry_.find('!') != std::string::npos;
1314
1315                 if (is_definitely_legacy && is_definitely_modern)
1316                         output_error_ += from_utf8("Mix of index properties and raw LaTeX index commands is unsupported. ");
1317
1318                 // Truth table:
1319                 // - is_definitely_modern == true:
1320                 //   - is_definitely_legacy == true: error (return whatever)
1321                 //   - is_definitely_legacy == false: return modern
1322                 // - is_definitely_modern == false:
1323                 //   - is_definitely_legacy == true: return legacy
1324                 //   - is_definitely_legacy == false: return modern
1325                 return !is_definitely_legacy;
1326         }
1327
1328         void parseAsModern()
1329         {
1330                 LASSERT(runparams_, return);
1331
1332                 if (inset_->hasSortKey()) {
1333                         sort_as_ = inset_->getSortkeyAsText(*runparams_);
1334                 }
1335
1336                 terms_ = inset_->getSubentriesAsText(*runparams_);
1337                 // The main term is not present in the vector, as it's not a subentry. The main index term is inserted raw in
1338                 // the index inset. Considering that the user either uses the new or the legacy mechanism, the main term is the
1339                 // full string within this inset (i.e. without the subinsets).
1340                 terms_.insert(terms_.begin(), inset_->getMainSubentryAsText(*runparams_));
1341
1342                 has_start_range_ = inset_->params_.range == InsetIndexParams::PageRange::Start;
1343                 has_end_range_ = inset_->params_.range == InsetIndexParams::PageRange::End;
1344
1345                 see_ = inset_->getSeeAsText(*runparams_);
1346                 see_alsoes_ = inset_->getSeeAlsoesAsText(*runparams_);
1347         }
1348
1349         void parseAsLegacy() {
1350                 // Determine if some features are known not to be supported. For now, this is only formatting like
1351                 // \index{alpha@\textbf{alpha}} or \index{alpha@$\alpha$}.
1352                 // @ is supported, but only for sorting, without specific formatting.
1353                 if (entry_.find(from_utf8("@\\")) != lyx::docstring::npos) {
1354                         output_error_ += from_utf8("Unsupported feature: an index entry contains an @\\. "
1355                                                    "Complete entry: \"") + entry_ + from_utf8("\". ");
1356                 }
1357                 if (entry_.find(from_utf8("@$")) != lyx::docstring::npos) {
1358                         output_error_ += from_utf8("Unsupported feature: an index entry contains an @$. "
1359                                                    "Complete entry: \"") + entry_ + from_utf8("\". ");
1360                 }
1361
1362                 // Split the string into its main constituents: terms, and command (see, see also, range).
1363                 size_t positionVerticalBar = entry_.find(from_ascii("|")); // What comes before | is (sub)(sub)entries.
1364                 docstring indexTerms = entry_.substr(0, positionVerticalBar);
1365                 docstring command;
1366                 if (positionVerticalBar != lyx::docstring::npos) {
1367                         command = entry_.substr(positionVerticalBar + 1);
1368                 }
1369
1370                 // Handle sorting issues, with @.
1371                 vector<docstring> sortingElements = getVectorFromString(indexTerms, from_ascii("@"), false);
1372                 if (sortingElements.size() == 2) {
1373                         sort_as_ = sortingElements[0];
1374                         indexTerms = sortingElements[1];
1375                 }
1376
1377                 // Handle entries, subentries, and subsubentries.
1378                 terms_ = getVectorFromString(indexTerms, from_ascii("!"), false);
1379
1380                 // Handle ranges. Happily, (| and |) can only be at the end of the string!
1381                 has_start_range_ = entry_.find(from_ascii("|(")) != lyx::docstring::npos;
1382                 has_end_range_ = entry_.find(from_ascii("|)")) != lyx::docstring::npos;
1383
1384                 // - Remove the ranges from the command if they do not appear at the beginning.
1385                 size_t range_index = 0;
1386                 while ((range_index = command.find(from_utf8("|("), range_index)) != std::string::npos)
1387                         command.erase(range_index, 1);
1388                 range_index = 0;
1389                 while ((range_index = command.find(from_utf8("|)"), range_index)) != std::string::npos)
1390                         command.erase(range_index, 1);
1391
1392                 // - Remove the ranges when they are the only vertical bar in the complete string.
1393                 if (command[0] == '(' || command[0] == ')')
1394                         command.erase(0, 1);
1395
1396                 // Handle see and seealso. As "see" is a prefix of "seealso", the order of the comparisons is important.
1397                 // Both commands are mutually exclusive!
1398                 if (command.substr(0, 3) == "see") {
1399                         // Unescape brackets.
1400                         size_t index_argument_begin = 0;
1401                         while ((index_argument_begin = command.find(from_utf8("\\{"), index_argument_begin)) != std::string::npos)
1402                                 command.erase(index_argument_begin, 1);
1403                         size_t index_argument_end = 0;
1404                         while ((index_argument_end = command.find(from_utf8("\\}"), index_argument_end)) != std::string::npos)
1405                                 command.erase(index_argument_end, 1);
1406
1407                         // Retrieve the part between brackets, and remove the complete seealso.
1408                         size_t position_opening_bracket = command.find(from_ascii("{"));
1409                         size_t position_closing_bracket = command.find(from_ascii("}"));
1410                         docstring argument = command.substr(position_opening_bracket + 1,
1411                                                                                                 position_closing_bracket - position_opening_bracket - 1);
1412
1413                         // Parse the argument of referenced entries (or a single one for see).
1414                         if (command.substr(0, 7) == "seealso") {
1415                                 see_alsoes_ = getVectorFromString(argument, from_ascii(","), false);
1416                         } else {
1417                                 see_ = argument;
1418
1419                                 if (see_.find(from_ascii(",")) != std::string::npos) {
1420                                         output_error_ += from_utf8("Several index_argument_end terms found as \"see\"! Only one is "
1421                                                                    "acceptable. Complete entry: \"") + entry_ + from_utf8("\". ");
1422                                 }
1423                         }
1424
1425                         // Remove the complete see/seealso from the commands, in case there is something else to parse.
1426                         command = command.substr(position_closing_bracket + 1);
1427                 }
1428
1429                 // Some parts of the strings are not parsed, as they do not have anything matching in DocBook or XHTML:
1430                 // things like formatting the entry or the page number, other strings for sorting.
1431                 // https://wiki.lyx.org/Tips/Indexing
1432                 // If there are such things in the index entry, then this code may miserably fail. For example, for
1433                 // "Peter|(textbf", no range will be detected.
1434                 if (!command.empty()) {
1435                         output_error_ += from_utf8("Unsupported feature: an index entry contains a | with an unsupported command, ")
1436                                          + command + from_utf8(". Complete entry: \"") + entry_ + from_utf8("\". ");
1437                 }
1438         }
1439
1440 public:
1441         int level() const {
1442                 return terms_.size();
1443         }
1444
1445         const std::vector<docstring>& terms() const {
1446                 return terms_;
1447         }
1448
1449         std::vector<docstring>& terms() {
1450                 return terms_;
1451         }
1452
1453         const InsetIndex* inset() const {
1454                 return inset_;
1455         }
1456
1457 private:
1458         // Input inset. These should only be used when parsing the inset (either parseAsModern or parseAsLegacy, called in
1459         // the constructor).
1460         const InsetIndex * inset_;
1461         OutputParams const * runparams_;
1462         docstring entry_;
1463         docstring index_; // Useful when there are multiple indices in the same document.
1464
1465         // Errors, concatenated as a single string, available as soon as parsing is done, const afterwards (i.e. once
1466         // constructor is done).
1467         docstring output_error_;
1468
1469         // Parsed index entry.
1470         std::vector<docstring> terms_; // Up to three entries, in general.
1471         docstring sort_as_;
1472         docstring command_;
1473         bool has_start_range_;
1474         bool has_end_range_;
1475         docstring see_;
1476         vector<docstring> see_alsoes_;
1477
1478         // Operators used for sorting entries (alphabetical order).
1479         friend bool operator<(IndexEntry const & lhs, IndexEntry const & rhs);
1480 };
1481
1482 bool operator<(IndexEntry const & lhs, IndexEntry const & rhs)
1483 {
1484         if (lhs.terms_.empty())
1485                 return false;
1486
1487         for (int i = 0; i < min(rhs.terms_.size(), lhs.terms_.size()); ++i) {
1488                 int comp = compare_no_case(lhs.terms_[i], rhs.terms_[i]);
1489                 if (comp != 0)
1490                         return comp < 0;
1491         }
1492         return false;
1493 }
1494
1495
1496 namespace {
/// Builds the CSS class for an index item at the given depth: "entry" prefixed by "sub" once per level
/// (0 gives "entry", 1 gives "subentry", 2 gives "subsubentry", and so on).
std::string generateCssClassAtDepth(unsigned depth) {
        std::string result;
        for (unsigned level = 0; level < depth; ++level)
                result += "sub";
        result += "entry";
        return result;
}
1507
1508 struct IndexNode {
1509         std::vector<IndexEntry> entries;
1510         std::vector<IndexNode*> children;
1511 };
1512
1513 docstring termAtLevel(const IndexNode* node, unsigned depth)
1514 {
1515         // The typical entry has a depth of 1 to 3: the call stack would then be at most 4 (due to the root node). This
1516         // function could be made constant time by copying the term in each node, but that would make data duplication that
1517         // may fall out of sync; the performance benefit would probably be negligible.
1518         if (!node->entries.empty()) {
1519                 LASSERT(node->entries.begin()->terms().size() >= depth + 1, return from_ascii(""));
1520                 return node->entries.begin()->terms()[depth];
1521         }
1522
1523         if (!node->children.empty()) {
1524                 return termAtLevel(*node->children.begin(), depth);
1525         }
1526
1527         LASSERT(false, return from_ascii(""));
1528 }
1529
1530 void insertIntoNode(const IndexEntry& entry, IndexNode* node, unsigned depth = 0)
1531 {
1532         // depth == 0 is for the root, not yet the index, hence the increase when going to vector size.
1533         for (IndexNode* child : node->children) {
1534                 if (entry.terms()[depth] == termAtLevel(child, depth)) {
1535                         if (depth + 1 == entry.terms().size()) { // == child.entries.begin()->terms().size()
1536                                 // All term entries match: it's an entry.
1537                                 child->entries.emplace_back(entry);
1538                                 return;
1539                         } else {
1540                                 insertIntoNode(entry, child, depth + 1);
1541                                 return;
1542                         }
1543                 }
1544         }
1545
1546         // Out of the loop: no matching child found, create a new (possibly nested) child for this entry. Due to the
1547         // possibility of nestedness, only insert the current entry when the right level is reached. This is needed if the
1548         // first entry for a word has several levels that never appeared.
1549         // In particular, this case is called for the first entry.
1550         IndexNode* new_node = node;
1551         do {
1552                 new_node->children.emplace_back(new IndexNode{{}, {}});
1553                 new_node = new_node->children.back();
1554                 depth += 1;
1555         } while (depth + 1 <= entry.terms().size()); // depth == 0: root node, no text associated.
1556         new_node->entries.emplace_back(entry);
1557 }
1558
1559 IndexNode* buildIndexTree(vector<IndexEntry>& entries)
1560 {
1561         // Sort the entries, first on the main entry, then the subentry, then the subsubentry,
1562         // thanks to the implementation of operator<.
1563         // If this operation is not performed, the algorithm below is no more correct (and ensuring that it works with
1564         // unsorted entries would make its complexity blow up).
1565         stable_sort(entries.begin(), entries.end());
1566
1567         // Cook the index into a nice tree data structure: entries at a given level in the index as a node, with subentries
1568         // as children.
1569         auto* index_root = new IndexNode{{}, {}};
1570         for (const IndexEntry& entry : entries) {
1571                 insertIntoNode(entry, index_root);
1572         }
1573
1574         return index_root;
1575 }
1576
1577 void outputIndexPage(XMLStream & xs, const IndexNode* root_node, unsigned depth = 0)
1578 {
1579         LASSERT(root_node->entries.size() + root_node->children.size() > 0, return);
1580
1581         xs << xml::StartTag("li", "class='" + generateCssClassAtDepth(depth) + "'");
1582         xs << xml::CR();
1583         xs << XMLStream::ESCAPE_NONE << termAtLevel(root_node, depth);
1584         // By tree assumption, all the entries at this node have the same set of terms.
1585
1586         if (!root_node->entries.empty()) {
1587                 xs << XMLStream::ESCAPE_NONE << " &#8212; ";
1588                 unsigned entry_number = 1;
1589
1590                 for (unsigned i = 0; i < root_node->entries.size(); ++i) {
1591                         const IndexEntry &entry = root_node->entries[i];
1592
1593                         std::string const link_attr = "href='#" + entry.inset()->paragraphs()[0].magicLabel() + "'";
1594                         xs << xml::StartTag("a", link_attr);
1595                         xs << from_ascii(std::to_string(entry_number));
1596                         xs << xml::EndTag("a");
1597
1598                         if (i < root_node->entries.size() - 1) {
1599                                 xs << ", ";
1600                         }
1601                         entry_number += 1;
1602                 }
1603         }
1604
1605         if (!root_node->entries.empty() && !root_node->children.empty()) {
1606                 xs << xml::CR();
1607         }
1608
1609         if (!root_node->children.empty()) {
1610                 xs << xml::StartTag("ul", "class='" + generateCssClassAtDepth(depth) + "'");
1611                 xs << xml::CR();
1612
1613                 for (const IndexNode* child : root_node->children) {
1614                         outputIndexPage(xs, child, depth + 1);
1615                 }
1616
1617                 xs << xml::EndTag("ul");
1618                 xs << xml::CR();
1619         }
1620
1621         xs << xml::EndTag("li");
1622         xs << xml::CR();
1623 }
1624
1625 // Only useful for debugging.
1626 void printTree(const IndexNode* root_node, unsigned depth = 0)
1627 {
1628         static const std::string pattern = "    ";
1629         std::string prefix;
1630         for (unsigned i = 0; i < depth; ++i) {
1631                 prefix += pattern;
1632         }
1633         const std::string prefix_long = prefix + pattern + pattern;
1634
1635         docstring term_at_level;
1636         if (depth == 0) {
1637                 // The root has no term.
1638                 std::cout << "<ROOT>" << std::endl;
1639         } else {
1640                 LASSERT(depth - 1 <= 10, return); // Check for overflows.
1641                 term_at_level = termAtLevel(root_node, depth - 1);
1642                 std::cout << prefix << to_utf8(term_at_level) << " (x " << std::to_string(root_node->entries.size()) << ")"
1643                           << std::endl;
1644         }
1645
1646         for (const IndexEntry& entry : root_node->entries) {
1647                 if (entry.terms().size() != depth) {
1648                         std::cout << prefix_long << "ERROR: an entry doesn't have the same number of terms" << std::endl;
1649                 }
1650                 if (depth > 0 && entry.terms()[depth - 1] != term_at_level) {
1651                         std::cout << prefix_long << "ERROR: an entry doesn't have the right term at depth " << std::to_string(depth)
1652                                 << std::endl;
1653                 }
1654         }
1655
1656         for (const IndexNode* node : root_node->children) {
1657                 printTree(node, depth + 1);
1658         }
1659 }
1660 }
1661
1662
1663 docstring InsetPrintIndex::xhtml(XMLStream &, OutputParams const & op) const
1664 {
1665         BufferParams const & bp = buffer().masterBuffer()->params();
1666
1667         // we do not presently support multiple indices, so we refuse to print
1668         // anything but the main index, so as not to generate multiple indices.
1669         // NOTE Multiple index support would require some work. The reason
1670         // is that the TOC does not know about multiple indices. Either it would
1671         // need to be told about them (not a bad idea), or else the index entries
1672         // would need to be collected differently, say, during validation.
1673         if (bp.use_indices && getParam("type") != from_ascii("idx"))
1674                 return docstring();
1675
1676         shared_ptr<Toc const> toc = buffer().tocBackend().toc("index");
1677         if (toc->empty())
1678                 return docstring();
1679
1680         // Collect the index entries in a form we can use them.
1681         vector<IndexEntry> entries;
1682         for (const TocItem& item : *toc) {
1683                 if (item.isOutput())
1684                         entries.emplace_back(IndexEntry{static_cast<const InsetIndex*>(&(item.dit().inset())), &op});
1685         }
1686
1687         // If all the index entries are in notes or not displayed, get out sooner.
1688         if (entries.empty())
1689                 return docstring();
1690
1691         const IndexNode* index_root = buildIndexTree(entries);
1692 #if 0
1693         printTree(index_root);
1694 #endif
1695
1696         // Start generating the XHTML index.
1697         Layout const & lay = bp.documentClass().htmlTOCLayout();
1698         string const & tocclass = lay.defaultCSSClass();
1699         string const tocattr = "class='index " + tocclass + "'";
1700
1701         // we'll use our own stream, because we are going to defer everything.
1702         // that's how we deal with the fact that we're probably inside a standard
1703         // paragraph, and we don't want to be.
1704         odocstringstream ods;
1705         XMLStream xs(ods);
1706
1707         xs << xml::StartTag("div", tocattr);
1708         xs << xml::CR();
1709         xs << xml::StartTag(lay.htmltag(), lay.htmlattr());
1710         xs << translateIfPossible(from_ascii("Index"), op.local_font->language()->lang());
1711         xs << xml::EndTag(lay.htmltag());
1712         xs << xml::CR();
1713         xs << xml::StartTag("ul", "class='main'");
1714         xs << xml::CR();
1715
1716         LASSERT(index_root->entries.empty(), return docstring()); // No index entry should have zero terms.
1717         for (const IndexNode* node : index_root->children) {
1718                 outputIndexPage(xs, node);
1719         }
1720
1721         xs << xml::EndTag("ul");
1722         xs << xml::CR();
1723         xs << xml::EndTag("div");
1724
1725         return ods.str();
1726 }
1727
1728 } // namespace lyx