]> git.lyx.org Git - lyx.git/blob - src/insets/InsetIndex.cpp
5f4cb7e93fc49ab81d4738118603d9dfb15d46cb
[lyx.git] / src / insets / InsetIndex.cpp
1 /**
2  * \file InsetIndex.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Lars Gullik Bjønnes
7  * \author Jürgen Spitzmüller
8  *
9  * Full author contact details are available in file CREDITS.
10  */
11 #include <config.h>
12
13 #include "InsetIndex.h"
14 #include "InsetIndexMacro.h"
15
16 #include "Buffer.h"
17 #include "BufferParams.h"
18 #include "BufferView.h"
19 #include "ColorSet.h"
20 #include "Cursor.h"
21 #include "DispatchResult.h"
22 #include "Encoding.h"
23 #include "ErrorList.h"
24 #include "FuncRequest.h"
25 #include "FuncStatus.h"
26 #include "IndicesList.h"
27 #include "InsetList.h"
28 #include "Language.h"
29 #include "LaTeX.h"
30 #include "LaTeXFeatures.h"
31 #include "Lexer.h"
32 #include "LyX.h"
33 #include "output_latex.h"
34 #include "output_xhtml.h"
35 #include "xml.h"
36 #include "texstream.h"
37 #include "TextClass.h"
38 #include "TocBackend.h"
39
40 #include "support/debug.h"
41 #include "support/docstream.h"
42 #include "support/FileName.h"
43 #include "support/gettext.h"
44 #include "support/lstrings.h"
45 #include "support/Translator.h"
46
47 #include "frontends/alert.h"
48
49 #include <algorithm>
50 #include <set>
51 #include <iostream>
52
53 #include <QThreadStorage>
54
55 using namespace std;
56 using namespace lyx::support;
57
58 namespace lyx {
59
60 namespace {
61
62 typedef Translator<string, InsetIndexParams::PageRange> PageRangeTranslator;
63 typedef Translator<docstring, InsetIndexParams::PageRange> PageRangeTranslatorLoc;
64
65 PageRangeTranslator const init_insetindexpagerangetranslator()
66 {
67         PageRangeTranslator translator("none", InsetIndexParams::None);
68         translator.addPair("start", InsetIndexParams::Start);
69         translator.addPair("end", InsetIndexParams::End);
70         return translator;
71 }
72
73 PageRangeTranslator const init_insetindexpagerangetranslator_latex()
74 {
75         PageRangeTranslator translator("", InsetIndexParams::None);
76         translator.addPair("(", InsetIndexParams::Start);
77         translator.addPair(")", InsetIndexParams::End);
78         return translator;
79 }
80
81
82 PageRangeTranslatorLoc const init_insetindexpagerangetranslator_loc()
83 {
84         PageRangeTranslatorLoc translator(docstring(), InsetIndexParams::None);
85         translator.addPair(_("Starts page range"), InsetIndexParams::Start);
86         translator.addPair(_("Ends page range"), InsetIndexParams::End);
87         return translator;
88 }
89
90
91 PageRangeTranslator const & insetindexpagerangetranslator()
92 {
93         static PageRangeTranslator const prtranslator =
94                         init_insetindexpagerangetranslator();
95         return prtranslator;
96 }
97
98
99 PageRangeTranslatorLoc const & insetindexpagerangetranslator_loc()
100 {
101         static PageRangeTranslatorLoc const translator =
102                         init_insetindexpagerangetranslator_loc();
103         return translator;
104 }
105
106
107 PageRangeTranslator const & insetindexpagerangetranslator_latex()
108 {
109         static PageRangeTranslator const lttranslator =
110                         init_insetindexpagerangetranslator_latex();
111         return lttranslator;
112 }
113
114 } // namespace anon
115
116 /////////////////////////////////////////////////////////////////////
117 //
118 // InsetIndex
119 //
120 ///////////////////////////////////////////////////////////////////////
121
122
123 InsetIndex::InsetIndex(Buffer * buf, InsetIndexParams const & params)
124         : InsetCollapsible(buf), params_(params)
125 {}
126
127
128 void InsetIndex::latex(otexstream & ios, OutputParams const & runparams_in) const
129 {
130         OutputParams runparams(runparams_in);
131         runparams.inIndexEntry = true;
132
133         otexstringstream os;
134
135         if (buffer().masterBuffer()->params().use_indices && !params_.index.empty()
136                 && params_.index != "idx") {
137                 os << "\\sindex[";
138                 os << escape(params_.index);
139                 os << "]{";
140         } else {
141                 os << "\\index";
142                 os << '{';
143         }
144
145         // Get the LaTeX output from InsetText. We need to deconstruct this later
146         // in order to check if we need to generate a sorting key
147         odocstringstream ourlatex;
148         otexstream ots(ourlatex);
149         InsetText::latex(ots, runparams);
150         if (runparams.find_effective()) {
151                 // No need for special handling, if we are only searching for some patterns
152                 os << ourlatex.str() << "}";
153                 return;
154         }
155
156         if (hasSortKey()) {
157                 getSortkey(os, runparams);
158                 os << "@";
159                 os << ourlatex.str();
160                 getSubentries(os, runparams);
161                 if (hasSeeRef()) {
162                         os << "|";
163                         os << insetindexpagerangetranslator_latex().find(params_.range);
164                         getSeeRefs(os, runparams);
165                 } else if (!params_.pagefmt.empty() && params_.pagefmt != "default") {
166                         os << "|";
167                         os << insetindexpagerangetranslator_latex().find(params_.range);
168                         os << from_utf8(params_.pagefmt);
169                 }
170         } else {
171                 // We check whether we need a sort key.
172                 // If so, we use the plaintext version
173                 odocstringstream ourplain;
174                 InsetText::plaintext(ourplain, runparams);
175
176                 // These are the LaTeX and plaintext representations
177                 docstring latexstr = ourlatex.str();
178                 docstring plainstr = ourplain.str();
179         
180                 // This will get what follows | if anything does,
181                 // the command (e.g., see, textbf) for pagination
182                 // formatting
183                 docstring cmd;
184
185                 if (hasSeeRef()) {
186                         odocstringstream seeref;
187                         otexstream otsee(seeref);
188                         getSeeRefs(otsee, runparams);
189                         cmd = seeref.str();
190                 } else if (!params_.pagefmt.empty() && params_.pagefmt != "default") {
191                         cmd = from_utf8(params_.pagefmt);
192                 } else {
193                         // Check for the | separator to strip the cmd.
194                         // This goes wrong on an escaped "|", but as the escape
195                         // character can be changed in style files, we cannot
196                         // prevent that.
197                         size_t pos = latexstr.find(from_ascii("|"));
198                         if (pos != docstring::npos) {
199                                 // Put the bit after "|" into cmd...
200                                 cmd = latexstr.substr(pos + 1);
201                                 // ...and erase that stuff from latexstr
202                                 latexstr = latexstr.erase(pos);
203                                 // ...as well as from plainstr
204                                 size_t ppos = plainstr.find(from_ascii("|"));
205                                 if (ppos < plainstr.size())
206                                         plainstr.erase(ppos);
207                                 else
208                                         LYXERR0("The `|' separator was not found in the plaintext version!");
209                         }
210                 }
211
212                 odocstringstream subentries;
213                 otexstream otsub(subentries);
214                 getSubentries(otsub, runparams);
215                 if (subentries.str().empty()) {
216                         // Separate the entries and subentries, i.e., split on "!".
217                         // This goes wrong on an escaped "!", but as the escape
218                         // character can be changed in style files, we cannot
219                         // prevent that.
220                         std::vector<docstring> const levels =
221                                         getVectorFromString(latexstr, from_ascii("!"), true);
222                         std::vector<docstring> const levels_plain =
223                                         getVectorFromString(plainstr, from_ascii("!"), true);
224                 
225                         vector<docstring>::const_iterator it = levels.begin();
226                         vector<docstring>::const_iterator end = levels.end();
227                         vector<docstring>::const_iterator it2 = levels_plain.begin();
228                         bool first = true;
229                         for (; it != end; ++it) {
230                                 // The separator needs to be put back when
231                                 // writing the levels, except for the first level
232                                 if (!first)
233                                         os << '!';
234                                 else
235                                         first = false;
236                 
237                                 // Now here comes the reason for this whole procedure:
238                                 // We try to correctly sort macros and formatted strings.
239                                 // If we find a command, prepend a plain text
240                                 // version of the content to get sorting right,
241                                 // e.g. \index{LyX@\LyX}, \index{text@\textbf{text}}.
242                                 // We do this on all levels.
243                                 // We don't do it if the level already contains a '@', though.
244                                 // Plaintext might return nothing (e.g. for ERTs).
245                                 // In that case, we use LaTeX.
246                                 docstring const spart = (levels_plain.empty() || (*it2).empty()) ? *it : *it2;
247                                 processLatexSorting(os, runparams, *it, spart);
248                                 if (it2 < levels_plain.end())
249                                         ++it2;
250                         }
251                 } else {
252                         processLatexSorting(os, runparams, latexstr, plainstr);
253                         os << subentries.str();
254                 }
255
256                 // At last, re-insert the command, separated by "|"
257                 if (!cmd.empty()) {
258                         os << "|"
259                            << insetindexpagerangetranslator_latex().find(params_.range)
260                            << cmd;
261                 }
262         }
263         os << '}';
264
265         // In macros with moving arguments, such as \section,
266         // we store the index and output it after the macro (#2154)
267         if (runparams_in.postpone_fragile_stuff)
268                 runparams_in.post_macro += os.str();
269         else
270                 ios << os.release();
271 }
272
273
274 void InsetIndex::processLatexSorting(otexstream & os, OutputParams const & runparams,
275                                 docstring const latex, docstring const spart) const
276 {
277         if (contains(latex, '\\') && !contains(latex, '@')) {
278                 // Now we need to validate that all characters in
279                 // the sorting part are representable in the current
280                 // encoding. If not try the LaTeX macro which might
281                 // or might not be a good choice, and issue a warning.
282                 pair<docstring, docstring> spart_latexed =
283                                 runparams.encoding->latexString(spart, runparams.dryrun);
284                 if (!spart_latexed.second.empty())
285                         LYXERR0("Uncodable character in index entry. Sorting might be wrong!");
286                 if (spart != spart_latexed.first && !runparams.dryrun) {
287                         TeXErrors terr;
288                         ErrorList & errorList = buffer().errorList("Export");
289                         docstring const s = bformat(_("LyX's automatic index sorting algorithm faced "
290                                                       "problems with the entry '%1$s'.\n"
291                                                       "Please specify the sorting of this entry manually, as "
292                                                       "explained in the User Guide."), spart);
293                         Paragraph const & par = buffer().paragraphs().front();
294                         errorList.push_back(ErrorItem(_("Index sorting failed"), s,
295                                                       {par.id(), 0}, {par.id(), -1}));
296                         buffer().bufferErrors(terr, errorList);
297                 }
298                 // Remove remaining \'s from the sort key
299                 docstring ppart = subst(spart_latexed.first, from_ascii("\\"), docstring());
300                 // Plain quotes need to be escaped, however (#10649), as this
301                 // is the default escape character
302                 ppart = subst(ppart, from_ascii("\""), from_ascii("\\\""));
303
304                 // Now insert the sortkey, separated by '@'.
305                 os << ppart;
306                 os << '@';
307         }
308         // Insert the actual level text
309         os << latex;
310 }
311
312
313 void InsetIndex::docbook(XMLStream & xs, OutputParams const & runparams) const
314 {
315         // Two ways of processing this inset are implemented:
316         // - the legacy one, based on parsing the raw LaTeX (before LyX 2.4) -- unlikely to be deprecated
317         // - the modern one, based on precise insets for indexing features
318         // Like the LaTeX implementation, consider the user chooses either of those options.
319
320         // Get the content of the inset as LaTeX, as some things may be encoded as ERT (like {}).
321         // TODO: if there is an ERT within the index term, its conversion should be tried, in case it becomes useful;
322         //  otherwise, ERTs should become comments. For now, they are just copied as-is, which is barely satisfactory.
323         odocstringstream odss;
324         otexstream ots(odss);
325         InsetText::latex(ots, runparams);
326         docstring latexString = trim(odss.str());
327
328         // Handle several indices (indicated in the inset instead of the raw latexString).
329         docstring indexType = from_utf8("");
330         if (buffer().masterBuffer()->params().use_indices) {
331                 indexType += " type=\"" + params_.index + "\"";
332         }
333
334         // Split the string into its main constituents: terms, and command (see, see also, range).
335         size_t positionVerticalBar = latexString.find(from_ascii("|")); // What comes before | is (sub)(sub)entries.
336         docstring indexTerms = latexString.substr(0, positionVerticalBar);
337         docstring command;
338         if (positionVerticalBar != lyx::docstring::npos) {
339                 command = latexString.substr(positionVerticalBar + 1);
340         }
341
342         // Handle sorting issues, with @.
343         docstring sortAs;
344         if (hasSortKey()) {
345                 sortAs = getSortkeyAsText(runparams);
346                 // indexTerms may contain a sort key if the user has both the inset and the manual key.
347         } else {
348                 vector<docstring> sortingElements = getVectorFromString(indexTerms, from_ascii("@"), false);
349                 if (sortingElements.size() == 2) {
350                         sortAs = sortingElements[0];
351                         indexTerms = sortingElements[1];
352                 }
353         }
354
355         // Handle primary, secondary, and tertiary terms (entries, subentries, and subsubentries, for LaTeX).
356         vector<docstring> terms;
357         if (const vector<docstring> potential_terms = getSubentriesAsText(runparams); !potential_terms.empty()) {
358                 terms = potential_terms;
359                 // The main term is not present in the vector, as it's not a subentry. The main index term is inserted raw in
360                 // the index inset. Considering that the user either uses the new or the legacy mechanism, the main term is the
361                 // full string within this inset (i.e. without the subinsets).
362                 terms.insert(terms.begin(), latexString);
363         } else {
364                 terms = getVectorFromString(indexTerms, from_ascii("!"), false);
365         }
366
367         // Handle ranges. Happily, in the raw LaTeX mode, (| and |) can only be at the end of the string!
368         const bool hasInsetRange = params_.range != InsetIndexParams::PageRange::None;
369         const bool hasStartRange = params_.range == InsetIndexParams::PageRange::Start ||
370                         latexString.find(from_ascii("|(")) != lyx::docstring::npos;
371         const bool hasEndRange = params_.range == InsetIndexParams::PageRange::End ||
372                         latexString.find(from_ascii("|)")) != lyx::docstring::npos;
373
374         if (hasInsetRange) {
375                 // Remove the ranges from the command if they do not appear at the beginning.
376                 size_t index = 0;
377                 while ((index = command.find(from_utf8("|("), index)) != std::string::npos)
378                         command.erase(index, 1);
379                 index = 0;
380                 while ((index = command.find(from_utf8("|)"), index)) != std::string::npos)
381                         command.erase(index, 1);
382
383                 // Remove the ranges when they are the only vertical bar in the complete string.
384                 if (command[0] == '(' || command[0] == ')')
385                         command.erase(0, 1);
386         }
387
388         // Handle see and seealso. As "see" is a prefix of "seealso", the order of the comparisons is important.
389         // Both commands are mutually exclusive!
390         docstring see = getSeeAsText(runparams);
391         vector<docstring> seeAlsoes = getSeeAlsoesAsText(runparams);
392
393         if (see.empty() && seeAlsoes.empty() && command.substr(0, 3) == "see") {
394                 // Unescape brackets.
395                 size_t index = 0;
396                 while ((index = command.find(from_utf8("\\{"), index)) != std::string::npos)
397                         command.erase(index, 1);
398                 index = 0;
399                 while ((index = command.find(from_utf8("\\}"), index)) != std::string::npos)
400                         command.erase(index, 1);
401
402                 // Retrieve the part between brackets, and remove the complete seealso.
403                 size_t positionOpeningBracket = command.find(from_ascii("{"));
404                 size_t positionClosingBracket = command.find(from_ascii("}"));
405                 docstring list = command.substr(positionOpeningBracket + 1, positionClosingBracket - positionOpeningBracket - 1);
406
407                 // Parse the list of referenced entries (or a single one for see).
408                 if (command.substr(0, 7) == "seealso") {
409                         seeAlsoes = getVectorFromString(list, from_ascii(","), false);
410                 } else {
411                         see = list;
412
413                         if (see.find(from_ascii(",")) != std::string::npos) {
414                                 docstring error = from_utf8("Several index terms found as \"see\"! Only one is acceptable. "
415                                                                                         "Complete entry: \"") + latexString + from_utf8("\"");
416                                 LYXERR0(error);
417                                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
418                         }
419                 }
420
421                 // Remove the complete see/seealso from the commands, in case there is something else to parse.
422                 command = command.substr(positionClosingBracket + 1);
423         }
424
425         // Some parts of the strings are not parsed, as they do not have anything matching in DocBook: things like
426         // formatting the entry or the page number, other strings for sorting. https://wiki.lyx.org/Tips/Indexing
427         // If there are such things in the index entry, then this code may miserably fail. For example, for "Peter|(textbf",
428         // no range will be detected.
429         // TODO: Could handle formatting as significance="preferred"?
430         if (!command.empty()) {
431                 docstring error = from_utf8("Unsupported feature: an index entry contains a | with an unsupported command, ")
432                                           + command + from_utf8(". ") + from_utf8("Complete entry: \"") + latexString + from_utf8("\"");
433                 LYXERR0(error);
434                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
435         }
436
437         // Write all of this down.
438         if (terms.empty() && !hasEndRange) {
439                 docstring error = from_utf8("No index term found! Complete entry: \"") + latexString + from_utf8("\"");
440                 LYXERR0(error);
441                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + error + from_utf8(" -->\n"));
442         } else {
443                 // Generate the attributes for ranges. It is based on the terms that are indexed, but the ID must be unique
444                 // to this indexing area (xml::cleanID does not guarantee this: for each call with the same arguments,
445                 // the same legal ID is produced; here, as the input would be the same, the output must be, by design).
446                 // Hence the thread-local storage, as the numbers must strictly be unique, and thus cannot be shared across
447                 // a paragraph (making the solution used for HTML worthless). This solution is very similar to the one used in
448                 // xml::cleanID.
449                 // indexType can only be used for singular and startofrange types!
450                 docstring attrs;
451                 if (!hasStartRange && !hasEndRange) {
452                         attrs = indexType;
453                 } else {
454                         // Append an ID if uniqueness is not guaranteed across the document.
455                         static QThreadStorage<set<docstring>> tKnownTermLists;
456                         static QThreadStorage<int> tID;
457
458                         set<docstring> &knownTermLists = tKnownTermLists.localData();
459                         int &ID = tID.localData();
460
461                         if (!tID.hasLocalData()) {
462                                 tID.localData() = 0;
463                         }
464
465                         // Modify the index terms to add the unique ID if needed.
466                         docstring newIndexTerms = indexTerms;
467                         if (knownTermLists.find(indexTerms) != knownTermLists.end()) {
468                                 newIndexTerms += from_ascii(string("-") + to_string(ID));
469
470                                 // Only increment for the end of range, so that the same number is used for the start of range.
471                                 if (hasEndRange) {
472                                         ID++;
473                                 }
474                         }
475
476                         // Term list not yet known: add it to the set AFTER the end of range. After
477                         if (knownTermLists.find(indexTerms) == knownTermLists.end() && hasEndRange) {
478                                 knownTermLists.insert(indexTerms);
479                         }
480
481                         // Generate the attributes.
482                         docstring id = xml::cleanID(newIndexTerms);
483                         if (hasStartRange) {
484                                 attrs = indexType + " class=\"startofrange\" xml:id=\"" + id + "\"";
485                         } else {
486                                 attrs = " class=\"endofrange\" startref=\"" + id + "\"";
487                         }
488                 }
489
490                 // Handle the index terms (including the specific index for this entry).
491                 if (hasEndRange) {
492                         xs << xml::CompTag("indexterm", attrs);
493                 } else {
494                         xs << xml::StartTag("indexterm", attrs);
495                         if (!terms.empty()) { // hasEndRange has no content.
496                                 docstring attr;
497                                 if (!sortAs.empty()) {
498                                         attr = from_utf8("sortas='") + sortAs + from_utf8("'");
499                                 }
500
501                                 xs << xml::StartTag("primary", attr);
502                                 xs << terms[0];
503                                 xs << xml::EndTag("primary");
504                         }
505                         if (terms.size() > 1) {
506                                 xs << xml::StartTag("secondary");
507                                 xs << terms[1];
508                                 xs << xml::EndTag("secondary");
509                         }
510                         if (terms.size() > 2) {
511                                 xs << xml::StartTag("tertiary");
512                                 xs << terms[2];
513                                 xs << xml::EndTag("tertiary");
514                         }
515
516                         // Handle see and see also.
517                         if (!see.empty()) {
518                                 xs << xml::StartTag("see");
519                                 xs << see;
520                                 xs << xml::EndTag("see");
521                         }
522
523                         if (!seeAlsoes.empty()) {
524                                 for (auto &entry : seeAlsoes) {
525                                         xs << xml::StartTag("seealso");
526                                         xs << entry;
527                                         xs << xml::EndTag("seealso");
528                                 }
529                         }
530
531                         // Close the entry.
532                         xs << xml::EndTag("indexterm");
533                 }
534         }
535 }
536
537
538 docstring InsetIndex::xhtml(XMLStream & xs, OutputParams const &) const
539 {
540         // we just print an anchor, taking the paragraph ID from
541         // our own interior paragraph, which doesn't get printed
542         std::string const magic = paragraphs().front().magicLabel();
543         std::string const attr = "id='" + magic + "'";
544         xs << xml::CompTag("a", attr);
545         return docstring();
546 }
547
548
549 bool InsetIndex::showInsetDialog(BufferView * bv) const
550 {
551         bv->showDialog("index", params2string(params_),
552                         const_cast<InsetIndex *>(this));
553         return true;
554 }
555
556
557 void InsetIndex::doDispatch(Cursor & cur, FuncRequest & cmd)
558 {
559         switch (cmd.action()) {
560
561         case LFUN_INSET_MODIFY: {
562                 if (cmd.getArg(0) == "changetype") {
563                         cur.recordUndoInset(this);
564                         params_.index = from_utf8(cmd.getArg(1));
565                         break;
566                 }
567                 InsetIndexParams params;
568                 InsetIndex::string2params(to_utf8(cmd.argument()), params);
569                 cur.recordUndoInset(this);
570                 params_.index = params.index;
571                 params_.range = params.range;
572                 params_.pagefmt = params.pagefmt;
573                 // what we really want here is a TOC update, but that means
574                 // a full buffer update
575                 cur.forceBufferUpdate();
576                 break;
577         }
578
579         case LFUN_INSET_DIALOG_UPDATE:
580                 cur.bv().updateDialog("index", params2string(params_));
581                 break;
582
583         case LFUN_PARAGRAPH_BREAK: {
584                 // Since this inset in single-par anyway, let's use
585                 // return to enter subindexes
586                 FuncRequest fr(LFUN_INDEXMACRO_INSERT, "subindex");
587                 lyx::dispatch(fr);
588                 break;
589         }
590
591         default:
592                 InsetCollapsible::doDispatch(cur, cmd);
593                 break;
594         }
595 }
596
597
598 bool InsetIndex::getStatus(Cursor & cur, FuncRequest const & cmd,
599                 FuncStatus & flag) const
600 {
601         switch (cmd.action()) {
602
603         case LFUN_INSET_MODIFY:
604                 if (cmd.getArg(0) == "changetype") {
605                         docstring const newtype = from_utf8(cmd.getArg(1));
606                         Buffer const & realbuffer = *buffer().masterBuffer();
607                         IndicesList const & indiceslist = realbuffer.params().indiceslist();
608                         Index const * index = indiceslist.findShortcut(newtype);
609                         flag.setEnabled(index != 0);
610                         flag.setOnOff(
611                                 from_utf8(cmd.getArg(1)) == params_.index);
612                         return true;
613                 }
614                 return InsetCollapsible::getStatus(cur, cmd, flag);
615
616         case LFUN_INSET_DIALOG_UPDATE: {
617                 Buffer const & realbuffer = *buffer().masterBuffer();
618                 flag.setEnabled(realbuffer.params().use_indices);
619                 return true;
620         }
621         
622         case LFUN_PARAGRAPH_BREAK:
623                 return macrosPossible("subindex");
624         
625         case LFUN_INDEXMACRO_INSERT:
626                 return macrosPossible(cmd.getArg(0));
627
628         default:
629                 return InsetCollapsible::getStatus(cur, cmd, flag);
630         }
631 }
632
633
634 void InsetIndex::getSortkey(otexstream & os, OutputParams const & runparams) const
635 {
636         Paragraph const & par = paragraphs().front();
637         InsetList::const_iterator it = par.insetList().begin();
638         for (; it != par.insetList().end(); ++it) {
639                 Inset & inset = *it->inset;
640                 if (inset.lyxCode() == INDEXMACRO_SORTKEY_CODE) {
641                         InsetIndexMacro const & iim =
642                                 static_cast<InsetIndexMacro const &>(inset);
643                         iim.getLatex(os, runparams);
644                         return;
645                 }
646         }
647 }
648
649
650 docstring InsetIndex::getSortkeyAsText(OutputParams const & runparams) const
651 {
652         Paragraph const & par = paragraphs().front();
653         InsetList::const_iterator it = par.insetList().begin();
654         for (; it != par.insetList().end(); ++it) {
655                 Inset & inset = *it->inset;
656                 if (inset.lyxCode() == INDEXMACRO_SORTKEY_CODE) {
657                         otexstringstream os;
658                         InsetIndexMacro const & iim =
659                                 static_cast<InsetIndexMacro const &>(inset);
660                         iim.getLatex(os, runparams);
661                         return os.str();
662                 }
663         }
664         return from_ascii("");
665 }
666
667
668 void InsetIndex::getSubentries(otexstream & os, OutputParams const & runparams) const
669 {
670         Paragraph const & par = paragraphs().front();
671         InsetList::const_iterator it = par.insetList().begin();
672         int i = 0;
673         for (; it != par.insetList().end(); ++it) {
674                 Inset & inset = *it->inset;
675                 if (inset.lyxCode() == INDEXMACRO_CODE) {
676                         InsetIndexMacro const & iim =
677                                 static_cast<InsetIndexMacro const &>(inset);
678                         if (iim.params().type == InsetIndexMacroParams::Subindex) {
679                                 ++i;
680                                 if (i > 2)
681                                         return;
682                                 os << "!";
683                                 iim.getLatex(os, runparams);
684                         }
685                 }
686         }
687 }
688
689
690 std::vector<docstring> InsetIndex::getSubentriesAsText(OutputParams const & runparams,
691                                                        bool const asLabel) const
692 {
693         std::vector<docstring> subentries;
694
695         Paragraph const & par = paragraphs().front();
696         InsetList::const_iterator it = par.insetList().begin();
697         int i = 0;
698         for (; it != par.insetList().end(); ++it) {
699                 Inset & inset = *it->inset;
700                 if (inset.lyxCode() == INDEXMACRO_CODE) {
701                         InsetIndexMacro const & iim =
702                                 static_cast<InsetIndexMacro const &>(inset);
703                         if (iim.params().type == InsetIndexMacroParams::Subindex) {
704                                 ++i;
705                                 if (i > 2)
706                                         break;
707                                 if (asLabel) {
708                                         docstring const l;
709                                         docstring const sl = iim.getNewLabel(l);
710                                         subentries.emplace_back(sl);
711                                 } else {
712                                         otexstringstream os;
713                                         iim.getLatex(os, runparams);
714                                         subentries.emplace_back(os.str());
715                                 }
716                         }
717                 }
718         }
719
720         return subentries;
721 }
722
723
724 docstring InsetIndex::getMainSubentryAsText(OutputParams const & runparams) const
725 {
726         otexstringstream os;
727         InsetText::latex(os, runparams);
728         return os.str();
729 }
730
731
732 void InsetIndex::getSeeRefs(otexstream & os, OutputParams const & runparams) const
733 {
734         Paragraph const & par = paragraphs().front();
735         InsetList::const_iterator it = par.insetList().begin();
736         for (; it != par.insetList().end(); ++it) {
737                 Inset & inset = *it->inset;
738                 if (inset.lyxCode() == INDEXMACRO_CODE) {
739                         InsetIndexMacro const & iim =
740                                 static_cast<InsetIndexMacro const &>(inset);
741                         if (iim.params().type == InsetIndexMacroParams::See
742                             || iim.params().type == InsetIndexMacroParams::Seealso) {
743                                 iim.getLatex(os, runparams);
744                                 return;
745                         }
746                 }
747         }
748 }
749
750
751 docstring InsetIndex::getSeeAsText(OutputParams const & runparams) const
752 {
753         Paragraph const & par = paragraphs().front();
754         InsetList::const_iterator it = par.insetList().begin();
755         for (; it != par.insetList().end(); ++it) {
756                 Inset & inset = *it->inset;
757                 if (inset.lyxCode() == INDEXMACRO_CODE) {
758                         InsetIndexMacro const & iim =
759                                 static_cast<InsetIndexMacro const &>(inset);
760                         if (iim.params().type == InsetIndexMacroParams::See) {
761                                 otexstringstream os;
762                                 iim.getLatex(os, runparams);
763                                 return os.str();
764                         }
765                 }
766         }
767         return from_ascii("");
768 }
769
770
771 std::vector<docstring> InsetIndex::getSeeAlsoesAsText(OutputParams const & runparams) const
772 {
773         std::vector<docstring> seeAlsoes;
774
775         Paragraph const & par = paragraphs().front();
776         InsetList::const_iterator it = par.insetList().begin();
777         for (; it != par.insetList().end(); ++it) {
778                 Inset & inset = *it->inset;
779                 if (inset.lyxCode() == INDEXMACRO_CODE) {
780                         InsetIndexMacro const & iim =
781                                 static_cast<InsetIndexMacro const &>(inset);
782                         if (iim.params().type == InsetIndexMacroParams::Seealso) {
783                                 otexstringstream os;
784                                 iim.getLatex(os, runparams);
785                                 seeAlsoes.emplace_back(os.str());
786                         }
787                 }
788         }
789
790         return seeAlsoes;
791 }
792
793
794 namespace {
795
796 bool hasInsetWithCode(const InsetIndex * const inset_index, const InsetCode code,
797                                           const std::set<InsetIndexMacroParams::Type> types = {})
798 {
799         Paragraph const & par = inset_index->paragraphs().front();
800         InsetList::const_iterator it = par.insetList().begin();
801         for (; it != par.insetList().end(); ++it) {
802                 Inset & inset = *it->inset;
803                 if (inset.lyxCode() == code) {
804                         if (types.empty())
805                                 return true;
806
807                         LASSERT(code == INDEXMACRO_CODE, return false);
808                         InsetIndexMacro const & iim =
809                                         static_cast<InsetIndexMacro const &>(inset);
810                         if (types.find(iim.params().type) != types.end())
811                                 return true;
812                 }
813         }
814         return false;
815 }
816
817 } // namespace
818
819
820 bool InsetIndex::hasSubentries() const
821 {
822         return hasInsetWithCode(this, INDEXMACRO_CODE, {InsetIndexMacroParams::Subindex});
823 }
824
825
826 bool InsetIndex::hasSeeRef() const
827 {
828         return hasInsetWithCode(this, INDEXMACRO_CODE, {InsetIndexMacroParams::See, InsetIndexMacroParams::Seealso});
829 }
830
831
832 bool InsetIndex::hasSortKey() const
833 {
834         return hasInsetWithCode(this, INDEXMACRO_SORTKEY_CODE);
835 }
836
837
838 bool InsetIndex::macrosPossible(string const type) const
839 {
840         if (type != "see" && type != "seealso"
841             && type != "sortkey" && type != "subindex")
842                 return false;
843
844         Paragraph const & par = paragraphs().front();
845         InsetList::const_iterator it = par.insetList().begin();
846         int subidxs = 0;
847         for (; it != par.insetList().end(); ++it) {
848                 Inset & inset = *it->inset;
849                 if (type == "sortkey" && inset.lyxCode() == INDEXMACRO_SORTKEY_CODE)
850                         return false;
851                 if (inset.lyxCode() == INDEXMACRO_CODE) {
852                         InsetIndexMacro const & iim = static_cast<InsetIndexMacro const &>(inset);
853                         if ((type == "see" || type == "seealso")
854                              && (iim.params().type == InsetIndexMacroParams::See
855                                  || iim.params().type == InsetIndexMacroParams::Seealso))
856                                 return false;
857                         if (type == "subindex"
858                              && iim.params().type == InsetIndexMacroParams::Subindex) {
859                                 ++subidxs;
860                                 if (subidxs > 1)
861                                         return false;
862                         }
863                 }
864         }
865         return true;
866 }
867
868
869 ColorCode InsetIndex::labelColor() const
870 {
871         if (params_.index.empty() || params_.index == from_ascii("idx"))
872                 return InsetCollapsible::labelColor();
873         // FIXME UNICODE
874         ColorCode c = lcolor.getFromLyXName(to_utf8(params_.index)
875                                             + "@" + buffer().fileName().absFileName());
876         if (c == Color_none)
877                 c = InsetCollapsible::labelColor();
878         return c;
879 }
880
881
882 docstring InsetIndex::toolTip(BufferView const &, int, int) const
883 {
884         docstring tip = _("Index Entry");
885         if (buffer().params().use_indices && !params_.index.empty()) {
886                 Buffer const & realbuffer = *buffer().masterBuffer();
887                 IndicesList const & indiceslist = realbuffer.params().indiceslist();
888                 tip += " (";
889                 Index const * index = indiceslist.findShortcut(params_.index);
890                 if (!index)
891                         tip += _("unknown type!");
892                 else
893                         tip += index->index();
894                 tip += ")";
895         }
896         tip += ": ";
897         docstring res = toolTipText(tip);
898         if (!insetindexpagerangetranslator_loc().find(params_.range).empty())
899                 res += "\n" + insetindexpagerangetranslator_loc().find(params_.range);
900         if (!params_.pagefmt.empty() && params_.pagefmt != "default") {
901                 res += "\n" + _("Pagination format:") + " ";
902                 if (params_.pagefmt == "textbf")
903                         res += _("bold");
904                 else if (params_.pagefmt == "textit")
905                         res += _("italic");
906                 else if (params_.pagefmt == "emph")
907                         res += _("emphasized");
908                 else
909                         res += from_utf8(params_.pagefmt);
910         }
911         return res;
912 }
913
914
915 docstring const InsetIndex::buttonLabel(BufferView const & bv) const
916 {
917         InsetLayout const & il = getLayout();
918         docstring label = translateIfPossible(il.labelstring());
919
920         if (buffer().params().use_indices && !params_.index.empty()) {
921                 Buffer const & realbuffer = *buffer().masterBuffer();
922                 IndicesList const & indiceslist = realbuffer.params().indiceslist();
923                 label += " (";
924                 Index const * index = indiceslist.findShortcut(params_.index);
925                 if (!index)
926                         label += _("unknown type!");
927                 else
928                         label += index->index();
929                 label += ")";
930         }
931
932         docstring res;
933         if (!il.contentaslabel() || geometry(bv) != ButtonOnly)
934                 res = label;
935         else {
936                 res = getNewLabel(label);
937                 OutputParams const rp(0);
938                 vector<docstring> sublbls = getSubentriesAsText(rp, true);
939                 for (auto const & sublbl : sublbls) {
940                         res += " " + docstring(1, char_type(0x2023));// TRIANGULAR BULLET
941                         res += " " + sublbl;
942                 }
943         }
944         if (!insetindexpagerangetranslator_latex().find(params_.range).empty())
945                 res += " " + from_ascii(insetindexpagerangetranslator_latex().find(params_.range));
946         return res;
947 }
948
949
950 void InsetIndex::write(ostream & os) const
951 {
952         os << to_utf8(layoutName());
953         params_.write(os);
954         InsetCollapsible::write(os);
955 }
956
957
958 void InsetIndex::read(Lexer & lex)
959 {
960         params_.read(lex);
961         InsetCollapsible::read(lex);
962 }
963
964
965 string InsetIndex::params2string(InsetIndexParams const & params)
966 {
967         ostringstream data;
968         data << "index";
969         params.write(data);
970         return data.str();
971 }
972
973
974 void InsetIndex::string2params(string const & in, InsetIndexParams & params)
975 {
976         params = InsetIndexParams();
977         if (in.empty())
978                 return;
979
980         istringstream data(in);
981         Lexer lex;
982         lex.setStream(data);
983         lex.setContext("InsetIndex::string2params");
984         lex >> "index";
985         params.read(lex);
986 }
987
988
989 void InsetIndex::addToToc(DocIterator const & cpit, bool output_active,
990                                                   UpdateType utype, TocBackend & backend) const
991 {
992         DocIterator pit = cpit;
993         pit.push_back(CursorSlice(const_cast<InsetIndex &>(*this)));
994         docstring str;
995         InsetLayout const & il = getLayout();
996         docstring label = translateIfPossible(il.labelstring());
997         if (!il.contentaslabel())
998                 str = label;
999         else {
1000                 str = getNewLabel(label);
1001                 OutputParams const rp(0);
1002                 vector<docstring> sublbls = getSubentriesAsText(rp, true);
1003                 for (auto const & sublbl : sublbls) {
1004                         str += " " + docstring(1, char_type(0x2023));// TRIANGULAR BULLET
1005                         str += " " + sublbl;
1006                 }
1007         }
1008         string type = "index";
1009         if (buffer().masterBuffer()->params().use_indices)
1010                 type += ":" + to_utf8(params_.index);
1011         TocBuilder & b = backend.builder(type);
1012         b.pushItem(pit, str, output_active);
1013         // Proceed with the rest of the inset.
1014         InsetCollapsible::addToToc(cpit, output_active, utype, backend);
1015         b.pop();
1016 }
1017
1018
1019 void InsetIndex::validate(LaTeXFeatures & features) const
1020 {
1021         if (buffer().masterBuffer()->params().use_indices
1022             && !params_.index.empty()
1023             && params_.index != "idx")
1024                 features.require("splitidx");
1025         InsetCollapsible::validate(features);
1026 }
1027
1028
1029 string InsetIndex::contextMenuName() const
1030 {
1031         return "context-index";
1032 }
1033
1034
1035 string InsetIndex::contextMenu(BufferView const & bv, int x, int y) const
1036 {
1037         // We override the implementation of InsetCollapsible,
1038         // because we have eytra entries.
1039         string owncm = "context-edit-index;";
1040         return owncm + InsetCollapsible::contextMenu(bv, x, y);
1041 }
1042
1043
1044 bool InsetIndex::hasSettings() const
1045 {
1046         return true;
1047 }
1048
1049
1050 bool InsetIndex::insetAllowed(InsetCode code) const
1051 {
1052         switch (code) {
1053         case INDEXMACRO_CODE:
1054         case INDEXMACRO_SORTKEY_CODE:
1055                 return true;
1056         case INDEX_CODE:
1057                 return false;
1058         default:
1059                 return InsetCollapsible::insetAllowed(code);
1060         }
1061 }
1062
1063
1064 /////////////////////////////////////////////////////////////////////
1065 //
1066 // InsetIndexParams
1067 //
1068 ///////////////////////////////////////////////////////////////////////
1069
1070
1071 void InsetIndexParams::write(ostream & os) const
1072 {
1073         os << ' ';
1074         if (!index.empty())
1075                 os << to_utf8(index);
1076         else
1077                 os << "idx";
1078         os << '\n';
1079         os << "range "
1080            << insetindexpagerangetranslator().find(range)
1081            << '\n';
1082         os << "pageformat "
1083            << pagefmt
1084            << '\n';
1085 }
1086
1087
1088 void InsetIndexParams::read(Lexer & lex)
1089 {
1090         if (lex.eatLine())
1091                 index = lex.getDocString();
1092         else
1093                 index = from_ascii("idx");
1094         if (lex.checkFor("range")) {
1095                 string st = lex.getString();
1096                 if (lex.eatLine()) {
1097                         st = lex.getString();
1098                         range = insetindexpagerangetranslator().find(lex.getString());
1099                 }
1100         }
1101         if (lex.checkFor("pageformat") && lex.eatLine()) {
1102                 pagefmt = lex.getString();
1103         }
1104 }
1105
1106
1107 /////////////////////////////////////////////////////////////////////
1108 //
1109 // InsetPrintIndex
1110 //
1111 ///////////////////////////////////////////////////////////////////////
1112
1113 InsetPrintIndex::InsetPrintIndex(Buffer * buf, InsetCommandParams const & p)
1114         : InsetCommand(buf, p)
1115 {}
1116
1117
1118 ParamInfo const & InsetPrintIndex::findInfo(string const & /* cmdName */)
1119 {
1120         static ParamInfo param_info_;
1121         if (param_info_.empty()) {
1122                 param_info_.add("type", ParamInfo::LATEX_OPTIONAL,
1123                                 ParamInfo::HANDLING_ESCAPE);
1124                 param_info_.add("name", ParamInfo::LATEX_OPTIONAL,
1125                                 ParamInfo::HANDLING_LATEXIFY);
1126                 param_info_.add("literal", ParamInfo::LYX_INTERNAL);
1127         }
1128         return param_info_;
1129 }
1130
1131
1132 docstring InsetPrintIndex::screenLabel() const
1133 {
1134         bool const printall = suffixIs(getCmdName(), '*');
1135         bool const multind = buffer().masterBuffer()->params().use_indices;
1136         if ((!multind
1137              && getParam("type") == from_ascii("idx"))
1138             || (getParam("type").empty() && !printall))
1139                 return _("Index");
1140         Buffer const & realbuffer = *buffer().masterBuffer();
1141         IndicesList const & indiceslist = realbuffer.params().indiceslist();
1142         Index const * index = indiceslist.findShortcut(getParam("type"));
1143         if (!index && !printall)
1144                 return _("Unknown index type!");
1145         docstring res = printall ? _("All indexes") : index->index();
1146         if (!multind)
1147                 res += " (" + _("non-active") + ")";
1148         else if (contains(getCmdName(), "printsubindex"))
1149                 res += " (" + _("subindex") + ")";
1150         return res;
1151 }
1152
1153
1154 bool InsetPrintIndex::isCompatibleCommand(string const & s)
1155 {
1156         return s == "printindex" || s == "printsubindex"
1157                 || s == "printindex*" || s == "printsubindex*";
1158 }
1159
1160
1161 void InsetPrintIndex::doDispatch(Cursor & cur, FuncRequest & cmd)
1162 {
1163         switch (cmd.action()) {
1164
1165         case LFUN_INSET_MODIFY: {
1166                 if (cmd.argument() == from_ascii("toggle-subindex")) {
1167                         string scmd = getCmdName();
1168                         if (contains(scmd, "printindex"))
1169                                 scmd = subst(scmd, "printindex", "printsubindex");
1170                         else
1171                                 scmd = subst(scmd, "printsubindex", "printindex");
1172                         cur.recordUndo();
1173                         setCmdName(scmd);
1174                         break;
1175                 } else if (cmd.argument() == from_ascii("check-printindex*")) {
1176                         string scmd = getCmdName();
1177                         if (suffixIs(scmd, '*'))
1178                                 break;
1179                         scmd += '*';
1180                         cur.recordUndo();
1181                         setParam("type", docstring());
1182                         setCmdName(scmd);
1183                         break;
1184                 }
1185                 InsetCommandParams p(INDEX_PRINT_CODE);
1186                 // FIXME UNICODE
1187                 InsetCommand::string2params(to_utf8(cmd.argument()), p);
1188                 if (p.getCmdName().empty()) {
1189                         cur.noScreenUpdate();
1190                         break;
1191                 }
1192                 cur.recordUndo();
1193                 setParams(p);
1194                 break;
1195         }
1196
1197         default:
1198                 InsetCommand::doDispatch(cur, cmd);
1199                 break;
1200         }
1201 }
1202
1203
1204 bool InsetPrintIndex::getStatus(Cursor & cur, FuncRequest const & cmd,
1205         FuncStatus & status) const
1206 {
1207         switch (cmd.action()) {
1208
1209         case LFUN_INSET_MODIFY: {
1210                 if (cmd.argument() == from_ascii("toggle-subindex")) {
1211                         status.setEnabled(buffer().masterBuffer()->params().use_indices);
1212                         status.setOnOff(contains(getCmdName(), "printsubindex"));
1213                         return true;
1214                 } else if (cmd.argument() == from_ascii("check-printindex*")) {
1215                         status.setEnabled(buffer().masterBuffer()->params().use_indices);
1216                         status.setOnOff(suffixIs(getCmdName(), '*'));
1217                         return true;
1218                 } if (cmd.getArg(0) == "index_print"
1219                     && cmd.getArg(1) == "CommandInset") {
1220                         InsetCommandParams p(INDEX_PRINT_CODE);
1221                         InsetCommand::string2params(to_utf8(cmd.argument()), p);
1222                         if (suffixIs(p.getCmdName(), '*')) {
1223                                 status.setEnabled(true);
1224                                 status.setOnOff(false);
1225                                 return true;
1226                         }
1227                         Buffer const & realbuffer = *buffer().masterBuffer();
1228                         IndicesList const & indiceslist =
1229                                 realbuffer.params().indiceslist();
1230                         Index const * index = indiceslist.findShortcut(p["type"]);
1231                         status.setEnabled(index != 0);
1232                         status.setOnOff(p["type"] == getParam("type"));
1233                         return true;
1234                 } else
1235                         return InsetCommand::getStatus(cur, cmd, status);
1236         }
1237
1238         case LFUN_INSET_DIALOG_UPDATE: {
1239                 status.setEnabled(buffer().masterBuffer()->params().use_indices);
1240                 return true;
1241         }
1242
1243         default:
1244                 return InsetCommand::getStatus(cur, cmd, status);
1245         }
1246 }
1247
1248
1249 void InsetPrintIndex::updateBuffer(ParIterator const &, UpdateType, bool const /*deleted*/)
1250 {
1251         Index const * index =
1252                 buffer().masterParams().indiceslist().findShortcut(getParam("type"));
1253         if (index)
1254                 setParam("name", index->index());
1255 }
1256
1257
1258 void InsetPrintIndex::latex(otexstream & os, OutputParams const & runparams_in) const
1259 {
1260         if (!buffer().masterBuffer()->params().use_indices) {
1261                 if (getParam("type") == from_ascii("idx"))
1262                         os << "\\printindex" << termcmd;
1263                 return;
1264         }
1265         OutputParams runparams = runparams_in;
1266         os << getCommand(runparams);
1267 }
1268
1269
1270 void InsetPrintIndex::validate(LaTeXFeatures & features) const
1271 {
1272         features.require("makeidx");
1273         if (buffer().masterBuffer()->params().use_indices)
1274                 features.require("splitidx");
1275         InsetCommand::validate(features);
1276 }
1277
1278
1279 string InsetPrintIndex::contextMenuName() const
1280 {
1281         return buffer().masterBuffer()->params().use_indices ?
1282                 "context-indexprint" : string();
1283 }
1284
1285
1286 bool InsetPrintIndex::hasSettings() const
1287 {
1288         return buffer().masterBuffer()->params().use_indices;
1289 }
1290
1291
1292 class IndexEntry
1293 {
1294 public:
1295         /// Builds an entry for the index.
1296         IndexEntry(const InsetIndex * inset, OutputParams const * runparams) : inset_(inset), runparams_(runparams)
1297         {
1298                 LASSERT(runparams, return);
1299
1300                 // Convert the inset as text. The resulting text usually only contains an XHTML anchor (<a id='...'/>) and text.
1301                 odocstringstream entry;
1302                 OutputParams ours = *runparams;
1303                 ours.for_toc = false;
1304                 inset_->plaintext(entry, ours);
1305                 entry_ = entry.str();
1306
1307                 // Determine in which index this entry belongs to.
1308                 if (inset_->buffer().masterBuffer()->params().use_indices) {
1309                         index_ = inset_->params_.index;
1310                 }
1311
1312                 // Attempt parsing the inset.
1313                 if (isModern())
1314                         parseAsModern();
1315                 else
1316                         parseAsLegacy();
1317         }
1318
1319         /// When parsing this entry, some errors may be found; they are reported as a single string.
1320         // It is up to the caller to send this string to LYXERR and the output file, as needed.
1321         const docstring & output_error() const
1322         {
1323                 return output_error_;
1324         }
1325
1326         void output_error(XMLStream xs) const
1327         {
1328                 LYXERR0(output_error());
1329                 xs << XMLStream::ESCAPE_NONE << (from_utf8("<!-- Output Error: ") + output_error() + from_utf8(" -->\n"));
1330         }
1331
1332
1333 private:
1334         bool isModern()
1335         {
1336                 std::cout << to_utf8(entry_) << std::endl;
1337
1338                 // If a modern parameter is present, this is definitely a modern index inset. Similarly, if it contains the
1339                 // usual LaTeX symbols (!|@), then it is definitely a legacy index inset. Otherwise, if it has features of
1340                 // neither, it is both: consider this is a modern inset, to trigger the least complex code. Mixing both types
1341                 // is not allowed (i.e. behaviour is undefined).
1342                 const bool is_definitely_modern = inset_->hasSortKey() || inset_->hasSeeRef() || inset_->hasSubentries()
1343                                             || inset_->params_.range != InsetIndexParams::PageRange::None;
1344                 const bool is_definitely_legacy = entry_.find('@') != std::string::npos
1345                                 || entry_.find('|') != std::string::npos || entry_.find('!') != std::string::npos;
1346
1347                 if (is_definitely_legacy && is_definitely_modern)
1348                         output_error_ += from_utf8("Mix of index properties and raw LaTeX index commands is unsupported. ");
1349
1350                 // Truth table:
1351                 // - is_definitely_modern == true:
1352                 //   - is_definitely_legacy == true: error (return whatever)
1353                 //   - is_definitely_legacy == false: return modern
1354                 // - is_definitely_modern == false:
1355                 //   - is_definitely_legacy == true: return legacy
1356                 //   - is_definitely_legacy == false: return modern
1357                 return !is_definitely_legacy;
1358         }
1359
1360         void parseAsModern()
1361         {
1362                 LASSERT(runparams_, return);
1363
1364                 if (inset_->hasSortKey()) {
1365                         sort_as_ = inset_->getSortkeyAsText(*runparams_);
1366                 }
1367
1368                 terms_ = inset_->getSubentriesAsText(*runparams_);
1369                 // The main term is not present in the vector, as it's not a subentry. The main index term is inserted raw in
1370                 // the index inset. Considering that the user either uses the new or the legacy mechanism, the main term is the
1371                 // full string within this inset (i.e. without the subinsets).
1372                 terms_.insert(terms_.begin(), inset_->getMainSubentryAsText(*runparams_));
1373
1374                 has_start_range_ = inset_->params_.range == InsetIndexParams::PageRange::Start;
1375                 has_end_range_ = inset_->params_.range == InsetIndexParams::PageRange::End;
1376
1377                 see_ = inset_->getSeeAsText(*runparams_);
1378                 see_alsoes_ = inset_->getSeeAlsoesAsText(*runparams_);
1379         }
1380
1381         void parseAsLegacy() {
1382                 // Determine if some features are known not to be supported. For now, this is only formatting like
1383                 // \index{alpha@\textbf{alpha}} or \index{alpha@$\alpha$}.
1384                 // @ is supported, but only for sorting, without specific formatting.
1385                 if (entry_.find(from_utf8("@\\")) != lyx::docstring::npos) {
1386                         output_error_ += from_utf8("Unsupported feature: an index entry contains an @\\. "
1387                                                    "Complete entry: \"") + entry_ + from_utf8("\". ");
1388                 }
1389                 if (entry_.find(from_utf8("@$")) != lyx::docstring::npos) {
1390                         output_error_ += from_utf8("Unsupported feature: an index entry contains an @$. "
1391                                                    "Complete entry: \"") + entry_ + from_utf8("\". ");
1392                 }
1393
1394                 // Split the string into its main constituents: terms, and command (see, see also, range).
1395                 size_t positionVerticalBar = entry_.find(from_ascii("|")); // What comes before | is (sub)(sub)entries.
1396                 docstring indexTerms = entry_.substr(0, positionVerticalBar);
1397                 docstring command;
1398                 if (positionVerticalBar != lyx::docstring::npos) {
1399                         command = entry_.substr(positionVerticalBar + 1);
1400                 }
1401
1402                 // Handle sorting issues, with @.
1403                 vector<docstring> sortingElements = getVectorFromString(indexTerms, from_ascii("@"), false);
1404                 if (sortingElements.size() == 2) {
1405                         sort_as_ = sortingElements[0];
1406                         indexTerms = sortingElements[1];
1407                 }
1408
1409                 // Handle entries, subentries, and subsubentries.
1410                 terms_ = getVectorFromString(indexTerms, from_ascii("!"), false);
1411
1412                 // Handle ranges. Happily, (| and |) can only be at the end of the string!
1413                 has_start_range_ = entry_.find(from_ascii("|(")) != lyx::docstring::npos;
1414                 has_end_range_ = entry_.find(from_ascii("|)")) != lyx::docstring::npos;
1415
1416                 // - Remove the ranges from the command if they do not appear at the beginning.
1417                 size_t range_index = 0;
1418                 while ((range_index = command.find(from_utf8("|("), range_index)) != std::string::npos)
1419                         command.erase(range_index, 1);
1420                 range_index = 0;
1421                 while ((range_index = command.find(from_utf8("|)"), range_index)) != std::string::npos)
1422                         command.erase(range_index, 1);
1423
1424                 // - Remove the ranges when they are the only vertical bar in the complete string.
1425                 if (command[0] == '(' || command[0] == ')')
1426                         command.erase(0, 1);
1427
1428                 // Handle see and seealso. As "see" is a prefix of "seealso", the order of the comparisons is important.
1429                 // Both commands are mutually exclusive!
1430                 if (command.substr(0, 3) == "see") {
1431                         // Unescape brackets.
1432                         size_t index_argument_begin = 0;
1433                         while ((index_argument_begin = command.find(from_utf8("\\{"), index_argument_begin)) != std::string::npos)
1434                                 command.erase(index_argument_begin, 1);
1435                         size_t index_argument_end = 0;
1436                         while ((index_argument_end = command.find(from_utf8("\\}"), index_argument_end)) != std::string::npos)
1437                                 command.erase(index_argument_end, 1);
1438
1439                         // Retrieve the part between brackets, and remove the complete seealso.
1440                         size_t position_opening_bracket = command.find(from_ascii("{"));
1441                         size_t position_closing_bracket = command.find(from_ascii("}"));
1442                         docstring argument = command.substr(position_opening_bracket + 1,
1443                                                                                                 position_closing_bracket - position_opening_bracket - 1);
1444
1445                         // Parse the argument of referenced entries (or a single one for see).
1446                         if (command.substr(0, 7) == "seealso") {
1447                                 see_alsoes_ = getVectorFromString(argument, from_ascii(","), false);
1448                         } else {
1449                                 see_ = argument;
1450
1451                                 if (see_.find(from_ascii(",")) != std::string::npos) {
1452                                         output_error_ += from_utf8("Several index_argument_end terms found as \"see\"! Only one is "
1453                                                                    "acceptable. Complete entry: \"") + entry_ + from_utf8("\". ");
1454                                 }
1455                         }
1456
1457                         // Remove the complete see/seealso from the commands, in case there is something else to parse.
1458                         command = command.substr(position_closing_bracket + 1);
1459                 }
1460
1461                 // Some parts of the strings are not parsed, as they do not have anything matching in DocBook or XHTML:
1462                 // things like formatting the entry or the page number, other strings for sorting.
1463                 // https://wiki.lyx.org/Tips/Indexing
1464                 // If there are such things in the index entry, then this code may miserably fail. For example, for
1465                 // "Peter|(textbf", no range will be detected.
1466                 if (!command.empty()) {
1467                         output_error_ += from_utf8("Unsupported feature: an index entry contains a | with an unsupported command, ")
1468                                          + command + from_utf8(". Complete entry: \"") + entry_ + from_utf8("\". ");
1469                 }
1470         }
1471
1472 public:
1473         int level() const {
1474                 return terms_.size();
1475         }
1476
1477         const std::vector<docstring>& terms() const {
1478                 return terms_;
1479         }
1480
1481         std::vector<docstring>& terms() {
1482                 return terms_;
1483         }
1484
1485         const InsetIndex* inset() const {
1486                 return inset_;
1487         }
1488
1489 private:
1490         // Input inset. These should only be used when parsing the inset (either parseAsModern or parseAsLegacy, called in
1491         // the constructor).
1492         const InsetIndex * inset_;
1493         OutputParams const * runparams_;
1494         docstring entry_;
1495         docstring index_; // Useful when there are multiple indices in the same document.
1496
1497         // Errors, concatenated as a single string, available as soon as parsing is done, const afterwards (i.e. once
1498         // constructor is done).
1499         docstring output_error_;
1500
1501         // Parsed index entry.
1502         std::vector<docstring> terms_; // Up to three entries, in general.
1503         docstring sort_as_;
1504         docstring command_;
1505         bool has_start_range_;
1506         bool has_end_range_;
1507         docstring see_;
1508         vector<docstring> see_alsoes_;
1509
1510         // Operators used for sorting entries (alphabetical order).
1511         friend bool operator<(IndexEntry const & lhs, IndexEntry const & rhs);
1512 };
1513
1514 bool operator<(IndexEntry const & lhs, IndexEntry const & rhs)
1515 {
1516         if (lhs.terms_.empty())
1517                 return false;
1518
1519         for (unsigned i = 0; i < min(rhs.terms_.size(), lhs.terms_.size()); ++i) {
1520                 int comp = compare_no_case(lhs.terms_[i], rhs.terms_[i]);
1521                 if (comp != 0)
1522                         return comp < 0;
1523         }
1524         return false;
1525 }
1526
1527
1528 namespace {
// Builds the CSS class for an index level: "entry" at depth 0, then one "sub"
// prefix per additional level ("subentry", "subsubentry", ...).
std::string generateCssClassAtDepth(unsigned depth) {
        std::string result;

        for (unsigned level = 0; level < depth; ++level)
                result += "sub";
        result += "entry";

        return result;
}
1539
1540 struct IndexNode {
1541         std::vector<IndexEntry> entries;
1542         std::vector<IndexNode*> children;
1543 };
1544
1545 docstring termAtLevel(const IndexNode* node, unsigned depth)
1546 {
1547         // The typical entry has a depth of 1 to 3: the call stack would then be at most 4 (due to the root node). This
1548         // function could be made constant time by copying the term in each node, but that would make data duplication that
1549         // may fall out of sync; the performance benefit would probably be negligible.
1550         if (!node->entries.empty()) {
1551                 LASSERT(node->entries.begin()->terms().size() >= depth + 1, return from_ascii(""));
1552                 return node->entries.begin()->terms()[depth];
1553         }
1554
1555         if (!node->children.empty()) {
1556                 return termAtLevel(*node->children.begin(), depth);
1557         }
1558
1559         LASSERT(false, return from_ascii(""));
1560 }
1561
1562 void insertIntoNode(const IndexEntry& entry, IndexNode* node, unsigned depth = 0)
1563 {
1564         // depth == 0 is for the root, not yet the index, hence the increase when going to vector size.
1565         for (IndexNode* child : node->children) {
1566                 if (entry.terms()[depth] == termAtLevel(child, depth)) {
1567                         if (depth + 1 == entry.terms().size()) { // == child.entries.begin()->terms().size()
1568                                 // All term entries match: it's an entry.
1569                                 child->entries.emplace_back(entry);
1570                                 return;
1571                         } else {
1572                                 insertIntoNode(entry, child, depth + 1);
1573                                 return;
1574                         }
1575                 }
1576         }
1577
1578         // Out of the loop: no matching child found, create a new (possibly nested) child for this entry. Due to the
1579         // possibility of nestedness, only insert the current entry when the right level is reached. This is needed if the
1580         // first entry for a word has several levels that never appeared.
1581         // In particular, this case is called for the first entry.
1582         IndexNode* new_node = node;
1583         do {
1584                 new_node->children.emplace_back(new IndexNode{{}, {}});
1585                 new_node = new_node->children.back();
1586                 depth += 1;
1587         } while (depth + 1 <= entry.terms().size()); // depth == 0: root node, no text associated.
1588         new_node->entries.emplace_back(entry);
1589 }
1590
1591 IndexNode* buildIndexTree(vector<IndexEntry>& entries)
1592 {
1593         // Sort the entries, first on the main entry, then the subentry, then the subsubentry,
1594         // thanks to the implementation of operator<.
1595         // If this operation is not performed, the algorithm below is no more correct (and ensuring that it works with
1596         // unsorted entries would make its complexity blow up).
1597         stable_sort(entries.begin(), entries.end());
1598
1599         // Cook the index into a nice tree data structure: entries at a given level in the index as a node, with subentries
1600         // as children.
1601         auto* index_root = new IndexNode{{}, {}};
1602         for (const IndexEntry& entry : entries) {
1603                 insertIntoNode(entry, index_root);
1604         }
1605
1606         return index_root;
1607 }
1608
1609 void outputIndexPage(XMLStream & xs, const IndexNode* root_node, unsigned depth = 0) // NOLINT(misc-no-recursion)
1610 {
1611         LASSERT(root_node->entries.size() + root_node->children.size() > 0, return);
1612
1613         xs << xml::StartTag("li", "class='" + generateCssClassAtDepth(depth) + "'");
1614         xs << xml::CR();
1615         xs << XMLStream::ESCAPE_NONE << termAtLevel(root_node, depth);
1616         // By tree assumption, all the entries at this node have the same set of terms.
1617
1618         if (!root_node->entries.empty()) {
1619                 xs << XMLStream::ESCAPE_NONE << " &#8212; "; // Em dash, i.e. long (---).
1620                 unsigned entry_number = 1;
1621
1622                 auto writeLinkToEntry = [&xs](const IndexEntry &entry, unsigned entry_number) {
1623                         std::string const link_attr = "href='#" + entry.inset()->paragraphs()[0].magicLabel() + "'";
1624                         xs << xml::StartTag("a", link_attr);
1625                         xs << from_ascii(std::to_string(entry_number));
1626                         xs << xml::EndTag("a");
1627                 };
1628
1629                 for (unsigned i = 0; i < root_node->entries.size(); ++i) {
1630                         const IndexEntry &entry = root_node->entries[i];
1631
1632                         switch (entry.inset()->params().range) {
1633                                 case InsetIndexParams::PageRange::None:
1634                                         writeLinkToEntry(entry, entry_number);
1635                                         break;
1636                                 case InsetIndexParams::PageRange::Start: {
1637                                         // Try to find the end of the range, if it is just after. Otherwise, the output will be slightly
1638                                         // scrambled, but understandable. Doing better would mean implementing more of the indexing logic here
1639                                         // and more complex indexing here (skipping the end is not just incrementing i). Worst case output:
1640                                         //     1--, 2, --3
1641                                         const bool nextEntryIsEnd = i + 1 < root_node->entries.size() &&
1642                                                                     root_node->entries[i + 1].inset()->params().range ==
1643                                                                     InsetIndexParams::PageRange::End;
1644                                         // No need to check if both entries are for the same terms: they are in the same IndexNode.
1645
1646                                         writeLinkToEntry(entry, entry_number);
1647                                         xs << XMLStream::ESCAPE_NONE << " &#8211; "; // En dash, i.e. semi-long (--).
1648
1649                                         if (nextEntryIsEnd) {
1650                                                 // Skip the next entry in the loop, write it right now, after the dash.
1651                                                 entry_number += 1;
1652                                                 i += 1;
1653                                                 writeLinkToEntry(root_node->entries[i], entry_number);
1654                                         }
1655                                 }
1656                                         break;
1657                                 case InsetIndexParams::PageRange::End:
1658                                         // This range end was not caught by the range start, do it now to avoid losing content.
1659                                         xs << XMLStream::ESCAPE_NONE << " &#8211; "; // En dash, i.e. semi-long (--).
1660                                         writeLinkToEntry(root_node->entries[i], entry_number);
1661                         }
1662
1663                         if (i < root_node->entries.size() - 1) {
1664                                 xs << ", ";
1665                         }
1666                         entry_number += 1;
1667                 }
1668         }
1669
1670         if (!root_node->entries.empty() && !root_node->children.empty()) {
1671                 xs << xml::CR();
1672         }
1673
1674         if (!root_node->children.empty()) {
1675                 xs << xml::StartTag("ul", "class='" + generateCssClassAtDepth(depth) + "'");
1676                 xs << xml::CR();
1677
1678                 for (const IndexNode* child : root_node->children) {
1679                         outputIndexPage(xs, child, depth + 1);
1680                 }
1681
1682                 xs << xml::EndTag("ul");
1683                 xs << xml::CR();
1684         }
1685
1686         xs << xml::EndTag("li");
1687         xs << xml::CR();
1688 }
1689
1690 // Only useful for debugging.
1691 void printTree(const IndexNode* root_node, unsigned depth = 0)
1692 {
1693         static const std::string pattern = "    ";
1694         std::string prefix;
1695         for (unsigned i = 0; i < depth; ++i) {
1696                 prefix += pattern;
1697         }
1698         const std::string prefix_long = prefix + pattern + pattern;
1699
1700         docstring term_at_level;
1701         if (depth == 0) {
1702                 // The root has no term.
1703                 std::cout << "<ROOT>" << std::endl;
1704         } else {
1705                 LASSERT(depth - 1 <= 10, return); // Check for overflows.
1706                 term_at_level = termAtLevel(root_node, depth - 1);
1707                 std::cout << prefix << to_utf8(term_at_level) << " (x " << std::to_string(root_node->entries.size()) << ")"
1708                           << std::endl;
1709         }
1710
1711         for (const IndexEntry& entry : root_node->entries) {
1712                 if (entry.terms().size() != depth) {
1713                         std::cout << prefix_long << "ERROR: an entry doesn't have the same number of terms" << std::endl;
1714                 }
1715                 if (depth > 0 && entry.terms()[depth - 1] != term_at_level) {
1716                         std::cout << prefix_long << "ERROR: an entry doesn't have the right term at depth " << std::to_string(depth)
1717                                 << std::endl;
1718                 }
1719         }
1720
1721         for (const IndexNode* node : root_node->children) {
1722                 printTree(node, depth + 1);
1723         }
1724 }
1725 }
1726
1727
1728 docstring InsetPrintIndex::xhtml(XMLStream &, OutputParams const & op) const
1729 {
1730         BufferParams const & bp = buffer().masterBuffer()->params();
1731
1732         shared_ptr<Toc const> toc = buffer().tocBackend().toc("index");
1733         if (toc->empty())
1734                 return docstring();
1735
1736         // Collect the index entries in a form we can use them.
1737         vector<IndexEntry> entries;
1738         const docstring & indexType = params().getParamOr("type", from_ascii("idx"));
1739         for (const TocItem& item : *toc) {
1740                 const auto* inset = static_cast<const InsetIndex*>(&(item.dit().inset()));
1741                 if (item.isOutput() && inset->params().index == indexType)
1742                         entries.emplace_back(IndexEntry{inset, &op});
1743         }
1744
1745         // If all the index entries are in notes or not displayed, get out sooner.
1746         if (entries.empty())
1747                 return docstring();
1748
1749         const IndexNode* index_root = buildIndexTree(entries);
1750 #if 0
1751         printTree(index_root);
1752 #endif
1753
1754         // Start generating the XHTML index.
1755         Layout const & lay = bp.documentClass().htmlTOCLayout();
1756         string const & tocclass = lay.defaultCSSClass();
1757         string const tocattr = "class='index " + tocclass + "'";
1758         docstring const indexName = params().getParamOr("name", from_ascii("Index"));
1759
1760         // we'll use our own stream, because we are going to defer everything.
1761         // that's how we deal with the fact that we're probably inside a standard
1762         // paragraph, and we don't want to be.
1763         odocstringstream ods;
1764         XMLStream xs(ods);
1765
1766         xs << xml::StartTag("div", tocattr);
1767         xs << xml::CR();
1768         xs << xml::StartTag(lay.htmltag(), lay.htmlattr());
1769         xs << translateIfPossible(indexName, op.local_font->language()->lang());
1770         xs << xml::EndTag(lay.htmltag());
1771         xs << xml::CR();
1772         xs << xml::StartTag("ul", "class='main'");
1773         xs << xml::CR();
1774
1775         LASSERT(index_root->entries.empty(), return docstring()); // No index entry should have zero terms.
1776         for (const IndexNode* node : index_root->children) {
1777                 outputIndexPage(xs, node);
1778         }
1779
1780         xs << xml::EndTag("ul");
1781         xs << xml::CR();
1782         xs << xml::EndTag("div");
1783
1784         return ods.str();
1785 }
1786
1787 } // namespace lyx