From 6fa0532ed4aaa8b9ddd479d6cef03a8a3083d774 Mon Sep 17 00:00:00 2001 From: Juergen Spitzmueller Date: Mon, 12 Aug 2024 17:10:57 +0200 Subject: [PATCH] Escape special chars in indexes We now have the functionality via sub-insets, and unexperienced users are surprised when they don't get a literal !. The unescaped chars can still be used via ERT. --- lib/doc/UserGuide.lyx | 96 ++++++++++++++++++++++++++++--- lib/doc/de/UserGuide.lyx | 114 +++++-------------------------------- lib/layouts/stdinsets.inc | 2 +- lib/lyx2lyx/lyx_2_5.py | 97 +++++++++++++++++++++++++++++-- src/Layout.cpp | 6 +- src/insets/InsetIndex.cpp | 49 +++++++++------- src/insets/InsetIndex.h | 3 +- src/insets/InsetLayout.cpp | 8 ++- 8 files changed, 233 insertions(+), 142 deletions(-) diff --git a/lib/doc/UserGuide.lyx b/lib/doc/UserGuide.lyx index 1da8d6c80a..115085e82b 100644 --- a/lib/doc/UserGuide.lyx +++ b/lib/doc/UserGuide.lyx @@ -35194,10 +35194,33 @@ Note also that in \SpecialChar LaTeX . This also works in \SpecialChar LyX , - although you have to take care that the character is not formatted. - For this reason, + although you +\change_deleted -712698321 1723474901 +have +\change_inserted -712698321 1723474901 +need +\change_unchanged + to +\change_inserted -712698321 1723474911 +insert +\change_deleted -712698321 1723474914 +take care that +\change_unchanged + the character +\change_deleted -712698321 1723474917 +is not formatted +\change_inserted -712698321 1723474926 +in \SpecialChar TeX + mode +\change_unchanged +. + +\change_deleted -712698321 1723474935 +For this reason, a real exclamation mark has to be inserted to an index entry in a specific way. - See section + +\change_unchanged +See section \begin_inset space ~ \end_inset @@ -35493,10 +35516,21 @@ Note that in \SpecialChar LaTeX . This also works in \SpecialChar LyX , - although you have to take care that the character is not formatted. 
- For this reason, + although you +\change_deleted -712698321 1723475013 +have to take care that the character is not formatted +\change_inserted -712698321 1723475013 +need to insert the character in \SpecialChar TeX + mode +\change_unchanged +. + +\change_deleted -712698321 1723475019 +For this reason, a real @ character has to be inserted to an index entry in a specific way. - See section + +\change_unchanged +See section \begin_inset space ~ \end_inset @@ -35894,13 +35928,39 @@ status collapsed \end_inset . - If you use them literally, + +\change_deleted -712698321 1723475049 +If you use them literally, you might get surprising results or even a non-working index. In any case, - you will not get the character itself. + you will not get the character itself +\change_inserted -712698321 1723475153 +Since the respective functionality is achievable in \SpecialChar LyX + via sub-insets, + these characters are escaped by +\begin_inset Flex Code +status collapsed + +\begin_layout Plain Layout + +\change_inserted -712698321 1723475136 +\begin_inset Quotes qld +\end_inset + + +\end_layout + +\end_inset + + if you insert them to indexes, + so you get the literal character in the output +\change_unchanged +. 
\end_layout \begin_layout Standard + +\change_deleted -712698321 1723475160 In order to use these characters in inset entries, they have to be \begin_inset Quotes els @@ -36002,11 +36062,27 @@ status collapsed \end_inset , +\change_inserted -712698321 1723475173 +If you want the special meaning instead, + you +\change_unchanged \emph on must \emph default - be inserted in a TeX code box (see section + +\change_deleted -712698321 1723475181 +be +\change_unchanged +insert +\change_deleted -712698321 1723475183 +ed +\change_unchanged + +\change_inserted -712698321 1723475191 +these special characters +\change_unchanged +in a TeX code box (see section \begin_inset space ~ \end_inset @@ -37370,6 +37446,8 @@ status collapsed \backslash protect +\change_unchanged + \end_layout \end_inset diff --git a/lib/doc/de/UserGuide.lyx b/lib/doc/de/UserGuide.lyx index 124e892c8a..3807321782 100644 --- a/lib/doc/de/UserGuide.lyx +++ b/lib/doc/de/UserGuide.lyx @@ -34426,10 +34426,8 @@ Beachten Sie außerdem, markiert werden. Das funktioniert auch in \SpecialChar LyX , - allerdings müssen Sie darauf achten, - dass das Zeichen nicht formatiert ist. - Da das Ausrufezeichen in Indexeinträgen diese spezielle Bedeutung hat, - müssen echte Anführungszeichen in einer spezifischen Art und Weise eingegeben werden. + allerdings müssen Sie das Zeichen hierfür im \SpecialChar TeX +-Modus eingeben. Dies wird in Abschnitt \begin_inset space ~ \end_inset @@ -34734,10 +34732,8 @@ Beachten Sie, abgetrennt werden. Das funktioniert auch in \SpecialChar LyX , - allerdings müssen Sie darauf achten, - dass das Zeichen nicht formatiert ist. - Da das @-Zeichen in Indexeinträgen diese spezielle Bedeutung hat, - müssen echte Klammeraffen in einer spezifischen Art und Weise eingegeben werden. + allerdings müssen Sie das Zeichen hierfür im \SpecialChar TeX +-Modus eingeben. Dies wird in Abschnitt \begin_inset space ~ \end_inset @@ -35215,19 +35211,9 @@ status collapsed \end_inset . 
- Wenn Sie diese verwenden, - bekommen Sie unter Umständen überraschende Ergebnisse oder die Erstellung des Stichwortverzeichnisses schlägt sogar fehl. - Jedenfalls bekommen Sie nicht das Zeichen selbst. -\end_layout - -\begin_layout Standard -Um diese Zeichen in Stichworteinträgen zu verwenden, - müssen Sie spezifisch markiert werden: - Ihnen muss ein sog. - Escape-Zeichen vorangestellt werden, - das dem Indexprozessor mitteilt, - dass dieses Zeichen nicht in seiner speziellen Bedeutung gemeint ist. - Per Voreinstellung ist dieses Escape-Zeichen + Da die damit erzielte Funktionalität in \SpecialChar LyX + durch Untereinfügungen zugänglich ist, + werden diese Sonderzeichen mit \begin_inset Flex Code status collapsed @@ -35240,87 +35227,14 @@ status collapsed \end_inset - (aber das kann man auch ändern). - Geben Sie also -\begin_inset Flex Code -status collapsed - -\begin_layout Plain Layout -\begin_inset Quotes qld -\end_inset - -! + maskiert, + wenn Sie sie in einen Index eingeben. + Dadurch bekommen Sie das Zeichen selbst im Index ausgegeben. \end_layout -\end_inset - -, - -\begin_inset Flex Code -status collapsed - -\begin_layout Plain Layout -\begin_inset Quotes qld -\end_inset - -@ -\end_layout - -\end_inset - -, - -\begin_inset Flex Code -status collapsed - -\begin_layout Plain Layout -\begin_inset Quotes qld -\end_inset - - -\begin_inset Quotes qrd -\end_inset - - -\end_layout - -\end_inset - -, - oder -\begin_inset Flex Code -status collapsed - -\begin_layout Plain Layout -\begin_inset Quotes qld -\end_inset - -| -\end_layout - -\end_inset - - ein, - um das entsprechende Zeichen im Stichwortverzeichnis zu bekommen. 
- Beachten Sie, - dass das Escape-Zeichen -\begin_inset Flex Code -status collapsed - -\begin_layout Plain Layout -\begin_inset Quotes qld -\end_inset - - -\end_layout - -\end_inset - - dabei über ein TeX-Code-Kästchen eingegeben werden -\emph on -muss -\emph default - (siehe Abschnitt +\begin_layout Standard +Wenn Sie die Zeichen in ihrer Spezialbedeutung verwenden wollen, + müssen Sie sie über ein TeX-Code-Kästchen eingeben (siehe Abschnitt \begin_inset space ~ \end_inset diff --git a/lib/layouts/stdinsets.inc b/lib/layouts/stdinsets.inc index 9d1eb8697e..8dab576d3d 100644 --- a/lib/layouts/stdinsets.inc +++ b/lib/layouts/stdinsets.inc @@ -420,7 +420,7 @@ InsetLayout Index CustomPars false ForcePlain true ContentAsLabel true - PassThruChars @|! + EscapeChars \"@|! End InsetLayout IndexMacro:see diff --git a/lib/lyx2lyx/lyx_2_5.py b/lib/lyx2lyx/lyx_2_5.py index 962dd8fe99..4f11a6cb79 100644 --- a/lib/lyx2lyx/lyx_2_5.py +++ b/lib/lyx2lyx/lyx_2_5.py @@ -34,8 +34,8 @@ from lyx2lyx_tools import ( # Uncomment only what you need to import, please (parser_tools): # check_token, count_pars_in_inset, del_complete_lines, # del_value, find_complete_lines, find_end_of, -# find_re, find_substring, find_token_backwards, find_token_exact, -# find_tokens, get_bool_value, get_containing_inset, +# find_re, find_token_backwards, find_token_exact, +# find_tokens, get_bool_value, # get_containing_layout, get_option_value, # is_in_inset, set_bool_value from parser_tools import ( @@ -43,7 +43,9 @@ from parser_tools import ( find_end_of_inset, find_end_of_layout, find_re, + find_substring, find_token, + get_containing_inset, get_quoted_value, get_value ) @@ -620,6 +622,90 @@ def revert_nomencl(document): i += 1 +def convert_index_sc(document): + """Wrap each !, @ and | found inside an Index inset (outside ERT) in its own ERT.""" + + i = 0 + while True: + i = find_token(document.body, "\\begin_inset Index", i) + if i == -1: + return + + j = find_end_of_inset(document.body, i) + if j == -1: + document.warning( + "Malformed LyX
document: Can't find end of index inset at line %d" % i + ) + i += 1 + continue + + escchars = ["!", "@", "|"] + for ec in escchars: + k = i + while True: + j = find_end_of_inset(document.body, i) + k = find_substring(document.body, ec, k, j) + if k == -1: + break + if get_containing_inset(document.body, k)[0] == "ERT": + k += 1 + continue + + line = document.body[k] + chunks = line.split(ec) + repl = [] + # split() yields an empty first/last chunk when the line starts/ends + # with ec, so this loop alone emits exactly one ERT per occurrence. + for ch in chunks[:-1]: + repl += [ch] + repl += put_cmd_in_ert(ec) + repl += chunks[-1:] + document.body[k:k+1] = repl + k += len(repl) + i += 1 + + +def revert_index_sc(document): + """Put an ERT '"' in front of each !, @ and | found inside an Index inset (outside ERT).""" + + i = 0 + while True: + i = find_token(document.body, "\\begin_inset Index", i) + if i == -1: + return + + j = find_end_of_inset(document.body, i) + if j == -1: + document.warning( + "Malformed LyX document: Can't find end of index inset at line %d" % i + ) + i += 1 + continue + + escchars = ["!", "@", "|"] + for ec in escchars: + k = i + while True: + j = find_end_of_inset(document.body, i) + k = find_substring(document.body, ec, k, j) + if k == -1: + break + if get_containing_inset(document.body, k)[0] == "ERT": + k += 1 + continue + + line = document.body[k] + chunks = line.split(ec) + repl = [] + for ch in chunks[:-1]: + repl += [ch] + repl += put_cmd_in_ert("\"") + repl += [ec] + repl += chunks[-1:] + document.body[k:k+1] = repl + k += len(repl) + i += 1 + ## # Conversion hub # @@ -632,12 +718,12 @@ convert = [ [624, [convert_biblatex_chicago]], [625, []], [626, []], - [627, [convert_nomencl]] + [627, [convert_nomencl, convert_index_sc]] ] revert = [ - [626, [revert_nomencl]], + [626, [revert_nomencl, revert_index_sc]], [625, [revert_nomencl_textwidth]], [624, [revert_nptextcite]], [623, [revert_biblatex_chicago]], diff --git a/src/Layout.cpp b/src/Layout.cpp index fa5ab4b7ae..10274191d7 100644 ---
a/src/Layout.cpp +++ b/src/Layout.cpp @@ -669,9 +669,11 @@ bool Layout::readIgnoreForcelocal(Lexer & lex, TextClass const & tclass, lex >> pass_thru; break; - case LT_ESCAPE_CHARS: - lex >> escape_chars; + case LT_ESCAPE_CHARS: { + lex.next(true); + escape_chars = lex.getDocString(); break; + } case LT_PASS_THRU_CHARS: lex >> pass_thru_chars; diff --git a/src/insets/InsetIndex.cpp b/src/insets/InsetIndex.cpp index 81870f8282..0f8b16d210 100644 --- a/src/insets/InsetIndex.cpp +++ b/src/insets/InsetIndex.cpp @@ -190,6 +190,11 @@ void InsetIndex::latex(otexstream & ios, OutputParams const & runparams_in) cons // These are the LaTeX and plaintext representations docstring latexstr = ourlatex.str(); docstring plainstr = ourplain.str(); + + // get the escape char from the layout + docstring const escape_char = (runparams.escape_chars.empty()) + ? from_ascii("\"") + : runparams.escape_chars.substr(0,1); // This will get what follows | if anything does, // the command (e.g., see, textbf) for pagination @@ -205,10 +210,10 @@ void InsetIndex::latex(otexstream & ios, OutputParams const & runparams_in) cons cmd = from_utf8(params_.pagefmt); } else { // Check for the | separator to strip the cmd. - // This goes wrong on an escaped "|", but as the escape - // character can be changed in style files, we cannot - // prevent that. + // Consider escaped "|" size_t pos = latexstr.find(from_ascii("|")); + while (pos > 0 && pos < docstring::npos && prefixIs(latexstr.substr(pos - 1), escape_char)) + pos = latexstr.find(from_ascii("|"), pos + 1); if (pos != docstring::npos) { // Put the bit after "|" into cmd... cmd = latexstr.substr(pos + 1); @@ -228,23 +233,22 @@ void InsetIndex::latex(otexstream & ios, OutputParams const & runparams_in) cons getSubentries(otsub, runparams, ourlatex.str()); if (subentries.str().empty()) { // Separate the entries and subentries, i.e., split on "!". 
- // This goes wrong on an escaped "!", but as the escape - // character can be changed in style files, we cannot - // prevent that. + // Consider escaped "!" + // temporarily replace "! with SUBST character + docstring const sub_latexstring = + subst(latexstr, escape_char + from_ascii("!"), docstring(1, 0x001A)); std::vector const levels = - getVectorFromString(latexstr, from_ascii("!"), true); - std::vector const levels_plain = - getVectorFromString(plainstr, from_ascii("!"), true); + getVectorFromString(sub_latexstring, from_ascii("!"), true); vector::const_iterator it = levels.begin(); vector::const_iterator end = levels.end(); - vector::const_iterator it2 = levels_plain.begin(); bool first = true; for (; it != end; ++it) { - if ((*it).empty()) { + // replace back "! + docstring const thislevel = + subst(*it, docstring(1, 0x001A), escape_char + from_ascii("!")); + if (thislevel.empty()) { emptySubentriesWarning(ourlatex.str()); - if (it2 < levels_plain.end()) - ++it2; continue; } // The separator needs to be put back when @@ -261,15 +265,12 @@ void InsetIndex::latex(otexstream & ios, OutputParams const & runparams_in) cons // e.g. \index{LyX@\LyX}, \index{text@\textbf{text}}. // We do this on all levels. // We don't do it if the level already contains a '@', though. - // Plaintext might return nothing (e.g. for ERTs). - // In that case, we use LaTeX. - docstring const spart = (levels_plain.empty() || (*it2).empty()) ? 
*it : *it2; - processLatexSorting(os, runparams, *it, spart); - if (it2 < levels_plain.end()) - ++it2; + // We use a somewhat "plain" representation for this + docstring const spart = Encodings::convertLaTeXCommands(thislevel); + processLatexSorting(os, runparams, thislevel, spart, escape_char); } } else { - processLatexSorting(os, runparams, latexstr, plainstr); + processLatexSorting(os, runparams, latexstr, plainstr, escape_char); os << subentries.str(); } @@ -295,9 +296,14 @@ void InsetIndex::latex(otexstream & ios, OutputParams const & runparams_in) cons void InsetIndex::processLatexSorting(otexstream & os, OutputParams const & runparams, - docstring const & latex, docstring const & spart) const + docstring const & latex, docstring const & spart, + docstring const & esc) const { - if (contains(latex, '\\') && !contains(latex, '@')) { + // Find the first '@' that is not directly preceded by the escape character + size_t at_pos = latex.find(from_ascii("@")); + while (at_pos > 0 && at_pos < docstring::npos && latex.substr(at_pos - 1, 1) == esc) + at_pos = latex.find(from_ascii("@"), at_pos + 1); + if (contains(latex, '\\') && at_pos == docstring::npos) { // Now we need to validate that all characters in // the sorting part are representable in the current // encoding. 
If not try the LaTeX macro which might diff --git a/src/insets/InsetIndex.h b/src/insets/InsetIndex.h index a1b7507a27..f09596df17 100644 --- a/src/insets/InsetIndex.h +++ b/src/insets/InsetIndex.h @@ -81,7 +81,8 @@ private: void latex(otexstream &, OutputParams const &) const override; /// void processLatexSorting(otexstream &, OutputParams const &, - docstring const &, docstring const &) const; + docstring const &, docstring const &, + docstring const &) const; /// bool showInsetDialog(BufferView *) const override; /// diff --git a/src/insets/InsetLayout.cpp b/src/insets/InsetLayout.cpp index 61e574a9ac..581c01c569 100644 --- a/src/insets/InsetLayout.cpp +++ b/src/insets/InsetLayout.cpp @@ -386,9 +386,11 @@ bool InsetLayout::read(Lexer & lex, TextClass const & tclass, lex >> forceplain_; readCustomOrPlain = true; break; - case IL_ESCAPE_CHARS: - lex >> escape_chars_; + case IL_ESCAPE_CHARS: { + lex.next(true); + escape_chars_ = lex.getDocString(); break; + } case IL_PASSTHRU: lex >> passthru_; break; @@ -928,7 +930,7 @@ void InsetLayout::readArgument(Lexer & lex) } else if (tok == "labelfont") { arg.labelfont = lyxRead(lex, arg.labelfont); } else if (tok == "escapechars") { - lex.next(); + lex.next(true); arg.escape_chars = lex.getDocString(); } else if (tok == "passthruchars") { lex.next(); -- 2.39.5