From 0b2fae66e32bb626611e7ce055c2cd8f41d6e151 Mon Sep 17 00:00:00 2001 From: Juergen Spitzmueller Date: Sat, 28 Apr 2018 13:31:29 +0200 Subject: [PATCH] unicodesymbols: add general way to require a feature only for specific encodings A feature can now be required only for specific input or font encodings: - <feature>=enc1;enc2... Require the feature only if the character is used in one of the specified font or input encodings. - <feature>!=enc1;enc2... Require the feature only if the character is used in a font or input encoding that is not among the specified. --- lib/unicodesymbols | 74 ++++++++++++++++++++++------------------- src/BufferEncodings.cpp | 5 ++- src/Encoding.cpp | 7 ++-- src/LaTeXFeatures.cpp | 8 ++--- src/Paragraph.cpp | 39 +++++++++++++++++++--- 5 files changed, 85 insertions(+), 48 deletions(-) diff --git a/lib/unicodesymbols b/lib/unicodesymbols index 29fdb11444..c4f2303ccc 100644 --- a/lib/unicodesymbols +++ b/lib/unicodesymbols @@ -25,30 +25,36 @@ # syntax: # ucs4 textcommand textpreamble flags mathcommand mathpreamble -# textcommand and textpreamble are used if the symbol occurs in textmode. -# mathcommand and mathpreamble are used if the symbol occurs in mathmode. -# Both mathcommand and mathpreamble are optional. -# textpreamble and mathpreamble can either be a feature known by the LaTeXFeatures -# class (e.g. tipa), or a LaTeX command (e.g. \\usepackage{bla}). -# Features may be combined using '|', in this case one of the alternatives is -# chosen. The algorithm tries to satisfy as many requirements as possible. -# Therefore it may depend on the whole document contents which feature is chosen. -# Known flags: -# - combining This is a combining char that will get combined with a base char -# - force Always output replacement command -# - force=enc1;enc2... Always output replacement command in the specified encodings. -# - force!=en1;en2... Always output replacement command in all but the specified encodings.
-# Symbols are never forced in encodings with iconv name -# UTF-8 and package none (currently only utf8-plain). -# - mathalpha This character is considered as a math variable in mathmode -# - notermination=text Do not terminate this textcommand (by {} or space). -# This is set by default if textcommand ends with }. -# - notermination=math Do not terminate this mathcommand (by {} or space). -# This is set by default if mathcommand ends with }. -# - notermination=both Do not terminate this textcommand and mathcommand (by {} or space). -# - notermination=none Always terminate this textcommand and mathcommand (by {} or space). -# - tipashortcut= Shortcut notation for TIPA -# - deprecated Do not use this symbol for backwards conversion in LyX and tex2lyx. +# +# * textcommand and textpreamble are used if the symbol occurs in textmode. +# * mathcommand and mathpreamble are used if the symbol occurs in mathmode. +# * Both mathcommand and mathpreamble are optional. +# * textpreamble and mathpreamble can either be a feature known by the LaTeXFeatures +# class (e.g. tipa), or a LaTeX command (e.g. \\usepackage{bla}). +# * Features may be combined using '|', in this case one of the alternatives is +# chosen. The algorithm tries to satisfy as many requirements as possible. +# Therefore it may depend on the whole document contents which feature is chosen. +# * A feature can be required only for specific input encodings or font encodings: +# - <feature>=enc1;enc2... Require the feature only if the character is used in +# one of the specified font or input encodings. +# - <feature>!=enc1;enc2... Require the feature only if the character is used in +# a font or input encoding that is not among the specified. +# * Known flags: +# - combining This is a combining char that will get combined with a base char +# - force Always output replacement command +# - force=enc1;enc2... Always output replacement command in the specified encodings. +# - force!=enc1;enc2...
Always output replacement command in all but the specified encodings. +# Symbols are never forced in encodings with iconv name +# UTF-8 and package none (currently only utf8-plain). +# - mathalpha This character is considered as a math variable in mathmode +# - notermination=text Do not terminate this textcommand (by {} or space). +# This is set by default if textcommand ends with }. +# - notermination=math Do not terminate this mathcommand (by {} or space). +# This is set by default if mathcommand ends with }. +# - notermination=both Do not terminate this textcommand and mathcommand (by {} or space). +# - notermination=none Always terminate this textcommand and mathcommand (by {} or space). +# - tipashortcut= Shortcut notation for TIPA +# - deprecated Do not use this symbol for backwards conversion in LyX and tex2lyx. # # 2 Latin-1 Supplement @@ -186,8 +192,8 @@ 0x011f "\\u{g}" "" "mathalpha" "\\breve{g}" # LATIN SMALL LETTER G WITH BREVE 0x0120 "\\.{G}" "" "mathalpha" "\\dot{G}" # LATIN CAPITAL LETTER G WITH DOT ABOVE 0x0121 "\\.{g}" "" "mathalpha" "\\dot{g}" # LATIN SMALL LETTER G WITH DOT ABOVE -0x0122 "\\c{G}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{G}" "accents,cedilla" # LATIN CAPITAL LETTER G WITH CEDILLA (actually a comma accent, Latvian) -0x0123 "\\c{g}" "textbaltic" "mathalpha,force=utf8;utf8x,notermination=math" "\\mathaccent96 g" "" # LATIN SMALL LETTER G WITH CEDILLA (actually a comma above accent, Latvian) +0x0122 "\\c{G}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{G}" "accents,cedilla" # LATIN CAPITAL LETTER G WITH CEDILLA (actually a comma accent, Latvian) +0x0123 "\\c{g}" "textbaltic!=L7x" "mathalpha,force=utf8;utf8x,notermination=math" "\\mathaccent96 g" "" # LATIN SMALL LETTER G WITH CEDILLA (actually a comma above accent, Latvian) 0x0124 "\\^{H}" "" "mathalpha" "\\hat{H}" # LATIN CAPITAL LETTER H WITH CIRCUMFLEX 0x0125 "\\^{h}" "" "mathalpha" "\\hat{h}" # LATIN SMALL LETTER H WITH CIRCUMFLEX #0x0126 "" "" "" "" "" # LATIN 
CAPITAL LETTER H WITH STROKE @@ -206,13 +212,13 @@ 0x0133 "ij" "" "mathalpha,force=utf8x,notermination=both" "ij" "" # LATIN SMALL LIGATURE IJ 0x0134 "\\^{J}" "" "mathalpha" "\\hat{J}" # LATIN CAPITAL LETTER J WITH CIRCUMFLEX 0x0135 "\\^{\\j}" "" "mathalpha,force=utf8" "\\hat{\\jmath}" "" # LATIN SMALL LETTER J WITH CIRCUMFLEX -0x0136 "\\c{K}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{K}" "accents,cedilla" # LATIN CAPITAL LETTER K WITH CEDILLA (actually a comma accent, Latvian) -0x0137 "\\c{k}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{k}" "accents,cedilla" # LATIN SMALL LETTER K WITH CEDILLA (actually a comma accent, Latvian) +0x0136 "\\c{K}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{K}" "accents,cedilla" # LATIN CAPITAL LETTER K WITH CEDILLA (actually a comma accent, Latvian) +0x0137 "\\c{k}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{k}" "accents,cedilla" # LATIN SMALL LETTER K WITH CEDILLA (actually a comma accent, Latvian) #0x0138 "" "" "" "" "" # LATIN SMALL LETTER KRA 0x0139 "\\'{L}" "" "mathalpha" "\\acute{L}" # LATIN CAPITAL LETTER L WITH ACUTE 0x013a "\\'{l}" "" "mathalpha" "\\acute{l}" # LATIN SMALL LETTER L WITH ACUTE -0x013b "\\c{L}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{L}" "accents,cedilla" # LATIN CAPITAL LETTER L WITH CEDILLA (actually a comma accent, Latvian) -0x013c "\\c{l}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{l}" "accents,cedilla" # LATIN SMALL LETTER L WITH CEDILLA (actually a comma accent, Latvian) +0x013b "\\c{L}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{L}" "accents,cedilla" # LATIN CAPITAL LETTER L WITH CEDILLA (actually a comma accent, Latvian) +0x013c "\\c{l}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{l}" "accents,cedilla" # LATIN SMALL LETTER L WITH CEDILLA (actually a comma accent, Latvian) 0x013d "\\v{L}" "" "mathalpha" "L\\mkern-7mu\\mathchar19" # LATIN CAPITAL LETTER L WITH CARON 0x013e "\\v{l}" "" "mathalpha" "l\\mkern-5mu\\mathchar19" # LATIN SMALL LETTER L WITH 
CARON #0x013f "L\\textperiodcentered" "" "" "" "" # LATIN CAPITAL LETTER L WITH MIDDLE DOT @@ -221,8 +227,8 @@ 0x0142 "\\l" "" "mathalpha,notermination=math" "\\mathchar'40\\mkern-5mu l" # LATIN SMALL LETTER L WITH STROKE 0x0143 "\\'{N}" "" "mathalpha" "\\acute{N}" # LATIN CAPITAL LETTER N WITH ACUTE 0x0144 "\\'{n}" "" "mathalpha" "\\acute{n}" # LATIN SMALL LETTER N WITH ACUTE -0x0145 "\\c{N}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{N}" "accents,cedilla" # LATIN CAPITAL LETTER N WITH CEDILLA (actually a comma accent, Latvian) -0x0146 "\\c{n}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{n}" "accents,cedilla" # LATIN SMALL LETTER N WITH CEDILLA (actually a comma accent, Latvian) +0x0145 "\\c{N}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{N}" "accents,cedilla" # LATIN CAPITAL LETTER N WITH CEDILLA (actually a comma accent, Latvian) +0x0146 "\\c{n}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{n}" "accents,cedilla" # LATIN SMALL LETTER N WITH CEDILLA (actually a comma accent, Latvian) 0x0147 "\\v{N}" "" "mathalpha" "\\check{N}" # LATIN CAPITAL LETTER N WITH CARON 0x0148 "\\v{n}" "" "mathalpha" "\\check{n}" # LATIN SMALL LETTER N WITH CARON 0x0149 "'n" "" "force=utf8;utf8x,deprecated" "" "" # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE @@ -238,8 +244,8 @@ 0x0153 "\\oe" "" "" "" "" # LATIN SMALL LIGATURE OE 0x0154 "\\'{R}" "" "mathalpha" "\\acute{R}" # LATIN CAPITAL LETTER R WITH ACUTE 0x0155 "\\'{r}" "" "mathalpha" "\\acute{r}" # LATIN SMALL LETTER R WITH ACUTE -0x0156 "\\c{R}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{R}" "accents,cedilla" # LATIN CAPITAL LETTER R WITH CEDILLA (actually a comma accent, Latvian) -0x0157 "\\c{r}" "textbaltic" "mathalpha,force=utf8" "\\cedilla{r}" "accents,cedilla" # LATIN SMALL LETTER R WITH CEDILLA (actually a comma accent, Latvian) +0x0156 "\\c{R}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{R}" "accents,cedilla" # LATIN CAPITAL LETTER R WITH CEDILLA (actually a comma accent, Latvian) +0x0157 
"\\c{r}" "textbaltic!=L7x" "mathalpha,force=utf8" "\\cedilla{r}" "accents,cedilla" # LATIN SMALL LETTER R WITH CEDILLA (actually a comma accent, Latvian) 0x0158 "\\v{R}" "" "mathalpha" "\\check{R}" # LATIN CAPITAL LETTER R WITH CARON 0x0159 "\\v{r}" "" "mathalpha" "\\check{r}" # LATIN SMALL LETTER R WITH CARON 0x015a "\\'{S}" "" "mathalpha" "\\acute{S}" # LATIN CAPITAL LETTER S WITH ACUTE diff --git a/src/BufferEncodings.cpp b/src/BufferEncodings.cpp index 2cd07f2c29..82fd3dc715 100644 --- a/src/BufferEncodings.cpp +++ b/src/BufferEncodings.cpp @@ -93,7 +93,10 @@ void BufferEncodings::validate(char_type c, LaTeXFeatures & features, bool for_m while (!feats.empty()) { string feat; feats = split(feats, feat, ','); - features.require(feat); + // context-dependent features are handled + // in Paragraph::Private::validate() + if (!contains(feat, '=')) + features.require(feat); } } else features.addPreambleSnippet(from_utf8(textpreamble)); diff --git a/src/Encoding.cpp b/src/Encoding.cpp index f6197af13d..026ad3c43e 100644 --- a/src/Encoding.cpp +++ b/src/Encoding.cpp @@ -589,11 +589,10 @@ bool Encodings::isKnownScriptChar(char_type const c, string & preamble) return false; if (it->second.textpreamble() != "textgreek" - && it->second.textpreamble() != "textcyrillic" - && it->second.textpreamble() != "textbaltic") + && it->second.textpreamble() != "textcyrillic") return false; - if (preamble.empty() && it->second.textpreamble() != "textbaltic") { + if (preamble.empty()) { preamble = it->second.textpreamble(); return true; } @@ -609,8 +608,6 @@ bool Encodings::needsScriptWrapper(string const & script, string const & fontenc return (fontenc != "T2A" && fontenc != "T2B" && fontenc != "T2C" && fontenc != "X2"); } - if (script == "textbaltic") - return (fontenc != "L7x"); return false; } diff --git a/src/LaTeXFeatures.cpp b/src/LaTeXFeatures.cpp index 49b93f615c..17d7ae4047 100644 --- a/src/LaTeXFeatures.cpp +++ b/src/LaTeXFeatures.cpp @@ -1388,19 +1388,19 @@ TexString 
LaTeXFeatures::getMacros() const // non-standard text accents: if (mustProvide("textcommaabove") || mustProvide("textcommaaboveright") || - mustProvide("textcommabelow") || mustProvide("textbalticdefs")) + mustProvide("textcommabelow") || mustProvide("textbaltic")) macros << lyxaccent_def; - if (mustProvide("textcommabelow") || mustProvide("textbalticdefs")) + if (mustProvide("textcommabelow") || mustProvide("textbaltic")) macros << textcommabelow_def << '\n'; - if (mustProvide("textcommaabove") || mustProvide("textbalticdefs")) + if (mustProvide("textcommaabove") || mustProvide("textbaltic")) macros << textcommaabove_def << '\n'; if (mustProvide("textcommaaboveright")) macros << textcommaaboveright_def << '\n'; - if (mustProvide("textbalticdefs")) + if (mustProvide("textbaltic")) macros << textbaltic_def << '\n'; // split-level fractions diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp index dc41fb9e96..5fdfb157d0 100644 --- a/src/Paragraph.cpp +++ b/src/Paragraph.cpp @@ -1553,21 +1553,52 @@ void Paragraph::Private::validate(LaTeXFeatures & features) const // then the contents BufferParams const bp = features.runparams().is_child ? 
features.buffer().masterParams() : features.buffer().params(); - string bscript = "textbaltic"; for (pos_type i = 0; i < int(text_.size()) ; ++i) { char_type c = text_[i]; + CharInfo const & ci = Encodings::unicodeCharInfo(c); if (c == 0x0022) { if (features.runparams().isFullUnicode() && bp.useNonTeXFonts) features.require("textquotedblp"); else if (bp.main_font_encoding() != "T1" || ((&owner_->getFontSettings(bp, i))->language()->internalFontEncoding())) features.require("textquotedbl"); - } else if (Encodings::isKnownScriptChar(c, bscript)){ + } else if (ci.textfeature() && contains(ci.textpreamble(), '=')) { + // features that depend on the font or input encoding + string feats = ci.textpreamble(); string fontenc = (&owner_->getFontSettings(bp, i))->language()->fontenc(bp); if (fontenc.empty()) fontenc = features.runparams().main_fontenc; - if (Encodings::needsScriptWrapper("textbaltic", fontenc)) - features.require("textbalticdefs"); + while (!feats.empty()) { + string feat; + feats = split(feats, feat, ','); + if (contains(feat, "!=")) { + // a feature that is required except for the specified + // font or input encodings + string realfeature; + string const contexts = ltrim(split(feat, realfeature, '!'), "="); + // multiple encodings are separated by semicolon + vector<string> context = getVectorFromString(contexts, ";"); + // require feature if the context matches neither current font + // nor input encoding + if (std::find(context.begin(), context.end(), fontenc) == context.end() + && std::find(context.begin(), context.end(), + features.runparams().encoding->name()) == context.end()) + features.require(realfeature); + } else if (contains(feat, '=')) { + // a feature that is required only for the specified + // font or input encodings + string realfeature; + string const contexts = split(feat, realfeature, '='); + // multiple encodings are separated by semicolon + vector<string> context = getVectorFromString(contexts, ";"); + // require feature if the context matches either
current font + // or input encoding + if (std::find(context.begin(), context.end(), fontenc) != context.end() + || std::find(context.begin(), context.end(), + features.runparams().encoding->name()) != context.end()) + features.require(realfeature); + } + } } else if (!bp.use_dash_ligatures && (c == 0x2013 || c == 0x2014) && bp.useNonTeXFonts -- 2.39.5