]> git.lyx.org Git - lyx.git/commitdiff
unicodesymbols: parse supplementary lines to encode alternative ways to encode symbol...
authorThibaut Cuvelier <tcuvelier@lyx.org>
Sat, 19 Feb 2022 01:10:45 +0000 (02:10 +0100)
committerThibaut Cuvelier <tcuvelier@lyx.org>
Sun, 20 Feb 2022 00:33:03 +0000 (01:33 +0100)
lib/unicodesymbols
src/Encoding.cpp
src/Encoding.h
src/insets/InsetERT.cpp

index 043f4dcc43fb3b4ac4fd1efc78ae3a664f3a1b18..1de6910f36e6b9f026926b637227f40bdefdcef0 100644 (file)
@@ -60,6 +60,7 @@
 #
 0x00a0 "~"                        "" "force=cp862;cp1255;cp1256;koi8-u;iso8859-6;iso8859-7;utf8-platex,notermination=both" "~" "" # NO-BREAK SPACE
 0x00a1 "\\textexclamdown"         "" "force=cp862;cp1255;euc-jp;euc-jp-platex;euc-kr;utf8-platex" # INVERTED EXCLAMATION MARK
+0x00a1 "!`" ""
 0x00a2 "\\textcent"               "textcomp" "force=cp862;cp1255;cp1256;euc-jp;euc-jp-platex;jis;shift-jis-platex" #"\\mathcent" "txfonts|pxfonts" # CENT SIGN
 0x00a3 "\\pounds"                 "" "force=cp862;cp1255;cp1256;iso8859-7;euc-jp;euc-jp-platex;jis;shift-jis-platex" "\\pounds" "" # £ POUND SIGN
 0x00a4 "\\textcurrency"           "textcomp" "force=cp1256;euc-cn;euc-jp;euc-jp-platex;euc-kr;gbk;iso8859-6;utf8-platex" # CURRENCY SYMBOL
@@ -90,6 +91,7 @@
 0x00bd "\\textonehalf"            "textcomp" "force=cp862;cp1255;cp1256;iso8859-7;euc-kr" "\\sfrac{1}{2}" "xfrac" # 1/2 FRACTION
 0x00be "\\textthreequarters"      "textcomp" "force=cp1255;cp1256;euc-kr" "\\sfrac{3}{4}" "xfrac" # 3/4 FRACTION
 0x00bf "\\textquestiondown"       "" "force=cp862;cp1255;euc-jp;euc-kr" # INVERTED QUESTION MARK
+0x00bf "?`" ""
 0x00c0 "\\`{A}"                   "" "mathalpha,force=euc-jp" "\\grave{A}" # LATIN CAPITAL LETTER A WITH GRAVE
 0x00c1 "\\'{A}"                   "" "mathalpha,force=euc-jp" "\\acute{A}" # LATIN CAPITAL LETTER A WITH ACUTE
 0x00c2 "\\^{A}"                   "" "mathalpha,force=euc-jp" "\\hat{A}" # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
index b0d56495cdcac2a31235046f60d31b94cb45bd69..4e47daef8b0dc6d5967ece778a3d7c31423d8a90 100644 (file)
@@ -50,7 +50,7 @@ CharInfoMap unicodesymbols;
 typedef set<char_type> CharSet;
 typedef map<string, CharSet> CharSetMap;
 CharSet forced;
-CharSetMap forcedselected;
+CharSetMap forcedSelected;
 
 typedef set<char_type> MathAlphaSet;
 MathAlphaSet mathalpha;
@@ -99,7 +99,7 @@ CharInfo::CharInfo(
 Encoding::Encoding(string const & n, string const & l, string const & g,
                   string const & i, bool f, bool u, Encoding::Package p)
        : name_(n), latexName_(l), guiName_(g), iconvName_(i), fixedwidth_(f),
-         unsafe_(u), forced_(&forcedselected[n]), package_(p)
+         unsafe_(u), forced_(&forcedSelected[n]), package_(p)
 {
        if (n == "ascii") {
                // ASCII can encode 128 code points and nothing else
@@ -705,20 +705,20 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
 {
        // We must read the symbolsfile first, because the Encoding
        // constructor depends on it.
-       CharSetMap forcednotselected;
-       Lexer symbolslex;
-       symbolslex.setFile(symbolsfile);
+       CharSetMap forcedNotSelected;
+       Lexer symbolsLex;
+       symbolsLex.setFile(symbolsfile);
        bool getNextToken = true;
-       while (symbolslex.isOK()) {
+       while (symbolsLex.isOK()) {
                char_type symbol;
 
                if (getNextToken) {
-                       if (!symbolslex.next(true))
+                       if (!symbolsLex.next(true))
                                break;
                } else
                        getNextToken = true;
 
-               istringstream is(symbolslex.getString());
+               istringstream is(symbolsLex.getString());
                // reading symbol directly does not work if
                // char_type == wchar_t.
                uint32_t tmp;
@@ -726,20 +726,38 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                        break;
                symbol = tmp;
 
-               if (!symbolslex.next(true))
+               // Special case: more than one entry for one character (to add other LaTeX commands).
+               if (unicodesymbols.contains(symbol)) {
+                       if (!symbolsLex.next(true))
+                               break;
+                       docstring textCommand = symbolsLex.getDocString();
+                       if (!symbolsLex.next(true))
+                               break;
+                       string mathCommand = symbolsLex.getString();
+
+                       if (!textCommand.empty())
+                               unicodesymbols.at(symbol).addTextCommand(textCommand);
+                       if (!mathCommand.empty())
+                               unicodesymbols.at(symbol).addMathCommand(textCommand);
+
+                       continue;
+               }
+
+               // The symbol has no entry in unicodesymbols yet: parse the line as a complete, new entry.
+               if (!symbolsLex.next(true))
                        break;
-               docstring textcommand = symbolslex.getDocString();
-               if (!symbolslex.next(true))
+               docstring textCommand = symbolsLex.getDocString();
+               if (!symbolsLex.next(true))
                        break;
-               string textpreamble = symbolslex.getString();
-               if (!symbolslex.next(true))
+               string textPreamble = symbolsLex.getString();
+               if (!symbolsLex.next(true))
                        break;
-               string sflags = symbolslex.getString();
+               string sflags = symbolsLex.getString();
 
-               string tipashortcut;
+               string tipaShortcut;
                int flags = 0;
 
-               if (suffixIs(textcommand, '}'))
+               if (suffixIs(textCommand, '}'))
                        flags |= CharInfoTextNoTermination;
                while (!sflags.empty()) {
                        string flag;
@@ -753,13 +771,13 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                                vector<string> encs =
                                        getVectorFromString(flag.substr(6), ";");
                                for (auto const & enc : encs)
-                                       forcedselected[enc].insert(symbol);
+                                       forcedSelected[enc].insert(symbol);
                                flags |= CharInfoForceSelected;
                        } else if (prefixIs(flag, "force!=")) {
                                vector<string> encs =
                                        getVectorFromString(flag.substr(7), ";");
                                for (auto const & enc : encs)
-                                       forcednotselected[enc].insert(symbol);
+                                       forcedNotSelected[enc].insert(symbol);
                                flags |= CharInfoForceSelected;
                        } else if (flag == "mathalpha") {
                                mathalpha.insert(symbol);
@@ -773,8 +791,8 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                        } else if (flag == "notermination=none") {
                                flags &= ~CharInfoTextNoTermination;
                                flags &= ~CharInfoMathNoTermination;
-                       } else if (contains(flag, "tipaShortcut=")) {
-                               tipashortcut = split(flag, '=');
+                       } else if (contains(flag, "tipashortcut=")) {
+                               tipaShortcut = split(flag, '=');
                        } else if (flag == "deprecated") {
                                flags |= CharInfoDeprecated;
                        } else {
@@ -786,25 +804,25 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                }
                // mathCommand and mathPreamble have been added for 1.6.0.
                // make them optional so that old files still work.
-               int const lineno = symbolslex.lineNumber();
+               int const lineNo = symbolsLex.lineNumber();
                bool breakout = false;
-               docstring mathcommand;
-               string mathpreamble;
-               if (symbolslex.next(true)) {
-                       if (symbolslex.lineNumber() != lineno) {
+               docstring mathCommand;
+               string mathPreamble;
+               if (symbolsLex.next(true)) {
+                       if (symbolsLex.lineNumber() != lineNo) {
                                // line in old format without mathCommand and mathPreamble
                                getNextToken = false;
                        } else {
-                               mathcommand = symbolslex.getDocString();
-                               if (suffixIs(mathcommand, '}'))
+                               mathCommand = symbolsLex.getDocString();
+                               if (suffixIs(mathCommand, '}'))
                                        flags |= CharInfoMathNoTermination;
-                               if (symbolslex.next(true)) {
-                                       if (symbolslex.lineNumber() != lineno) {
+                               if (symbolsLex.next(true)) {
+                                       if (symbolsLex.lineNumber() != lineNo) {
                                                // line in new format with mathCommand only
                                                getNextToken = false;
                                        } else {
                                                // line in new format with mathCommand and mathPreamble
-                                               mathpreamble = symbolslex.getString();
+                                               mathPreamble = symbolsLex.getString();
                                        }
                                } else
                                        breakout = true;
@@ -814,20 +832,20 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                }
 
                // backward compatibility
-               if (mathpreamble == "esintoramsmath")
-                       mathpreamble = "esint|amsmath";
+               if (mathPreamble == "esintoramsmath")
+                       mathPreamble = "esint|amsmath";
 
-               if (!textpreamble.empty())
-                       if (textpreamble[0] != '\\')
+               if (!textPreamble.empty())
+                       if (textPreamble[0] != '\\')
                                flags |= CharInfoTextFeature;
-               if (!mathpreamble.empty())
-                       if (mathpreamble[0] != '\\')
+               if (!mathPreamble.empty())
+                       if (mathPreamble[0] != '\\')
                                flags |= CharInfoMathFeature;
 
                CharInfo info = CharInfo(
-                       textcommand, mathcommand,
-                       textpreamble, mathpreamble,
-                       tipashortcut, flags);
+                               textCommand, mathCommand,
+                               textPreamble, mathPreamble,
+                               tipaShortcut, flags);
                LYXERR(Debug::INFO, "Read unicode symbol " << symbol << " '"
                                                           << to_utf8(info.textCommand()) << "' '" << info.textPreamble()
                                                           << " '" << info.textFeature() << ' ' << info.textNoTermination()
@@ -851,12 +869,12 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                et_end
        };
 
-       LexerKeyword encodingtags[] = {
+       LexerKeyword encodingTags[] = {
                { "encoding", et_encoding },
                { "end", et_end }
        };
 
-       Lexer lex(encodingtags);
+       Lexer lex(encodingTags);
        lex.setFile(encfile);
        lex.setContext("Encodings::read");
        while (lex.isOK()) {
@@ -866,21 +884,21 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                        lex.next();
                        string const name = lex.getString();
                        lex.next();
-                       string const latexname = lex.getString();
+                       string const latexName = lex.getString();
                        lex.next();
-                       string const guiname = lex.getString();
+                       string const guiName = lex.getString();
                        lex.next();
-                       string const iconvname = lex.getString();
+                       string const iconvName = lex.getString();
                        lex.next();
                        string const width = lex.getString();
-                       bool fixedwidth = false;
+                       bool fixedWidth = false;
                        bool unsafe = false;
                        if (width == "fixed")
-                               fixedwidth = true;
+                               fixedWidth = true;
                        else if (width == "variable")
-                               fixedwidth = false;
+                               fixedWidth = false;
                        else if (width == "variableunsafe") {
-                               fixedwidth = false;
+                               fixedWidth = false;
                                unsafe = true;
                        }
                        else
@@ -901,9 +919,9 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                                lex.printError("Unknown package");
 
                        LYXERR(Debug::INFO, "Reading encoding " << name);
-                       encodinglist[name] = Encoding(name, latexname,
-                               guiname, iconvname, fixedwidth, unsafe,
-                               package);
+                       encodinglist[name] = Encoding(name, latexName,
+                                                     guiName, iconvName, fixedWidth, unsafe,
+                                                     package);
 
                        if (lex.lex() != et_end)
                                lex.printError("Missing end");
@@ -920,9 +938,9 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                }
        }
 
-       // Move all information from forcednotselected to forcedselected
-       for (CharSetMap::const_iterator it1 = forcednotselected.begin(); it1 != forcednotselected.end(); ++it1) {
-               for (CharSetMap::iterator it2 = forcedselected.begin(); it2 != forcedselected.end(); ++it2) {
+       // Move all information from forcedNotSelected to forcedSelected
+       for (CharSetMap::const_iterator it1 = forcedNotSelected.begin(); it1 != forcedNotSelected.end(); ++it1) {
+               for (CharSetMap::iterator it2 = forcedSelected.begin(); it2 != forcedSelected.end(); ++it2) {
                        if (it2->first != it1->first)
                                it2->second.insert(it1->second.begin(), it1->second.end());
                }
index 2bfbda051d68b7f00c217785c6f78cdea6aada66..6016f05f02e342391ef2b0cdc57f89ceadc94801 100644 (file)
@@ -69,6 +69,11 @@ public:
                std::vector<docstring> const & text_commands, std::vector<docstring> const & math_commands,
                std::string const & text_preamble, std::string const & math_preamble,
                std::string const & tipa_shortcut, unsigned int flags);
+       /// Append an additional LaTeX command (text mode) for this character.
+       void addTextCommand(docstring const & newTextCommand) { text_commands_.push_back(newTextCommand); }
+       /// Append an additional LaTeX command (math mode) for this character.
+       void addMathCommand(docstring const & newMathCommand) { math_commands_.push_back(newMathCommand); }
+
        // we assume that at least one command is nonempty when using unicodesymbols
        bool isUnicodeSymbol() const { return !text_commands_.empty() || !math_commands_.empty(); }
        /// LaTeX command (text mode) for this character
index 00f497f0319f258556ea3b6b77a2aabf2bcb958b..b3e73ffdede35656a53a29c8412594edf631521b 100644 (file)
@@ -158,6 +158,8 @@ void InsetERT::docbook(XMLStream & xs, OutputParams const & runparams) const
                        os_trimmed.insert(4, from_ascii("}"));
                }
 
+               std::cout << to_utf8(os_trimmed) << std::endl;
+
                // Look into the global table of Unicode characters if there is a match.
                bool termination;
                docstring rem;