X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FEncoding.cpp;h=7900aed06f631fd85cf572c2857b5d29ef6cdbc8;hb=a5be70af87123df1af5e2cdd1d713581cd69ba37;hp=99d216f418c2048e70b2422bd49574d4613d13e1;hpb=a2906cb877b6fb737502f4f894e326685f254953;p=lyx.git diff --git a/src/Encoding.cpp b/src/Encoding.cpp index 99d216f418..7900aed06f 100644 --- a/src/Encoding.cpp +++ b/src/Encoding.cpp @@ -3,7 +3,7 @@ * This file is part of LyX, the document processor. * Licence details can be found in the file COPYING. * - * \author Lars Gullik Bjønnes + * \author Lars Gullik Bjønnes * \author Jean-Marc Lasgouttes * \author Dekel Tsur * @@ -14,6 +14,9 @@ #include "Encoding.h" +#include "Buffer.h" +#include "BufferList.h" +#include "InsetIterator.h" #include "LaTeXFeatures.h" #include "Lexer.h" #include "LyXRC.h" @@ -21,6 +24,7 @@ #include "support/debug.h" #include "support/FileName.h" #include "support/lstrings.h" +#include "support/textutils.h" #include "support/unicode.h" #include @@ -34,6 +38,10 @@ namespace lyx { Encodings encodings; +Encodings::MathCommandSet Encodings::mathcmd; +Encodings::TextCommandSet Encodings::textcmd; +Encodings::MathSymbolSet Encodings::mathsym; + namespace { char_type arabic_table[172][4] = { @@ -250,6 +258,9 @@ CharInfoMap unicodesymbols; typedef std::set CharSet; CharSet forced; +typedef std::set MathAlphaSet; +MathAlphaSet mathalpha; + /// The highest code point in UCS4 encoding (1<<20 + 1<<16) char_type const max_ucs4 = 0x110000; @@ -333,7 +344,7 @@ void Encoding::init() const } -docstring Encoding::latexChar(char_type c, bool for_mathed) const +docstring Encoding::latexChar(char_type c, bool no_commands) const { // assure the used encoding is properly initialized init(); @@ -344,7 +355,7 @@ docstring Encoding::latexChar(char_type c, bool for_mathed) const return docstring(1, c); if (encodable_.find(c) != encodable_.end()) return docstring(1, c); - if (for_mathed) + if (no_commands) return docstring(); // c cannot (or should not) be encoded 
in this encoding @@ -377,25 +388,35 @@ vector Encoding::symbolsList() const } -bool Encodings::latexMathChar(char_type c, Encoding const * encoding, - docstring & command) +bool Encodings::latexMathChar(char_type c, bool mathmode, + Encoding const * encoding, docstring & command) { - if (encoding) { + if (encoding) command = encoding->latexChar(c, true); - if (!command.empty()) - return false; - } + CharInfoMap::const_iterator const it = unicodesymbols.find(c); - if (it == unicodesymbols.end()) - throw EncodingException(c); - if (it->second.mathcommand.empty()) { - if (it->second.textcommand.empty()) + if (it == unicodesymbols.end()) { + if (!encoding || command.empty()) throw EncodingException(c); - command = it->second.textcommand; + if (mathmode) + addMathSym(c); return false; } - command = it->second.mathcommand; - return true; + // at least one of mathcommand and textcommand is nonempty + bool use_math = (mathmode && !it->second.mathcommand.empty()) || + (!mathmode && it->second.textcommand.empty()); + if (use_math) { + command = it->second.mathcommand; + addMathCmd(c); + } else { + if (!encoding || command.empty()) { + command = it->second.textcommand; + addTextCmd(c); + } + if (mathmode) + addMathSym(c); + } + return use_math; } @@ -403,7 +424,7 @@ char_type Encodings::fromLaTeXCommand(docstring const & cmd, bool & combining) { CharInfoMap::const_iterator const end = unicodesymbols.end(); CharInfoMap::const_iterator it = unicodesymbols.begin(); - for (; it != end; ++it) { + for (combining = false; it != end; ++it) { docstring const math = it->second.mathcommand; docstring const text = it->second.textcommand; if (math == cmd || text == cmd) { @@ -415,8 +436,11 @@ char_type Encodings::fromLaTeXCommand(docstring const & cmd, bool & combining) } -docstring Encodings::fromLaTeXCommand(docstring const & cmd, docstring & rem) +docstring Encodings::fromLaTeXCommand(docstring const & cmd, docstring & rem, + int cmdtype) { + bool const mathmode = cmdtype & MATH_CMD; + 
bool const textmode = cmdtype & TEXT_CMD; docstring symbols; size_t i = 0; size_t const cmdend = cmd.size(); @@ -447,8 +471,10 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, docstring & rem) size_t unicmd_size = 0; char_type c = 0; for (; it != uniend; ++it) { - docstring const math = it->second.mathcommand; - docstring const text = it->second.textcommand; + docstring const math = mathmode ? it->second.mathcommand + : docstring(); + docstring const text = textmode ? it->second.textcommand + : docstring(); size_t cur_size = max(math.size(), text.size()); // The current math or text unicode command cannot // match, or we already matched a longer one @@ -474,8 +500,23 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, docstring & rem) tmp.resize(tmp.size() - 1); // If this is an exact match, we found a (longer) - // matching command in the unicodesymbols file - if (math == tmp || text == tmp) { + // matching entry in the unicodesymbols file. + // If the entry doesn't start with '\', we take note + // of the match and continue (this is not an ultimate + // acceptance, as some other entry may match a longer + // portion of the cmd string). However, if the entry + // does start with '\', we accept the match only if + // this is a valid macro, i.e., either it is a single + // (nonletter) char macro, or nothing else follows, + // or what follows is a nonletter char, or the last + // character is a }. 
+ if ((math == tmp || text == tmp) + && (tmp[0] != '\\' + || (tmp.size() == 2 && !isAlphaASCII(tmp[1])) + || k == cmdend + || !isAlphaASCII(cmd[k]) + || tmp[tmp.size() - 1] == '}') + ) { c = it->first; j = k - 1; i = j + 1; @@ -492,33 +533,83 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, docstring & rem) } +void Encodings::initUnicodeMath(Buffer const & buffer, bool clear_sets) +{ +#ifdef TEX2LYX + // The code below is not needed in tex2lyx and requires additional stuff + (void)buffer; + (void)clear_sets; +#else + if (clear_sets) { + mathcmd.clear(); + textcmd.clear(); + mathsym.clear(); + } + + // Check master + Inset & inset = buffer.inset(); + InsetIterator it = inset_iterator_begin(inset); + InsetIterator const end = inset_iterator_end(inset); + for (; it != end; ++it) + it->initUnicodeMath(); + + // Check children + BufferList::iterator bit = theBufferList().begin(); + BufferList::iterator const bend = theBufferList().end(); + for (; bit != bend; ++bit) + if (buffer.isChild(*bit)) + initUnicodeMath(**bit, false); +#endif +} + + void Encodings::validate(char_type c, LaTeXFeatures & features, bool for_mathed) { +#ifdef TEX2LYX + // The code below is not needed in tex2lyx and requires additional stuff + (void)c; + (void)features; + (void)for_mathed; +#else CharInfoMap::const_iterator const it = unicodesymbols.find(c); if (it != unicodesymbols.end()) { - // at least one of mathcommand and textcommand is nonempty - bool const use_math = (for_mathed && !it->second.mathcommand.empty()) || + // In mathed, c could be used both in textmode and mathmode + bool const use_math = (for_mathed && isMathCmd(c)) || (!for_mathed && it->second.textcommand.empty()); + bool const use_text = (for_mathed && isTextCmd(c)) || + (!for_mathed && !it->second.textcommand.empty()); if (use_math) { if (!it->second.mathpreamble.empty()) { - if (it->second.mathfeature) - features.require(it->second.mathpreamble); - else + if (it->second.mathfeature) { + string feats = 
it->second.mathpreamble; + while (!feats.empty()) { + string feat; + feats = split(feats, feat, ','); + features.require(feat); + } + } else features.addPreambleSnippet(it->second.mathpreamble); } - } else { + } + if (use_text) { if (!it->second.textpreamble.empty()) { - if (it->second.textfeature) - features.require(it->second.textpreamble); - else + if (it->second.textfeature) { + string feats = it->second.textpreamble; + while (!feats.empty()) { + string feat; + feats = split(feats, feat, ','); + features.require(feat); + } + } else features.addPreambleSnippet(it->second.textpreamble); } - if (for_mathed) { - features.require("relsize"); - features.require("lyxmathsym"); - } } } + if (for_mathed && isMathSym(c)) { + features.require("amstext"); + features.require("lyxmathsym"); + } +#endif } @@ -592,6 +683,12 @@ bool Encodings::isForced(char_type c) } +bool Encodings::isMathAlpha(char_type c) +{ + return mathalpha.count(c); +} + + Encoding const * Encodings::fromLyXName(string const & name) const { EncodingList::const_iterator const it = encodinglist.find(name); @@ -599,8 +696,14 @@ Encoding const * Encodings::fromLyXName(string const & name) const } -Encoding const * Encodings::fromLaTeXName(string const & name) const +Encoding const * Encodings::fromLaTeXName(string const & n) const { + string name = n; + // FIXME: if we have to test for too many of these synonyms, + // we should instead extend the format of lib/encodings + if (n == "ansinew") + name = "cp1252"; + // We don't use find_if because it makes copies of the pairs in // the map. // This linear search is OK since we don't have many encodings. 
@@ -661,16 +764,19 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile) while (!flags.empty()) { string flag; flags = split(flags, flag, ','); - if (flag == "combining") + if (flag == "combining") { info.combining = true; - else if (flag == "force") { + } else if (flag == "force") { info.force = true; forced.insert(symbol); - } else + } else if (flag == "mathalpha") { + mathalpha.insert(symbol); + } else { lyxerr << "Ignoring unknown flag `" << flag << "' for symbol `0x" << hex << symbol << dec << "'." << endl; + } } // mathcommand and mathpreamble have been added for 1.6.0. // make them optional so that old files still work. @@ -719,7 +825,7 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile) // Now read the encodings enum { et_encoding = 1, - et_end, + et_end }; LexerKeyword encodingtags[] = {