+ if (mathmode)
+ addMathSym(c);
+ return false;
+ }
+ // at least one of mathcommand and textcommand is nonempty
+ bool use_math = (mathmode && !it->second.mathcommand.empty()) ||
+ (!mathmode && it->second.textcommand.empty());
+ if (use_math) {
+ command = it->second.mathcommand;
+ needsTermination = !it->second.mathnotermination();
+ addMathCmd(c);
+ } else {
+ if (!encoding || command.empty()) {
+ command = it->second.textcommand;
+ needsTermination = !it->second.textnotermination();
+ addTextCmd(c);
+ }
+ if (mathmode)
+ addMathSym(c);
+ }
+ return use_math;
+}
+
+
+ // Look up the unicodesymbols entry whose math and/or text command (as
+ // selected by the MATH_CMD / TEXT_CMD bits of \p cmdtype) is exactly \p cmd.
+ //
+ // On a match the key of that entry is returned, \p combining receives the
+ // entry's combining() flag, \p needsTermination is set to the negation of
+ // the matching mathnotermination() / textnotermination() flag and, if
+ // \p req is non-null, the entry's preamble is inserted into it when the
+ // corresponding feature flag is set and the preamble is not empty.
+ // For each entry the math command is tested before the text command.
+ //
+ // If nothing matches, 0 is returned and both \p combining and
+ // \p needsTermination are false.
+ char_type Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
+		bool & combining, bool & needsTermination, set<string> * req)
+{
+	combining = false;
+	needsTermination = false;
+	// An entry may lack a math or a text command, in which case the
+	// respective string is empty. An empty \p cmd would compare equal to
+	// it and yield an unrelated character, so reject it up front.
+	if (cmd.empty())
+		return 0;
+
+	CharInfoMap::const_iterator const end = unicodesymbols.end();
+	CharInfoMap::const_iterator it = unicodesymbols.begin();
+	for (; it != end; ++it) {
+		// Bind by reference: no need to copy two strings per entry.
+		docstring const & math = it->second.mathcommand;
+		docstring const & text = it->second.textcommand;
+		if ((cmdtype & MATH_CMD) && math == cmd) {
+			combining = it->second.combining();
+			needsTermination = !it->second.mathnotermination();
+			if (req && it->second.mathfeature() &&
+			    !it->second.mathpreamble.empty())
+				req->insert(it->second.mathpreamble);
+			return it->first;
+		}
+		if ((cmdtype & TEXT_CMD) && text == cmd) {
+			combining = it->second.combining();
+			needsTermination = !it->second.textnotermination();
+			if (req && it->second.textfeature() &&
+			    !it->second.textpreamble.empty())
+				req->insert(it->second.textpreamble);
+			return it->first;
+		}
+	}
+	return 0;
+}
+
+
+ // Translate a sequence of LaTeX commands in \p cmd into the characters
+ // registered for them in unicodesymbols.
+ //
+ // \p cmdtype selects, through its MATH_CMD and TEXT_CMD bits, whether the
+ // math commands, the text commands, or both are considered.
+ // The returned string holds the characters that could be translated.
+ // \p rem receives the untranslated remainder of \p cmd (empty if the scan
+ // did not end on an unmatched piece).
+ // \p needsTermination is taken from the notermination flag of the last
+ // accepted entry; it is cleared again when the remainder starts with "{}"
+ // (which is then removed) or with a space (leading whitespace is trimmed).
+ // If \p req is non-null, the preambles of the accepted entries are
+ // inserted into it whenever the corresponding feature flag is set and the
+ // preamble is not empty.
+ //
+ // Unbalanced braces in \p cmd are tolerated: a macro argument whose closing
+ // brace is missing is simply not consumed as an argument.
+ docstring Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
+		bool & needsTermination, docstring & rem, set<string> * req)
+{
+	needsTermination = false;
+	rem = empty_docstring();
+	bool const mathmode = cmdtype & MATH_CMD;
+	bool const textmode = cmdtype & TEXT_CMD;
+	docstring symbols;
+	size_t const cmdend = cmd.size();
+	size_t prefix = 0;
+	CharInfoMap::const_iterator const uniend = unicodesymbols.end();
+	// i is the start of the not yet translated part of cmd, j the end
+	// (inclusive) of the piece that is currently being examined.
+	for (size_t i = 0, j = 0; j < cmdend; ++j) {
+		// Also get the char after a backslash
+		if (j + 1 < cmdend && cmd[j] == '\\') {
+			++j;
+			prefix = 1;
+			// Detect things like \=*{e} as well
+			if (j + 3 < cmdend && cmd[j+1] == '*' &&
+			    cmd[j+2] == '{') {
+				++j;
+				prefix = 2;
+			}
+		}
+		// position of the last character before a possible macro
+		// argument
+		size_t m = j;
+		// If a macro argument follows, get it, too
+		// Do it here only for single character commands. Other
+		// combining commands need this too, but they are handled in
+		// the loop below for performance reasons.
+		if (j + 1 < cmdend && cmd[j + 1] == '{') {
+			size_t k = j + 1;
+			int count = 1;
+			while (k < cmdend && count) {
+				k = cmd.find_first_of(from_ascii("{}"), k + 1);
+				// The braces may be unbalanced: bail out
+				// before indexing cmd with npos.
+				if (k == docstring::npos)
+					break;
+				if (cmd[k] == '{')
+					++count;
+				else
+					--count;
+			}
+			if (k != docstring::npos)
+				j = k;
+		} else if (m + 1 < cmdend && isAlphaASCII(cmd[m])) {
+			while (m + 2 < cmdend && isAlphaASCII(cmd[m+1]))
+				m++;
+		}
+		// Start with this substring and try augmenting it when it is
+		// the prefix of some command in the unicodesymbols file
+		docstring subcmd = cmd.substr(i, j - i + 1);
+
+		CharInfoMap::const_iterator it = unicodesymbols.begin();
+		// First part of subcmd which might be a combining character
+		docstring combcmd = (m == j) ? docstring() : cmd.substr(i, m - i + 1);
+		// The combining character of combcmd if it exists
+		CharInfoMap::const_iterator combining = uniend;
+		// Length of the longest command accepted so far for this
+		// piece (0 if none), and the character it stands for
+		size_t unicmd_size = 0;
+		char_type c = 0;
+		for (; it != uniend; ++it) {
+			docstring const math = mathmode ? it->second.mathcommand
+							: docstring();
+			docstring const text = textmode ? it->second.textcommand
+							: docstring();
+			if (!combcmd.empty() && it->second.combining() &&
+			    (math == combcmd || text == combcmd))
+				combining = it;
+			size_t cur_size = max(math.size(), text.size());
+			// The current math or text unicode command cannot
+			// match, or we already matched a longer one
+			if (cur_size < subcmd.size() || cur_size <= unicmd_size)
+				continue;
+
+			docstring tmp = subcmd;
+			size_t k = j;
+			while (prefixIs(math, tmp) || prefixIs(text, tmp)) {
+				++k;
+				if (k >= cmdend || cur_size <= tmp.size())
+					break;
+				tmp += cmd[k];
+			}
+			// No match
+			if (k == j)
+				continue;
+
+			// The last added char caused a mismatch, because
+			// we didn't exhaust the chars in cmd and didn't
+			// exceed the maximum size of the current unicmd
+			if (k < cmdend && cur_size > tmp.size())
+				tmp.resize(tmp.size() - 1);
+
+			// If this is an exact match, we found a (longer)
+			// matching entry in the unicodesymbols file.
+			if (math != tmp && text != tmp)
+				continue;
+			// If we found a combining command, we need to append
+			// the macro argument if this has not been done above.
+			if (tmp == combcmd && combining != uniend &&
+			    k < cmdend && cmd[k] == '{') {
+				size_t l = k;
+				int count = 1;
+				while (l < cmdend && count) {
+					l = cmd.find_first_of(from_ascii("{}"), l + 1);
+					// The braces may be unbalanced: bail
+					// out before indexing cmd with npos.
+					if (l == docstring::npos)
+						break;
+					if (cmd[l] == '{')
+						++count;
+					else
+						--count;
+				}
+				if (l != docstring::npos) {
+					j = l;
+					subcmd = cmd.substr(i, j - i + 1);
+				}
+			}
+			// If the entry doesn't start with '\', we take note
+			// of the match and continue (this is not an ultimate
+			// acceptance, as some other entry may match a longer
+			// portion of the cmd string). However, if the entry
+			// does start with '\', we accept the match only if
+			// this is a valid macro, i.e., either it is a single
+			// (nonletter) char macro, or nothing else follows,
+			// or what follows is a nonletter char, or the last
+			// character is a }.
+			else if (tmp[0] != '\\'
+				 || (tmp.size() == prefix + 1 &&
+				     !isAlphaASCII(tmp[1]) &&
+				     (prefix == 1 || !isAlphaASCII(tmp[2])))
+				 || k == cmdend
+				 || !isAlphaASCII(cmd[k])
+				 || tmp[tmp.size() - 1] == '}'
+				) {
+				c = it->first;
+				j = k - 1;
+				i = j + 1;
+				unicmd_size = cur_size;
+				if (math == tmp)
+					needsTermination = !it->second.mathnotermination();
+				else
+					needsTermination = !it->second.textnotermination();
+				if (req) {
+					if (math == tmp && it->second.mathfeature() &&
+					    !it->second.mathpreamble.empty())
+						req->insert(it->second.mathpreamble);
+					if (text == tmp && it->second.textfeature() &&
+					    !it->second.textpreamble.empty())
+						req->insert(it->second.textpreamble);
+				}
+			}
+		}
+		if (unicmd_size)
+			symbols += c;
+		else if (combining != uniend &&
+			 prefixIs(subcmd, combcmd + '{')) {
+			// We know that subcmd starts with combcmd and
+			// contains an argument in braces.
+			docstring const arg = subcmd.substr(
+				combcmd.length() + 1,
+				subcmd.length() - combcmd.length() - 2);
+			// If arg is a single character we can construct a
+			// combining sequence.
+			char_type a;
+			bool argcomb = false;
+			if (arg.size() == 1 && isAlnumASCII(arg[0]))
+				a = arg[0];
+			else {
+				// Use the version of fromLaTeXCommand() that
+				// parses only one command, since we cannot
+				// use more than one character.
+				bool dummy = false;
+				set<string> r;
+				a = fromLaTeXCommand(arg, cmdtype, argcomb,
+						     dummy, &r);
+				if (a && req && !argcomb)
+					req->insert(r.begin(), r.end());
+			}
+			if (a && !argcomb) {
+				// In unicode the combining character comes
+				// after its base
+				symbols += a;
+				symbols += combining->first;
+				i = j + 1;
+				unicmd_size = 2;
+			}
+		}
+		if (j + 1 == cmdend && !unicmd_size) {
+			// No luck. Return what remains
+			rem = cmd.substr(i);
+			if (needsTermination && !rem.empty()) {
+				if (rem.substr(0, 2) == "{}") {
+					rem = rem.substr(2);
+					needsTermination = false;
+				} else if (rem[0] == ' ') {
+					needsTermination = false;
+					// LaTeX would swallow all spaces
+					rem = ltrim(rem);
+				}
+			}
+		}
+	}
+	return symbols;
+}
+
+
+void Encodings::initUnicodeMath(Buffer const & buffer, bool for_master)
+{
+#ifdef TEX2LYX
+ // The code below is not needed in tex2lyx and requires additional stuff
+ (void)buffer;
+ (void)for_master;
+#else
+ if (for_master) {
+ mathcmd.clear();
+ textcmd.clear();
+ mathsym.clear();