bool const mathmode = cmdtype & MATH_CMD;
bool const textmode = cmdtype & TEXT_CMD;
docstring symbols;
- size_t i = 0;
size_t const cmdend = cmd.size();
+ size_t prefix = 0;
CharInfoMap::const_iterator const uniend = unicodesymbols.end();
- for (size_t j = 0; j < cmdend; ++j) {
+ for (size_t i = 0, j = 0; j < cmdend; ++j) {
// Also get the char after a backslash
- if (j + 1 < cmdend && cmd[j] == '\\')
+ if (j + 1 < cmdend && cmd[j] == '\\') {
++j;
+ prefix = 1;
+ // Detect things like \=*{e} as well
+ if (j + 3 < cmdend && cmd[j+1] == '*' &&
+ cmd[j+2] == '{') {
+ ++j;
+ prefix = 2;
+ }
+ }
+ // position of the last character before a possible macro
+ // argument
+ size_t m = j;
// If a macro argument follows, get it, too
+ // Do it here only for single character commands. Other
+ // combining commands need this too, but they are handled in
+ // the loop below for performance reasons.
if (j + 1 < cmdend && cmd[j + 1] == '{') {
size_t k = j + 1;
int count = 1;
}
if (k != docstring::npos)
j = k;
+ } else if (m + 1 < cmdend && isAlphaASCII(cmd[m])) {
+ while (m + 2 < cmdend && isAlphaASCII(cmd[m+1]))
+ m++;
}
// Start with this substring and try augmenting it when it is
// the prefix of some command in the unicodesymbols file
- docstring const subcmd = cmd.substr(i, j - i + 1);
+ docstring subcmd = cmd.substr(i, j - i + 1);
CharInfoMap::const_iterator it = unicodesymbols.begin();
+ // First part of subcmd which might be a combining character
+ docstring combcmd = (m == j) ? docstring() : cmd.substr(i, m - i + 1);
+ // The combining character of combcmd if it exists
+ CharInfoMap::const_iterator combining = uniend;
size_t unicmd_size = 0;
char_type c = 0;
for (; it != uniend; ++it) {
: docstring();
docstring const text = textmode ? it->second.textcommand
: docstring();
+ if (!combcmd.empty() && it->second.combining() &&
+ (math == combcmd || text == combcmd))
+ combining = it;
size_t cur_size = max(math.size(), text.size());
// The current math or text unicode command cannot
// match, or we already matched a longer one
// If this is an exact match, we found a (longer)
// matching entry in the unicodesymbols file.
+ if (math != tmp && text != tmp)
+ continue;
+ // If we found a combining command, we need to append
+ // the macro argument if this has not been done above.
+ if (tmp == combcmd && combining != uniend &&
+ k < cmdend && cmd[k] == '{') {
+ size_t l = k;
+ int count = 1;
+ while (l < cmdend && count && l != docstring::npos) {
+ l = cmd.find_first_of(from_ascii("{}"), l + 1);
+ if (cmd[l] == '{')
+ ++count;
+ else
+ --count;
+ }
+ if (l != docstring::npos) {
+ j = l;
+ subcmd = cmd.substr(i, j - i + 1);
+ }
+ }
// If the entry doesn't start with '\', we take note
// of the match and continue (this is not a ultimate
// acceptance, as some other entry may match a longer
// (nonletter) char macro, or nothing else follows,
// or what follows is a nonletter char, or the last
// character is a }.
- if ((math == tmp || text == tmp)
- && (tmp[0] != '\\'
- || (tmp.size() == 2 && !isAlphaASCII(tmp[1]))
+ else if (tmp[0] != '\\'
+ || (tmp.size() == prefix + 1 &&
+ !isAlphaASCII(tmp[1]) &&
+ (prefix == 1 || !isAlphaASCII(tmp[2])))
|| k == cmdend
|| !isAlphaASCII(cmd[k])
- || tmp[tmp.size() - 1] == '}')
+ || tmp[tmp.size() - 1] == '}'
) {
c = it->first;
j = k - 1;
}
if (unicmd_size)
symbols += c;
- else if (j + 1 == cmdend) {
+ else if (combining != uniend &&
+ prefixIs(subcmd, combcmd + '{')) {
+ // We know that subcmd starts with combcmd and
+ // contains an argument in braces.
+ docstring const arg = subcmd.substr(
+ combcmd.length() + 1,
+ subcmd.length() - combcmd.length() - 2);
+ // If arg is a single character we can construct a
+ // combining sequence.
+ char_type a;
+ bool argcomb = false;
+ if (arg.size() == 1 && isAlnumASCII(arg[0]))
+ a = arg[0];
+ else {
+ // Use the version of fromLaTeXCommand() that
+ // parses only one command, since we cannot
+ // use more than one character.
+ bool dummy = false;
+ set<string> r;
+ a = fromLaTeXCommand(arg, cmdtype, argcomb,
+ dummy, &r);
+ if (a && req && !argcomb)
+ req->insert(r.begin(), r.end());
+ }
+ if (a && !argcomb) {
+ // In unicode the combining character comes
+ // after its base
+ symbols += a;
+ symbols += combining->first;
+ unicmd_size = 2;
+ }
+ }
+ if (j + 1 == cmdend && !unicmd_size) {
// No luck. Return what remains
rem = cmd.substr(i);
if (needsTermination && !rem.empty()) {
if (!s.empty()) {
context.check_layout(os);
os << to_utf8(s);
+ if (!rem.empty())
+ output_ert_inset(os,
+ to_utf8(rem), context);
} else
// we did not find a non-ert version
output_ert_inset(os, name, context);
p.skip_spaces();
}
- // the TIPA Combining diacritical marks
- else if (is_known(t.cs(), known_tipa_marks) || t.cs() == "textvertline") {
- preamble.registerAutomaticallyLoadedPackage("tipa");
- preamble.registerAutomaticallyLoadedPackage("tipx");
- context.check_layout(os);
- if (t.cs() == "textvertline") {
- os << "|";
- skip_braces(p);
- continue;
- }
- // try to see whether the string is in unicodesymbols
- bool termination;
- docstring rem;
- string content = trimSpaceAndEol(p.verbatim_item());
- string command = t.asInput() + "{" + content + "}";
- set<string> req;
- docstring s = encodings.fromLaTeXCommand(from_utf8(command),
- Encodings::TEXT_CMD | Encodings::MATH_CMD,
- termination, rem, &req);
- if (!s.empty()) {
- if (!rem.empty())
- cerr << "When parsing " << command
- << ", result is " << to_utf8(s)
- << "+" << to_utf8(rem) << endl;
- os << content << to_utf8(s);
- } else
- // we did not find a non-ert version
- output_ert_inset(os, command, context);
+ else if (t.cs() == "textvertline") {
+ // FIXME: This is not correct, \textvertline is higher than |
+ os << "|";
+ skip_braces(p);
+ continue;
}
else if (t.cs() == "tone" ) {
Encodings::TEXT_CMD | Encodings::MATH_CMD,
termination, rem, &req);
if (!s.empty()) {
- if (!rem.empty())
- cerr << "When parsing " << command
- << ", result is " << to_utf8(s)
- << "+" << to_utf8(rem) << endl;
os << to_utf8(s);
+ if (!rem.empty())
+ output_ert_inset(os, to_utf8(rem), context);
} else
// we did not find a non-ert version
output_ert_inset(os, command, context);
else if (t.cs() == "=" && (flags & FLAG_TABBING))
output_ert_inset(os, t.asInput(), context);
- // accents (see Table 6 in Comprehensive LaTeX Symbol List)
- else if (t.cs().size() == 1
- && contains("\"'.=^`bcdHkrtuv~", t.cs())) {
- context.check_layout(os);
- // try to see whether the string is in unicodesymbols
- bool termination;
- docstring rem;
- string command = t.asInput() + "{"
- + trimSpaceAndEol(p.verbatim_item())
- + "}";
- set<string> req;
- docstring s = encodings.fromLaTeXCommand(from_utf8(command),
- Encodings::TEXT_CMD | Encodings::MATH_CMD,
- termination, rem, &req);
- if (!s.empty()) {
- if (!rem.empty())
- cerr << "When parsing " << command
- << ", result is " << to_utf8(s)
- << "+" << to_utf8(rem) << endl;
- os << to_utf8(s);
- for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
- preamble.registerAutomaticallyLoadedPackage(*it);
- } else
- // we did not find a non-ert version
- output_ert_inset(os, command, context);
- }
-
else if (t.cs() == "\\") {
context.check_layout(os);
if (p.hasOpt())
// try to see whether the string is in unicodesymbols
// Only use text mode commands, since we are in text mode here,
// and math commands may be invalid (bug 6797)
- bool termination;
- docstring rem;
- set<string> req;
string name = t.asInput();
// handle the dingbats and Cyrillic
if (name == "\\ding" || name == "\\textcyr")
name = name + '{' + p.getArg('{', '}') + '}';
// handle the ifsym characters
- if (name == "\\textifsymbol") {
+ else if (name == "\\textifsymbol") {
string const optif = p.getFullOpt();
string const argif = p.getArg('{', '}');
name = name + optif + '{' + argif + '}';
// handle the \ascii characters
// the case of \ascii within braces, as LyX outputs it, is already
// handled for t.cat() == catBegin
- if (name == "\\ascii") {
+ else if (name == "\\ascii") {
// the code is "\asci\xxx"
name = "{" + name + p.get_token().asInput() + "}";
skip_braces(p);
}
// handle some TIPA special characters
- if (name == "\\textglobfall") {
- name = "End";
- skip_braces(p);
- }
- if (name == "\\textdoublevertline") {
- name = "\\textbardbl";
- skip_braces(p);
- }
- if (name == "\\!" ) {
- if (p.next_token().asInput() == "b") {
- p.get_token(); // eat 'b'
- name = "\\texthtb";
+ else if (preamble.isPackageUsed("tipa")) {
+ if (name == "\\textglobfall") {
+ name = "End";
skip_braces(p);
- }
- if (p.next_token().asInput() == "d") {
- p.get_token();
- name = "\\texthtd";
- skip_braces(p);
- }
- if (p.next_token().asInput() == "g") {
- p.get_token();
- name = "\\texthtg";
- skip_braces(p);
- }
- if (p.next_token().asInput() == "G") {
- p.get_token();
- name = "\\texthtscg";
- skip_braces(p);
- }
- if (p.next_token().asInput() == "j") {
- p.get_token();
- name = "\\texthtbardotlessj";
- skip_braces(p);
- }
- if (p.next_token().asInput() == "o") {
+ } else if (name == "\\s") {
+ // fromLaTeXCommand() does not yet
+ // recognize tipa shortcuts
+ name = "\\textsyllabic";
+ } else if (name == "\\=" &&
+ p.next_token().asInput() == "*") {
+ // fromLaTeXCommand() does not yet
+ // recognize tipa shortcuts
p.get_token();
- name = "\\textbullseye";
+ name = "\\b";
+ } else if (name == "\\textdoublevertline") {
+ // FIXME: This is not correct,
+ // \textdoublevertline is higher than \textbardbl
+ name = "\\textbardbl";
skip_braces(p);
+ } else if (name == "\\!" ) {
+ if (p.next_token().asInput() == "b") {
+ p.get_token(); // eat 'b'
+ name = "\\texthtb";
+ skip_braces(p);
+ } else if (p.next_token().asInput() == "d") {
+ p.get_token();
+ name = "\\texthtd";
+ skip_braces(p);
+ } else if (p.next_token().asInput() == "g") {
+ p.get_token();
+ name = "\\texthtg";
+ skip_braces(p);
+ } else if (p.next_token().asInput() == "G") {
+ p.get_token();
+ name = "\\texthtscg";
+ skip_braces(p);
+ } else if (p.next_token().asInput() == "j") {
+ p.get_token();
+ name = "\\texthtbardotlessj";
+ skip_braces(p);
+ } else if (p.next_token().asInput() == "o") {
+ p.get_token();
+ name = "\\textbullseye";
+ skip_braces(p);
+ }
+ } else if (name == "\\*" ) {
+ if (p.next_token().asInput() == "k") {
+ p.get_token();
+ name = "\\textturnk";
+ skip_braces(p);
+ } else if (p.next_token().asInput() == "r") {
+ p.get_token(); // eat 'r'
+ name = "\\textturnr";
+ skip_braces(p);
+ } else if (p.next_token().asInput() == "t") {
+ p.get_token();
+ name = "\\textturnt";
+ skip_braces(p);
+ } else if (p.next_token().asInput() == "w") {
+ p.get_token();
+ name = "\\textturnw";
+ skip_braces(p);
+ }
}
}
- if (name == "\\*" ) {
- if (p.next_token().asInput() == "k") {
- p.get_token();
- name = "\\textturnk";
- skip_braces(p);
- }
- if (p.next_token().asInput() == "r") {
- p.get_token(); // eat 'b'
- name = "\\textturnr";
- skip_braces(p);
- }
- if (p.next_token().asInput() == "t") {
- p.get_token();
- name = "\\textturnt";
- skip_braces(p);
- }
- if (p.next_token().asInput() == "w") {
- p.get_token();
- name = "\\textturnw";
- skip_braces(p);
- }
+ if ((name.size() == 2 &&
+ contains("\"'.=^`bcdHkrtuv~", name[1]) &&
+ p.next_token().asInput() != "*") ||
+ is_known(name.substr(1), known_tipa_marks)) {
+ // name is a command that corresponds to a
+ // combining character in unicodesymbols.
+ // Append the argument, fromLaTeXCommand()
+ // will either convert it to a single
+ // character or a combining sequence.
+ name += '{' + p.verbatim_item() + '}';
}
// now get the character from unicodesymbols
+ bool termination;
+ docstring rem;
+ set<string> req;
docstring s = encodings.fromLaTeXCommand(from_utf8(name),
Encodings::TEXT_CMD, termination, rem, &req);
if (!s.empty()) {
- if (!rem.empty())
- cerr << "When parsing " << t.cs()
- << ", result is " << to_utf8(s)
- << "+" << to_utf8(rem) << endl;
context.check_layout(os);
os << to_utf8(s);
+ if (!rem.empty())
+ output_ert_inset(os, to_utf8(rem), context);
if (termination)
skip_spaces_braces(p);
for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
output_ert_inset(os, s + ' ', context);
*/
else {
- string name2 = t.asInput();
- if (p.next_token().asInput() == "*") {
+ if (t.asInput() == name &&
+ p.next_token().asInput() == "*") {
// Starred commands like \vspace*{}
p.get_token(); // Eat '*'
- name2 += '*';
+ name += '*';
}
- if (!parse_command(name2, p, os, outer, context))
- output_ert_inset(os, name2, context);
+ if (!parse_command(name, p, os, outer, context))
+ output_ert_inset(os, name, context);
}
}