string key = name + "{" + param[i] + "}";
// get the corresponding utf8-value
if ((values[start] & 0xc0) != 0xc0) {
- // should not happen, utf8 encoding starts at least with 11xxxxxx
+ // should not happen, utf8 encoding starts at least with 11xxxxxx
+ // but value for '\dot{i}' is 'i', which is ascii
+ if ((values[start] & 0x80) == 0) {
+ // is ascii
+ accents[key] = values.substr(start, 1);
+ }
start++;
continue;
}
for (int j = 1; ;j++) {
- if (start + j >= values.size())
- break;
- if ((values[start+j] & 0xc0) == 0xc0) {
- // This is the first byte of following utf8 char
- accents[key] = values.substr(start, j);
- start += j;
- break;
- }
+ if (start + j >= values.size()) {
+ accents[key] = values.substr(start, j);
+ start = values.size() - 1;
+ break;
+ }
+ else if ((values[start+j] & 0xc0) != 0x80) {
+ // This is the first byte of following utf8 char
+ accents[key] = values.substr(start, j);
+ start += j;
+ break;
+ }
}
}
}
// Populates the `accents` table used when matching accent macros.
// Each key is a macro spelling (e.g. "imath", "ddot{\\imath}") and each value
// is the replacement character stored as a UTF-8 string.
// One-off entries are assigned directly; whole accent families are registered
// through buildaccent(names, params, values).
// NOTE(review): names such as "dot|." look like '|'-separated aliases of one
// accent, and params/values look like parallel lists (n-th letter maps to the
// n-th character) -- confirm against buildaccent.
// NOTE(review): several literals below appear mis-encoded in this view
// (UTF-8 bytes rendered as Latin-1 plus escapes such as "\84"); verify them
// against the real file before relying on them.
static void buildAccentsMap()
{
accents["imath"] = "ı";
+ accents["i"] = "ı";
accents["jmath"] = "ȷ";
accents["lyxmathsym{ß}"] = "ß";
+ accents["text{ß}"] = "ß";
accents["ddot{\\imath}"] = "ï";
- buildaccent("ddot", "aAeEIoOuUyY",
- "äÃ\84ëÃ\8bÃ\8föÃ\96üÃ\9cÿŸ"); // umlaut
- buildaccent("dot|.", "cCeEgGIzZaAoObBdDfFyY",
- "ċĊėĖġĠİżŻȧȦȯȮḃḂḋḊḟḞẏẎ");
// NOTE(review): the new "ddot" letter list contains 'i' twice ("iIi"); the
// second occurrence looks redundant -- confirm it is intentional.
+ buildaccent("ddot", "aAeEiIioOuUyY",
+ "äÃ\84ëÃ\8bïÃ\8fïöÃ\96üÃ\9cÿŸ"); // umlaut
+ buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY",
+ "ċĊėĖĠġİİżŻȧȦȯȮḃḂḋḊḟḞẏẎ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
accents["acute{\\imath}"] = "í";
- buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZI",
- "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹÍ");
+ buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI",
+ "áÃ\81Ä\87Ä\86éÃ\89ĺĹóÃ\93Å\84Å\83Å\95Å\94Å\9bÅ\9aúÃ\9aýÃ\9dźŹÃÃ\8d");
buildaccent("dacute|H|h", "oOuU", "őŐűŰ"); // double acute
- buildaccent("mathring|r", "uU", "ůŮ");
+ buildaccent("mathring|r", "aAuUwy",
+ "åÅůŮẘẙ"); // ring
accents["check{\\imath}"] = "ǐ";
accents["check{\\jmath}"] = "ǰ";
- buildaccent("check|v", "cCdDaAeEIoOuUgGkKhHlLnNrRsSTzZ",
- "Ä\8dÄ\8cÄ\8fÄ\8eÇ\8eÇ\8dÄ\9bÄ\9aÇ\8fÇ\92Ç\91Ç\94Ç\93ǧǦǩǨÈ\9fÈ\9eľĽÅ\88Å\87Å\99Å\98šŠŤžŽ"); // caron
+ buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
+ "Ä\8dÄ\8cÄ\8fÄ\8eÇ\8eÇ\8dÄ\9bÄ\9aÇ\90Ç\8fÇ\92Ç\91Ç\94Ç\93ǧǦǩǨÈ\9fÈ\9eľĽÅ\88Å\87Å\99Å\98šŠŤťžŽ"); // caron
accents["hat{\\imath}"] = "î";
accents["hat{\\jmath}"] = "ĵ";
- buildaccent("hat|^", "aAeEiIcCgGhHJsSwWyYzZoOuU",
- "âÃ\82êÃ\8aîÃ\8eÄ\89Ä\88Ä\9dÄ\9cĥĤĴÅ\9dÅ\9cŵŴŷŶáº\91áº\90ôÃ\94ûÃ\9b"); // circ
+ buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU",
+ "âÃ\82êÃ\8aîÃ\8eÄ\89Ä\88Ä\9dÄ\9cĥĤĵĴÅ\9dÅ\9cŵŴŷŶáº\91áº\90ôÃ\94ûÃ\9b"); // circ
accents["bar{\\imath}"] = "ī";
- buildaccent("bar|=", "aAeEIoOuUyY",
- "Ä\81Ä\80Ä\93Ä\92ĪÅ\8dÅ\8cūŪȳȲ"); // macron
+ buildaccent("bar|=", "aAeEiIoOuUyY",
+ "Ä\81Ä\80Ä\93Ä\92īĪÅ\8dÅ\8cūŪȳȲ"); // macron
accents["tilde{\\imath}"] = "ĩ";
- buildaccent("tilde", "aAnNoOIuU",
- "ãÃñÑõÕĨũŨ"); // tilde
+ buildaccent("tilde", "aAnNoOiIuU",
+ "ãÃñÑõÕĩĨũŨ"); // tilde
+ accents["breve{\\imath}"] = "ĭ";
+ buildaccent("breve|u", "aAeEgGiIoOuU",
+ "ăĂĕĔğĞĭĬŏŎŭŬ"); // breve
+ accents["grave{\\imath}"] = "ì";
+ buildaccent("grave|`", "aAeEiIoOuUnNwWyY",
+ "àÀèÈìÌòÒùÙǹǸẁẀỳỲ"); // grave
+ buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy",
+ "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ"); // dot below
}
/*
{
if (accents.empty())
buildAccentsMap();
- static regex const accre("\\\\((lyxmathsym|ddot|dot|.|acute|dacute|h|H|mathring|r|check|v|hat|^|bar|=|tilde)\\{[^\\{\\}]+\\}|imath|jmath)");
+ static regex const accre("\\\\((.|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
smatch sub;
for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
sub = *itacc;
}
addIntervall(pos+val.size(), pos + sub.str(0).size());
for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) {
- // remove any remaining parentheses
+ // remove traces of any remaining chars
par[i] = ' ';
}
}
missed = 0;
if (withformat) {
regex_f = identifyFeatures(result);
- string features = "";
+ string features = "";
for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
string a = it->first;
regex_with_format = true;
- features += " " + a;
+ features += " " + a;
// LYXERR0("Identified regex format:" << a);
}
- LYXERR(Debug::FIND, "Identified Features" << features);
+ LYXERR(Debug::FIND, "Identified Features" << features);
}
} else if (regex_with_format) {