#include "ParIterator.h"
#include "TexRow.h"
#include "Text.h"
+#include "Encoding.h"
#include "frontends/Application.h"
#include "frontends/alert.h"
static docstring buffer_to_latex(Buffer & buffer)
{
- OutputParams runparams(&buffer.params().encoding());
+ //OutputParams runparams(&buffer.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
odocstringstream ods;
otexstream os(ods);
runparams.nice = true;
if (!opt.ignoreformat) {
str = buffer_to_latex(buffer);
} else {
- OutputParams runparams(&buffer.params().encoding());
+ // OutputParams runparams(&buffer.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = true;
runparams.flavor = OutputParams::XETEX;
runparams.linelen = 10000; //lyxrc.plaintext_linelen;
string key = name + "{" + param[i] + "}";
// get the corresponding utf8-value
if ((values[start] & 0xc0) != 0xc0) {
- // should not happen, utf8 encoding starts at least with 11xxxxxx
+ // should not happen: a multi-byte utf8 sequence starts with a 11xxxxxx byte,
+ // but the value for '\dot{i}' is 'i', which is plain ascii (0xxxxxxx)
+ if ((values[start] & 0x80) == 0) {
+ // is ascii
+ accents[key] = values.substr(start, 1);
+ }
start++;
continue;
}
for (int j = 1; ;j++) {
- if (start + j >= values.size())
- break;
- if ((values[start+j] & 0xc0) == 0xc0) {
- // This is the first byte of following utf8 char
- accents[key] = values.substr(start, j);
- start += j;
- break;
- }
+ if (start + j >= values.size()) {
+ accents[key] = values.substr(start, j);
+ start = values.size() - 1;
+ break;
+ }
+ else if ((values[start+j] & 0xc0) != 0x80) {
+ // Not a continuation byte (10xxxxxx), so this is the first byte of the following utf8 char
+ accents[key] = values.substr(start, j);
+ start += j;
+ break;
+ }
}
}
}
static void buildAccentsMap()
{
accents["imath"] = "ı"; // dotless i
+ accents["i"] = "ı"; // "i" as a key also maps to the dotless i
+ accents["jmath"] = "ȷ"; // dotless j
+ accents["lyxmathsym{ß}"] = "ß";
+ accents["text{ß}"] = "ß";
accents["ddot{\\imath}"] = "ï";
+ buildaccent("ddot", "aAeEiIioOuUyY",
+ "äÄëËïÏïöÖüÜÿŸ"); // umlaut (diaeresis); 'i' is listed twice, both entries map to 'ï'
+ buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY",
+ "ċĊėĖĠġİİżŻȧȦȯȮḃḂḋḊḟḞẏẎ"); // dot above; 'I' and 'i' both map to 'İ': dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
accents["acute{\\imath}"] = "í";
- accents["tilde{\\imath}"] = "ĩ";
- accents["jmath"] = "ȷ";
+ buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI",
+ "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹíÍ"); // acute
+ buildaccent("dacute|H|h", "oOuU", "őŐűŰ"); // double acute
+ buildaccent("mathring|r", "aAuUwy",
+ "åÅůŮẘẙ"); // ring above
+ accents["check{\\imath}"] = "ǐ";
+ accents["check{\\jmath}"] = "ǰ";
+ buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
+ "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ"); // caron
+ accents["hat{\\imath}"] = "î";
accents["hat{\\jmath}"] = "ĵ";
- accents["lyxmathsym{ß}"] = "ß";
- buildaccent("ddot", "aeouyAEOUY", "äëöüÿÄËÖÜŸ"); // umlaut
- buildaccent("dot", "aeoyzAEOYZ", "ȧėȯẏżȦĖȮẎŻ");
- buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZI", "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹÍ");
- /*
- buildaccent("dacute", "oOuU", "őŐűŰ");
- buildaccent("H", "oOuU", "őŐűŰ"); // dacute in text
- */
- buildaccent("mathring|r", "uU", "ůŮ");
- buildaccent("check", "cCdDeElLnNrRsSTzZ", "čČďĎěĚľĽňŇřŘšŠŤžŽ"); // caron
- buildaccent("hat", "cCgGhHJsSwWyYoOgG", "ĉĈĝĜĥĤĴŝŜŵŴŷŶôÔĝĜ"); // circ
- buildaccent("bar|=", "aAeEoOuU", "āĀēĒōŌūŪ"); // macron
- buildaccent("tilde", "I", "Ĩ"); // macron
+ buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU",
+ "âÂêÊîÎĉĈĝĜĥĤĵĴŝŜŵŴŷŶẑẐôÔûÛ"); // circumflex
+ accents["bar{\\imath}"] = "ī";
+ buildaccent("bar|=", "aAeEiIoOuUyY",
+ "āĀēĒīĪōŌūŪȳȲ"); // macron
+ accents["tilde{\\imath}"] = "ĩ";
+ buildaccent("tilde", "aAnNoOiIuU",
+ "ãÃñÑõÕĩĨũŨ"); // tilde
+ accents["breve{\\imath}"] = "ĭ";
+ buildaccent("breve|u", "aAeEgGiIoOuU",
+ "ăĂĕĔğĞĭĬŏŎŭŬ"); // breve
+ accents["grave{\\imath}"] = "ì";
+ buildaccent("grave|`", "aAeEiIoOuUnNwWyY",
+ "àÀèÈìÌòÒùÙǹǸẁẀỳỲ"); // grave
+ buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy",
+ "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ"); // dot below
}
/*
{
if (accents.empty())
buildAccentsMap();
- static regex const accre("\\\\((lyxmathsym|ddot|dot|acute|mathring|r|check|check|hat|bar|=)\\{[^\\{\\}]+\\}|imath|jmath)");
+ static regex const accre("\\\\((.|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
smatch sub;
for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
sub = *itacc;
par[pos+i] = val[i];
}
addIntervall(pos+val.size(), pos + sub.str(0).size());
+ for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) {
+ // remove traces of any remaining chars
+ par[i] = ' ';
+ }
}
else {
LYXERR0("Not added accent for \"" << key << "\"");
missed = 0;
if (withformat) {
regex_f = identifyFeatures(result);
- string features = "";
+ string features = "";
for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
string a = it->first;
regex_with_format = true;
- features += " " + a;
+ features += " " + a;
// LYXERR0("Identified regex format:" << a);
}
- LYXERR(Debug::FIND, "Identified Features" << features);
+ LYXERR(Debug::FIND, "Identified Features" << features);
}
} else if (regex_with_format) {
// TODO Try adding a AS_STR_INSERTS as last arg
pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ?
int(par.size()) : cur.pos() + len;
- OutputParams runparams(&cur.buffer()->params().encoding());
+ // OutputParams runparams(&cur.buffer()->params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = true;
runparams.flavor = OutputParams::XETEX;
runparams.linelen = 10000; //lyxrc.plaintext_linelen;
odocstringstream ods;
otexstream os(ods);
- OutputParams runparams(&buf.params().encoding());
+ //OutputParams runparams(&buf.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = false;
runparams.flavor = OutputParams::XETEX;
runparams.linelen = 8000; //lyxrc.plaintext_linelen;
DocIterator old_cur = cur;
for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
}
- if (! cur) {
+ if (! cur || (cur.pit() > old_cur.pit())) {
+ // Are we outside of the paragraph?
+ // This can happen if moving past some UTF8-encoded chars
cur = old_cur;
increment /= 2;
}
} else if (cur.inMathed()) {
odocstringstream ods;
otexstream os(ods);
- OutputParams runparams(&repl_buffer.params().encoding());
+ // OutputParams runparams(&repl_buffer.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = false;
runparams.flavor = OutputParams::XETEX;
runparams.linelen = 8000; //lyxrc.plaintext_linelen;