#include "ParIterator.h"
#include "TexRow.h"
#include "Text.h"
+#include "Encoding.h"
#include "frontends/Application.h"
#include "frontends/alert.h"
static docstring buffer_to_latex(Buffer & buffer)
{
- OutputParams runparams(&buffer.params().encoding());
+ //OutputParams runparams(&buffer.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
odocstringstream ods;
otexstream os(ods);
runparams.nice = true;
if (!opt.ignoreformat) {
str = buffer_to_latex(buffer);
} else {
- OutputParams runparams(&buffer.params().encoding());
+ // OutputParams runparams(&buffer.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = true;
runparams.flavor = OutputParams::XETEX;
runparams.linelen = 10000; //lyxrc.plaintext_linelen;
typedef map<string, string> AccentsMap;
static AccentsMap accents = map<string, string>();
// NOTE: this span is a patch hunk. Lines prefixed with '-' are the removed
// implementation, lines prefixed with '+' are its replacement; the comments
// added here describe the replacement ('+') version.
//
// Registers accent translations in the file-level `accents` map.
//   n      - one or more accent names separated by '|' (e.g. "check|v");
//            every name receives the same set of entries.
//   param  - the base characters, one byte each; param[i] forms the key
//            name + "{" + param[i] + "}".
//   values - UTF-8 text consumed sequentially: the i-th encoded character
//            found in it becomes the value stored for param[i].
// NOTE(review): once the end-of-values branch has run, `start` is set to
// values.size() - 1. If param holds more characters than values holds encoded
// characters, later iterations appear to index values at or past its end --
// confirm that callers always pass tables of matching length.
-static void buildaccent(string name, string param, string values)
+static void buildaccent(string n, string param, string values)
{
- size_t start = 0;
- for (size_t i = 0; i < param.size(); i++) {
- string key = name + "{" + param[i] + "}";
- // get the corresponding utf8-value
- if ((values[start] & 0xc0) != 0xc0) {
- // should not happen, utf8 encoding starts at least with 11xxxxxx
- start++;
- continue;
- }
- for (int j = 1; ;j++) {
- if (start + j >= values.size())
- break;
- if ((values[start+j] & 0xc0) == 0xc0) {
- // This is the first byte of following utf8 char
- accents[key] = values.substr(start, j);
- start += j;
- break;
// Replacement version starts here: split n on '|' and run the same
// param/values walk once per name, restarting at byte 0 of values each time.
+ stringstream s(n);
+ string name;
+ const char delim = '|';
+ while (getline(s, name, delim)) {
+ size_t start = 0;
+ for (size_t i = 0; i < param.size(); i++) {
+ string key = name + "{" + param[i] + "}";
+ // get the corresponding utf8-value
+ if ((values[start] & 0xc0) != 0xc0) {
+ // should not happen, utf8 encoding starts at least with 11xxxxxx
+ // but value for '\dot{i}' is 'i', which is ascii
+ if ((values[start] & 0x80) == 0) {
+ // is ascii
+ accents[key] = values.substr(start, 1);
+ }
+ start++;
+ continue;
+ }
// values[start] has its two top bits set here. Grow j until the byte at
// start+j is no longer of the form 10xxxxxx (or values ends); the j bytes
// beginning at start are then stored as the value for this key.
+ for (int j = 1; ;j++) {
+ if (start + j >= values.size()) {
+ accents[key] = values.substr(start, j);
+ start = values.size() - 1;
+ break;
+ }
+ else if ((values[start+j] & 0xc0) != 0x80) {
+ // This is the first byte of following utf8 char
+ accents[key] = values.substr(start, j);
+ start += j;
+ break;
+ }
}
}
}
// NOTE: this span is a patch hunk. Lines prefixed with '-' are removed table
// entries, lines prefixed with '+' are their replacements, and unmarked lines
// are unchanged context.
//
// Fills the file-level `accents` map. Keys are written here without a leading
// backslash (e.g. "imath", "ddot{\\imath}"); values are UTF-8 string literals.
// Single entries are assigned directly; whole tables go through buildaccent(),
// which pairs the k-th character of its second argument with the k-th encoded
// character of its third argument. In the '+' calls the first argument may
// list several accent names separated by '|', each receiving the same table.
static void buildAccentsMap()
{
accents["imath"] = "ı";
+ accents["i"] = "ı";
+ accents["jmath"] = "ȷ";
+ accents["lyxmathsym{ß}"] = "ß";
+ accents["text{ß}"] = "ß";
accents["ddot{\\imath}"] = "ï";
+ buildaccent("ddot", "aAeEiIioOuUyY",
+ "äÄëËïÏïöÖüÜÿŸ"); // umlaut
+ buildaccent("dot|.", "cCeEGgIizZaAoObBdDfFyY",
+ "ċĊėĖĠġİİżŻȧȦȯȮḃḂḋḊḟḞẏẎ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
accents["acute{\\imath}"] = "í";
- accents["lyxmathsym{ß}"] = "ß";
- buildaccent("ddot", "aeouyAEOUY", "äëöüÿÄËÖÜŸ");
- buildaccent("dot", "aeoyzAEOYZ", "ȧėȯẏżȦĖȮẎŻ");
- buildaccent("acute", "aeouyAEOUY", "äëöüÿÄËÖÜŸ");
- /*
- buildaccent("dacute", "oOuU", "őŐűŰ");
- buildaccent("H", "oOuU", "őŐűŰ"); // dacute in text
- */
- buildaccent("mathring", "uU", "ůŮ");
- buildaccent("r", "uU", "ůŮ"); //mathring in text
- buildaccent("check", "cdnrszCDNRSZ", "čďřňšžČĎŘŇŠŽ");
- buildaccent("hat", "cCoOgGhHsS", "ĉĈôÔĝĜĥĤŝŜ");
- buildaccent("bar", "aAeE", "āĀēĒ");
+ buildaccent("acute", "aAcCeElLoOnNrRsSuUyYzZiI",
+ "áÁćĆéÉĺĹóÓńŃŕŔśŚúÚýÝźŹíÍ");
+ buildaccent("dacute|H|h", "oOuU", "őŐűŰ"); // double acute
+ buildaccent("mathring|r", "aAuUwy",
+ "åÅůŮẘẙ"); // ring
// The "\\imath" / "\\jmath" keys contain more than one character between the
// braces, so they cannot be produced by buildaccent() (which uses a single
// param character per key) and are assigned explicitly.
+ accents["check{\\imath}"] = "ǐ";
+ accents["check{\\jmath}"] = "ǰ";
+ buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
+ "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ"); // caron
+ accents["hat{\\imath}"] = "î";
+ accents["hat{\\jmath}"] = "ĵ";
+ buildaccent("hat|^", "aAeEiIcCgGhHjJsSwWyYzZoOuU",
+ "âÂêÊîÎĉĈĝĜĥĤĵĴŝŜŵŴŷŶẑẐôÔûÛ"); // circ
+ accents["bar{\\imath}"] = "ī";
+ buildaccent("bar|=", "aAeEiIoOuUyY",
+ "āĀēĒīĪōŌūŪȳȲ"); // macron
+ accents["tilde{\\imath}"] = "ĩ";
+ buildaccent("tilde", "aAnNoOiIuU",
+ "ãÃñÑõÕĩĨũŨ"); // tilde
+ accents["breve{\\imath}"] = "ĭ";
+ buildaccent("breve|u", "aAeEgGiIoOuU",
+ "ăĂĕĔğĞĭĬŏŎŭŬ"); // breve
+ accents["grave{\\imath}"] = "ì";
+ buildaccent("grave|`", "aAeEiIoOuUnNwWyY",
+ "àÀèÈìÌòÒùÙǹǸẁẀỳỲ"); // grave
+ buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy",
+ "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ"); // dot below
}
/*
{
if (accents.empty())
buildAccentsMap();
- static regex const accre("\\\\((lyxmathsym|ddot|dot|acute|mathring|r|check|check|hat|bar)\\{[^\\{\\}]+\\}|imath)");
+ static regex const accre("\\\\((.|grave|breve|lyxmathsym|text|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot)\\{[^\\{\\}]+\\}|(i|imath|jmath)(?![a-zA-Z]))");
smatch sub;
for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
sub = *itacc;
par[pos+i] = val[i];
}
addIntervall(pos+val.size(), pos + sub.str(0).size());
+ for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) {
+ // remove traces of any remaining chars
+ par[i] = ' ';
+ }
}
else {
LYXERR0("Not added accent for \"" << key << "\"");
missed = 0;
if (withformat) {
regex_f = identifyFeatures(result);
- string features = "";
+ string features = "";
for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
string a = it->first;
regex_with_format = true;
- features += " " + a;
+ features += " " + a;
// LYXERR0("Identified regex format:" << a);
}
- LYXERR(Debug::FIND, "Identified Features" << features);
+ LYXERR(Debug::FIND, "Identified Features" << features);
}
} else if (regex_with_format) {
// TODO Try adding a AS_STR_INSERTS as last arg
pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ?
int(par.size()) : cur.pos() + len;
- OutputParams runparams(&cur.buffer()->params().encoding());
+ // OutputParams runparams(&cur.buffer()->params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = true;
runparams.flavor = OutputParams::XETEX;
runparams.linelen = 10000; //lyxrc.plaintext_linelen;
odocstringstream ods;
otexstream os(ods);
- OutputParams runparams(&buf.params().encoding());
+ //OutputParams runparams(&buf.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = false;
runparams.flavor = OutputParams::XETEX;
runparams.linelen = 8000; //lyxrc.plaintext_linelen;
DocIterator old_cur = cur;
for (int i = 0; i < increment && cur; cur.forwardPos(), i++) {
}
- if (! cur) {
+ if (! cur || (cur.pit() > old_cur.pit())) {
+ // Are we outside of the paragraph?
+ // This can happen if moving past some UTF8-encoded chars
cur = old_cur;
increment /= 2;
}
} else if (cur.inMathed()) {
odocstringstream ods;
otexstream os(ods);
- OutputParams runparams(&repl_buffer.params().encoding());
+ // OutputParams runparams(&repl_buffer.params().encoding());
+ OutputParams runparams(encodings.fromLyXName("utf8"));
runparams.nice = false;
runparams.flavor = OutputParams::XETEX;
runparams.linelen = 8000; //lyxrc.plaintext_linelen;