+ return entries_[actualIdx_].mathprefixsize;
+ }
+ // Returns the mathpostfixsize stored for the entry the cursor points at,
+ // or 0 when there is no such entry (empty list or cursor past the end).
+ size_t getPostfixSize() const {
+     bool const cursorValid = !entries_.empty() && (actualIdx_ < entries_.size());
+     return cursorValid ? entries_[actualIdx_].mathpostfixsize : 0;
+ }
+ // Rewinds the cursor to the first entry and returns whatever
+ // getStartPos() reports for that position.
+ size_t getFirstPos() {
+     actualIdx_ = 0;
+     size_t const firstStart = getStartPos();
+     return firstStart;
+ }
+ // Returns the mathSize stored for the entry the cursor points at,
+ // or 0 when the cursor does not designate an existing entry.
+ size_t getSize() const {
+     if (!entries_.empty() && (actualIdx_ < entries_.size()))
+         return entries_[actualIdx_].mathSize;
+     return size_t(0);
+ }
+ // Advances the cursor to the next entry (no bounds check here; the
+ // getters above return 0 once the cursor is past the last entry).
+ void incrEntry() {
+     ++actualIdx_;
+ }
+};
+
+ /*
+  * Scan interval_.par and append one KeyInfo to entries_ for every
+  * recognised token (LaTeX macro, '$', '\[' or '\]').
+  *
+  * The work is done in two passes:
+  *  1. All math hulls ($...$, \[...\], \begin{env}...\end{env} for the
+  *     environments listed in rmath) are collected into a local MathInfo.
+  *  2. All tokens matched by rkeys are classified via the global 'keys'
+  *     table and their token/data ranges are computed.
+  *
+  * Regex groups used below:
+  *   rmath: 2 = whole token, 3 = "begin"/"end", 5 = environment name
+  *   rkeys: 2 = whole token, 3 = text after the backslash,
+  *          5 = macro name, 7 = lower-case name inside a following "{...}"
+  *
+  * isPatternString: true while the search pattern is analysed, false for
+  * the text that is searched. Several function-local statics
+  * (disableLanguageOverride, usedText, removeMathHull) carry decisions made
+  * for the pattern over to the following non-pattern calls.
+  *
+  * Note: findclosing() appears to report the end position it was given when
+  * a bracket is not closed (see the "Missing closing" check near the end).
+  * Every index derived from 'findclosing(...) + 1' is therefore checked
+  * against par.length() before it is used to read from par.
+  */
+ void LatexInfo::buildEntries(bool isPatternString)
+ {
+     static regex const rmath("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|x?x?alignat)\\*?\\})(\\{[0-9]+\\})?)");
+     static regex const rkeys("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?)))");
+     static bool disableLanguageOverride = false;
+     smatch sub, submath;
+     bool evaluatingRegexp = false;
+     MathInfo mi;
+     bool evaluatingMath = false;
+     bool evaluatingCode = false;
+     size_t codeEnd = 0;
+     bool evaluatingOptional = false;
+     size_t optionalEnd = 0;
+     int codeStart = -1;
+     KeyInfo found;
+     bool math_end_waiting = false;
+     size_t math_pos = 10000;
+     size_t math_prefix_size = 1;
+     string math_end;
+     static vector<string> usedText = vector<string>();
+     static bool removeMathHull = false;
+
+     interval_.removeAccents();
+     interval_.removeInvalidClosings();
+
+     // Pass 1: pair every math opening with its closing and record the hull in mi
+     for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) {
+         submath = *itmath;
+         if ((submath.position(2) - submath.position(0)) %2 == 1) {
+             // prefixed by odd count of '\\'
+             continue;
+         }
+         if (math_end_waiting) {
+             // An opening was seen; only the matching closing is accepted
+             size_t pos = submath.position(size_t(2));
+             if ((math_end == "$") &&
+                 (submath.str(2) == "$")) {
+                 mi.insert("$", math_pos, 1, pos, 1);
+                 math_end_waiting = false;
+             }
+             else if ((math_end == "\\]") &&
+                      (submath.str(2) == "\\]")) {
+                 mi.insert("\\]", math_pos, 2, pos, 2);
+                 math_end_waiting = false;
+             }
+             else if ((submath.str(3).compare("end") == 0) &&
+                      (submath.str(5).compare(math_end) == 0)) {
+                 mi.insert(math_end, math_pos, math_prefix_size, pos, submath.str(2).length());
+                 math_end_waiting = false;
+             }
+             else
+                 continue;
+         }
+         else {
+             if (submath.str(3).compare("begin") == 0) {
+                 math_end_waiting = true;
+                 math_end = submath.str(5);
+                 math_pos = submath.position(size_t(2));
+                 math_prefix_size = submath.str(2).length();
+             }
+             else if (submath.str(2).compare("\\[") == 0) {
+                 math_end_waiting = true;
+                 math_end = "\\]";
+                 math_pos = submath.position(size_t(2));
+             }
+             else if (submath.str(2) == "$") {
+                 size_t pos = submath.position(size_t(2));
+                 math_end_waiting = true;
+                 math_end = "$";
+                 math_pos = pos;
+             }
+         }
+     }
+     // Ignore language if there is math somewhere in pattern-string
+     if (isPatternString) {
+         for (auto const & s: usedText) {
+             // Remove entries created in previous search runs
+             keys.erase(s);
+         }
+         usedText = vector<string>();
+         if (! mi.empty()) {
+             // Disable language
+             keys["foreignlanguage"].disabled = true;
+             disableLanguageOverride = true;
+             removeMathHull = false;
+         }
+         else {
+             removeMathHull = true; // used later if not isPatternString
+             disableLanguageOverride = false;
+         }
+     }
+     else {
+         if (disableLanguageOverride) {
+             keys["foreignlanguage"].disabled = true;
+         }
+     }
+     // Pass 2: classify every token and compute its ranges
+     math_pos = mi.getFirstPos();
+     for (sregex_iterator it(interval_.par.begin(), interval_.par.end(), rkeys), end; it != end; ++it) {
+         sub = *it;
+         if ((sub.position(2) - sub.position(0)) %2 == 1) {
+             // prefixed by odd count of '\\'
+             continue;
+         }
+         string key = sub.str(5);
+         if (key == "") {
+             // No macro name: token is '$', '\[' or '\]'; use '$', '[' or ']' as key
+             if (sub.str(2)[0] == '\\')
+                 key = sub.str(2)[1];
+             else {
+                 key = sub.str(2);
+             }
+         }
+         KeysIterator it_key = keys.find(key);
+         if (it_key != keys.end()) {
+             if (it_key->second.keytype == KeyInfo::headRemove) {
+                 // Strip "\key{" and the matching '}' right away; no entry is stored
+                 KeyInfo found1 = it_key->second;
+                 found1.disabled = true;
+                 found1.head = "\\" + key + "{";
+                 found1._tokenstart = sub.position(size_t(2));
+                 found1._tokensize = found1.head.length();
+                 found1._dataStart = found1._tokenstart + found1.head.length();
+                 int endpos = interval_.findclosing(found1._dataStart, interval_.par.length(), '{', '}', 1);
+                 found1._dataEnd = endpos;
+                 removeHead(found1);
+                 continue;
+             }
+         }
+         if (evaluatingRegexp) {
+             // Inside \regexp{...}: skip everything up to \endregexp
+             if (sub.str(3).compare("endregexp") == 0) {
+                 evaluatingRegexp = false;
+                 // found._tokenstart already set
+                 found._dataEnd = sub.position(size_t(2)) + 13;
+                 found._dataStart = found._dataEnd;
+                 found._tokensize = found._dataEnd - found._tokenstart;
+                 found.parenthesiscount = 0;
+                 found.head = interval_.par.substr(found._tokenstart, found._tokensize);
+             }
+             else {
+                 continue;
+             }
+         }
+         else {
+             if (evaluatingMath) {
+                 // Tokens inside the current math hull are not entries of their own
+                 if (size_t(sub.position(size_t(2))) < mi.getEndPos())
+                     continue;
+                 evaluatingMath = false;
+                 mi.incrEntry();
+                 math_pos = mi.getStartPos();
+             }
+             if (it_key == keys.end()) {
+                 LYXERR(Debug::INFO, "Undefined key " << key << " ==> will be used as text");
+                 found = KeyInfo(KeyInfo::isText, 0, false);
+                 if (isPatternString) {
+                     found.keytype = KeyInfo::isChar;
+                     found.disabled = false;
+                     found.used = true;
+                 }
+                 keys[key] = found;
+                 // Remembered so the next pattern run can erase it again
+                 usedText.push_back(key);
+             }
+             else
+                 found = keys[key];
+             if (key.compare("regexp") == 0) {
+                 evaluatingRegexp = true;
+                 found._tokenstart = sub.position(size_t(2));
+                 found._tokensize = 0;
+                 continue;
+             }
+         }
+         // Handle the other params of key
+         if (found.keytype == KeyInfo::isIgnored)
+             continue;
+         else if (found.keytype == KeyInfo::isMath) {
+             if (size_t(sub.position(size_t(2))) == math_pos) {
+                 // Start of a math hull recorded in pass 1
+                 found = keys[key];
+                 found._tokenstart = sub.position(size_t(2));
+                 found._tokensize = mi.getSize();
+                 found._dataEnd = found._tokenstart + found._tokensize;
+                 found._dataStart = found._dataEnd;
+                 found.parenthesiscount = 0;
+                 found.head = interval_.par.substr(found._tokenstart, found._tokensize);
+                 if (removeMathHull) {
+                     interval_.addIntervall(found._tokenstart, found._tokenstart + mi.getPrefixSize());
+                     interval_.addIntervall(found._dataEnd - mi.getPostfixSize(), found._dataEnd);
+                 }
+                 else {
+                     // Treat all math constructs as simple math
+                     interval_.par[found._tokenstart] = '$';
+                     interval_.par[found._dataEnd - mi.getPostfixSize()] = '$';
+                     interval_.addIntervall(found._tokenstart + 1, found._tokenstart + mi.getPrefixSize());
+                     interval_.addIntervall(found._dataEnd - mi.getPostfixSize() + 1, found._dataEnd);
+                 }
+                 evaluatingMath = true;
+             }
+             else {
+                 // begin|end of unknown env, discard
+                 // First handle tables
+                 // longtable|tabular
+                 bool discardComment;
+                 found = keys[key];
+                 found.keytype = KeyInfo::doRemove;
+                 if ((sub.str(7).compare("longtable") == 0) ||
+                     (sub.str(7).compare("tabular") == 0)) {
+                     discardComment = true; /* '%' */
+                 }
+                 else {
+                     discardComment = false;
+                     static regex const removeArgs("^(multicols|multipar|sectionbox|subsectionbox|tcolorbox)$");
+                     smatch sub2;
+                     string token = sub.str(7);
+                     if (regex_match(token, sub2, removeArgs)) {
+                         found.keytype = KeyInfo::removeWithArg;
+                     }
+                 }
+                 // discard spaces before pos(2)
+                 int pos = sub.position(size_t(2));
+                 int count;
+                 for (count = 0; pos - count > 0; count++) {
+                     char c = interval_.par[pos-count-1];
+                     if (discardComment) {
+                         if ((c != ' ') && (c != '%'))
+                             break;
+                     }
+                     else if (c != ' ')
+                         break;
+                 }
+                 found._tokenstart = pos - count;
+                 if (sub.str(3).compare(0, 5, "begin") == 0) {
+                     size_t pos1 = pos + sub.str(2).length();
+                     if (sub.str(7).compare("cjk") == 0) {
+                         pos1 = interval_.findclosing(pos1+1, interval_.par.length()) + 1;
+                         // pos1 may lie past the end if the closing is missing
+                         if ((pos1 < interval_.par.length()) &&
+                             (interval_.par[pos1] == '{') && (interval_.par[pos1+1] == '}'))
+                             pos1 += 2;
+                         found.keytype = KeyInfo::isMain;
+                         found._dataStart = pos1;
+                         found._dataEnd = interval_.par.length();
+                         found.disabled = keys["foreignlanguage"].disabled;
+                         found.used = keys["foreignlanguage"].used;
+                         found._tokensize = pos1 - found._tokenstart;
+                         found.head = interval_.par.substr(found._tokenstart, found._tokensize);
+                     }
+                     else {
+                         // Swallow possible optional params
+                         // (stop at the end of par: an unterminated '[' moves pos1 past it)
+                         while ((pos1 < interval_.par.length()) && (interval_.par[pos1] == '[')) {
+                             pos1 = interval_.findclosing(pos1+1, interval_.par.length(), '[', ']')+1;
+                         }
+                         // Swallow also the eventual parameter
+                         if ((pos1 < interval_.par.length()) && (interval_.par[pos1] == '{')) {
+                             found._dataEnd = interval_.findclosing(pos1+1, interval_.par.length()) + 1;
+                         }
+                         else {
+                             found._dataEnd = pos1;
+                         }
+                         found._dataStart = found._dataEnd;
+                         found._tokensize = count + found._dataEnd - pos;
+                         found.parenthesiscount = 0;
+                         found.head = interval_.par.substr(found._tokenstart, found._tokensize);
+                         found.disabled = true;
+                     }
+                 }
+                 else {
+                     // Handle "\end{...}"
+                     found._dataStart = pos + sub.str(2).length();
+                     found._dataEnd = found._dataStart;
+                     found._tokensize = count + found._dataEnd - pos;
+                     found.parenthesiscount = 0;
+                     found.head = interval_.par.substr(found._tokenstart, found._tokensize);
+                     found.disabled = true;
+                 }
+             }
+         }
+         else if (found.keytype != KeyInfo::isRegex) {
+             found._tokenstart = sub.position(size_t(2));
+             if (found.parenthesiscount == 0) {
+                 // Probably to be discarded
+                 size_t following_pos = sub.position(size_t(2)) + sub.str(5).length() + 1;
+                 char following = interval_.par[following_pos];
+                 if (following == ' ')
+                     found.head = "\\" + sub.str(5) + " ";
+                 else if (following == '=') {
+                     // like \uldepth=1000pt
+                     found.head = sub.str(2);
+                 }
+                 else
+                     found.head = "\\" + key;
+                 found._tokensize = found.head.length();
+                 found._dataEnd = found._tokenstart + found._tokensize;
+                 found._dataStart = found._dataEnd;
+             }
+             else {
+                 // Position right after "\key"
+                 int params = found._tokenstart + key.length() + 1;
+                 if (evaluatingOptional) {
+                     if (size_t(found._tokenstart) > optionalEnd) {
+                         evaluatingOptional = false;
+                     }
+                     else {
+                         // Key sits inside the optional argument of a previous key
+                         found.disabled = true;
+                     }
+                 }
+                 int optend = params;
+                 // (bounded: an unterminated '[' moves optend past the end of par)
+                 while ((size_t(optend) < interval_.par.length()) && (interval_.par[optend] == '[')) {
+                     // discard optional parameters
+                     optend = interval_.findclosing(optend+1, interval_.par.length(), '[', ']') + 1;
+                 }
+                 if (optend > params) {
+                     key += interval_.par.substr(params, optend-params);
+                     evaluatingOptional = true;
+                     optionalEnd = optend;
+                     if (found.keytype == KeyInfo::isSectioning) {
+                         // Remove optional values (but still keep in header)
+                         interval_.addIntervall(params, optend);
+                     }
+                 }
+                 string token = sub.str(7);
+                 int closings;
+                 if ((size_t(optend) >= interval_.par.length()) || (interval_.par[optend] != '{')) {
+                     // No "{...}" argument follows: treat as a key without parameters
+                     closings = 0;
+                     found.parenthesiscount = 0;
+                     found.head = "\\" + key;
+                 }
+                 else
+                     closings = found.parenthesiscount;
+                 if (found.parenthesiscount == 1) {
+                     found.head = "\\" + key + "{";
+                 }
+                 else if (found.parenthesiscount > 1) {
+                     if (token != "") {
+                         // First "{...}" was already matched by the regex
+                         found.head = sub.str(2) + "{";
+                         closings = found.parenthesiscount - 1;
+                     }
+                     else {
+                         found.head = "\\" + key + "{";
+                     }
+                 }
+                 found._tokensize = found.head.length();
+                 found._dataStart = found._tokenstart + found.head.length();
+                 if (found.keytype == KeyInfo::doRemove) {
+                     if (closings > 0) {
+                         size_t endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings);
+                         if (endpar >= interval_.par.length())
+                             found._dataStart = interval_.par.length();
+                         else
+                             found._dataStart = endpar;
+                         found._tokensize = found._dataStart - found._tokenstart;
+                     }
+                     else {
+                         found._dataStart = found._tokenstart + found._tokensize;
+                     }
+                     closings = 0;
+                 }
+                 if (interval_.par.substr(found._dataStart, 15).compare("\\endarguments{}") == 0) {
+                     found._dataStart += 15;
+                 }
+                 size_t endpos;
+                 if (closings < 1)
+                     endpos = found._dataStart - 1;
+                 else
+                     endpos = interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings);
+                 if (found.keytype == KeyInfo::isList) {
+                     // Check if it really is list env
+                     static regex const listre("^([a-z]+)$");
+                     smatch sub2;
+                     if (!regex_match(token, sub2, listre)) {
+                         // Change the key of this entry. It is not in a list/item environment
+                         found.keytype = KeyInfo::endArguments;
+                     }
+                 }
+                 if (found.keytype == KeyInfo::noMain) {
+                     evaluatingCode = true;
+                     codeEnd = endpos;
+                     codeStart = found._dataStart;
+                 }
+                 else if (evaluatingCode) {
+                     if (size_t(found._dataStart) > codeEnd)
+                         evaluatingCode = false;
+                     else if (found.keytype == KeyInfo::isMain) {
+                         // Disable this key, treat it as standard
+                         found.keytype = KeyInfo::isStandard;
+                         found.disabled = true;
+                         if ((codeEnd +1 >= interval_.par.length()) &&
+                             (found._tokenstart == codeStart)) {
+                             // trickery, because the code inset starts
+                             // with \selectlanguage ...
+                             codeEnd = endpos;
+                             if (entries_.size() > 1) {
+                                 entries_[entries_.size()-1]._dataEnd = codeEnd;
+                             }
+                         }
+                     }
+                 }
+                 if ((endpos == interval_.par.length()) &&
+                     (found.keytype == KeyInfo::doRemove)) {
+                     // Missing closing => error in latex-input?
+                     // therefore do not delete remaining data
+                     found._dataStart -= 1;
+                     found._dataEnd = found._dataStart;
+                 }
+                 else
+                     found._dataEnd = endpos;
+             }
+             if (isPatternString) {
+                 keys[key].used = true;
+             }
+         }
+         entries_.push_back(found);
+     }
+ }
+
+ /*
+  * Register a copy of keyI in the global 'keys' table under every name of
+  * the '|'-separated list keysstring.
+  * For the pattern string each copy starts with used == false; otherwise a
+  * name whose current table entry is not marked as used gets its copy
+  * disabled.
+  */
+ void LatexInfo::makeKey(const string &keysstring, KeyInfo keyI, bool isPatternString)
+ {
+     stringstream names(keysstring);
+     for (string name; getline(names, name, '|'); ) {
+         KeyInfo entry(keyI);
+         if (isPatternString) {
+             entry.used = false;
+         }
+         else {
+             bool const seenInPattern = keys[name].used;
+             if (!seenInPattern)
+                 entry.disabled = true;
+         }
+         keys[name] = entry;
+     }
+ }
+
+ /*
+  * Fill the global 'keys' table with all LaTeX macros known to the search
+  * code, one makeKey() call per group of macros sharing the same KeyInfo.
+  *
+  * The KeyInfo constructor arguments are presumably (keytype, number of
+  * "{...}" parameters, disabled) -- confirm against the KeyInfo declaration.
+  * Where the third argument comes from ignoreFormats, the key is disabled
+  * when the user asked to ignore that kind of formatting.
+  *
+  * isPatternString: true when called for the search pattern. The table is
+  * rebuilt for every pattern; for the searched text it is built only once
+  * after a pattern (guarded by the static keysBuilt flag).
+  */
+ void LatexInfo::buildKeys(bool isPatternString)
+ {
+
+     static bool keysBuilt = false;
+     if (keysBuilt && !isPatternString) return;
+
+     // Keys to ignore in any case
+     makeKey("text|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true);
+     makeKey("nonumber|notag", KeyInfo(KeyInfo::headRemove, 0, true), true);
+     // Known standard keys with 1 parameter.
+     // Split is done, if not at start of region
+     makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString);
+     makeKey("textbf", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getSeries()), isPatternString);
+     makeKey("textit|textsc|textsl", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getShape()), isPatternString);
+     makeKey("uuline|uline|uwave", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getUnderline()), isPatternString);
+     makeKey("emph|noun", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getMarkUp()), isPatternString);
+     makeKey("sout|xout", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getStrikeOut()), isPatternString);
+
+     makeKey("section|subsection|subsubsection|paragraph|subparagraph|minisec",
+             KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString);
+     makeKey("section*|subsection*|subsubsection*|paragraph*",
+             KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString);
+     makeKey("part|part*|chapter|chapter*", KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString);
+     makeKey("title|subtitle|author|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|lyxaddress|lyxrightaddress", KeyInfo(KeyInfo::isTitle, 1, ignoreFormats.getFrontMatter()), isPatternString);
+     // Regex
+     makeKey("regexp", KeyInfo(KeyInfo::isRegex, 1, false), isPatternString);
+
+     // Split is done, if not at start of region
+     makeKey("textcolor", KeyInfo(KeyInfo::isStandard, 2, ignoreFormats.getColor()), isPatternString);
+     makeKey("latexenvironment", KeyInfo(KeyInfo::isStandard, 2, false), isPatternString);
+
+     // Split is done always.
+     makeKey("foreignlanguage", KeyInfo(KeyInfo::isMain, 2, ignoreFormats.getLanguage()), isPatternString);
+
+     // Known characters
+     // No split
+     makeKey("backslash|textbackslash|slash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     // The LaTeX macro for the em dash is \textemdash
+     makeKey("textasciiacute|textemdash", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     makeKey("dots|ldots", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     // Spaces
+     makeKey("quad|qquad|hfill|dotfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     makeKey("textvisiblespace|nobreakspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     makeKey("negthickspace|negmedspace|negthinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     makeKey("thickspace|medspace|thinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     // Skip
+     // makeKey("enskip|smallskip|medskip|bigskip|vfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     // Custom space/skip, remove the content (== length value)
+     makeKey("vspace|vspace*|hspace|hspace*|mspace", KeyInfo(KeyInfo::noContent, 1, false), isPatternString);
+     // Found in fr/UserGuide.lyx
+     makeKey("og|fg", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     // quotes
+     makeKey("textquotedbl|quotesinglbase|lyxarrow", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     makeKey("textquotedblleft|textquotedblright", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     // Known macros to remove (including their parameter)
+     // No split
+     makeKey("input|inputencoding|label|ref|index|bibitem", KeyInfo(KeyInfo::doRemove, 1, false), isPatternString);
+     makeKey("addtocounter|setlength", KeyInfo(KeyInfo::noContent, 2, true), isPatternString);
+     // handle like standard keys with 1 parameter.
+     makeKey("url|href|vref|thanks", KeyInfo(KeyInfo::isStandard, 1, false), isPatternString);
+
+     if (ignoreFormats.getDeleted()) {
+         // Ignore deleted text
+         makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 3, false), isPatternString);
+     }
+     else {
+         // but preserve added text
+         makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString);
+     }
+     makeKey("lyxadded", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString);
+
+     // Macros to remove, but let the parameter survive
+     // No split
+     makeKey("menuitem|textmd|textrm", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
+
+     // Remove language spec from content of these insets
+     makeKey("code", KeyInfo(KeyInfo::noMain, 1, false), isPatternString);
+
+     // Same effect as previous, parameter will survive (because there is no one anyway)
+     // No split
+     makeKey("noindent|textcompwordmark|maketitle", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
+     // Remove table decorations
+     makeKey("hline|tabularnewline|toprule|bottomrule|midrule", KeyInfo(KeyInfo::doRemove, 0, true), isPatternString);
+     // Discard shape-header.
+     // For footnote or shortcut too, because of lang settings
+     // and wrong handling if used 'KeyInfo::noMain'
+     makeKey("circlepar|diamondpar|heartpar|nutpar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
+     makeKey("trianglerightpar|hexagonpar|starpar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
+     makeKey("triangleuppar|triangledownpar|droppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
+     makeKey("triangleleftpar|shapepar|dropuppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
+     makeKey("hphantom|vphantom|note|footnote|shortcut|include|includegraphics", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
+     makeKey("textgreek|textcyrillic", KeyInfo(KeyInfo::isStandard, 1, true), false);
+     makeKey("parbox", KeyInfo(KeyInfo::doRemove, 1, true), isPatternString);
+     // like ('tiny{}' or '\tiny ' ... )
+     makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, ignoreFormats.getSize()), isPatternString);
+
+     // Survives, like known character
+     // makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     makeKey("tableofcontents", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
+     makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString);
+
+     makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
+     makeKey("[|]", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
+     makeKey("$", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
+
+     makeKey("par|uldepth|ULdepth|protect|nobreakdash|medskip|relax", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
+     // Remove RTL/LTR marker
+     makeKey("l|r|textlr|textfr|textar|beginl|endl", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
+     makeKey("lettrine", KeyInfo(KeyInfo::cleanToStart, 0, true), isPatternString);
+     makeKey("lyxslide", KeyInfo(KeyInfo::isSectioning, 1, true), isPatternString);
+     makeKey("endarguments", KeyInfo(KeyInfo::endArguments, 0, true), isPatternString);
+     makeKey("twocolumn", KeyInfo(KeyInfo::removeWithArg, 2, true), isPatternString);
+     makeKey("tnotetext|ead|fntext|cortext|address", KeyInfo(KeyInfo::removeWithArg, 0, true), isPatternString);
+     makeKey("lyxend", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
+     if (isPatternString) {
+         // Allow the first searched string to rebuild the keys too
+         keysBuilt = false;
+     }
+     else {
+         // no need to rebuild again
+         keysBuilt = true;
+     }
+ }
+
+/*
+ * Keep the list of actual opened parentheses actual
+ * (e.g. depth == 4 means there are 4 '{' not processed yet)
+ */
+void Intervall::handleParentheses(int lastpos, bool closingAllowed)
+{
+ int skip = 0;
+ for (int i = depts[actualdeptindex]; i < lastpos; i+= 1 + skip) {
+ char c;
+ c = par[i];
+ skip = 0;
+ if (c == '\\') skip = 1;
+ else if (c == '{') {
+ handleOpenP(i);
+ }
+ else if (c == '}') {
+ handleCloseP(i, closingAllowed);
+ }
+ }
+}
+
+ #if (0) // compiled out: returns (instead of printing) the not-ignored text of par up to lastpos
+ string Intervall::show(int lastpos)
+ {
+ int idx = 0; /* int intervalls */
+ string s; // collected text outside of the ignored intervals
+ int i = 0; // current read position in par
+ if ((unsigned) lastpos > par.size())
+ lastpos = par.size();
+ for (idx = 0; idx <= ignoreidx; idx++) {
+ while (i < lastpos) {
+ int printsize;
+ if (i <= borders[idx].low) {
+ if (borders[idx].low > lastpos)
+ printsize = lastpos - i;
+ else
+ printsize = borders[idx].low - i;
+ s += par.substr(i, printsize);
+ i += printsize;
+ if (i >= borders[idx].low)
+ i = borders[idx].upper; // jump over the ignored interval
+ }
+ else {
+ i = borders[idx].upper;
+ break;
+ }
+ }
+ }
+ if (lastpos > i) { // text after the last ignored interval
+ s += par.substr(i, lastpos-i);
+ }
+ return s;
+ }
+ #endif
+
+ /*
+  * Write the part of par before lastpos that is not covered by an ignored
+  * interval (borders[0..ignoreidx]) to os, then close what is still open:
+  * one '}' per pending depth level (language levels are left out when
+  * "foreignlanguage" is disabled), one more '}' if a title was started and
+  * something was printed, and a newline unless this is the pattern string.
+  * titleValue is emitted once, in front of the first printed chunk.
+  */
+ void Intervall::output(ostringstream &os, int lastpos)
+ {
+     int cursor = 0;            // next position of par to look at
+     int emitted = 0;           // number of characters written so far
+     string pendingTitle = titleValue;
+     for (int b = 0; b <= ignoreidx; ++b) {
+         if (cursor >= lastpos)
+             break;
+         if (cursor > borders[b].low) {
+             // already behind the start of this interval: just leave it
+             cursor = borders[b].upper;
+             continue;
+         }
+         // text between cursor and the interval start (clipped to lastpos)
+         int const chunk = (borders[b].low > lastpos)
+             ? lastpos - cursor
+             : borders[b].low - cursor;
+         if (chunk > 0) {
+             os << pendingTitle << par.substr(cursor, chunk);
+             cursor += chunk;
+             emitted += chunk;
+             pendingTitle = "";
+         }
+         handleParentheses(cursor, false);
+         if (cursor >= borders[b].low)
+             cursor = borders[b].upper;
+     }
+     if (lastpos > cursor) {
+         // remainder behind the last ignored interval
+         int const rest = lastpos - cursor;
+         os << pendingTitle << par.substr(cursor, rest);
+         emitted += rest;
+     }
+     handleParentheses(lastpos, false);
+     int pendingClosings = actualdeptindex;
+     if (keys["foreignlanguage"].disabled)
+         pendingClosings = actualdeptindex-langcount;
+     while (pendingClosings > 0) {
+         os << "}";
+         --pendingClosings;
+     }
+     if (hasTitle && (emitted > 0))
+         os << "}";
+     if (! isPatternString_)
+         os << "\n";
+     handleParentheses(lastpos, true); /* extra closings '}' allowed here */
+ }
+
+ /*
+  * Walk over the not-ignored positions in [start, region_end) and mark
+  * plain brace pairs as ignored, so that only their content remains.
+  * isOpeningPar() == 1: the opening at this position and its closing are
+  *                      both added to the ignored intervals.
+  * isOpeningPar() == 3: two further characters are skipped, which lets
+  *                      "{[}" and "{]}" survive.
+  */
+ void LatexInfo::processRegion(int start, int region_end)
+ {
+     for (int pos = start; pos < region_end; pos = interval_.nextNotIgnored(pos+1)) {
+         switch (interval_.isOpeningPar(pos)) {
+         case 1: {
+             // Closing is allowed past the region
+             int const closingPos = interval_.findclosing(pos+1, interval_.par.length());
+             interval_.addIntervall(pos, pos+1);
+             interval_.addIntervall(closingPos, closingPos+1);
+             break;
+         }
+         case 3:
+             pos += 2;
+             break;
+         default:
+             break;
+         }
+     }
+ }
+
+ /*
+  * Mark the head of a key as ignored, together with 'count' characters in
+  * front of it.
+  * Without parameters only the token itself is removed:
+  *     "{\tiny{} ...}" ==> "{{} ...}"
+  * With parameters the hull is removed and the data is kept:
+  *     "\url{abcd}" ==> "abcd"
+  */
+ void LatexInfo::removeHead(KeyInfo const & actual, int count)
+ {
+     int const headStart = actual._tokenstart - count;
+     if (actual.parenthesiscount == 0) {
+         interval_.addIntervall(headStart, actual._tokenstart + actual._tokensize);
+         return;
+     }
+     // everything up to the data, then the single closing character
+     interval_.addIntervall(headStart, actual._dataStart);
+     interval_.addIntervall(actual._dataEnd, actual._dataEnd+1);
+ }
+
+ /*
+  * Handle one entry according to its keytype: mark the parts of
+  * interval_.par that have to be ignored, and for keys that split the
+  * output, write the text collected so far to os.
+  *
+  * os:            receives the output produced by splitting keys
+  * previousStart: data start of the calling entry; a split is only done
+  *                when not-ignored text exists between it and this key
+  * actual:        the entry to handle; its ranges may be adjusted here
+  *
+  * Returns the index of the next entry to handle, as delivered by
+  * getNextKey() or process(). The result stays 0 for an isMain entry that
+  * is not disabled.
+  *
+  * Reads of interval_.par at positions derived from _dataEnd are guarded
+  * against the end of the paragraph where _dataEnd may already sit there.
+  */
+ int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
+ {
+     int nextKeyIdx = 0;
+     switch (actual.keytype)
+     {
+     case KeyInfo::isTitle: {
+         removeHead(actual);
+         nextKeyIdx = getNextKey();
+         break;
+     }
+     case KeyInfo::cleanToStart: {
+         actual._dataEnd = actual._dataStart;
+         nextKeyIdx = getNextKey();
+         // Search for end of arguments
+         int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments);
+         if (tmpIdx > 0) {
+             for (int i = nextKeyIdx; i <= tmpIdx; i++) {
+                 entries_[i].disabled = true;
+             }
+             actual._dataEnd = entries_[tmpIdx]._dataEnd;
+         }
+         // Skip following spaces, but never read behind the end of par
+         while ((size_t(actual._dataEnd) < interval_.par.length()) &&
+                (interval_.par[actual._dataEnd] == ' '))
+             actual._dataEnd++;
+         // Everything from the start of the paragraph is discarded
+         interval_.addIntervall(0, actual._dataEnd+1);
+         interval_.actualdeptindex = 0;
+         interval_.depts[0] = actual._dataEnd+1;
+         interval_.closes[0] = -1;
+         break;
+     }
+     case KeyInfo::isText:
+         // Replace the backslash so the unknown macro is handled as plain text
+         interval_.par[actual._tokenstart] = '#';
+         //interval_.addIntervall(actual._tokenstart, actual._tokenstart+1);
+         nextKeyIdx = getNextKey();
+         break;
+     case KeyInfo::noContent: { /* char like "\hspace{2cm}" */
+         if (actual.disabled)
+             interval_.addIntervall(actual._tokenstart, actual._dataEnd);
+         else
+             interval_.addIntervall(actual._dataStart, actual._dataEnd);
+     }
+     // fall through
+     case KeyInfo::isChar: {
+         nextKeyIdx = getNextKey();
+         break;
+     }
+     case KeyInfo::isSize: {
+         if (actual.disabled || (interval_.par[actual._dataStart] != '{') || (interval_.par[actual._dataStart-1] == ' ')) {
+             if (actual.parenthesiscount == 0)
+                 interval_.addIntervall(actual._tokenstart, actual._dataEnd);
+             else {
+                 interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
+             }
+             nextKeyIdx = getNextKey();
+         } else {
+             // Here _dataStart points to '{', so correct it
+             actual._dataStart += 1;
+             actual._tokensize += 1;
+             actual.parenthesiscount = 1;
+             if (interval_.par[actual._dataStart] == '}') {
+                 // Determine the end if used like '{\tiny{}...}'
+                 actual._dataEnd = interval_.findclosing(actual._dataStart+1, interval_.par.length()) + 1;
+                 interval_.addIntervall(actual._dataStart, actual._dataStart+1);
+             }
+             else {
+                 // Determine the end if used like '\tiny{...}'
+                 actual._dataEnd = interval_.findclosing(actual._dataStart, interval_.par.length()) + 1;
+             }
+             // Split on this key if not at start
+             int start = interval_.nextNotIgnored(previousStart);
+             if (start < actual._tokenstart) {
+                 interval_.output(os, actual._tokenstart);
+                 interval_.addIntervall(start, actual._tokenstart);
+             }
+             // discard entry if at end of actual
+             nextKeyIdx = process(os, actual);
+         }
+         break;
+     }
+     case KeyInfo::endArguments: {
+         // Remove trailing '{}' too
+         actual._dataStart += 1;
+         actual._dataEnd += 1;
+         interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
+         nextKeyIdx = getNextKey();
+         break;
+     }
+     case KeyInfo::noMain:
+         // fall through
+     case KeyInfo::isStandard: {
+         if (actual.disabled) {
+             removeHead(actual);
+             processRegion(actual._dataStart, actual._dataStart+1);
+             nextKeyIdx = getNextKey();
+         } else {
+             // Split on this key if not at datastart of calling entry
+             int start = interval_.nextNotIgnored(previousStart);
+             if (start < actual._tokenstart) {
+                 interval_.output(os, actual._tokenstart);
+                 interval_.addIntervall(start, actual._tokenstart);
+             }
+             // discard entry if at end of actual
+             nextKeyIdx = process(os, actual);
+         }
+         break;
+     }
+     case KeyInfo::removeWithArg: {
+         nextKeyIdx = getNextKey();
+         // Search for end of arguments
+         int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments);
+         if (tmpIdx > 0) {
+             for (int i = nextKeyIdx; i <= tmpIdx; i++) {
+                 entries_[i].disabled = true;
+             }
+             actual._dataEnd = entries_[tmpIdx]._dataEnd;
+         }
+         interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
+         break;
+     }
+     case KeyInfo::doRemove: {
+         // Remove the key with all parameters and following spaces
+         size_t pos;
+         size_t start;
+         if (interval_.par[actual._dataEnd-1] == ' ' || interval_.par[actual._dataEnd-1] == '}')
+             start = actual._dataEnd;
+         else
+             start = actual._dataEnd+1;
+         for (pos = start; pos < interval_.par.length(); pos++) {
+             if ((interval_.par[pos] != ' ') && (interval_.par[pos] != '%'))
+                 break;
+         }
+         // Remove also enclosing parentheses [] and {}
+         int numpars = 0;
+         int spaces = 0;
+         while (actual._tokenstart > numpars) {
+             if (pos+numpars >= interval_.par.size())
+                 break;
+             else if (interval_.par[pos+numpars] == ']' && interval_.par[actual._tokenstart-numpars-1] == '[')
+                 numpars++;
+             else if (interval_.par[pos+numpars] == '}' && interval_.par[actual._tokenstart-numpars-1] == '{')
+                 numpars++;
+             else
+                 break;
+         }
+         if (numpars > 0) {
+             // One space behind the removed enclosing pair goes away too
+             if (interval_.par[pos+numpars] == ' ')
+                 spaces++;
+         }
+
+         interval_.addIntervall(actual._tokenstart-numpars, pos+numpars+spaces);
+         nextKeyIdx = getNextKey();
+         break;
+     }
+     case KeyInfo::isList: {
+         // Discard space before _tokenstart
+         int count;
+         for (count = 0; count < actual._tokenstart; count++) {
+             if (interval_.par[actual._tokenstart-count-1] != ' ')
+                 break;
+         }
+         nextKeyIdx = getNextKey();
+         int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments);
+         if (tmpIdx > 0) {
+             // Special case: \item is not a list, but a command (like in Style Author_Biography in maa-monthly.layout)
+             // with arguments
+             // How else can we catch this one?
+             for (int i = nextKeyIdx; i <= tmpIdx; i++) {
+                 entries_[i].disabled = true;
+             }
+             actual._dataEnd = entries_[tmpIdx]._dataEnd;
+         }
+         else if (nextKeyIdx > 0) {
+             // Ignore any lang entries inside data region
+             for (int i = nextKeyIdx; i < int(entries_.size()) && entries_[i]._tokenstart < actual._dataEnd; i++) {
+                 if (entries_[i].keytype == KeyInfo::isMain)
+                     entries_[i].disabled = true;
+             }
+         }
+         if (actual.disabled) {
+             interval_.addIntervall(actual._tokenstart-count, actual._dataEnd+1);
+         }
+         else {
+             interval_.addIntervall(actual._tokenstart-count, actual._tokenstart);
+         }
+         // Optional "[...]" behind the item; _dataEnd+1 may lie behind the end of par
+         if ((size_t(actual._dataEnd) + 1 < interval_.par.length()) &&
+             (interval_.par[actual._dataEnd+1] == '[')) {
+             int posdown = interval_.findclosing(actual._dataEnd+2, interval_.par.length(), '[', ']');
+             if ((interval_.par[actual._dataEnd+2] == '{') &&
+                 (interval_.par[posdown-1] == '}')) {
+                 // "[{...}]": remove both pairs of brackets
+                 interval_.addIntervall(actual._dataEnd+1,actual._dataEnd+3);
+                 interval_.addIntervall(posdown-1, posdown+1);
+             }
+             else {
+                 interval_.addIntervall(actual._dataEnd+1, actual._dataEnd+2);
+                 interval_.addIntervall(posdown, posdown+1);
+             }
+             int blk = interval_.nextNotIgnored(actual._dataEnd+1);
+             if (blk > posdown) {
+                 // Discard at most 1 space after empty item
+                 int blanks;
+                 for (blanks = 0; blanks < 1; blanks++) {
+                     if (interval_.par[blk+blanks] != ' ')
+                         break;
+                 }
+                 if (blanks > 0)
+                     interval_.addIntervall(blk, blk+blanks);
+             }
+         }
+         break;
+     }
+     case KeyInfo::isSectioning: {
+         // Discard spaces before _tokenstart
+         int count;
+         int val = actual._tokenstart;
+         for (count = 0; count < actual._tokenstart;) {
+             val = interval_.previousNotIgnored(val-1);
+             if (val < 0 || interval_.par[val] != ' ')
+                 break;
+             else {
+                 count = actual._tokenstart - val;
+             }
+         }
+         if (actual.disabled) {
+             removeHead(actual, count);
+             nextKeyIdx = getNextKey();
+         } else {
+             interval_.addIntervall(actual._tokenstart-count, actual._tokenstart);
+             nextKeyIdx = process(os, actual);
+         }
+         break;
+     }
+     case KeyInfo::isMath: {
+         // Same as regex, use the content unchanged
+         nextKeyIdx = getNextKey();
+         break;
+     }
+     case KeyInfo::isRegex: {
+         // DO NOT SPLIT ON REGEX
+         // Do not disable
+         nextKeyIdx = getNextKey();
+         break;
+     }
+     case KeyInfo::isIgnored: {
+         // Treat like a character for now
+         nextKeyIdx = getNextKey();
+         break;
+     }
+     case KeyInfo::isMain: {
+         if (interval_.par.substr(actual._dataStart, 2) == "% ")
+             interval_.addIntervall(actual._dataStart, actual._dataStart+2);
+         if (actual._tokenstart > 0) {
+             int prev = interval_.previousNotIgnored(actual._tokenstart - 1);
+             if ((prev >= 0) && interval_.par[prev] == '%')
+                 interval_.addIntervall(prev, prev+1);
+         }
+         if (actual.disabled) {
+             removeHead(actual);
+             interval_.langcount++;
+             if ((interval_.par.substr(actual._dataStart, 3) == " \\[") ||
+                 (interval_.par.substr(actual._dataStart, 8) == " \\begin{")) {
+                 // Discard also the space before math-equation
+                 interval_.addIntervall(actual._dataStart, actual._dataStart+1);
+             }
+             nextKeyIdx = getNextKey();
+             // interval.resetOpenedP(actual._dataStart-1);
+         }
+         else {
+             if (actual._tokenstart < 26) {
+                 // for the first (and maybe dummy) language
+                 interval_.setForDefaultLang(actual);
+             }
+             interval_.resetOpenedP(actual._dataStart-1);
+         }
+         break;
+     }
+     case KeyInfo::invalid:
+     case KeyInfo::headRemove:
+         // These two cases cannot happen, already handled
+         // fall through
+     default: {
+         // LYXERR(Debug::INFO, "Unhandled keytype");
+         nextKeyIdx = getNextKey();
+         break;
+     }
+     }
+     return nextKeyIdx;
+ }
+
+int LatexInfo::process(ostringstream & os, KeyInfo const & actual )
+{ /*
+ * Handle the data of 'actual' together with every key whose token starts
+ * before actual._dataEnd: the text between two keys is passed to
+ * processRegion(), each key itself to dispatch(). The walk stops early at
+ * an enabled isMain key. Afterwards interval_.output() is called for 'os'
+ * (only when data is left before output_end) and the handled range is
+ * passed to interval_.addIntervall().
+ * NOTE(review): addIntervall() presumably marks a range as to be skipped
+ * (compare nextNotIgnored()) - confirm against its definition.
+ *
+ * os:     stream handed to dispatch() and interval_.output()
+ * actual: key whose _tokenstart/_dataStart/_dataEnd delimit the range
+ * return: index of the key the caller has to continue with; callers
+ *         treat a negative value as "no key left"
+ */
+ int end = interval_.nextNotIgnored(actual._dataEnd); // tentative end of the range handled here
+ int oldStart = actual._dataStart; // start of the text not yet passed to processRegion()
+ int nextKeyIdx = getNextKey();
+ while (true) {
+ if ((nextKeyIdx < 0) ||
+ (entries_[nextKeyIdx]._tokenstart >= actual._dataEnd) ||
+ (entries_[nextKeyIdx].keytype == KeyInfo::invalid)) { // no further key inside our data
+ if (oldStart <= end) {
+ processRegion(oldStart, end);
+ oldStart = end+1; // keeps the check after the loop from processing this region twice
+ }
+ break;
+ }
+ KeyInfo &nextKey = getKeyInfo(nextKeyIdx);
+
+ if ((nextKey.keytype == KeyInfo::isMain) && !nextKey.disabled) { // an enabled isMain key ends our range
+ (void) dispatch(os, actual._dataStart, nextKey);
+ end = nextKey._tokenstart;
+ break; // nextKeyIdx still refers to this isMain key
+ }
+ processRegion(oldStart, nextKey._tokenstart);
+ nextKeyIdx = dispatch(os, actual._dataStart, nextKey);
+
+ oldStart = nextKey._dataEnd+1; // continue behind the data of the key just dispatched
+ }
+ // The loop ended either because no key is left inside actual's data or
+ // because an enabled isMain key was met; pass on the text that remains
+ if (oldStart <= end) {
+ processRegion(oldStart, end);
+ }
+ if (interval_.par.size() > (size_t) end && interval_.par[end] == '}') {
+ end += 1; // include the closing brace
+ // This is the normal case.
+ // But if using the first language, the closing brace may be missing
+ }
+ // Use the smaller of 'end' and 'actual._dataEnd'; 'end' may have been moved to the token start of an isMain key
+ int output_end;
+ if (actual._dataEnd < end)
+ output_end = interval_.nextNotIgnored(actual._dataEnd);
+ else if (interval_.par.size() > (size_t) end)
+ output_end = interval_.nextNotIgnored(end);
+ else
+ output_end = interval_.par.size(); // 'end' does not index into par
+ if ((actual.keytype == KeyInfo::isMain) && actual.disabled) {
+ interval_.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize); // the token of a disabled isMain key
+ }
+ // Remove possible empty data: leading opening parentheses and their closing counterparts
+ int dstart = interval_.nextNotIgnored(actual._dataStart);
+ while (interval_.isOpeningPar(dstart) == 1) {
+ interval_.addIntervall(dstart, dstart+1);
+ int dend = interval_.findclosing(dstart+1, output_end); // NOTE(review): presumably the position of the matching closing; confirm what is returned if none is found
+ interval_.addIntervall(dend, dend+1);
+ dstart = interval_.nextNotIgnored(dstart+1);
+ }
+ if (dstart < output_end)
+ interval_.output(os, output_end);
+ if (nextKeyIdx < 0)
+ interval_.addIntervall(0, end); // no key left: the range starts at position 0
+ else
+ interval_.addIntervall(actual._tokenstart, end);
+ return nextKeyIdx;
+}
+
+string splitOnKnownMacros(string par, bool isPatternString)
+{ /*
+ * Build a LatexInfo for 'par' and let LatexInfo::process() handle it key
+ * by key; the text collected in the output stream is returned. If
+ * getFirstKey() reports no key, 'par' is returned unchanged.
+ *
+ * par:             the text to handle
+ * isPatternString: only passed on to the LatexInfo constructor here
+ */
+ ostringstream os;
+ LatexInfo li(par, isPatternString);
+ // LYXERR(Debug::INFO, "Before split: " << par);
+ KeyInfo DummyKey = KeyInfo(KeyInfo::KeyType::isMain, 2, true); // stand-in key, used when the first key is not an enabled isMain key
+ DummyKey.head = "";
+ DummyKey._tokensize = 0;
+ DummyKey._dataStart = 0;
+ DummyKey._dataEnd = par.length(); // its data span the whole of par
+ DummyKey.disabled = true;
+ int firstkeyIdx = li.getFirstKey();
+ string s;
+ if (firstkeyIdx >= 0) {
+ KeyInfo firstKey = li.getKeyInfo(firstkeyIdx); // a copy; it is modified below
+ DummyKey._tokenstart = firstKey._tokenstart;
+ int nextkeyIdx;
+ if ((firstKey.keytype != KeyInfo::isMain) || firstKey.disabled) {
+ // Use dummy firstKey
+ firstKey = DummyKey;
+ (void) li.setNextKey(firstkeyIdx); // NOTE(review): presumably makes process() start at firstkeyIdx again - confirm
+ }
+ else {
+ if (par.substr(firstKey._dataStart, 2) == "% ")
+ li.addIntervall(firstKey._dataStart, firstKey._dataStart+2); // pass the leading "% " of the data to addIntervall()
+ }
+ nextkeyIdx = li.process(os, firstKey);
+ while (nextkeyIdx >= 0) {
+ // Check for a possible gap between the last
+ // entry and this one
+ int datastart = li.nextNotIgnored(firstKey._dataStart);
+ KeyInfo &nextKey = li.getKeyInfo(nextkeyIdx);
+ if ((nextKey._tokenstart > datastart)) {
+ // Handle the gap
+ firstKey._dataStart = datastart;
+ firstKey._dataEnd = par.length();
+ (void) li.setNextKey(nextkeyIdx);
+ // Fake the last opened parenthesis
+ li.setForDefaultLang(firstKey);
+ nextkeyIdx = li.process(os, firstKey);
+ }
+ else {
+ if (nextKey.keytype != KeyInfo::isMain) { // not a main key: process it as data of firstKey, up to the end of its own data
+ firstKey._dataStart = datastart;
+ firstKey._dataEnd = nextKey._dataEnd+1;
+ (void) li.setNextKey(nextkeyIdx);
+ li.setForDefaultLang(firstKey);
+ nextkeyIdx = li.process(os, firstKey);
+ }
+ else {
+ nextkeyIdx = li.process(os, nextKey); // a main key is processed on its own
+ }
+ }
+ }
+ // Handle the remaining
+ firstKey._dataStart = li.nextNotIgnored(firstKey._dataStart);
+ firstKey._dataEnd = par.length();
+ // Check that something other than a closing '}' is left
+ if ((firstKey._dataStart < firstKey._dataEnd) &&
+ (par[firstKey._dataStart] != '}')) {
+ li.setForDefaultLang(firstKey);
+ (void) li.process(os, firstKey);
+ }
+ s = os.str();
+ // return string definitely impossible to match, but should be known
+ }
+ else
+ s = par; /* no known macros found */
+ // LYXERR(Debug::INFO, "After split: " << s);
+ return s;
+}
+
+/*
+ * Try to unify the language specs in the latexified text.
+ * The result is the empty string if the searched TeX does not
+ * contain all the features found in the search pattern.
+ */
+static string correctlanguagesetting(string par, bool isPatternString, bool withformat, lyx::Buffer *pbuf = nullptr)
+{
+ static Features regex_f;
+ static int missed = 0;
+ static bool regex_with_format = false;
+
+ int parlen = par.length();
+
+ while ((parlen > 0) && (par[parlen-1] == '\n')) {
+ parlen--;
+ }
+#if 0
+ if (isPatternString && (parlen > 0) && (par[parlen-1] == '~')) {
+ // Happens to be there in case of description or labeling environment
+ parlen--;
+ }
+#endif
+ string result;
+ if (withformat) {
+ // Split the latex input into pieces which
+ // can be digested by our search engine
+ LYXERR(Debug::FINDVERBOSE, "input: \"" << par << "\"");
+ if (isPatternString && (pbuf != nullptr)) { // Check if we should disable/enable test for language
+ // We check for polyglossia, because in runparams.flavor we use Flavor::XeTeX
+ string doclang = pbuf->params().language->polyglossia();
+ static regex langre("\\\\(foreignlanguage)\\{([^\\}]+)\\}");
+ smatch sub;
+ bool toIgnoreLang = true;
+ for (sregex_iterator it(par.begin(), par.end(), langre), end; it != end; ++it) {
+ sub = *it;
+ if (sub.str(2) != doclang) {
+ toIgnoreLang = false;
+ break;
+ }
+ }
+ setIgnoreFormat("language", toIgnoreLang, false);
+
+ }
+ result = splitOnKnownMacros(par.substr(0,parlen), isPatternString);
+ LYXERR(Debug::FINDVERBOSE, "After splitOnKnownMacros:\n\"" << result << "\"");
+ }
+ else
+ result = par.substr(0, parlen);
+ if (isPatternString) {
+ missed = 0;
+ if (withformat) {
+ regex_f = identifyFeatures(result);
+ string features = "";
+ for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
+ string a = it->first;
+ regex_with_format = true;
+ features += " " + a;
+ // LYXERR(Debug::INFO, "Identified regex format:" << a);
+ }
+ LYXERR(Debug::FINDVERBOSE, "Identified Features" << features);
+
+ }
+ } else if (regex_with_format) {
+ Features info = identifyFeatures(result);
+ for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
+ string a = it->first;
+ bool b = it->second;
+ if (b && ! info[a]) {
+ missed++;
+ LYXERR(Debug::FINDVERBOSE, "Missed(" << missed << " " << a <<", srclen = " << parlen );
+ return "";
+ }
+ }
+