return(par);
}
+class emptyResult { // Value returned by LangInfo::checkEmpty(): an emptiness flag plus a position in the scanned text
+ public:
+ bool isEmpty; // true as long as the scan met nothing that counts as content
+ int lastPosition; // position reported by the scan (index of the closing '}' when one was reached)
+ emptyResult(bool empty, int pos) : isEmpty(empty), lastPosition(pos) {}; // plain member-wise initialisation
+};
class LangInfo {
public:
void output(ostringstream &os, int);
void addIntervall(int upper);
void addIntervall(int low, int upper); /* if explicit */
- void handleParentheses(int lastpos);
- string show(int lastpos);
- bool discardParethesizedInBlock(int start);
+ void handleParentheses(int lastpos, bool closingAllowed);
+ int discardParethesizedInBlock(int start);
private:
string par;
string _search;
int ignoreidx;
bool regexPossible;
void adaptIgnoringParts(bool useOld = false);
- int nextNotIgored(int start);
+ int nextNotIgnored(int start);
+ int previousNotIgnored(int start);
bool discarSuperfluousParentheses(int start);
+ emptyResult checkEmpty(int start, bool atStart);
};
void LangInfo::setDataEnd(int dataend)
{
if (dataend < _tokenend) {
_dataEnd = _tokenend;
- // cout << "Wrong data start, too low\n";
+ LYXERR(Debug::FIND, "Wrong data start, too low");
}
else if (size_t(dataend) > par.length()) {
- // cout << "Wrong data start, too high\n";
+ LYXERR(Debug::FIND, "Wrong data start, too high");
_dataEnd = par.length();
}
else
bool reUse = true; /* Reuse previous ignoring intervalls */
if (datastart < _tokenend) {
_dataStart = _tokenend;
- // cout << "Wrong data start, too low\n";
+ LYXERR(Debug::FIND, "Wrong data start, too low");
reUse = false;
}
else if (size_t(datastart) > par.length()) {
- // cout << "Wrong data start, too high\n";
+ LYXERR(Debug::FIND, "Wrong data start, too high");
_dataStart = par.length();
reUse = false;
}
else
_dataStart = datastart;
- //cout << "found entry at " << _tokenstart << "\n";
+ LYXERR(Debug::FIND, "found entry at " << _tokenstart);
actualdeptindex = 1; /* == Number of open brases */
depts[0] = _dataStart;
closes[0] = -1;
* Keep the list of actual opened parentheses actual
* (e.g. depth == 4 means there are 4 '{' not processed yet)
*/
-void LangInfo::handleParentheses(int lastpos)
+void LangInfo::handleParentheses(int lastpos, bool closingAllowed)
{
int skip = 0;
for (int i = depts[actualdeptindex]; i < lastpos; i+= 1 + skip) {
}
else if (c == '}') {
if (actualdeptindex <= 0) {
- LYXERR(Debug::FIND, "ERROR ERROR ERROR"); /* should never happen! */
+ if (closingAllowed) {
+ // if we are at the very end
+ addIntervall(i, i+1);
+ }
+ else {
+ LYXERR(Debug::FIND, "Bad closing parenthesis in latex"); /* should never happen! */
+ }
}
else {
closes[actualdeptindex] = i+1;
if (actualdeptindex >= 0)
low = depts[actualdeptindex]; /* the position of last unclosed '{' */
else {
- LYXERR(Debug::FIND, "ERROR ERROR ERROR2");
+ LYXERR(Debug::FIND, "Error while checking the position of last open parenthesis");
low = upper;
}
addIntervall(low, upper);
}
-string LangInfo::show(int lastpos)
+int LangInfo::previousNotIgnored(int start) // move 'start' backwards until it is outside the ignore intervals
{
- ostringstream os;
-
- os << par.substr(_tokenstart, _tokenend - _tokenstart);
- int idx = 0;
- for (int i = _dataStart; i < lastpos;) {
- if (i <= ignoreIntervalls[idx][0]) {
- os << par.substr(i, ignoreIntervalls[idx][0] - i);
- i = ignoreIntervalls[idx][1];
- }
- idx++;
- if (idx > ignoreidx) {
- os << par.substr(i, lastpos-i);
- break;
+ int idx = 0; /* index into ignoreIntervalls */
+ for (idx = ignoreidx; idx >= 0; --idx) { // visit the intervals from the last one down to the first
+ if (start > ignoreIntervalls[idx][1])
+ return(start); // start lies beyond this interval's second bound: nothing more to skip
+ if (start >= ignoreIntervalls[idx][0])
+ start = ignoreIntervalls[idx][0]-1; // step to just before this interval's first bound (becomes -1 if that bound is 0)
}
- }
- for (int i = actualdeptindex; i > 0; --i)
- os << "}";
- return os.str();
+ return start;
}
-int LangInfo::nextNotIgored(int start)
+int LangInfo::nextNotIgnored(int start)
{
int idx = 0; /* int intervalls */
for (idx = 0; idx <= ignoreidx; idx++) {
for (int i = _dataStart; i < lastpos;) {
if (i <= ignoreIntervalls[idx][0]) {
os << par.substr(i, ignoreIntervalls[idx][0] - i);
- handleParentheses(ignoreIntervalls[idx][0]);
i = ignoreIntervalls[idx][1];
+ handleParentheses(ignoreIntervalls[idx][1], false);
}
idx++;
if (idx > ignoreidx) {
break;
}
}
+ handleParentheses(lastpos, false);
for (int i = actualdeptindex; i > 0; --i)
os << "}";
}
- handleParentheses(lastpos);
+ handleParentheses(lastpos, true); /* extra closings '}' allowed here */
}
bool LangInfo::nextInfo()
{
int start = _tokenstart;
- // cout << par << "\n";
if (valid == Invalid)
_dataEnd = _tokenstart;
else if (valid == LastValid)
return nextInfo();
}
-bool LangInfo::discardParethesizedInBlock(int start)
+/*
+ * Check whether 'checked' begins with a macro (a backslash followed by
+ * lowercase letters) that is followed by one or more empty '{}' pairs.
+ * Return 0 if nothing found
+ * >0 size of the match if it is a known macro (only "backslash" is known)
+ * <0 -size of the match if it is an empty unknown macro
+ */
+static int checkMacro(string checked)
+{
+ static regex anymacro("(\\\\([a-z]+)(\\{\\})+).*", regex_constants::ECMAScript); // group 1: macro with its '{}' pairs, group 2: macro name
+ static regex known("(backslash)$", regex_constants::ECMAScript);
+ cmatch cm;
+
+ if (regex_match(checked.c_str(), cm, anymacro)) {
+ string found2 = cm[2];
+ if (regex_match(found2, known)) {
+ return cm[1].second - cm[1].first; // positive: length of group 1
+ }
+ else {
+ return cm[1].first - cm[1].second; // negative: same length with the sign flipped
+ }
+ }
+ else
+ return 0;
+}
+
+/*
+ * Scan par from 'start' until the closing '}' of the current level (or
+ * until _dataEnd) and register discardable parts through addIntervall().
+ *
+ * start:         first position to examine
+ * atStartOrigin: whether the scan begins directly behind an opening '{'
+ *
+ * Returns whether only discardable content was seen, together with the
+ * position of the closing '}'. If no '}' is reached, lastPosition keeps
+ * the value of 'start' that was passed in.
+ */
+emptyResult LangInfo::checkEmpty(int start, bool atStartOrigin)
{
- int depth = 0;
- int skip = 0;
- bool isempty = true;
+ emptyResult Result(true, start);
+
+ bool atStart = atStartOrigin;
+ while (start < _dataEnd) {
+ if (par[start] == '{') {
+ // Nested group: examine its content first
+ emptyResult inside = checkEmpty(start+1, atStart);
+ if (inside.isEmpty) {
+ if (atStart)
+ // discard the whole group including both braces
+ addIntervall(start, inside.lastPosition+1);
+ else
+ // keep the braces, discard only what is between them
+ addIntervall(start+1,inside.lastPosition);
+ }
+ else {
+ // non empty parenthesis
+ if (atStart) {
+ // discard just the two braces, keep the content
+ addIntervall(start, start+1);
+ addIntervall(inside.lastPosition, inside.lastPosition+1);
+ }
+ }
+ Result.isEmpty &= inside.isEmpty;
+ start = inside.lastPosition+1;
+ }
+ else if (par[start] == '}') {
+ // End of the current level
+ Result.lastPosition = start;
+ return(Result);
+ }
+ else if (par[start] == '\\') {
+ int check = checkMacro(par.substr(start, 20));
+ if (check > 0) {
+ // Known char,
+ start += check;
+ Result.isEmpty = false;
+ atStart = false;
+ }
+ else if (check == 0) {
+ // skip next escaped
+ // or it is \regexp{.*\endregexp{}} which counts as 1 char!
+ if (regexPossible && (par.compare(start, 8, "\\regexp{") == 0)) {
+ // Search from 'start': a search from the beginning of par could hit
+ // the end of an earlier regexp block and move 'start' backwards.
+ size_t endreg = par.find("\\endregexp{}}", start);
+ // "\\endregexp{}}" is 13 chars long; it has to fit before _dataEnd.
+ // Written as an addition so that a small _dataEnd cannot wrap around.
+ if (endreg == string::npos || endreg + 13 > size_t(_dataEnd))
+ start = _dataEnd;
+ else
+ // NOTE(review): +12 leaves 'start' on the final '}' of the
+ // terminator, which the next iteration treats as a closing
+ // brace -- confirm whether +13 is intended.
+ start = endreg + 12;
+ }
+ else
+ start += 2;
+ Result.isEmpty = false;
+ atStart = false;
+ }
+ else {
+ // Here follows maybe empty macro?
+ // discard e.g. '\noun{}', or '\noun{}{}'
+ // (check is negative here, so 'start - check' is the end of the macro)
+ addIntervall(start, start - check);
+ start = start - check;
+ atStart = atStartOrigin;
+ }
+ }
+ else {
+ // Normal chars
+ Result.isEmpty = false;
+ if (par[start] != ' ')
+ atStart = false;
+ else
+ atStart = atStartOrigin;
+ start += 1;
+ }
+ }
+ return Result;
+}
- size_t regex_start, regex_end;
+/*
+ * Run checkEmpty() on the block beginning at 'start'.
+ * Returns the position following the one reported by checkEmpty().
+ */
+int LangInfo::discardParethesizedInBlock(int start)
+{
if (regexPossible) {
+ size_t regex_start, regex_end;
regex_start = par.find("\\regexp{", start);
if (regex_start == string::npos)
regexPossible = false;
regexPossible = false;
}
}
- if (!regexPossible) {
- regex_start = _dataEnd;
- regex_end = _dataEnd;
- }
- for (int i = start; i < _dataEnd; i += 1+skip) {
- char c = par[i];
- skip = 0;
- if (c == '\\') {
- if (size_t(i) == regex_start) {
- // 12 is correct, even if the length of "\\endregexp{}}" is 13
- skip = regex_end + 12 - i;
- }
- else
- skip = 1;
- isempty = false;
- }
- else if (c == '{') {
- if (depth == 0) {
- addIntervall(i, i+1);
- // cout << "discard '{' at " << i << "\n";
- }
- else
- isempty = false;
- depth++;
- }
- else if (c == '}') {
- if (depth == 1) {
- addIntervall(i, i+1);
- // cout << "discard '}' at " << i << "\n";
- }
- else if (depth < 1)
- break;
- depth--;
- }
- else
- isempty = false;
- }
- return(isempty);
+ int previous = previousNotIgnored(start-1);
+ // previousNotIgnored() yields -1 when nothing un-ignored precedes 'start';
+ // par must not be indexed with it, and no '{' precedes the block then.
+ bool atStart = (previous >= 0) && (par[previous] == '{');
+ emptyResult inside = checkEmpty(start, atStart);
+ return inside.lastPosition+1;
}
bool LangInfo::discarSuperfluousParentheses(int start)
{
- start = nextNotIgored(start);
+ start = nextNotIgnored(start);
+ start = discardParethesizedInBlock(start);
while ((par[start] == '{') && (start < _dataEnd)) {
- discardParethesizedInBlock(start);
- start = nextNotIgored(start+1);
+ start = discardParethesizedInBlock(start);
}
// It is empty if (par[start] == '}')
return ((start >= _dataEnd) || (par[start] == '}'));
start = color.getEnd()+1;
else {
// Apparently nothing output so far
- start = _dataStart;
+ start = nextNotIgnored(_dataStart);
}
discarSuperfluousParentheses(start);
output(os, _dataEnd);
firstLanguage.setDataEnd(par.length());
// discard old closing
firstLanguage.addIntervall(oldend, oldend+1);
- for (int i = 1; i < firstLanguage.getEnd(); i++) {
- if (par[i] == '{')
- firstLanguage.discardParethesizedInBlock(i);
- else
- break;
- }
}
firstLanguage.process(os);
// For the case, that the first language ends unexpected
docstring docstr = stringifyFromForSearch(opt, cur, len);
string str = normalize(docstr, true);
+ if (!opt.ignoreformat) {
+ str = removefontinfo(str);
+ str = correctlanguagesetting(str, false, false);
+ }
if (str.empty()) return(-1);
LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'");
LYXERR(Debug::FIND, "After normalization: '" << str << "'");
endpos = cur.pos() + len;
TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams,
string(), cur.pos(), endpos);
- string s = correctlanguagesetting(lyx::to_utf8(ods.str()), false, false);
+ string s = lyx::to_utf8(ods.str());
LYXERR(Debug::FIND, "Latexified +modified text: '" << s << "'");
return(lyx::from_utf8(s));
} else if (cur.inMathed()) {