src/lyxfind.cpp

   1 /**
   2  * \file lyxfind.cpp
   3  * This file is part of LyX, the document processor.
   4  * License details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author John Levon
   8  * \author Jürgen Vigna
   9  * \author Alfredo Braunstein
  10  * \author Tommaso Cucinotta
  11  * \author Kornel Benko
  12  *
  13  * Full author contact details are available in file CREDITS.
  14  */
  15
  16 #include <config.h>
  17
  18 #include "lyxfind.h"
  19
  20 #include "Buffer.h"
  21 #include "BufferList.h"
  22 #include "BufferParams.h"
  23 #include "BufferView.h"
  24 #include "Changes.h"
  25 #include "Cursor.h"
  26 #include "CutAndPaste.h"
  27 #include "FuncRequest.h"
  28 #include "LyX.h"
  29 #include "output_latex.h"
  30 #include "OutputParams.h"
  31 #include "Paragraph.h"
  32 #include "Text.h"
  33 #include "Encoding.h"
  34 #include "Language.h"
  35
  36 #include "frontends/Application.h"
  37 #include "frontends/alert.h"
  38
  39 #include "mathed/InsetMath.h"
  40 #include "mathed/InsetMathHull.h"
  41 #include "mathed/MathData.h"
  42 #include "mathed/MathStream.h"
  43 #include "mathed/MathSupport.h"
  44
  45 #include "support/debug.h"
  46 #include "support/docstream.h"
  47 #include "support/FileName.h"
  48 #include "support/gettext.h"
  49 #include "support/lassert.h"
  50 #include "support/lstrings.h"
  51 #include "support/textutils.h"
  52
  53 #include <unordered_map>
  54 #include <regex>
  55
  56 //#define ResultsDebug
  57 #define USE_QT_FOR_SEARCH
  58 #if defined(USE_QT_FOR_SEARCH)
  59         #include <QtCore>       // sets QT_VERSION
  60         #if (QT_VERSION >= 0x050000)
  61                 #include <QRegularExpression>
  62                 #define QTSEARCH 1
  63         #else
  64                 #define QTSEARCH 0
  65         #endif
  66 #else
  67         #define QTSEARCH 0
  68 #endif
  69
  70 using namespace std;
  71 using namespace lyx::support;
  72
  73 namespace lyx {
  74
  75 typedef unordered_map<string, string> AccentsMap;
  76 typedef unordered_map<string,string>::const_iterator AccentsIterator;
  77 static AccentsMap accents = unordered_map<string, string>();
  78
  79 // Helper class for deciding what should be ignored
  80 class IgnoreFormats {
  81  public:
  82         ///
  83         IgnoreFormats() = default;
  84         ///
  85         bool getFamily() const { return ignoreFamily_; }
  86         ///
  87         bool getSeries() const { return ignoreSeries_; }
  88         ///
  89         bool getShape() const { return ignoreShape_; }
  90         ///
  91         bool getUnderline() const { return ignoreUnderline_; }
  92         ///
  93         bool getMarkUp() const { return ignoreMarkUp_; }
  94         ///
  95         bool getStrikeOut() const { return ignoreStrikeOut_; }
  96         ///
  97         bool getSectioning() const { return ignoreSectioning_; }
  98         ///
  99         bool getFrontMatter() const { return ignoreFrontMatter_; }
 100         ///
 101         bool getColor() const { return ignoreColor_; }
 102         ///
 103         bool getLanguage() const { return ignoreLanguage_; }
 104         ///
 105         bool getDeleted() const { return ignoreDeleted_; }
 106         ///
 107         void setIgnoreDeleted(bool value);
 108         ///
 109         void setIgnoreFormat(string const & type, bool value, bool fromUser = true);
 110
 111 private:
 112         ///
 113         bool ignoreFamily_ = false;
 114         ///
 115         bool ignoreSeries_ = false;
 116         ///
 117         bool ignoreShape_ = false;
 118         ///
 119         bool ignoreUnderline_ = false;
 120         ///
 121         bool ignoreMarkUp_ = false;
 122         ///
 123         bool ignoreStrikeOut_ = false;
 124         ///
 125         bool ignoreSectioning_ = false;
 126         ///
 127         bool ignoreFrontMatter_ = false;
 128         ///
 129         bool ignoreColor_ = false;
 130         ///
 131         bool ignoreLanguage_ = false;
 132         bool userSelectedIgnoreLanguage_ = false;
 133         ///
 134         bool ignoreDeleted_ = true;
 135 };
 136
 137 void IgnoreFormats::setIgnoreFormat(string const & type, bool value, bool fromUser)
 138 {
 139         if (type == "color") {
 140                 ignoreColor_ = value;
 141         }
 142         else if (type == "language") {
 143                 if (fromUser) {
 144                         userSelectedIgnoreLanguage_ = value;
 145                         ignoreLanguage_ = value;
 146                 }
 147                 else
 148                         ignoreLanguage_ = (value || userSelectedIgnoreLanguage_);
 149         }
 150         else if (type == "sectioning") {
 151                 ignoreSectioning_ = value;
 152                 ignoreFrontMatter_ = value;
 153         }
 154         else if (type == "font") {
 155                 ignoreSeries_ = value;
 156                 ignoreShape_ = value;
 157                 ignoreFamily_ = value;
 158         }
 159         else if (type == "series") {
 160                 ignoreSeries_ = value;
 161         }
 162         else if (type == "shape") {
 163                 ignoreShape_ = value;
 164         }
 165         else if (type == "family") {
 166                 ignoreFamily_ = value;
 167         }
 168         else if (type == "markup") {
 169                 ignoreMarkUp_ = value;
 170         }
 171         else if (type == "underline") {
 172                 ignoreUnderline_ = value;
 173         }
 174         else if (type == "strike") {
 175                 ignoreStrikeOut_ = value;
 176         }
 177         else if (type == "deleted") {
 178                 ignoreDeleted_ = value;
 179         }
 180 }
 181
 182 // The global variable that can be changed from outside
 183 IgnoreFormats ignoreFormats;
 184
 185
 186 void setIgnoreFormat(string const & type, bool value, bool fromUser)
 187 {
 188   ignoreFormats.setIgnoreFormat(type, value, fromUser);
 189 }
 190
 191
 192 namespace {
 193
 194 bool parse_bool(docstring & howto)
 195 {
 196         if (howto.empty())
 197                 return false;
 198         docstring var;
 199         howto = split(howto, var, ' ');
 200         return var == "1";
 201 }
 202
 203
 204 class MatchString
 205 {
 206 public:
 207         MatchString(docstring const & s, bool cs, bool mw)
 208                 : str(s), case_sens(cs), whole_words(mw)
 209         {}
 210
 211         // returns true if the specified string is at the specified position
 212         // del specifies whether deleted strings in ct mode will be considered
 213         int operator()(Paragraph const & par, pos_type pos, bool del = true) const
 214         {
 215                 return par.find(str, case_sens, whole_words, pos, del);
 216         }
 217
 218 private:
 219         // search string
 220         docstring str;
 221         // case sensitive
 222         bool case_sens;
 223         // match whole words only
 224         bool whole_words;
 225 };
 226
 227
 228 int findForward(DocIterator & cur, MatchString const & match,
 229                 bool find_del = true)
 230 {
 231         for (; cur; cur.forwardChar())
 232                 if (cur.inTexted()) {
 233                         int len = match(cur.paragraph(), cur.pos(), find_del);
 234                         if (len > 0)
 235                                 return len;
 236                 }
 237         return 0;
 238 }
 239
 240
 241 int findBackwards(DocIterator & cur, MatchString const & match,
 242                   bool find_del = true)
 243 {
 244         while (cur) {
 245                 cur.backwardChar();
 246                 if (cur.inTexted()) {
 247                         int len = match(cur.paragraph(), cur.pos(), find_del);
 248                         if (len > 0)
 249                                 return len;
 250                 }
 251         }
 252         return 0;
 253 }
 254
 255
 256 bool searchAllowed(docstring const & str)
 257 {
 258         if (str.empty()) {
 259                 frontend::Alert::error(_("Search error"), _("Search string is empty"));
 260                 return false;
 261         }
 262         return true;
 263 }
 264
 265 } // namespace
 266
 267
 268 bool findOne(BufferView * bv, docstring const & searchstr,
 269              bool case_sens, bool whole, bool forward,
 270              bool find_del, bool check_wrap)
 271 {
 272         if (!searchAllowed(searchstr))
 273                 return false;
 274
 275         DocIterator cur = forward
 276                 ? bv->cursor().selectionEnd()
 277                 : bv->cursor().selectionBegin();
 278
 279         MatchString const match(searchstr, case_sens, whole);
 280
 281         int match_len = forward
 282                 ? findForward(cur, match, find_del)
 283                 : findBackwards(cur, match, find_del);
 284
 285         if (match_len > 0)
 286                 bv->putSelectionAt(cur, match_len, !forward);
 287         else if (check_wrap) {
 288                 DocIterator cur_orig(bv->cursor());
 289                 docstring q;
 290                 if (forward)
 291                         q = _("End of file reached while searching forward.\n"
 292                           "Continue searching from the beginning?");
 293                 else
 294                         q = _("Beginning of file reached while searching backward.\n"
 295                           "Continue searching from the end?");
 296                 int wrap_answer = frontend::Alert::prompt(_("Wrap search?"),
 297                         q, 0, 1, _("&Yes"), _("&No"));
 298                 if (wrap_answer == 0) {
 299                         if (forward) {
 300                                 bv->cursor().clear();
 301                                 bv->cursor().push_back(CursorSlice(bv->buffer().inset()));
 302                         } else {
 303                                 bv->cursor().setCursor(doc_iterator_end(&bv->buffer()));
 304                                 bv->cursor().backwardPos();
 305                         }
 306                         bv->clearSelection();
 307                         if (findOne(bv, searchstr, case_sens, whole, forward, find_del, false))
 308                                 return true;
 309                 }
 310                 bv->cursor().setCursor(cur_orig);
 311                 return false;
 312         }
 313
 314         return match_len > 0;
 315 }
 316
 317
 318 namespace {
 319
 320 int replaceAll(BufferView * bv,
 321                docstring const & searchstr, docstring const & replacestr,
 322                bool case_sens, bool whole)
 323 {
 324         Buffer & buf = bv->buffer();
 325
 326         if (!searchAllowed(searchstr) || buf.isReadonly())
 327                 return 0;
 328
 329         DocIterator cur_orig(bv->cursor());
 330
 331         MatchString const match(searchstr, case_sens, whole);
 332         int num = 0;
 333
 334         int const rsize = replacestr.size();
 335         int const ssize = searchstr.size();
 336
 337         Cursor cur(*bv);
 338         cur.setCursor(doc_iterator_begin(&buf));
 339         int match_len = findForward(cur, match, false);
 340         while (match_len > 0) {
 341                 // Backup current cursor position and font.
 342                 pos_type const pos = cur.pos();
 343                 Font const font = cur.paragraph().getFontSettings(buf.params(), pos);
 344                 cur.recordUndo();
 345                 int striked = ssize -
 346                         cur.paragraph().eraseChars(pos, pos + match_len,
 347                                                    buf.params().track_changes);
 348                 cur.paragraph().insert(pos, replacestr, font,
 349                                        Change(buf.params().track_changes
 350                                               ? Change::INSERTED
 351                                               : Change::UNCHANGED));
 352                 for (int i = 0; i < rsize + striked; ++i)
 353                         cur.forwardChar();
 354                 ++num;
 355                 match_len = findForward(cur, match, false);
 356         }
 357
 358         bv->putSelectionAt(doc_iterator_begin(&buf), 0, false);
 359
 360         cur_orig.fixIfBroken();
 361         bv->setCursor(cur_orig);
 362
 363         return num;
 364 }
 365
 366
 367 // the idea here is that we are going to replace the string that
 368 // is selected IF it is the search string.
 369 // if there is a selection, but it is not the search string, then
 370 // we basically ignore it. (FIXME We ought to replace only within
 371 // the selection.)
 372 // if there is no selection, then:
 373 //  (i) if some search string has been provided, then we find it.
 374 //      (think of how the dialog works when you hit "replace" the
 375 //      first time.)
 376 // (ii) if no search string has been provided, then we treat the
 377 //      word the cursor is in as the search string. (why? i have no
 378 //      idea.) but this only works in text?
 379 //
 380 // returns the number of replacements made (one, if any) and
 381 // whether anything at all was done.
 382 pair<bool, int> replaceOne(BufferView * bv, docstring searchstr,
 383                            docstring const & replacestr, bool case_sens,
 384                            bool whole, bool forward, bool findnext)
 385 {
 386         Cursor & cur = bv->cursor();
 387         if (!cur.selection()) {
 388                 // no selection, non-empty search string: find it
 389                 if (!searchstr.empty()) {
 390                         bool const found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext);
 391                         return make_pair(found, 0);
 392                 }
 393                 // empty search string
 394                 if (!cur.inTexted())
 395                         // bail in math
 396                         return make_pair(false, 0);
 397                 // select current word and treat it as the search string.
 398                 // This causes a minor bug as undo will restore this selection,
 399                 // which the user did not create (#8986).
 400                 cur.innerText()->selectWord(cur, WHOLE_WORD);
 401                 searchstr = cur.selectionAsString(false, true);
 402         }
 403
 404         // if we still don't have a search string, report the error
 405         // and abort.
 406         if (!searchAllowed(searchstr))
 407                 return make_pair(false, 0);
 408
 409         bool have_selection = cur.selection();
 410         docstring const selected = cur.selectionAsString(false, true);
 411         bool match =
 412                 case_sens
 413                 ? searchstr == selected
 414                 : compare_no_case(searchstr, selected) == 0;
 415
 416         // no selection or current selection is not search word:
 417         // just find the search word
 418         if (!have_selection || !match) {
 419                 bool const found = findOne(bv, searchstr, case_sens, whole, forward, true, findnext);
 420                 return make_pair(found, 0);
 421         }
 422
 423         // we're now actually ready to replace. if the buffer is
 424         // read-only, we can't, though.
 425         if (bv->buffer().isReadonly())
 426                 return make_pair(false, 0);
 427
 428         cap::replaceSelectionWithString(cur, replacestr);
 429         if (forward) {
 430                 cur.pos() += replacestr.length();
 431                 LASSERT(cur.pos() <= cur.lastpos(),
 432                         cur.pos() = cur.lastpos());
 433         }
 434         if (findnext)
 435                 findOne(bv, searchstr, case_sens, whole, forward, false, findnext);
 436
 437         return make_pair(true, 1);
 438 }
 439
 440 } // namespace
 441
 442
 443 docstring const find2string(docstring const & search,
 444                             bool casesensitive, bool matchword, bool forward)
 445 {
 446         odocstringstream ss;
 447         ss << search << '\n'
 448            << int(casesensitive) << ' '
 449            << int(matchword) << ' '
 450            << int(forward);
 451         return ss.str();
 452 }
 453
 454
 455 docstring const replace2string(docstring const & replace,
 456                                docstring const & search,
 457                                bool casesensitive, bool matchword,
 458                                bool all, bool forward, bool findnext)
 459 {
 460         odocstringstream ss;
 461         ss << replace << '\n'
 462            << search << '\n'
 463            << int(casesensitive) << ' '
 464            << int(matchword) << ' '
 465            << int(all) << ' '
 466            << int(forward) << ' '
 467            << int(findnext);
 468         return ss.str();
 469 }
 470
 471
 472 docstring const string2find(docstring const & argument,
 473                               bool &casesensitive,
 474                               bool &matchword,
 475                               bool &forward)
 476 {
 477         // data is of the form
 478         // "<search>
 479         //  <casesensitive> <matchword> <forward>"
 480         docstring search;
 481         docstring howto = split(argument, search, '\n');
 482
 483         casesensitive = parse_bool(howto);
 484         matchword     = parse_bool(howto);
 485         forward       = parse_bool(howto);
 486
 487         return search;
 488 }
 489
 490
 491 bool lyxfind(BufferView * bv, FuncRequest const & ev)
 492 {
 493         if (!bv || ev.action() != LFUN_WORD_FIND)
 494                 return false;
 495
 496         //lyxerr << "find called, cmd: " << ev << endl;
 497         bool casesensitive;
 498         bool matchword;
 499         bool forward;
 500         docstring search = string2find(ev.argument(), casesensitive, matchword, forward);
 501
 502         return findOne(bv, search, casesensitive, matchword, forward, false, true);
 503 }
 504
 505
 506 bool lyxreplace(BufferView * bv, FuncRequest const & ev)
 507 {
 508         if (!bv || ev.action() != LFUN_WORD_REPLACE)
 509                 return false;
 510
 511         // data is of the form
 512         // "<search>
 513         //  <replace>
 514         //  <casesensitive> <matchword> <all> <forward> <findnext>"
 515         docstring search;
 516         docstring rplc;
 517         docstring howto = split(ev.argument(), rplc, '\n');
 518         howto = split(howto, search, '\n');
 519
 520         bool casesensitive = parse_bool(howto);
 521         bool matchword     = parse_bool(howto);
 522         bool all           = parse_bool(howto);
 523         bool forward       = parse_bool(howto);
 524         bool findnext      = howto.empty() ? true : parse_bool(howto);
 525
 526         bool update = false;
 527
 528         int replace_count = 0;
 529         if (all) {
 530                 replace_count = replaceAll(bv, search, rplc, casesensitive, matchword);
 531                 update = replace_count > 0;
 532         } else {
 533                 pair<bool, int> rv =
 534                         replaceOne(bv, search, rplc, casesensitive, matchword, forward, findnext);
 535                 update = rv.first;
 536                 replace_count = rv.second;
 537         }
 538
 539         Buffer const & buf = bv->buffer();
 540         if (!update) {
 541                 // emit message signal.
 542                 buf.message(_("String not found."));
 543         } else {
 544                 if (replace_count == 0) {
 545                         buf.message(_("String found."));
 546                 } else if (replace_count == 1) {
 547                         buf.message(_("String has been replaced."));
 548                 } else {
 549                         docstring const str =
 550                                 bformat(_("%1$d strings have been replaced."), replace_count);
 551                         buf.message(str);
 552                 }
 553         }
 554         return update;
 555 }
 556
 557
 558 bool findNextChange(BufferView * bv, Cursor & cur, bool const check_wrap)
 559 {
 560         for (; cur; cur.forwardPos())
 561                 if (cur.inTexted() && cur.paragraph().isChanged(cur.pos()))
 562                         return true;
 563
 564         if (check_wrap) {
 565                 DocIterator cur_orig(bv->cursor());
 566                 docstring q = _("End of file reached while searching forward.\n"
 567                           "Continue searching from the beginning?");
 568                 int wrap_answer = frontend::Alert::prompt(_("Wrap search?"),
 569                         q, 0, 1, _("&Yes"), _("&No"));
 570                 if (wrap_answer == 0) {
 571                         bv->cursor().clear();
 572                         bv->cursor().push_back(CursorSlice(bv->buffer().inset()));
 573                         bv->clearSelection();
 574                         cur.setCursor(bv->cursor().selectionBegin());
 575                         if (findNextChange(bv, cur, false))
 576                                 return true;
 577                 }
 578                 bv->cursor().setCursor(cur_orig);
 579         }
 580
 581         return false;
 582 }
 583
 584
 585 bool findPreviousChange(BufferView * bv, Cursor & cur, bool const check_wrap)
 586 {
 587         for (cur.backwardPos(); cur; cur.backwardPos()) {
 588                 if (cur.inTexted() && cur.paragraph().isChanged(cur.pos()))
 589                         return true;
 590         }
 591
 592         if (check_wrap) {
 593                 DocIterator cur_orig(bv->cursor());
 594                 docstring q = _("Beginning of file reached while searching backward.\n"
 595                           "Continue searching from the end?");
 596                 int wrap_answer = frontend::Alert::prompt(_("Wrap search?"),
 597                         q, 0, 1, _("&Yes"), _("&No"));
 598                 if (wrap_answer == 0) {
 599                         bv->cursor().setCursor(doc_iterator_end(&bv->buffer()));
 600                         bv->cursor().backwardPos();
 601                         bv->clearSelection();
 602                         cur.setCursor(bv->cursor().selectionBegin());
 603                         if (findPreviousChange(bv, cur, false))
 604                                 return true;
 605                 }
 606                 bv->cursor().setCursor(cur_orig);
 607         }
 608
 609         return false;
 610 }
 611
 612
 613 bool selectChange(Cursor & cur, bool forward)
 614 {
 615         if (!cur.inTexted() || !cur.paragraph().isChanged(cur.pos()))
 616                 return false;
 617         Change ch = cur.paragraph().lookupChange(cur.pos());
 618
 619         CursorSlice tip1 = cur.top();
 620         for (; tip1.pit() < tip1.lastpit() || tip1.pos() < tip1.lastpos(); tip1.forwardPos()) {
 621                 Change ch2 = tip1.paragraph().lookupChange(tip1.pos());
 622                 if (!ch2.isSimilarTo(ch))
 623                         break;
 624         }
 625         CursorSlice tip2 = cur.top();
 626         for (; tip2.pit() > 0 || tip2.pos() > 0;) {
 627                 tip2.backwardPos();
 628                 Change ch2 = tip2.paragraph().lookupChange(tip2.pos());
 629                 if (!ch2.isSimilarTo(ch)) {
 630                         // take a step forward to correctly set the selection
 631                         tip2.forwardPos();
 632                         break;
 633                 }
 634         }
 635         if (forward)
 636                 swap(tip1, tip2);
 637         cur.top() = tip1;
 638         cur.bv().mouseSetCursor(cur, false);
 639         cur.top() = tip2;
 640         cur.bv().mouseSetCursor(cur, true);
 641         return true;
 642 }
 643
 644
 645 namespace {
 646
 647
 648 bool findChange(BufferView * bv, bool forward)
 649 {
 650         Cursor cur(*bv);
 651         cur.setCursor(forward ? bv->cursor().selectionEnd()
 652                       : bv->cursor().selectionBegin());
 653         forward ? findNextChange(bv, cur, true) : findPreviousChange(bv, cur, true);
 654         return selectChange(cur, forward);
 655 }
 656
 657 } // namespace
 658
 659 bool findNextChange(BufferView * bv)
 660 {
 661         return findChange(bv, true);
 662 }
 663
 664
 665 bool findPreviousChange(BufferView * bv)
 666 {
 667         return findChange(bv, false);
 668 }
 669
 670
 671
 672 namespace {
 673
 674 typedef vector<pair<string, string> > Escapes;
 675
 676 string string2regex(string in)
 677 {
 678         static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" };
 679         string temp = std::regex_replace(in, specialChars,  R"(\$&)" );
 680         string temp2("");
 681         size_t lastpos = 0;
 682         size_t fl_pos = 0;
 683         int offset = 1;
 684         while (fl_pos < temp.size()) {
 685                 fl_pos = temp.find("\\\\foreignlanguage", lastpos + offset);
 686                 if (fl_pos == string::npos)
 687                         break;
 688                 offset = 16;
 689                 temp2 += temp.substr(lastpos, fl_pos - lastpos);
 690                 temp2 += "\\n";
 691                 lastpos = fl_pos;
 692         }
 693         if (lastpos == 0)
 694                 return(temp);
 695         if (lastpos < temp.size()) {
 696                 temp2 += temp.substr(lastpos, temp.size() - lastpos);
 697         }
 698         return temp2;
 699 }
 700
 701 string correctRegex(string t, bool withformat)
 702 {
 703         /* Convert \backslash => \
 704          * and \{, \}, \[, \] => {, }, [, ]
 705          */
 706         string s("");
 707         regex wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))");
 708         size_t lastpos = 0;
 709         smatch sub;
 710         bool backslashed = false;
 711         for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
 712                 sub = *it;
 713                 string replace;
 714                 if ((sub.position(2) - sub.position(0)) % 2 == 1) {
 715                         continue;
 716                 }
 717                 else {
 718                         if (sub.str(4) == "backslash") {
 719                                 replace = "\\";
 720                                 if (withformat) {
 721                                         // transforms '\backslash \{' into '\{'
 722                                         // and '\{' into '{'
 723                                         string next = t.substr(sub.position(2) + sub.str(2).length(), 2);
 724                                         if ((next == "\\{") || (next == "\\}")) {
 725                                                 replace = "";
 726                                                 backslashed = true;
 727                                         }
 728                                 }
 729                         }
 730                         else if (sub.str(4) == "mathcircumflex")
 731                                 replace = "^";
 732                         else if (backslashed) {
 733                                 backslashed = false;
 734                                 if (withformat && (sub.str(3) == "{"))
 735                                         replace = accents["braceleft"];
 736                                 else if (withformat && (sub.str(3) == "}"))
 737                                         replace = accents["braceright"];
 738                                 else {
 739                                         // else part should not exist
 740                                         LASSERT(1, /**/);
 741                                 }
 742                         }
 743                         else
 744                                 replace = sub.str(3);
 745                 }
 746                 if (lastpos < (size_t) sub.position(2))
 747                         s += t.substr(lastpos, sub.position(2) - lastpos);
 748                 s += replace;
 749                 lastpos = sub.position(2) + sub.length(2);
 750         }
 751         if (lastpos == 0)
 752                 return t;
 753         else if (lastpos < t.length())
 754                 s += t.substr(lastpos, t.length() - lastpos);
 755         return s;
 756 }
 757
 758 /// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string),
 759 /// while outside apply get_lyx_unescapes()+get_regexp_escapes().
 760 /// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well.
 761 string escape_for_regex(string s, bool withformat)
 762 {
 763         size_t lastpos = 0;
 764         string result = "";
 765         while (lastpos < s.size()) {
 766                 size_t regex_pos = s.find("\\regexp{", lastpos);
 767                 if (regex_pos == string::npos) {
 768                         regex_pos = s.size();
 769                 }
 770                 if (regex_pos > lastpos) {
 771                         result += string2regex(s.substr(lastpos, regex_pos-lastpos));
 772                         lastpos = regex_pos;
 773                         if (lastpos == s.size())
 774                                 break;
 775                 }
 776                 size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8);
 777                 result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat);
 778                 lastpos = end_pos + 13;
 779         }
 780         return result;
 781 }
 782
 783
 784 /// Wrapper for lyx::regex_replace with simpler interface
 785 bool regex_replace(string const & s, string & t, string const & searchstr,
 786                    string const & replacestr)
 787 {
 788         regex e(searchstr, regex_constants::ECMAScript);
 789         ostringstream oss;
 790         ostream_iterator<char, char> it(oss);
 791         regex_replace(it, s.begin(), s.end(), e, replacestr);
 792         // tolerate t and s be references to the same variable
 793         bool rv = (s != oss.str());
 794         t = oss.str();
 795         return rv;
 796 }
 797
 798 class MatchResult {
 799 public:
 800         enum range {
 801                 newIsTooFar,
 802                 newIsBetter,
 803                 newIsInvalid
 804         };
 805         int match_len;
 806         int match_prefix;
 807         int match2end;
 808         int pos;
 809         int leadsize;
 810         int pos_len;
 811         int searched_size;
 812         vector <string> result = vector <string>();
 813         MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1),searched_size(0) {};
 814 };
 815
 816 static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres)
 817 {
 818   if (newres.match2end < oldres.match2end)
 819     return MatchResult::newIsTooFar;
 820   if (newres.match_len < oldres.match_len)
 821     return MatchResult::newIsTooFar;
 822
 823   if (newres.match_len == oldres.match_len) {
 824     if (newres.match2end == oldres.match2end)
 825       return MatchResult::newIsBetter;
 826   }
 827   return MatchResult::newIsInvalid;
 828 }
 829
/** The class performing a match between a position in the document and the FindAdvOptions.
 **
 ** Besides the per-instance search state, the class owns two class-wide
 ** (static) members, valid_matches and matches, which FillResults() updates.
 **/

class MatchStringAdv {
public:
        /// Build a matcher for buffer \p buf using the search options \p opt.
        MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt);

        /** Tests if text starting at the supplied position matches with the one provided to the MatchStringAdv
         ** constructor as opt.search, under the opt.* options settings.
         **
         ** @param cur
         **     Position in the document at which the test starts.
         ** @param len
         **     Defaults to -1. NOTE(review): its exact meaning is not visible in
         **     this declaration -- confirm against the definition.
         ** @param at_begin
         **     If set, then match is searched only against beginning of text starting at cur.
         **     If unset, then match is searched anywhere in text starting at cur.
         **
         ** @return
         ** A MatchResult. The historical wording of this comment was "the length of
         ** the matching text, or zero if no match was found"; presumably that
         ** length is reported in MatchResult::match_len -- verify against the definition.
         **/
        MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
#if QTSEARCH
        // Only available when built with QTSEARCH.
        // NOTE(review): presumably filled in while the regular expression is
        // compiled -- confirm in the constructor/CreateRegexp().
        bool regexIsValid;
        string regexError;
#endif

public:
        /// buffer
        lyx::Buffer * p_buf;
        /// first buffer on which search was started
        lyx::Buffer * const p_first_buf;
        /// options
        FindAndReplaceOptions const & opt;

private:
        /// Auxiliary find method (does not account for opt.matchword)
        MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const;
        /// Set up the regular expressions used for searching.
        /// NOTE(review): how the three string arguments are combined is defined
        /// outside this declaration -- see the definition.
        void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = "");

        /** Normalize a stringified or latexified LyX paragraph.
         **
         ** Normalize means:
         ** <ul>
         **   <li>if search is not casesensitive, then lowercase the string;
         **   <li>remove any newline at begin or end of the string;
         **   <li>replace any newline in the middle of the string with a simple space;
         **   <li>remove stale empty styles and environments, like \emph{} and \textbf{}.
         ** </ul>
         **
         ** @todo Normalization should also expand macros, if the corresponding
         ** search option was checked.
         **/
        string normalize(docstring const & s) const;
        // normalized string to search
        string par_as_string;
        // regular expression to use for searching
        // regexp2 is same as regexp, but prefixed with a ".*?"
        // The regex engine is chosen at compile time: QRegularExpression when
        // QTSEARCH is set, std::regex otherwise.
#if QTSEARCH
        QRegularExpression regexp;
        QRegularExpression regexp2;
#else
        regex regexp;
        regex regexp2;
#endif
        // leading format material as string
        string lead_as_string;
        // par_as_string after removal of lead_as_string
        string par_as_string_nolead;
        // unmatched open braces in the search string/regexp
        int open_braces;
        // number of (.*?) subexpressions added at end of search regexp for closing
        // environments, math mode, styles, etc...
        int close_wildcards;
public:
        // Are we searching with regular expressions ?
        bool use_regexp;
        // Number of entries of 'matches' that are currently valid
        // (set by FillResults(); 0 when the last result had no match).
        static int valid_matches;
        // Class-wide copy of the strings of the last result passed to FillResults().
        static vector <string> matches;
        // Copy found_mr.result into 'matches' and update 'valid_matches'.
        void FillResults(MatchResult &found_mr);
};
 907
 908 int MatchStringAdv::valid_matches = 0;
 909 vector <string> MatchStringAdv::matches = vector <string>(10);
 910
 911 void MatchStringAdv::FillResults(MatchResult &found_mr)
 912 {
 913   if (found_mr.match_len > 0) {
 914     valid_matches = found_mr.result.size();
 915     for (size_t i = 0; i < found_mr.result.size(); i++)
 916       matches[i] = found_mr.result[i];
 917   }
 918   else
 919     valid_matches = 0;
 920 }
 921
 922 static docstring buffer_to_latex(Buffer & buffer)
 923 {
 924         //OutputParams runparams(&buffer.params().encoding());
 925         OutputParams runparams(encodings.fromLyXName("utf8"));
 926         odocstringstream ods;
 927         otexstream os(ods);
 928         runparams.nice = true;
 929         runparams.flavor = Flavor::XeTeX;
 930         runparams.linelen = 10000; //lyxrc.plaintext_linelen;
 931         // No side effect of file copying and image conversion
 932         runparams.dryrun = true;
 933         if (ignoreFormats.getDeleted())
 934                 runparams.for_searchAdv = OutputParams::SearchWithoutDeleted;
 935         else
 936                 runparams.for_searchAdv = OutputParams::SearchWithDeleted;
 937         pit_type const endpit = buffer.paragraphs().size();
 938         for (pit_type pit = 0; pit != endpit; ++pit) {
 939                 TeXOnePar(buffer, buffer.text(), pit, os, runparams);
 940                 LYXERR(Debug::FIND, "searchString up to here: " << ods.str());
 941         }
 942         return ods.str();
 943 }
 944
 945
 946 static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions const & opt)
 947 {
 948         docstring str;
 949         if (!opt.ignoreformat) {
 950                 str = buffer_to_latex(buffer);
 951         } else {
 952                 // OutputParams runparams(&buffer.params().encoding());
 953                 OutputParams runparams(encodings.fromLyXName("utf8"));
 954                 runparams.nice = true;
 955                 runparams.flavor = Flavor::XeTeX;
 956                 runparams.linelen = 10000; //lyxrc.plaintext_linelen;
 957                 runparams.dryrun = true;
 958                 int option = AS_STR_INSETS |AS_STR_PLAINTEXT;
 959                 if (ignoreFormats.getDeleted()) {
 960                         option |= AS_STR_SKIPDELETE;
 961                         runparams.for_searchAdv = OutputParams::SearchWithoutDeleted;
 962                 }
 963                 else {
 964                         runparams.for_searchAdv = OutputParams::SearchWithDeleted;
 965                 }
 966                 for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) {
 967                         Paragraph const & par = buffer.paragraphs().at(pit);
 968                         LYXERR(Debug::FIND, "Adding to search string: '"
 969                                << par.asString(pos_type(0), par.size(),
 970                                                option,
 971                                                &runparams)
 972                                << "'");
 973                         str += par.asString(pos_type(0), par.size(),
 974                                             option,
 975                                             &runparams);
 976                 }
 977                 // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts
 978                 string t = to_utf8(str);
 979                 while (regex_replace(t, t, "\\\\(text|lyxmathsym|ensuremath)\\{([^\\}]*)\\}", "$2"));
 980                 str = from_utf8(t);
 981         }
 982         return str;
 983 }
 984
 985
 986 /// Return separation pos between the leading material and the rest
 987 static size_t identifyLeading(string const & s)
 988 {
 989         string t = s;
 990         // @TODO Support \item[text]
 991         // Kornel: Added textsl, textsf, textit, texttt and noun
 992         // + allow to search for colored text too
 993         while (regex_replace(t, t, "^\\\\(("
 994                              "(author|title|subtitle|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|"
 995                                "lyxaddress|lyxrightaddress|"
 996                                "footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|"
 997                                "emph|noun|minisec|text(bf|md|sl|sf|it|tt))|"
 998                              "((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|"
 999                              "(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "")
1000                || regex_replace(t, t, "^\\$", "")
1001                || regex_replace(t, t, "^\\\\\\[", "")
1002                || regex_replace(t, t, "^ ?\\\\item\\{[a-z]+\\}", "")
1003                || regex_replace(t, t, "^\\\\begin\\{[a-zA-Z_]*\\*?\\}", ""))
1004                ;
1005         LYXERR(Debug::FIND, "  after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'");
1006         return s.find(t);
1007 }
1008
/*
 * Given a latexified string, retrieve the handled features it contains.
 * The features of the regex will later be compared with the features
 * of the searched text. If the regex features are not a subset of the
 * features of the analyzed text, then, in a search that does not ignore
 * the format, we can stop the search in the relevant inset early.
 */
/// Set of recognised macro names found in a string (value is always true).
using Features = map<string, bool>;

/// Collect the recognised "\name{", "\name*{" and "\name{arg}{" tokens of \p s.
/// Everything between "\regexp{" and the next "\endregexp{" is skipped.
static Features identifyFeatures(string const & s)
{
	static regex const feature("\\\\(([a-zA-Z]+(\\{([a-z]+\\*?)\\}|\\*)?))\\{");
	static regex const valid("^("
		"("
			"(footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|"
				"emph|noun|text(bf|md|sl|sf|it|tt)|"
				"(textcolor|foreignlanguage|item|listitem|latexenvironment)\\{[a-z]+\\*?\\})|"
			"(u|uu)line|(s|x)out|uwave|"
			"(sub|extra)?title|author|subject|publishers|dedication|(upper|lower)titleback|lyx(right)?address)|"
		"((sub)?(((sub)?section)|paragraph)|part|chapter|lyxslide)\\*?)$");

	Features found;
	bool inside_regexp = false;
	sregex_iterator const last;
	for (sregex_iterator cur(s.begin(), s.end(), feature); cur != last; ++cur) {
		string const token = cur->str(1);
		if (inside_regexp) {
			// Only the end marker is of interest inside a regexp inset
			if (token == "endregexp")
				inside_regexp = false;
			continue;
		}
		if (token == "regexp") {
			inside_regexp = true;
			continue;
		}
		// Tokens that are not in the list of handled features are ignored
		if (regex_match(token, valid))
			found[token] = true;
	}
	return found;
}
1058
/*
 * Describes one key of the form "\\[a-z]+{" : its category and where
 * the token and its data were found.
 */
class KeyInfo {
 public:
  enum KeyType {
    noContent,      // Char type with content discarded, like \hspace{1cm}
    isChar,         // Char, like \backslash
    isText,         // replace starting backslash with '#'
    isSectioning,   // \part, \section*, ...
    isTitle,        // title, author etc
    isMain,         // \foreignlanguage{ngerman}, ...
    noMain,         // inside \code{} to discard language in content
    isRegex,
    isMath,         // \begin{eqnarray}...\end{eqnarray}, ... $...$
    isStandard,     // fonts, colors, markups, ...
    isSize,         // footnotesize, ... large, ... Ignore all of them
    invalid,
    doRemove,       // inputencoding, ... Discard also content, because they do not help in search
    removeWithArg,  // twocolumns, ... like remove, but also all arguments
    isList,         // item, listitem
    isIgnored,      // tex, latex, ... like isChar
    cleanToStart,   // like \lettrine[lines=5]{}{}
    headRemove,     // like isStandard, but always remove head
    endArguments    // End of arguments marker for lettrine, so that they can be ignored
  };

  KeyInfo() = default;
  // Set category, parenthesis count and disabled flag; all positions stay at -1.
  KeyInfo(KeyType type, int parcount, bool disable)
    : keytype{type}, parenthesiscount{parcount}, disabled{disable}
  {}

  KeyType keytype{invalid};
  string head;
  int _tokensize{-1};
  int _tokenstart{-1};
  int _dataStart{-1};
  int _dataEnd{-1};
  int parenthesiscount{1};
  bool disabled{false};
  bool used{false};                     /* by pattern */
};
1123
/// One [low, upper] pair as stored in Intervall::borders; both default to 0.
class Border {
 public:
  int low;
  int upper;
  Border(int l = 0, int u = 0) : low{l}, upper{u} {}
};
1130
// Capacity of the depts[]/closes[] arrays of Intervall
#define MAXOPENED 30
/*
 * Bookkeeping for one latexified string: the string itself (par), the
 * list of regions to ignore (borders[0..ignoreidx]) and a stack of
 * currently opened parentheses (depts[]/closes[], top at actualdeptindex).
 * Note that 'borders' is static, i.e. shared by all instances, while
 * 'ignoreidx' is per instance.
 */
class Intervall {
  // Stored from the constructor argument; not read inside this class body
  bool isPatternString_;
public:
  // Starts with no ignored region (ignoreidx == -1) and an empty
  // parenthesis stack (index 0 holds the base entry).
  explicit Intervall(bool isPattern, string const & p) :
        isPatternString_(isPattern), par(p), ignoreidx(-1), actualdeptindex(0),
        hasTitle(false), langcount(0)
  {
    depts[0] = 0;
    closes[0] = 0;
  }

  // The string being analysed (modified in place by removeAccents())
  string par;
  // Index of the last valid entry in borders, -1 if there is none
  int ignoreidx;
  // Ignored regions; entries 0..ignoreidx are valid
  static vector<Border> borders;
  // depts[k]: position just after the opening parenthesis of level k
  int depts[MAXOPENED];
  // closes[k]: position just after the closing parenthesis of level k, -1 while open
  int closes[MAXOPENED];
  // Current top of the depts[]/closes[] stack
  int actualdeptindex;
  // Skip backwards/forwards over ignored regions starting at the given position
  int previousNotIgnored(int) const;
  int nextNotIgnored(int) const;
  // Push/pop an entry of the parenthesis stack for the parenthesis at position i
  void handleOpenP(int i);
  void handleCloseP(int i, bool closingAllowed);
  // Reset the stack to a single opened level starting after openPos
  void resetOpenedP(int openPos);
  // NOTE(review): no definition of this one-argument overload is visible in this part of the file
  void addIntervall(int upper);
  void addIntervall(int low, int upper); /* if explicit */
  // Replace accent macros in par by their utf8 equivalent
  void removeAccents();
  void setForDefaultLang(KeyInfo const & defLang) const;
  // Default values for up, down and repeat are given at the definition
  int findclosing(int start, int end, char up, char down, int repeat);
  void handleParentheses(int lastpos, bool closingAllowed);
  // Set by LatexInfo::getFirstKey() when the first entry is a title key
  bool hasTitle;
  int langcount;        // Number of disabled language specs up to current position in actual interval
  // 0 if par[pos] is not '{', 3 for "{[}" or "{]}", 1 otherwise
  int isOpeningPar(int pos) const;
  // Head of the title key (empty if that key is disabled), see LatexInfo::getFirstKey()
  string titleValue;
  void output(ostringstream &os, int lastpos);
  // string show(int lastpos);
};
1167
1168 vector<Border> Intervall::borders = vector<Border>(30);
1169
1170 int Intervall::isOpeningPar(int pos) const
1171 {
1172   if ((pos < 0) || (size_t(pos) >= par.size()))
1173     return 0;
1174   if (par[pos] != '{')
1175     return 0;
1176   if (size_t(pos) + 2 >= par.size())
1177     return 1;
1178   if (par[pos+2] != '}')
1179     return 1;
1180   if (par[pos+1] == '[' || par[pos+1] == ']')
1181     return 3;
1182   return 1;
1183 }
1184
1185 void Intervall::setForDefaultLang(KeyInfo const & defLang) const
1186 {
1187   // Enable the use of first token again
1188   if (ignoreidx >= 0) {
1189     int value = defLang._tokenstart + defLang._tokensize;
1190     int borderidx = 0;
1191     if (hasTitle) {
1192       borderidx = 1;
1193     }
1194     if (value > 0) {
1195       if (borders[borderidx].low < value)
1196         borders[borderidx].low = value;
1197       if (borders[borderidx].upper < value)
1198         borders[borderidx].upper = value;
1199     }
1200   }
1201 }
1202
1203 static void checkDepthIndex(int val)
1204 {
1205   static int maxdepthidx = MAXOPENED-2;
1206   static int lastmaxdepth = 0;
1207   if (val > lastmaxdepth) {
1208     LYXERR(Debug::INFO, "Depth reached " << val);
1209     lastmaxdepth = val;
1210   }
1211   if (val > maxdepthidx) {
1212     maxdepthidx = val;
1213     LYXERR(Debug::INFO, "maxdepthidx now " << val);
1214   }
1215 }
1216
#if 0
// Not needed, because borders are now dynamically expanded
// (Disabled diagnostic: logged when the ignore index reached the end of
// the borders vector. Kept for reference only, it is never compiled.)
static void checkIgnoreIdx(int val)
{
  static int lastmaxignore = -1;
  if ((lastmaxignore < val) && (size_t(val+1) >= borders.size())) {
    LYXERR(Debug::INFO, "IgnoreIdx reached " << val);
    lastmaxignore = val;
  }
}
#endif
1228
/*
 * Expand the region of ignored parts of the input latex string
 * The region is only relevant in output()
 *
 * Adds [low, upper] to borders[0..ignoreidx]. Depending on where the new
 * region lies it is appended, inserted before an existing entry, or
 * merged with the entries it overlaps. An empty region (low == upper)
 * is ignored.
 */
void Intervall::addIntervall(int low, int upper)
{
  int idx;
  if (low == upper) return;
  // Search backwards for the insertion point: afterwards either idx == 0
  // or borders[idx-1] ends before 'low'.
  for (idx = ignoreidx+1; idx > 0; --idx) {
    if (low > borders[idx-1].upper) {
      break;
    }
  }
  Border br(low, upper);
  if (idx > ignoreidx) {
    // The new region starts after all stored ones: append it
    if (borders.size() <= size_t(idx)) {
      borders.push_back(br);
    }
    else {
      borders[idx] = br;
    }
    ignoreidx = idx;
    // checkIgnoreIdx(ignoreidx);
    return;
  }
  else {
    // Expand only if one of the new bounds is inside the interval
    // We know here that br.low > borders[idx-1].upper
    if (br.upper < borders[idx].low) {
      // We have to insert at this pos
      // First make room by duplicating the last valid entry ...
      if (size_t(ignoreidx+1) >= borders.size()) {
        borders.push_back(borders[ignoreidx]);
      }
      else {
        borders[ignoreidx+1] = borders[ignoreidx];
      }
      // ... then shift the entries idx..ignoreidx-1 up by one
      for (int i = ignoreidx; i > idx; --i) {
        borders[i] = borders[i-1];
      }
      borders[idx] = br;
      ignoreidx += 1;
      // checkIgnoreIdx(ignoreidx);
      return;
    }
    // Here we know, that we are overlapping
    if (br.low > borders[idx].low)
      br.low = borders[idx].low;
    // check what has to be concatenated
    // (count = number of consecutive entries, starting at idx, that the
    // growing region br reaches into)
    int count = 0;
    for (int i = idx; i <= ignoreidx; i++) {
      if (br.upper >= borders[i].low) {
        count++;
        if (br.upper < borders[i].upper)
          br.upper = borders[i].upper;
      }
      else {
        break;
      }
    }
    // count should be >= 1 here
    borders[idx] = br;
    if (count > 1) {
      // More than one entry was merged: close the gap behind borders[idx]
      for (int i = idx + count; i <= ignoreidx; i++) {
        borders[i-count+1] = borders[i];
      }
      ignoreidx -= count - 1;
      return;
    }
  }
}
1299
1300 static void buildaccent(string n, string param, string values)
1301 {
1302   stringstream s(n);
1303   string name;
1304   const char delim = '|';
1305   while (getline(s, name, delim)) {
1306     size_t start = 0;
1307     for (char c : param) {
1308       string key = name + "{" + c + "}";
1309       // get the corresponding utf8-value
1310       if ((values[start] & 0xc0) != 0xc0) {
1311         // should not happen, utf8 encoding starts at least with 11xxxxxx
1312         // but value for '\dot{i}' is 'i', which is ascii
1313         if ((values[start] & 0x80) == 0) {
1314           // is ascii
1315           accents[key] = values.substr(start, 1);
1316           // LYXERR(Debug::INFO, "" << key << "=" << accents[key]);
1317         }
1318         start++;
1319         continue;
1320       }
1321       for (int j = 1; ;j++) {
1322         if (start + j >= values.size()) {
1323           accents[key] = values.substr(start, j);
1324           start = values.size() - 1;
1325           break;
1326         }
1327         else if ((values[start+j] & 0xc0) != 0x80) {
1328           // This is the first byte of following utf8 char
1329           accents[key] = values.substr(start, j);
1330           start += j;
1331           // LYXERR(Debug::INFO, "" << key << "=" << accents[key]);
1332           break;
1333         }
1334       }
1335     }
1336   }
1337 }
1338
// Helper function
// Returns the utf8 byte sequence for the code point 'uchar'.
// Zero bytes are never emitted, so getutf8(0) is the empty string.
static string getutf8(unsigned uchar)
{
	#define maxc 5
	// The sequence is built right-aligned; unused leading bytes stay 0
	char bytes[maxc] = {0};
	if (uchar <= 0x7f) {
		bytes[maxc-1] = uchar & 0x7f;
	}
	else {
		unsigned char limit = 0x40;     // first value not fitting into the lead byte
		unsigned char lead = 0x80;      // marker bits of the lead byte
		int idx = maxc-1;
		// Emit continuation bytes (6 bits each) while the rest is too big
		while (idx >= 0 && uchar >= limit) {
			bytes[idx] = 0x80 | (uchar &  0x3f);
			--idx;
			uchar >>= 6;
			limit >>= 1;
			lead = (lead >> 1) | 0x80;
		}
		if (idx >= 0)
			bytes[idx] = lead + uchar;
	}
	string encoded;
	for (char b : bytes) {
		if (b != 0)
			encoded += b;
	}
	return encoded;
}
1370
1371 static void buildAccentsMap()
1372 {
1373   accents["imath"] = "ı";
1374   accents["i"] = "ı";
1375   accents["jmath"] = "ȷ";
1376   accents["cdot"] = "·";
1377   accents["textasciicircum"] = "^";
1378   accents["mathcircumflex"] = "^";
1379   accents["sim"] = "~";
1380   accents["guillemotright"] = "»";
1381   accents["guillemotleft"] = "«";
1382   accents["hairspace"]     = getutf8(0xf0000);  // select from free unicode plane 15
1383   accents["thinspace"]     = getutf8(0xf0002);  // and used _only_ by findadv
1384   accents["negthinspace"]  = getutf8(0xf0003);  // to omit backslashed latex macros
1385   accents["medspace"]      = getutf8(0xf0004);  // See https://en.wikipedia.org/wiki/Private_Use_Areas
1386   accents["negmedspace"]   = getutf8(0xf0005);
1387   accents["thickspace"]    = getutf8(0xf0006);
1388   accents["negthickspace"] = getutf8(0xf0007);
1389   accents["lyx"]           = getutf8(0xf0010);  // Used logos
1390   accents["LyX"]           = getutf8(0xf0010);
1391   accents["tex"]           = getutf8(0xf0011);
1392   accents["TeX"]           = getutf8(0xf0011);
1393   accents["latex"]         = getutf8(0xf0012);
1394   accents["LaTeX"]         = getutf8(0xf0012);
1395   accents["latexe"]        = getutf8(0xf0013);
1396   accents["LaTeXe"]        = getutf8(0xf0013);
1397   accents["lyxarrow"]      = getutf8(0xf0020);
1398   accents["braceleft"]     = getutf8(0xf0030);
1399   accents["braceright"]    = getutf8(0xf0031);
1400   accents["backslash lyx"]           = getutf8(0xf0010);        // Used logos inserted with starting \backslash
1401   accents["backslash LyX"]           = getutf8(0xf0010);
1402   accents["backslash tex"]           = getutf8(0xf0011);
1403   accents["backslash TeX"]           = getutf8(0xf0011);
1404   accents["backslash latex"]         = getutf8(0xf0012);
1405   accents["backslash LaTeX"]         = getutf8(0xf0012);
1406   accents["backslash latexe"]        = getutf8(0xf0013);
1407   accents["backslash LaTeXe"]        = getutf8(0xf0013);
1408   accents["backslash lyxarrow"]      = getutf8(0xf0020);
1409   accents["ddot{\\imath}"] = "ï";
1410   buildaccent("ddot", "aAeEhHiIioOtuUwWxXyY",
1411                       "äÄëËḧḦïÏïöÖẗüÜẅẄẍẌÿŸ");       // umlaut
1412   buildaccent("dot|.", "aAbBcCdDeEfFGghHIimMnNoOpPrRsStTwWxXyYzZ",
1413                        "ȧȦḃḂċĊḋḊėĖḟḞĠġḣḢİİṁṀṅṄȯȮṗṖṙṘṡṠṫṪẇẆẋẊẏẎżŻ");   // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
1414   accents["acute{\\imath}"] = "í";
1415   buildaccent("acute", "aAcCeEgGkKlLmMoOnNpPrRsSuUwWyYzZiI",
1416                        "áÁćĆéÉǵǴḱḰĺĹḿḾóÓńŃṕṔŕŔśŚúÚẃẂýÝźŹíÍ");
1417   buildaccent("dacute|H|h", "oOuU", "őŐűŰ");        // double acute
1418   buildaccent("mathring|r", "aAuUwy",
1419                             "åÅůŮẘẙ");  // ring
1420   accents["check{\\imath}"] = "ǐ";
1421   accents["check{\\jmath}"] = "ǰ";
1422   buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
1423                          "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ");   // caron
1424   accents["hat{\\imath}"] = "î";
1425   accents["hat{\\jmath}"] = "ĵ";
1426   buildaccent("hat|^", "aAcCeEgGhHiIjJoOsSuUwWyYzZ",
1427                        "âÂĉĈêÊĝĜĥĤîÎĵĴôÔŝŜûÛŵŴŷŶẑẐ");       // circ
1428   accents["bar{\\imath}"] = "ī";
1429   buildaccent("bar|=", "aAeEiIoOuUyY",
1430                        "āĀēĒīĪōŌūŪȳȲ");     // macron
1431   accents["tilde{\\imath}"] = "ĩ";
1432   buildaccent("tilde", "aAeEiInNoOuUvVyY",
1433                        "ãÃẽẼĩĨñÑõÕũŨṽṼỹỸ");       // tilde
1434   accents["breve{\\imath}"] = "ĭ";
1435   buildaccent("breve|u", "aAeEgGiIoOuU",
1436                          "ăĂĕĔğĞĭĬŏŎŭŬ");   // breve
1437   accents["grave{\\imath}"] = "ì";
1438   buildaccent("grave|`", "aAeEiIoOuUnNwWyY",
1439                          "àÀèÈìÌòÒùÙǹǸẁẀỳỲ");       // grave
1440   buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy",
1441                           "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ");        // dot below
1442   buildaccent("ogonek|k", "AaEeIiUuOo",
1443                           "ĄąĘęĮįŲųǪǫ");      // ogonek
1444   buildaccent("cedilla|c", "CcGgKkLlNnRrSsTtEeDdHh",
1445                            "ÇçĢģĶķĻļŅņŖŗŞşŢţȨȩḐḑḨḩ"); // cedilla
1446   buildaccent("subring|textsubring", "Aa",
1447                                      "Ḁḁ"); // subring
1448   buildaccent("subhat|textsubcircum", "DdEeLlNnTtUu",
1449                                       "ḒḓḘḙḼḽṊṋṰṱṶṷ");  // subcircum
1450   buildaccent("subtilde|textsubtilde", "EeIiUu",
1451                                        "ḚḛḬḭṴṵ");   // subtilde
1452   accents["dgrave{\\imath}"] = "ȉ";
1453   accents["textdoublegrave{\\i}"] = "ȉ";
1454   buildaccent("dgrave|textdoublegrave", "AaEeIiOoRrUu",
1455                                         "ȀȁȄȅȈȉȌȍȐȑȔȕ"); // double grave
1456   accents["rcap{\\imath}"] = "ȉ";
1457   accents["textroundcap{\\i}"] = "ȉ";
1458   buildaccent("rcap|textroundcap", "AaEeIiOoRrUu",
1459                                    "ȂȃȆȇȊȋȎȏȒȓȖȗ"); // inverted breve
1460   buildaccent("slashed", "oO",
1461                          "øØ"); // slashed
1462 }
1463
/*
 * Created accents in math or regexp environment
 * are macros, but we need the utf8 equivalent
 *
 * Every macro known to the 'accents' map is overwritten in place (in par)
 * by its utf8 value; the rest of the macro text is blanked and marked
 * as ignored via addIntervall(). The accents map is built on first use.
 */
void Intervall::removeAccents()
{
  if (accents.empty())
    buildAccentsMap();
  // Group 1 is the macro text without the leading backslash: either
  // "<accent>{<arg>}" or one of the argument-less macros, which must not be
  // followed by a letter.
  static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
         "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
      "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))");
  smatch sub;
  // Note: par is modified while iterating, but its length never changes
  for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
    sub = *itacc;
    string key = sub.str(1);
    AccentsIterator it_ac = accents.find(key);
    if (it_ac != accents.end()) {
      string val = it_ac->second;
      size_t pos = sub.position(size_t(0));
      // Write the utf8 value over the start of the macro (backslash included)
      for (size_t i = 0; i < val.size(); i++) {
        par[pos+i] = val[i];
      }
      // Remove possibly following space too
      if (par[pos+sub.str(0).size()] == ' ')
        addIntervall(pos+val.size(), pos + sub.str(0).size()+1);
      else
        addIntervall(pos+val.size(), pos + sub.str(0).size());
      for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) {
        // remove traces of any remaining chars
        par[i] = ' ';
      }
    }
    else {
      LYXERR(Debug::INFO, "Not added accent for \"" << key << "\"");
    }
  }
}
1501
1502 void Intervall::handleOpenP(int i)
1503 {
1504   actualdeptindex++;
1505   depts[actualdeptindex] = i+1;
1506   closes[actualdeptindex] = -1;
1507   checkDepthIndex(actualdeptindex);
1508 }
1509
1510 void Intervall::handleCloseP(int i, bool closingAllowed)
1511 {
1512   if (actualdeptindex <= 0) {
1513     if (! closingAllowed)
1514       LYXERR(Debug::FIND, "Bad closing parenthesis in latex");  /* should not happen, but the latex input may be wrong */
1515     // if we are at the very end
1516     addIntervall(i, i+1);
1517   }
1518   else {
1519     closes[actualdeptindex] = i+1;
1520     actualdeptindex--;
1521   }
1522 }
1523
1524 void Intervall::resetOpenedP(int openPos)
1525 {
1526   // Used as initializer for foreignlanguage entry
1527   actualdeptindex = 1;
1528   depts[1] = openPos+1;
1529   closes[1] = -1;
1530 }
1531
1532 int Intervall::previousNotIgnored(int start) const
1533 {
1534     int idx = 0;                          /* int intervalls */
1535     for (idx = ignoreidx; idx >= 0; --idx) {
1536       if (start > borders[idx].upper)
1537         return start;
1538       if (start >= borders[idx].low)
1539         start = borders[idx].low-1;
1540     }
1541     return start;
1542 }
1543
1544 int Intervall::nextNotIgnored(int start) const
1545 {
1546     int idx = 0;                          /* int intervalls */
1547     for (idx = 0; idx <= ignoreidx; idx++) {
1548       if (start < borders[idx].low)
1549         return start;
1550       if (start < borders[idx].upper)
1551         start = borders[idx].upper;
1552     }
1553     return start;
1554 }
1555
1556 typedef unordered_map<string, KeyInfo> KeysMap;
1557 typedef unordered_map<string, KeyInfo>::const_iterator KeysIterator;
1558 typedef vector< KeyInfo> Entries;
1559 static KeysMap keys = unordered_map<string, KeyInfo>();
1560
1561 class LatexInfo {
1562  private:
1563   int entidx_;
1564   Entries entries_;
1565   Intervall interval_;
1566   void buildKeys(bool);
1567   void buildEntries(bool);
1568   void makeKey(const string &, KeyInfo, bool isPatternString);
1569   void processRegion(int start, int region_end); /*  remove {} parts */
1570   void removeHead(KeyInfo const &, int count=0);
1571
1572  public:
1573  LatexInfo(string const & par, bool isPatternString)
1574          : entidx_(-1), interval_(isPatternString, par)
1575   {
1576     buildKeys(isPatternString);
1577     entries_ = vector<KeyInfo>();
1578     buildEntries(isPatternString);
1579   };
1580   int getFirstKey() {
1581     entidx_ = 0;
1582     if (entries_.empty()) {
1583       return -1;
1584     }
1585     if (entries_[0].keytype == KeyInfo::isTitle) {
1586       interval_.hasTitle = true;
1587       if (! entries_[0].disabled) {
1588         interval_.titleValue = entries_[0].head;
1589       }
1590       else {
1591         interval_.titleValue = "";
1592       }
1593       removeHead(entries_[0]);
1594       if (entries_.size() > 1)
1595         return 1;
1596       else
1597         return -1;
1598     }
1599     return 0;
1600   };
1601   int getNextKey() {
1602     entidx_++;
1603     if (int(entries_.size()) > entidx_) {
1604       return entidx_;
1605     }
1606     else {
1607       return -1;
1608     }
1609   };
1610   bool setNextKey(int idx) {
1611     if ((idx == entidx_) && (entidx_ >= 0)) {
1612       entidx_--;
1613       return true;
1614     }
1615     else
1616       return false;
1617   };
1618   int find(int start, KeyInfo::KeyType keytype) const {
1619     if (start < 0)
1620       return -1;
1621     int tmpIdx = start;
1622     while (tmpIdx < int(entries_.size())) {
1623       if (entries_[tmpIdx].keytype == keytype)
1624         return tmpIdx;
1625       tmpIdx++;
1626     }
1627     return -1;
1628   };
1629   int process(ostringstream & os, KeyInfo const & actual);
1630   int dispatch(ostringstream & os, int previousStart, KeyInfo & actual);
1631   // string show(int lastpos) { return interval.show(lastpos);};
1632   int nextNotIgnored(int start) { return interval_.nextNotIgnored(start);};
1633   KeyInfo &getKeyInfo(int keyinfo) {
1634     static KeyInfo invalidInfo = KeyInfo();
1635     if ((keyinfo < 0) || ( keyinfo >= int(entries_.size())))
1636       return invalidInfo;
1637     else
1638       return entries_[keyinfo];
1639   };
1640   void setForDefaultLang(KeyInfo const & defLang) {interval_.setForDefaultLang(defLang);};
1641   void addIntervall(int low, int up) { interval_.addIntervall(low, up); };
1642 };
1643
1644
1645 int Intervall::findclosing(int start, int end, char up = '{', char down = '}', int repeat = 1)
1646 {
1647   int skip = 0;
1648   int depth = 0;
1649   for (int i = start; i < end; i += 1 + skip) {
1650     char c;
1651     c = par[i];
1652     skip = 0;
1653     if (c == '\\') skip = 1;
1654     else if (c == up) {
1655       depth++;
1656     }
1657     else if (c == down) {
1658       if (depth == 0) {
1659         repeat--;
1660         if ((repeat <= 0) || (par[i+1] != up))
1661           return i;
1662       }
1663       --depth;
1664     }
1665   }
1666   return end;
1667 }
1668
/// List of math regions with a cursor (actualIdx_) on the current one.
/// All getters refer to the entry under the cursor; past the last entry
/// they return 0, except getStartPos() which returns 100000.
class MathInfo {
  class MathEntry {
  public:
    string wait;
    size_t mathEnd;
    size_t mathpostfixsize;
    size_t mathStart;
    size_t mathprefixsize;
    size_t mathSize;
  };
  size_t actualIdx_;
  vector<MathEntry> entries_;
  /// Entry under the cursor, or nullptr if the cursor is past the end
  MathEntry const * current() const {
    return (actualIdx_ < entries_.size()) ? &entries_[actualIdx_] : nullptr;
  }
 public:
  MathInfo() : actualIdx_(0) {}
  /// Append a region: it spans [start, end + postfixsize)
  void insert(string const & wait, size_t start, size_t prefixsize, size_t end, size_t postfixsize) {
    MathEntry entry = MathEntry();
    entry.wait = wait;
    entry.mathprefixsize = prefixsize;
    entry.mathpostfixsize = postfixsize;
    entry.mathStart = start;
    entry.mathEnd = end + postfixsize;
    entry.mathSize = entry.mathEnd - entry.mathStart;
    entries_.push_back(entry);
  }
  bool empty() const { return entries_.empty(); }
  size_t getEndPos() const {
    MathEntry const * e = current();
    return e ? e->mathEnd : 0;
  }
  size_t getStartPos() const {
    MathEntry const * e = current();
    return e ? e->mathStart : 100000;   /*  definitely enough? */
  }
  size_t getPrefixSize() const {
    MathEntry const * e = current();
    return e ? e->mathprefixsize : 0;
  }
  size_t getPostfixSize() const {
    MathEntry const * e = current();
    return e ? e->mathpostfixsize : 0;
  }
  /// Rewind the cursor and return the start of the first region
  size_t getFirstPos() {
    actualIdx_ = 0;
    return getStartPos();
  }
  size_t getSize() const {
    MathEntry const * e = current();
    return e ? e->mathSize : size_t(0);
  }
  void incrEntry() { actualIdx_++; }
};
1732
1733 void LatexInfo::buildEntries(bool isPatternString)
1734 {
1735   static regex const rmath("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|alignat)\\*?)\\})");
1736   static regex const rkeys("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?)))");
1737   static bool disableLanguageOverride = false;
1738   smatch sub, submath;
1739   bool evaluatingRegexp = false;
1740   MathInfo mi;
1741   bool evaluatingMath = false;
1742   bool evaluatingCode = false;
1743   size_t codeEnd = 0;
1744   bool evaluatingOptional = false;
1745   size_t optionalEnd = 0;
1746   int codeStart = -1;
1747   KeyInfo found;
1748   bool math_end_waiting = false;
1749   size_t math_pos = 10000;
1750   size_t math_prefix_size = 1;
1751   string math_end;
1752   static vector<string> usedText = vector<string>();
1753   static bool removeMathHull = false;
1754
1755   interval_.removeAccents();
1756
1757   for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) {
1758     submath = *itmath;
1759     if ((submath.position(2) - submath.position(0)) %2 == 1) {
1760       // prefixed by odd count of '\\'
1761       continue;
1762     }
1763     if (math_end_waiting) {
1764       size_t pos = submath.position(size_t(2));
1765       if ((math_end == "$") &&
1766           (submath.str(2) == "$")) {
1767         mi.insert("$", math_pos, 1, pos, 1);
1768         math_end_waiting = false;
1769       }
1770       else if ((math_end == "\\]") &&
1771                (submath.str(2) == "\\]")) {
1772         mi.insert("\\]", math_pos, 2, pos, 2);
1773         math_end_waiting = false;
1774       }
1775       else if ((submath.str(3).compare("end") == 0) &&
1776           (submath.str(4).compare(math_end) == 0)) {
1777         mi.insert(math_end, math_pos, math_prefix_size, pos, submath.str(2).length());
1778         math_end_waiting = false;
1779       }
1780       else
1781         continue;
1782     }
1783     else {
1784       if (submath.str(3).compare("begin") == 0) {
1785         math_end_waiting = true;
1786         math_end = submath.str(4);
1787         math_pos = submath.position(size_t(2));
1788         math_prefix_size = submath.str(2).length();
1789       }
1790       else if (submath.str(2).compare("\\[") == 0) {
1791         math_end_waiting = true;
1792         math_end = "\\]";
1793         math_pos = submath.position(size_t(2));
1794       }
1795       else if (submath.str(2) == "$") {
1796         size_t pos = submath.position(size_t(2));
1797         math_end_waiting = true;
1798         math_end = "$";
1799         math_pos = pos;
1800       }
1801     }
1802   }
1803   // Ignore language if there is math somewhere in pattern-string
1804   if (isPatternString) {
1805     for (auto s: usedText) {
1806       // Remove entries created in previous search runs
1807       keys.erase(s);
1808     }
1809     usedText = vector<string>();
1810     if (! mi.empty()) {
1811       // Disable language
1812       keys["foreignlanguage"].disabled = true;
1813       disableLanguageOverride = true;
1814       removeMathHull = false;
1815     }
1816     else {
1817       removeMathHull = true;    // used later if not isPatternString
1818       disableLanguageOverride = false;
1819     }
1820   }
1821   else {
1822     if (disableLanguageOverride) {
1823       keys["foreignlanguage"].disabled = true;
1824     }
1825   }
1826   math_pos = mi.getFirstPos();
1827   for (sregex_iterator it(interval_.par.begin(), interval_.par.end(), rkeys), end; it != end; ++it) {
1828     sub = *it;
1829     if ((sub.position(2) - sub.position(0)) %2 == 1) {
1830       // prefixed by odd count of '\\'
1831       continue;
1832     }
1833     string key = sub.str(5);
1834     if (key == "") {
1835       if (sub.str(2)[0] == '\\')
1836         key = sub.str(2)[1];
1837       else {
1838         key = sub.str(2);
1839       }
1840     }
1841     KeysIterator it_key = keys.find(key);
1842     if (it_key != keys.end()) {
1843       if (it_key->second.keytype == KeyInfo::headRemove) {
1844         KeyInfo found1 = it_key->second;
1845         found1.disabled = true;
1846         found1.head = "\\" + key + "{";
1847         found1._tokenstart = sub.position(size_t(2));
1848         found1._tokensize = found1.head.length();
1849         found1._dataStart = found1._tokenstart + found1.head.length();
1850         int endpos = interval_.findclosing(found1._dataStart, interval_.par.length(), '{', '}', 1);
1851         found1._dataEnd = endpos;
1852         removeHead(found1);
1853         continue;
1854       }
1855     }
1856     if (evaluatingRegexp) {
1857       if (sub.str(3).compare("endregexp") == 0) {
1858         evaluatingRegexp = false;
1859         // found._tokenstart already set
1860         found._dataEnd = sub.position(size_t(2)) + 13;
1861         found._dataStart = found._dataEnd;
1862         found._tokensize = found._dataEnd - found._tokenstart;
1863         found.parenthesiscount = 0;
1864         found.head = interval_.par.substr(found._tokenstart, found._tokensize);
1865       }
1866       else {
1867         continue;
1868       }
1869     }
1870     else {
1871       if (evaluatingMath) {
1872         if (size_t(sub.position(size_t(2))) < mi.getEndPos())
1873           continue;
1874         evaluatingMath = false;
1875         mi.incrEntry();
1876         math_pos = mi.getStartPos();
1877       }
1878       if (it_key == keys.end()) {
1879         found = KeyInfo(KeyInfo::isStandard, 0, true);
1880         LYXERR(Debug::INFO, "Undefined key " << key << " ==> will be used as text");
1881         found = KeyInfo(KeyInfo::isText, 0, false);
1882         if (isPatternString) {
1883           found.keytype = KeyInfo::isChar;
1884           found.disabled = false;
1885           found.used = true;
1886         }
1887         keys[key] = found;
1888         usedText.push_back(key);
1889       }
1890       else
1891         found = keys[key];
1892       if (key.compare("regexp") == 0) {
1893         evaluatingRegexp = true;
1894         found._tokenstart = sub.position(size_t(2));
1895         found._tokensize = 0;
1896         continue;
1897       }
1898     }
1899     // Handle the other params of key
1900     if (found.keytype == KeyInfo::isIgnored)
1901       continue;
1902     else if (found.keytype == KeyInfo::isMath) {
1903       if (size_t(sub.position(size_t(2))) == math_pos) {
1904         found = keys[key];
1905         found._tokenstart = sub.position(size_t(2));
1906         found._tokensize = mi.getSize();
1907         found._dataEnd = found._tokenstart + found._tokensize;
1908         found._dataStart = found._dataEnd;
1909         found.parenthesiscount = 0;
1910         found.head = interval_.par.substr(found._tokenstart, found._tokensize);
1911         if (removeMathHull) {
1912           interval_.addIntervall(found._tokenstart, found._tokenstart + mi.getPrefixSize());
1913           interval_.addIntervall(found._dataEnd - mi.getPostfixSize(), found._dataEnd);
1914         }
1915         evaluatingMath = true;
1916       }
1917       else {
1918         // begin|end of unknown env, discard
1919         // First handle tables
1920         // longtable|tabular
1921         bool discardComment;
1922         found = keys[key];
1923         found.keytype = KeyInfo::doRemove;
1924         if ((sub.str(7).compare("longtable") == 0) ||
1925             (sub.str(7).compare("tabular") == 0)) {
1926           discardComment = true;        /* '%' */
1927         }
1928         else {
1929           discardComment = false;
1930           static regex const removeArgs("^(multicols|multipar|sectionbox|subsectionbox|tcolorbox)$");
1931           smatch sub2;
1932           string token = sub.str(7);
1933           if (regex_match(token, sub2, removeArgs)) {
1934             found.keytype = KeyInfo::removeWithArg;
1935           }
1936         }
1937         // discard spaces before pos(2)
1938         int pos = sub.position(size_t(2));
1939         int count;
1940         for (count = 0; pos - count > 0; count++) {
1941           char c = interval_.par[pos-count-1];
1942           if (discardComment) {
1943             if ((c != ' ') && (c != '%'))
1944               break;
1945           }
1946           else if (c != ' ')
1947             break;
1948         }
1949         found._tokenstart = pos - count;
1950         if (sub.str(3).compare(0, 5, "begin") == 0) {
1951           size_t pos1 = pos + sub.str(2).length();
1952           if (sub.str(7).compare("cjk") == 0) {
1953             pos1 = interval_.findclosing(pos1+1, interval_.par.length()) + 1;
1954             if ((interval_.par[pos1] == '{') && (interval_.par[pos1+1] == '}'))
1955               pos1 += 2;
1956             found.keytype = KeyInfo::isMain;
1957             found._dataStart = pos1;
1958             found._dataEnd = interval_.par.length();
1959             found.disabled = keys["foreignlanguage"].disabled;
1960             found.used = keys["foreignlanguage"].used;
1961             found._tokensize = pos1 - found._tokenstart;
1962             found.head = interval_.par.substr(found._tokenstart, found._tokensize);
1963           }
1964           else {
1965             // Swallow possible optional params
1966             while (interval_.par[pos1] == '[') {
1967               pos1 = interval_.findclosing(pos1+1, interval_.par.length(), '[', ']')+1;
1968             }
1969             // Swallow also the eventual parameter
1970             if (interval_.par[pos1] == '{') {
1971               found._dataEnd = interval_.findclosing(pos1+1, interval_.par.length()) + 1;
1972             }
1973             else {
1974               found._dataEnd = pos1;
1975             }
1976             found._dataStart = found._dataEnd;
1977             found._tokensize = count + found._dataEnd - pos;
1978             found.parenthesiscount = 0;
1979             found.head = interval_.par.substr(found._tokenstart, found._tokensize);
1980             found.disabled = true;
1981           }
1982         }
1983         else {
1984           // Handle "\end{...}"
1985           found._dataStart = pos + sub.str(2).length();
1986           found._dataEnd = found._dataStart;
1987           found._tokensize = count + found._dataEnd - pos;
1988           found.parenthesiscount = 0;
1989           found.head = interval_.par.substr(found._tokenstart, found._tokensize);
1990           found.disabled = true;
1991         }
1992       }
1993     }
1994     else if (found.keytype != KeyInfo::isRegex) {
1995       found._tokenstart = sub.position(size_t(2));
1996       if (found.parenthesiscount == 0) {
1997         // Probably to be discarded
1998         size_t following_pos = sub.position(size_t(2)) + sub.str(5).length() + 1;
1999         char following = interval_.par[following_pos];
2000         if (following == ' ')
2001           found.head = "\\" + sub.str(5) + " ";
2002         else if (following == '=') {
2003           // like \uldepth=1000pt
2004           found.head = sub.str(2);
2005         }
2006         else
2007           found.head = "\\" + key;
2008         found._tokensize = found.head.length();
2009         found._dataEnd = found._tokenstart + found._tokensize;
2010         found._dataStart = found._dataEnd;
2011       }
2012       else {
2013         int params = found._tokenstart + key.length() + 1;
2014         if (evaluatingOptional) {
2015           if (size_t(found._tokenstart) > optionalEnd) {
2016             evaluatingOptional = false;
2017           }
2018           else {
2019             found.disabled = true;
2020           }
2021         }
2022         int optend = params;
2023         while (interval_.par[optend] == '[') {
2024           // discard optional parameters
2025           optend = interval_.findclosing(optend+1, interval_.par.length(), '[', ']') + 1;
2026         }
2027         if (optend > params) {
2028           key += interval_.par.substr(params, optend-params);
2029           evaluatingOptional = true;
2030           optionalEnd = optend;
2031           if (found.keytype == KeyInfo::isSectioning) {
2032             // Remove optional values (but still keep in header)
2033             interval_.addIntervall(params, optend);
2034           }
2035         }
2036         string token = sub.str(7);
2037         int closings;
2038         if (interval_.par[optend] != '{') {
2039           closings = 0;
2040           found.parenthesiscount = 0;
2041           found.head = "\\" + key;
2042         }
2043         else
2044           closings = found.parenthesiscount;
2045         if (found.parenthesiscount == 1) {
2046           found.head = "\\" + key + "{";
2047         }
2048         else if (found.parenthesiscount > 1) {
2049           if (token != "") {
2050             found.head = sub.str(2) + "{";
2051             closings = found.parenthesiscount - 1;
2052           }
2053           else {
2054             found.head = "\\" + key + "{";
2055           }
2056         }
2057         found._tokensize = found.head.length();
2058         found._dataStart = found._tokenstart + found.head.length();
2059         if (found.keytype == KeyInfo::doRemove) {
2060           if (closings > 0) {
2061             size_t endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings);
2062             if (endpar >= interval_.par.length())
2063               found._dataStart = interval_.par.length();
2064             else
2065               found._dataStart = endpar;
2066             found._tokensize = found._dataStart - found._tokenstart;
2067           }
2068           else {
2069             found._dataStart = found._tokenstart + found._tokensize;
2070           }
2071           closings = 0;
2072         }
2073         if (interval_.par.substr(found._dataStart, 15).compare("\\endarguments{}") == 0) {
2074           found._dataStart += 15;
2075         }
2076         size_t endpos;
2077         if (closings < 1)
2078           endpos = found._dataStart - 1;
2079         else
2080           endpos = interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings);
2081         if (found.keytype == KeyInfo::isList) {
2082           // Check if it really is list env
2083           static regex const listre("^([a-z]+)$");
2084           smatch sub2;
2085           if (!regex_match(token, sub2, listre)) {
2086             // Change the key of this entry. It is not in a list/item environment
2087             found.keytype = KeyInfo::endArguments;
2088           }
2089         }
2090         if (found.keytype == KeyInfo::noMain) {
2091           evaluatingCode = true;
2092           codeEnd = endpos;
2093           codeStart = found._dataStart;
2094         }
2095         else if (evaluatingCode) {
2096           if (size_t(found._dataStart) > codeEnd)
2097             evaluatingCode = false;
2098           else if (found.keytype == KeyInfo::isMain) {
2099             // Disable this key, treate it as standard
2100             found.keytype = KeyInfo::isStandard;
2101             found.disabled = true;
2102             if ((codeEnd +1 >= interval_.par.length()) &&
2103                 (found._tokenstart == codeStart)) {
2104               // trickery, because the code inset starts
2105               // with \selectlanguage ...
2106               codeEnd = endpos;
2107               if (entries_.size() > 1) {
2108                 entries_[entries_.size()-1]._dataEnd = codeEnd;
2109               }
2110             }
2111           }
2112         }
2113         if ((endpos == interval_.par.length()) &&
2114             (found.keytype == KeyInfo::doRemove)) {
2115           // Missing closing => error in latex-input?
2116           // therefore do not delete remaining data
2117           found._dataStart -= 1;
2118           found._dataEnd = found._dataStart;
2119         }
2120         else
2121           found._dataEnd = endpos;
2122       }
2123       if (isPatternString) {
2124         keys[key].used = true;
2125       }
2126     }
2127     entries_.push_back(found);
2128   }
2129 }
2130
2131 void LatexInfo::makeKey(const string &keysstring, KeyInfo keyI, bool isPatternString)
2132 {
2133   stringstream s(keysstring);
2134   string key;
2135   const char delim = '|';
2136   while (getline(s, key, delim)) {
2137     KeyInfo keyII(keyI);
2138     if (isPatternString) {
2139       keyII.used = false;
2140     }
2141     else if ( !keys[key].used)
2142       keyII.disabled = true;
2143     keys[key] = keyII;
2144   }
2145 }
2146
2147 void LatexInfo::buildKeys(bool isPatternString)
2148 {
2149
2150   static bool keysBuilt = false;
2151   if (keysBuilt && !isPatternString) return;
2152
2153   // Keys to ignore in any case
2154   makeKey("text|textcyrillic|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true);
2155   // Known standard keys with 1 parameter.
2156   // Split is done, if not at start of region
2157   makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString);
2158   makeKey("textbf",               KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getSeries()), isPatternString);
2159   makeKey("textit|textsc|textsl", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getShape()), isPatternString);
2160   makeKey("uuline|uline|uwave",   KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getUnderline()), isPatternString);
2161   makeKey("emph|noun",            KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getMarkUp()), isPatternString);
2162   makeKey("sout|xout",            KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getStrikeOut()), isPatternString);
2163
2164   makeKey("section|subsection|subsubsection|paragraph|subparagraph|minisec",
2165           KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString);
2166   makeKey("section*|subsection*|subsubsection*|paragraph*",
2167           KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString);
2168   makeKey("part|part*|chapter|chapter*", KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString);
2169   makeKey("title|subtitle|author|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|lyxaddress|lyxrightaddress", KeyInfo(KeyInfo::isTitle, 1, ignoreFormats.getFrontMatter()), isPatternString);
2170   // Regex
2171   makeKey("regexp", KeyInfo(KeyInfo::isRegex, 1, false), isPatternString);
2172
2173   // Split is done, if not at start of region
2174   makeKey("textcolor", KeyInfo(KeyInfo::isStandard, 2, ignoreFormats.getColor()), isPatternString);
2175   makeKey("latexenvironment", KeyInfo(KeyInfo::isStandard, 2, false), isPatternString);
2176
2177   // Split is done always.
2178   makeKey("foreignlanguage", KeyInfo(KeyInfo::isMain, 2, ignoreFormats.getLanguage()), isPatternString);
2179
2180   // Known charaters
2181   // No split
2182   makeKey("backslash|textbackslash|slash",  KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2183   makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2184   makeKey("textasciiacute|texemdash",       KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2185   makeKey("dots|ldots",                     KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2186   // Spaces
2187   makeKey("quad|qquad|hfill|dotfill",               KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2188   makeKey("textvisiblespace|nobreakspace",          KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2189   makeKey("negthickspace|negmedspace|negthinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2190   makeKey("thickspace|medspace|thinspace",          KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2191   // Skip
2192   // makeKey("enskip|smallskip|medskip|bigskip|vfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2193   // Custom space/skip, remove the content (== length value)
2194   makeKey("vspace|vspace*|hspace|hspace*|mspace", KeyInfo(KeyInfo::noContent, 1, false), isPatternString);
2195   // Found in fr/UserGuide.lyx
2196   makeKey("og|fg", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2197   // quotes
2198   makeKey("textquotedbl|quotesinglbase|lyxarrow", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2199   makeKey("textquotedblleft|textquotedblright", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2200   // Known macros to remove (including their parameter)
2201   // No split
2202   makeKey("input|inputencoding|label|ref|index|bibitem", KeyInfo(KeyInfo::doRemove, 1, false), isPatternString);
2203   makeKey("addtocounter|setlength",                 KeyInfo(KeyInfo::noContent, 2, true), isPatternString);
2204   // handle like standard keys with 1 parameter.
2205   makeKey("url|href|vref|thanks", KeyInfo(KeyInfo::isStandard, 1, false), isPatternString);
2206
2207   // Ignore deleted text
2208   makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 3, false), isPatternString);
2209   // but preserve added text
2210   makeKey("lyxadded", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString);
2211
2212   // Macros to remove, but let the parameter survive
2213   // No split
2214   makeKey("menuitem|textmd|textrm", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2215
2216   // Remove language spec from content of these insets
2217   makeKey("code", KeyInfo(KeyInfo::noMain, 1, false), isPatternString);
2218
2219   // Same effect as previous, parameter will survive (because there is no one anyway)
2220   // No split
2221   makeKey("noindent|textcompwordmark|maketitle", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
2222   // Remove table decorations
2223   makeKey("hline|tabularnewline|toprule|bottomrule|midrule", KeyInfo(KeyInfo::doRemove, 0, true), isPatternString);
2224   // Discard shape-header.
2225   // For footnote or shortcut too, because of lang settings
2226   // and wrong handling if used 'KeyInfo::noMain'
2227   makeKey("circlepar|diamondpar|heartpar|nutpar",  KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2228   makeKey("trianglerightpar|hexagonpar|starpar",   KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2229   makeKey("triangleuppar|triangledownpar|droppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2230   makeKey("triangleleftpar|shapepar|dropuppar",    KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2231   makeKey("hphantom|vphantom|footnote|shortcut|include|includegraphics",     KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2232   makeKey("parbox", KeyInfo(KeyInfo::doRemove, 1, true), isPatternString);
2233   // like ('tiny{}' or '\tiny ' ... )
2234   makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString);
2235
2236   // Survives, like known character
2237   // makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2238   makeKey("tableofcontents", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2239   makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString);
2240
2241   makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
2242   makeKey("[|]", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
2243   makeKey("$", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
2244
2245   makeKey("par|uldepth|ULdepth|protect|nobreakdash|medskip|relax", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
2246   // Remove RTL/LTR marker
2247   makeKey("l|r|textlr|textfr|textar|beginl|endl", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
2248   makeKey("lettrine", KeyInfo(KeyInfo::cleanToStart, 0, true), isPatternString);
2249   makeKey("lyxslide", KeyInfo(KeyInfo::isSectioning, 1, true), isPatternString);
2250   makeKey("endarguments", KeyInfo(KeyInfo::endArguments, 0, true), isPatternString);
2251   makeKey("twocolumn", KeyInfo(KeyInfo::removeWithArg, 2, true), isPatternString);
2252   makeKey("tnotetext|ead|fntext|cortext|address", KeyInfo(KeyInfo::removeWithArg, 0, true), isPatternString);
2253   makeKey("lyxend", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
2254   if (isPatternString) {
2255     // Allow the first searched string to rebuild the keys too
2256     keysBuilt = false;
2257   }
2258   else {
2259     // no need to rebuild again
2260     keysBuilt = true;
2261   }
2262 }
2263
2264 /*
2265  * Keep the list of actual opened parentheses actual
2266  * (e.g. depth == 4 means there are 4 '{' not processed yet)
2267  */
2268 void Intervall::handleParentheses(int lastpos, bool closingAllowed)
2269 {
2270   int skip = 0;
2271   for (int i = depts[actualdeptindex]; i < lastpos; i+= 1 + skip) {
2272     char c;
2273     c = par[i];
2274     skip = 0;
2275     if (c == '\\') skip = 1;
2276     else if (c == '{') {
2277       handleOpenP(i);
2278     }
2279     else if (c == '}') {
2280       handleCloseP(i, closingAllowed);
2281     }
2282   }
2283 }
2284
// Compiled out by the surrounding '#if (0)'.
// Builds and returns a string from the parts of par in front of lastpos
// that lie outside of the ranges stored in borders[0..ignoreidx].
#if (0)
string Intervall::show(int lastpos)
{
  int idx = 0;                          /* int intervalls */
  string s;
  int i = 0;
  for (idx = 0; idx <= ignoreidx; idx++) {
    while (i < lastpos) {
      int printsize;
      if (i <= borders[idx].low) {
        // copy the text in front of this range, but not beyond lastpos
        if (borders[idx].low > lastpos)
          printsize = lastpos - i;
        else
          printsize = borders[idx].low - i;
        s += par.substr(i, printsize);
        i += printsize;
        // range reached: continue behind it
        if (i >= borders[idx].low)
          i = borders[idx].upper;
      }
      else {
        // already inside this range: continue behind it with the next one
        i = borders[idx].upper;
        break;
      }
    }
  }
  // remaining text behind the last range
  if (lastpos > i) {
    s += par.substr(i, lastpos-i);
  }
  return s;
}
#endif
2316
2317 void Intervall::output(ostringstream &os, int lastpos)
2318 {
2319   // get number of chars to output
2320   int idx = 0;                          /* int intervalls */
2321   int i = 0;
2322   int printed = 0;
2323   string startTitle = titleValue;
2324   for (idx = 0; idx <= ignoreidx; idx++) {
2325     if (i < lastpos) {
2326       if (i <= borders[idx].low) {
2327         int printsize;
2328         if (borders[idx].low > lastpos)
2329           printsize = lastpos - i;
2330         else
2331           printsize = borders[idx].low - i;
2332         if (printsize > 0) {
2333           os << startTitle << par.substr(i, printsize);
2334           i += printsize;
2335           printed += printsize;
2336           startTitle = "";
2337         }
2338         handleParentheses(i, false);
2339         if (i >= borders[idx].low)
2340           i = borders[idx].upper;
2341       }
2342       else {
2343         i = borders[idx].upper;
2344       }
2345     }
2346     else
2347       break;
2348   }
2349   if (lastpos > i) {
2350     os << startTitle << par.substr(i, lastpos-i);
2351     printed += lastpos-i;
2352   }
2353   handleParentheses(lastpos, false);
2354   int startindex;
2355   if (keys["foreignlanguage"].disabled)
2356     startindex = actualdeptindex-langcount;
2357   else
2358     startindex = actualdeptindex;
2359   for (int i = startindex; i > 0; --i) {
2360     os << "}";
2361   }
2362   if (hasTitle && (printed > 0))
2363     os << "}";
2364   if (! isPatternString_)
2365     os << "\n";
2366   handleParentheses(lastpos, true); /* extra closings '}' allowed here */
2367 }
2368
2369 void LatexInfo::processRegion(int start, int region_end)
2370 {
2371   while (start < region_end) {          /* Let {[} and {]} survive */
2372     int cnt = interval_.isOpeningPar(start);
2373     if (cnt == 1) {
2374       // Closing is allowed past the region
2375       int closing = interval_.findclosing(start+1, interval_.par.length());
2376       interval_.addIntervall(start, start+1);
2377       interval_.addIntervall(closing, closing+1);
2378     }
2379     else if (cnt == 3)
2380       start += 2;
2381     start = interval_.nextNotIgnored(start+1);
2382   }
2383 }
2384
2385 void LatexInfo::removeHead(KeyInfo const & actual, int count)
2386 {
2387   if (actual.parenthesiscount == 0) {
2388     // "{\tiny{} ...}" ==> "{{} ...}"
2389     interval_.addIntervall(actual._tokenstart-count, actual._tokenstart + actual._tokensize);
2390   }
2391   else {
2392     // Remove header hull, that is "\url{abcd}" ==> "abcd"
2393     interval_.addIntervall(actual._tokenstart - count, actual._dataStart);
2394     interval_.addIntervall(actual._dataEnd, actual._dataEnd+1);
2395   }
2396 }
2397
2398 int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
2399 {
2400   int nextKeyIdx = 0;
2401   switch (actual.keytype)
2402   {
2403     case KeyInfo::isTitle: {
2404       removeHead(actual);
2405       nextKeyIdx = getNextKey();
2406       break;
2407     }
2408     case KeyInfo::cleanToStart: {
2409       actual._dataEnd = actual._dataStart;
2410       nextKeyIdx = getNextKey();
2411       // Search for end of arguments
2412       int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments);
2413       if (tmpIdx > 0) {
2414         for (int i = nextKeyIdx; i <= tmpIdx; i++) {
2415           entries_[i].disabled = true;
2416         }
2417         actual._dataEnd = entries_[tmpIdx]._dataEnd;
2418       }
2419       while (interval_.par[actual._dataEnd] == ' ')
2420         actual._dataEnd++;
2421       interval_.addIntervall(0, actual._dataEnd+1);
2422       interval_.actualdeptindex = 0;
2423       interval_.depts[0] = actual._dataEnd+1;
2424       interval_.closes[0] = -1;
2425       break;
2426     }
2427     case KeyInfo::isText:
2428       interval_.par[actual._tokenstart] = '#';
2429       //interval_.addIntervall(actual._tokenstart, actual._tokenstart+1);
2430       nextKeyIdx = getNextKey();
2431       break;
2432     case KeyInfo::noContent: {          /* char like "\hspace{2cm}" */
2433       if (actual.disabled)
2434         interval_.addIntervall(actual._tokenstart, actual._dataEnd);
2435       else
2436         interval_.addIntervall(actual._dataStart, actual._dataEnd);
2437     }
2438       // fall through
2439     case KeyInfo::isChar: {
2440       nextKeyIdx = getNextKey();
2441       break;
2442     }
2443     case KeyInfo::isSize: {
2444       if (actual.disabled || (interval_.par[actual._dataStart] != '{') || (interval_.par[actual._dataStart-1] == ' ')) {
2445         if (actual.parenthesiscount == 0)
2446           interval_.addIntervall(actual._tokenstart, actual._dataEnd);
2447         else {
2448           interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
2449         }
2450         nextKeyIdx = getNextKey();
2451       } else {
2452         // Here _dataStart points to '{', so correct it
2453         actual._dataStart += 1;
2454         actual._tokensize += 1;
2455         actual.parenthesiscount = 1;
2456         if (interval_.par[actual._dataStart] == '}') {
2457           // Determine the end if used like '{\tiny{}...}'
2458           actual._dataEnd = interval_.findclosing(actual._dataStart+1, interval_.par.length()) + 1;
2459           interval_.addIntervall(actual._dataStart, actual._dataStart+1);
2460         }
2461         else {
2462           // Determine the end if used like '\tiny{...}'
2463           actual._dataEnd = interval_.findclosing(actual._dataStart, interval_.par.length()) + 1;
2464         }
2465         // Split on this key if not at start
2466         int start = interval_.nextNotIgnored(previousStart);
2467         if (start < actual._tokenstart) {
2468           interval_.output(os, actual._tokenstart);
2469           interval_.addIntervall(start, actual._tokenstart);
2470         }
2471         // discard entry if at end of actual
2472         nextKeyIdx = process(os, actual);
2473       }
2474       break;
2475     }
2476     case KeyInfo::endArguments: {
2477       // Remove trailing '{}' too
2478       actual._dataStart += 1;
2479       actual._dataEnd += 1;
2480       interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
2481       nextKeyIdx = getNextKey();
2482       break;
2483     }
2484     case KeyInfo::noMain:
2485       // fall through
2486     case KeyInfo::isStandard: {
2487       if (actual.disabled) {
2488         removeHead(actual);
2489         processRegion(actual._dataStart, actual._dataStart+1);
2490         nextKeyIdx = getNextKey();
2491       } else {
2492         // Split on this key if not at datastart of calling entry
2493         int start = interval_.nextNotIgnored(previousStart);
2494         if (start < actual._tokenstart) {
2495           interval_.output(os, actual._tokenstart);
2496           interval_.addIntervall(start, actual._tokenstart);
2497         }
2498         // discard entry if at end of actual
2499         nextKeyIdx = process(os, actual);
2500       }
2501       break;
2502     }
2503     case KeyInfo::removeWithArg: {
2504       nextKeyIdx = getNextKey();
2505       // Search for end of arguments
2506       int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments);
2507       if (tmpIdx > 0) {
2508         for (int i = nextKeyIdx; i <= tmpIdx; i++) {
2509           entries_[i].disabled = true;
2510         }
2511         actual._dataEnd = entries_[tmpIdx]._dataEnd;
2512       }
2513       interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
2514       break;
2515     }
2516     case KeyInfo::doRemove: {
2517       // Remove the key with all parameters and following spaces
2518       size_t pos;
2519       size_t start;
2520       if (interval_.par[actual._dataEnd-1] == ' ')
2521         start = actual._dataEnd;
2522       else
2523         start = actual._dataEnd+1;
2524       for (pos = start; pos < interval_.par.length(); pos++) {
2525         if ((interval_.par[pos] != ' ') && (interval_.par[pos] != '%'))
2526           break;
2527       }
2528       // Remove also enclosing parentheses [] and {}
2529       int numpars = 0;
2530       int spaces = 0;
2531       while (actual._tokenstart > numpars) {
2532         if (pos+numpars >= interval_.par.size())
2533           break;
2534         else if (interval_.par[pos+numpars] == ']' && interval_.par[actual._tokenstart-numpars-1] == '[')
2535           numpars++;
2536         else if (interval_.par[pos+numpars] == '}' && interval_.par[actual._tokenstart-numpars-1] == '{')
2537           numpars++;
2538         else
2539           break;
2540       }
2541       if (numpars > 0) {
2542         if (interval_.par[pos+numpars] == ' ')
2543           spaces++;
2544       }
2545
2546       interval_.addIntervall(actual._tokenstart-numpars, pos+numpars+spaces);
2547       nextKeyIdx = getNextKey();
2548       break;
2549     }
2550     case KeyInfo::isList: {
2551       // Discard space before _tokenstart
2552       int count;
2553       for (count = 0; count < actual._tokenstart; count++) {
2554         if (interval_.par[actual._tokenstart-count-1] != ' ')
2555           break;
2556       }
2557       nextKeyIdx = getNextKey();
2558       int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments);
2559       if (tmpIdx > 0) {
2560         // Special case: \item is not a list, but a command (like in Style Author_Biography in maa-monthly.layout)
2561         // with arguments
2562         // How else can we catch this one?
2563         for (int i = nextKeyIdx; i <= tmpIdx; i++) {
2564           entries_[i].disabled = true;
2565         }
2566         actual._dataEnd = entries_[tmpIdx]._dataEnd;
2567       }
2568       else if (nextKeyIdx > 0) {
2569         // Ignore any lang entries inside data region
2570         for (int i = nextKeyIdx; i < int(entries_.size()) && entries_[i]._tokenstart < actual._dataEnd; i++) {
2571           if (entries_[i].keytype == KeyInfo::isMain)
2572             entries_[i].disabled = true;
2573         }
2574       }
2575       if (actual.disabled) {
2576         interval_.addIntervall(actual._tokenstart-count, actual._dataEnd+1);
2577       }
2578       else {
2579         interval_.addIntervall(actual._tokenstart-count, actual._tokenstart);
2580       }
2581       if (interval_.par[actual._dataEnd+1] == '[') {
2582         int posdown = interval_.findclosing(actual._dataEnd+2, interval_.par.length(), '[', ']');
2583         if ((interval_.par[actual._dataEnd+2] == '{') &&
2584             (interval_.par[posdown-1] == '}')) {
2585           interval_.addIntervall(actual._dataEnd+1,actual._dataEnd+3);
2586           interval_.addIntervall(posdown-1, posdown+1);
2587         }
2588         else {
2589           interval_.addIntervall(actual._dataEnd+1, actual._dataEnd+2);
2590           interval_.addIntervall(posdown, posdown+1);
2591         }
2592         int blk = interval_.nextNotIgnored(actual._dataEnd+1);
2593         if (blk > posdown) {
2594           // Discard at most 1 space after empty item
2595           int count;
2596           for (count = 0; count < 1; count++) {
2597             if (interval_.par[blk+count] != ' ')
2598               break;
2599           }
2600           if (count > 0)
2601             interval_.addIntervall(blk, blk+count);
2602         }
2603       }
2604       break;
2605     }
2606     case KeyInfo::isSectioning: {
2607       // Discard spaces before _tokenstart
2608       int count;
2609       int val = actual._tokenstart;
2610       for (count = 0; count < actual._tokenstart;) {
2611         val = interval_.previousNotIgnored(val-1);
2612         if (val < 0 || interval_.par[val] != ' ')
2613           break;
2614         else {
2615           count = actual._tokenstart - val;
2616         }
2617       }
2618       if (actual.disabled) {
2619         removeHead(actual, count);
2620         nextKeyIdx = getNextKey();
2621       } else {
2622         interval_.addIntervall(actual._tokenstart-count, actual._tokenstart);
2623         nextKeyIdx = process(os, actual);
2624       }
2625       break;
2626     }
2627     case KeyInfo::isMath: {
2628       // Same as regex, use the content unchanged
2629       nextKeyIdx = getNextKey();
2630       break;
2631     }
2632     case KeyInfo::isRegex: {
2633       // DO NOT SPLIT ON REGEX
2634       // Do not disable
2635       nextKeyIdx = getNextKey();
2636       break;
2637     }
2638     case KeyInfo::isIgnored: {
2639       // Treat like a character for now
2640       nextKeyIdx = getNextKey();
2641       break;
2642     }
2643     case KeyInfo::isMain: {
2644       if (interval_.par.substr(actual._dataStart, 2) == "% ")
2645         interval_.addIntervall(actual._dataStart, actual._dataStart+2);
2646       if (actual._tokenstart > 0) {
2647         int prev = interval_.previousNotIgnored(actual._tokenstart - 1);
2648         if ((prev >= 0) && interval_.par[prev] == '%')
2649           interval_.addIntervall(prev, prev+1);
2650       }
2651       if (actual.disabled) {
2652         removeHead(actual);
2653         interval_.langcount++;
2654         if ((interval_.par.substr(actual._dataStart, 3) == " \\[") ||
2655             (interval_.par.substr(actual._dataStart, 8) == " \\begin{")) {
2656           // Discard also the space before math-equation
2657           interval_.addIntervall(actual._dataStart, actual._dataStart+1);
2658         }
2659         nextKeyIdx = getNextKey();
2660         // interval.resetOpenedP(actual._dataStart-1);
2661       }
2662       else {
2663         if (actual._tokenstart < 26) {
2664           // for the first (and maybe dummy) language
2665           interval_.setForDefaultLang(actual);
2666         }
2667         interval_.resetOpenedP(actual._dataStart-1);
2668       }
2669       break;
2670     }
2671     case KeyInfo::invalid:
2672     case KeyInfo::headRemove:
2673       // These two cases cannot happen, already handled
2674       // fall through
2675     default: {
2676       // LYXERR(Debug::INFO, "Unhandled keytype");
2677       nextKeyIdx = getNextKey();
2678       break;
2679     }
2680   }
2681   return nextKeyIdx;
2682 }
2683
2684 int LatexInfo::process(ostringstream & os, KeyInfo const & actual )
2685 {
2686   int end = interval_.nextNotIgnored(actual._dataEnd);
2687   int oldStart = actual._dataStart;
2688   int nextKeyIdx = getNextKey();
2689   while (true) {
2690     if ((nextKeyIdx < 0) ||
2691         (entries_[nextKeyIdx]._tokenstart >= actual._dataEnd) ||
2692         (entries_[nextKeyIdx].keytype == KeyInfo::invalid)) {
2693       if (oldStart <= end) {
2694         processRegion(oldStart, end);
2695         oldStart = end+1;
2696       }
2697       break;
2698     }
2699     KeyInfo &nextKey = getKeyInfo(nextKeyIdx);
2700
2701     if ((nextKey.keytype == KeyInfo::isMain) && !nextKey.disabled) {
2702       (void) dispatch(os, actual._dataStart, nextKey);
2703       end = nextKey._tokenstart;
2704       break;
2705     }
2706     processRegion(oldStart, nextKey._tokenstart);
2707     nextKeyIdx = dispatch(os, actual._dataStart, nextKey);
2708
2709     oldStart = nextKey._dataEnd+1;
2710   }
2711   // now nextKey is either invalid or is outside of actual._dataEnd
2712   // output the remaining and discard myself
2713   if (oldStart <= end) {
2714     processRegion(oldStart, end);
2715   }
2716   if (interval_.par.size() > (size_t) end && interval_.par[end] == '}') {
2717     end += 1;
2718     // This is the normal case.
2719     // But if using the firstlanguage, the closing may be missing
2720   }
2721   // get minimum of 'end' and  'actual._dataEnd' in case that the nextKey.keytype was 'KeyInfo::isMain'
2722   int output_end;
2723   if (actual._dataEnd < end)
2724     output_end = interval_.nextNotIgnored(actual._dataEnd);
2725   else if (interval_.par.size() > (size_t) end)
2726     output_end = interval_.nextNotIgnored(end);
2727   else
2728     output_end = interval_.par.size();
2729   if ((actual.keytype == KeyInfo::isMain) && actual.disabled) {
2730     interval_.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize);
2731   }
2732   // Remove possible empty data
2733   int dstart = interval_.nextNotIgnored(actual._dataStart);
2734   while (interval_.isOpeningPar(dstart) == 1) {
2735     interval_.addIntervall(dstart, dstart+1);
2736     int dend = interval_.findclosing(dstart+1, output_end);
2737     interval_.addIntervall(dend, dend+1);
2738     dstart = interval_.nextNotIgnored(dstart+1);
2739   }
2740   if (dstart < output_end)
2741     interval_.output(os, output_end);
2742   if (nextKeyIdx < 0)
2743     interval_.addIntervall(0, end);
2744   else
2745     interval_.addIntervall(actual._tokenstart, end);
2746   return nextKeyIdx;
2747 }
2748
2749 string splitOnKnownMacros(string par, bool isPatternString)
2750 {
2751   ostringstream os;
2752   LatexInfo li(par, isPatternString);
2753   // LYXERR(Debug::INFO, "Berfore split: " << par);
2754   KeyInfo DummyKey = KeyInfo(KeyInfo::KeyType::isMain, 2, true);
2755   DummyKey.head = "";
2756   DummyKey._tokensize = 0;
2757   DummyKey._dataStart = 0;
2758   DummyKey._dataEnd = par.length();
2759   DummyKey.disabled = true;
2760   int firstkeyIdx = li.getFirstKey();
2761   string s;
2762   if (firstkeyIdx >= 0) {
2763     KeyInfo firstKey = li.getKeyInfo(firstkeyIdx);
2764     DummyKey._tokenstart = firstKey._tokenstart;
2765     int nextkeyIdx;
2766     if ((firstKey.keytype != KeyInfo::isMain) || firstKey.disabled) {
2767       // Use dummy firstKey
2768       firstKey = DummyKey;
2769       (void) li.setNextKey(firstkeyIdx);
2770     }
2771     else {
2772       if (par.substr(firstKey._dataStart, 2) == "% ")
2773         li.addIntervall(firstKey._dataStart, firstKey._dataStart+2);
2774     }
2775     nextkeyIdx = li.process(os, firstKey);
2776     while (nextkeyIdx >= 0) {
2777       // Check for a possible gap between the last
2778       // entry and this one
2779       int datastart = li.nextNotIgnored(firstKey._dataStart);
2780       KeyInfo &nextKey = li.getKeyInfo(nextkeyIdx);
2781       if ((nextKey._tokenstart > datastart)) {
2782         // Handle the gap
2783         firstKey._dataStart = datastart;
2784         firstKey._dataEnd = par.length();
2785         (void) li.setNextKey(nextkeyIdx);
2786         // Fake the last opened parenthesis
2787         li.setForDefaultLang(firstKey);
2788         nextkeyIdx = li.process(os, firstKey);
2789       }
2790       else {
2791         if (nextKey.keytype != KeyInfo::isMain) {
2792           firstKey._dataStart = datastart;
2793           firstKey._dataEnd = nextKey._dataEnd+1;
2794           (void) li.setNextKey(nextkeyIdx);
2795           li.setForDefaultLang(firstKey);
2796           nextkeyIdx = li.process(os, firstKey);
2797         }
2798         else {
2799           nextkeyIdx = li.process(os, nextKey);
2800         }
2801       }
2802     }
2803     // Handle the remaining
2804     firstKey._dataStart = li.nextNotIgnored(firstKey._dataStart);
2805     firstKey._dataEnd = par.length();
2806     // Check if ! empty
2807     if ((firstKey._dataStart < firstKey._dataEnd) &&
2808         (par[firstKey._dataStart] != '}')) {
2809       li.setForDefaultLang(firstKey);
2810       (void) li.process(os, firstKey);
2811     }
2812     s = os.str();
2813     if (s.empty()) {
2814       // return string definitelly impossible to match
2815       s = "\\foreignlanguage{ignore}{ }";
2816     }
2817   }
2818   else
2819     s = par;                            /* no known macros found */
2820   // LYXERR(Debug::INFO, "After split: " << s);
2821   return s;
2822 }
2823
2824 /*
2825  * Try to unify the language specs in the latexified text.
2826  * Resulting modified string is set to "", if
2827  * the searched tex does not contain all the features in the search pattern
2828  */
2829 static string correctlanguagesetting(string par, bool isPatternString, bool withformat, lyx::Buffer *pbuf = nullptr)
2830 {
2831         static Features regex_f;
2832         static int missed = 0;
2833         static bool regex_with_format = false;
2834
2835         int parlen = par.length();
2836
2837         while ((parlen > 0) && (par[parlen-1] == '\n')) {
2838                 parlen--;
2839         }
2840         if (isPatternString && (parlen > 0) && (par[parlen-1] == '~')) {
2841                 // Happens to be there in case of description or labeling environment
2842                 parlen--;
2843         }
2844         string result;
2845         if (withformat) {
2846                 // Split the latex input into pieces which
2847                 // can be digested by our search engine
2848                 LYXERR(Debug::FIND, "input: \"" << par << "\"");
2849                 if (isPatternString && (pbuf != nullptr)) { // Check if we should disable/enable test for language
2850                         // We check for polyglossia, because in runparams.flavor we use Flavor::XeTeX
2851                         string doclang = pbuf->params().language->polyglossia();
2852                         static regex langre("\\\\(foreignlanguage)\\{([^\\}]+)\\}");
2853                         smatch sub;
2854                         bool toIgnoreLang = true;
2855                         for (sregex_iterator it(par.begin(), par.end(), langre), end; it != end; ++it) {
2856                                 sub = *it;
2857                                 if (sub.str(2) != doclang) {
2858                                         toIgnoreLang = false;
2859                                         break;
2860                                 }
2861                         }
2862                         setIgnoreFormat("language", toIgnoreLang, false);
2863
2864                 }
2865                 result = splitOnKnownMacros(par.substr(0,parlen), isPatternString);
2866                 LYXERR(Debug::FIND, "After splitOnKnownMacros:\n\"" << result << "\"");
2867         }
2868         else
2869                 result = par.substr(0, parlen);
2870         if (isPatternString) {
2871                 missed = 0;
2872                 if (withformat) {
2873                         regex_f = identifyFeatures(result);
2874                         string features = "";
2875                         for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
2876                                 string a = it->first;
2877                                 regex_with_format = true;
2878                                 features += " " + a;
2879                                 // LYXERR(Debug::INFO, "Identified regex format:" << a);
2880                         }
2881                         LYXERR(Debug::FIND, "Identified Features" << features);
2882
2883                 }
2884         } else if (regex_with_format) {
2885                 Features info = identifyFeatures(result);
2886                 for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
2887                         string a = it->first;
2888                         bool b = it->second;
2889                         if (b && ! info[a]) {
2890                                 missed++;
2891                                 LYXERR(Debug::FIND, "Missed(" << missed << " " << a <<", srclen = " << parlen );
2892                                 return "";
2893                         }
2894                 }
2895
2896         }
2897         else {
2898                 // LYXERR(Debug::INFO, "No regex formats");
2899         }
2900         return result;
2901 }
2902
2903
2904 // Remove trailing closure of math, macros and environments, so to catch parts of them.
2905 static int identifyClosing(string & t)
2906 {
2907         int open_braces = 0;
2908         do {
2909                 LYXERR(Debug::FIND, "identifyClosing(): t now is '" << t << "'");
2910                 if (regex_replace(t, t, "(.*[^\\\\])\\$$", "$1"))
2911                         continue;
2912                 if (regex_replace(t, t, "(.*[^\\\\])\\\\\\]$", "$1"))
2913                         continue;
2914                 if (regex_replace(t, t, "(.*[^\\\\])\\\\end\\{[a-zA-Z_]*\\*?\\}$", "$1"))
2915                         continue;
2916                 if (regex_replace(t, t, "(.*[^\\\\])\\}$", "$1")) {
2917                         ++open_braces;
2918                         continue;
2919                 }
2920                 break;
2921         } while (true);
2922         return open_braces;
2923 }
2924
// Counter for the number of replaced strings. The MatchStringAdv constructor
// resets it to 0, except when a "replace all" follows another "replace all".
// NOTE(review): presumably incremented by the replace code outside this section.
static int num_replaced = 0;
// Set to true by the MatchStringAdv constructor if it is created without
// "replace all", and to false if it is created with "replace all".
static bool previous_single_replace = true;
2927
2928 void MatchStringAdv::CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string)
2929 {
2930 #if QTSEARCH
2931         // Handle \w properly
2932         QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption;
2933         if (! opt.casesensitive) {
2934                 popts |= QRegularExpression::CaseInsensitiveOption;
2935         }
2936         regexp = QRegularExpression(QString::fromStdString(regexp_str), popts);
2937         regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts);
2938         regexError = "";
2939         if (regexp.isValid() && regexp2.isValid()) {
2940                 regexIsValid = true;
2941                 // Check '{', '}' pairs inside the regex
2942                 int balanced = 0;
2943                 int skip = 1;
2944                 for (unsigned i = 0; i < par_as_string.size(); i+= skip) {
2945                         char c = par_as_string[i];
2946                         if (c == '\\') {
2947                                 skip = 2;
2948                                 continue;
2949                         }
2950                         if (c == '{')
2951                                 balanced++;
2952                         else if (c == '}') {
2953                                 balanced--;
2954                                 if (balanced < 0)
2955                                         break;
2956                                 }
2957                                 skip = 1;
2958                         }
2959                 if (balanced != 0) {
2960                         regexIsValid = false;
2961                         regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\"";
2962                 }
2963         }
2964         else {
2965                 regexIsValid = false;
2966                 if (!regexp.isValid())
2967                         regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString();
2968                 else
2969                         regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString();
2970         }
2971 #else
2972         if (opt.casesensitive) {
2973                 regexp = regex(regexp_str);
2974                 regexp2 = regex(regexp2_str);
2975         }
2976         else {
2977                 regexp = regex(regexp_str, std::regex_constants::icase);
2978                 regexp2 = regex(regexp2_str, std::regex_constants::icase);
2979         }
2980 #endif
2981 }
2982
/*
 * Adapt the regular expression \p t for the "match whole words" mode.
 *
 * Every unescaped '.' is replaced by '\S', and the result is enclosed
 * in '\b' ... '\b'.
 * If \p t already contains an unescaped '\b', then it is left untouched.
 * A '.' or '\b' counts as escaped if it is preceded by an odd number of
 * backslashes.
 *
 * \param t  the regular expression, modified in place
 */
static void modifyRegexForMatchWord(std::string &t)
{
        // Optional run of backslashes, followed by either '.' or '\b'
        static const std::regex wordre("(\\\\)*((\\.|\\\\b))");
        std::string s;
        size_t lastpos = 0;
        std::sregex_iterator const end;
        for (std::sregex_iterator it(t.cbegin(), t.cend(), wordre); it != end; ++it) {
                std::smatch const & sub = *it;
                // An odd number of backslashes in front escapes the token
                if ((sub.position(2) - sub.position(0)) % 2 == 1)
                        continue;
                // Group 2 is either "." or backslash followed by 'b'. In the
                // latter case word boundaries are given explicitly already,
                // so keep the expression as it is
                if (sub.str(2) == "\\b")
                        return;
                size_t const tokenpos = sub.position(2);
                if (lastpos < tokenpos)
                        s.append(t, lastpos, tokenpos - lastpos);
                s += "\\S";
                lastpos = tokenpos + sub.length(2);
        }
        // Copy the remaining part (everything, if nothing was replaced)
        s.append(t, lastpos, std::string::npos);
        t = "\\b" + s + "\\b";
}
3010
3011 MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
3012         : p_buf(&buf), p_first_buf(&buf), opt(opt)
3013 {
3014         Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true);
3015         docstring const & ds = stringifySearchBuffer(find_buf, opt);
3016         use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos;
3017         if (opt.replace_all && previous_single_replace) {
3018                 previous_single_replace = false;
3019                 num_replaced = 0;
3020         }
3021         else if (!opt.replace_all) {
3022                 num_replaced = 0;       // count number of replaced strings
3023                 previous_single_replace = true;
3024         }
3025         // When using regexp, braces are hacked already by escape_for_regex()
3026         par_as_string = normalize(ds);
3027         open_braces = 0;
3028         close_wildcards = 0;
3029
3030         size_t lead_size = 0;
3031         // correct the language settings
3032         par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat, &buf);
3033         opt.matchAtStart = false;
3034         if (!use_regexp) {
3035                 identifyClosing(par_as_string); // Removes math closings ($, ], ...) at end of string
3036                 if (opt.ignoreformat) {
3037                         lead_size = 0;
3038                 }
3039                 else {
3040                         lead_size = identifyLeading(par_as_string);
3041                 }
3042                 lead_as_string = par_as_string.substr(0, lead_size);
3043                 string lead_as_regex_string = string2regex(lead_as_string);
3044                 par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
3045                 string par_as_regex_string_nolead = string2regex(par_as_string_nolead);
3046                 /* Handle whole words too in this case
3047                 */
3048                 if (opt.matchword) {
3049                         par_as_regex_string_nolead = "\\b" + par_as_regex_string_nolead + "\\b";
3050                         opt.matchword = false;
3051                 }
3052                 string regexp_str = "(" + lead_as_regex_string + ")()" + par_as_regex_string_nolead;
3053                 string regexp2_str = "(" + lead_as_regex_string + ")(.*?)" + par_as_regex_string_nolead;
3054                 CreateRegexp(opt, regexp_str, regexp2_str);
3055                 use_regexp = true;
3056                 LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
3057                 LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
3058                 return;
3059         }
3060
3061         if (!opt.ignoreformat) {
3062                 lead_size = identifyLeading(par_as_string);
3063                 LYXERR(Debug::FIND, "Lead_size: " << lead_size);
3064                 lead_as_string = par_as_string.substr(0, lead_size);
3065                 par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
3066         }
3067
3068         // Here we are using regexp
3069         LASSERT(use_regexp, /**/);
3070         {
3071                 string lead_as_regexp;
3072                 if (lead_size > 0) {
3073                         lead_as_regexp = string2regex(par_as_string.substr(0, lead_size));
3074                         regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", "");
3075                         par_as_string = par_as_string_nolead;
3076                         LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
3077                         LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
3078                 }
3079                 // LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'");
3080                 par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat);
3081                 // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them.
3082                 // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
3083                 ++close_wildcards;
3084                 size_t lng = par_as_string.size();
3085                 if (!opt.ignoreformat) {
3086                         // Remove extra '\}' at end if not part of \{\.\}
3087                         while(lng > 2) {
3088                                 if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) {
3089                                         if (lng >= 6) {
3090                                                 if (par_as_string.substr(lng-6,3).compare("\\{\\") == 0)
3091                                                         break;
3092                                         }
3093                                         lng -= 2;
3094                                         open_braces++;
3095                                 }
3096                                 else
3097                                         break;
3098                         }
3099                         if (lng < par_as_string.size())
3100                                 par_as_string = par_as_string.substr(0,lng);
3101                 }
3102                 LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'");
3103                 if ((lng > 0) && (par_as_string[0] == '^')) {
3104                         par_as_string = par_as_string.substr(1);
3105                         --lng;
3106                         opt.matchAtStart = true;
3107                 }
3108                 // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
3109                 // LYXERR(Debug::FIND, "Open braces: " << open_braces);
3110                 // LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string);
3111
3112                 // If entered regexp must match at begin of searched string buffer
3113                 // Kornel: Added parentheses to use $1 for size of the leading string
3114                 string regexp_str;
3115                 string regexp2_str;
3116                 {
3117                         // TODO: Adapt '\[12345678]' in par_as_string to acount for the first '()
3118                         // Unfortunately is '\1', '\2', etc not working for strings with extra format
3119                         // so the convert has no effect in that case
3120                         for (int i = 7; i > 0; --i) {
3121                                 string orig = "\\\\" + std::to_string(i);
3122                                 string dest = "\\" + std::to_string(i+2);
3123                                 while (regex_replace(par_as_string, par_as_string, orig, dest));
3124                         }
3125                         if (opt.matchword) {
3126                                 modifyRegexForMatchWord(par_as_string);
3127                                 opt.matchword = false;
3128                         }
3129                         regexp_str = "(" + lead_as_regexp + ")()" + par_as_string;
3130                         regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string;
3131                 }
3132                 LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
3133                 LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
3134                 CreateRegexp(opt, regexp_str, regexp2_str, par_as_string);
3135         }
3136 }
3137
/** Match the compiled search regex against the text that starts at \p cur.
 *
 * The text is obtained with stringifyFromForSearch() and normalize(); when
 * formats are not ignored it is additionally passed through
 * correctlanguagesetting() and stripped of trailing '}' and '\n'.
 *
 * \param cur      position the searched text starts at
 * \param len      number of positions to stringify (forwarded unchanged to
 *                 stringifyFromForSearch(); also stored in searched_size)
 * \param at_begin if true, `regexp` is used, otherwise `regexp2`
 * \return a MatchResult. It is returned untouched (apart from searched_size)
 *         when nothing matches; match_len is set to -1 when the text to
 *         search in is empty.
 */
MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
{
        MatchResult mres;

        mres.searched_size = len;
        // Searching restricted to maths: nothing to do outside of a math inset
        if (at_begin &&
                (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) )
                return mres;

        docstring docstr = stringifyFromForSearch(opt, cur, len);
        string str;
        str = normalize(docstr);
        if (!opt.ignoreformat) {
                str = correctlanguagesetting(str, false, !opt.ignoreformat);
                // remove closing '}' and '\n' to allow for use of '$' in regex
                // (at least one character is always kept)
                size_t lng = str.size();
                while ((lng > 1) && ((str[lng -1] == '}') || (str[lng -1] == '\n')))
                        lng--;
                if (lng != str.size()) {
                        str = str.substr(0, lng);
                }
        }
        if (str.empty()) {
                // Nothing to search in: flagged with a negative match length
                mres.match_len = -1;
                return mres;
        }
        LYXERR(Debug::FIND, "After normalization: Matching against:\n'" << str << "'");

        LASSERT(use_regexp, /**/);
        {
                // use_regexp always true
                LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin);
#if QTSEARCH
                // Qt variant: all offsets below are offsets into qstr
                QString qstr = QString::fromStdString(str);
                QRegularExpression const *p_regexp;
                QRegularExpression::MatchType flags = QRegularExpression::NormalMatch;
                if (at_begin) {
                        p_regexp = &regexp;
                } else {
                        p_regexp = &regexp2;
                }
                QRegularExpressionMatch match = p_regexp->match(qstr, 0, flags);
                if (!match.hasMatch())
                        return mres;
#else
                // std::regex variant
                regex const *p_regexp;
                regex_constants::match_flag_type flags;
                if (at_begin) {
                        // the match has to start at the first character
                        flags = regex_constants::match_continuous;
                        p_regexp = &regexp;
                } else {
                        flags = regex_constants::match_default;
                        p_regexp = &regexp2;
                }
                sregex_iterator re_it(str.begin(), str.end(), *p_regexp, flags);
                if (re_it == sregex_iterator())
                        return mres;
                match_results<string::const_iterator> const & m = *re_it;
#endif
                // Whole found string, including the leading
                // std: m[0].second - m[0].first
                // Qt: match.capturedEnd(0) - match.capturedStart(0)
                //
                // Size of the leading string
                // std: m[1].second - m[1].first
                // Qt: match.capturedEnd(1) - match.capturedStart(1)
                int leadingsize = 0;
#if QTSEARCH
                if (match.lastCapturedIndex() > 0) {
                        leadingsize = match.capturedEnd(1) - match.capturedStart(1);
                }

#else
                if (m.size() > 2) {
                        leadingsize = m[1].second - m[1].first;
                }
#endif
#if QTSEARCH
                // Capture group 2 is the text between the leading part and the
                // real match; the match itself is what follows group 2.
                mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2);
                mres.match_len = match.capturedEnd(0) - match.capturedEnd(2);
                // because of different number of closing at end of string
                // we have to 'unify' the length of the post-match.
                // Done by ignoring closing parenthesis and linefeeds at string end
                int matchend = match.capturedEnd(0);
                size_t strsize = qstr.size();
                if (!opt.ignoreformat) {
                        // Shorten the match by trailing '\n', '}' and '{'
                        while (mres.match_len > 0) {
                                QChar c = qstr.at(matchend - 1);
                                if ((c == '\n') || (c == '}') || (c == '{')) {
                                        mres.match_len--;
                                        matchend--;
                                }
                                else
                                        break;
                        }
                        // Shorten the searched string by trailing '\n' and '}',
                        // but never beyond the end of the whole match
                        while (strsize > (size_t) match.capturedEnd(0)) {
                                QChar c = qstr.at(strsize-1);
                                if ((c == '\n') || (c == '}')) {
                                        --strsize;
                                }
                                else
                                        break;
                        }
                }
                // LYXERR0(qstr.toStdString());
                mres.match2end = strsize - matchend;
                mres.pos = match.capturedStart(2);
#else
                // Same computation as in the Qt variant, based on std::regex
                mres.match_prefix = m[2].second - m[2].first;
                mres.match_len = m[0].second - m[2].second;
                // ignore closing parenthesis and linefeeds at string end
                size_t strend = m[0].second - m[0].first;
                int matchend = strend;
                size_t strsize = str.size();
                if (!opt.ignoreformat) {
                        while (mres.match_len > 0) {
                                char c = str.at(matchend - 1);
                                if ((c == '\n') || (c == '}') || (c == '{')) {
                                        mres.match_len--;
                                        matchend--;
                                }
                                else
                                        break;
                        }
                        while (strsize > strend) {
                                if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == '\n')) {
                                        --strsize;
                                }
                                else
                                        break;
                        }
                }
                // LYXERR0(str);
                mres.match2end = strsize - matchend;
                mres.pos = m[2].first - m[0].first;;
#endif
                // A negative distance to the end invalidates the match
                if (mres.match2end < 0)
                  mres.match_len = 0;
                mres.leadsize = leadingsize;
                // Collect the matched text (entry 0) followed by the captures
                // with index 3 and above.
#if QTSEARCH
                if (mres.match_len > 0) {
                  string a0 = match.captured(0).mid(mres.pos + mres.match_prefix, mres.match_len).toStdString();
                  mres.result.push_back(a0);
                  for (int i = 3; i <= match.lastCapturedIndex(); i++) {
                    mres.result.push_back(match.captured(i).toStdString());
                  }
                }
#else
                if (mres.match_len > 0) {
                  string a0 = m[0].str().substr(mres.pos + mres.match_prefix, mres.match_len);
                  mres.result.push_back(a0);
                  for (size_t i = 3; i < m.size(); i++) {
                    mres.result.push_back(m[i]);
                  }
                }
#endif
                return mres;
        }
}
3297
3298
3299 MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const
3300 {
3301         MatchResult mres = findAux(cur, len, at_begin);
3302         int res = mres.match_len;
3303         LYXERR(Debug::FIND,
3304                "res=" << res << ", at_begin=" << at_begin
3305                << ", matchAtStart=" << opt.matchAtStart
3306                << ", inTexted=" << cur.inTexted());
3307         if (opt.matchAtStart) {
3308                 if (cur.pos() != 0)
3309                         mres.match_len = 0;
3310                 else if (mres.match_prefix > 0)
3311                         mres.match_len = 0;
3312                 return mres;
3313         }
3314         else
3315                 return mres;
3316 }
3317
3318 #if 0
/// Replace every occurrence of the regex \p from in \p t by \p to, unless it
/// is preceded by an odd number of backslashes (i.e. it is escaped).
/// \return true if \p t was rewritten, false if it was left untouched
static bool simple_replace(string &t, string from, string to)
{
  // Group 1: run of backslashes in front of the pattern, group 2: the pattern
  regex const pattern("(\\\\)*(" + from + ")");
  string rebuilt;
  size_t consumed = 0;
  sregex_iterator const last;
  for (sregex_iterator hit(t.begin(), t.end(), pattern); hit != last; ++hit) {
    smatch const & m = *hit;
    size_t const start = static_cast<size_t>(m.position(2));
    size_t const backslashes = start - static_cast<size_t>(m.position(0));
    // Odd number of backslashes: the pattern itself is escaped, keep it
    if (backslashes % 2 == 1)
      continue;
    // Copy the untouched text in front of the hit, then the replacement
    if (consumed < start)
      rebuilt.append(t, consumed, start - consumed);
    rebuilt += to;
    consumed = start + static_cast<size_t>(m.length(2));
  }
  if (consumed == 0)
    return false;
  // Copy whatever follows the last replaced hit
  if (consumed < t.length())
    rebuilt.append(t, consumed, t.length() - consumed);
  t = rebuilt;
  return true;
}
3341 #endif
3342
3343 string MatchStringAdv::normalize(docstring const & s) const
3344 {
3345         string t;
3346         t = lyx::to_utf8(s);
3347         // Remove \n at begin
3348         while (!t.empty() && t[0] == '\n')
3349                 t = t.substr(1);
3350         // Remove \n at end
3351         while (!t.empty() && t[t.size() - 1] == '\n')
3352                 t = t.substr(0, t.size() - 1);
3353         size_t pos;
3354         // Handle all other '\n'
3355         while ((pos = t.find("\n")) != string::npos) {
3356                 if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) {
3357                         // Handle '\\\n'
3358                         if (isAlnumASCII(t[pos+1])) {
3359                                 t.replace(pos-2, 3, " ");
3360                         }
3361                         else {
3362                                 t.replace(pos-2, 3, "");
3363                         }
3364                 }
3365                 else if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) {
3366                         // '\n' adjacent to non-alpha-numerics, discard
3367                         t.replace(pos, 1, "");
3368                 }
3369                 else {
3370                         // Replace all other \n with spaces
3371                         t.replace(pos, 1, " ");
3372                 }
3373         }
3374         // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
3375         // Kornel: Added textsl, textsf, textit, texttt and noun
3376         // + allow to seach for colored text too
3377         LYXERR(Debug::FIND, "Removing stale empty macros from: " << t);
3378         while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
3379                 LYXERR(Debug::FIND, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
3380         while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
3381                 LYXERR(Debug::FIND, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
3382         while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
3383
3384         return t;
3385 }
3386
3387
3388 docstring stringifyFromCursor(DocIterator const & cur, int len)
3389 {
3390         LYXERR(Debug::FIND, "Stringifying with len=" << len << " from cursor at pos: " << cur);
3391         if (cur.inTexted()) {
3392                 Paragraph const & par = cur.paragraph();
3393                 // TODO what about searching beyond/across paragraph breaks ?
3394                 // TODO Try adding a AS_STR_INSERTS as last arg
3395                 pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ?
3396                         int(par.size()) : cur.pos() + len;
3397                 // OutputParams runparams(&cur.buffer()->params().encoding());
3398                 OutputParams runparams(encodings.fromLyXName("utf8"));
3399                 runparams.nice = true;
3400                 runparams.flavor = Flavor::XeTeX;
3401                 runparams.linelen = 10000; //lyxrc.plaintext_linelen;
3402                 // No side effect of file copying and image conversion
3403                 runparams.dryrun = true;
3404                 int option = AS_STR_INSETS | AS_STR_PLAINTEXT;
3405                 if (ignoreFormats.getDeleted()) {
3406                         option |= AS_STR_SKIPDELETE;
3407                         runparams.for_searchAdv = OutputParams::SearchWithoutDeleted;
3408                 }
3409                 else {
3410                         runparams.for_searchAdv = OutputParams::SearchWithDeleted;
3411                 }
3412                 LYXERR(Debug::FIND, "Stringifying with cur: "
3413                        << cur << ", from pos: " << cur.pos() << ", end: " << end);
3414                 return par.asString(cur.pos(), end,
3415                         option,
3416                         &runparams);
3417         } else if (cur.inMathed()) {
3418                 CursorSlice cs = cur.top();
3419                 MathData md = cs.cell();
3420                 MathData::const_iterator it_end =
3421                         (( len == -1 || cs.pos() + len > int(md.size()))
3422                          ? md.end()
3423                          : md.begin() + cs.pos() + len );
3424                 MathData md2;
3425                 for (MathData::const_iterator it = md.begin() + cs.pos();
3426                      it != it_end; ++it)
3427                         md2.push_back(*it);
3428                 docstring s = asString(md2);
3429                 LYXERR(Debug::FIND, "Stringified math: '" << s << "'");
3430                 return s;
3431         }
3432         LYXERR(Debug::FIND, "Don't know how to stringify from here: " << cur);
3433         return docstring();
3434 }
3435
3436
3437 /** Computes the LaTeX export of buf starting from cur and ending len positions
3438  * after cur, if len is positive, or at the paragraph or innermost inset end
3439  * if len is -1.
3440  */
3441 docstring latexifyFromCursor(DocIterator const & cur, int len)
3442 {
3443         /*
3444         LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur);
3445         LYXERR(Debug::FIND, "  with cur.lastpost=" << cur.lastpos() << ", cur.lastrow="
3446                << cur.lastrow() << ", cur.lastcol=" << cur.lastcol());
3447         */
3448         Buffer const & buf = *cur.buffer();
3449
3450         odocstringstream ods;
3451         otexstream os(ods);
3452         //OutputParams runparams(&buf.params().encoding());
3453         OutputParams runparams(encodings.fromLyXName("utf8"));
3454         runparams.nice = false;
3455         runparams.flavor = Flavor::XeTeX;
3456         runparams.linelen = 8000; //lyxrc.plaintext_linelen;
3457         // No side effect of file copying and image conversion
3458         runparams.dryrun = true;
3459         if (ignoreFormats.getDeleted()) {
3460                 runparams.for_searchAdv = OutputParams::SearchWithoutDeleted;
3461         }
3462         else {
3463                 runparams.for_searchAdv = OutputParams::SearchWithDeleted;
3464         }
3465
3466         if (cur.inTexted()) {
3467                 // @TODO what about searching beyond/across paragraph breaks ?
3468                 pos_type endpos = cur.paragraph().size();
3469                 if (len != -1 && endpos > cur.pos() + len)
3470                         endpos = cur.pos() + len;
3471                 TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams,
3472                           string(), cur.pos(), endpos);
3473                 string s = lyx::to_utf8(ods.str());
3474                 LYXERR(Debug::FIND, "Latexified +modified text: '" << s << "'");
3475                 return(lyx::from_utf8(s));
3476         } else if (cur.inMathed()) {
3477                 // Retrieve the math environment type, and add '$' or '$[' or others (\begin{equation}) accordingly
3478                 for (int s = cur.depth() - 1; s >= 0; --s) {
3479                         CursorSlice const & cs = cur[s];
3480                         if (cs.asInsetMath() && cs.asInsetMath()->asHullInset()) {
3481                                 TeXMathStream ws(os);
3482                                 cs.asInsetMath()->asHullInset()->header_write(ws);
3483                                 break;
3484                         }
3485                 }
3486
3487                 CursorSlice const & cs = cur.top();
3488                 MathData md = cs.cell();
3489                 MathData::const_iterator it_end =
3490                         ((len == -1 || cs.pos() + len > int(md.size()))
3491                          ? md.end()
3492                          : md.begin() + cs.pos() + len);
3493                 MathData md2;
3494                 for (MathData::const_iterator it = md.begin() + cs.pos();
3495                      it != it_end; ++it)
3496                         md2.push_back(*it);
3497
3498                 ods << asString(md2);
3499                 // Retrieve the math environment type, and add '$' or '$]'
3500                 // or others (\end{equation}) accordingly
3501                 for (int s = cur.depth() - 1; s >= 0; --s) {
3502                         CursorSlice const & cs2 = cur[s];
3503                         InsetMath * inset = cs2.asInsetMath();
3504                         if (inset && inset->asHullInset()) {
3505                                 TeXMathStream ws(os);
3506                                 inset->asHullInset()->footer_write(ws);
3507                                 break;
3508                         }
3509                 }
3510                 LYXERR(Debug::FIND, "Latexified math: '" << lyx::to_utf8(ods.str()) << "'");
3511         } else {
3512                 LYXERR(Debug::FIND, "Don't know how to stringify from here: " << cur);
3513         }
3514         return ods.str();
3515 }
3516
3517 #if defined(ResultsDebug)
3518 // Debugging output
3519 static void displayMResult(MatchResult &mres, string from, DocIterator & cur)
3520 {
3521         LYXERR0( "from:\t\t\t" << from);
3522         string status;
3523         if (mres.pos_len > 0) {
3524                 // Set in finalize
3525                 status = "FINALSEARCH";
3526         }
3527         else {
3528                 if (mres.match_len > 0) {
3529                         if ((mres.match_prefix == 0) && (mres.pos == mres.leadsize))
3530                                 status = "Good Match";
3531                         else
3532                                 status = "Matched in";
3533                 }
3534                 else
3535                         status = "MissedSearch";
3536         }
3537
3538         LYXERR0( status << "(" << cur.pos() << " ... " << mres.searched_size + cur.pos() << ") cur.lastpos(" << cur.lastpos() << ")");
3539         if ((mres.leadsize > 0) || (mres.match_len > 0) || (mres.match2end > 0))
3540                 LYXERR0( "leadsize(" << mres.leadsize << ") match_len(" << mres.match_len << ") match2end(" << mres.match2end << ")");
3541         if ((mres.pos > 0) || (mres.match_prefix > 0))
3542                 LYXERR0( "pos(" << mres.pos << ") match_prefix(" << mres.match_prefix << ")");
3543         for (size_t i = 0; i < mres.result.size(); i++)
3544                 LYXERR0( "Match " << i << " = \"" << mres.result[i] << "\"");
3545 }
3546         #define displayMres(s, txt, cur) displayMResult(s, txt, cur);
3547 #else
3548         #define displayMres(s, txt, cur)
3549 #endif
3550
/** Finalize an advanced find operation, advancing the cursor to the innermost
 ** position that matches, plus computing the length of the matching text to
 ** be selected
 **
 ** \param cur      in/out: start position; moved to the start of the match
 ** \param match    the matcher to evaluate at candidate positions
 ** \param expected result of a previous match at \p cur (defaults to
 **                 MatchResult(-1), in which case the match is redone here)
 ** \return the best match with pos_len set to the cur.pos() difference
 **         between start and end of the found match, or a default
 **         constructed MatchResult on failure
 **/
MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, MatchResult const & expected = MatchResult(-1))
{
        // Search the foremost position that matches (avoids find of entire math
        // inset when match at start of it)
        DocIterator old_cur(cur.buffer());
        MatchResult mres;
        // Returned (by value) whenever no usable match is found
        static MatchResult fail = MatchResult();
        MatchResult max_match;
        // If (prefix_len > 0) means that forwarding 1 position will remove the complete entry
        // Happens with e.g. hyperlinks
        // either one sees "http://www.bla.bla" or nothing
        // so the search for "www" gives prefix_len = 7 (== sizeof("http://")
        // and although we search for only 3 chars, we find the whole hyperlink inset
        bool at_begin = (expected.match_prefix == 0);
        // Backward search with ignored formats: only accept matches at pos 0
        if (!match.opt.forward && match.opt.ignoreformat) {
                if (expected.pos > 0)
                        return fail;
        }
        LASSERT(at_begin, /**/);
        if (expected.match_len > 0 && at_begin) {
                // Search for deepest match
                old_cur = cur;
                max_match = expected;
                do {
                        size_t d = cur.depth();
                        cur.forwardPos();
                        // Stop unless the step forward entered a nested inset
                        if (!cur)
                                break;
                        if (cur.depth() < d)
                                break;
                        if (cur.depth() == d)
                                break;
                        size_t lastd = d;
                        // Keep descending as long as each step goes deeper
                        while (cur && cur.depth() > lastd) {
                                lastd = cur.depth();
                                mres = match(cur, -1, at_begin);
                                displayMres(mres, "Checking innermost", cur);
                                if (mres.match_len > 0)
                                        break;
                                // maybe deeper?
                                cur.forwardPos();
                        }
                        // The inner position must match at least as much as expected
                        if (mres.match_len < expected.match_len)
                                break;
                        max_match = mres;
                        old_cur = cur;;
                } while(1);
                // Back to the deepest position that still matched
                cur = old_cur;
        }
        else {
                // (expected.match_len <= 0)
                mres = match(cur);      /* match valid only if not searching whole words */
                displayMres(mres, "Start with negative match", cur);
                max_match = mres;
        }
        if (max_match.match_len <= 0) return fail;
        LYXERR(Debug::FIND, "Ok");

        // Compute the match length
        int len = 1;
        if (cur.pos() + len > cur.lastpos())
          return fail;

        LASSERT(match.use_regexp, /**/);
        {
          // Search interval [minl, maxl] for the number of positions to select
          int minl = 1;
          int maxl = cur.lastpos() - cur.pos();
          // Greedy behaviour while matching regexps
          while (maxl > minl) {
            MatchResult mres2;
            mres2 = match(cur, len, at_begin);
            displayMres(mres2, "Finalize loop", cur);
            int actual_match_len = mres2.match_len;
            if (actual_match_len >= max_match.match_len) {
              // actual_match_len > max_match _can_ happen,
              // if the search area splits
              // some following word so that the regex
              // (e.g. 'r.*r\b' matches 'r' from the middle of the
              // split word)
              // This means, the len value is too big
              actual_match_len = max_match.match_len;
              max_match = mres2;
              max_match.match_len = actual_match_len;
              // len is sufficient: continue in the lower part of the interval
              maxl = len;
              if (maxl - minl < 4)
                len = (int)((maxl + minl)/2);
              else
                len = (int)(minl + (maxl - minl + 3)/4);
            }
            else {
              // (actual_match_len < max_match.match_len)
              // len is too small: continue in the upper part of the interval
              minl = len + 1;
              len = (int)((maxl + minl)/2);
            }
          }
          len = minl;
          old_cur = cur;
          // Search for real start of matched characters
          while (len > 1) {
            MatchResult actual_match;
            do {
              cur.forwardPos();
            } while (cur.depth() > old_cur.depth()); /* Skip inner insets */
            if (cur.depth() < old_cur.depth()) {
              // Outer inset?
              LYXERR(Debug::INFO, "cur.depth() < old_cur.depth(), this should never happen");
              break;
            }
            if (cur.pos() != old_cur.pos()) {
              // OK, forwarded 1 pos in actual inset
              actual_match = match(cur, len-1, at_begin);
              if (actual_match.match_len == max_match.match_len) {
                // Ha, got it! The shorter selection has the same match length
                len--;
                old_cur = cur;
                max_match = actual_match;
              }
              else {
                // OK, the shorter selection matches less chars, revert to previous value
                cur = old_cur;
                break;
              }
            }
            else {
              LYXERR(Debug::INFO, "cur.pos() == old_cur.pos(), this should never happen");
              actual_match = match(cur, len, at_begin);
              if (actual_match.match_len == max_match.match_len) {
                old_cur = cur;
                max_match = actual_match;
              }
            }
          }
          if (len == 0)
            return fail;
          else {
            // Report the number of positions to select along with the match
            max_match.pos_len = len;
            displayMres(max_match, "SEARCH RESULT", cur)
            return max_match;
          }
        }
}
3697
/// Finds forward
/** Starting at \p cur, look for the next position where \p match matches.
 *
 * \param cur   in/out: search start; on success positioned at the match
 * \param match the matcher; on success its results are filled via
 *              FillResults()
 * \return the pos_len of the finalized match (number of positions to
 *         select), or 0 if nothing was found, the cursor became invalid or
 *         the long operation was cancelled
 */
int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
{
        if (!cur)
                return 0;
        // True while re-trying one position further after a match that did
        // not start exactly at the cursor
        bool repeat = false;
        DocIterator orig_cur;   // to be used if repeat not successful
        MatchResult orig_mres;
        while (!theApp()->longOperationCancelled() && cur) {
                //(void) findAdvForwardInnermost(cur);
                LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
                // Match anywhere in the text from cur to the end
                MatchResult mres = match(cur, -1, false);
                string msg = "Starting";
                if (repeat)
                        msg = "Repeated";
                displayMres(mres, msg + " findForwardAdv", cur)
                int match_len = mres.match_len;
                // Implausibly big values are treated as "no match"
                if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) {
                        LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end);
                        match_len = 0;
                }
                if (match_len <= 0) {
                        // This should exit nested insets, if any, or otherwise undefine the cursor.
                        cur.pos() = cur.lastpos();
                        LYXERR(Debug::FIND, "Advancing pos: cur=" << cur);
                        cur.forwardPos();
                }
                else {  // match_len > 0
                        // Try to find the begin of searched string
                        int increment;
                        // Distance from cur to the end of the current inset/paragraph
                        int firstInvalid = cur.lastpos() - cur.pos();
                        {
                                // Start with 3/4 of the estimated distance to the
                                // match, limited by 3/4 of the remaining positions
                                int incrmatch = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4;
                                int incrcur = (firstInvalid + 1 )*3/4;
                                if (incrcur < incrmatch)
                                        increment = incrcur;
                                else
                                        increment = incrmatch;
                                if (increment < 1)
                                        increment = 1;
                        }
                        LYXERR(Debug::FIND, "Set increment to " << increment);
                        // Move cur forward in shrinking steps for as long as the
                        // match found from the new position is judged better
                        while (increment > 0) {
                                DocIterator old_cur = cur;
                                // Never step onto or beyond the last position
                                if (cur.pos() + increment >= cur.lastpos()) {
                                        increment /= 2;
                                        continue;
                                }
                                cur.pos() = cur.pos() + increment;
                                MatchResult mres2 = match(cur, -1, false);
                                displayMres(mres2, "findForwardAdv loop", cur)
                                switch (interpretMatch(mres, mres2)) {
                                        case MatchResult::newIsTooFar:
                                                // behind the expected match
                                                firstInvalid = increment;
                                                cur = old_cur;
                                                increment /= 2;
                                                break;
                                        case MatchResult::newIsBetter:
                                                // not reached yet, but cur.pos()+increment is better
                                                mres = mres2;
                                                firstInvalid -= increment;
                                                if (increment > firstInvalid*3/4)
                                                        increment = firstInvalid*3/4;
                                                if ((mres2.pos == mres2.leadsize) && (increment >= mres2.match_prefix)) {
                                                        // NOTE(review): this inner test repeats part of
                                                        // the enclosing condition and is always true here
                                                        if (increment >= mres2.match_prefix)
                                                                increment = (mres2.match_prefix+1)*3/4;
                                                }
                                                break;
                                        default:
                                                // Todo@
                                                // Handle not like MatchResult::newIsTooFar
                                                LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix);
                                                firstInvalid--;
                                                increment = increment*3/4;
                                                cur = old_cur;
                                        break;
                                }
                        }
                        if (mres.match_len > 0) {
                                if (mres.match_prefix + mres.pos - mres.leadsize > 0) {
                                        // The match seems to indicate some deeper level
                                        // Remember this state and retry one position further
                                        repeat = true;
                                        orig_cur = cur;
                                        orig_mres = mres;
                                        cur.forwardPos();
                                        continue;
                                }
                        }
                        else if (repeat) {
                                // should never be reached.
                                cur = orig_cur;
                                mres = orig_mres;
                        }
                        // LYXERR0("Leaving first loop");
                        LYXERR(Debug::FIND, "Finalizing 1");
                        MatchResult found_match = findAdvFinalize(cur, match, mres);
                        if (found_match.match_len > 0) {
                                LASSERT(found_match.pos_len > 0, /**/);
                                match.FillResults(found_match);
                                return found_match.pos_len;
                        }
                        else {
                                // try next possible match
                                cur.forwardPos();
                                repeat = false;
                                continue;
                        }
                }
        }
        return 0;
}
3810
3811
3812 /// Find the most backward consecutive match within same paragraph while searching backwards.
3813 MatchResult findMostBackwards(DocIterator & cur, MatchStringAdv const & match, MatchResult &expected)
3814 {
3815         DocIterator cur_begin = cur;
3816         cur_begin.pos() = 0;
3817         DocIterator tmp_cur = cur;
3818         MatchResult mr = findAdvFinalize(tmp_cur, match, expected);
3819         Inset & inset = cur.inset();
3820         for (; cur != cur_begin; cur.backwardPos()) {
3821                 LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur);
3822                 DocIterator new_cur = cur;
3823                 new_cur.backwardPos();
3824                 if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len)
3825                         break;
3826                 MatchResult new_mr = findAdvFinalize(new_cur, match, expected);
3827                 if (new_mr.match_len == mr.match_len)
3828                         break;
3829                 mr = new_mr;
3830         }
3831         LYXERR(Debug::FIND, "findMostBackwards(): exiting with cur=" << cur);
3832         return mr;
3833 }
3834
3835
3836 /// Finds backwards
3837 int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match)
3838 {
3839         if (! cur)
3840                 return 0;
3841         // Backup of original position
3842         DocIterator cur_begin = doc_iterator_begin(cur.buffer());
3843         if (cur == cur_begin)
3844                 return 0;
3845         cur.backwardPos();
3846         DocIterator cur_orig(cur);
3847         bool pit_changed = false;
3848         do {
3849                 cur.pos() = 0;
3850                 MatchResult found_match = match(cur, -1, false);
3851
3852                 if (found_match.match_len > 0) {
3853                         if (pit_changed)
3854                                 cur.pos() = cur.lastpos();
3855                         else
3856                                 cur.pos() = cur_orig.pos();
3857                         LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur);
3858                         DocIterator cur_prev_iter;
3859                         do {
3860                                 found_match = match(cur);
3861                                 LYXERR(Debug::FIND, "findBackAdv3: found_match="
3862                                        << (found_match.match_len > 0) << ", cur: " << cur);
3863                                 if (found_match.match_len > 0) {
3864                                         MatchResult found_mr = findMostBackwards(cur, match, found_match);
3865                                         if (found_mr.pos_len > 0) {
3866                                                 match.FillResults(found_mr);
3867                                                 return found_mr.pos_len;
3868                                         }
3869                                 }
3870
3871                                 // Stop if begin of document reached
3872                                 if (cur == cur_begin)
3873                                         break;
3874                                 cur_prev_iter = cur;
3875                                 cur.backwardPos();
3876                         } while (true);
3877                 }
3878                 if (cur == cur_begin)
3879                         break;
3880                 if (cur.pit() > 0)
3881                         --cur.pit();
3882                 else
3883                         cur.backwardPos();
3884                 pit_changed = true;
3885         } while (!theApp()->longOperationCancelled());
3886         return 0;
3887 }
3888
3889
3890 } // namespace
3891
3892
3893 docstring stringifyFromForSearch(FindAndReplaceOptions const & opt,
3894                                  DocIterator const & cur, int len)
3895 {
3896         if (cur.pos() < 0 || cur.pos() > cur.lastpos())
3897                 return docstring();
3898         if (!opt.ignoreformat)
3899                 return latexifyFromCursor(cur, len);
3900         else
3901                 return stringifyFromCursor(cur, len);
3902 }
3903
3904
3905 FindAndReplaceOptions::FindAndReplaceOptions(
3906         docstring const & _find_buf_name, bool _casesensitive,
3907         bool _matchword, bool _forward, bool _expandmacros, bool _ignoreformat,
3908         docstring const & _repl_buf_name, bool _keep_case,
3909         SearchScope _scope, SearchRestriction _restr, bool _replace_all)
3910         : find_buf_name(_find_buf_name), casesensitive(_casesensitive), matchword(_matchword),
3911           forward(_forward), expandmacros(_expandmacros), ignoreformat(_ignoreformat),
3912           repl_buf_name(_repl_buf_name), keep_case(_keep_case), scope(_scope), restr(_restr), replace_all(_replace_all)
3913 {
3914 }
3915
3916
3917 namespace {
3918
3919
3920 /** Check if 'len' letters following cursor are all non-lowercase */
3921 static bool allNonLowercase(Cursor const & cur, int len)
3922 {
3923         pos_type beg_pos = cur.selectionBegin().pos();
3924         pos_type end_pos = cur.selectionBegin().pos() + len;
3925         if (len > cur.lastpos() + 1 - beg_pos) {
3926                 LYXERR(Debug::FIND, "This should not happen, more debug needed");
3927                 len = cur.lastpos() + 1 - beg_pos;
3928                 end_pos = beg_pos + len;
3929         }
3930         for (pos_type pos = beg_pos; pos != end_pos; ++pos)
3931                 if (isLowerCase(cur.paragraph().getChar(pos)))
3932                         return false;
3933         return true;
3934 }
3935
3936
3937 /** Check if first letter is upper case and second one is lower case */
3938 static bool firstUppercase(Cursor const & cur)
3939 {
3940         char_type ch1, ch2;
3941         pos_type pos = cur.selectionBegin().pos();
3942         if (pos >= cur.lastpos() - 1) {
3943                 LYXERR(Debug::FIND, "No upper-case at cur: " << cur);
3944                 return false;
3945         }
3946         ch1 = cur.paragraph().getChar(pos);
3947         ch2 = cur.paragraph().getChar(pos + 1);
3948         bool result = isUpperCase(ch1) && isLowerCase(ch2);
3949         LYXERR(Debug::FIND, "firstUppercase(): "
3950                << "ch1=" << ch1 << "(" << char(ch1) << "), ch2="
3951                << ch2 << "(" << char(ch2) << ")"
3952                << ", result=" << result << ", cur=" << cur);
3953         return result;
3954 }
3955
3956
3957 /** Make first letter of supplied buffer upper-case, and the rest lower-case.
3958  **
3959  ** \fixme What to do with possible further paragraphs in replace buffer ?
3960  **/
3961 static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase others_case)
3962 {
3963         ParagraphList::iterator pit = buffer.paragraphs().begin();
3964         LASSERT(!pit->empty(), /**/);
3965         pos_type right = pos_type(1);
3966         pit->changeCase(buffer.params(), pos_type(0), right, first_case);
3967         right = pit->size();
3968         pit->changeCase(buffer.params(), pos_type(1), right, others_case);
3969 }
3970 } // namespace
3971
/// Substitute back-reference placeholders in \p t.
///
/// A placeholder is the text "$$" followed by one decimal digit N. It is
/// replaced by \c replacements[N] unless
///  - it is preceded by an odd number of backslashes (escaped), or
///  - N is not below \p maxmatchnum, or
///  - N is not a valid index into \p replacements.
/// Backslashes in front of a placeholder are always kept as they are.
///
/// \param t             in/out: text to process; untouched if nothing was replaced.
/// \param maxmatchnum   number of valid matches; only N < maxmatchnum is replaced.
/// \param replacements  replacement texts, indexed by N.
/// \return true if at least one placeholder was replaced.
static bool replaceMatches(std::string &t, int maxmatchnum, std::vector<std::string> const & replacements)
{
	// group 1: backslashes, group 2: whole placeholder, group 3: the digit
	static std::regex const rematch("(\\\\)*(\\$\\$([0-9]))");
	std::string result;
	std::size_t copied_upto = 0;
	bool replaced = false;
	for (std::sregex_iterator it(t.begin(), t.end(), rematch), end; it != end; ++it) {
		std::smatch const & sub = *it;
		auto const placeholder_pos = sub.position(2);
		// Odd number of leading backslashes: the '$' is escaped
		if ((placeholder_pos - sub.position(0)) % 2 == 1)
			continue;
		// Group 3 is exactly one character out of [0-9]
		int const num = *sub[3].first - '0';
		// Skip numbers beyond the valid matches, and never index past
		// the end of the replacement list.
		if (num >= maxmatchnum
		    || static_cast<std::size_t>(num) >= replacements.size())
			continue;
		std::size_t const start = static_cast<std::size_t>(placeholder_pos);
		if (copied_upto < start)
			result += t.substr(copied_upto, start - copied_upto);
		result += replacements[num];
		copied_upto = start + static_cast<std::size_t>(sub.length(2));
		replaced = true;
	}
	if (!replaced)
		return false;
	if (copied_upto < t.length())
		result += t.substr(copied_upto);
	t = result;
	return true;
}
3999
4000 ///
4001 static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)
4002 {
4003         Cursor & cur = bv->cursor();
4004         if (opt.repl_buf_name.empty()
4005             || theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true) == 0
4006             || theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0)
4007                 return 0;
4008
4009         DocIterator sel_beg = cur.selectionBegin();
4010         DocIterator sel_end = cur.selectionEnd();
4011         if (&sel_beg.inset() != &sel_end.inset()
4012             || sel_beg.pit() != sel_end.pit()
4013             || sel_beg.idx() != sel_end.idx())
4014                 return 0;
4015         int sel_len = sel_end.pos() - sel_beg.pos();
4016         LYXERR(Debug::FIND, "sel_beg: " << sel_beg << ", sel_end: " << sel_end
4017                << ", sel_len: " << sel_len << endl);
4018         if (sel_len == 0)
4019                 return 0;
4020         LASSERT(sel_len > 0, return 0);
4021
4022         if (!matchAdv(sel_beg, sel_len).match_len)
4023                 return 0;
4024
4025         // Build a copy of the replace buffer, adapted to the KeepCase option
4026         Buffer const & repl_buffer_orig = *theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true);
4027         ostringstream oss;
4028         repl_buffer_orig.write(oss);
4029         string lyx = oss.str();
4030         if (matchAdv.valid_matches > 0) {
4031           replaceMatches(lyx, matchAdv.valid_matches, matchAdv.matches);
4032         }
4033         Buffer repl_buffer("", false);
4034         repl_buffer.setUnnamed(true);
4035         LASSERT(repl_buffer.readString(lyx), return 0);
4036         if (opt.keep_case && sel_len >= 2) {
4037                 LYXERR(Debug::FIND, "keep_case true: cur.pos()=" << cur.pos() << ", sel_len=" << sel_len);
4038                 if (cur.inTexted()) {
4039                         if (firstUppercase(cur))
4040                                 changeFirstCase(repl_buffer, text_uppercase, text_lowercase);
4041                         else if (allNonLowercase(cur, sel_len))
4042                                 changeFirstCase(repl_buffer, text_uppercase, text_uppercase);
4043                 }
4044         }
4045         cap::cutSelection(cur, false);
4046         if (cur.inTexted()) {
4047                 repl_buffer.changeLanguage(
4048                         repl_buffer.language(),
4049                         cur.getFont().language());
4050                 LYXERR(Debug::FIND, "Replacing by pasteParagraphList()ing repl_buffer");
4051                 LYXERR(Debug::FIND, "Before pasteParagraphList() cur=" << cur << endl);
4052                 cap::pasteParagraphList(cur, repl_buffer.paragraphs(),
4053                                         repl_buffer.params().documentClassPtr(),
4054                                         repl_buffer.params().authors(),
4055                                         bv->buffer().errorList("Paste"));
4056                 LYXERR(Debug::FIND, "After pasteParagraphList() cur=" << cur << endl);
4057                 sel_len = repl_buffer.paragraphs().begin()->size();
4058         } else if (cur.inMathed()) {
4059                 odocstringstream ods;
4060                 otexstream os(ods);
4061                 // OutputParams runparams(&repl_buffer.params().encoding());
4062                 OutputParams runparams(encodings.fromLyXName("utf8"));
4063                 runparams.nice = false;
4064                 runparams.flavor = Flavor::XeTeX;
4065                 runparams.linelen = 8000; //lyxrc.plaintext_linelen;
4066                 runparams.dryrun = true;
4067                 TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams);
4068                 //repl_buffer.getSourceCode(ods, 0, repl_buffer.paragraphs().size(), false);
4069                 docstring repl_latex = ods.str();
4070                 LYXERR(Debug::FIND, "Latexified replace_buffer: '" << repl_latex << "'");
4071                 string s;
4072                 (void)regex_replace(to_utf8(repl_latex), s, "\\$(.*)\\$", "$1");
4073                 (void)regex_replace(s, s, "\\\\\\[(.*)\\\\\\]", "$1");
4074                 repl_latex = from_utf8(s);
4075                 LYXERR(Debug::FIND, "Replacing by insert()ing latex: '" << repl_latex << "' cur=" << cur << " with depth=" << cur.depth());
4076                 MathData ar(cur.buffer());
4077                 asArray(repl_latex, ar, Parse::NORMAL);
4078                 cur.insert(ar);
4079                 sel_len = ar.size();
4080                 LYXERR(Debug::FIND, "After insert() cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len);
4081         }
4082         if (cur.pos() >= sel_len)
4083                 cur.pos() -= sel_len;
4084         else
4085                 cur.pos() = 0;
4086         LYXERR(Debug::FIND, "After pos adj cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len);
4087         bv->putSelectionAt(DocIterator(cur), sel_len, !opt.forward);
4088         bv->processUpdateFlags(Update::Force);
4089         return 1;
4090 }
4091
4092
4093 /// Perform a FindAdv operation.
4094 bool findAdv(BufferView * bv, FindAndReplaceOptions & opt)
4095 {
4096         DocIterator cur;
4097         int pos_len = 0;
4098
4099         // e.g., when invoking word-findadv from mini-buffer wither with
4100         //       wrong options syntax or before ever opening advanced F&R pane
4101         if (theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0)
4102                 return false;
4103
4104         try {
4105                 MatchStringAdv matchAdv(bv->buffer(), opt);
4106 #if QTSEARCH
4107                 if (!matchAdv.regexIsValid) {
4108                         bv->message(lyx::from_utf8(matchAdv.regexError));
4109                         return(false);
4110                 }
4111 #endif
4112                 int length = bv->cursor().selectionEnd().pos() - bv->cursor().selectionBegin().pos();
4113                 if (length > 0)
4114                         bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward);
4115                 num_replaced += findAdvReplace(bv, opt, matchAdv);
4116                 cur = bv->cursor();
4117                 if (opt.forward)
4118                         pos_len = findForwardAdv(cur, matchAdv);
4119                 else
4120                         pos_len = findBackwardsAdv(cur, matchAdv);
4121         } catch (exception & ex) {
4122                 bv->message(from_utf8(ex.what()));
4123                 return false;
4124         }
4125
4126         if (pos_len == 0) {
4127                 if (num_replaced > 0) {
4128                         switch (num_replaced)
4129                         {
4130                                 case 1:
4131                                         bv->message(_("One match has been replaced."));
4132                                         break;
4133                                 case 2:
4134                                         bv->message(_("Two matches have been replaced."));
4135                                         break;
4136                                 default:
4137                                         bv->message(bformat(_("%1$d matches have been replaced."), num_replaced));
4138                                         break;
4139                         }
4140                         num_replaced = 0;
4141                 }
4142                 else {
4143                         bv->message(_("Match not found."));
4144                 }
4145                 return false;
4146         }
4147
4148         if (num_replaced > 0)
4149                 bv->message(_("Match has been replaced."));
4150         else
4151                 bv->message(_("Match found."));
4152
4153         if (cur.pos() + pos_len > cur.lastpos()) {
4154                 // Prevent crash in bv->putSelectionAt()
4155                 // Should never happen, maybe LASSERT() here?
4156                 pos_len = cur.lastpos() - cur.pos();
4157         }
4158         LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << pos_len);
4159         bv->putSelectionAt(cur, pos_len, !opt.forward);
4160
4161         return true;
4162 }
4163
4164
4165 ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt)
4166 {
4167         os << to_utf8(opt.find_buf_name) << "\nEOSS\n"
4168            << opt.casesensitive << ' '
4169            << opt.matchword << ' '
4170            << opt.forward << ' '
4171            << opt.expandmacros << ' '
4172            << opt.ignoreformat << ' '
4173            << opt.replace_all << ' '
4174            << to_utf8(opt.repl_buf_name) << "\nEOSS\n"
4175            << opt.keep_case << ' '
4176            << int(opt.scope) << ' '
4177            << int(opt.restr);
4178
4179         LYXERR(Debug::FIND, "built: " << os.str());
4180
4181         return os;
4182 }
4183
4184
4185 istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
4186 {
4187         // LYXERR(Debug::FIND, "parsing");
4188         string s;
4189         string line;
4190         getline(is, line);
4191         while (line != "EOSS") {
4192                 if (! s.empty())
4193                         s = s + "\n";
4194                 s = s + line;
4195                 if (is.eof())   // Tolerate malformed request
4196                         break;
4197                 getline(is, line);
4198         }
4199         // LYXERR(Debug::FIND, "file_buf_name: '" << s << "'");
4200         opt.find_buf_name = from_utf8(s);
4201         is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all;
4202         is.get();       // Waste space before replace string
4203         s = "";
4204         getline(is, line);
4205         while (line != "EOSS") {
4206                 if (! s.empty())
4207                         s = s + "\n";
4208                 s = s + line;
4209                 if (is.eof())   // Tolerate malformed request
4210                         break;
4211                 getline(is, line);
4212         }
4213         // LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'");
4214         opt.repl_buf_name = from_utf8(s);
4215         is >> opt.keep_case;
4216         int i;
4217         is >> i;
4218         opt.scope = FindAndReplaceOptions::SearchScope(i);
4219         is >> i;
4220         opt.restr = FindAndReplaceOptions::SearchRestriction(i);
4221
4222         /*
4223         LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' '
4224                << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' '
4225                << opt.scope << ' ' << opt.restr);
4226         */
4227         return is;
4228 }
4229
4230 } // namespace lyx