src/lyxfind.cpp

   1 /**
   2  * \file lyxfind.cpp
   3  * This file is part of LyX, the document processor.
   4  * License details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author John Levon
   8  * \author Jürgen Vigna
   9  * \author Alfredo Braunstein
  10  * \author Tommaso Cucinotta
  11  * \author Kornel Benko
  12  *
  13  * Full author contact details are available in file CREDITS.
  14  */
  15
  16 #include <config.h>
  17
  18 #include "lyxfind.h"
  19
  20 #include "Buffer.h"
  21 #include "BufferList.h"
  22 #include "BufferParams.h"
  23 #include "BufferView.h"
  24 #include "Changes.h"
  25 #include "Cursor.h"
  26 #include "CutAndPaste.h"
  27 #include "FuncRequest.h"
  28 #include "LyX.h"
  29 #include "output_latex.h"
  30 #include "OutputParams.h"
  31 #include "Paragraph.h"
  32 #include "Text.h"
  33 #include "Encoding.h"
  34 #include "Language.h"
  35
  36 #include "frontends/Application.h"
  37 #include "frontends/alert.h"
  38
  39 #include "mathed/InsetMath.h"
  40 #include "mathed/InsetMathHull.h"
  41 #include "mathed/MathData.h"
  42 #include "mathed/MathStream.h"
  43 #include "mathed/MathSupport.h"
  44
  45 #include "support/debug.h"
  46 #include "support/docstream.h"
  47 #include "support/FileName.h"
  48 #include "support/gettext.h"
  49 #include "support/lassert.h"
  50 #include "support/lstrings.h"
  51 #include "support/textutils.h"
  52
  53 #include <unordered_map>
  54 #include <regex>
  55
  56 //#define ResultsDebug
  57 #define USE_QT_FOR_SEARCH
  58 #if defined(USE_QT_FOR_SEARCH)
  59         #include <QtCore>       // sets QT_VERSION
  60         #if (QT_VERSION >= 0x050000)
  61                 #include <QRegularExpression>
  62                 #define QTSEARCH 1
  63         #else
  64                 #define QTSEARCH 0
  65         #endif
  66 #else
  67         #define QTSEARCH 0
  68 #endif
  69
  70 using namespace std;
  71 using namespace lyx::support;
  72
  73 namespace lyx {
  74
  75 typedef unordered_map<string, string> AccentsMap;
  76 typedef unordered_map<string,string>::const_iterator AccentsIterator;
  77 static AccentsMap accents = unordered_map<string, string>();
  78
  79 // Helper class for deciding what should be ignored
  80 class IgnoreFormats {
  81  public:
  82         ///
  83         IgnoreFormats() = default;
  84         ///
  85         bool getFamily() const { return ignoreFamily_; }
  86         ///
  87         bool getSeries() const { return ignoreSeries_; }
  88         ///
  89         bool getShape() const { return ignoreShape_; }
  90         ///
  91         bool getUnderline() const { return ignoreUnderline_; }
  92         ///
  93         bool getMarkUp() const { return ignoreMarkUp_; }
  94         ///
  95         bool getStrikeOut() const { return ignoreStrikeOut_; }
  96         ///
  97         bool getSectioning() const { return ignoreSectioning_; }
  98         ///
  99         bool getFrontMatter() const { return ignoreFrontMatter_; }
 100         ///
 101         bool getColor() const { return ignoreColor_; }
 102         ///
 103         bool getLanguage() const { return ignoreLanguage_; }
 104         ///
 105         bool getDeleted() const { return ignoreDeleted_; }
 106         ///
 107         void setIgnoreDeleted(bool value);
 108         ///
 109         void setIgnoreFormat(string const & type, bool value, bool fromUser = true);
 110
 111 private:
 112         ///
 113         bool ignoreFamily_ = false;
 114         ///
 115         bool ignoreSeries_ = false;
 116         ///
 117         bool ignoreShape_ = false;
 118         ///
 119         bool ignoreUnderline_ = false;
 120         ///
 121         bool ignoreMarkUp_ = false;
 122         ///
 123         bool ignoreStrikeOut_ = false;
 124         ///
 125         bool ignoreSectioning_ = false;
 126         ///
 127         bool ignoreFrontMatter_ = false;
 128         ///
 129         bool ignoreColor_ = false;
 130         ///
 131         bool ignoreLanguage_ = false;
 132         bool userSelectedIgnoreLanguage_ = false;
 133         ///
 134         bool ignoreDeleted_ = true;
 135 };
 136
 137 void IgnoreFormats::setIgnoreFormat(string const & type, bool value, bool fromUser)
 138 {
 139         if (type == "color") {
 140                 ignoreColor_ = value;
 141         }
 142         else if (type == "language") {
 143                 if (fromUser) {
 144                         userSelectedIgnoreLanguage_ = value;
 145                         ignoreLanguage_ = value;
 146                 }
 147                 else
 148                         ignoreLanguage_ = (value || userSelectedIgnoreLanguage_);
 149         }
 150         else if (type == "sectioning") {
 151                 ignoreSectioning_ = value;
 152                 ignoreFrontMatter_ = value;
 153         }
 154         else if (type == "font") {
 155                 ignoreSeries_ = value;
 156                 ignoreShape_ = value;
 157                 ignoreFamily_ = value;
 158         }
 159         else if (type == "series") {
 160                 ignoreSeries_ = value;
 161         }
 162         else if (type == "shape") {
 163                 ignoreShape_ = value;
 164         }
 165         else if (type == "family") {
 166                 ignoreFamily_ = value;
 167         }
 168         else if (type == "markup") {
 169                 ignoreMarkUp_ = value;
 170         }
 171         else if (type == "underline") {
 172                 ignoreUnderline_ = value;
 173         }
 174         else if (type == "strike") {
 175                 ignoreStrikeOut_ = value;
 176         }
 177         else if (type == "deleted") {
 178                 ignoreDeleted_ = value;
 179         }
 180 }
 181
 182 // The global variable that can be changed from outside
 183 IgnoreFormats ignoreFormats;
 184
 185
 186 void setIgnoreFormat(string const & type, bool value, bool fromUser)
 187 {
 188   ignoreFormats.setIgnoreFormat(type, value, fromUser);
 189 }
 190
 191
 192 namespace {
 193
 194 bool parse_bool(docstring & howto, bool const defvalue = false)
 195 {
 196         if (howto.empty())
 197                 return defvalue;
 198         docstring var;
 199         howto = split(howto, var, ' ');
 200         return var == "1";
 201 }
 202
 203
 204 class MatchString
 205 {
 206 public:
 207         MatchString(docstring const & s, bool cs, bool mw)
 208                 : str(s), case_sens(cs), whole_words(mw)
 209         {}
 210
 211         // returns true if the specified string is at the specified position
 212         // del specifies whether deleted strings in ct mode will be considered
 213         int operator()(Paragraph const & par, pos_type pos, bool del = true) const
 214         {
 215                 return par.find(str, case_sens, whole_words, pos, del);
 216         }
 217
 218 private:
 219         // search string
 220         docstring str;
 221         // case sensitive
 222         bool case_sens;
 223         // match whole words only
 224         bool whole_words;
 225 };
 226
 227
 228 int findForward(DocIterator & cur, DocIterator const endcur,
 229                 MatchString const & match,
 230                 bool find_del = true, bool onlysel = false)
 231 {
 232         for (; cur; cur.forwardChar()) {
 233                 if (onlysel && endcur.pit() == cur.pit()
 234                     && endcur.idx() == cur.idx() && endcur.pos() < cur.pos())
 235                         break;
 236                 if (cur.inTexted()) {
 237                         int len = match(cur.paragraph(), cur.pos(), find_del);
 238                         if (len > 0)
 239                                 return len;
 240                 }
 241         }
 242         return 0;
 243 }
 244
 245
 246 int findBackwards(DocIterator & cur, DocIterator const endcur,
 247                   MatchString const & match,
 248                   bool find_del = true, bool onlysel = false)
 249 {
 250         while (cur) {
 251                 cur.backwardChar();
 252                 if (onlysel && endcur.pit() == cur.pit()
 253                     && endcur.idx() == cur.idx() && endcur.pos() > cur.pos())
 254                         break;
 255                 if (cur.inTexted()) {
 256                         int len = match(cur.paragraph(), cur.pos(), find_del);
 257                         if (len > 0)
 258                                 return len;
 259                 }
 260         }
 261         return 0;
 262 }
 263
 264
 265 bool searchAllowed(docstring const & str)
 266 {
 267         if (str.empty()) {
 268                 frontend::Alert::error(_("Search error"), _("Search string is empty"));
 269                 return false;
 270         }
 271         return true;
 272 }
 273
 274 } // namespace
 275
 276
 277 bool findOne(BufferView * bv, docstring const & searchstr,
 278              bool case_sens, bool whole, bool forward,
 279              bool find_del, bool check_wrap, bool auto_wrap,
 280              bool instant, bool onlysel)
 281 {
 282         if (!searchAllowed(searchstr))
 283                 return false;
 284
 285         DocIterator const endcur = forward ? bv->cursor().selectionEnd() : bv->cursor().selectionBegin();
 286
 287         if (onlysel && bv->cursor().selection()) {
 288                 docstring const matchstring = bv->cursor().selectionAsString(false);
 289                 docstring const lcmatchsting = support::lowercase(matchstring);
 290                 if (matchstring == searchstr || (!case_sens && lcmatchsting == lowercase(searchstr))) {
 291                         docstring q = _("The search string matches the selection, and search is limited to selection.\n"
 292                                         "Continue search outside?");
 293                         int search_answer = frontend::Alert::prompt(_("Search outside selection?"),
 294                                 q, 0, 1, _("&Yes"), _("&No"));
 295                         if (search_answer == 0) {
 296                                 bv->clearSelection();
 297                                 if (findOne(bv, searchstr, case_sens, whole, forward,
 298                                             find_del, check_wrap, auto_wrap, false, false))
 299                                         return true;
 300                         }
 301                         return false;
 302                 }
 303         }
 304
 305         DocIterator cur = forward
 306                 ? ((instant || onlysel) ? bv->cursor().selectionBegin() : bv->cursor().selectionEnd())
 307                 : ((instant || onlysel) ? bv->cursor().selectionEnd() : bv->cursor().selectionBegin());
 308
 309         MatchString const match(searchstr, case_sens, whole);
 310
 311         int match_len = forward
 312                 ? findForward(cur, endcur, match, find_del, onlysel)
 313                 : findBackwards(cur, endcur, match, find_del, onlysel);
 314
 315         if (match_len > 0)
 316                 bv->putSelectionAt(cur, match_len, !forward);
 317         else if (onlysel) {
 318                 docstring q = _("The search string was not found within the selection.\n"
 319                                 "Continue search outside?");
 320                 int search_answer = frontend::Alert::prompt(_("Search outside selection?"),
 321                         q, 0, 1, _("&Yes"), _("&No"));
 322                 if (search_answer == 0) {
 323                         bv->clearSelection();
 324                         if (findOne(bv, searchstr, case_sens, whole, forward,
 325                                     find_del, check_wrap, auto_wrap, false, false))
 326                                 return true;
 327                 }
 328                 return false;
 329         }
 330         else if (check_wrap) {
 331                 DocIterator cur_orig(bv->cursor());
 332                 if (!auto_wrap) {
 333                         docstring q;
 334                         if (forward)
 335                                 q = _("End of file reached while searching forward.\n"
 336                                   "Continue searching from the beginning?");
 337                         else
 338                                 q = _("Beginning of file reached while searching backward.\n"
 339                                   "Continue searching from the end?");
 340                         int wrap_answer = frontend::Alert::prompt(_("Wrap search?"),
 341                                 q, 0, 1, _("&Yes"), _("&No"));
 342                         auto_wrap = wrap_answer == 0;
 343                 }
 344                 if (auto_wrap) {
 345                         if (forward) {
 346                                 bv->cursor().clear();
 347                                 bv->cursor().push_back(CursorSlice(bv->buffer().inset()));
 348                         } else {
 349                                 bv->cursor().setCursor(doc_iterator_end(&bv->buffer()));
 350                                 bv->cursor().backwardPos();
 351                         }
 352                         bv->clearSelection();
 353                         if (findOne(bv, searchstr, case_sens, whole, forward,
 354                                     find_del, false, false, false, false))
 355                                 return true;
 356                 }
 357                 bv->cursor().setCursor(cur_orig);
 358                 return false;
 359         }
 360
 361         return match_len > 0;
 362 }
 363
 364
 365 namespace {
 366
 367 int replaceAll(BufferView * bv,
 368                docstring const & searchstr, docstring const & replacestr,
 369                bool case_sens, bool whole, bool onlysel)
 370 {
 371         Buffer & buf = bv->buffer();
 372
 373         if (!searchAllowed(searchstr) || buf.isReadonly())
 374                 return 0;
 375
 376         DocIterator startcur = bv->cursor().selectionBegin();
 377         DocIterator endcur = bv->cursor().selectionEnd();
 378         bool const had_selection = bv->cursor().selection();
 379
 380         MatchString const match(searchstr, case_sens, whole);
 381         int num = 0;
 382
 383         int const rsize = replacestr.size();
 384         int const ssize = searchstr.size();
 385
 386         Cursor cur(*bv);
 387         cur.setCursor(doc_iterator_begin(&buf));
 388         int match_len = findForward(cur, endcur, match, false, onlysel);
 389         while (match_len > 0) {
 390                 // Backup current cursor position and font.
 391                 pos_type const pos = cur.pos();
 392                 Font const font = cur.paragraph().getFontSettings(buf.params(), pos);
 393                 cur.recordUndo();
 394                 int struck = ssize -
 395                         cur.paragraph().eraseChars(pos, pos + match_len,
 396                                                    buf.params().track_changes);
 397                 cur.paragraph().insert(pos, replacestr, font,
 398                                        Change(buf.params().track_changes
 399                                               ? Change::INSERTED
 400                                               : Change::UNCHANGED));
 401                 for (int i = 0; i < rsize + struck; ++i)
 402                         cur.forwardChar();
 403                 if (onlysel && cur.pit() == endcur.pit() && cur.idx() == endcur.idx()) {
 404                         // Adjust end of selection for replace-all in selection
 405                         if (rsize > ssize) {
 406                                 int const offset = rsize - ssize;
 407                                 for (int i = 0; i < offset + struck; ++i)
 408                                         endcur.forwardPos();
 409                         } else {
 410                                 int const offset = ssize - rsize;
 411                                 for (int i = 0; i < offset + struck; ++i)
 412                                         endcur.backwardPos();
 413                         }
 414                 }
 415                 ++num;
 416                 match_len = findForward(cur, endcur, match, false, onlysel);
 417         }
 418
 419         bv->putSelectionAt(doc_iterator_begin(&buf), 0, false);
 420
 421         startcur.fixIfBroken();
 422         bv->setCursor(startcur);
 423
 424         // Reset selection, accounting for changes in selection
 425         if (had_selection) {
 426                 endcur.fixIfBroken();
 427                 bv->cursor().resetAnchor();
 428                 bv->setCursorSelectionTo(endcur);
 429         }
 430
 431         return num;
 432 }
 433
 434
 435 // the idea here is that we are going to replace the string that
 436 // is selected IF it is the search string.
 437 // if there is a selection, but it is not the search string, then
 438 // we basically ignore it. (FIXME We ought to replace only within
 439 // the selection.)
 440 // if there is no selection, then:
 441 //  (i) if some search string has been provided, then we find it.
 442 //      (think of how the dialog works when you hit "replace" the
 443 //      first time.)
 444 // (ii) if no search string has been provided, then we treat the
 445 //      word the cursor is in as the search string. (why? i have no
 446 //      idea.) but this only works in text?
 447 //
 448 // returns the number of replacements made (one, if any) and
 449 // whether anything at all was done.
 450 pair<bool, int> replaceOne(BufferView * bv, docstring searchstr,
 451                            docstring const & replacestr, bool case_sens,
 452                            bool whole, bool forward, bool findnext, bool wrap,
 453                            bool onlysel)
 454 {
 455         Cursor & cur = bv->cursor();
 456         if (!cur.selection() || onlysel) {
 457                 // no selection, non-empty search string: find it
 458                 if (!searchstr.empty()) {
 459                         bool const found = findOne(bv, searchstr, case_sens, whole,
 460                                                    forward, true, findnext, wrap, false, onlysel);
 461                         return make_pair(found, 0);
 462                 }
 463                 // empty search string
 464                 if (!cur.inTexted())
 465                         // bail in math
 466                         return make_pair(false, 0);
 467                 // select current word and treat it as the search string.
 468                 // This causes a minor bug as undo will restore this selection,
 469                 // which the user did not create (#8986).
 470                 cur.innerText()->selectWord(cur, WHOLE_WORD);
 471                 searchstr = cur.selectionAsString(false, true);
 472         }
 473
 474         // if we still don't have a search string, report the error
 475         // and abort.
 476         if (!searchAllowed(searchstr))
 477                 return make_pair(false, 0);
 478
 479         bool have_selection = cur.selection();
 480         docstring const selected = cur.selectionAsString(false, true);
 481         bool match =
 482                 case_sens
 483                 ? searchstr == selected
 484                 : compare_no_case(searchstr, selected) == 0;
 485
 486         // no selection or current selection is not search word:
 487         // just find the search word
 488         if (!have_selection || !match) {
 489                 bool const found = findOne(bv, searchstr, case_sens, whole, forward,
 490                                            true, findnext, wrap, false, onlysel);
 491                 return make_pair(found, 0);
 492         }
 493
 494         // we're now actually ready to replace. if the buffer is
 495         // read-only, we can't, though.
 496         if (bv->buffer().isReadonly())
 497                 return make_pair(false, 0);
 498
 499         cap::replaceSelectionWithString(cur, replacestr);
 500         if (forward) {
 501                 cur.pos() += replacestr.length();
 502                 LASSERT(cur.pos() <= cur.lastpos(),
 503                         cur.pos() = cur.lastpos());
 504         }
 505         if (findnext)
 506                 findOne(bv, searchstr, case_sens, whole,
 507                         forward, false, findnext, wrap, false, onlysel);
 508
 509         return make_pair(true, 1);
 510 }
 511
 512 } // namespace
 513
 514
 515 docstring const find2string(docstring const & search,
 516                             bool casesensitive, bool matchword,
 517                             bool forward, bool wrap, bool instant,
 518                             bool onlysel)
 519 {
 520         odocstringstream ss;
 521         ss << search << '\n'
 522            << int(casesensitive) << ' '
 523            << int(matchword) << ' '
 524            << int(forward) << ' '
 525            << int(wrap) << ' '
 526            << int(instant) << ' '
 527            << int(onlysel);
 528         return ss.str();
 529 }
 530
 531
 532 docstring const replace2string(docstring const & replace,
 533                                docstring const & search,
 534                                bool casesensitive, bool matchword,
 535                                bool all, bool forward, bool findnext,
 536                                bool wrap, bool onlysel)
 537 {
 538         odocstringstream ss;
 539         ss << replace << '\n'
 540            << search << '\n'
 541            << int(casesensitive) << ' '
 542            << int(matchword) << ' '
 543            << int(all) << ' '
 544            << int(forward) << ' '
 545            << int(findnext) << ' '
 546            << int(wrap) << ' '
 547            << int(onlysel);
 548         return ss.str();
 549 }
 550
 551
 552 docstring const string2find(docstring const & argument,
 553                               bool &casesensitive,
 554                               bool &matchword,
 555                               bool &forward,
 556                               bool &wrap,
 557                               bool &instant,
 558                               bool &onlysel)
 559 {
 560         // data is of the form
 561         // "<search>
 562         //  <casesensitive> <matchword> <forward> <wrap> <onlysel>"
 563         docstring search;
 564         docstring howto = split(argument, search, '\n');
 565
 566         casesensitive = parse_bool(howto);
 567         matchword     = parse_bool(howto);
 568         forward       = parse_bool(howto, true);
 569         wrap          = parse_bool(howto);
 570         instant       = parse_bool(howto);
 571         onlysel       = parse_bool(howto);
 572
 573         return search;
 574 }
 575
 576
 577 bool lyxfind(BufferView * bv, FuncRequest const & ev)
 578 {
 579         if (!bv || ev.action() != LFUN_WORD_FIND)
 580                 return false;
 581
 582         //lyxerr << "find called, cmd: " << ev << endl;
 583         bool casesensitive;
 584         bool matchword;
 585         bool forward;
 586         bool wrap;
 587         bool instant;
 588         bool onlysel;
 589
 590         docstring search = string2find(ev.argument(), casesensitive,
 591                                        matchword, forward, wrap, instant, onlysel);
 592
 593         return findOne(bv, search, casesensitive, matchword, forward,
 594                        false, true, wrap, instant, onlysel);
 595 }
 596
 597
 598 bool lyxreplace(BufferView * bv, FuncRequest const & ev)
 599 {
 600         if (!bv || ev.action() != LFUN_WORD_REPLACE)
 601                 return false;
 602
 603         // data is of the form
 604         // "<search>
 605         //  <replace>
 606         //  <casesensitive> <matchword> <all> <forward> <findnext> <wrap> <onlysel>"
 607         docstring search;
 608         docstring rplc;
 609         docstring howto = split(ev.argument(), rplc, '\n');
 610         howto = split(howto, search, '\n');
 611
 612         bool casesensitive = parse_bool(howto);
 613         bool matchword     = parse_bool(howto);
 614         bool all           = parse_bool(howto);
 615         bool forward       = parse_bool(howto, true);
 616         bool findnext      = parse_bool(howto, true);
 617         bool wrap          = parse_bool(howto);
 618         bool onlysel       = parse_bool(howto);
 619
 620         if (!bv->cursor().selection())
 621                 // only selection only makes sense with selection
 622                 onlysel = false;
 623
 624         bool update = false;
 625
 626         int replace_count = 0;
 627         if (all) {
 628                 replace_count = replaceAll(bv, search, rplc, casesensitive,
 629                                            matchword, onlysel);
 630                 update = replace_count > 0;
 631         } else {
 632                 pair<bool, int> rv =
 633                         replaceOne(bv, search, rplc, casesensitive, matchword,
 634                                    forward, findnext, wrap, onlysel);
 635                 update = rv.first;
 636                 replace_count = rv.second;
 637         }
 638
 639         Buffer const & buf = bv->buffer();
 640         if (!update) {
 641                 // emit message signal.
 642                 if (onlysel)
 643                         buf.message(_("String not found in selection."));
 644                 else
 645                         buf.message(_("String not found."));
 646         } else {
 647                 if (replace_count == 0) {
 648                         buf.message(_("String found."));
 649                 } else if (replace_count == 1) {
 650                         buf.message(_("String has been replaced."));
 651                 } else {
 652                         docstring const str = onlysel
 653                                         ? bformat(_("%1$d strings have been replaced in the selection."), replace_count)
 654                                         : bformat(_("%1$d strings have been replaced."), replace_count);
 655                         buf.message(str);
 656                 }
 657         }
 658         return update;
 659 }
 660
 661
 662 bool findNextChange(BufferView * bv, Cursor & cur, bool const check_wrap)
 663 {
 664         for (; cur; cur.forwardPos())
 665                 if (cur.inTexted() && cur.paragraph().isChanged(cur.pos()))
 666                         return true;
 667
 668         if (check_wrap) {
 669                 DocIterator cur_orig(bv->cursor());
 670                 docstring q = _("End of file reached while searching forward.\n"
 671                           "Continue searching from the beginning?");
 672                 int wrap_answer = frontend::Alert::prompt(_("Wrap search?"),
 673                         q, 0, 1, _("&Yes"), _("&No"));
 674                 if (wrap_answer == 0) {
 675                         bv->cursor().clear();
 676                         bv->cursor().push_back(CursorSlice(bv->buffer().inset()));
 677                         bv->clearSelection();
 678                         cur.setCursor(bv->cursor().selectionBegin());
 679                         if (findNextChange(bv, cur, false))
 680                                 return true;
 681                 }
 682                 bv->cursor().setCursor(cur_orig);
 683         }
 684
 685         return false;
 686 }
 687
 688
 689 bool findPreviousChange(BufferView * bv, Cursor & cur, bool const check_wrap)
 690 {
 691         for (cur.backwardPos(); cur; cur.backwardPos()) {
 692                 if (cur.inTexted() && cur.paragraph().isChanged(cur.pos()))
 693                         return true;
 694         }
 695
 696         if (check_wrap) {
 697                 DocIterator cur_orig(bv->cursor());
 698                 docstring q = _("Beginning of file reached while searching backward.\n"
 699                           "Continue searching from the end?");
 700                 int wrap_answer = frontend::Alert::prompt(_("Wrap search?"),
 701                         q, 0, 1, _("&Yes"), _("&No"));
 702                 if (wrap_answer == 0) {
 703                         bv->cursor().setCursor(doc_iterator_end(&bv->buffer()));
 704                         bv->cursor().backwardPos();
 705                         bv->clearSelection();
 706                         cur.setCursor(bv->cursor().selectionBegin());
 707                         if (findPreviousChange(bv, cur, false))
 708                                 return true;
 709                 }
 710                 bv->cursor().setCursor(cur_orig);
 711         }
 712
 713         return false;
 714 }
 715
 716
 717 bool selectChange(Cursor & cur, bool forward)
 718 {
 719         if (!cur.inTexted() || !cur.paragraph().isChanged(cur.pos()))
 720                 return false;
 721         Change ch = cur.paragraph().lookupChange(cur.pos());
 722
 723         CursorSlice tip1 = cur.top();
 724         for (; tip1.pit() < tip1.lastpit() || tip1.pos() < tip1.lastpos(); tip1.forwardPos()) {
 725                 Change ch2 = tip1.paragraph().lookupChange(tip1.pos());
 726                 if (!ch2.isSimilarTo(ch))
 727                         break;
 728         }
 729         CursorSlice tip2 = cur.top();
 730         for (; tip2.pit() > 0 || tip2.pos() > 0;) {
 731                 tip2.backwardPos();
 732                 Change ch2 = tip2.paragraph().lookupChange(tip2.pos());
 733                 if (!ch2.isSimilarTo(ch)) {
 734                         // take a step forward to correctly set the selection
 735                         tip2.forwardPos();
 736                         break;
 737                 }
 738         }
 739         if (forward)
 740                 swap(tip1, tip2);
 741         cur.top() = tip1;
 742         cur.bv().mouseSetCursor(cur, false);
 743         cur.top() = tip2;
 744         cur.bv().mouseSetCursor(cur, true);
 745         return true;
 746 }
 747
 748
 749 namespace {
 750
 751
 752 bool findChange(BufferView * bv, bool forward)
 753 {
 754         Cursor cur(*bv);
 755         cur.setCursor(forward ? bv->cursor().selectionEnd()
 756                       : bv->cursor().selectionBegin());
 757         forward ? findNextChange(bv, cur, true) : findPreviousChange(bv, cur, true);
 758         return selectChange(cur, forward);
 759 }
 760
 761 } // namespace
 762
 763 bool findNextChange(BufferView * bv)
 764 {
 765         return findChange(bv, true);
 766 }
 767
 768
 769 bool findPreviousChange(BufferView * bv)
 770 {
 771         return findChange(bv, false);
 772 }
 773
 774
 775
 776 namespace {
 777
 778 typedef vector<pair<string, string> > Escapes;
 779
 780 string string2regex(string in)
 781 {
 782         static std::regex specialChars { R"([-[\]{}()*+?.,\^$|#\s\$\\])" };
 783         string temp = std::regex_replace(in, specialChars,  R"(\$&)" );
 784         string temp2("");
 785         size_t lastpos = 0;
 786         size_t fl_pos = 0;
 787         int offset = 1;
 788         while (fl_pos < temp.size()) {
 789                 fl_pos = temp.find("\\\\foreignlanguage", lastpos + offset);
 790                 if (fl_pos == string::npos)
 791                         break;
 792                 offset = 16;
 793                 temp2 += temp.substr(lastpos, fl_pos - lastpos);
 794                 temp2 += "\\n";
 795                 lastpos = fl_pos;
 796         }
 797         if (lastpos == 0)
 798                 return(temp);
 799         if (lastpos < temp.size()) {
 800                 temp2 += temp.substr(lastpos, temp.size() - lastpos);
 801         }
 802         return temp2;
 803 }
 804
 805 string correctRegex(string t, bool withformat)
 806 {
 807         /* Convert \backslash => \
 808          * and \{, \}, \[, \] => {, }, [, ]
 809          */
 810         string s("");
 811         regex wordre("(\\\\)*(\\\\((backslash|mathcircumflex) ?|[\\[\\]\\{\\}]))");
 812         size_t lastpos = 0;
 813         smatch sub;
 814         bool backslashed = false;
 815         for (sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
 816                 sub = *it;
 817                 string replace;
 818                 if ((sub.position(2) - sub.position(0)) % 2 == 1) {
 819                         continue;
 820                 }
 821                 else {
 822                         if (sub.str(4) == "backslash") {
 823                                 replace = "\\";
 824                                 if (withformat) {
 825                                         // transforms '\backslash \{' into '\{'
 826                                         // and '\{' into '{'
 827                                         string next = t.substr(sub.position(2) + sub.str(2).length(), 2);
 828                                         if ((next == "\\{") || (next == "\\}")) {
 829                                                 replace = "";
 830                                                 backslashed = true;
 831                                         }
 832                                 }
 833                         }
 834                         else if (sub.str(4) == "mathcircumflex")
 835                                 replace = "^";
 836                         else if (backslashed) {
 837                                 backslashed = false;
 838                                 if (withformat && (sub.str(3) == "{"))
 839                                         replace = accents["braceleft"];
 840                                 else if (withformat && (sub.str(3) == "}"))
 841                                         replace = accents["braceright"];
 842                                 else {
 843                                         // else part should not exist
 844                                         LASSERT(1, /**/);
 845                                 }
 846                         }
 847                         else
 848                                 replace = sub.str(3);
 849                 }
 850                 if (lastpos < (size_t) sub.position(2))
 851                         s += t.substr(lastpos, sub.position(2) - lastpos);
 852                 s += replace;
 853                 lastpos = sub.position(2) + sub.length(2);
 854         }
 855         if (lastpos == 0)
 856                 return t;
 857         else if (lastpos < t.length())
 858                 s += t.substr(lastpos, t.length() - lastpos);
 859         return s;
 860 }
 861
 862 /// Within \regexp{} apply get_lyx_unescapes() only (i.e., preserve regexp semantics of the string),
 863 /// while outside apply get_lyx_unescapes()+get_regexp_escapes().
 864 /// If match_latex is true, then apply regexp_latex_escapes() to \regexp{} contents as well.
 865 string escape_for_regex(string s, bool withformat)
 866 {
 867         size_t lastpos = 0;
 868         string result = "";
 869         while (lastpos < s.size()) {
 870                 size_t regex_pos = s.find("\\regexp{", lastpos);
 871                 if (regex_pos == string::npos) {
 872                         regex_pos = s.size();
 873                 }
 874                 if (regex_pos > lastpos) {
 875                         result += string2regex(s.substr(lastpos, regex_pos-lastpos));
 876                         lastpos = regex_pos;
 877                         if (lastpos == s.size())
 878                                 break;
 879                 }
 880                 size_t end_pos = s.find("\\endregexp{}}", regex_pos + 8);
 881                 result += correctRegex(s.substr(regex_pos + 8, end_pos -(regex_pos + 8)), withformat);
 882                 lastpos = end_pos + 13;
 883         }
 884         return result;
 885 }
 886
 887
 888 /// Wrapper for lyx::regex_replace with simpler interface
 889 bool regex_replace(string const & s, string & t, string const & searchstr,
 890                    string const & replacestr)
 891 {
 892         regex e(searchstr, regex_constants::ECMAScript);
 893         ostringstream oss;
 894         ostream_iterator<char, char> it(oss);
 895         regex_replace(it, s.begin(), s.end(), e, replacestr);
 896         // tolerate t and s be references to the same variable
 897         bool rv = (s != oss.str());
 898         t = oss.str();
 899         return rv;
 900 }
 901
 902 class MatchResult {
 903 public:
 904         enum range {
 905                 newIsTooFar,
 906                 newIsBetter,
 907                 newIsInvalid
 908         };
 909         int match_len;
 910         int match_prefix;
 911         int match2end;
 912         int pos;
 913         int leadsize;
 914         int pos_len;
 915         int searched_size;
 916         vector <string> result = vector <string>();
 917         MatchResult(int len = 0): match_len(len),match_prefix(0),match2end(0), pos(0),leadsize(0),pos_len(-1),searched_size(0) {};
 918 };
 919
 920 static MatchResult::range interpretMatch(MatchResult &oldres, MatchResult &newres)
 921 {
 922   if (newres.match2end < oldres.match2end)
 923     return MatchResult::newIsTooFar;
 924   if (newres.match_len < oldres.match_len)
 925     return MatchResult::newIsTooFar;
 926
 927   if (newres.match_len == oldres.match_len) {
 928     if (newres.match2end == oldres.match2end)
 929       return MatchResult::newIsBetter;
 930   }
 931   return MatchResult::newIsInvalid;
 932 }
 933
/** The class performing a match between a position in the document and the FindAdvOptions.
 **
 ** An instance is built from a buffer and the search options and is then
 ** called like a function on document positions.
 **/

class MatchStringAdv {
public:
        MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt);

        /** Tests if text starting at the supplied position matches with the one provided to the MatchStringAdv
         ** constructor as opt.search, under the opt.* options settings.
         **
         ** @param cur
         **     Position in the document at which the test starts.
         ** @param len
         **     Defaults to -1; NOTE(review): presumably a limit on the amount
         **     of text examined, with -1 meaning "no limit" -- confirm in findAux().
         ** @param at_begin
         **     If set, then match is searched only against beginning of text starting at cur.
         **     If unset, then match is searched anywhere in text starting at cur.
         **
         ** @return
         ** A MatchResult. FillResults() treats match_len > 0 as "match found",
         ** so match_len is presumably zero if no match was found.
         **/
        MatchResult operator()(DocIterator const & cur, int len = -1, bool at_begin = true) const;
#if QTSEARCH
        // Only present when searching with QRegularExpression
        bool regexIsValid;
        string regexError;
#endif

public:
        /// buffer
        lyx::Buffer * p_buf;
        /// first buffer on which search was started
        lyx::Buffer * const p_first_buf;
        /// options
        FindAndReplaceOptions const & opt;

private:
        /// Auxiliary find method (does not account for opt.matchword)
        MatchResult findAux(DocIterator const & cur, int len = -1, bool at_begin = true) const;
        /// Set up regexp and regexp2 from the given expression strings
        void CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string = "");

        /** Normalize a stringified or latexified LyX paragraph.
         **
         ** Normalize means:
         ** <ul>
         **   <li>if search is not casesensitive, then lowercase the string;
         **   <li>remove any newline at begin or end of the string;
         **   <li>replace any newline in the middle of the string with a simple space;
         **   <li>remove stale empty styles and environments, like \emph{} and \textbf{}.
         ** </ul>
         **
         ** @todo Normalization should also expand macros, if the corresponding
         ** search option was checked.
         **/
        string normalize(docstring const & s) const;
        // normalized string to search
        string par_as_string;
        // regular expression to use for searching
        // regexp2 is same as regexp, but prefixed with a ".*?"
        // The regex engine is selected at compile time through QTSEARCH.
#if QTSEARCH
        QRegularExpression regexp;
        QRegularExpression regexp2;
#else
        regex regexp;
        regex regexp2;
#endif
        // leading format material as string
        string lead_as_string;
        // par_as_string after removal of lead_as_string
        string par_as_string_nolead;
        // unmatched open braces in the search string/regexp
        int open_braces;
        // number of (.*?) subexpressions added at end of search regexp for closing
        // environments, math mode, styles, etc...
        int close_wildcards;
public:
        // Are we searching with regular expressions ?
        bool use_regexp;
        // Number of entries of 'matches' that are valid; set by FillResults()
        static int valid_matches;
        // Strings of the last successful match; shared by all instances (static)
        static vector <string> matches;
        // Copy found_mr.result into 'matches' and update 'valid_matches'
        void FillResults(MatchResult &found_mr);
};
1011
1012 int MatchStringAdv::valid_matches = 0;
1013 vector <string> MatchStringAdv::matches = vector <string>(10);
1014
1015 void MatchStringAdv::FillResults(MatchResult &found_mr)
1016 {
1017   if (found_mr.match_len > 0) {
1018     valid_matches = found_mr.result.size();
1019     for (size_t i = 0; i < found_mr.result.size(); i++)
1020       matches[i] = found_mr.result[i];
1021   }
1022   else
1023     valid_matches = 0;
1024 }
1025
1026 static docstring buffer_to_latex(Buffer & buffer)
1027 {
1028         //OutputParams runparams(&buffer.params().encoding());
1029         OutputParams runparams(encodings.fromLyXName("utf8"));
1030         odocstringstream ods;
1031         otexstream os(ods);
1032         runparams.nice = true;
1033         runparams.flavor = Flavor::XeTeX;
1034         runparams.linelen = 10000; //lyxrc.plaintext_linelen;
1035         // No side effect of file copying and image conversion
1036         runparams.dryrun = true;
1037         if (ignoreFormats.getDeleted())
1038                 runparams.for_searchAdv = OutputParams::SearchWithoutDeleted;
1039         else
1040                 runparams.for_searchAdv = OutputParams::SearchWithDeleted;
1041         pit_type const endpit = buffer.paragraphs().size();
1042         for (pit_type pit = 0; pit != endpit; ++pit) {
1043                 TeXOnePar(buffer, buffer.text(), pit, os, runparams);
1044                 LYXERR(Debug::FIND, "searchString up to here: " << ods.str());
1045         }
1046         return ods.str();
1047 }
1048
1049
1050 static docstring stringifySearchBuffer(Buffer & buffer, FindAndReplaceOptions const & opt)
1051 {
1052         docstring str;
1053         if (!opt.ignoreformat) {
1054                 str = buffer_to_latex(buffer);
1055         } else {
1056                 // OutputParams runparams(&buffer.params().encoding());
1057                 OutputParams runparams(encodings.fromLyXName("utf8"));
1058                 runparams.nice = true;
1059                 runparams.flavor = Flavor::XeTeX;
1060                 runparams.linelen = 10000; //lyxrc.plaintext_linelen;
1061                 runparams.dryrun = true;
1062                 int option = AS_STR_INSETS |AS_STR_PLAINTEXT;
1063                 if (ignoreFormats.getDeleted()) {
1064                         option |= AS_STR_SKIPDELETE;
1065                         runparams.for_searchAdv = OutputParams::SearchWithoutDeleted;
1066                 }
1067                 else {
1068                         runparams.for_searchAdv = OutputParams::SearchWithDeleted;
1069                 }
1070                 for (pos_type pit = pos_type(0); pit < (pos_type)buffer.paragraphs().size(); ++pit) {
1071                         Paragraph const & par = buffer.paragraphs().at(pit);
1072                         LYXERR(Debug::FIND, "Adding to search string: '"
1073                                << par.asString(pos_type(0), par.size(),
1074                                                option,
1075                                                &runparams)
1076                                << "'");
1077                         str += par.asString(pos_type(0), par.size(),
1078                                             option,
1079                                             &runparams);
1080                 }
1081                 // Even in ignore-format we have to remove "\text{}, \lyxmathsym{}" parts
1082                 string t = to_utf8(str);
1083                 while (regex_replace(t, t, "\\\\(text|lyxmathsym|ensuremath)\\{([^\\}]*)\\}", "$2"));
1084                 str = from_utf8(t);
1085         }
1086         return str;
1087 }
1088
1089
1090 /// Return separation pos between the leading material and the rest
1091 static size_t identifyLeading(string const & s)
1092 {
1093         string t = s;
1094         // @TODO Support \item[text]
1095         // Kornel: Added textsl, textsf, textit, texttt and noun
1096         // + allow to search for colored text too
1097         while (regex_replace(t, t, "^\\\\(("
1098                              "(author|title|subtitle|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|"
1099                                "lyxaddress|lyxrightaddress|"
1100                                "footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|"
1101                                "emph|noun|minisec|text(bf|md|sl|sf|it|tt))|"
1102                              "((textcolor|foreignlanguage|latexenvironment)\\{[a-z]+\\*?\\})|"
1103                              "(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part|chapter)\\*?)\\{", "")
1104                || regex_replace(t, t, "^\\$", "")
1105                || regex_replace(t, t, "^\\\\\\[", "")
1106                || regex_replace(t, t, "^ ?\\\\item\\{[a-z]+\\}", "")
1107                || regex_replace(t, t, "^\\\\begin\\{[a-zA-Z_]*\\*?\\}", ""))
1108                ;
1109         LYXERR(Debug::FIND, "  after removing leading $, \\[ , \\emph{, \\textbf{, etc.: '" << t << "'");
1110         return s.find(t);
1111 }
1112
/*
 * Given a latexified string, collect the format features it uses.
 * The features of the search pattern are later compared with the features
 * of the searched text. If the pattern's features are not a subset of those
 * of the analyzed text, a search that respects formats can stop early in
 * the relevant inset.
 */
using Features = map<string, bool>;

/// Collect every macro opener "\name{", "\name*{" or "\name{arg}{" of \p s
/// whose name is in the list of handled features. Everything from "\regexp{"
/// up to the next "\endregexp{" is skipped.
static Features identifyFeatures(string const & s)
{
	static regex const feature("\\\\(([a-zA-Z]+(\\{([a-z]+\\*?)\\}|\\*)?))\\{");
	static regex const valid("^("
		"("
			"(footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge|"
				"emph|noun|text(bf|md|sl|sf|it|tt)|"
				"(textcolor|foreignlanguage|item|listitem|latexenvironment)\\{[a-z]+\\*?\\})|"
			"(u|uu)line|(s|x)out|uwave|"
			"(sub|extra)?title|author|subject|publishers|dedication|(upper|lower)titleback|lyx(right)?address)|"
		"((sub)?(((sub)?section)|paragraph)|part|chapter|lyxslide)\\*?)$");

	Features found;
	bool insideRegexp = false;
	sregex_iterator const last;
	for (sregex_iterator it(s.begin(), s.end(), feature); it != last; ++it) {
		string const token = it->str(1);
		if (insideRegexp) {
			// Only the end marker is of interest here
			if (token == "endregexp")
				insideRegexp = false;
			continue;
		}
		if (token == "regexp") {
			insideRegexp = true;
			continue;
		}
		// Unknown macros are silently ignored
		if (regex_match(token, valid))
			found[token] = true;
	}
	return found;
}
1162
/*
 * Describes one key of the form "\\[a-z]+{" found in a latexified string:
 * its kind plus bookkeeping about where the token and its data are located.
 */
class KeyInfo {
public:
	enum KeyType {
		// Char type with content discarded, like \hspace{1cm}
		noContent,
		// Char, like \backslash
		isChar,
		// replace starting backslash with '#'
		isText,
		// \part, \section*, ...
		isSectioning,
		// title, author etc
		isTitle,
		// \foreignlanguage{ngerman}, ...
		isMain,
		// inside \code{}, to discard language in content
		noMain,
		isRegex,
		// \begin{eqnarray}...\end{eqnarray}, ... $...$
		isMath,
		// fonts, colors, markups, ...
		isStandard,
		// footnotesize, ... large, ...; ignore all of them
		isSize,
		invalid,
		// inputencoding, ...
		// Discard also content, because they do not help in search
		doRemove,
		// twocolumns, ...; like remove, but also all arguments
		removeWithArg,
		// item, listitem
		isList,
		// tex, latex, ... like isChar
		isIgnored,
		// like \lettrine[lines=5]{}{}
		cleanToStart,
		// like isStandard, but always remove head
		headRemove,
		// End of arguments marker for lettrine, so that they can be ignored
		endArguments
	};

	// An 'invalid' key with all positions unset (-1)
	KeyInfo() = default;
	// Key of the given type; every other member keeps its default
	KeyInfo(KeyType type, int parcount, bool disable)
		: keytype(type),
		  parenthesiscount(parcount),
		  disabled(disable)
	{}

	KeyType keytype = invalid;
	string head;
	int _tokensize = -1;
	int _tokenstart = -1;
	int _dataStart = -1;
	int _dataEnd = -1;
	int parenthesiscount = 1;
	bool disabled = false;
	// by pattern
	bool used = false;
};
1227
/// Pair of integer bounds; used by Intervall to describe one ignored region.
/// Both bounds default to 0.
class Border {
public:
	int low;
	int upper;
	Border(int l=0, int u=0)
		: low(l), upper(u)
	{}
};
1234
// Capacity of the depts[]/closes[] tables of Intervall.
// NOTE(review): handleOpenP() increments actualdeptindex without comparing
// it to this limit.
#define MAXOPENED 30
/*
 * Bookkeeping for one latexified string: which regions of it are to be
 * ignored (borders) and which braces are currently open (depts/closes).
 */
class Intervall {
  bool isPatternString_;
public:
  // Keeps a copy of p; starts with no ignored region and nesting level 0
  explicit Intervall(bool isPattern, string const & p) :
        isPatternString_(isPattern), par(p), ignoreidx(-1), actualdeptindex(0),
        hasTitle(false), langcount(0)
  {
    depts[0] = 0;
    closes[0] = 0;
  }

  // The string being processed; removeAccents() rewrites it in place
  string par;
  // Index of the last used entry of borders, -1 if there is none
  int ignoreidx;
  // Ignored regions, kept sorted by addIntervall().
  // Static: one table shared by all Intervall objects.
  static vector<Border> borders;
  // Per nesting level: position behind the opening brace (handleOpenP)
  int depts[MAXOPENED];
  // Per nesting level: position behind the closing brace, -1 while still open
  int closes[MAXOPENED];
  // Current nesting level, index into depts[] and closes[]
  int actualdeptindex;
  int previousNotIgnored(int) const;
  int nextNotIgnored(int) const;
  void handleOpenP(int i);
  void handleCloseP(int i, bool closingAllowed);
  void resetOpenedP(int openPos);
  void addIntervall(int upper);
  void addIntervall(int low, int upper); /* if explicit */
  void removeAccents();
  void setForDefaultLang(KeyInfo const & defLang) const;
  int findclosing(int start, int end, char up, char down, int repeat);
  void handleParentheses(int lastpos, bool closingAllowed);
  // If set, setForDefaultLang() adjusts borders[1] instead of borders[0]
  bool hasTitle;
  int langcount;        // Number of disabled language specs up to current position in actual interval
  // 0 if par[pos] is not '{', 3 for "{[}" or "{]}", 1 for any other '{'
  int isOpeningPar(int pos) const;
  string titleValue;
  void output(ostringstream &os, int lastpos);
  // string show(int lastpos);
};
1271
1272 vector<Border> Intervall::borders = vector<Border>(30);
1273
1274 int Intervall::isOpeningPar(int pos) const
1275 {
1276   if ((pos < 0) || (size_t(pos) >= par.size()))
1277     return 0;
1278   if (par[pos] != '{')
1279     return 0;
1280   if (size_t(pos) + 2 >= par.size())
1281     return 1;
1282   if (par[pos+2] != '}')
1283     return 1;
1284   if (par[pos+1] == '[' || par[pos+1] == ']')
1285     return 3;
1286   return 1;
1287 }
1288
1289 void Intervall::setForDefaultLang(KeyInfo const & defLang) const
1290 {
1291   // Enable the use of first token again
1292   if (ignoreidx >= 0) {
1293     int value = defLang._tokenstart + defLang._tokensize;
1294     int borderidx = 0;
1295     if (hasTitle) {
1296       borderidx = 1;
1297     }
1298     if (value > 0) {
1299       if (borders[borderidx].low < value)
1300         borders[borderidx].low = value;
1301       if (borders[borderidx].upper < value)
1302         borders[borderidx].upper = value;
1303     }
1304   }
1305 }
1306
1307 static void checkDepthIndex(int val)
1308 {
1309   static int maxdepthidx = MAXOPENED-2;
1310   static int lastmaxdepth = 0;
1311   if (val > lastmaxdepth) {
1312     LYXERR(Debug::INFO, "Depth reached " << val);
1313     lastmaxdepth = val;
1314   }
1315   if (val > maxdepthidx) {
1316     maxdepthidx = val;
1317     LYXERR(Debug::INFO, "maxdepthidx now " << val);
1318   }
1319 }
1320
#if 0
// Disabled code, excluded from compilation by the surrounding '#if 0'.
// Not needed, because borders are now dynamically expanded
// It logged once per new maximum when the index 'val' came within one
// entry of the size of 'borders'.
static void checkIgnoreIdx(int val)
{
  static int lastmaxignore = -1;
  if ((lastmaxignore < val) && (size_t(val+1) >= borders.size())) {
    LYXERR(Debug::INFO, "IgnoreIdx reached " << val);
    lastmaxignore = val;
  }
}
#endif
1332
/*
 * Expand the region of ignored parts of the input latex string
 * The region is only relevant in output()
 *
 * The entries borders[0..ignoreidx] are kept sorted. The new region
 * low..upper is either appended, inserted between two existing entries,
 * or merged with every entry it touches or overlaps.
 * Calls with low == upper are ignored.
 */
void Intervall::addIntervall(int low, int upper)
{
  int idx;
  if (low == upper) return;
  // Search backwards for the first entry that ends before 'low'.
  // Afterwards idx is the slot the new region has to go to (or merge with).
  for (idx = ignoreidx+1; idx > 0; --idx) {
    if (low > borders[idx-1].upper) {
      break;
    }
  }
  Border br(low, upper);
  if (idx > ignoreidx) {
    // The new region lies behind all known ones: append it,
    // growing the table if all of its entries are in use
    if (borders.size() <= size_t(idx)) {
      borders.push_back(br);
    }
    else {
      borders[idx] = br;
    }
    ignoreidx = idx;
    // checkIgnoreIdx(ignoreidx);
    return;
  }
  else {
    // Expand only if one of the new bounds is inside the interval
    // We know here that br.low > borders[idx-1].upper
    if (br.upper < borders[idx].low) {
      // We have to insert at this pos
      // First make room for one more entry at the end ...
      if (size_t(ignoreidx+1) >= borders.size()) {
        borders.push_back(borders[ignoreidx]);
      }
      else {
        borders[ignoreidx+1] = borders[ignoreidx];
      }
      // ... then shift the entries idx..ignoreidx-1 up by one
      for (int i = ignoreidx; i > idx; --i) {
        borders[i] = borders[i-1];
      }
      borders[idx] = br;
      ignoreidx += 1;
      // checkIgnoreIdx(ignoreidx);
      return;
    }
    // Here we know, that we are overlapping
    if (br.low > borders[idx].low)
      br.low = borders[idx].low;
    // check what has to be concatenated:
    // count the consecutive entries, starting at idx, that begin at or
    // before br.upper and extend br.upper to cover them
    int count = 0;
    for (int i = idx; i <= ignoreidx; i++) {
      if (br.upper >= borders[i].low) {
        count++;
        if (br.upper < borders[i].upper)
          br.upper = borders[i].upper;
      }
      else {
        break;
      }
    }
    // count should be >= 1 here
    borders[idx] = br;
    if (count > 1) {
      // Several entries were merged into one: close the gap
      for (int i = idx + count; i <= ignoreidx; i++) {
        borders[i-count+1] = borders[i];
      }
      ignoreidx -= count - 1;
      return;
    }
  }
}
1403
1404 static void buildaccent(string n, string param, string values)
1405 {
1406   stringstream s(n);
1407   string name;
1408   const char delim = '|';
1409   while (getline(s, name, delim)) {
1410     size_t start = 0;
1411     for (char c : param) {
1412       string key = name + "{" + c + "}";
1413       // get the corresponding utf8-value
1414       if ((values[start] & 0xc0) != 0xc0) {
1415         // should not happen, utf8 encoding starts at least with 11xxxxxx
1416         // but value for '\dot{i}' is 'i', which is ascii
1417         if ((values[start] & 0x80) == 0) {
1418           // is ascii
1419           accents[key] = values.substr(start, 1);
1420           // LYXERR(Debug::INFO, "" << key << "=" << accents[key]);
1421         }
1422         start++;
1423         continue;
1424       }
1425       for (int j = 1; ;j++) {
1426         if (start + j >= values.size()) {
1427           accents[key] = values.substr(start, j);
1428           start = values.size() - 1;
1429           break;
1430         }
1431         else if ((values[start+j] & 0xc0) != 0x80) {
1432           // This is the first byte of following utf8 char
1433           accents[key] = values.substr(start, j);
1434           start += j;
1435           // LYXERR(Debug::INFO, "" << key << "=" << accents[key]);
1436           break;
1437         }
1438       }
1439     }
1440   }
1441 }
1442
// Helper function
/// Encode the code point \p uchar as utf8.
/// \return the encoded bytes; an empty string for \p uchar == 0, because
/// zero bytes are never copied to the result.
static string getutf8(unsigned uchar)
{
	// Size of the work buffer. A local constant instead of a macro, so the
	// name does not stay defined for the rest of the file.
	int const maxc = 5;
	string ret = string();
	// Filled from the back; unused leading entries stay 0
	char c[maxc] = {0};
	if (uchar <= 0x7f) {
		c[maxc-1] = uchar & 0x7f;
	}
	else {
		// 'rest' is the first value that no longer fits into the lead byte,
		// 'first' holds the marker bits of the lead byte; both are adjusted
		// for every continuation byte that is emitted.
		unsigned char rest = 0x40;
		unsigned char first = 0x80;
		int start = maxc-1;
		for (int i = start; i >=0; --i) {
			if (uchar < rest) {
				// The remaining bits fit: write the lead byte and stop
				c[i] = first + uchar;
				break;
			}
			// Continuation byte: 10xxxxxx with the lowest six bits
			c[i] = 0x80 | (uchar &  0x3f);
			uchar >>= 6;
			rest >>= 1;
			first >>= 1;
			first |= 0x80;
		}
	}
	for (int i = 0; i < maxc; i++) {
		if (c[i] == 0) continue;
		ret += c[i];
	}
	return(ret);
}
1474
1475 static void buildAccentsMap()
1476 {
1477   accents["imath"] = "ı";
1478   accents["i"] = "ı";
1479   accents["jmath"] = "ȷ";
1480   accents["cdot"] = "·";
1481   accents["textasciicircum"] = "^";
1482   accents["mathcircumflex"] = "^";
1483   accents["sim"] = "~";
1484   accents["guillemotright"] = "»";
1485   accents["guillemotleft"] = "«";
1486   accents["hairspace"]     = getutf8(0xf0000);  // select from free unicode plane 15
1487   accents["thinspace"]     = getutf8(0xf0002);  // and used _only_ by findadv
1488   accents["negthinspace"]  = getutf8(0xf0003);  // to omit backslashed latex macros
1489   accents["medspace"]      = getutf8(0xf0004);  // See https://en.wikipedia.org/wiki/Private_Use_Areas
1490   accents["negmedspace"]   = getutf8(0xf0005);
1491   accents["thickspace"]    = getutf8(0xf0006);
1492   accents["negthickspace"] = getutf8(0xf0007);
1493   accents["lyx"]           = getutf8(0xf0010);  // Used logos
1494   accents["LyX"]           = getutf8(0xf0010);
1495   accents["tex"]           = getutf8(0xf0011);
1496   accents["TeX"]           = getutf8(0xf0011);
1497   accents["latex"]         = getutf8(0xf0012);
1498   accents["LaTeX"]         = getutf8(0xf0012);
1499   accents["latexe"]        = getutf8(0xf0013);
1500   accents["LaTeXe"]        = getutf8(0xf0013);
1501   accents["lyxarrow"]      = getutf8(0xf0020);
1502   accents["braceleft"]     = getutf8(0xf0030);
1503   accents["braceright"]    = getutf8(0xf0031);
1504   accents["backslash lyx"]           = getutf8(0xf0010);        // Used logos inserted with starting \backslash
1505   accents["backslash LyX"]           = getutf8(0xf0010);
1506   accents["backslash tex"]           = getutf8(0xf0011);
1507   accents["backslash TeX"]           = getutf8(0xf0011);
1508   accents["backslash latex"]         = getutf8(0xf0012);
1509   accents["backslash LaTeX"]         = getutf8(0xf0012);
1510   accents["backslash latexe"]        = getutf8(0xf0013);
1511   accents["backslash LaTeXe"]        = getutf8(0xf0013);
1512   accents["backslash lyxarrow"]      = getutf8(0xf0020);
1513   accents["ddot{\\imath}"] = "ï";
1514   buildaccent("ddot", "aAeEhHiIioOtuUwWxXyY",
1515                       "äÄëËḧḦïÏïöÖẗüÜẅẄẍẌÿŸ");       // umlaut
1516   buildaccent("dot|.", "aAbBcCdDeEfFGghHIimMnNoOpPrRsStTwWxXyYzZ",
1517                        "ȧȦḃḂċĊḋḊėĖḟḞĠġḣḢİİṁṀṅṄȯȮṗṖṙṘṡṠṫṪẇẆẋẊẏẎżŻ");   // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ'
1518   accents["acute{\\imath}"] = "í";
1519   buildaccent("acute", "aAcCeEgGkKlLmMoOnNpPrRsSuUwWyYzZiI",
1520                        "áÁćĆéÉǵǴḱḰĺĹḿḾóÓńŃṕṔŕŔśŚúÚẃẂýÝźŹíÍ");
1521   buildaccent("dacute|H|h", "oOuU", "őŐűŰ");        // double acute
1522   buildaccent("mathring|r", "aAuUwy",
1523                             "åÅůŮẘẙ");  // ring
1524   accents["check{\\imath}"] = "ǐ";
1525   accents["check{\\jmath}"] = "ǰ";
1526   buildaccent("check|v", "cCdDaAeEiIoOuUgGkKhHlLnNrRsSTtzZ",
1527                          "čČďĎǎǍěĚǐǏǒǑǔǓǧǦǩǨȟȞľĽňŇřŘšŠŤťžŽ");   // caron
1528   accents["hat{\\imath}"] = "î";
1529   accents["hat{\\jmath}"] = "ĵ";
1530   buildaccent("hat|^", "aAcCeEgGhHiIjJoOsSuUwWyYzZ",
1531                        "âÂĉĈêÊĝĜĥĤîÎĵĴôÔŝŜûÛŵŴŷŶẑẐ");       // circ
1532   accents["bar{\\imath}"] = "ī";
1533   buildaccent("bar|=", "aAeEiIoOuUyY",
1534                        "āĀēĒīĪōŌūŪȳȲ");     // macron
1535   accents["tilde{\\imath}"] = "ĩ";
1536   buildaccent("tilde", "aAeEiInNoOuUvVyY",
1537                        "ãÃẽẼĩĨñÑõÕũŨṽṼỹỸ");       // tilde
1538   accents["breve{\\imath}"] = "ĭ";
1539   buildaccent("breve|u", "aAeEgGiIoOuU",
1540                          "ăĂĕĔğĞĭĬŏŎŭŬ");   // breve
1541   accents["grave{\\imath}"] = "ì";
1542   buildaccent("grave|`", "aAeEiIoOuUnNwWyY",
1543                          "àÀèÈìÌòÒùÙǹǸẁẀỳỲ");       // grave
1544   buildaccent("subdot|d", "BbDdHhKkLlMmNnRrSsTtVvWwZzAaEeIiOoUuYy",
1545                           "ḄḅḌḍḤḥḲḳḶḷṂṃṆṇṚṛṢṣṬṭṾṿẈẉẒẓẠạẸẹỊịỌọỤụỴỵ");        // dot below
1546   buildaccent("ogonek|k", "AaEeIiUuOo",
1547                           "ĄąĘęĮįŲųǪǫ");      // ogonek
1548   buildaccent("cedilla|c", "CcGgKkLlNnRrSsTtEeDdHh",
1549                            "ÇçĢģĶķĻļŅņŖŗŞşŢţȨȩḐḑḨḩ"); // cedilla
1550   buildaccent("subring|textsubring", "Aa",
1551                                      "Ḁḁ"); // subring
1552   buildaccent("subhat|textsubcircum", "DdEeLlNnTtUu",
1553                                       "ḒḓḘḙḼḽṊṋṰṱṶṷ");  // subcircum
1554   buildaccent("subtilde|textsubtilde", "EeIiUu",
1555                                        "ḚḛḬḭṴṵ");   // subtilde
1556   accents["dgrave{\\imath}"] = "ȉ";
1557   accents["textdoublegrave{\\i}"] = "ȉ";
1558   buildaccent("dgrave|textdoublegrave", "AaEeIiOoRrUu",
1559                                         "ȀȁȄȅȈȉȌȍȐȑȔȕ"); // double grave
1560   accents["rcap{\\imath}"] = "ȉ";
1561   accents["textroundcap{\\i}"] = "ȉ";
1562   buildaccent("rcap|textroundcap", "AaEeIiOoRrUu",
1563                                    "ȂȃȆȇȊȋȎȏȒȓȖȗ"); // inverted breve
1564   buildaccent("slashed", "oO",
1565                          "øØ"); // slashed
1566 }
1567
1568 /*
1569  * Created accents in math or regexp environment
1570  * are macros, but we need the utf8 equivalent
1571  */
1572 void Intervall::removeAccents()
1573 {
1574   if (accents.empty())
1575     buildAccentsMap();
1576   static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|"
1577          "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}"
1578       "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?|lyxarrow))|(brace|guillemot)(left|right)|textasciicircum|mathcircumflex|sim)(?![a-zA-Z]))");
1579   smatch sub;
1580   for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) {
1581     sub = *itacc;
1582     string key = sub.str(1);
1583     AccentsIterator it_ac = accents.find(key);
1584     if (it_ac != accents.end()) {
1585       string val = it_ac->second;
1586       size_t pos = sub.position(size_t(0));
1587       for (size_t i = 0; i < val.size(); i++) {
1588         par[pos+i] = val[i];
1589       }
1590       // Remove possibly following space too
1591       if (par[pos+sub.str(0).size()] == ' ')
1592         addIntervall(pos+val.size(), pos + sub.str(0).size()+1);
1593       else
1594         addIntervall(pos+val.size(), pos + sub.str(0).size());
1595       for (size_t i = pos+val.size(); i < pos + sub.str(0).size(); i++) {
1596         // remove traces of any remaining chars
1597         par[i] = ' ';
1598       }
1599     }
1600     else {
1601       LYXERR(Debug::INFO, "Not added accent for \"" << key << "\"");
1602     }
1603   }
1604 }
1605
1606 void Intervall::handleOpenP(int i)
1607 {
1608   actualdeptindex++;
1609   depts[actualdeptindex] = i+1;
1610   closes[actualdeptindex] = -1;
1611   checkDepthIndex(actualdeptindex);
1612 }
1613
1614 void Intervall::handleCloseP(int i, bool closingAllowed)
1615 {
1616   if (actualdeptindex <= 0) {
1617     if (! closingAllowed)
1618       LYXERR(Debug::FIND, "Bad closing parenthesis in latex");  /* should not happen, but the latex input may be wrong */
1619     // if we are at the very end
1620     addIntervall(i, i+1);
1621   }
1622   else {
1623     closes[actualdeptindex] = i+1;
1624     actualdeptindex--;
1625   }
1626 }
1627
1628 void Intervall::resetOpenedP(int openPos)
1629 {
1630   // Used as initializer for foreignlanguage entry
1631   actualdeptindex = 1;
1632   depts[1] = openPos+1;
1633   closes[1] = -1;
1634 }
1635
1636 int Intervall::previousNotIgnored(int start) const
1637 {
1638     int idx = 0;                          /* int intervalls */
1639     for (idx = ignoreidx; idx >= 0; --idx) {
1640       if (start > borders[idx].upper)
1641         return start;
1642       if (start >= borders[idx].low)
1643         start = borders[idx].low-1;
1644     }
1645     return start;
1646 }
1647
1648 int Intervall::nextNotIgnored(int start) const
1649 {
1650     int idx = 0;                          /* int intervalls */
1651     for (idx = 0; idx <= ignoreidx; idx++) {
1652       if (start < borders[idx].low)
1653         return start;
1654       if (start < borders[idx].upper)
1655         start = borders[idx].upper;
1656     }
1657     return start;
1658 }
1659
1660 typedef unordered_map<string, KeyInfo> KeysMap;
1661 typedef unordered_map<string, KeyInfo>::const_iterator KeysIterator;
1662 typedef vector< KeyInfo> Entries;
1663 static KeysMap keys = unordered_map<string, KeyInfo>();
1664
1665 class LatexInfo {
1666  private:
1667   int entidx_;
1668   Entries entries_;
1669   Intervall interval_;
1670   void buildKeys(bool);
1671   void buildEntries(bool);
1672   void makeKey(const string &, KeyInfo, bool isPatternString);
1673   void processRegion(int start, int region_end); /*  remove {} parts */
1674   void removeHead(KeyInfo const &, int count=0);
1675
1676  public:
1677  LatexInfo(string const & par, bool isPatternString)
1678          : entidx_(-1), interval_(isPatternString, par)
1679   {
1680     buildKeys(isPatternString);
1681     entries_ = vector<KeyInfo>();
1682     buildEntries(isPatternString);
1683   };
1684   int getFirstKey() {
1685     entidx_ = 0;
1686     if (entries_.empty()) {
1687       return -1;
1688     }
1689     if (entries_[0].keytype == KeyInfo::isTitle) {
1690       interval_.hasTitle = true;
1691       if (! entries_[0].disabled) {
1692         interval_.titleValue = entries_[0].head;
1693       }
1694       else {
1695         interval_.titleValue = "";
1696       }
1697       removeHead(entries_[0]);
1698       if (entries_.size() > 1)
1699         return 1;
1700       else
1701         return -1;
1702     }
1703     return 0;
1704   };
1705   int getNextKey() {
1706     entidx_++;
1707     if (int(entries_.size()) > entidx_) {
1708       return entidx_;
1709     }
1710     else {
1711       return -1;
1712     }
1713   };
1714   bool setNextKey(int idx) {
1715     if ((idx == entidx_) && (entidx_ >= 0)) {
1716       entidx_--;
1717       return true;
1718     }
1719     else
1720       return false;
1721   };
1722   int find(int start, KeyInfo::KeyType keytype) const {
1723     if (start < 0)
1724       return -1;
1725     int tmpIdx = start;
1726     while (tmpIdx < int(entries_.size())) {
1727       if (entries_[tmpIdx].keytype == keytype)
1728         return tmpIdx;
1729       tmpIdx++;
1730     }
1731     return -1;
1732   };
1733   int process(ostringstream & os, KeyInfo const & actual);
1734   int dispatch(ostringstream & os, int previousStart, KeyInfo & actual);
1735   // string show(int lastpos) { return interval.show(lastpos);};
1736   int nextNotIgnored(int start) { return interval_.nextNotIgnored(start);};
1737   KeyInfo &getKeyInfo(int keyinfo) {
1738     static KeyInfo invalidInfo = KeyInfo();
1739     if ((keyinfo < 0) || ( keyinfo >= int(entries_.size())))
1740       return invalidInfo;
1741     else
1742       return entries_[keyinfo];
1743   };
1744   void setForDefaultLang(KeyInfo const & defLang) {interval_.setForDefaultLang(defLang);};
1745   void addIntervall(int low, int up) { interval_.addIntervall(low, up); };
1746 };
1747
1748
1749 int Intervall::findclosing(int start, int end, char up = '{', char down = '}', int repeat = 1)
1750 {
1751   int skip = 0;
1752   int depth = 0;
1753   for (int i = start; i < end; i += 1 + skip) {
1754     char c;
1755     c = par[i];
1756     skip = 0;
1757     if (c == '\\') skip = 1;
1758     else if (c == up) {
1759       depth++;
1760     }
1761     else if (c == down) {
1762       if (depth == 0) {
1763         repeat--;
1764         if ((repeat <= 0) || (par[i+1] != up))
1765           return i;
1766       }
1767       --depth;
1768     }
1769   }
1770   return end;
1771 }
1772
/**
 * Ordered list of math regions together with a cursor (actualIdx_) that
 * selects the region the getters refer to.
 * While the cursor points behind the last region, the getters return
 * fixed fallback values (0, or 100000 for the start position).
 */
class MathInfo {
  class MathEntry {
  public:
    string wait;              // closing token of the region
    size_t mathEnd;           // position behind the closing token
    size_t mathpostfixsize;   // length of the closing token
    size_t mathStart;         // position of the opening token
    size_t mathprefixsize;    // length of the opening token
    size_t mathSize;          // mathEnd - mathStart
  };
  size_t actualIdx_;
  vector<MathEntry> entries_;

  /// Region under the cursor, nullptr if the cursor is out of range
  MathEntry const * current() const {
    return (actualIdx_ < entries_.size()) ? &entries_[actualIdx_] : nullptr;
  }
 public:
  MathInfo() : actualIdx_(0) {}

  /// Append a region. \p end is the position of the closing token,
  /// so the stored end is \p end + \p postfixsize.
  void insert(string const & wait, size_t start, size_t prefixsize, size_t end, size_t postfixsize) {
    MathEntry entry = MathEntry();
    entry.wait = wait;
    entry.mathStart = start;
    entry.mathprefixsize = prefixsize;
    entry.mathpostfixsize = postfixsize;
    entry.mathEnd = end + postfixsize;
    entry.mathSize = entry.mathEnd - entry.mathStart;
    entries_.push_back(entry);
  }
  bool empty() const { return entries_.empty(); }
  size_t getEndPos() const {
    MathEntry const * e = current();
    return e ? e->mathEnd : 0;
  }
  size_t getStartPos() const {
    MathEntry const * e = current();
    return e ? e->mathStart : 100000;   /*  definitely enough? */
  }
  size_t getPrefixSize() const {
    MathEntry const * e = current();
    return e ? e->mathprefixsize : 0;
  }
  size_t getPostfixSize() const {
    MathEntry const * e = current();
    return e ? e->mathpostfixsize : 0;
  }
  /// Move the cursor to the first region and return its start position
  size_t getFirstPos() {
    actualIdx_ = 0;
    return getStartPos();
  }
  size_t getSize() const {
    MathEntry const * e = current();
    return e ? e->mathSize : size_t(0);
  }
  /// Move the cursor to the following region
  void incrEntry() { ++actualIdx_; }
};
1836
1837 void LatexInfo::buildEntries(bool isPatternString)
1838 {
1839   static regex const rmath("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\(begin|end)\\{((eqnarray|equation|flalign|gather|multline|align|alignat)\\*?)\\})");
1840   static regex const rkeys("(\\\\)*(\\$|\\\\\\[|\\\\\\]|\\\\((([a-zA-Z]+\\*?)(\\{([a-z]+\\*?)\\}|=[0-9]+[a-z]+)?)))");
1841   static bool disableLanguageOverride = false;
1842   smatch sub, submath;
1843   bool evaluatingRegexp = false;
1844   MathInfo mi;
1845   bool evaluatingMath = false;
1846   bool evaluatingCode = false;
1847   size_t codeEnd = 0;
1848   bool evaluatingOptional = false;
1849   size_t optionalEnd = 0;
1850   int codeStart = -1;
1851   KeyInfo found;
1852   bool math_end_waiting = false;
1853   size_t math_pos = 10000;
1854   size_t math_prefix_size = 1;
1855   string math_end;
1856   static vector<string> usedText = vector<string>();
1857   static bool removeMathHull = false;
1858
1859   interval_.removeAccents();
1860
1861   for (sregex_iterator itmath(interval_.par.begin(), interval_.par.end(), rmath), end; itmath != end; ++itmath) {
1862     submath = *itmath;
1863     if ((submath.position(2) - submath.position(0)) %2 == 1) {
1864       // prefixed by odd count of '\\'
1865       continue;
1866     }
1867     if (math_end_waiting) {
1868       size_t pos = submath.position(size_t(2));
1869       if ((math_end == "$") &&
1870           (submath.str(2) == "$")) {
1871         mi.insert("$", math_pos, 1, pos, 1);
1872         math_end_waiting = false;
1873       }
1874       else if ((math_end == "\\]") &&
1875                (submath.str(2) == "\\]")) {
1876         mi.insert("\\]", math_pos, 2, pos, 2);
1877         math_end_waiting = false;
1878       }
1879       else if ((submath.str(3).compare("end") == 0) &&
1880           (submath.str(4).compare(math_end) == 0)) {
1881         mi.insert(math_end, math_pos, math_prefix_size, pos, submath.str(2).length());
1882         math_end_waiting = false;
1883       }
1884       else
1885         continue;
1886     }
1887     else {
1888       if (submath.str(3).compare("begin") == 0) {
1889         math_end_waiting = true;
1890         math_end = submath.str(4);
1891         math_pos = submath.position(size_t(2));
1892         math_prefix_size = submath.str(2).length();
1893       }
1894       else if (submath.str(2).compare("\\[") == 0) {
1895         math_end_waiting = true;
1896         math_end = "\\]";
1897         math_pos = submath.position(size_t(2));
1898       }
1899       else if (submath.str(2) == "$") {
1900         size_t pos = submath.position(size_t(2));
1901         math_end_waiting = true;
1902         math_end = "$";
1903         math_pos = pos;
1904       }
1905     }
1906   }
1907   // Ignore language if there is math somewhere in pattern-string
1908   if (isPatternString) {
1909     for (auto s: usedText) {
1910       // Remove entries created in previous search runs
1911       keys.erase(s);
1912     }
1913     usedText = vector<string>();
1914     if (! mi.empty()) {
1915       // Disable language
1916       keys["foreignlanguage"].disabled = true;
1917       disableLanguageOverride = true;
1918       removeMathHull = false;
1919     }
1920     else {
1921       removeMathHull = true;    // used later if not isPatternString
1922       disableLanguageOverride = false;
1923     }
1924   }
1925   else {
1926     if (disableLanguageOverride) {
1927       keys["foreignlanguage"].disabled = true;
1928     }
1929   }
1930   math_pos = mi.getFirstPos();
1931   for (sregex_iterator it(interval_.par.begin(), interval_.par.end(), rkeys), end; it != end; ++it) {
1932     sub = *it;
1933     if ((sub.position(2) - sub.position(0)) %2 == 1) {
1934       // prefixed by odd count of '\\'
1935       continue;
1936     }
1937     string key = sub.str(5);
1938     if (key == "") {
1939       if (sub.str(2)[0] == '\\')
1940         key = sub.str(2)[1];
1941       else {
1942         key = sub.str(2);
1943       }
1944     }
1945     KeysIterator it_key = keys.find(key);
1946     if (it_key != keys.end()) {
1947       if (it_key->second.keytype == KeyInfo::headRemove) {
1948         KeyInfo found1 = it_key->second;
1949         found1.disabled = true;
1950         found1.head = "\\" + key + "{";
1951         found1._tokenstart = sub.position(size_t(2));
1952         found1._tokensize = found1.head.length();
1953         found1._dataStart = found1._tokenstart + found1.head.length();
1954         int endpos = interval_.findclosing(found1._dataStart, interval_.par.length(), '{', '}', 1);
1955         found1._dataEnd = endpos;
1956         removeHead(found1);
1957         continue;
1958       }
1959     }
1960     if (evaluatingRegexp) {
1961       if (sub.str(3).compare("endregexp") == 0) {
1962         evaluatingRegexp = false;
1963         // found._tokenstart already set
1964         found._dataEnd = sub.position(size_t(2)) + 13;
1965         found._dataStart = found._dataEnd;
1966         found._tokensize = found._dataEnd - found._tokenstart;
1967         found.parenthesiscount = 0;
1968         found.head = interval_.par.substr(found._tokenstart, found._tokensize);
1969       }
1970       else {
1971         continue;
1972       }
1973     }
1974     else {
1975       if (evaluatingMath) {
1976         if (size_t(sub.position(size_t(2))) < mi.getEndPos())
1977           continue;
1978         evaluatingMath = false;
1979         mi.incrEntry();
1980         math_pos = mi.getStartPos();
1981       }
1982       if (it_key == keys.end()) {
1983         found = KeyInfo(KeyInfo::isStandard, 0, true);
1984         LYXERR(Debug::INFO, "Undefined key " << key << " ==> will be used as text");
1985         found = KeyInfo(KeyInfo::isText, 0, false);
1986         if (isPatternString) {
1987           found.keytype = KeyInfo::isChar;
1988           found.disabled = false;
1989           found.used = true;
1990         }
1991         keys[key] = found;
1992         usedText.push_back(key);
1993       }
1994       else
1995         found = keys[key];
1996       if (key.compare("regexp") == 0) {
1997         evaluatingRegexp = true;
1998         found._tokenstart = sub.position(size_t(2));
1999         found._tokensize = 0;
2000         continue;
2001       }
2002     }
2003     // Handle the other params of key
2004     if (found.keytype == KeyInfo::isIgnored)
2005       continue;
2006     else if (found.keytype == KeyInfo::isMath) {
2007       if (size_t(sub.position(size_t(2))) == math_pos) {
2008         found = keys[key];
2009         found._tokenstart = sub.position(size_t(2));
2010         found._tokensize = mi.getSize();
2011         found._dataEnd = found._tokenstart + found._tokensize;
2012         found._dataStart = found._dataEnd;
2013         found.parenthesiscount = 0;
2014         found.head = interval_.par.substr(found._tokenstart, found._tokensize);
2015         if (removeMathHull) {
2016           interval_.addIntervall(found._tokenstart, found._tokenstart + mi.getPrefixSize());
2017           interval_.addIntervall(found._dataEnd - mi.getPostfixSize(), found._dataEnd);
2018         }
2019         evaluatingMath = true;
2020       }
2021       else {
2022         // begin|end of unknown env, discard
2023         // First handle tables
2024         // longtable|tabular
2025         bool discardComment;
2026         found = keys[key];
2027         found.keytype = KeyInfo::doRemove;
2028         if ((sub.str(7).compare("longtable") == 0) ||
2029             (sub.str(7).compare("tabular") == 0)) {
2030           discardComment = true;        /* '%' */
2031         }
2032         else {
2033           discardComment = false;
2034           static regex const removeArgs("^(multicols|multipar|sectionbox|subsectionbox|tcolorbox)$");
2035           smatch sub2;
2036           string token = sub.str(7);
2037           if (regex_match(token, sub2, removeArgs)) {
2038             found.keytype = KeyInfo::removeWithArg;
2039           }
2040         }
2041         // discard spaces before pos(2)
2042         int pos = sub.position(size_t(2));
2043         int count;
2044         for (count = 0; pos - count > 0; count++) {
2045           char c = interval_.par[pos-count-1];
2046           if (discardComment) {
2047             if ((c != ' ') && (c != '%'))
2048               break;
2049           }
2050           else if (c != ' ')
2051             break;
2052         }
2053         found._tokenstart = pos - count;
2054         if (sub.str(3).compare(0, 5, "begin") == 0) {
2055           size_t pos1 = pos + sub.str(2).length();
2056           if (sub.str(7).compare("cjk") == 0) {
2057             pos1 = interval_.findclosing(pos1+1, interval_.par.length()) + 1;
2058             if ((interval_.par[pos1] == '{') && (interval_.par[pos1+1] == '}'))
2059               pos1 += 2;
2060             found.keytype = KeyInfo::isMain;
2061             found._dataStart = pos1;
2062             found._dataEnd = interval_.par.length();
2063             found.disabled = keys["foreignlanguage"].disabled;
2064             found.used = keys["foreignlanguage"].used;
2065             found._tokensize = pos1 - found._tokenstart;
2066             found.head = interval_.par.substr(found._tokenstart, found._tokensize);
2067           }
2068           else {
2069             // Swallow possible optional params
2070             while (interval_.par[pos1] == '[') {
2071               pos1 = interval_.findclosing(pos1+1, interval_.par.length(), '[', ']')+1;
2072             }
2073             // Swallow also the eventual parameter
2074             if (interval_.par[pos1] == '{') {
2075               found._dataEnd = interval_.findclosing(pos1+1, interval_.par.length()) + 1;
2076             }
2077             else {
2078               found._dataEnd = pos1;
2079             }
2080             found._dataStart = found._dataEnd;
2081             found._tokensize = count + found._dataEnd - pos;
2082             found.parenthesiscount = 0;
2083             found.head = interval_.par.substr(found._tokenstart, found._tokensize);
2084             found.disabled = true;
2085           }
2086         }
2087         else {
2088           // Handle "\end{...}"
2089           found._dataStart = pos + sub.str(2).length();
2090           found._dataEnd = found._dataStart;
2091           found._tokensize = count + found._dataEnd - pos;
2092           found.parenthesiscount = 0;
2093           found.head = interval_.par.substr(found._tokenstart, found._tokensize);
2094           found.disabled = true;
2095         }
2096       }
2097     }
2098     else if (found.keytype != KeyInfo::isRegex) {
2099       found._tokenstart = sub.position(size_t(2));
2100       if (found.parenthesiscount == 0) {
2101         // Probably to be discarded
2102         size_t following_pos = sub.position(size_t(2)) + sub.str(5).length() + 1;
2103         char following = interval_.par[following_pos];
2104         if (following == ' ')
2105           found.head = "\\" + sub.str(5) + " ";
2106         else if (following == '=') {
2107           // like \uldepth=1000pt
2108           found.head = sub.str(2);
2109         }
2110         else
2111           found.head = "\\" + key;
2112         found._tokensize = found.head.length();
2113         found._dataEnd = found._tokenstart + found._tokensize;
2114         found._dataStart = found._dataEnd;
2115       }
2116       else {
2117         int params = found._tokenstart + key.length() + 1;
2118         if (evaluatingOptional) {
2119           if (size_t(found._tokenstart) > optionalEnd) {
2120             evaluatingOptional = false;
2121           }
2122           else {
2123             found.disabled = true;
2124           }
2125         }
2126         int optend = params;
2127         while (interval_.par[optend] == '[') {
2128           // discard optional parameters
2129           optend = interval_.findclosing(optend+1, interval_.par.length(), '[', ']') + 1;
2130         }
2131         if (optend > params) {
2132           key += interval_.par.substr(params, optend-params);
2133           evaluatingOptional = true;
2134           optionalEnd = optend;
2135           if (found.keytype == KeyInfo::isSectioning) {
2136             // Remove optional values (but still keep in header)
2137             interval_.addIntervall(params, optend);
2138           }
2139         }
2140         string token = sub.str(7);
2141         int closings;
2142         if (interval_.par[optend] != '{') {
2143           closings = 0;
2144           found.parenthesiscount = 0;
2145           found.head = "\\" + key;
2146         }
2147         else
2148           closings = found.parenthesiscount;
2149         if (found.parenthesiscount == 1) {
2150           found.head = "\\" + key + "{";
2151         }
2152         else if (found.parenthesiscount > 1) {
2153           if (token != "") {
2154             found.head = sub.str(2) + "{";
2155             closings = found.parenthesiscount - 1;
2156           }
2157           else {
2158             found.head = "\\" + key + "{";
2159           }
2160         }
2161         found._tokensize = found.head.length();
2162         found._dataStart = found._tokenstart + found.head.length();
2163         if (found.keytype == KeyInfo::doRemove) {
2164           if (closings > 0) {
2165             size_t endpar = 2 + interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings);
2166             if (endpar >= interval_.par.length())
2167               found._dataStart = interval_.par.length();
2168             else
2169               found._dataStart = endpar;
2170             found._tokensize = found._dataStart - found._tokenstart;
2171           }
2172           else {
2173             found._dataStart = found._tokenstart + found._tokensize;
2174           }
2175           closings = 0;
2176         }
2177         if (interval_.par.substr(found._dataStart, 15).compare("\\endarguments{}") == 0) {
2178           found._dataStart += 15;
2179         }
2180         size_t endpos;
2181         if (closings < 1)
2182           endpos = found._dataStart - 1;
2183         else
2184           endpos = interval_.findclosing(found._dataStart, interval_.par.length(), '{', '}', closings);
2185         if (found.keytype == KeyInfo::isList) {
2186           // Check if it really is list env
2187           static regex const listre("^([a-z]+)$");
2188           smatch sub2;
2189           if (!regex_match(token, sub2, listre)) {
2190             // Change the key of this entry. It is not in a list/item environment
2191             found.keytype = KeyInfo::endArguments;
2192           }
2193         }
2194         if (found.keytype == KeyInfo::noMain) {
2195           evaluatingCode = true;
2196           codeEnd = endpos;
2197           codeStart = found._dataStart;
2198         }
2199         else if (evaluatingCode) {
2200           if (size_t(found._dataStart) > codeEnd)
2201             evaluatingCode = false;
2202           else if (found.keytype == KeyInfo::isMain) {
2203             // Disable this key, treate it as standard
2204             found.keytype = KeyInfo::isStandard;
2205             found.disabled = true;
2206             if ((codeEnd +1 >= interval_.par.length()) &&
2207                 (found._tokenstart == codeStart)) {
2208               // trickery, because the code inset starts
2209               // with \selectlanguage ...
2210               codeEnd = endpos;
2211               if (entries_.size() > 1) {
2212                 entries_[entries_.size()-1]._dataEnd = codeEnd;
2213               }
2214             }
2215           }
2216         }
2217         if ((endpos == interval_.par.length()) &&
2218             (found.keytype == KeyInfo::doRemove)) {
2219           // Missing closing => error in latex-input?
2220           // therefore do not delete remaining data
2221           found._dataStart -= 1;
2222           found._dataEnd = found._dataStart;
2223         }
2224         else
2225           found._dataEnd = endpos;
2226       }
2227       if (isPatternString) {
2228         keys[key].used = true;
2229       }
2230     }
2231     entries_.push_back(found);
2232   }
2233 }
2234
2235 void LatexInfo::makeKey(const string &keysstring, KeyInfo keyI, bool isPatternString)
2236 {
2237   stringstream s(keysstring);
2238   string key;
2239   const char delim = '|';
2240   while (getline(s, key, delim)) {
2241     KeyInfo keyII(keyI);
2242     if (isPatternString) {
2243       keyII.used = false;
2244     }
2245     else if ( !keys[key].used)
2246       keyII.disabled = true;
2247     keys[key] = keyII;
2248   }
2249 }
2250
2251 void LatexInfo::buildKeys(bool isPatternString)
2252 {
2253
2254   static bool keysBuilt = false;
2255   if (keysBuilt && !isPatternString) return;
2256
2257   // Keys to ignore in any case
2258   makeKey("text|textcyrillic|lyxmathsym|ensuremath", KeyInfo(KeyInfo::headRemove, 1, true), true);
2259   // Known standard keys with 1 parameter.
2260   // Split is done, if not at start of region
2261   makeKey("textsf|textss|texttt", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getFamily()), isPatternString);
2262   makeKey("textbf",               KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getSeries()), isPatternString);
2263   makeKey("textit|textsc|textsl", KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getShape()), isPatternString);
2264   makeKey("uuline|uline|uwave",   KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getUnderline()), isPatternString);
2265   makeKey("emph|noun",            KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getMarkUp()), isPatternString);
2266   makeKey("sout|xout",            KeyInfo(KeyInfo::isStandard, 1, ignoreFormats.getStrikeOut()), isPatternString);
2267
2268   makeKey("section|subsection|subsubsection|paragraph|subparagraph|minisec",
2269           KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString);
2270   makeKey("section*|subsection*|subsubsection*|paragraph*",
2271           KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString);
2272   makeKey("part|part*|chapter|chapter*", KeyInfo(KeyInfo::isSectioning, 1, ignoreFormats.getSectioning()), isPatternString);
2273   makeKey("title|subtitle|author|subject|publishers|dedication|uppertitleback|lowertitleback|extratitle|lyxaddress|lyxrightaddress", KeyInfo(KeyInfo::isTitle, 1, ignoreFormats.getFrontMatter()), isPatternString);
2274   // Regex
2275   makeKey("regexp", KeyInfo(KeyInfo::isRegex, 1, false), isPatternString);
2276
2277   // Split is done, if not at start of region
2278   makeKey("textcolor", KeyInfo(KeyInfo::isStandard, 2, ignoreFormats.getColor()), isPatternString);
2279   makeKey("latexenvironment", KeyInfo(KeyInfo::isStandard, 2, false), isPatternString);
2280
2281   // Split is done always.
2282   makeKey("foreignlanguage", KeyInfo(KeyInfo::isMain, 2, ignoreFormats.getLanguage()), isPatternString);
2283
2284   // Known charaters
2285   // No split
2286   makeKey("backslash|textbackslash|slash",  KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2287   makeKey("textasciicircum|textasciitilde", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2288   makeKey("textasciiacute|texemdash",       KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2289   makeKey("dots|ldots",                     KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2290   // Spaces
2291   makeKey("quad|qquad|hfill|dotfill",               KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2292   makeKey("textvisiblespace|nobreakspace",          KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2293   makeKey("negthickspace|negmedspace|negthinspace", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2294   makeKey("thickspace|medspace|thinspace",          KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2295   // Skip
2296   // makeKey("enskip|smallskip|medskip|bigskip|vfill", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2297   // Custom space/skip, remove the content (== length value)
2298   makeKey("vspace|vspace*|hspace|hspace*|mspace", KeyInfo(KeyInfo::noContent, 1, false), isPatternString);
2299   // Found in fr/UserGuide.lyx
2300   makeKey("og|fg", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2301   // quotes
2302   makeKey("textquotedbl|quotesinglbase|lyxarrow", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2303   makeKey("textquotedblleft|textquotedblright", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2304   // Known macros to remove (including their parameter)
2305   // No split
2306   makeKey("input|inputencoding|label|ref|index|bibitem", KeyInfo(KeyInfo::doRemove, 1, false), isPatternString);
2307   makeKey("addtocounter|setlength",                 KeyInfo(KeyInfo::noContent, 2, true), isPatternString);
2308   // handle like standard keys with 1 parameter.
2309   makeKey("url|href|vref|thanks", KeyInfo(KeyInfo::isStandard, 1, false), isPatternString);
2310
2311   // Ignore deleted text
2312   makeKey("lyxdeleted", KeyInfo(KeyInfo::doRemove, 3, false), isPatternString);
2313   // but preserve added text
2314   makeKey("lyxadded", KeyInfo(KeyInfo::doRemove, 2, false), isPatternString);
2315
2316   // Macros to remove, but let the parameter survive
2317   // No split
2318   makeKey("menuitem|textmd|textrm", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2319
2320   // Remove language spec from content of these insets
2321   makeKey("code", KeyInfo(KeyInfo::noMain, 1, false), isPatternString);
2322
2323   // Same effect as previous, parameter will survive (because there is no one anyway)
2324   // No split
2325   makeKey("noindent|textcompwordmark|maketitle", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
2326   // Remove table decorations
2327   makeKey("hline|tabularnewline|toprule|bottomrule|midrule", KeyInfo(KeyInfo::doRemove, 0, true), isPatternString);
2328   // Discard shape-header.
2329   // For footnote or shortcut too, because of lang settings
2330   // and wrong handling if used 'KeyInfo::noMain'
2331   makeKey("circlepar|diamondpar|heartpar|nutpar",  KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2332   makeKey("trianglerightpar|hexagonpar|starpar",   KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2333   makeKey("triangleuppar|triangledownpar|droppar", KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2334   makeKey("triangleleftpar|shapepar|dropuppar",    KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2335   makeKey("hphantom|vphantom|footnote|shortcut|include|includegraphics",     KeyInfo(KeyInfo::isStandard, 1, true), isPatternString);
2336   makeKey("parbox", KeyInfo(KeyInfo::doRemove, 1, true), isPatternString);
2337   // like ('tiny{}' or '\tiny ' ... )
2338   makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString);
2339
2340   // Survives, like known character
2341   // makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2342   makeKey("tableofcontents", KeyInfo(KeyInfo::isChar, 0, false), isPatternString);
2343   makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString);
2344
2345   makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
2346   makeKey("[|]", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
2347   makeKey("$", KeyInfo(KeyInfo::isMath, 1, false), isPatternString);
2348
2349   makeKey("par|uldepth|ULdepth|protect|nobreakdash|medskip|relax", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
2350   // Remove RTL/LTR marker
2351   makeKey("l|r|textlr|textfr|textar|beginl|endl", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
2352   makeKey("lettrine", KeyInfo(KeyInfo::cleanToStart, 0, true), isPatternString);
2353   makeKey("lyxslide", KeyInfo(KeyInfo::isSectioning, 1, true), isPatternString);
2354   makeKey("endarguments", KeyInfo(KeyInfo::endArguments, 0, true), isPatternString);
2355   makeKey("twocolumn", KeyInfo(KeyInfo::removeWithArg, 2, true), isPatternString);
2356   makeKey("tnotetext|ead|fntext|cortext|address", KeyInfo(KeyInfo::removeWithArg, 0, true), isPatternString);
2357   makeKey("lyxend", KeyInfo(KeyInfo::isStandard, 0, true), isPatternString);
2358   if (isPatternString) {
2359     // Allow the first searched string to rebuild the keys too
2360     keysBuilt = false;
2361   }
2362   else {
2363     // no need to rebuild again
2364     keysBuilt = true;
2365   }
2366 }
2367
2368 /*
2369  * Keep the list of actual opened parentheses actual
2370  * (e.g. depth == 4 means there are 4 '{' not processed yet)
2371  */
2372 void Intervall::handleParentheses(int lastpos, bool closingAllowed)
2373 {
2374   int skip = 0;
2375   for (int i = depts[actualdeptindex]; i < lastpos; i+= 1 + skip) {
2376     char c;
2377     c = par[i];
2378     skip = 0;
2379     if (c == '\\') skip = 1;
2380     else if (c == '{') {
2381       handleOpenP(i);
2382     }
2383     else if (c == '}') {
2384       handleCloseP(i, closingAllowed);
2385     }
2386   }
2387 }
2388
#if (0)
/*
 * Debugging aid (compiled out).
 * Collect the parts of 'par' in front of 'lastpos' which are not
 * covered by one of the intervals in 'borders'.
 */
string Intervall::show(int lastpos)
{
  string visible;
  int pos = 0;
  for (int idx = 0; idx <= ignoreidx; ++idx) {
    while (pos < lastpos) {
      if (pos > borders[idx].low) {
        // Already behind the start of this interval, continue at its end
        pos = borders[idx].upper;
        break;
      }
      // Copy up to the start of the interval, but not further than lastpos
      int const stop = (borders[idx].low > lastpos) ? lastpos : borders[idx].low;
      visible += par.substr(pos, stop - pos);
      pos = stop;
      if (pos >= borders[idx].low)
        pos = borders[idx].upper;
    }
  }
  if (lastpos > pos)
    visible += par.substr(pos, lastpos - pos);
  return visible;
}
#endif
2420
2421 void Intervall::output(ostringstream &os, int lastpos)
2422 {
2423   // get number of chars to output
2424   int idx = 0;                          /* int intervalls */
2425   int i = 0;
2426   int printed = 0;
2427   string startTitle = titleValue;
2428   for (idx = 0; idx <= ignoreidx; idx++) {
2429     if (i < lastpos) {
2430       if (i <= borders[idx].low) {
2431         int printsize;
2432         if (borders[idx].low > lastpos)
2433           printsize = lastpos - i;
2434         else
2435           printsize = borders[idx].low - i;
2436         if (printsize > 0) {
2437           os << startTitle << par.substr(i, printsize);
2438           i += printsize;
2439           printed += printsize;
2440           startTitle = "";
2441         }
2442         handleParentheses(i, false);
2443         if (i >= borders[idx].low)
2444           i = borders[idx].upper;
2445       }
2446       else {
2447         i = borders[idx].upper;
2448       }
2449     }
2450     else
2451       break;
2452   }
2453   if (lastpos > i) {
2454     os << startTitle << par.substr(i, lastpos-i);
2455     printed += lastpos-i;
2456   }
2457   handleParentheses(lastpos, false);
2458   int startindex;
2459   if (keys["foreignlanguage"].disabled)
2460     startindex = actualdeptindex-langcount;
2461   else
2462     startindex = actualdeptindex;
2463   for (int i = startindex; i > 0; --i) {
2464     os << "}";
2465   }
2466   if (hasTitle && (printed > 0))
2467     os << "}";
2468   if (! isPatternString_)
2469     os << "\n";
2470   handleParentheses(lastpos, true); /* extra closings '}' allowed here */
2471 }
2472
2473 void LatexInfo::processRegion(int start, int region_end)
2474 {
2475   while (start < region_end) {          /* Let {[} and {]} survive */
2476     int cnt = interval_.isOpeningPar(start);
2477     if (cnt == 1) {
2478       // Closing is allowed past the region
2479       int closing = interval_.findclosing(start+1, interval_.par.length());
2480       interval_.addIntervall(start, start+1);
2481       interval_.addIntervall(closing, closing+1);
2482     }
2483     else if (cnt == 3)
2484       start += 2;
2485     start = interval_.nextNotIgnored(start+1);
2486   }
2487 }
2488
2489 void LatexInfo::removeHead(KeyInfo const & actual, int count)
2490 {
2491   if (actual.parenthesiscount == 0) {
2492     // "{\tiny{} ...}" ==> "{{} ...}"
2493     interval_.addIntervall(actual._tokenstart-count, actual._tokenstart + actual._tokensize);
2494   }
2495   else {
2496     // Remove header hull, that is "\url{abcd}" ==> "abcd"
2497     interval_.addIntervall(actual._tokenstart - count, actual._dataStart);
2498     interval_.addIntervall(actual._dataEnd, actual._dataEnd+1);
2499   }
2500 }
2501
2502 int LatexInfo::dispatch(ostringstream &os, int previousStart, KeyInfo &actual)
2503 {
2504   int nextKeyIdx = 0;
2505   switch (actual.keytype)
2506   {
2507     case KeyInfo::isTitle: {
2508       removeHead(actual);
2509       nextKeyIdx = getNextKey();
2510       break;
2511     }
2512     case KeyInfo::cleanToStart: {
2513       actual._dataEnd = actual._dataStart;
2514       nextKeyIdx = getNextKey();
2515       // Search for end of arguments
2516       int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments);
2517       if (tmpIdx > 0) {
2518         for (int i = nextKeyIdx; i <= tmpIdx; i++) {
2519           entries_[i].disabled = true;
2520         }
2521         actual._dataEnd = entries_[tmpIdx]._dataEnd;
2522       }
2523       while (interval_.par[actual._dataEnd] == ' ')
2524         actual._dataEnd++;
2525       interval_.addIntervall(0, actual._dataEnd+1);
2526       interval_.actualdeptindex = 0;
2527       interval_.depts[0] = actual._dataEnd+1;
2528       interval_.closes[0] = -1;
2529       break;
2530     }
2531     case KeyInfo::isText:
2532       interval_.par[actual._tokenstart] = '#';
2533       //interval_.addIntervall(actual._tokenstart, actual._tokenstart+1);
2534       nextKeyIdx = getNextKey();
2535       break;
2536     case KeyInfo::noContent: {          /* char like "\hspace{2cm}" */
2537       if (actual.disabled)
2538         interval_.addIntervall(actual._tokenstart, actual._dataEnd);
2539       else
2540         interval_.addIntervall(actual._dataStart, actual._dataEnd);
2541     }
2542       // fall through
2543     case KeyInfo::isChar: {
2544       nextKeyIdx = getNextKey();
2545       break;
2546     }
2547     case KeyInfo::isSize: {
2548       if (actual.disabled || (interval_.par[actual._dataStart] != '{') || (interval_.par[actual._dataStart-1] == ' ')) {
2549         if (actual.parenthesiscount == 0)
2550           interval_.addIntervall(actual._tokenstart, actual._dataEnd);
2551         else {
2552           interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
2553         }
2554         nextKeyIdx = getNextKey();
2555       } else {
2556         // Here _dataStart points to '{', so correct it
2557         actual._dataStart += 1;
2558         actual._tokensize += 1;
2559         actual.parenthesiscount = 1;
2560         if (interval_.par[actual._dataStart] == '}') {
2561           // Determine the end if used like '{\tiny{}...}'
2562           actual._dataEnd = interval_.findclosing(actual._dataStart+1, interval_.par.length()) + 1;
2563           interval_.addIntervall(actual._dataStart, actual._dataStart+1);
2564         }
2565         else {
2566           // Determine the end if used like '\tiny{...}'
2567           actual._dataEnd = interval_.findclosing(actual._dataStart, interval_.par.length()) + 1;
2568         }
2569         // Split on this key if not at start
2570         int start = interval_.nextNotIgnored(previousStart);
2571         if (start < actual._tokenstart) {
2572           interval_.output(os, actual._tokenstart);
2573           interval_.addIntervall(start, actual._tokenstart);
2574         }
2575         // discard entry if at end of actual
2576         nextKeyIdx = process(os, actual);
2577       }
2578       break;
2579     }
2580     case KeyInfo::endArguments: {
2581       // Remove trailing '{}' too
2582       actual._dataStart += 1;
2583       actual._dataEnd += 1;
2584       interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
2585       nextKeyIdx = getNextKey();
2586       break;
2587     }
2588     case KeyInfo::noMain:
2589       // fall through
2590     case KeyInfo::isStandard: {
2591       if (actual.disabled) {
2592         removeHead(actual);
2593         processRegion(actual._dataStart, actual._dataStart+1);
2594         nextKeyIdx = getNextKey();
2595       } else {
2596         // Split on this key if not at datastart of calling entry
2597         int start = interval_.nextNotIgnored(previousStart);
2598         if (start < actual._tokenstart) {
2599           interval_.output(os, actual._tokenstart);
2600           interval_.addIntervall(start, actual._tokenstart);
2601         }
2602         // discard entry if at end of actual
2603         nextKeyIdx = process(os, actual);
2604       }
2605       break;
2606     }
2607     case KeyInfo::removeWithArg: {
2608       nextKeyIdx = getNextKey();
2609       // Search for end of arguments
2610       int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments);
2611       if (tmpIdx > 0) {
2612         for (int i = nextKeyIdx; i <= tmpIdx; i++) {
2613           entries_[i].disabled = true;
2614         }
2615         actual._dataEnd = entries_[tmpIdx]._dataEnd;
2616       }
2617       interval_.addIntervall(actual._tokenstart, actual._dataEnd+1);
2618       break;
2619     }
2620     case KeyInfo::doRemove: {
2621       // Remove the key with all parameters and following spaces
2622       size_t pos;
2623       size_t start;
2624       if (interval_.par[actual._dataEnd-1] == ' ')
2625         start = actual._dataEnd;
2626       else
2627         start = actual._dataEnd+1;
2628       for (pos = start; pos < interval_.par.length(); pos++) {
2629         if ((interval_.par[pos] != ' ') && (interval_.par[pos] != '%'))
2630           break;
2631       }
2632       // Remove also enclosing parentheses [] and {}
2633       int numpars = 0;
2634       int spaces = 0;
2635       while (actual._tokenstart > numpars) {
2636         if (pos+numpars >= interval_.par.size())
2637           break;
2638         else if (interval_.par[pos+numpars] == ']' && interval_.par[actual._tokenstart-numpars-1] == '[')
2639           numpars++;
2640         else if (interval_.par[pos+numpars] == '}' && interval_.par[actual._tokenstart-numpars-1] == '{')
2641           numpars++;
2642         else
2643           break;
2644       }
2645       if (numpars > 0) {
2646         if (interval_.par[pos+numpars] == ' ')
2647           spaces++;
2648       }
2649
2650       interval_.addIntervall(actual._tokenstart-numpars, pos+numpars+spaces);
2651       nextKeyIdx = getNextKey();
2652       break;
2653     }
2654     case KeyInfo::isList: {
2655       // Discard space before _tokenstart
2656       int count;
2657       for (count = 0; count < actual._tokenstart; count++) {
2658         if (interval_.par[actual._tokenstart-count-1] != ' ')
2659           break;
2660       }
2661       nextKeyIdx = getNextKey();
2662       int tmpIdx = find(nextKeyIdx, KeyInfo::endArguments);
2663       if (tmpIdx > 0) {
2664         // Special case: \item is not a list, but a command (like in Style Author_Biography in maa-monthly.layout)
2665         // with arguments
2666         // How else can we catch this one?
2667         for (int i = nextKeyIdx; i <= tmpIdx; i++) {
2668           entries_[i].disabled = true;
2669         }
2670         actual._dataEnd = entries_[tmpIdx]._dataEnd;
2671       }
2672       else if (nextKeyIdx > 0) {
2673         // Ignore any lang entries inside data region
2674         for (int i = nextKeyIdx; i < int(entries_.size()) && entries_[i]._tokenstart < actual._dataEnd; i++) {
2675           if (entries_[i].keytype == KeyInfo::isMain)
2676             entries_[i].disabled = true;
2677         }
2678       }
2679       if (actual.disabled) {
2680         interval_.addIntervall(actual._tokenstart-count, actual._dataEnd+1);
2681       }
2682       else {
2683         interval_.addIntervall(actual._tokenstart-count, actual._tokenstart);
2684       }
2685       if (interval_.par[actual._dataEnd+1] == '[') {
2686         int posdown = interval_.findclosing(actual._dataEnd+2, interval_.par.length(), '[', ']');
2687         if ((interval_.par[actual._dataEnd+2] == '{') &&
2688             (interval_.par[posdown-1] == '}')) {
2689           interval_.addIntervall(actual._dataEnd+1,actual._dataEnd+3);
2690           interval_.addIntervall(posdown-1, posdown+1);
2691         }
2692         else {
2693           interval_.addIntervall(actual._dataEnd+1, actual._dataEnd+2);
2694           interval_.addIntervall(posdown, posdown+1);
2695         }
2696         int blk = interval_.nextNotIgnored(actual._dataEnd+1);
2697         if (blk > posdown) {
2698           // Discard at most 1 space after empty item
2699           int count;
2700           for (count = 0; count < 1; count++) {
2701             if (interval_.par[blk+count] != ' ')
2702               break;
2703           }
2704           if (count > 0)
2705             interval_.addIntervall(blk, blk+count);
2706         }
2707       }
2708       break;
2709     }
2710     case KeyInfo::isSectioning: {
2711       // Discard spaces before _tokenstart
2712       int count;
2713       int val = actual._tokenstart;
2714       for (count = 0; count < actual._tokenstart;) {
2715         val = interval_.previousNotIgnored(val-1);
2716         if (val < 0 || interval_.par[val] != ' ')
2717           break;
2718         else {
2719           count = actual._tokenstart - val;
2720         }
2721       }
2722       if (actual.disabled) {
2723         removeHead(actual, count);
2724         nextKeyIdx = getNextKey();
2725       } else {
2726         interval_.addIntervall(actual._tokenstart-count, actual._tokenstart);
2727         nextKeyIdx = process(os, actual);
2728       }
2729       break;
2730     }
2731     case KeyInfo::isMath: {
2732       // Same as regex, use the content unchanged
2733       nextKeyIdx = getNextKey();
2734       break;
2735     }
2736     case KeyInfo::isRegex: {
2737       // DO NOT SPLIT ON REGEX
2738       // Do not disable
2739       nextKeyIdx = getNextKey();
2740       break;
2741     }
2742     case KeyInfo::isIgnored: {
2743       // Treat like a character for now
2744       nextKeyIdx = getNextKey();
2745       break;
2746     }
2747     case KeyInfo::isMain: {
2748       if (interval_.par.substr(actual._dataStart, 2) == "% ")
2749         interval_.addIntervall(actual._dataStart, actual._dataStart+2);
2750       if (actual._tokenstart > 0) {
2751         int prev = interval_.previousNotIgnored(actual._tokenstart - 1);
2752         if ((prev >= 0) && interval_.par[prev] == '%')
2753           interval_.addIntervall(prev, prev+1);
2754       }
2755       if (actual.disabled) {
2756         removeHead(actual);
2757         interval_.langcount++;
2758         if ((interval_.par.substr(actual._dataStart, 3) == " \\[") ||
2759             (interval_.par.substr(actual._dataStart, 8) == " \\begin{")) {
2760           // Discard also the space before math-equation
2761           interval_.addIntervall(actual._dataStart, actual._dataStart+1);
2762         }
2763         nextKeyIdx = getNextKey();
2764         // interval.resetOpenedP(actual._dataStart-1);
2765       }
2766       else {
2767         if (actual._tokenstart < 26) {
2768           // for the first (and maybe dummy) language
2769           interval_.setForDefaultLang(actual);
2770         }
2771         interval_.resetOpenedP(actual._dataStart-1);
2772       }
2773       break;
2774     }
2775     case KeyInfo::invalid:
2776     case KeyInfo::headRemove:
2777       // These two cases cannot happen, already handled
2778       // fall through
2779     default: {
2780       // LYXERR(Debug::INFO, "Unhandled keytype");
2781       nextKeyIdx = getNextKey();
2782       break;
2783     }
2784   }
2785   return nextKeyIdx;
2786 }
2787
2788 int LatexInfo::process(ostringstream & os, KeyInfo const & actual )
2789 {
2790   int end = interval_.nextNotIgnored(actual._dataEnd);
2791   int oldStart = actual._dataStart;
2792   int nextKeyIdx = getNextKey();
2793   while (true) {
2794     if ((nextKeyIdx < 0) ||
2795         (entries_[nextKeyIdx]._tokenstart >= actual._dataEnd) ||
2796         (entries_[nextKeyIdx].keytype == KeyInfo::invalid)) {
2797       if (oldStart <= end) {
2798         processRegion(oldStart, end);
2799         oldStart = end+1;
2800       }
2801       break;
2802     }
2803     KeyInfo &nextKey = getKeyInfo(nextKeyIdx);
2804
2805     if ((nextKey.keytype == KeyInfo::isMain) && !nextKey.disabled) {
2806       (void) dispatch(os, actual._dataStart, nextKey);
2807       end = nextKey._tokenstart;
2808       break;
2809     }
2810     processRegion(oldStart, nextKey._tokenstart);
2811     nextKeyIdx = dispatch(os, actual._dataStart, nextKey);
2812
2813     oldStart = nextKey._dataEnd+1;
2814   }
2815   // now nextKey is either invalid or is outside of actual._dataEnd
2816   // output the remaining and discard myself
2817   if (oldStart <= end) {
2818     processRegion(oldStart, end);
2819   }
2820   if (interval_.par.size() > (size_t) end && interval_.par[end] == '}') {
2821     end += 1;
2822     // This is the normal case.
2823     // But if using the firstlanguage, the closing may be missing
2824   }
2825   // get minimum of 'end' and  'actual._dataEnd' in case that the nextKey.keytype was 'KeyInfo::isMain'
2826   int output_end;
2827   if (actual._dataEnd < end)
2828     output_end = interval_.nextNotIgnored(actual._dataEnd);
2829   else if (interval_.par.size() > (size_t) end)
2830     output_end = interval_.nextNotIgnored(end);
2831   else
2832     output_end = interval_.par.size();
2833   if ((actual.keytype == KeyInfo::isMain) && actual.disabled) {
2834     interval_.addIntervall(actual._tokenstart, actual._tokenstart+actual._tokensize);
2835   }
2836   // Remove possible empty data
2837   int dstart = interval_.nextNotIgnored(actual._dataStart);
2838   while (interval_.isOpeningPar(dstart) == 1) {
2839     interval_.addIntervall(dstart, dstart+1);
2840     int dend = interval_.findclosing(dstart+1, output_end);
2841     interval_.addIntervall(dend, dend+1);
2842     dstart = interval_.nextNotIgnored(dstart+1);
2843   }
2844   if (dstart < output_end)
2845     interval_.output(os, output_end);
2846   if (nextKeyIdx < 0)
2847     interval_.addIntervall(0, end);
2848   else
2849     interval_.addIntervall(actual._tokenstart, end);
2850   return nextKeyIdx;
2851 }
2852
2853 string splitOnKnownMacros(string par, bool isPatternString)
2854 {
2855   ostringstream os;
2856   LatexInfo li(par, isPatternString);
2857   // LYXERR(Debug::INFO, "Berfore split: " << par);
2858   KeyInfo DummyKey = KeyInfo(KeyInfo::KeyType::isMain, 2, true);
2859   DummyKey.head = "";
2860   DummyKey._tokensize = 0;
2861   DummyKey._dataStart = 0;
2862   DummyKey._dataEnd = par.length();
2863   DummyKey.disabled = true;
2864   int firstkeyIdx = li.getFirstKey();
2865   string s;
2866   if (firstkeyIdx >= 0) {
2867     KeyInfo firstKey = li.getKeyInfo(firstkeyIdx);
2868     DummyKey._tokenstart = firstKey._tokenstart;
2869     int nextkeyIdx;
2870     if ((firstKey.keytype != KeyInfo::isMain) || firstKey.disabled) {
2871       // Use dummy firstKey
2872       firstKey = DummyKey;
2873       (void) li.setNextKey(firstkeyIdx);
2874     }
2875     else {
2876       if (par.substr(firstKey._dataStart, 2) == "% ")
2877         li.addIntervall(firstKey._dataStart, firstKey._dataStart+2);
2878     }
2879     nextkeyIdx = li.process(os, firstKey);
2880     while (nextkeyIdx >= 0) {
2881       // Check for a possible gap between the last
2882       // entry and this one
2883       int datastart = li.nextNotIgnored(firstKey._dataStart);
2884       KeyInfo &nextKey = li.getKeyInfo(nextkeyIdx);
2885       if ((nextKey._tokenstart > datastart)) {
2886         // Handle the gap
2887         firstKey._dataStart = datastart;
2888         firstKey._dataEnd = par.length();
2889         (void) li.setNextKey(nextkeyIdx);
2890         // Fake the last opened parenthesis
2891         li.setForDefaultLang(firstKey);
2892         nextkeyIdx = li.process(os, firstKey);
2893       }
2894       else {
2895         if (nextKey.keytype != KeyInfo::isMain) {
2896           firstKey._dataStart = datastart;
2897           firstKey._dataEnd = nextKey._dataEnd+1;
2898           (void) li.setNextKey(nextkeyIdx);
2899           li.setForDefaultLang(firstKey);
2900           nextkeyIdx = li.process(os, firstKey);
2901         }
2902         else {
2903           nextkeyIdx = li.process(os, nextKey);
2904         }
2905       }
2906     }
2907     // Handle the remaining
2908     firstKey._dataStart = li.nextNotIgnored(firstKey._dataStart);
2909     firstKey._dataEnd = par.length();
2910     // Check if ! empty
2911     if ((firstKey._dataStart < firstKey._dataEnd) &&
2912         (par[firstKey._dataStart] != '}')) {
2913       li.setForDefaultLang(firstKey);
2914       (void) li.process(os, firstKey);
2915     }
2916     s = os.str();
2917     if (s.empty()) {
2918       // return string definitelly impossible to match
2919       s = "\\foreignlanguage{ignore}{ }";
2920     }
2921   }
2922   else
2923     s = par;                            /* no known macros found */
2924   // LYXERR(Debug::INFO, "After split: " << s);
2925   return s;
2926 }
2927
2928 /*
2929  * Try to unify the language specs in the latexified text.
2930  * Resulting modified string is set to "", if
2931  * the searched tex does not contain all the features in the search pattern
2932  */
2933 static string correctlanguagesetting(string par, bool isPatternString, bool withformat, lyx::Buffer *pbuf = nullptr)
2934 {
2935         static Features regex_f;
2936         static int missed = 0;
2937         static bool regex_with_format = false;
2938
2939         int parlen = par.length();
2940
2941         while ((parlen > 0) && (par[parlen-1] == '\n')) {
2942                 parlen--;
2943         }
2944         if (isPatternString && (parlen > 0) && (par[parlen-1] == '~')) {
2945                 // Happens to be there in case of description or labeling environment
2946                 parlen--;
2947         }
2948         string result;
2949         if (withformat) {
2950                 // Split the latex input into pieces which
2951                 // can be digested by our search engine
2952                 LYXERR(Debug::FIND, "input: \"" << par << "\"");
2953                 if (isPatternString && (pbuf != nullptr)) { // Check if we should disable/enable test for language
2954                         // We check for polyglossia, because in runparams.flavor we use Flavor::XeTeX
2955                         string doclang = pbuf->params().language->polyglossia();
2956                         static regex langre("\\\\(foreignlanguage)\\{([^\\}]+)\\}");
2957                         smatch sub;
2958                         bool toIgnoreLang = true;
2959                         for (sregex_iterator it(par.begin(), par.end(), langre), end; it != end; ++it) {
2960                                 sub = *it;
2961                                 if (sub.str(2) != doclang) {
2962                                         toIgnoreLang = false;
2963                                         break;
2964                                 }
2965                         }
2966                         setIgnoreFormat("language", toIgnoreLang, false);
2967
2968                 }
2969                 result = splitOnKnownMacros(par.substr(0,parlen), isPatternString);
2970                 LYXERR(Debug::FIND, "After splitOnKnownMacros:\n\"" << result << "\"");
2971         }
2972         else
2973                 result = par.substr(0, parlen);
2974         if (isPatternString) {
2975                 missed = 0;
2976                 if (withformat) {
2977                         regex_f = identifyFeatures(result);
2978                         string features = "";
2979                         for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
2980                                 string a = it->first;
2981                                 regex_with_format = true;
2982                                 features += " " + a;
2983                                 // LYXERR(Debug::INFO, "Identified regex format:" << a);
2984                         }
2985                         LYXERR(Debug::FIND, "Identified Features" << features);
2986
2987                 }
2988         } else if (regex_with_format) {
2989                 Features info = identifyFeatures(result);
2990                 for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
2991                         string a = it->first;
2992                         bool b = it->second;
2993                         if (b && ! info[a]) {
2994                                 missed++;
2995                                 LYXERR(Debug::FIND, "Missed(" << missed << " " << a <<", srclen = " << parlen );
2996                                 return "";
2997                         }
2998                 }
2999
3000         }
3001         else {
3002                 // LYXERR(Debug::INFO, "No regex formats");
3003         }
3004         return result;
3005 }
3006
3007
3008 // Remove trailing closure of math, macros and environments, so to catch parts of them.
3009 static int identifyClosing(string & t)
3010 {
3011         int open_braces = 0;
3012         do {
3013                 LYXERR(Debug::FIND, "identifyClosing(): t now is '" << t << "'");
3014                 if (regex_replace(t, t, "(.*[^\\\\])\\$$", "$1"))
3015                         continue;
3016                 if (regex_replace(t, t, "(.*[^\\\\])\\\\\\]$", "$1"))
3017                         continue;
3018                 if (regex_replace(t, t, "(.*[^\\\\])\\\\end\\{[a-zA-Z_]*\\*?\\}$", "$1"))
3019                         continue;
3020                 if (regex_replace(t, t, "(.*[^\\\\])\\}$", "$1")) {
3021                         ++open_braces;
3022                         continue;
3023                 }
3024                 break;
3025         } while (true);
3026         return open_braces;
3027 }
3028
     // Counter for replaced strings. The MatchStringAdv constructor sets it
     // back to 0, except for a replace-all which follows another replace-all.
3029 static int num_replaced = 0;
     // Remembers whether the previously created MatchStringAdv was not a
     // replace-all (set to false by the constructor on replace-all).
3030 static bool previous_single_replace = true;
3031
3032 void MatchStringAdv::CreateRegexp(FindAndReplaceOptions const & opt, string regexp_str, string regexp2_str, string par_as_string)
3033 {
3034 #if QTSEARCH
3035         // Handle \w properly
3036         QRegularExpression::PatternOptions popts = QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::MultilineOption;
3037         if (! opt.casesensitive) {
3038                 popts |= QRegularExpression::CaseInsensitiveOption;
3039         }
3040         regexp = QRegularExpression(QString::fromStdString(regexp_str), popts);
3041         regexp2 = QRegularExpression(QString::fromStdString(regexp2_str), popts);
3042         regexError = "";
3043         if (regexp.isValid() && regexp2.isValid()) {
3044                 regexIsValid = true;
3045                 // Check '{', '}' pairs inside the regex
3046                 int balanced = 0;
3047                 int skip = 1;
3048                 for (unsigned i = 0; i < par_as_string.size(); i+= skip) {
3049                         char c = par_as_string[i];
3050                         if (c == '\\') {
3051                                 skip = 2;
3052                                 continue;
3053                         }
3054                         if (c == '{')
3055                                 balanced++;
3056                         else if (c == '}') {
3057                                 balanced--;
3058                                 if (balanced < 0)
3059                                         break;
3060                                 }
3061                                 skip = 1;
3062                         }
3063                 if (balanced != 0) {
3064                         regexIsValid = false;
3065                         regexError = "Unbalanced curly brackets in regexp \"" + regexp_str + "\"";
3066                 }
3067         }
3068         else {
3069                 regexIsValid = false;
3070                 if (!regexp.isValid())
3071                         regexError += "Invalid regexp \"" + regexp_str + "\", error = " + regexp.errorString().toStdString();
3072                 else
3073                         regexError += "Invalid regexp2 \"" + regexp2_str + "\", error = " + regexp2.errorString().toStdString();
3074         }
3075 #else
3076         if (opt.casesensitive) {
3077                 regexp = regex(regexp_str);
3078                 regexp2 = regex(regexp2_str);
3079         }
3080         else {
3081                 regexp = regex(regexp_str, std::regex_constants::icase);
3082                 regexp2 = regex(regexp2_str, std::regex_constants::icase);
3083         }
3084 #endif
3085 }
3086
/*
 * Adapt the regex 't' for matching whole words only.
 * Every unescaped '.' is replaced by '\S' and the result is enclosed
 * in '\b'. If 't' already contains an unescaped '\b', it is left
 * unchanged.
 */
static void modifyRegexForMatchWord(std::string &t)
{
	// Group 1 (repeated): backslashes in front, group 2: '.' or '\b'
	static const std::regex wordre("(\\\\)*((\\.|\\\\b))");
	std::string s;
	size_t lastpos = 0;
	for (std::sregex_iterator it(t.begin(), t.end(), wordre), end; it != end; ++it) {
		std::smatch const & sub = *it;
		if ((sub.position(2) - sub.position(0)) % 2 == 1) {
			// Odd number of backslashes in front: '\.' is an escaped dot
			// (for '\b' this is an escaped backslash followed by 'b')
			continue;
		}
		// Group 2 is either "." or the two chars '\' 'b', so compare
		// with exactly these two chars
		if (sub.str(2) == "\\b")
			return;
		size_t const dotpos = sub.position(2);
		if (lastpos < dotpos)
			s += t.substr(lastpos, dotpos - lastpos);
		s += "\\S";
		lastpos = dotpos + sub.length(2);
	}
	if (lastpos == 0) {
		// Nothing replaced
		t = "\\b" + t + "\\b";
		return;
	}
	if (lastpos < t.length())
		s += t.substr(lastpos, t.length() - lastpos);
	t = "\\b" + s + "\\b";
}
3114
3115 MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions & opt)
3116         : p_buf(&buf), p_first_buf(&buf), opt(opt)
3117 {
3118         Buffer & find_buf = *theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true);
3119         docstring const & ds = stringifySearchBuffer(find_buf, opt);
3120         use_regexp = lyx::to_utf8(ds).find("\\regexp{") != std::string::npos;
3121         if (opt.replace_all && previous_single_replace) {
3122                 previous_single_replace = false;
3123                 num_replaced = 0;
3124         }
3125         else if (!opt.replace_all) {
3126                 num_replaced = 0;       // count number of replaced strings
3127                 previous_single_replace = true;
3128         }
3129         // When using regexp, braces are hacked already by escape_for_regex()
3130         par_as_string = normalize(ds);
3131         open_braces = 0;
3132         close_wildcards = 0;
3133
3134         size_t lead_size = 0;
3135         // correct the language settings
3136         par_as_string = correctlanguagesetting(par_as_string, true, !opt.ignoreformat, &buf);
3137         opt.matchAtStart = false;
3138         if (!use_regexp) {
3139                 identifyClosing(par_as_string); // Removes math closings ($, ], ...) at end of string
3140                 if (opt.ignoreformat) {
3141                         lead_size = 0;
3142                 }
3143                 else {
3144                         lead_size = identifyLeading(par_as_string);
3145                 }
3146                 lead_as_string = par_as_string.substr(0, lead_size);
3147                 string lead_as_regex_string = string2regex(lead_as_string);
3148                 par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
3149                 string par_as_regex_string_nolead = string2regex(par_as_string_nolead);
3150                 /* Handle whole words too in this case
3151                 */
3152                 if (opt.matchword) {
3153                         par_as_regex_string_nolead = "\\b" + par_as_regex_string_nolead + "\\b";
3154                         opt.matchword = false;
3155                 }
3156                 string regexp_str = "(" + lead_as_regex_string + ")()" + par_as_regex_string_nolead;
3157                 string regexp2_str = "(" + lead_as_regex_string + ")(.*?)" + par_as_regex_string_nolead;
3158                 CreateRegexp(opt, regexp_str, regexp2_str);
3159                 use_regexp = true;
3160                 LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
3161                 LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
3162                 return;
3163         }
3164
3165         if (!opt.ignoreformat) {
3166                 lead_size = identifyLeading(par_as_string);
3167                 LYXERR(Debug::FIND, "Lead_size: " << lead_size);
3168                 lead_as_string = par_as_string.substr(0, lead_size);
3169                 par_as_string_nolead = par_as_string.substr(lead_size, par_as_string.size() - lead_size);
3170         }
3171
3172         // Here we are using regexp
3173         LASSERT(use_regexp, /**/);
3174         {
3175                 string lead_as_regexp;
3176                 if (lead_size > 0) {
3177                         lead_as_regexp = string2regex(par_as_string.substr(0, lead_size));
3178                         regex_replace(par_as_string_nolead, par_as_string_nolead, "}$", "");
3179                         par_as_string = par_as_string_nolead;
3180                         LYXERR(Debug::FIND, "lead_as_regexp is '" << lead_as_regexp << "'");
3181                         LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
3182                 }
3183                 // LYXERR(Debug::FIND, "par_as_string before escape_for_regex() is '" << par_as_string << "'");
3184                 par_as_string = escape_for_regex(par_as_string, !opt.ignoreformat);
3185                 // Insert (.*?) before trailing closure of math, macros and environments, so to catch parts of them.
3186                 // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
3187                 ++close_wildcards;
3188                 size_t lng = par_as_string.size();
3189                 if (!opt.ignoreformat) {
3190                         // Remove extra '\}' at end if not part of \{\.\}
3191                         while(lng > 2) {
3192                                 if (par_as_string.substr(lng-2, 2).compare("\\}") == 0) {
3193                                         if (lng >= 6) {
3194                                                 if (par_as_string.substr(lng-6,3).compare("\\{\\") == 0)
3195                                                         break;
3196                                         }
3197                                         lng -= 2;
3198                                         open_braces++;
3199                                 }
3200                                 else
3201                                         break;
3202                         }
3203                         if (lng < par_as_string.size())
3204                                 par_as_string = par_as_string.substr(0,lng);
3205                 }
3206                 LYXERR(Debug::FIND, "par_as_string after correctRegex is '" << par_as_string << "'");
3207                 if ((lng > 0) && (par_as_string[0] == '^')) {
3208                         par_as_string = par_as_string.substr(1);
3209                         --lng;
3210                         opt.matchAtStart = true;
3211                 }
3212                 // LYXERR(Debug::FIND, "par_as_string now is '" << par_as_string << "'");
3213                 // LYXERR(Debug::FIND, "Open braces: " << open_braces);
3214                 // LYXERR(Debug::FIND, "Replaced text (to be used as regex): " << par_as_string);
3215
3216                 // If entered regexp must match at begin of searched string buffer
3217                 // Kornel: Added parentheses to use $1 for size of the leading string
3218                 string regexp_str;
3219                 string regexp2_str;
3220                 {
3221                         // TODO: Adapt '\[12345678]' in par_as_string to acount for the first '()
3222                         // Unfortunately is '\1', '\2', etc not working for strings with extra format
3223                         // so the convert has no effect in that case
3224                         for (int i = 7; i > 0; --i) {
3225                                 string orig = "\\\\" + std::to_string(i);
3226                                 string dest = "\\" + std::to_string(i+2);
3227                                 while (regex_replace(par_as_string, par_as_string, orig, dest));
3228                         }
3229                         if (opt.matchword) {
3230                                 modifyRegexForMatchWord(par_as_string);
3231                                 opt.matchword = false;
3232                         }
3233                         regexp_str = "(" + lead_as_regexp + ")()" + par_as_string;
3234                         regexp2_str = "(" + lead_as_regexp + ")(.*?)" + par_as_string;
3235                 }
3236                 LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'");
3237                 LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'");
3238                 CreateRegexp(opt, regexp_str, regexp2_str, par_as_string);
3239         }
3240 }
3241
3242 MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_begin) const
3243 {
3244         MatchResult mres;
3245
3246         mres.searched_size = len;
3247         if (at_begin &&
3248                 (opt.restr == FindAndReplaceOptions::R_ONLY_MATHS && !cur.inMathed()) )
3249                 return mres;
3250
3251         docstring docstr = stringifyFromForSearch(opt, cur, len);
3252         string str;
3253         str = normalize(docstr);
3254         if (!opt.ignoreformat) {
3255                 str = correctlanguagesetting(str, false, !opt.ignoreformat);
3256                 // remove closing '}' and '\n' to allow for use of '$' in regex
3257                 size_t lng = str.size();
3258                 while ((lng > 1) && ((str[lng -1] == '}') || (str[lng -1] == '\n')))
3259                         lng--;
3260                 if (lng != str.size()) {
3261                         str = str.substr(0, lng);
3262                 }
3263         }
3264         if (str.empty()) {
3265                 mres.match_len = -1;
3266                 return mres;
3267         }
3268         LYXERR(Debug::FIND, "After normalization: Matching against:\n'" << str << "'");
3269
3270         LASSERT(use_regexp, /**/);
3271         {
3272                 // use_regexp always true
3273                 LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin);
3274 #if QTSEARCH
3275                 QString qstr = QString::fromStdString(str);
3276                 QRegularExpression const *p_regexp;
3277                 QRegularExpression::MatchType flags = QRegularExpression::NormalMatch;
3278                 if (at_begin) {
3279                         p_regexp = &regexp;
3280                 } else {
3281                         p_regexp = &regexp2;
3282                 }
3283                 QRegularExpressionMatch match = p_regexp->match(qstr, 0, flags);
3284                 if (!match.hasMatch())
3285                         return mres;
3286 #else
3287                 regex const *p_regexp;
3288                 regex_constants::match_flag_type flags;
3289                 if (at_begin) {
3290                         flags = regex_constants::match_continuous;
3291                         p_regexp = &regexp;
3292                 } else {
3293                         flags = regex_constants::match_default;
3294                         p_regexp = &regexp2;
3295                 }
3296                 sregex_iterator re_it(str.begin(), str.end(), *p_regexp, flags);
3297                 if (re_it == sregex_iterator())
3298                         return mres;
3299                 match_results<string::const_iterator> const & m = *re_it;
3300 #endif
3301                 // Whole found string, including the leading
3302                 // std: m[0].second - m[0].first
3303                 // Qt: match.capturedEnd(0) - match.capturedStart(0)
3304                 //
3305                 // Size of the leading string
3306                 // std: m[1].second - m[1].first
3307                 // Qt: match.capturedEnd(1) - match.capturedStart(1)
3308                 int leadingsize = 0;
3309 #if QTSEARCH
3310                 if (match.lastCapturedIndex() > 0) {
3311                         leadingsize = match.capturedEnd(1) - match.capturedStart(1);
3312                 }
3313
3314 #else
3315                 if (m.size() > 2) {
3316                         leadingsize = m[1].second - m[1].first;
3317                 }
3318 #endif
3319 #if QTSEARCH
3320                 mres.match_prefix = match.capturedEnd(2) - match.capturedStart(2);
3321                 mres.match_len = match.capturedEnd(0) - match.capturedEnd(2);
3322                 // because of different number of closing at end of string
3323                 // we have to 'unify' the length of the post-match.
3324                 // Done by ignoring closing parenthesis and linefeeds at string end
3325                 int matchend = match.capturedEnd(0);
3326                 size_t strsize = qstr.size();
3327                 if (!opt.ignoreformat) {
3328                         while (mres.match_len > 0) {
3329                                 QChar c = qstr.at(matchend - 1);
3330                                 if ((c == '\n') || (c == '}') || (c == '{')) {
3331                                         mres.match_len--;
3332                                         matchend--;
3333                                 }
3334                                 else
3335                                         break;
3336                         }
3337                         while (strsize > (size_t) match.capturedEnd(0)) {
3338                                 QChar c = qstr.at(strsize-1);
3339                                 if ((c == '\n') || (c == '}')) {
3340                                         --strsize;
3341                                 }
3342                                 else
3343                                         break;
3344                         }
3345                 }
3346                 // LYXERR0(qstr.toStdString());
3347                 mres.match2end = strsize - matchend;
3348                 mres.pos = match.capturedStart(2);
3349 #else
3350                 mres.match_prefix = m[2].second - m[2].first;
3351                 mres.match_len = m[0].second - m[2].second;
3352                 // ignore closing parenthesis and linefeeds at string end
3353                 size_t strend = m[0].second - m[0].first;
3354                 int matchend = strend;
3355                 size_t strsize = str.size();
3356                 if (!opt.ignoreformat) {
3357                         while (mres.match_len > 0) {
3358                                 char c = str.at(matchend - 1);
3359                                 if ((c == '\n') || (c == '}') || (c == '{')) {
3360                                         mres.match_len--;
3361                                         matchend--;
3362                                 }
3363                                 else
3364                                         break;
3365                         }
3366                         while (strsize > strend) {
3367                                 if ((str.at(strsize-1) == '}') || (str.at(strsize-1) == '\n')) {
3368                                         --strsize;
3369                                 }
3370                                 else
3371                                         break;
3372                         }
3373                 }
3374                 // LYXERR0(str);
3375                 mres.match2end = strsize - matchend;
3376                 mres.pos = m[2].first - m[0].first;;
3377 #endif
3378                 if (mres.match2end < 0)
3379                   mres.match_len = 0;
3380                 mres.leadsize = leadingsize;
3381 #if QTSEARCH
3382                 if (mres.match_len > 0) {
3383                   string a0 = match.captured(0).mid(mres.pos + mres.match_prefix, mres.match_len).toStdString();
3384                   mres.result.push_back(a0);
3385                   for (int i = 3; i <= match.lastCapturedIndex(); i++) {
3386                     mres.result.push_back(match.captured(i).toStdString());
3387                   }
3388                 }
3389 #else
3390                 if (mres.match_len > 0) {
3391                   string a0 = m[0].str().substr(mres.pos + mres.match_prefix, mres.match_len);
3392                   mres.result.push_back(a0);
3393                   for (size_t i = 3; i < m.size(); i++) {
3394                     mres.result.push_back(m[i]);
3395                   }
3396                 }
3397 #endif
3398                 return mres;
3399         }
3400 }
3401
3402
3403 MatchResult MatchStringAdv::operator()(DocIterator const & cur, int len, bool at_begin) const
3404 {
3405         MatchResult mres = findAux(cur, len, at_begin);
3406         int res = mres.match_len;
3407         LYXERR(Debug::FIND,
3408                "res=" << res << ", at_begin=" << at_begin
3409                << ", matchAtStart=" << opt.matchAtStart
3410                << ", inTexted=" << cur.inTexted());
3411         if (opt.matchAtStart) {
3412                 if (cur.pos() != 0)
3413                         mres.match_len = 0;
3414                 else if (mres.match_prefix > 0)
3415                         mres.match_len = 0;
3416                 return mres;
3417         }
3418         else
3419                 return mres;
3420 }
3421
3422 #if 0
/// Replace every unescaped occurrence of the regex \p from in \p t by \p to.
/// An occurrence preceded by an odd number of backslashes counts as escaped
/// and is left untouched.
/// \return true if \p t was changed, false if nothing was replaced
static bool simple_replace(string &t, string from, string to)
{
	// group 1: run of backslashes in front, group 2: the searched text
	regex const pattern("(\\\\)*(" + from + ")");
	string rebuilt;
	size_t consumed = 0;
	sregex_iterator const done;
	for (sregex_iterator hit(t.begin(), t.end(), pattern); hit != done; ++hit) {
		smatch const & m = *hit;
		if ((m.position(2) - m.position(0)) % 2 == 1)
			continue;	// escaped, keep as is
		size_t const start = m.position(2);
		if (consumed < start)
			rebuilt.append(t, consumed, start - consumed);
		rebuilt += to;
		consumed = start + m.length(2);
	}
	if (consumed == 0)
		return false;
	// copy the unchanged remainder
	if (consumed < t.length())
		rebuilt.append(t, consumed, string::npos);
	t = rebuilt;
	return true;
}
3445 #endif
3446
3447 string MatchStringAdv::normalize(docstring const & s) const
3448 {
3449         string t;
3450         t = lyx::to_utf8(s);
3451         // Remove \n at begin
3452         while (!t.empty() && t[0] == '\n')
3453                 t = t.substr(1);
3454         // Remove \n at end
3455         while (!t.empty() && t[t.size() - 1] == '\n')
3456                 t = t.substr(0, t.size() - 1);
3457         size_t pos;
3458         // Handle all other '\n'
3459         while ((pos = t.find("\n")) != string::npos) {
3460                 if (pos > 1 && t[pos-1] == '\\' && t[pos-2] == '\\' ) {
3461                         // Handle '\\\n'
3462                         if (isAlnumASCII(t[pos+1])) {
3463                                 t.replace(pos-2, 3, " ");
3464                         }
3465                         else {
3466                                 t.replace(pos-2, 3, "");
3467                         }
3468                 }
3469                 else if (!isAlnumASCII(t[pos+1]) || !isAlnumASCII(t[pos-1])) {
3470                         // '\n' adjacent to non-alpha-numerics, discard
3471                         t.replace(pos, 1, "");
3472                 }
3473                 else {
3474                         // Replace all other \n with spaces
3475                         t.replace(pos, 1, " ");
3476                 }
3477         }
3478         // Remove stale empty \emph{}, \textbf{} and similar blocks from latexify
3479         // Kornel: Added textsl, textsf, textit, texttt and noun
3480         // + allow to seach for colored text too
3481         LYXERR(Debug::FIND, "Removing stale empty macros from: " << t);
3482         while (regex_replace(t, t, "\\\\(emph|noun|text(bf|sl|sf|it|tt)|(u|uu)line|(s|x)out|uwave)(\\{(\\{\\})?\\})+", ""))
3483                 LYXERR(Debug::FIND, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
3484         while (regex_replace(t, t, "\\\\((sub)?(((sub)?section)|paragraph)|part)\\*?(\\{(\\{\\})?\\})+", ""))
3485                 LYXERR(Debug::FIND, "  further removing stale empty \\emph{}, \\textbf{} macros from: " << t);
3486         while (regex_replace(t, t, "\\\\(foreignlanguage|textcolor|item)\\{[a-z]+\\}(\\{(\\{\\})?\\})+", ""));
3487
3488         return t;
3489 }
3490
3491
3492 docstring stringifyFromCursor(DocIterator const & cur, int len)
3493 {
3494         LYXERR(Debug::FIND, "Stringifying with len=" << len << " from cursor at pos: " << cur);
3495         if (cur.inTexted()) {
3496                 Paragraph const & par = cur.paragraph();
3497                 // TODO what about searching beyond/across paragraph breaks ?
3498                 // TODO Try adding a AS_STR_INSERTS as last arg
3499                 pos_type end = ( len == -1 || cur.pos() + len > int(par.size()) ) ?
3500                         int(par.size()) : cur.pos() + len;
3501                 // OutputParams runparams(&cur.buffer()->params().encoding());
3502                 OutputParams runparams(encodings.fromLyXName("utf8"));
3503                 runparams.nice = true;
3504                 runparams.flavor = Flavor::XeTeX;
3505                 runparams.linelen = 10000; //lyxrc.plaintext_linelen;
3506                 // No side effect of file copying and image conversion
3507                 runparams.dryrun = true;
3508                 int option = AS_STR_INSETS | AS_STR_PLAINTEXT;
3509                 if (ignoreFormats.getDeleted()) {
3510                         option |= AS_STR_SKIPDELETE;
3511                         runparams.for_searchAdv = OutputParams::SearchWithoutDeleted;
3512                 }
3513                 else {
3514                         runparams.for_searchAdv = OutputParams::SearchWithDeleted;
3515                 }
3516                 LYXERR(Debug::FIND, "Stringifying with cur: "
3517                        << cur << ", from pos: " << cur.pos() << ", end: " << end);
3518                 return par.asString(cur.pos(), end,
3519                         option,
3520                         &runparams);
3521         } else if (cur.inMathed()) {
3522                 CursorSlice cs = cur.top();
3523                 MathData md = cs.cell();
3524                 MathData::const_iterator it_end =
3525                         (( len == -1 || cs.pos() + len > int(md.size()))
3526                          ? md.end()
3527                          : md.begin() + cs.pos() + len );
3528                 MathData md2;
3529                 for (MathData::const_iterator it = md.begin() + cs.pos();
3530                      it != it_end; ++it)
3531                         md2.push_back(*it);
3532                 docstring s = asString(md2);
3533                 LYXERR(Debug::FIND, "Stringified math: '" << s << "'");
3534                 return s;
3535         }
3536         LYXERR(Debug::FIND, "Don't know how to stringify from here: " << cur);
3537         return docstring();
3538 }
3539
3540
3541 /** Computes the LaTeX export of buf starting from cur and ending len positions
3542  * after cur, if len is positive, or at the paragraph or innermost inset end
3543  * if len is -1.
3544  */
3545 docstring latexifyFromCursor(DocIterator const & cur, int len)
3546 {
3547         /*
3548         LYXERR(Debug::FIND, "Latexifying with len=" << len << " from cursor at pos: " << cur);
3549         LYXERR(Debug::FIND, "  with cur.lastpost=" << cur.lastpos() << ", cur.lastrow="
3550                << cur.lastrow() << ", cur.lastcol=" << cur.lastcol());
3551         */
3552         Buffer const & buf = *cur.buffer();
3553
3554         odocstringstream ods;
3555         otexstream os(ods);
3556         //OutputParams runparams(&buf.params().encoding());
3557         OutputParams runparams(encodings.fromLyXName("utf8"));
3558         runparams.nice = false;
3559         runparams.flavor = Flavor::XeTeX;
3560         runparams.linelen = 8000; //lyxrc.plaintext_linelen;
3561         // No side effect of file copying and image conversion
3562         runparams.dryrun = true;
3563         if (ignoreFormats.getDeleted()) {
3564                 runparams.for_searchAdv = OutputParams::SearchWithoutDeleted;
3565         }
3566         else {
3567                 runparams.for_searchAdv = OutputParams::SearchWithDeleted;
3568         }
3569
3570         if (cur.inTexted()) {
3571                 // @TODO what about searching beyond/across paragraph breaks ?
3572                 pos_type endpos = cur.paragraph().size();
3573                 if (len != -1 && endpos > cur.pos() + len)
3574                         endpos = cur.pos() + len;
3575                 TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams,
3576                           string(), cur.pos(), endpos);
3577                 string s = lyx::to_utf8(ods.str());
3578                 LYXERR(Debug::FIND, "Latexified +modified text: '" << s << "'");
3579                 return(lyx::from_utf8(s));
3580         } else if (cur.inMathed()) {
3581                 // Retrieve the math environment type, and add '$' or '$[' or others (\begin{equation}) accordingly
3582                 for (int s = cur.depth() - 1; s >= 0; --s) {
3583                         CursorSlice const & cs = cur[s];
3584                         if (cs.asInsetMath() && cs.asInsetMath()->asHullInset()) {
3585                                 TeXMathStream ws(os);
3586                                 cs.asInsetMath()->asHullInset()->header_write(ws);
3587                                 break;
3588                         }
3589                 }
3590
3591                 CursorSlice const & cs = cur.top();
3592                 MathData md = cs.cell();
3593                 MathData::const_iterator it_end =
3594                         ((len == -1 || cs.pos() + len > int(md.size()))
3595                          ? md.end()
3596                          : md.begin() + cs.pos() + len);
3597                 MathData md2;
3598                 for (MathData::const_iterator it = md.begin() + cs.pos();
3599                      it != it_end; ++it)
3600                         md2.push_back(*it);
3601
3602                 ods << asString(md2);
3603                 // Retrieve the math environment type, and add '$' or '$]'
3604                 // or others (\end{equation}) accordingly
3605                 for (int s = cur.depth() - 1; s >= 0; --s) {
3606                         CursorSlice const & cs2 = cur[s];
3607                         InsetMath * inset = cs2.asInsetMath();
3608                         if (inset && inset->asHullInset()) {
3609                                 TeXMathStream ws(os);
3610                                 inset->asHullInset()->footer_write(ws);
3611                                 break;
3612                         }
3613                 }
3614                 LYXERR(Debug::FIND, "Latexified math: '" << lyx::to_utf8(ods.str()) << "'");
3615         } else {
3616                 LYXERR(Debug::FIND, "Don't know how to stringify from here: " << cur);
3617         }
3618         return ods.str();
3619 }
3620
#if defined(ResultsDebug)
/// Debugging aid: dump a MatchResult together with the cursor range it
/// belongs to. Only compiled if ResultsDebug is defined.
/// \param mres the result to print
/// \param from free text naming the caller
/// \param cur  cursor at which the match was attempted
static void displayMResult(MatchResult &mres, string from, DocIterator & cur)
{
	LYXERR0( "from:\t\t\t" << from);

	string status;
	if (mres.pos_len > 0)
		status = "FINALSEARCH";		// pos_len is set in finalize
	else if (mres.match_len <= 0)
		status = "MissedSearch";
	else if ((mres.match_prefix == 0) && (mres.pos == mres.leadsize))
		status = "Good Match";
	else
		status = "Matched in";

	LYXERR0( status << "(" << cur.pos() << " ... " << mres.searched_size + cur.pos() << ") cur.lastpos(" << cur.lastpos() << ")");

	bool const has_lengths = (mres.leadsize > 0) || (mres.match_len > 0) || (mres.match2end > 0);
	if (has_lengths)
		LYXERR0( "leadsize(" << mres.leadsize << ") match_len(" << mres.match_len << ") match2end(" << mres.match2end << ")");

	bool const has_offsets = (mres.pos > 0) || (mres.match_prefix > 0);
	if (has_offsets)
		LYXERR0( "pos(" << mres.pos << ") match_prefix(" << mres.match_prefix << ")");

	for (size_t idx = 0; idx < mres.result.size(); ++idx)
		LYXERR0( "Match " << idx << " = \"" << mres.result[idx] << "\"");
}
	// The macro carries its own ';' so that it vanishes completely below
	#define displayMres(s, txt, cur) displayMResult(s, txt, cur);
#else
	#define displayMres(s, txt, cur)
#endif
3654
3655 /** Finalize an advanced find operation, advancing the cursor to the innermost
3656  ** position that matches, plus computing the length of the matching text to
3657  ** be selected
3658  ** Return the cur.pos() difference between start and end of found match
3659  **/
3660 MatchResult findAdvFinalize(DocIterator & cur, MatchStringAdv const & match, MatchResult const & expected = MatchResult(-1))
3661 {
3662         // Search the foremost position that matches (avoids find of entire math
3663         // inset when match at start of it)
3664         DocIterator old_cur(cur.buffer());
3665         MatchResult mres;
3666         static MatchResult fail = MatchResult();
3667         MatchResult max_match;
3668         // If (prefix_len > 0) means that forwarding 1 position will remove the complete entry
3669         // Happens with e.g. hyperlinks
3670         // either one sees "http://www.bla.bla" or nothing
3671         // so the search for "www" gives prefix_len = 7 (== sizeof("http://")
3672         // and although we search for only 3 chars, we find the whole hyperlink inset
3673         bool at_begin = (expected.match_prefix == 0);
3674         if (!match.opt.forward && match.opt.ignoreformat) {
3675                 if (expected.pos > 0)
3676                         return fail;
3677         }
3678         LASSERT(at_begin, /**/);
3679         if (expected.match_len > 0 && at_begin) {
3680                 // Search for deepest match
3681                 old_cur = cur;
3682                 max_match = expected;
3683                 do {
3684                         size_t d = cur.depth();
3685                         cur.forwardPos();
3686                         if (!cur)
3687                                 break;
3688                         if (cur.depth() < d)
3689                                 break;
3690                         if (cur.depth() == d)
3691                                 break;
3692                         size_t lastd = d;
3693                         while (cur && cur.depth() > lastd) {
3694                                 lastd = cur.depth();
3695                                 mres = match(cur, -1, at_begin);
3696                                 displayMres(mres, "Checking innermost", cur);
3697                                 if (mres.match_len > 0)
3698                                         break;
3699                                 // maybe deeper?
3700                                 cur.forwardPos();
3701                         }
3702                         if (mres.match_len < expected.match_len)
3703                                 break;
3704                         max_match = mres;
3705                         old_cur = cur;;
3706                 } while(1);
3707                 cur = old_cur;
3708         }
3709         else {
3710                 // (expected.match_len <= 0)
3711                 mres = match(cur);      /* match valid only if not searching whole words */
3712                 displayMres(mres, "Start with negative match", cur);
3713                 max_match = mres;
3714         }
3715         if (max_match.match_len <= 0) return fail;
3716         LYXERR(Debug::FIND, "Ok");
3717
3718         // Compute the match length
3719         int len = 1;
3720         if (cur.pos() + len > cur.lastpos())
3721           return fail;
3722
3723         LASSERT(match.use_regexp, /**/);
3724         {
3725           int minl = 1;
3726           int maxl = cur.lastpos() - cur.pos();
3727           // Greedy behaviour while matching regexps
3728           while (maxl > minl) {
3729             MatchResult mres2;
3730             mres2 = match(cur, len, at_begin);
3731             displayMres(mres2, "Finalize loop", cur);
3732             int actual_match_len = mres2.match_len;
3733             if (actual_match_len >= max_match.match_len) {
3734               // actual_match_len > max_match _can_ happen,
3735               // if the search area splits
3736               // some following word so that the regex
3737               // (e.g. 'r.*r\b' matches 'r' from the middle of the
3738               // splitted word)
3739               // This means, the len value is too big
3740               actual_match_len = max_match.match_len;
3741               max_match = mres2;
3742               max_match.match_len = actual_match_len;
3743               maxl = len;
3744               if (maxl - minl < 4)
3745                 len = (int)((maxl + minl)/2);
3746               else
3747                 len = (int)(minl + (maxl - minl + 3)/4);
3748             }
3749             else {
3750               // (actual_match_len < max_match.match_len)
3751               minl = len + 1;
3752               len = (int)((maxl + minl)/2);
3753             }
3754           }
3755           len = minl;
3756           old_cur = cur;
3757           // Search for real start of matched characters
3758           while (len > 1) {
3759             MatchResult actual_match;
3760             do {
3761               cur.forwardPos();
3762             } while (cur.depth() > old_cur.depth()); /* Skip inner insets */
3763             if (cur.depth() < old_cur.depth()) {
3764               // Outer inset?
3765               LYXERR(Debug::INFO, "cur.depth() < old_cur.depth(), this should never happen");
3766               break;
3767             }
3768             if (cur.pos() != old_cur.pos()) {
3769               // OK, forwarded 1 pos in actual inset
3770               actual_match = match(cur, len-1, at_begin);
3771               if (actual_match.match_len == max_match.match_len) {
3772                 // Ha, got it! The shorter selection has the same match length
3773                 len--;
3774                 old_cur = cur;
3775                 max_match = actual_match;
3776               }
3777               else {
3778                 // OK, the shorter selection matches less chars, revert to previous value
3779                 cur = old_cur;
3780                 break;
3781               }
3782             }
3783             else {
3784               LYXERR(Debug::INFO, "cur.pos() == old_cur.pos(), this should never happen");
3785               actual_match = match(cur, len, at_begin);
3786               if (actual_match.match_len == max_match.match_len) {
3787                 old_cur = cur;
3788                 max_match = actual_match;
3789               }
3790             }
3791           }
3792           if (len == 0)
3793             return fail;
3794           else {
3795             max_match.pos_len = len;
3796             displayMres(max_match, "SEARCH RESULT", cur)
3797             return max_match;
3798           }
3799         }
3800 }
3801
3802 /// Finds forward
3803 int findForwardAdv(DocIterator & cur, MatchStringAdv & match)
3804 {
3805         if (!cur)
3806                 return 0;
3807         bool repeat = false;
3808         DocIterator orig_cur;   // to be used if repeat not successful
3809         MatchResult orig_mres;
3810         while (!theApp()->longOperationCancelled() && cur) {
3811                 //(void) findAdvForwardInnermost(cur);
3812                 LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
3813                 MatchResult mres = match(cur, -1, false);
3814                 string msg = "Starting";
3815                 if (repeat)
3816                         msg = "Repeated";
3817                 displayMres(mres, msg + " findForwardAdv", cur)
3818                 int match_len = mres.match_len;
3819                 if ((mres.pos > 100000) || (mres.match2end > 100000) || (match_len > 100000)) {
3820                         LYXERR(Debug::INFO, "BIG LENGTHS: " << mres.pos << ", " << match_len << ", " << mres.match2end);
3821                         match_len = 0;
3822                 }
3823                 if (match_len <= 0) {
3824                         // This should exit nested insets, if any, or otherwise undefine the currsor.
3825                         cur.pos() = cur.lastpos();
3826                         LYXERR(Debug::FIND, "Advancing pos: cur=" << cur);
3827                         cur.forwardPos();
3828                 }
3829                 else {  // match_len > 0
3830                         // Try to find the begin of searched string
3831                         int increment;
3832                         int firstInvalid = cur.lastpos() - cur.pos();
3833                         {
3834                                 int incrmatch = (mres.match_prefix + mres.pos - mres.leadsize + 1)*3/4;
3835                                 int incrcur = (firstInvalid + 1 )*3/4;
3836                                 if (incrcur < incrmatch)
3837                                         increment = incrcur;
3838                                 else
3839                                         increment = incrmatch;
3840                                 if (increment < 1)
3841                                         increment = 1;
3842                         }
3843                         LYXERR(Debug::FIND, "Set increment to " << increment);
3844                         while (increment > 0) {
3845                                 DocIterator old_cur = cur;
3846                                 if (cur.pos() + increment >= cur.lastpos()) {
3847                                         increment /= 2;
3848                                         continue;
3849                                 }
3850                                 cur.pos() = cur.pos() + increment;
3851                                 MatchResult mres2 = match(cur, -1, false);
3852                                 displayMres(mres2, "findForwardAdv loop", cur)
3853                                 switch (interpretMatch(mres, mres2)) {
3854                                         case MatchResult::newIsTooFar:
3855                                                 // behind the expected match
3856                                                 firstInvalid = increment;
3857                                                 cur = old_cur;
3858                                                 increment /= 2;
3859                                                 break;
3860                                         case MatchResult::newIsBetter:
3861                                                 // not reached yet, but cur.pos()+increment is bettert
3862                                                 mres = mres2;
3863                                                 firstInvalid -= increment;
3864                                                 if (increment > firstInvalid*3/4)
3865                                                         increment = firstInvalid*3/4;
3866                                                 if ((mres2.pos == mres2.leadsize) && (increment >= mres2.match_prefix)) {
3867                                                         if (increment >= mres2.match_prefix)
3868                                                                 increment = (mres2.match_prefix+1)*3/4;
3869                                                 }
3870                                                 break;
3871                                         default:
3872                                                 // Todo@
3873                                                 // Handle not like MatchResult::newIsTooFar
3874                                                 LYXERR0( "Probably too far: Increment = " << increment << " match_prefix = " << mres.match_prefix);
3875                                                 firstInvalid--;
3876                                                 increment = increment*3/4;
3877                                                 cur = old_cur;
3878                                         break;
3879                                 }
3880                         }
3881                         if (mres.match_len > 0) {
3882                                 if (mres.match_prefix + mres.pos - mres.leadsize > 0) {
3883                                         // The match seems to indicate some deeper level
3884                                         repeat = true;
3885                                         orig_cur = cur;
3886                                         orig_mres = mres;
3887                                         cur.forwardPos();
3888                                         continue;
3889                                 }
3890                         }
3891                         else if (repeat) {
3892                                 // should never be reached.
3893                                 cur = orig_cur;
3894                                 mres = orig_mres;
3895                         }
3896                         // LYXERR0("Leaving first loop");
3897                         LYXERR(Debug::FIND, "Finalizing 1");
3898                         MatchResult found_match = findAdvFinalize(cur, match, mres);
3899                         if (found_match.match_len > 0) {
3900                                 LASSERT(found_match.pos_len > 0, /**/);
3901                                 match.FillResults(found_match);
3902                                 return found_match.pos_len;
3903                         }
3904                         else {
3905                                 // try next possible match
3906                                 cur.forwardPos();
3907                                 repeat = false;
3908                                 continue;
3909                         }
3910                 }
3911         }
3912         return 0;
3913 }
3914
3915
3916 /// Find the most backward consecutive match within same paragraph while searching backwards.
3917 MatchResult findMostBackwards(DocIterator & cur, MatchStringAdv const & match, MatchResult &expected)
3918 {
3919         DocIterator cur_begin = cur;
3920         cur_begin.pos() = 0;
3921         DocIterator tmp_cur = cur;
3922         MatchResult mr = findAdvFinalize(tmp_cur, match, expected);
3923         Inset & inset = cur.inset();
3924         for (; cur != cur_begin; cur.backwardPos()) {
3925                 LYXERR(Debug::FIND, "findMostBackwards(): cur=" << cur);
3926                 DocIterator new_cur = cur;
3927                 new_cur.backwardPos();
3928                 if (new_cur == cur || &new_cur.inset() != &inset || !match(new_cur).match_len)
3929                         break;
3930                 MatchResult new_mr = findAdvFinalize(new_cur, match, expected);
3931                 if (new_mr.match_len == mr.match_len)
3932                         break;
3933                 mr = new_mr;
3934         }
3935         LYXERR(Debug::FIND, "findMostBackwards(): exiting with cur=" << cur);
3936         return mr;
3937 }
3938
3939
3940 /// Finds backwards
3941 int findBackwardsAdv(DocIterator & cur, MatchStringAdv & match)
3942 {
3943         if (! cur)
3944                 return 0;
3945         // Backup of original position
3946         DocIterator cur_begin = doc_iterator_begin(cur.buffer());
3947         if (cur == cur_begin)
3948                 return 0;
3949         cur.backwardPos();
3950         DocIterator cur_orig(cur);
3951         bool pit_changed = false;
3952         do {
3953                 cur.pos() = 0;
3954                 MatchResult found_match = match(cur, -1, false);
3955
3956                 if (found_match.match_len > 0) {
3957                         if (pit_changed)
3958                                 cur.pos() = cur.lastpos();
3959                         else
3960                                 cur.pos() = cur_orig.pos();
3961                         LYXERR(Debug::FIND, "findBackAdv2: cur: " << cur);
3962                         DocIterator cur_prev_iter;
3963                         do {
3964                                 found_match = match(cur);
3965                                 LYXERR(Debug::FIND, "findBackAdv3: found_match="
3966                                        << (found_match.match_len > 0) << ", cur: " << cur);
3967                                 if (found_match.match_len > 0) {
3968                                         MatchResult found_mr = findMostBackwards(cur, match, found_match);
3969                                         if (found_mr.pos_len > 0) {
3970                                                 match.FillResults(found_mr);
3971                                                 return found_mr.pos_len;
3972                                         }
3973                                 }
3974
3975                                 // Stop if begin of document reached
3976                                 if (cur == cur_begin)
3977                                         break;
3978                                 cur_prev_iter = cur;
3979                                 cur.backwardPos();
3980                         } while (true);
3981                 }
3982                 if (cur == cur_begin)
3983                         break;
3984                 if (cur.pit() > 0)
3985                         --cur.pit();
3986                 else
3987                         cur.backwardPos();
3988                 pit_changed = true;
3989         } while (!theApp()->longOperationCancelled());
3990         return 0;
3991 }
3992
3993
3994 } // namespace
3995
3996
3997 docstring stringifyFromForSearch(FindAndReplaceOptions const & opt,
3998                                  DocIterator const & cur, int len)
3999 {
4000         if (cur.pos() < 0 || cur.pos() > cur.lastpos())
4001                 return docstring();
4002         if (!opt.ignoreformat)
4003                 return latexifyFromCursor(cur, len);
4004         else
4005                 return stringifyFromCursor(cur, len);
4006 }
4007
4008
4009 FindAndReplaceOptions::FindAndReplaceOptions(
4010         docstring const & _find_buf_name, bool _casesensitive,
4011         bool _matchword, bool _forward, bool _expandmacros, bool _ignoreformat,
4012         docstring const & _repl_buf_name, bool _keep_case,
4013         SearchScope _scope, SearchRestriction _restr, bool _replace_all)
4014         : find_buf_name(_find_buf_name), casesensitive(_casesensitive), matchword(_matchword),
4015           forward(_forward), expandmacros(_expandmacros), ignoreformat(_ignoreformat),
4016           repl_buf_name(_repl_buf_name), keep_case(_keep_case), scope(_scope), restr(_restr), replace_all(_replace_all)
4017 {
4018 }
4019
4020
4021 namespace {
4022
4023
4024 /** Check if 'len' letters following cursor are all non-lowercase */
4025 static bool allNonLowercase(Cursor const & cur, int len)
4026 {
4027         pos_type beg_pos = cur.selectionBegin().pos();
4028         pos_type end_pos = cur.selectionBegin().pos() + len;
4029         if (len > cur.lastpos() + 1 - beg_pos) {
4030                 LYXERR(Debug::FIND, "This should not happen, more debug needed");
4031                 len = cur.lastpos() + 1 - beg_pos;
4032                 end_pos = beg_pos + len;
4033         }
4034         for (pos_type pos = beg_pos; pos != end_pos; ++pos)
4035                 if (isLowerCase(cur.paragraph().getChar(pos)))
4036                         return false;
4037         return true;
4038 }
4039
4040
4041 /** Check if first letter is upper case and second one is lower case */
4042 static bool firstUppercase(Cursor const & cur)
4043 {
4044         char_type ch1, ch2;
4045         pos_type pos = cur.selectionBegin().pos();
4046         if (pos >= cur.lastpos() - 1) {
4047                 LYXERR(Debug::FIND, "No upper-case at cur: " << cur);
4048                 return false;
4049         }
4050         ch1 = cur.paragraph().getChar(pos);
4051         ch2 = cur.paragraph().getChar(pos + 1);
4052         bool result = isUpperCase(ch1) && isLowerCase(ch2);
4053         LYXERR(Debug::FIND, "firstUppercase(): "
4054                << "ch1=" << ch1 << "(" << char(ch1) << "), ch2="
4055                << ch2 << "(" << char(ch2) << ")"
4056                << ", result=" << result << ", cur=" << cur);
4057         return result;
4058 }
4059
4060
4061 /** Make first letter of supplied buffer upper-case, and the rest lower-case.
4062  **
4063  ** \fixme What to do with possible further paragraphs in replace buffer ?
4064  **/
4065 static void changeFirstCase(Buffer & buffer, TextCase first_case, TextCase others_case)
4066 {
4067         ParagraphList::iterator pit = buffer.paragraphs().begin();
4068         LASSERT(!pit->empty(), /**/);
4069         pos_type right = pos_type(1);
4070         pit->changeCase(buffer.params(), pos_type(0), right, first_case);
4071         right = pit->size();
4072         pit->changeCase(buffer.params(), pos_type(1), right, others_case);
4073 }
4074 } // namespace
4075
/// Substitute back-references of the form "$$N" (N a single digit) in \p t
/// by replacements[N].
///
/// A reference is left untouched if it is preceded by an odd number of
/// backslashes, if N >= \p maxmatchnum, or if \p replacements has no
/// element N.
///
/// \param t             in/out: text to process; only modified on success.
/// \param maxmatchnum   number of valid entries callers want to be used.
/// \param replacements  replacement strings, indexed by N.
/// \return true if at least one reference was substituted.
static bool replaceMatches(std::string &t, int maxmatchnum, std::vector <std::string> const & replacements)
{
	// group 1: optional backslashes, group 2: "$$N", group 3: the digit N
	static std::regex const rematch("(\\\\)*(\\$\\$([0-9]))");
	std::string result;
	size_t lastpos = 0;
	for (std::sregex_iterator it(t.begin(), t.end(), rematch), end; it != end; ++it) {
		std::smatch const & sub = *it;
		// Distance between match start and "$$N" == number of backslashes.
		if ((sub.position(2) - sub.position(0)) % 2 == 1)
			continue;
		// Group 3 is exactly one decimal digit.
		int const num = sub.str(3)[0] - '0';
		if (num >= maxmatchnum)
			continue;
		// Never index past the end of the replacement list.
		if (static_cast<size_t>(num) >= replacements.size())
			continue;
		size_t const ref_pos = static_cast<size_t>(sub.position(2));
		if (lastpos < ref_pos)
			result += t.substr(lastpos, ref_pos - lastpos);
		result += replacements[num];
		lastpos = ref_pos + static_cast<size_t>(sub.length(2));
	}
	// A substitution always advances lastpos past "$$N", so 0 means none.
	if (lastpos == 0)
		return false;
	if (lastpos < t.length())
		result += t.substr(lastpos, t.length() - lastpos);
	t = result;
	return true;
}
4103
4104 ///
4105 static int findAdvReplace(BufferView * bv, FindAndReplaceOptions const & opt, MatchStringAdv & matchAdv)
4106 {
4107         Cursor & cur = bv->cursor();
4108         if (opt.repl_buf_name.empty()
4109             || theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true) == 0
4110             || theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0)
4111                 return 0;
4112
4113         DocIterator sel_beg = cur.selectionBegin();
4114         DocIterator sel_end = cur.selectionEnd();
4115         if (&sel_beg.inset() != &sel_end.inset()
4116             || sel_beg.pit() != sel_end.pit()
4117             || sel_beg.idx() != sel_end.idx())
4118                 return 0;
4119         int sel_len = sel_end.pos() - sel_beg.pos();
4120         LYXERR(Debug::FIND, "sel_beg: " << sel_beg << ", sel_end: " << sel_end
4121                << ", sel_len: " << sel_len << endl);
4122         if (sel_len == 0)
4123                 return 0;
4124         LASSERT(sel_len > 0, return 0);
4125
4126         if (!matchAdv(sel_beg, sel_len).match_len)
4127                 return 0;
4128
4129         // Build a copy of the replace buffer, adapted to the KeepCase option
4130         Buffer const & repl_buffer_orig = *theBufferList().getBuffer(FileName(to_utf8(opt.repl_buf_name)), true);
4131         ostringstream oss;
4132         repl_buffer_orig.write(oss);
4133         string lyx = oss.str();
4134         if (matchAdv.valid_matches > 0) {
4135           replaceMatches(lyx, matchAdv.valid_matches, matchAdv.matches);
4136         }
4137         Buffer repl_buffer("", false);
4138         repl_buffer.setUnnamed(true);
4139         LASSERT(repl_buffer.readString(lyx), return 0);
4140         if (opt.keep_case && sel_len >= 2) {
4141                 LYXERR(Debug::FIND, "keep_case true: cur.pos()=" << cur.pos() << ", sel_len=" << sel_len);
4142                 if (cur.inTexted()) {
4143                         if (firstUppercase(cur))
4144                                 changeFirstCase(repl_buffer, text_uppercase, text_lowercase);
4145                         else if (allNonLowercase(cur, sel_len))
4146                                 changeFirstCase(repl_buffer, text_uppercase, text_uppercase);
4147                 }
4148         }
4149         cap::cutSelection(cur, false);
4150         if (cur.inTexted()) {
4151                 repl_buffer.changeLanguage(
4152                         repl_buffer.language(),
4153                         cur.getFont().language());
4154                 LYXERR(Debug::FIND, "Replacing by pasteParagraphList()ing repl_buffer");
4155                 LYXERR(Debug::FIND, "Before pasteParagraphList() cur=" << cur << endl);
4156                 cap::pasteParagraphList(cur, repl_buffer.paragraphs(),
4157                                         repl_buffer.params().documentClassPtr(),
4158                                         repl_buffer.params().authors(),
4159                                         bv->buffer().errorList("Paste"));
4160                 LYXERR(Debug::FIND, "After pasteParagraphList() cur=" << cur << endl);
4161                 sel_len = repl_buffer.paragraphs().begin()->size();
4162         } else if (cur.inMathed()) {
4163                 odocstringstream ods;
4164                 otexstream os(ods);
4165                 // OutputParams runparams(&repl_buffer.params().encoding());
4166                 OutputParams runparams(encodings.fromLyXName("utf8"));
4167                 runparams.nice = false;
4168                 runparams.flavor = Flavor::XeTeX;
4169                 runparams.linelen = 8000; //lyxrc.plaintext_linelen;
4170                 runparams.dryrun = true;
4171                 TeXOnePar(repl_buffer, repl_buffer.text(), 0, os, runparams);
4172                 //repl_buffer.getSourceCode(ods, 0, repl_buffer.paragraphs().size(), false);
4173                 docstring repl_latex = ods.str();
4174                 LYXERR(Debug::FIND, "Latexified replace_buffer: '" << repl_latex << "'");
4175                 string s;
4176                 (void)regex_replace(to_utf8(repl_latex), s, "\\$(.*)\\$", "$1");
4177                 (void)regex_replace(s, s, "\\\\\\[(.*)\\\\\\]", "$1");
4178                 repl_latex = from_utf8(s);
4179                 LYXERR(Debug::FIND, "Replacing by insert()ing latex: '" << repl_latex << "' cur=" << cur << " with depth=" << cur.depth());
4180                 MathData ar(cur.buffer());
4181                 asArray(repl_latex, ar, Parse::NORMAL);
4182                 cur.insert(ar);
4183                 sel_len = ar.size();
4184                 LYXERR(Debug::FIND, "After insert() cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len);
4185         }
4186         if (cur.pos() >= sel_len)
4187                 cur.pos() -= sel_len;
4188         else
4189                 cur.pos() = 0;
4190         LYXERR(Debug::FIND, "After pos adj cur=" << cur << " with depth: " << cur.depth() << " and len: " << sel_len);
4191         bv->putSelectionAt(DocIterator(cur), sel_len, !opt.forward);
4192         bv->processUpdateFlags(Update::Force);
4193         return 1;
4194 }
4195
4196
4197 /// Perform a FindAdv operation.
4198 bool findAdv(BufferView * bv, FindAndReplaceOptions & opt)
4199 {
4200         DocIterator cur;
4201         int pos_len = 0;
4202
4203         // e.g., when invoking word-findadv from mini-buffer wither with
4204         //       wrong options syntax or before ever opening advanced F&R pane
4205         if (theBufferList().getBuffer(FileName(to_utf8(opt.find_buf_name)), true) == 0)
4206                 return false;
4207
4208         try {
4209                 MatchStringAdv matchAdv(bv->buffer(), opt);
4210 #if QTSEARCH
4211                 if (!matchAdv.regexIsValid) {
4212                         bv->message(lyx::from_utf8(matchAdv.regexError));
4213                         return(false);
4214                 }
4215 #endif
4216                 int length = bv->cursor().selectionEnd().pos() - bv->cursor().selectionBegin().pos();
4217                 if (length > 0)
4218                         bv->putSelectionAt(bv->cursor().selectionBegin(), length, !opt.forward);
4219                 num_replaced += findAdvReplace(bv, opt, matchAdv);
4220                 cur = bv->cursor();
4221                 if (opt.forward)
4222                         pos_len = findForwardAdv(cur, matchAdv);
4223                 else
4224                         pos_len = findBackwardsAdv(cur, matchAdv);
4225         } catch (exception & ex) {
4226                 bv->message(from_utf8(ex.what()));
4227                 return false;
4228         }
4229
4230         if (pos_len == 0) {
4231                 if (num_replaced > 0) {
4232                         switch (num_replaced)
4233                         {
4234                                 case 1:
4235                                         bv->message(_("One match has been replaced."));
4236                                         break;
4237                                 case 2:
4238                                         bv->message(_("Two matches have been replaced."));
4239                                         break;
4240                                 default:
4241                                         bv->message(bformat(_("%1$d matches have been replaced."), num_replaced));
4242                                         break;
4243                         }
4244                         num_replaced = 0;
4245                 }
4246                 else {
4247                         bv->message(_("Match not found."));
4248                 }
4249                 return false;
4250         }
4251
4252         if (num_replaced > 0)
4253                 bv->message(_("Match has been replaced."));
4254         else
4255                 bv->message(_("Match found."));
4256
4257         if (cur.pos() + pos_len > cur.lastpos()) {
4258                 // Prevent crash in bv->putSelectionAt()
4259                 // Should never happen, maybe LASSERT() here?
4260                 pos_len = cur.lastpos() - cur.pos();
4261         }
4262         LYXERR(Debug::FIND, "Putting selection at cur=" << cur << " with len: " << pos_len);
4263         bv->putSelectionAt(cur, pos_len, !opt.forward);
4264
4265         return true;
4266 }
4267
4268
4269 ostringstream & operator<<(ostringstream & os, FindAndReplaceOptions const & opt)
4270 {
4271         os << to_utf8(opt.find_buf_name) << "\nEOSS\n"
4272            << opt.casesensitive << ' '
4273            << opt.matchword << ' '
4274            << opt.forward << ' '
4275            << opt.expandmacros << ' '
4276            << opt.ignoreformat << ' '
4277            << opt.replace_all << ' '
4278            << to_utf8(opt.repl_buf_name) << "\nEOSS\n"
4279            << opt.keep_case << ' '
4280            << int(opt.scope) << ' '
4281            << int(opt.restr);
4282
4283         LYXERR(Debug::FIND, "built: " << os.str());
4284
4285         return os;
4286 }
4287
4288
4289 istringstream & operator>>(istringstream & is, FindAndReplaceOptions & opt)
4290 {
4291         // LYXERR(Debug::FIND, "parsing");
4292         string s;
4293         string line;
4294         getline(is, line);
4295         while (line != "EOSS") {
4296                 if (! s.empty())
4297                         s = s + "\n";
4298                 s = s + line;
4299                 if (is.eof())   // Tolerate malformed request
4300                         break;
4301                 getline(is, line);
4302         }
4303         // LYXERR(Debug::FIND, "file_buf_name: '" << s << "'");
4304         opt.find_buf_name = from_utf8(s);
4305         is >> opt.casesensitive >> opt.matchword >> opt.forward >> opt.expandmacros >> opt.ignoreformat >> opt.replace_all;
4306         is.get();       // Waste space before replace string
4307         s = "";
4308         getline(is, line);
4309         while (line != "EOSS") {
4310                 if (! s.empty())
4311                         s = s + "\n";
4312                 s = s + line;
4313                 if (is.eof())   // Tolerate malformed request
4314                         break;
4315                 getline(is, line);
4316         }
4317         // LYXERR(Debug::FIND, "repl_buf_name: '" << s << "'");
4318         opt.repl_buf_name = from_utf8(s);
4319         is >> opt.keep_case;
4320         int i;
4321         is >> i;
4322         opt.scope = FindAndReplaceOptions::SearchScope(i);
4323         is >> i;
4324         opt.restr = FindAndReplaceOptions::SearchRestriction(i);
4325
4326         /*
4327         LYXERR(Debug::FIND, "parsed: " << opt.casesensitive << ' ' << opt.matchword << ' ' << opt.forward << ' '
4328                << opt.expandmacros << ' ' << opt.ignoreformat << ' ' << opt.keep_case << ' '
4329                << opt.scope << ' ' << opt.restr);
4330         */
4331         return is;
4332 }
4333
4334 } // namespace lyx