src/Compare.cpp

   1 /**
   2  * \file Compare.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Vincent van Ravesteijn
   7  *
   8  * Full author contact details are available in file CREDITS.
   9  */
  10
  11 #include <config.h>
  12
  13 #include "Compare.h"
  14
  15 #include "BufferParams.h"
  16 #include "Changes.h"
  17
  18 #include "insets/InsetText.h"
  19
  20 #include "support/lassert.h"
  21
  22 #include <boost/next_prior.hpp>
  23
  24 using namespace std;
  25 using namespace lyx::support;
  26
  27
  28 namespace lyx {
  29
  30
  31 enum Direction {
  32         Forward = 0,
  33         Backward
  34 };
  35
  36
  37 void step_forward(DocIterator & dit)
  38 {
  39         dit.top().forwardPos();
  40 }
  41
  42
  43 void step_backward(DocIterator & dit)
  44 {
  45         dit.top().backwardPos();
  46 }
  47
  48
  49 bool step_forward(DocIterator & dit, DocIterator const & end)
  50 {
  51         if (dit == end)
  52                 return false;
  53         step_forward(dit);
  54         return true;
  55 }
  56
  57
  58 bool step_backward(DocIterator & dit, DocIterator const & beg)
  59 {
  60         if (dit == beg)
  61                 return false;
  62         step_backward(dit);
  63         return true;
  64 }
  65
  66 /**
  67  * A pair of two DocIterators that form a range.
  68  */
  69 class DocRange {
  70 public:
  71         DocRange(DocIterator from_, DocIterator to_)
  72                 : from(from_), to(to_)
  73         {}
  74
  75         DocRange(Buffer const * buf)
  76         {
  77                 from = doc_iterator_begin(buf);
  78                 to = doc_iterator_end(buf);
  79                 to.backwardPos();
  80         }
  81
  82         ///
  83         Text * text() const { return from.text(); }
  84         ///
  85         bool empty() const { return to <= from; }
  86         ///
  87         size_t length() const;
  88
  89         /// The begin of the range
  90         DocIterator from;
  91         /// The end of the range
  92         DocIterator to;
  93 };
  94
  95
  96 size_t DocRange::length() const
  97 {
  98         pit_type startpit = from.pit();
  99         pit_type endpit = to.pit();
 100         ParagraphList const & ps_ = from.text()->paragraphs();
 101
 102         ParagraphList pars(boost::next(ps_.begin(), startpit),
 103                                 boost::next(ps_.begin(), endpit + 1));
 104
 105         // Remove the end of the last paragraph; afterwards, remove the
 106         // beginning of the first paragraph.
 107         Paragraph & back = pars.back();
 108         back.eraseChars(to.pos(), back.size(), false);
 109         Paragraph & front = pars.front();
 110         front.eraseChars(0, from.pos(), false);
 111
 112         ParagraphList::const_iterator pit = pars.begin();
 113         ParagraphList::const_iterator end_it = pars.end();
 114
 115         size_t length = 0;
 116         for (; pit != end_it; ++pit)
 117                 length += pit->size() + 1;
 118
 119         // The last paragraph has no paragraph-end
 120         --length;
 121         return length;
 122 }
 123
 124
 125 class DocPair {
 126 public:
 127         DocPair() {}
 128
 129         DocPair(DocIterator o_, DocIterator n_)
 130                 : o(o_), n(n_)
 131         {}
 132
 133         bool operator!=(DocPair const & rhs) {
 134                 // this might not be intuitive but correct for our purpose
 135                 return o != rhs.o && n != rhs.n;
 136         }
 137
 138
 139         DocPair & operator++()
 140         {
 141                 step_forward(o);
 142                 step_forward(n);
 143                 return *this;
 144         }
 145
 146         DocPair & operator--()
 147         {
 148                 step_backward(o);
 149                 step_backward(n);
 150                 return *this;
 151         }
 152         ///
 153         DocIterator o;
 154         ///
 155         DocIterator n;
 156 };
 157
 158 /**
 159  * A pair of two DocRanges.
 160  */
 161 class DocRangePair {
 162 public:
 163         DocRangePair(DocRange o_, DocRange n_)
 164                 : o(o_), n(n_)
 165         {}
 166
 167         DocRangePair(DocPair from, DocPair to)
 168                 : o(from.o, to.o), n(from.n, to.n)
 169         {}
 170
 171         DocRangePair(Buffer const * o_buf, Buffer const * n_buf)
 172                 : o(o_buf), n(n_buf)
 173         {}
 174
 175         /// Returns the from pair
 176         DocPair from() const { return DocPair(o.from, n.from); }
 177
 178         /// Returns the to pair
 179         DocPair to() const { return DocPair(o.to, n.to); }
 180
 181         DocRange o;
 182         DocRange n;
 183 };
 184
 185
 186 DocRangePair stepIntoInset(DocPair const & inset_location)
 187 {
 188         DocRangePair rp(inset_location, inset_location);
 189         rp.o.from.forwardPos();
 190         rp.n.from.forwardPos();
 191         step_forward(rp.o.to);
 192         step_forward(rp.n.to);
 193         rp.o.to.backwardPos();
 194         rp.n.to.backwardPos();
 195         return rp;
 196 }
 197
 198
 199 /**
 200  * The implementation of the algorithm that does the comparison
 201  * between two documents.
 202  */
 203 class Compare::Impl {
 204 public:
 205         ///
 206         Impl(Compare const & compare)
 207                 : abort_(false), compare_(compare)
 208         {}
 209
 210         ///
 211         ~Impl() {}
 212
 213         // Algorithm to find the shortest edit string. This algorithm
 214         // only needs a linear amount of memory (linear with the sum
 215         // of the number of characters in the two paragraph-lists).
 216         bool diff(Buffer const * new_buf, Buffer const * old_buf,
 217                 Buffer const * dest_buf);
 218
 219         /// Set to true to cancel the algorithm
 220         bool abort_;
 221
 222 private:
 223         /// Finds the middle snake and returns the length of the
 224         /// shortest edit script.
 225         int find_middle_snake(DocRangePair const & rp, DocPair & middle_snake);
 226
 227         /// This function is called recursively by a divide and conquer
 228         /// algorithm. Each time, the string is divided into two split
 229         /// around the middle snake.
 230         void diff_i(DocRangePair const & rp);
 231
 232         /// Processes the splitted chunks. It either adds them as deleted,
 233         /// as added, or call diff_i for further processing.
 234         void diff_part(DocRangePair const & rp);
 235
 236         /// Runs the algorithm for the inset located at /c it and /c it_n
 237         /// and adds the result to /c pars.
 238         void diff_inset(Inset * inset, DocPair const & p);
 239
 240         /// Adds the snake to the destination buffer. The algorithm will
 241         /// recursively be applied to any InsetTexts that are within the snake.
 242         void process_snake(DocRangePair const & rp);
 243
 244         /// Writes the range to the destination buffer
 245         void writeToDestBuffer(DocRange const & range,
 246                 Change::Type type = Change::UNCHANGED);
 247
 248         /// Writes the paragraph list to the destination buffer
 249         void writeToDestBuffer(ParagraphList const & copy_pars) const;
 250
 251         /// The length of the old chunk currently processed
 252         int N;
 253         /// The length of the new chunk currently processed
 254         int M;
 255
 256         /// The thread object, used to emit signals to the GUI
 257         Compare const & compare_;
 258
 259         /// The buffer containing text that will be marked as old
 260         Buffer const * old_buf_;
 261         /// The buffer containing text that will be marked as new
 262         Buffer const * new_buf_;
 263         /// The buffer containing text that will be marked as new
 264         Buffer const * dest_buf_;
 265
 266         /// The paragraph list of the destination buffer
 267         ParagraphList * dest_pars_;
 268
 269         /// The level of recursion
 270         int recursion_level_;
 271
 272         /// The number of nested insets at this level
 273         int nested_inset_level_;
 274 };
 275
 276 /////////////////////////////////////////////////////////////////////
 277 //
 278 // Compare
 279 //
 280 /////////////////////////////////////////////////////////////////////
 281
 282 Compare::Compare(Buffer const * new_buf, Buffer const * old_buf,
 283         Buffer * const dest_buf, CompareOptions const & options)
 284         : new_buffer(new_buf), old_buffer(old_buf), dest_buffer(dest_buf),
 285           options_(options), pimpl_(new Impl(*this))
 286 {
 287 }
 288
 289
 290 void Compare::run()
 291 {
 292         if (!dest_buffer || !new_buffer || !old_buffer)
 293                 return;
 294
 295         // Copy the buffer params to the new buffer
 296         dest_buffer->params() = options_.settings_from_new
 297                 ? new_buffer->params() : old_buffer->params();
 298
 299         // do the real work
 300         if (!doCompare())
 301                 return;
 302
 303         finished(pimpl_->abort_);
 304         return;
 305 }
 306
 307
 308 int Compare::doCompare()
 309 {
 310         return pimpl_->diff(new_buffer, old_buffer, dest_buffer);
 311 }
 312
 313
 314 void Compare::abort()
 315 {
 316         pimpl_->abort_ = true;
 317         condition_.wakeOne();
 318         wait();
 319         pimpl_->abort_ = false;
 320 }
 321
 322
 323 void get_paragraph_list(DocRange const & range,
 324         ParagraphList & pars)
 325 {
 326         // Clone the paragraphs within the selection.
 327         pit_type startpit = range.from.pit();
 328         pit_type endpit = range.to.pit();
 329         ParagraphList const & ps_ = range.text()->paragraphs();
 330         ParagraphList tmp_pars(boost::next(ps_.begin(), startpit),
 331                 boost::next(ps_.begin(), endpit + 1));
 332
 333         // Remove the end of the last paragraph; afterwards, remove the
 334         // beginning of the first paragraph. Keep this order - there may only
 335         // be one paragraph!
 336         Paragraph & back = tmp_pars.back();
 337         back.eraseChars(range.to.pos(), back.size(), false);
 338         Paragraph & front = tmp_pars.front();
 339         front.eraseChars(0, range.from.pos(), false);
 340
 341         pars.insert(pars.begin(), tmp_pars.begin(), tmp_pars.end());
 342 }
 343
 344
 345 bool equal(Inset const * i_o, Inset const * i_n)
 346 {
 347         if (!i_o || !i_n)
 348                 return false;
 349
 350         // Different types of insets
 351         if (i_o->lyxCode() != i_n->lyxCode())
 352                 return false;
 353
 354         // Editable insets are assumed to be the same as they are of the
 355         // same type. If we later on decide that we insert them in the
 356         // document as being unchanged, we will run the algorithm on the
 357         // contents of the two insets.
 358         // FIXME: This fails if the parameters of the insets differ.
 359         // FIXME: We do not recurse into InsetTabulars.
 360         // FIXME: We need methods inset->equivalent(inset).
 361         if (i_o->editable() && !i_o->asInsetMath()
 362                   && i_o->asInsetText())
 363                 return true;
 364
 365         ostringstream o_os;
 366         ostringstream n_os;
 367         i_o->write(o_os);
 368         i_n->write(n_os);
 369         return o_os.str() == n_os.str();
 370 }
 371
 372
 373 bool equal(DocIterator & o, DocIterator & n) {
 374         Paragraph const & old_par = o.text()->getPar(o.pit());
 375         Paragraph const & new_par = n.text()->getPar(n.pit());
 376
 377         char_type const c_o = old_par.getChar(o.pos());
 378         char_type const c_n = new_par.getChar(n.pos());
 379         if (c_o != c_n)
 380                 return false;
 381
 382         if (old_par.isInset(o.pos())) {
 383                 Inset const * i_o = old_par.getInset(o.pos());
 384                 Inset const * i_n = new_par.getInset(n.pos());
 385
 386                 if (i_o && i_n)
 387                         return equal(i_o, i_n);
 388         }
 389
 390         Font fo = old_par.getFontSettings(o.buffer()->params(), o.pos());
 391         Font fn = new_par.getFontSettings(n.buffer()->params(), n.pos());
 392         return fo == fn;
 393 }
 394
 395
 396 bool traverse_snake(DocPair & p, DocRangePair const & rp, Direction direction)
 397 {
 398         bool ret = false;
 399         DocPair const & p_end = direction == Forward ? rp.to() : rp.from();
 400         while (p != p_end) {
 401                 if (direction == Backward)
 402                         --p;
 403                 if (!equal(p.o, p.n)) {
 404                         if (direction == Backward)
 405                                 ++p;
 406                         return ret;
 407                 }
 408                 if (direction == Forward)
 409                         ++p;
 410                 ret = true;
 411         }
 412         return ret;
 413 }
 414
 415
 416 /////////////////////////////////////////////////////////////////////
 417 //
 418 // Compare::Impl
 419 //
 420 /////////////////////////////////////////////////////////////////////
 421
 422 int Compare::Impl::find_middle_snake(DocRangePair const & rp,
 423         DocPair &)
 424 {
 425         N = rp.o.length();
 426         M = rp.n.length();
 427         return M+N;
 428 }
 429
 430
 431 bool Compare::Impl::diff(Buffer const * new_buf, Buffer const * old_buf,
 432         Buffer const * dest_buf)
 433 {
 434         if (!new_buf || !old_buf || !dest_buf)
 435                 return false;
 436
 437         old_buf_ = old_buf;
 438         new_buf_ = new_buf;
 439         dest_buf_ = dest_buf;
 440         dest_pars_ = &dest_buf->inset().asInsetText()->paragraphs();
 441         dest_pars_->clear();
 442
 443         recursion_level_ = 0;
 444         nested_inset_level_ = 0;
 445
 446         DocRangePair rp(old_buf_, new_buf_);
 447
 448         DocPair from = rp.from();
 449         traverse_snake(from, rp, Forward);
 450         DocRangePair const snake(rp.from(), from);
 451         process_snake(snake);
 452
 453         // Start the recursive algorithm
 454         diff_i(rp);
 455
 456         for (pit_type p = 0; p < (pit_type)dest_pars_->size(); ++p) {
 457                 (*dest_pars_)[p].setBuffer(const_cast<Buffer &>(*dest_buf));
 458                 (*dest_pars_)[p].setInsetOwner(&dest_buf_->inset());
 459         }
 460
 461         return true;
 462 }
 463
 464
 465 void Compare::Impl::diff_i(DocRangePair const & rp)
 466 {
 467         // The middle snake
 468         DocPair middle_snake;
 469
 470         // Divides the problem into two smaller problems, split around
 471         // the snake in the middle.
 472         int const L_ses = find_middle_snake(rp, middle_snake);
 473
 474         // Set maximum of progress bar
 475         if (++recursion_level_ == 1)
 476                 compare_.progressMax(L_ses);
 477
 478         // There are now three possibilities: the strings were the same,
 479         // the strings were completely different, or we found a middle
 480         // snake and we can split the string into two parts to process.
 481         if (L_ses == 0)
 482                 // Two the same strings (this must be a very rare case, because
 483                 // usually this will be part of a snake adjacent to these strings).
 484                 writeToDestBuffer(rp.o);
 485
 486         else if (middle_snake.o.empty()) {
 487                 // Two totally different strings
 488                 writeToDestBuffer(rp.o, Change::DELETED);
 489                 writeToDestBuffer(rp.n, Change::INSERTED);
 490
 491         } else {
 492                 // Retrieve the complete snake
 493                 DocPair first_part_end = middle_snake;
 494                 traverse_snake(first_part_end, rp, Backward);
 495                 DocRangePair first_part(rp.from(), first_part_end);
 496
 497                 DocPair second_part_begin = middle_snake;
 498                 traverse_snake(second_part_begin, rp, Forward);
 499                 DocRangePair second_part(second_part_begin, rp.to());
 500
 501                 // Split the string in three parts:
 502                 // 1. in front of the snake
 503                 diff_part(first_part);
 504
 505                 // 2. the snake itself, and
 506                 DocRangePair const snake(first_part.to(), second_part.from());
 507                 process_snake(snake);
 508
 509                 // 3. behind the snake.
 510                 diff_part(second_part);
 511         }
 512         --recursion_level_;
 513 }
 514
 515
 516 void Compare::Impl::diff_part(DocRangePair const & rp)
 517 {
 518         // Is there a finite length string in both buffers, if not there
 519         // is an empty string and we write the other one to the buffer.
 520         if (!rp.o.empty() && !rp.n.empty())
 521                 diff_i(rp);
 522
 523         else if (!rp.o.empty())
 524                 writeToDestBuffer(rp.o, Change::DELETED);
 525
 526         else if (!rp.n.empty())
 527                 writeToDestBuffer(rp.n, Change::INSERTED);
 528 }
 529
 530
 531 void Compare::Impl::diff_inset(Inset * inset, DocPair const & p)
 532 {
 533         // Find the dociterators for the beginning and the
 534         // end of the inset, for the old and new document.
 535         DocRangePair const rp = stepIntoInset(p);
 536
 537         // Recurse into the inset. Temporarily replace the dest_pars
 538         // paragraph list by the paragraph list of the nested inset.
 539         ParagraphList * backup_dest_pars = dest_pars_;
 540         dest_pars_ = &inset->asInsetText()->text().paragraphs();
 541         dest_pars_->clear();
 542
 543         ++nested_inset_level_;
 544         diff_i(rp);
 545         --nested_inset_level_;
 546
 547         dest_pars_ = backup_dest_pars;
 548 }
 549
 550
 551 void Compare::Impl::process_snake(DocRangePair const & rp)
 552 {
 553         ParagraphList pars;
 554         get_paragraph_list(rp.o, pars);
 555
 556         // Find insets in this paragaph list
 557         DocPair it = rp.from();
 558         for (; it.o < rp.o.to; ++it) {
 559                 Inset * inset = it.o.text()->getPar(it.o.pit()).getInset(it.o.pos());
 560                 if (inset && inset->editable() && inset->asInsetText()) {
 561                         // Find the inset in the paragraph list that will be pasted into
 562                         // the final document. The contents of the inset will be replaced
 563                         // by the output of the algorithm below.
 564                         pit_type const pit = it.o.pit() - rp.o.from.pit();
 565                         pos_type const pos = pit ? it.o.pos() : it.o.pos() - rp.o.from.pos();
 566                         inset = pars[pit].getInset(pos);
 567                         LASSERT(inset, /**/);
 568                         diff_inset(inset, it);
 569                 }
 570         }
 571         writeToDestBuffer(pars);
 572 }
 573
 574
 575 void Compare::Impl::writeToDestBuffer(DocRange const & range,
 576         Change::Type type)
 577 {
 578         ParagraphList pars;
 579         get_paragraph_list(range, pars);
 580
 581         pos_type size = 0;
 582
 583         // Set the change
 584         ParagraphList::iterator it = pars.begin();
 585         for (; it != pars.end(); ++it) {
 586                 it->setChange(Change(type));
 587                 size += it->size();
 588         }
 589
 590         writeToDestBuffer(pars);
 591
 592         if (nested_inset_level_ == 0)
 593                 compare_.progress(size);
 594 }
 595
 596
 597 void Compare::Impl::writeToDestBuffer(ParagraphList const & pars) const
 598 {
 599         pit_type const pit = dest_pars_->size() - 1;
 600         dest_pars_->insert(dest_pars_->end(), pars.begin(), pars.end());
 601         if (pit >= 0)
 602                 mergeParagraph(dest_buf_->params(), *dest_pars_, pit);
 603 }
 604
 605
 606 #include "moc_Compare.cpp"
 607
 608 } // namespace lyx