src/Compare.cpp

   1 /**
   2  * \file Compare.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Vincent van Ravesteijn
   7  *
   8  * Full author contact details are available in file CREDITS.
   9  */
  10
  11 #include <config.h>
  12
  13 #include "Compare.h"
  14
  15 #include "BufferParams.h"
  16 #include "Changes.h"
  17
  18 #include "insets/InsetText.h"
  19
  20 #include "support/lassert.h"
  21
  22 #include <boost/next_prior.hpp>
  23
  24 using namespace std;
  25 using namespace lyx::support;
  26
  27
  28 namespace lyx {
  29
  30
  31 enum Direction {
  32         Forward = 0,
  33         Backward
  34 };
  35
  36
  37 static void step(DocIterator & dit, Direction direction)
  38 {
  39         if (direction == Forward)
  40                 dit.top().forwardPos();
  41         else
  42                 dit.top().backwardPos();
  43 }
  44
  45
  46 static void step(DocIterator & dit, DocIterator const & end, Direction direction)
  47 {
  48         if (dit != end)
  49                 step(dit, direction);
  50 }
  51
  52
  53 /**
  54  * A pair of two DocIterators that form a range.
  55  */
  56 class DocRange {
  57 public:
  58         DocRange(DocIterator from_, DocIterator to_)
  59                 : from(from_), to(to_)
  60         {}
  61
  62         DocRange(Buffer const * buf)
  63         {
  64                 from = doc_iterator_begin(buf);
  65                 to = doc_iterator_end(buf);
  66                 to.backwardPos();
  67         }
  68
  69         ///
  70         Text * text() const { return from.text(); }
  71         ///
  72         bool empty() const { return to <= from; }
  73         ///
  74         size_t length() const;
  75
  76         /// The begin of the range
  77         DocIterator from;
  78         /// The end of the range
  79         DocIterator to;
  80 };
  81
  82
  83 size_t DocRange::length() const
  84 {
  85         pit_type startpit = from.pit();
  86         pit_type endpit = to.pit();
  87         ParagraphList const & ps_ = from.text()->paragraphs();
  88
  89         ParagraphList pars(boost::next(ps_.begin(), startpit),
  90                                 boost::next(ps_.begin(), endpit + 1));
  91
  92         // Remove the end of the last paragraph; afterwards, remove the
  93         // beginning of the first paragraph.
  94         Paragraph & back = pars.back();
  95         back.eraseChars(to.pos(), back.size(), false);
  96         Paragraph & front = pars.front();
  97         front.eraseChars(0, from.pos(), false);
  98
  99         ParagraphList::const_iterator pit = pars.begin();
 100         ParagraphList::const_iterator end_it = pars.end();
 101
 102         size_t length = 0;
 103         for (; pit != end_it; ++pit)
 104                 length += pit->size() + 1;
 105
 106         // The last paragraph has no paragraph-end
 107         --length;
 108         return length;
 109 }
 110
 111
 112 class DocPair {
 113 public:
 114         DocPair() {}
 115
 116         DocPair(DocIterator o_, DocIterator n_)
 117                 : o(o_), n(n_)
 118         {}
 119
 120         bool operator!=(DocPair const & rhs) {
 121                 // this might not be intuitive but correct for our purpose
 122                 return o != rhs.o && n != rhs.n;
 123         }
 124
 125
 126         DocPair & operator++()
 127         {
 128                 step(o, Forward);
 129                 step(n, Forward);
 130                 return *this;
 131         }
 132
 133         DocPair & operator--()
 134         {
 135                 step(o, Backward);
 136                 step(n, Backward);
 137                 return *this;
 138         }
 139         ///
 140         DocIterator o;
 141         ///
 142         DocIterator n;
 143 };
 144
 145 /**
 146  * A pair of two DocRanges.
 147  */
 148 class DocRangePair {
 149 public:
 150         DocRangePair(DocRange o_, DocRange n_)
 151                 : o(o_), n(n_)
 152         {}
 153
 154         DocRangePair(DocPair from, DocPair to)
 155                 : o(from.o, to.o), n(from.n, to.n)
 156         {}
 157
 158         DocRangePair(Buffer const * o_buf, Buffer const * n_buf)
 159                 : o(o_buf), n(n_buf)
 160         {}
 161
 162         /// Returns the from pair
 163         DocPair from() const { return DocPair(o.from, n.from); }
 164
 165         /// Returns the to pair
 166         DocPair to() const { return DocPair(o.to, n.to); }
 167
 168         DocRange o;
 169         DocRange n;
 170 };
 171
 172
 173 static DocRangePair stepIntoInset(DocPair const & inset_location)
 174 {
 175         DocRangePair rp(inset_location, inset_location);
 176         rp.o.from.forwardPos();
 177         rp.n.from.forwardPos();
 178         step(rp.o.to, Forward);
 179         step(rp.n.to, Forward);
 180         rp.o.to.backwardPos();
 181         rp.n.to.backwardPos();
 182         return rp;
 183 }
 184
 185
 186 /**
 187  *  This class is designed to hold a vector that has both positive as
 188  *  negative indices. It is internally represented as two vectors, one
 189  *  for non-zero indices and one for negative indices. In this way, the
 190  *  vector can grow in both directions.
 191  *    If an index is not available in the vector, the default value is
 192  *  returned. If an object is put in the vector beyond its size, the
 193  *  empty spots in between are also filled with the default value.
 194  */
 195 template<class T>
 196 class compl_vector {
 197 public:
 198         compl_vector() {}
 199
 200         void reset(T const & def)
 201         {
 202                 default_ = def;
 203                 Vp_.clear();
 204                 Vn_.clear();
 205         }
 206
 207
 208         /// Gets the value at index. If it is not in the vector
 209         /// the default value is returned.
 210         T & get(int index) {
 211                 if (-index <= int(Vn_.size()) && index < int(Vp_.size()))
 212                         return index >= 0 ? Vp_[index] : Vn_[-index-1];
 213                 else
 214                         return default_;
 215         }
 216
 217         /// Sets the value at index if it already
 218         /// is in the vector. Otherwise it will be added to the
 219         /// end padded with the default value.
 220         void set(int index, T const & t) {
 221                 if (index >= -int(Vn_.size()) && index < int(Vp_.size())) {
 222                         if (index >= 0)
 223                                 Vp_[index] = t;
 224                         else
 225                                 Vn_[-index-1] = t;
 226                 } else {
 227                         while (index > int(Vp_.size()))
 228                                 Vp_.push_back(default_);
 229                         while (index < -int(Vn_.size()) - 1)
 230                                 Vn_.push_back(default_);
 231
 232                         if (index >= 0)
 233                                 Vp_.push_back(t);
 234                         else
 235                                 Vn_.push_back(t);
 236                 }
 237         }
 238
 239 private:
 240         /// The vector for positive indices
 241         vector<T> Vp_;
 242         /// The vector for negative indices
 243         vector<T> Vn_;
 244         /// The default value that is inserted in the vector
 245         /// if more space is needed
 246         T default_;
 247 };
 248
 249
 250 /**
 251  * The implementation of the algorithm that does the comparison
 252  * between two documents.
 253  */
 254 class Compare::Impl {
 255 public:
 256         ///
 257         Impl(Compare const & compare)
 258                 : abort_(false), compare_(compare)
 259         {}
 260
 261         ///
 262         ~Impl() {}
 263
 264         // Algorithm to find the shortest edit string. This algorithm
 265         // only needs a linear amount of memory (linear with the sum
 266         // of the number of characters in the two paragraph-lists).
 267         bool diff(Buffer const * new_buf, Buffer const * old_buf,
 268                 Buffer const * dest_buf);
 269
 270         /// Set to true to cancel the algorithm
 271         bool abort_;
 272
 273 private:
 274         /// Finds the middle snake and returns the length of the
 275         /// shortest edit script.
 276         int find_middle_snake(DocRangePair const & rp, DocPair & middle_snake);
 277
 278         /// This function is called recursively by a divide and conquer
 279         /// algorithm. Each time, the string is divided into two split
 280         /// around the middle snake.
 281         void diff_i(DocRangePair const & rp);
 282
 283         /// Processes the splitted chunks. It either adds them as deleted,
 284         /// as added, or call diff_i for further processing.
 285         void diff_part(DocRangePair const & rp);
 286
 287         /// Runs the algorithm for the inset located at /c it and /c it_n
 288         /// and adds the result to /c pars.
 289         void diff_inset(Inset * inset, DocPair const & p);
 290
 291         /// Adds the snake to the destination buffer. The algorithm will
 292         /// recursively be applied to any InsetTexts that are within the snake.
 293         void process_snake(DocRangePair const & rp);
 294
 295         /// Writes the range to the destination buffer
 296         void writeToDestBuffer(DocRange const & range,
 297                 Change::Type type = Change::UNCHANGED);
 298
 299         /// Writes the paragraph list to the destination buffer
 300         void writeToDestBuffer(ParagraphList const & copy_pars) const;
 301
 302         /// The length of the old chunk currently processed
 303         int N_;
 304         /// The length of the new chunk currently processed
 305         int M_;
 306
 307         /// The thread object, used to emit signals to the GUI
 308         Compare const & compare_;
 309
 310         /// The buffer containing text that will be marked as old
 311         Buffer const * old_buf_;
 312         /// The buffer containing text that will be marked as new
 313         Buffer const * new_buf_;
 314         /// The buffer containing text that will be marked as new
 315         Buffer const * dest_buf_;
 316
 317         /// The paragraph list of the destination buffer
 318         ParagraphList * dest_pars_;
 319
 320         /// The level of recursion
 321         int recursion_level_;
 322
 323         /// The number of nested insets at this level
 324         int nested_inset_level_;
 325 };
 326
 327 /////////////////////////////////////////////////////////////////////
 328 //
 329 // Compare
 330 //
 331 /////////////////////////////////////////////////////////////////////
 332
 333 Compare::Compare(Buffer const * new_buf, Buffer const * old_buf,
 334         Buffer * const dest_buf, CompareOptions const & options)
 335         : new_buffer(new_buf), old_buffer(old_buf), dest_buffer(dest_buf),
 336           options_(options), pimpl_(new Impl(*this))
 337 {
 338 }
 339
 340
 341 void Compare::run()
 342 {
 343         if (!dest_buffer || !new_buffer || !old_buffer)
 344                 return;
 345
 346         // Copy the buffer params to the new buffer
 347         dest_buffer->params() = options_.settings_from_new
 348                 ? new_buffer->params() : old_buffer->params();
 349
 350         // do the real work
 351         if (!doCompare())
 352                 return;
 353
 354         finished(pimpl_->abort_);
 355         return;
 356 }
 357
 358
 359 int Compare::doCompare()
 360 {
 361         return pimpl_->diff(new_buffer, old_buffer, dest_buffer);
 362 }
 363
 364
 365 void Compare::abort()
 366 {
 367         pimpl_->abort_ = true;
 368         condition_.wakeOne();
 369         wait();
 370         pimpl_->abort_ = false;
 371 }
 372
 373
 374 static void get_paragraph_list(DocRange const & range,
 375         ParagraphList & pars)
 376 {
 377         // Clone the paragraphs within the selection.
 378         pit_type startpit = range.from.pit();
 379         pit_type endpit = range.to.pit();
 380         ParagraphList const & ps_ = range.text()->paragraphs();
 381         ParagraphList tmp_pars(boost::next(ps_.begin(), startpit),
 382                 boost::next(ps_.begin(), endpit + 1));
 383
 384         // Remove the end of the last paragraph; afterwards, remove the
 385         // beginning of the first paragraph. Keep this order - there may only
 386         // be one paragraph!
 387         Paragraph & back = tmp_pars.back();
 388         back.eraseChars(range.to.pos(), back.size(), false);
 389         Paragraph & front = tmp_pars.front();
 390         front.eraseChars(0, range.from.pos(), false);
 391
 392         pars.insert(pars.begin(), tmp_pars.begin(), tmp_pars.end());
 393 }
 394
 395
 396 static bool equal(Inset const * i_o, Inset const * i_n)
 397 {
 398         if (!i_o || !i_n)
 399                 return false;
 400
 401         // Different types of insets
 402         if (i_o->lyxCode() != i_n->lyxCode())
 403                 return false;
 404
 405         // Editable insets are assumed to be the same as they are of the
 406         // same type. If we later on decide that we insert them in the
 407         // document as being unchanged, we will run the algorithm on the
 408         // contents of the two insets.
 409         // FIXME: This fails if the parameters of the insets differ.
 410         // FIXME: We do not recurse into InsetTabulars.
 411         // FIXME: We need methods inset->equivalent(inset).
 412         if (i_o->editable() && !i_o->asInsetMath()
 413                   && i_o->asInsetText())
 414                 return true;
 415
 416         ostringstream o_os;
 417         ostringstream n_os;
 418         i_o->write(o_os);
 419         i_n->write(n_os);
 420         return o_os.str() == n_os.str();
 421 }
 422
 423
 424 static bool equal(DocIterator & o, DocIterator & n) {
 425         Paragraph const & old_par = o.text()->getPar(o.pit());
 426         Paragraph const & new_par = n.text()->getPar(n.pit());
 427
 428         char_type const c_o = old_par.getChar(o.pos());
 429         char_type const c_n = new_par.getChar(n.pos());
 430         if (c_o != c_n)
 431                 return false;
 432
 433         if (old_par.isInset(o.pos())) {
 434                 Inset const * i_o = old_par.getInset(o.pos());
 435                 Inset const * i_n = new_par.getInset(n.pos());
 436
 437                 if (i_o && i_n)
 438                         return equal(i_o, i_n);
 439         }
 440
 441         Font fo = old_par.getFontSettings(o.buffer()->params(), o.pos());
 442         Font fn = new_par.getFontSettings(n.buffer()->params(), n.pos());
 443         return fo == fn;
 444 }
 445
 446
 447 /// Traverses a snake in a certain direction. p points to a
 448 /// position in the old and new file and they are synchronously
 449 /// moved along the snake. The function returns true if a snake
 450 /// was found.
 451 static bool traverse_snake(DocPair & p, DocRangePair const & range,
 452         Direction direction)
 453 {
 454         bool ret = false;
 455         DocPair const & p_end =
 456                 direction == Forward ? range.to() : range.from();
 457
 458         while (p != p_end) {
 459                 if (direction == Backward)
 460                         --p;
 461                 if (!equal(p.o, p.n)) {
 462                         if (direction == Backward)
 463                                 ++p;
 464                         return ret;
 465                 }
 466                 if (direction == Forward)
 467                         ++p;
 468                 ret = true;
 469         }
 470         return ret;
 471 }
 472
 473
 474 /////////////////////////////////////////////////////////////////////
 475 //
 476 // Compare::Impl
 477 //
 478 /////////////////////////////////////////////////////////////////////
 479
 480 int Compare::Impl::find_middle_snake(DocRangePair const & rp,
 481         DocPair &)
 482 {
 483         N_ = rp.o.length();
 484         M_ = rp.n.length();
 485         return M_ + N_;
 486 }
 487
 488
 489 bool Compare::Impl::diff(Buffer const * new_buf, Buffer const * old_buf,
 490         Buffer const * dest_buf)
 491 {
 492         if (!new_buf || !old_buf || !dest_buf)
 493                 return false;
 494
 495         old_buf_ = old_buf;
 496         new_buf_ = new_buf;
 497         dest_buf_ = dest_buf;
 498         dest_pars_ = &dest_buf->inset().asInsetText()->paragraphs();
 499         dest_pars_->clear();
 500
 501         recursion_level_ = 0;
 502         nested_inset_level_ = 0;
 503
 504         DocRangePair rp(old_buf_, new_buf_);
 505
 506         DocPair from = rp.from();
 507         traverse_snake(from, rp, Forward);
 508         DocRangePair const snake(rp.from(), from);
 509         process_snake(snake);
 510
 511         // Start the recursive algorithm
 512         diff_i(rp);
 513
 514         for (pit_type p = 0; p < (pit_type)dest_pars_->size(); ++p) {
 515                 (*dest_pars_)[p].setBuffer(const_cast<Buffer &>(*dest_buf));
 516                 (*dest_pars_)[p].setInsetOwner(&dest_buf_->inset());
 517         }
 518
 519         return true;
 520 }
 521
 522
 523 void Compare::Impl::diff_i(DocRangePair const & rp)
 524 {
 525         // The middle snake
 526         DocPair middle_snake;
 527
 528         // Divides the problem into two smaller problems, split around
 529         // the snake in the middle.
 530         int const L_ses = find_middle_snake(rp, middle_snake);
 531
 532         // Set maximum of progress bar
 533         if (++recursion_level_ == 1)
 534                 compare_.progressMax(L_ses);
 535
 536         // There are now three possibilities: the strings were the same,
 537         // the strings were completely different, or we found a middle
 538         // snake and we can split the string into two parts to process.
 539         if (L_ses == 0)
 540                 // Two the same strings (this must be a very rare case, because
 541                 // usually this will be part of a snake adjacent to these strings).
 542                 writeToDestBuffer(rp.o);
 543
 544         else if (middle_snake.o.empty()) {
 545                 // Two totally different strings
 546                 writeToDestBuffer(rp.o, Change::DELETED);
 547                 writeToDestBuffer(rp.n, Change::INSERTED);
 548
 549         } else {
 550                 // Retrieve the complete snake
 551                 DocPair first_part_end = middle_snake;
 552                 traverse_snake(first_part_end, rp, Backward);
 553                 DocRangePair first_part(rp.from(), first_part_end);
 554
 555                 DocPair second_part_begin = middle_snake;
 556                 traverse_snake(second_part_begin, rp, Forward);
 557                 DocRangePair second_part(second_part_begin, rp.to());
 558
 559                 // Split the string in three parts:
 560                 // 1. in front of the snake
 561                 diff_part(first_part);
 562
 563                 // 2. the snake itself, and
 564                 DocRangePair const snake(first_part.to(), second_part.from());
 565                 process_snake(snake);
 566
 567                 // 3. behind the snake.
 568                 diff_part(second_part);
 569         }
 570         --recursion_level_;
 571 }
 572
 573
 574 void Compare::Impl::diff_part(DocRangePair const & rp)
 575 {
 576         // Is there a finite length string in both buffers, if not there
 577         // is an empty string and we write the other one to the buffer.
 578         if (!rp.o.empty() && !rp.n.empty())
 579                 diff_i(rp);
 580
 581         else if (!rp.o.empty())
 582                 writeToDestBuffer(rp.o, Change::DELETED);
 583
 584         else if (!rp.n.empty())
 585                 writeToDestBuffer(rp.n, Change::INSERTED);
 586 }
 587
 588
 589 void Compare::Impl::diff_inset(Inset * inset, DocPair const & p)
 590 {
 591         // Find the dociterators for the beginning and the
 592         // end of the inset, for the old and new document.
 593         DocRangePair const rp = stepIntoInset(p);
 594
 595         // Recurse into the inset. Temporarily replace the dest_pars
 596         // paragraph list by the paragraph list of the nested inset.
 597         ParagraphList * backup_dest_pars = dest_pars_;
 598         dest_pars_ = &inset->asInsetText()->text().paragraphs();
 599         dest_pars_->clear();
 600
 601         ++nested_inset_level_;
 602         diff_i(rp);
 603         --nested_inset_level_;
 604
 605         dest_pars_ = backup_dest_pars;
 606 }
 607
 608
 609 void Compare::Impl::process_snake(DocRangePair const & rp)
 610 {
 611         ParagraphList pars;
 612         get_paragraph_list(rp.o, pars);
 613
 614         // Find insets in this paragaph list
 615         DocPair it = rp.from();
 616         for (; it.o < rp.o.to; ++it) {
 617                 Inset * inset = it.o.text()->getPar(it.o.pit()).getInset(it.o.pos());
 618                 if (inset && inset->editable() && inset->asInsetText()) {
 619                         // Find the inset in the paragraph list that will be pasted into
 620                         // the final document. The contents of the inset will be replaced
 621                         // by the output of the algorithm below.
 622                         pit_type const pit = it.o.pit() - rp.o.from.pit();
 623                         pos_type const pos = pit ? it.o.pos() : it.o.pos() - rp.o.from.pos();
 624                         inset = pars[pit].getInset(pos);
 625                         LASSERT(inset, /**/);
 626                         diff_inset(inset, it);
 627                 }
 628         }
 629         writeToDestBuffer(pars);
 630 }
 631
 632
 633 void Compare::Impl::writeToDestBuffer(DocRange const & range,
 634         Change::Type type)
 635 {
 636         ParagraphList pars;
 637         get_paragraph_list(range, pars);
 638
 639         pos_type size = 0;
 640
 641         // Set the change
 642         ParagraphList::iterator it = pars.begin();
 643         for (; it != pars.end(); ++it) {
 644                 it->setChange(Change(type));
 645                 size += it->size();
 646         }
 647
 648         writeToDestBuffer(pars);
 649
 650         if (nested_inset_level_ == 0)
 651                 compare_.progress(size);
 652 }
 653
 654
 655 void Compare::Impl::writeToDestBuffer(ParagraphList const & pars) const
 656 {
 657         pit_type const pit = dest_pars_->size() - 1;
 658         dest_pars_->insert(dest_pars_->end(), pars.begin(), pars.end());
 659         if (pit >= 0)
 660                 mergeParagraph(dest_buf_->params(), *dest_pars_, pit);
 661 }
 662
 663
 664 #include "moc_Compare.cpp"
 665
 666 } // namespace lyx