3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Vincent van Ravesteijn
8 * Full author contact details are available in file CREDITS.
16 #include "BufferParams.h"
18 #include "CutAndPaste.h"
19 #include "ErrorList.h"
22 #include "insets/InsetText.h"
24 #include "support/docstream.h"
25 #include "support/lassert.h"
26 #include "support/lyxalgo.h"
27 #include "support/qstring_helpers.h"
30 using namespace lyx::support;
/// Moves \c dit one position in the given direction by calling
/// forwardPos() or backwardPos() on dit.top().
42 static void step(DocIterator & dit, Direction direction)
44 if (direction == Forward)
45 dit.top().forwardPos();
// Backward counterpart of the call above.
47 dit.top().backwardPos();
51 static void step(DocIterator & dit, DocIterator const & end, Direction direction)
59 * A pair of two DocIterators that form a range.
/// Builds a range that runs from \c from_ to \c to_.
63 DocRange(DocIterator const & from_, DocIterator const & to_)
64 : from(from_), to(to_)
/// Builds a range that runs from doc_iterator_begin(buf) to
/// doc_iterator_end(buf).
67 DocRange(Buffer const * buf) :
68 from(doc_iterator_begin(buf)),
69 to(doc_iterator_end(buf))
/// The Text that the start iterator points into.
75 Text * text() const { return from.text(); }
/// True when the end iterator does not lie behind the start iterator.
77 bool empty() const { return to <= from; }
/// The length of the range, see DocRange::length() below.
79 size_t length() const;
81 /// The begin of the range
83 /// The end of the range
/// Computes the length of the range from the paragraph sizes between
/// from.pit() and to.pit() and the positions of both iterators.
88 size_t DocRange::length() const
90 ParagraphList const & ps = from.text()->paragraphs();
92 pit_type pit = from.pit();
93 pit_type const endpit = to.pit();
// Every paragraph before the one holding `to` contributes its size
// plus one (presumably for the paragraph break -- confirm).
94 for (; pit < endpit; ++pit)
95 length += ps[pit].size() + 1;
// Add the offset of `to` in its paragraph and take off the part of the
// first paragraph that lies in front of `from`.
96 length += to.pos() - from.pos();
/// Pairs an iterator into the old document (\c o_) with an iterator
/// into the new document (\c n_).
106 DocPair(DocIterator o_, DocIterator n_)
/// Note that this yields true only when BOTH components differ; it is
/// not the negation of a member-wise equality.
110 bool operator!=(DocPair const & rhs)
112 // this might not be intuitive but correct for our purpose
113 return o != rhs.o && n != rhs.n;
/// Pre-increment (presumably advances both iterators together -- confirm).
117 DocPair & operator++()
/// Pre-decrement (presumably moves both iterators back together -- confirm).
124 DocPair & operator--()
137 * A pair of two DocRanges.
/// Pairs a range in the old document (\c o_) with a range in the new
/// document (\c n_).
141 DocRangePair(DocRange const & o_, DocRange const & n_)
/// Builds both ranges from a pair of start iterators and a pair of end
/// iterators: the old range is (from.o, to.o), the new one (from.n, to.n).
145 DocRangePair(DocPair const & from, DocPair const & to)
146 : o(from.o, to.o), n(from.n, to.n)
/// Builds the pair from the old buffer and the new buffer.
149 DocRangePair(Buffer const * o_buf, Buffer const * n_buf)
153 /// Returns the from pair
156 return DocPair(o.from, n.from);
159 /// Returns the to pair
162 return DocPair(o.to, n.to);
/// Derives, from the location of an inset in the old and new document,
/// a pair of ranges for that inset (used by diffInset() to recurse into
/// the inset).
170 static DocRangePair stepIntoInset(DocPair const & inset_location)
// Start with all four iterators at the location of the inset.
172 DocRangePair rp(inset_location, inset_location);
// Move the start iterators with forwardPos(); presumably this enters
// the inset -- confirm.
173 rp.o.from.forwardPos();
174 rp.n.from.forwardPos();
// Move the end iterators one step forward within their current slice
// (see step()), then go one position back with backwardPos(); presumably
// this lands on the last position inside the inset -- confirm.
175 step(rp.o.to, Forward);
176 step(rp.n.to, Forward);
177 rp.o.to.backwardPos();
178 rp.n.to.backwardPos();
184 * This class is designed to hold a vector that has both positive and
185 * negative indices. It is internally represented as two vectors, one
186 * for non-negative indices and one for negative indices. In this way, the
187 * vector can grow in both directions.
188 * If an index is not available in the vector, the default value is
189 * returned. If an object is put in the vector beyond its size, the
190 * empty spots in between are also filled with the default value.
/// Takes the new default value \c def.
/// NOTE(review): presumably also empties both vectors -- confirm.
198 void reset(T const & def)
205 /// Gets the value at index. If it is not in the vector
206 /// the default value is inserted and returned.
207 T & operator[](int index) {
// Indices >= 0 are stored in Vp_, negative indices in Vn_.
208 vector<T> & V = index >= 0 ? Vp_ : Vn_;
// Map the negative indices -1, -2, ... onto the slots 0, 1, ...
209 unsigned int const ii = index >= 0 ? index : -index - 1;
// Fill up with the default value until slot ii exists.
210 while (ii >= V.size())
211 V.push_back(default_);
216 /// The vector for non-negative indices
218 /// The vector for negative indices
220 /// The default value that is inserted in the vector
221 /// if more space is needed
227 * The implementation of the algorithm that does the comparison
228 * between two documents.
230 class Compare::Impl {
/// Keeps a reference to the owning Compare object and initialises the
/// abort flag, all counters and all buffer pointers to false/0.
233 Impl(Compare const & compare)
234 : abort_(false), n_(0), m_(0), offset_reverse_diagonal_(0),
235 odd_offset_(0), compare_(compare),
236 old_buf_(0), new_buf_(0), dest_buf_(0), dest_pars_(0),
237 recursion_level_(0), nested_inset_level_(0), D_(0)
244 // Algorithm to find the shortest edit string. This algorithm
245 // only needs a linear amount of memory (linear with the sum
246 // of the number of characters in the two paragraph-lists).
247 bool diff(Buffer const * new_buf, Buffer const * old_buf,
248 Buffer const * dest_buf);
250 /// Set to true to cancel the algorithm
// Status text: the current recursion level and the number of
// differences (D_) the algorithm is currently looking at.
257 status += toqstr("recursion level:") + " " + QString::number(recursion_level_)
258 + " " + toqstr("differences:") + " " + QString::number(D_);
263 /// Finds the middle snake and returns the length of the
264 /// shortest edit script.
265 int findMiddleSnake(DocRangePair const & rp, DocPair & middle_snake);
273 /// Retrieve the middle snake when there is overlap between
274 /// the forward and backward path.
275 SnakeResult retrieveMiddleSnake(int k, int D, Direction direction,
276 DocPair & middle_snake);
278 /// Find the furthest reaching D-path (number of horizontal
279 /// and vertical steps; differences between the old and new
280 /// document) in the k-diagonal (vertical minus horizontal steps).
281 void furthestDpathKdiagonal(int D, int k,
282 DocRangePair const & rp, Direction direction);
284 /// Is there overlap between the forward and backward path
285 bool overlap(int k, int D);
287 /// This function is called recursively by a divide and conquer
288 /// algorithm. Each time, the string is divided into two split
289 /// around the middle snake.
290 void diff_i(DocRangePair const & rp);
292 /// Processes the split chunks. It either adds them as deleted,
293 /// as added, or call diff_i for further processing.
294 void diffPart(DocRangePair const & rp);
296 /// Runs the algorithm on the contents of the insets located at \c p
297 /// (in the old and new document) and writes the result into \c inset.
298 void diffInset(Inset * inset, DocPair const & p);
300 /// Adds the snake to the destination buffer. The algorithm will
301 /// recursively be applied to any InsetTexts that are within the snake.
302 void processSnake(DocRangePair const & rp);
304 /// Writes the range to the destination buffer
305 void writeToDestBuffer(DocRange const & range,
306 Change::Type type = Change::UNCHANGED);
308 /// Writes the paragraph list to the destination buffer
309 void writeToDestBuffer(ParagraphList const & copy_pars) const;
311 /// The length of the old chunk currently processed
313 /// The length of the new chunk currently processed
315 /// The offset diagonal of the reverse path of the
316 /// currently processed chunk
317 int offset_reverse_diagonal_;
318 /// Is the offset odd or even ?
321 /// The thread object, used to emit signals to the GUI
322 Compare const & compare_;
324 /// The buffer containing text that will be marked as old
325 Buffer const * old_buf_;
326 /// The buffer containing text that will be marked as new
327 Buffer const * new_buf_;
328 /// The destination buffer that receives the result of the comparison
329 Buffer const * dest_buf_;
331 /// The paragraph list of the destination buffer
332 ParagraphList * dest_pars_;
334 /// The level of recursion
335 int recursion_level_;
337 /// The number of nested insets at this level
338 int nested_inset_level_;
340 /// The position/snake in the old/new document
341 /// of the forward/reverse search
// Naming scheme: 1st letter o/n = old/new document, 2nd letter
// f/r = forward/reverse search, 3rd letter p/s = position/snake.
// All of them are indexed by the diagonal k.
342 compl_vector<DocIterator> ofp;
343 compl_vector<DocIterator> nfp;
344 compl_vector<DocIterator> ofs;
345 compl_vector<DocIterator> nfs;
346 compl_vector<DocIterator> orp;
347 compl_vector<DocIterator> nrp;
348 compl_vector<DocIterator> ors;
349 compl_vector<DocIterator> nrs;
351 /// The number of differences in the path the algorithm
352 /// is currently processing.
356 /////////////////////////////////////////////////////////////////////
360 /////////////////////////////////////////////////////////////////////
/// Stores the new, old and destination buffer together with the options,
/// creates the implementation object and starts the status timer.
362 Compare::Compare(Buffer const * new_buf, Buffer const * old_buf,
363 Buffer * const dest_buf, CompareOptions const & options)
364 : new_buffer(new_buf), old_buffer(old_buf), dest_buffer(dest_buf),
365 options_(options), pimpl_(new Impl(*this))
// Every timeout of the status timer triggers doStatusMessage().
367 connect(&status_timer_, SIGNAL(timeout()),
368 this, SLOT(doStatusMessage()));
// Interval 1000 (milliseconds if status_timer_ is a QTimer -- confirm).
369 status_timer_.start(1000);
/// Passes the current status text of the implementation object on to
/// statusMessage().
373 void Compare::doStatusMessage()
375 statusMessage(pimpl_->status());
381 if (!dest_buffer || !new_buffer || !old_buffer)
384 // Copy the buffer params to the destination buffer
385 dest_buffer->params() = options_.settings_from_new
386 ? new_buffer->params() : old_buffer->params();
387 // Copy extra authors to the destination buffer
388 AuthorList const & extra_authors = options_.settings_from_new ?
389 old_buffer->params().authors() : new_buffer->params().authors();
390 AuthorList::Authors::const_iterator it = extra_authors.begin();
391 for (; it != extra_authors.end(); ++it)
392 dest_buffer->params().authors().record(*it);
394 // We will need this later
395 DocumentClassConstPtr const olddc =
396 dest_buffer->params().documentClassPtr();
397 // We do not want to share the DocumentClass with the other Buffer.
399 dest_buffer->params().makeDocumentClass();
406 // The comparison routine simply copies the paragraphs over into the
407 // new buffer with the document class from wherever they came from.
408 // So we need to reset the document class of all the paragraphs.
411 cap::switchBetweenClasses(
412 olddc, dest_buffer->params().documentClassPtr(),
413 static_cast<InsetText &>(dest_buffer->inset()), el);
415 finished(pimpl_->abort_);
/// Runs Impl::diff() on the new, old and destination buffer and returns
/// its bool result converted to int.
420 int Compare::doCompare()
422 return pimpl_->diff(new_buffer, old_buffer, dest_buffer);
/// Requests cancellation of a running comparison.
426 void Compare::abort()
// Raise the flag that the algorithm checks ("Set to true to cancel the
// algorithm") and wake one thread waiting on condition_.
428 pimpl_->abort_ = true;
429 condition_.wakeOne();
// Clear the flag again so that it does not affect a later run.
431 pimpl_->abort_ = false;
/// Copies the paragraphs covered by \c range, trims the copy to exactly
/// the range and inserts the result at the front of \c pars.
435 static void getParagraphList(DocRange const & range,
436 ParagraphList & pars)
438 // Clone the paragraphs within the selection.
439 pit_type startpit = range.from.pit();
440 pit_type endpit = range.to.pit();
441 ParagraphList const & ps_ = range.text()->paragraphs();
// Copy the paragraphs startpit .. endpit (inclusive).
442 ParagraphList tmp_pars(lyx::next(ps_.begin(), startpit),
443 lyx::next(ps_.begin(), endpit + 1));
445 // Remove the end of the last paragraph; afterwards, remove the
446 // beginning of the first paragraph. Keep this order - there may only
// If startpit == endpit, front and back are the same paragraph; erasing
// the tail first keeps range.to.pos() valid for that paragraph.
448 Paragraph & back = tmp_pars.back();
449 back.eraseChars(range.to.pos(), back.size(), false);
450 Paragraph & front = tmp_pars.front();
451 front.eraseChars(0, range.from.pos(), false);
453 pars.insert(pars.begin(), tmp_pars.begin(), tmp_pars.end());
/// Decides whether an inset of the old document (\c i_o) and an inset of
/// the new document (\c i_n) count as equal for the comparison.
457 static bool equal(Inset const * i_o, Inset const * i_n)
462 // Different types of insets
463 if (i_o->lyxCode() != i_n->lyxCode())
466 // Editable insets are assumed to be the same as they are of the
467 // same type. If we later on decide that we insert them in the
468 // document as being unchanged, we will run the algorithm on the
469 // contents of the two insets.
470 // FIXME: This fails if the parameters of the insets differ.
471 // FIXME: We do not recurse into InsetTabulars.
472 // FIXME: We need methods inset->equivalent(inset).
// Only the old inset is inspected here: editable, not a math inset,
// and an InsetText.
473 if (i_o->editable() && !i_o->asInsetMath()
474 && i_o->asInsetText())
// All other insets: compare the contents of the two string streams
// (presumably the serialised form of each inset -- confirm).
481 return o_os.str() == n_os.str();
/// Compares what is found at position \c o in the old document with what
/// is found at position \c n in the new document.
485 static bool equal(DocIterator & o, DocIterator & n)
487 // Explicitly check for this, so we won't call
488 // Paragraph::getChar for the last pos.
489 bool const o_lastpos = o.pos() == o.lastpos();
490 bool const n_lastpos = n.pos() == n.lastpos();
// If at least one iterator is at its last position, the positions are
// equal only when both are.
491 if (o_lastpos || n_lastpos)
492 return o_lastpos && n_lastpos;
494 Paragraph const & old_par = o.text()->getPar(o.pit());
495 Paragraph const & new_par = n.text()->getPar(n.pit());
497 char_type const c_o = old_par.getChar(o.pos());
498 char_type const c_n = new_par.getChar(n.pos());
// Insets are handed to the Inset overload of equal().
502 if (old_par.isInset(o.pos())) {
503 Inset const * i_o = old_par.getInset(o.pos());
504 Inset const * i_n = new_par.getInset(n.pos());
507 return equal(i_o, i_n);
// Font settings at both positions, each looked up with the parameters
// of its own buffer.
510 Font fo = old_par.getFontSettings(o.buffer()->params(), o.pos());
511 Font fn = new_par.getFontSettings(n.buffer()->params(), n.pos());
516 /// Traverses a snake in a certain direction. p points to a
517 /// position in the old and new file and they are synchronously
518 /// moved along the snake. The function returns true if a snake
520 static bool traverseSnake(DocPair & p, DocRangePair const & range,
// Where to stop: the end of the range when moving forward, the start
// of the range when moving backward.
524 DocPair const & p_end =
525 direction == Forward ? range.to() : range.from();
528 if (direction == Backward)
// The snake ends at the first pair of positions that differ.
530 if (!equal(p.o, p.n)) {
531 if (direction == Backward)
535 if (direction == Forward)
543 /////////////////////////////////////////////////////////////////////
547 /////////////////////////////////////////////////////////////////////
/// Extends the search in \c direction onto diagonal \c k for \c D
/// differences: picks the neighbouring diagonal to step from, takes one
/// step and then follows the snake starting there.
550 void Compare::Impl::furthestDpathKdiagonal(int D, int k,
551 DocRangePair const & rp, Direction direction)
// Work on the forward or on the reverse set of vectors
// (op/np: positions, os/ns: snakes, in the old/new document).
553 compl_vector<DocIterator> & op = direction == Forward ? ofp : orp;
554 compl_vector<DocIterator> & np = direction == Forward ? nfp : nrp;
555 compl_vector<DocIterator> & os = direction == Forward ? ofs : ors;
556 compl_vector<DocIterator> & ns = direction == Forward ? nfs : nrs;
558 // A vertical step means stepping one character in the new document.
// On the outer diagonals there is no choice: k == -D is always a
// vertical step, k == D never. In between, the positions reached in the
// old document on the two neighbouring diagonals are compared.
559 bool vertical_step = k == -D;
560 if (!vertical_step && k != D) {
561 vertical_step = direction == Forward
562 ? op[k - 1] < op[k + 1] : op[k - 1] > op[k + 1];
565 // Where do we take the step from ?
566 int const kk = vertical_step ? k + 1 : k - 1;
567 DocPair p(op[kk], np[kk]);
568 DocPair const s(os[kk], ns[kk]);
570 // If D==0 we simulate a vertical step from (0,-1) by doing nothing.
// Vertical steps move in the new document (p.n), horizontal steps in
// the old document (p.o). The range end is passed as limit when going
// forward, the range start when going backward.
573 if (vertical_step && direction == Forward)
574 step(p.n, rp.n.to, direction);
575 else if (vertical_step && direction == Backward)
576 step(p.n, rp.n.from, direction);
577 else if (!vertical_step && direction == Forward)
578 step(p.o, rp.o.to, direction);
579 else if (!vertical_step && direction == Backward)
580 step(p.o, rp.o.from, direction);
// Follow the snake from the new position.
584 if (traverseSnake(p, rp, direction)) {
589 // Copy last snake from the previous step
594 //Record new position
/// Checks whether the forward and the reverse path meet, given diagonal
/// \c k of the path currently being extended and the number of
/// differences \c D of the opposite path.
600 bool Compare::Impl::overlap(int k, int D)
602 // To generalize for the forward and reverse checks
603 int kk = offset_reverse_diagonal_ - k;
605 // Can we have overlap ?
606 if (kk <= D && kk >= -D) {
607 // Do we have overlap ?
// The paths overlap when the forward position has reached or passed the
// reverse position in both the old and the new document. The two
// returns differ only in which path is indexed by k and which by kk.
609 return ofp[k] >= orp[kk] && nfp[k] >= nrp[kk];
611 return ofp[kk] >= orp[k] && nfp[kk] >= nrp[k];
/// Stores in \c middle_snake the snake at which the forward and reverse
/// path met, looking at diagonal \c k of the search in \c direction and
/// at the matching diagonal of the opposite search.
617 Compare::Impl::SnakeResult Compare::Impl::retrieveMiddleSnake(
618 int k, int D, Direction direction, DocPair & middle_snake)
// os/ns: snakes of the search in `direction`;
// os_r/ns_r: snakes of the search in the opposite direction.
620 compl_vector<DocIterator> & os = direction == Forward ? ofs : ors;
621 compl_vector<DocIterator> & ns = direction == Forward ? nfs : nrs;
622 compl_vector<DocIterator> & os_r = direction == Forward ? ors : ofs;
623 compl_vector<DocIterator> & ns_r = direction == Forward ? nrs : nfs;
625 // The diagonal while doing the backward search
626 int kk = -k + offset_reverse_diagonal_;
628 // Did we find a snake ?
629 if (os[k].empty() && os_r[kk].empty()) {
630 // No, there is no snake at all, in which case
631 // the length of the shortest edit script is M+N.
632 LATTEST(2 * D - odd_offset_ == m_ + n_);
637 // Yes, but there is only 1 snake and we found it in the
// Take the snake recorded by the opposite search ...
639 middle_snake.o = os_r[kk];
640 middle_snake.n = ns_r[kk];
// ... or the snake recorded by the search in `direction`.
644 middle_snake.o = os[k];
645 middle_snake.n = ns[k];
/// Searches forward from the start and backward from the end of \c rp,
/// with an increasing number of differences D, until both paths
/// overlap. The snake found there is stored in \c middle_snake and the
/// length of the shortest edit script (2 * D - odd_offset_) is returned.
650 int Compare::Impl::findMiddleSnake(DocRangePair const & rp,
651 DocPair & middle_snake)
653 // The lengths of the old and new chunks.
657 // Forward paths are centered around the 0-diagonal; reverse paths
658 // are centered around the diagonal N - M. (Delta in the article)
659 offset_reverse_diagonal_ = n_ - m_;
661 // If the offset is odd, only check for overlap while extending forward
662 // paths, otherwise only check while extending reverse paths.
663 odd_offset_ = (offset_reverse_diagonal_ % 2 != 0);
// Forward positions get the start of the ranges as default value, the
// snake vectors an empty DocIterator.
665 ofp.reset(rp.o.from);
666 nfp.reset(rp.n.from);
667 ofs.reset(DocIterator());
668 nfs.reset(DocIterator());
671 ors.reset(DocIterator());
672 nrs.reset(DocIterator());
674 // In the formula below, the "+ 1" ensures we round like ceil()
675 int const D_max = (m_ + n_ + 1)/2;
676 // D is the number of horizontal and vertical steps, i.e.
677 // different characters in the old and new chunk.
678 for (int D = 0; D <= D_max; ++D) {
679 // to be used in the status messages
682 // Forward and reverse paths
683 for (int f = 0; f < 2; ++f) {
684 Direction direction = f == 0 ? Forward : Backward;
686 // Diagonals between -D and D can be reached by a D-path
// k advances in steps of 2, so it always has the same parity as D.
687 for (int k = -D; k <= D; k += 2) {
688 // Find the furthest reaching D-path on this diagonal
689 furthestDpathKdiagonal(D, k, rp, direction);
691 // Only check for overlap for forward paths if the offset is odd
692 // and only for reverse paths if the offset is even.
693 if (odd_offset_ == (direction == Forward)) {
695 // Do the forward and backward paths overlap ?
// With an odd offset the reverse paths have only been extended up to
// D - 1 differences when the forward paths are checked, hence
// D - odd_offset_.
696 if (overlap(k, D - odd_offset_)) {
697 retrieveMiddleSnake(k, D, direction, middle_snake);
698 return 2 * D - odd_offset_;
706 // This should never be reached
/// Entry point of the comparison: compares \c old_buf with \c new_buf
/// and writes the result into the paragraph list of \c dest_buf.
711 bool Compare::Impl::diff(Buffer const * new_buf, Buffer const * old_buf,
712 Buffer const * dest_buf)
// All three buffers are required.
714 if (!new_buf || !old_buf || !dest_buf)
719 dest_buf_ = dest_buf;
// Output goes to the paragraphs of the destination buffer's main inset.
720 dest_pars_ = &dest_buf->inset().asInsetText()->paragraphs();
723 recursion_level_ = 0;
724 nested_inset_level_ = 0;
726 DocRangePair rp(old_buf_, new_buf_);
// Follow the snake at the very beginning of both documents; `snake`
// covers that common prefix.
728 DocPair from = rp.from();
729 traverseSnake(from, rp, Forward);
730 DocRangePair const snake(rp.from(), from);
733 // Start the recursive algorithm
// What is left after the common prefix; only of interest if at least
// one of the two remaining ranges is not empty.
734 DocRangePair rp_new(from, rp.to());
735 if (!rp_new.o.empty() || !rp_new.n.empty())
// Attach every resulting paragraph to the destination buffer and to
// its main inset.
738 for (pit_type p = 0; p < (pit_type)dest_pars_->size(); ++p) {
739 (*dest_pars_)[p].setInsetBuffers(const_cast<Buffer &>(*dest_buf));
740 (*dest_pars_)[p].setInsetOwner(&dest_buf_->inset());
/// One step of the divide and conquer recursion: finds the middle snake
/// of \c rp and handles the part in front of it, the snake itself and
/// the part behind it.
747 void Compare::Impl::diff_i(DocRangePair const & rp)
753 DocPair middle_snake;
755 // Divides the problem into two smaller problems, split around
756 // the snake in the middle.
757 int const L_ses = findMiddleSnake(rp, middle_snake);
759 // Set maximum of progress bar
// Only the outermost call (recursion level 1) sets the maximum.
760 if (++recursion_level_ == 1)
761 compare_.progressMax(L_ses);
763 // There are now three possibilities: the strings were the same,
764 // the strings were completely different, or we found a middle
765 // snake and we can split the string into two parts to process.
767 // Two identical strings (this must be a very rare case, because
768 // usually this will be part of a snake adjacent to these strings).
769 writeToDestBuffer(rp.o);
// An empty middle snake means that nothing in common was found.
771 else if (middle_snake.o.empty()) {
772 // Two totally different strings
773 writeToDestBuffer(rp.o, Change::DELETED);
774 writeToDestBuffer(rp.n, Change::INSERTED);
777 // Retrieve the complete snake
// Extend the snake backward to find where the first part ends ...
778 DocPair first_part_end = middle_snake;
779 traverseSnake(first_part_end, rp, Backward);
780 DocRangePair first_part(rp.from(), first_part_end);
// ... and forward to find where the second part begins.
782 DocPair second_part_begin = middle_snake;
783 traverseSnake(second_part_begin, rp, Forward);
784 DocRangePair second_part(second_part_begin, rp.to());
786 // Split the string in three parts:
787 // 1. in front of the snake
788 diffPart(first_part);
790 // 2. the snake itself, and
791 DocRangePair const snake(first_part.to(), second_part.from());
794 // 3. behind the snake.
795 diffPart(second_part);
/// Handles one of the chunks next to a snake, depending on which of the
/// two ranges in \c rp has content.
801 void Compare::Impl::diffPart(DocRangePair const & rp)
803 // Is there a finite length string in both buffers, if not there
804 // is an empty string and we write the other one to the buffer.
805 if (!rp.o.empty() && !rp.n.empty())
// Only the old document has text here: it was deleted.
808 else if (!rp.o.empty())
809 writeToDestBuffer(rp.o, Change::DELETED);
// Only the new document has text here: it was inserted.
811 else if (!rp.n.empty())
812 writeToDestBuffer(rp.n, Change::INSERTED);
/// Compares the contents of the insets located at \c p in the old and
/// new document; while doing so, output is redirected into the
/// paragraph list of \c inset.
816 void Compare::Impl::diffInset(Inset * inset, DocPair const & p)
818 // Find the dociterators for the beginning and the
819 // end of the inset, for the old and new document.
820 DocRangePair const rp = stepIntoInset(p);
822 // Recurse into the inset. Temporarily replace the dest_pars
823 // paragraph list by the paragraph list of the nested inset.
824 ParagraphList * backup_dest_pars = dest_pars_;
825 dest_pars_ = &inset->asInsetText()->text().paragraphs();
// Keep track of the nesting depth while working inside the inset
// (writeToDestBuffer() reports progress only at depth 0).
828 ++nested_inset_level_;
830 --nested_inset_level_;
// Switch back to the paragraph list that was active before.
832 dest_pars_ = backup_dest_pars;
/// Writes a snake to the destination: copies the old-document range of
/// \c rp, runs diffInset() for every editable InsetText found in it and
/// then writes the copy to the destination paragraph list.
836 void Compare::Impl::processSnake(DocRangePair const & rp)
839 getParagraphList(rp.o, pars);
841 // Find insets in this paragraph list
842 DocPair it = rp.from();
843 for (; it.o < rp.o.to; ++it) {
844 Inset * inset = it.o.text()->getPar(it.o.pit()).getInset(it.o.pos());
845 if (inset && inset->editable() && inset->asInsetText()) {
846 // Find the inset in the paragraph list that will be pasted into
847 // the final document. The contents of the inset will be replaced
848 // by the output of the algorithm below.
// Translate the location in the old document into a location in the
// copy: the paragraph index is relative to the first copied paragraph;
// in that first paragraph the position is relative to the start of the
// range, because getParagraphList() erased the characters in front.
849 pit_type const pit = it.o.pit() - rp.o.from.pit();
850 pos_type const pos = pit ? it.o.pos() : it.o.pos() - rp.o.from.pos();
851 inset = pars[pit].getInset(pos);
852 LASSERT(inset, continue);
853 diffInset(inset, it);
856 writeToDestBuffer(pars);
/// Copies the paragraphs of \c range, marks every copied paragraph with
/// a change of the given \c type and writes them to the destination.
860 void Compare::Impl::writeToDestBuffer(DocRange const & range,
864 getParagraphList(range, pars);
869 ParagraphList::iterator it = pars.begin();
870 for (; it != pars.end(); ++it) {
871 it->setChange(Change(type));
875 writeToDestBuffer(pars);
// Progress is reported only for text that is not inside a nested inset.
877 if (nested_inset_level_ == 0)
878 compare_.progress(size);
/// Appends \c pars to the destination paragraph list and calls
/// mergeParagraph() at the seam between old and new content.
882 void Compare::Impl::writeToDestBuffer(ParagraphList const & pars) const
// Index of the paragraph that was last before the insertion.
884 pit_type const pit = dest_pars_->size() - 1;
885 dest_pars_->insert(dest_pars_->end(), pars.begin(), pars.end());
// Presumably joins paragraph `pit` with the first appended paragraph,
// so that the new text continues on the same paragraph -- confirm.
887 mergeParagraph(dest_buf_->params(), *dest_pars_, pit);
891 #include "moc_Compare.cpp"