3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Vincent van Ravesteijn
8 * Full author contact details are available in file CREDITS.
15 #include "BufferParams.h"
18 #include "insets/InsetText.h"
20 #include "support/lassert.h"
21 #include "support/qstring_helpers.h"
23 #include <boost/next_prior.hpp>
26 using namespace lyx::support;
// Moves dit.top() by one position: forwardPos() when \c direction is
// Forward; backwardPos() is the other visible call.
// NOTE(review): the branch keyword between the two calls is not visible in
// this listing - presumably an `else`; confirm against the full file.
38 static void step(DocIterator & dit, Direction direction)
40 if (direction == Forward)
41 dit.top().forwardPos();
43 dit.top().backwardPos();
47 static void step(DocIterator & dit, DocIterator const & end, Direction direction)
55 * A pair of two DocIterators that form a range.
59 DocRange(DocIterator from_, DocIterator to_)
60 : from(from_), to(to_)
63 DocRange(Buffer const * buf)
65 from = doc_iterator_begin(buf);
66 to = doc_iterator_end(buf);
71 Text * text() const { return from.text(); }
73 bool empty() const { return to <= from; }
75 size_t length() const;
77 /// The begin of the range
79 /// The end of the range
// Accumulates a length for the range: for every paragraph index from
// from.pit() up to, but not including, to.pit() the paragraph size plus one
// is added; finally the difference to.pos() - from.pos() is added.
// NOTE(review): the declaration of the accumulator `length` and the return
// statement are not visible in this listing.
84 size_t DocRange::length() const
86 ParagraphList const & ps = from.text()->paragraphs();
88 pit_type pit = from.pit();
89 pit_type const endpit = to.pit();
// Paragraphs that precede the one holding the end position.
// NOTE(review): the "+ 1" presumably counts the paragraph break - confirm.
90 for (; pit < endpit; ++pit)
91 length += ps[pit].size() + 1;
// Position offset between end and begin; this term can be negative when the
// end position index is smaller than the begin position index.
92 length += to.pos() - from.pos();
101 DocPair(DocIterator o_, DocIterator n_)
// Returns true only when BOTH the old and the new iterator differ from those
// of \c rhs; a pair therefore stops being "unequal" as soon as either of its
// iterators matches the corresponding one in \c rhs.
// NOTE(review): the operator is not declared const.
105 bool operator!=(DocPair const & rhs) {
106 // this might not be intuitive but correct for our purpose
107 return o != rhs.o && n != rhs.n;
111 DocPair & operator++()
118 DocPair & operator--()
131 * A pair of two DocRanges.
135 DocRangePair(DocRange o_, DocRange n_)
139 DocRangePair(DocPair from, DocPair to)
140 : o(from.o, to.o), n(from.n, to.n)
143 DocRangePair(Buffer const * o_buf, Buffer const * n_buf)
147 /// Returns the from pair
148 DocPair from() const { return DocPair(o.from, n.from); }
150 /// Returns the to pair
151 DocPair to() const { return DocPair(o.to, n.to); }
// Builds a DocRangePair from the location of an inset in the old and the new
// document. Both ranges start out as [inset_location, inset_location] and are
// then adjusted as commented below.
// NOTE(review): the return statement is not visible in this listing.
158 static DocRangePair stepIntoInset(DocPair const & inset_location)
160 DocRangePair rp(inset_location, inset_location);
// Range begin: advance one position with DocIterator::forwardPos().
// NOTE(review): presumably this enters the inset - confirm.
161 rp.o.from.forwardPos();
162 rp.n.from.forwardPos();
// Range end: step forward on the top slice (see step()), then go back one
// position with DocIterator::backwardPos().
// NOTE(review): presumably this lands on the last position inside the
// inset - confirm.
163 step(rp.o.to, Forward);
164 step(rp.n.to, Forward);
165 rp.o.to.backwardPos();
166 rp.n.to.backwardPos();
172 * This class is designed to hold a vector that has both positive and
173 * negative indices. It is internally represented as two vectors, one
174 * for non-negative indices and one for negative indices. In this way, the
175 * vector can grow in both directions.
176 * If an index is not available in the vector, the default value is
177 * returned. If an object is put in the vector beyond its size, the
178 * empty spots in between are also filled with the default value.
185 void reset(T const & def)
192 /// Gets the value at index. If it is not in the vector
193 /// the default value is inserted and returned.
194 T & operator[](int index) {
// Indices >= 0 are stored in Vp_, negative indices in Vn_.
195 vector<T> & V = index >= 0 ? Vp_ : Vn_;
// Slot inside the chosen vector: i for i >= 0; -1 -> 0, -2 -> 1, ... otherwise.
196 unsigned int const ii = index >= 0 ? index : -index - 1;
// Grow the vector with copies of default_ until slot ii exists.
197 while (ii >= V.size())
198 V.push_back(default_);
203 /// The vector for positive indices
205 /// The vector for negative indices
207 /// The default value that is inserted in the vector
208 /// if more space is needed
214 * The implementation of the algorithm that does the comparison
215 * between two documents.
217 class Compare::Impl {
220 Impl(Compare const & compare)
221 : abort_(false), compare_(compare), recursion_level_(0), D_(0)
227 // Algorithm to find the shortest edit string. This algorithm
228 // only needs a linear amount of memory (linear with the sum
229 // of the number of characters in the two paragraph-lists).
230 bool diff(Buffer const * new_buf, Buffer const * old_buf,
231 Buffer const * dest_buf);
233 /// Set to true to cancel the algorithm
239 status += toqstr("recursion level:") + " " + QString::number(recursion_level_)
240 + " " + toqstr("differences:") + " " + QString::number(D_);
245 /// Finds the middle snake and returns the length of the
246 /// shortest edit script.
247 int findMiddleSnake(DocRangePair const & rp, DocPair & middle_snake);
255 /// Retrieve the middle snake when there is overlap between
256 /// the forward and backward path.
257 SnakeResult retrieveMiddleSnake(int k, int D, Direction direction,
258 DocPair & middle_snake);
260 /// Find the furthest reaching D-path (number of horizontal
261 /// and vertical steps; differences between the old and new
262 /// document) in the k-diagonal (vertical minus horizontal steps).
263 void furthestDpathKdiagonal(int D, int k,
264 DocRangePair const & rp, Direction direction);
266 /// Is there overlap between the forward and backward path
267 bool overlap(int k, int D);
269 /// This function is called recursively by a divide and conquer
270 /// algorithm. Each time, the string is split into two parts
271 /// around the middle snake.
272 void diff_i(DocRangePair const & rp);
274 /// Processes the split chunks. It either adds them as deleted,
275 /// as added, or calls diff_i for further processing.
276 void diffPart(DocRangePair const & rp);
278 /// Runs the algorithm for the inset located at \c p (in the old and
279 /// new document) and writes the result into \c inset.
280 void diffInset(Inset * inset, DocPair const & p);
282 /// Adds the snake to the destination buffer. The algorithm will
283 /// recursively be applied to any InsetTexts that are within the snake.
284 void processSnake(DocRangePair const & rp);
286 /// Writes the range to the destination buffer
287 void writeToDestBuffer(DocRange const & range,
288 Change::Type type = Change::UNCHANGED);
290 /// Writes the paragraph list to the destination buffer
291 void writeToDestBuffer(ParagraphList const & copy_pars) const;
293 /// The length of the old chunk currently processed
295 /// The length of the new chunk currently processed
297 /// The offset diagonal of the reverse path of the
298 /// currently processed chunk
299 int offset_reverse_diagonal_;
300 /// Is the offset odd or even ?
303 /// The thread object, used to emit signals to the GUI
304 Compare const & compare_;
306 /// The buffer containing text that will be marked as old
307 Buffer const * old_buf_;
308 /// The buffer containing text that will be marked as new
309 Buffer const * new_buf_;
310 /// The destination buffer that receives the result of the comparison
311 Buffer const * dest_buf_;
313 /// The paragraph list of the destination buffer
314 ParagraphList * dest_pars_;
316 /// The level of recursion
317 int recursion_level_;
319 /// The number of nested insets at this level
320 int nested_inset_level_;
322 /// The position/snake in the old/new document
323 /// of the forward/reverse search
324 compl_vector<DocIterator> ofp;
325 compl_vector<DocIterator> nfp;
326 compl_vector<DocIterator> ofs;
327 compl_vector<DocIterator> nfs;
328 compl_vector<DocIterator> orp;
329 compl_vector<DocIterator> nrp;
330 compl_vector<DocIterator> ors;
331 compl_vector<DocIterator> nrs;
333 /// The number of differences in the path the algorithm
334 /// is currently processing.
338 /////////////////////////////////////////////////////////////////////
342 /////////////////////////////////////////////////////////////////////
// Stores the three buffers and the options, creates the Impl object, connects
// the timeout() signal of status_timer_ to doStatusMessage() and starts the
// timer with an interval argument of 1000.
// NOTE(review): presumably milliseconds (QTimer) - confirm.
344 Compare::Compare(Buffer const * new_buf, Buffer const * old_buf,
345 Buffer * const dest_buf, CompareOptions const & options)
346 : new_buffer(new_buf), old_buffer(old_buf), dest_buffer(dest_buf),
347 options_(options), pimpl_(new Impl(*this))
349 connect(&status_timer_, SIGNAL(timeout()),
350 this, SLOT(doStatusMessage()));
351 status_timer_.start(1000);
// Passes the current status text of the implementation object on to
// statusMessage().
355 void Compare::doStatusMessage()
357 statusMessage(pimpl_->status());
363 if (!dest_buffer || !new_buffer || !old_buffer)
366 // Copy the buffer params to the new buffer
367 dest_buffer->params() = options_.settings_from_new
368 ? new_buffer->params() : old_buffer->params();
376 finished(pimpl_->abort_);
// Runs the comparison by delegating to Impl::diff() on the stored buffers;
// the bool result of diff() is returned converted to int.
381 int Compare::doCompare()
383 return pimpl_->diff(new_buffer, old_buffer, dest_buffer);
// Requests cancellation: raises the abort flag of the implementation object,
// wakes one waiter on condition_, and afterwards clears the flag again.
// NOTE(review): a statement between wakeOne() and the reset is not visible in
// this listing (presumably a wait for the worker to stop) - confirm.
387 void Compare::abort()
389 pimpl_->abort_ = true;
390 condition_.wakeOne();
392 pimpl_->abort_ = false;
// Copies the paragraphs covered by \c range, trims the copies so that only the
// text inside the range remains, and inserts them at the front of \c pars.
396 static void getParagraphList(DocRange const & range,
397 ParagraphList & pars)
399 // Clone the paragraphs within the selection.
400 pit_type startpit = range.from.pit();
401 pit_type endpit = range.to.pit();
402 ParagraphList const & ps_ = range.text()->paragraphs();
// Iterator range [startpit, endpit + 1): the paragraph at endpit is included.
403 ParagraphList tmp_pars(boost::next(ps_.begin(), startpit),
404 boost::next(ps_.begin(), endpit + 1));
406 // Remove the end of the last paragraph; afterwards, remove the
407 // beginning of the first paragraph. Keep this order - there may only
// NOTE(review): the sentence above is cut off in this listing. When front and
// back are the same paragraph, trimming the head first would shift the
// position used to trim the tail.
409 Paragraph & back = tmp_pars.back();
410 back.eraseChars(range.to.pos(), back.size(), false);
411 Paragraph & front = tmp_pars.front();
412 front.eraseChars(0, range.from.pos(), false);
// Prepend the trimmed copies to the caller's list.
414 pars.insert(pars.begin(), tmp_pars.begin(), tmp_pars.end());
// Decides whether the inset from the old document (\c i_o) and the inset from
// the new document (\c i_n) are treated as equal by the comparison.
// NOTE(review): the return statements of the two `if` branches and the code
// that fills o_os / n_os are not visible in this listing.
418 static bool equal(Inset const * i_o, Inset const * i_n)
423 // Different types of insets
424 if (i_o->lyxCode() != i_n->lyxCode())
427 // Editable insets are assumed to be the same as they are of the
428 // same type. If we later on decide that we insert them in the
429 // document as being unchanged, we will run the algorithm on the
430 // contents of the two insets.
431 // FIXME: This fails if the parameters of the insets differ.
432 // FIXME: We do not recurse into InsetTabulars.
433 // FIXME: We need methods inset->equivalent(inset).
// Only the old inset is inspected here: editable, not a math inset, and an
// InsetText.
434 if (i_o->editable() && !i_o->asInsetMath()
435 && i_o->asInsetText())
// Fallback: compare the string contents of the two streams o_os and n_os.
442 return o_os.str() == n_os.str();
// Compares the element at the position of \c o (old document) with the one at
// the position of \c n (new document). The visible code reads the character,
// the inset (when the old position holds one) and the font settings at both
// positions.
// NOTE(review): the comparisons of c_o/c_n and fo/fn and the final return are
// not visible in this listing.
446 static bool equal(DocIterator & o, DocIterator & n) {
447 Paragraph const & old_par = o.text()->getPar(o.pit());
448 Paragraph const & new_par = n.text()->getPar(n.pit());
450 char_type const c_o = old_par.getChar(o.pos());
451 char_type const c_n = new_par.getChar(n.pos());
// Inset positions are delegated to equal(Inset const *, Inset const *).
455 if (old_par.isInset(o.pos())) {
456 Inset const * i_o = old_par.getInset(o.pos());
457 Inset const * i_n = new_par.getInset(n.pos());
460 return equal(i_o, i_n);
// Font settings are looked up with the parameters of each iterator's buffer.
463 Font fo = old_par.getFontSettings(o.buffer()->params(), o.pos());
464 Font fn = new_par.getFontSettings(n.buffer()->params(), n.pos());
469 /// Traverses a snake in a certain direction. p points to a
470 /// position in the old and new file and they are synchronously
471 /// moved along the snake. The function returns true if a snake
// NOTE(review): the end of the sentence above, the loop construct and the
// statements guarded by the direction checks are not visible in this listing.
473 static bool traverseSnake(DocPair & p, DocRangePair const & range,
// The traversal is bounded by the end of the range when going forward and by
// its begin when going backward.
477 DocPair const & p_end =
478 direction == Forward ? range.to() : range.from();
481 if (direction == Backward)
// A mismatch between the old and new position is detected with equal().
483 if (!equal(p.o, p.n)) {
484 if (direction == Backward)
488 if (direction == Forward)
496 /////////////////////////////////////////////////////////////////////
500 /////////////////////////////////////////////////////////////////////
// Extends the search on diagonal \c k for \c D differences in the given
// direction: picks the neighbouring diagonal to step from, takes one step in
// the old or the new document and then follows the snake with traverseSnake().
// NOTE(review): the guard for D == 0 and the statements that store the snake
// and the new position are not visible in this listing.
503 void Compare::Impl::furthestDpathKdiagonal(int D, int k,
504 DocRangePair const & rp, Direction direction)
// Select the forward or the reverse bookkeeping vectors (positions, snakes).
506 compl_vector<DocIterator> & op = direction == Forward ? ofp : orp;
507 compl_vector<DocIterator> & np = direction == Forward ? nfp : nrp;
508 compl_vector<DocIterator> & os = direction == Forward ? ofs : ors;
509 compl_vector<DocIterator> & ns = direction == Forward ? nfs : nrs;
511 // A vertical step means stepping one character in the new document.
// On the boundary diagonal k == -D the step is always vertical, on k == D it
// never is; in between, the old-document positions of the two neighbouring
// diagonals are compared ('<' when going forward, '>' when going backward).
512 bool vertical_step = k == -D;
513 if (!vertical_step && k != D) {
514 vertical_step = direction == Forward
515 ? op[k - 1] < op[k + 1] : op[k - 1] > op[k + 1];
518 // Where do we take the step from ?
519 int const kk = vertical_step ? k + 1 : k - 1;
520 DocPair p(op[kk], np[kk]);
522 // If D==0 we simulate a vertical step from (0,-1) by doing nothing.
// A vertical step moves p.n, any other step moves p.o; the second argument of
// step() is the range end when going forward and the range begin when going
// backward.
525 if (vertical_step && direction == Forward)
526 step(p.n, rp.n.to, direction);
527 else if (vertical_step && direction == Backward)
528 step(p.n, rp.n.from, direction);
529 else if (!vertical_step && direction == Forward)
530 step(p.o, rp.o.to, direction);
531 else if (!vertical_step && direction == Backward)
532 step(p.o, rp.o.from, direction);
536 if (traverseSnake(p, rp, direction)) {
541 // Copy last snake from the previous step
546 // Record new position
// Tests whether the forward and the reverse path meet on diagonal \c k. The
// matching diagonal of the other search is offset_reverse_diagonal_ - k, and
// overlap is only possible when that diagonal lies within [-D, D].
// NOTE(review): the condition that selects between the two comparisons below
// and the return for the no-overlap case are not visible in this listing.
552 bool Compare::Impl::overlap(int k, int D)
554 // To generalize for the forward and reverse checks
555 int kk = offset_reverse_diagonal_ - k;
557 // Can we have overlap ?
558 if (kk <= D && kk >= -D) {
559 // Do we have overlap ?
// k indexes the forward vectors and kk the reverse vectors.
561 return ofp[k] >= orp[kk] && nfp[k] >= nrp[kk];
// kk indexes the forward vectors and k the reverse vectors.
563 return ofp[kk] >= orp[k] && nfp[kk] >= nrp[k];
// Fills \c middle_snake after an overlap was detected on diagonal \c k. It is
// taken from the snake vectors of the current direction (os/ns, index k) or
// from those of the opposite direction (os_r/ns_r, index kk).
// NOTE(review): the conditions separating the three cases and the returned
// SnakeResult values are not visible in this listing.
569 Compare::Impl::SnakeResult Compare::Impl::retrieveMiddleSnake(
570 int k, int D, Direction direction, DocPair & middle_snake)
// os/ns belong to the search direction, os_r/ns_r to the opposite one.
572 compl_vector<DocIterator> & os = direction == Forward ? ofs : ors;
573 compl_vector<DocIterator> & ns = direction == Forward ? nfs : nrs;
574 compl_vector<DocIterator> & os_r = direction == Forward ? ors : ofs;
575 compl_vector<DocIterator> & ns_r = direction == Forward ? nrs : nfs;
577 // The diagonal while doing the backward search
578 int kk = -k + offset_reverse_diagonal_;
580 // Did we find a snake ?
581 if (os[k].empty() && os_r[kk].empty()) {
582 // No, there is no snake at all, in which case
583 // the length of the shortest edit script is M+N.
584 LASSERT(2 * D - odd_offset_ == M_ + N_, /**/);
589 // Yes, but there is only 1 snake and we found it in the
// Snake taken from the opposite search direction.
591 middle_snake.o = os_r[kk];
592 middle_snake.n = ns_r[kk];
// Snake taken from the current search direction.
596 middle_snake.o = os[k];
597 middle_snake.n = ns[k];
// Searches the range pair \c rp from both ends at once. For a growing number
// of differences D it extends the forward and then the reverse paths on every
// second diagonal in [-D, D]; once the paths overlap, the middle snake is
// stored in \c middle_snake and 2 * D - odd_offset_ is returned.
// NOTE(review): the assignments of M_ and N_, the reset of orp/nrp, the update
// of D_ and the code after the loops are not visible in this listing.
602 int Compare::Impl::findMiddleSnake(DocRangePair const & rp,
603 DocPair & middle_snake)
605 // The lengths of the old and new chunks.
609 // Forward paths are centered around the 0-diagonal; reverse paths
610 // are centered around the diagonal N - M. (Delta in the article)
611 offset_reverse_diagonal_ = N_ - M_;
613 // If the offset is odd, only check for overlap while extending forward
614 // paths, otherwise only check while extending reverse paths.
615 odd_offset_ = (offset_reverse_diagonal_ % 2 != 0);
// Forward position vectors default to the begin of the range; the snake
// vectors default to an empty DocIterator.
617 ofp.reset(rp.o.from);
618 nfp.reset(rp.n.from);
619 ofs.reset(DocIterator());
620 nfs.reset(DocIterator());
623 ors.reset(DocIterator());
624 nrs.reset(DocIterator());
626 // In the formula below, the "+ 1" ensures we round like ceil()
627 int const D_max = (M_ + N_ + 1)/2;
628 // D is the number of horizontal and vertical steps, i.e.
629 // different characters in the old and new chunk.
630 for (int D = 0; D <= D_max; ++D) {
631 // to be used in the status messages
634 // Forward and reverse paths
635 for (int f = 0; f < 2; ++f) {
636 Direction direction = f == 0 ? Forward : Backward;
638 // Diagonals between -D and D can be reached by a D-path
639 for (int k = -D; k <= D; k += 2) {
640 // Find the furthest reaching D-path on this diagonal
641 furthestDpathKdiagonal(D, k, rp, direction);
643 // Only check for overlap for forward paths if the offset is odd
644 // and only for reverse paths if the offset is even.
645 if (odd_offset_ == (direction == Forward)) {
647 // Do the forward and backward paths overlap ?
// odd_offset_ is used as 0/1 here: with an odd offset the forward paths of
// this round are checked against D - 1.
648 if (overlap(k, D - odd_offset_)) {
649 retrieveMiddleSnake(k, D, direction, middle_snake);
650 return 2 * D - odd_offset_;
658 // This should never be reached
// Entry point of the comparison: checks the three buffers, points dest_pars_
// at the paragraph list of the destination buffer, resets the recursion
// counters, determines the common leading snake of both documents and
// finally attaches every destination paragraph to the destination buffer.
// NOTE(review): the early return, the assignments of old_buf_ / new_buf_, the
// handling of the leading snake, the recursive call and the return value are
// not visible in this listing.
663 bool Compare::Impl::diff(Buffer const * new_buf, Buffer const * old_buf,
664 Buffer const * dest_buf)
666 if (!new_buf || !old_buf || !dest_buf)
671 dest_buf_ = dest_buf;
672 dest_pars_ = &dest_buf->inset().asInsetText()->paragraphs();
675 recursion_level_ = 0;
676 nested_inset_level_ = 0;
// Ranges spanning the whole old and the whole new buffer.
678 DocRangePair rp(old_buf_, new_buf_);
// Follow the text from the start of both documents with traverseSnake();
// `snake` is the range pair from the start up to where that traversal
// stopped.
680 DocPair from = rp.from();
681 traverseSnake(from, rp, Forward);
682 DocRangePair const snake(rp.from(), from);
685 // Start the recursive algorithm
// Set buffer and inset owner on every paragraph of the destination list.
688 for (pit_type p = 0; p < (pit_type)dest_pars_->size(); ++p) {
689 (*dest_pars_)[p].setBuffer(const_cast<Buffer &>(*dest_buf));
690 (*dest_pars_)[p].setInsetOwner(&dest_buf_->inset());
// Recursive step of the comparison for the range pair \c rp: finds the middle
// snake, then writes the ranges out directly (identical or completely
// different) or splits them around the snake and processes the three parts in
// order.
// NOTE(review): the abort check, the condition of the first case, the
// processing of the snake itself and the decrement of recursion_level_ are
// not visible in this listing.
697 void Compare::Impl::diff_i(DocRangePair const & rp)
703 DocPair middle_snake;
705 // Divides the problem into two smaller problems, split around
706 // the snake in the middle.
707 int const L_ses = findMiddleSnake(rp, middle_snake);
709 // Set maximum of progress bar
// Only the outermost call (recursion level 1) reports the maximum.
710 if (++recursion_level_ == 1)
711 compare_.progressMax(L_ses);
713 // There are now three possibilities: the strings were the same,
714 // the strings were completely different, or we found a middle
715 // snake and we can split the string into two parts to process.
717 // Two identical strings (this must be a very rare case, because
718 // usually this will be part of a snake adjacent to these strings).
719 writeToDestBuffer(rp.o);
721 else if (middle_snake.o.empty()) {
722 // Two totally different strings
723 writeToDestBuffer(rp.o, Change::DELETED);
724 writeToDestBuffer(rp.n, Change::INSERTED);
727 // Retrieve the complete snake
// Traverse backward and forward from the middle snake to find both ends.
728 DocPair first_part_end = middle_snake;
729 traverseSnake(first_part_end, rp, Backward);
730 DocRangePair first_part(rp.from(), first_part_end);
732 DocPair second_part_begin = middle_snake;
733 traverseSnake(second_part_begin, rp, Forward);
734 DocRangePair second_part(second_part_begin, rp.to());
736 // Split the string in three parts:
737 // 1. in front of the snake
738 diffPart(first_part);
740 // 2. the snake itself, and
741 DocRangePair const snake(first_part.to(), second_part.from());
744 // 3. behind the snake.
745 diffPart(second_part);
// Handles one chunk next to a snake. If only the old range is non-empty it is
// written as deleted; if only the new range is non-empty it is written as
// inserted.
// NOTE(review): the statement executed when both ranges are non-empty is not
// visible in this listing - presumably the recursive diff_i(rp); confirm.
751 void Compare::Impl::diffPart(DocRangePair const & rp)
753 // Is there a finite length string in both buffers, if not there
754 // is an empty string and we write the other one to the buffer.
755 if (!rp.o.empty() && !rp.n.empty())
758 else if (!rp.o.empty())
759 writeToDestBuffer(rp.o, Change::DELETED);
761 else if (!rp.n.empty())
762 writeToDestBuffer(rp.n, Change::INSERTED);
// Compares the contents of the inset found at \c p in the old and the new
// document. While this runs, dest_pars_ points at the paragraph list of
// \c inset, so output is written into that inset; the previous destination
// list is restored at the end.
// NOTE(review): the statements between the redirection and the restore, other
// than the nesting counter updates, are not visible in this listing.
766 void Compare::Impl::diffInset(Inset * inset, DocPair const & p)
768 // Find the dociterators for the beginning and the
769 // end of the inset, for the old and new document.
770 DocRangePair const rp = stepIntoInset(p);
772 // Recurse into the inset. Temporarily replace the dest_pars
773 // paragraph list by the paragraph list of the nested inset.
774 ParagraphList * backup_dest_pars = dest_pars_;
775 dest_pars_ = &inset->asInsetText()->text().paragraphs();
// writeToDestBuffer() reports progress only while this counter is 0.
778 ++nested_inset_level_;
780 --nested_inset_level_;
782 dest_pars_ = backup_dest_pars;
// Writes a snake to the destination. The old range of \c rp is copied into a
// local paragraph list; for every editable text inset met while walking the
// snake, diffInset() is run on the corresponding inset in that copy; finally
// the list is written to the destination.
// NOTE(review): the declaration of `pars` is not visible in this listing.
786 void Compare::Impl::processSnake(DocRangePair const & rp)
789 getParagraphList(rp.o, pars);
791 // Find insets in this paragraph list
792 DocPair it = rp.from();
793 for (; it.o < rp.o.to; ++it) {
794 Inset * inset = it.o.text()->getPar(it.o.pit()).getInset(it.o.pos());
795 if (inset && inset->editable() && inset->asInsetText()) {
796 // Find the inset in the paragraph list that will be pasted into
797 // the final document. The contents of the inset will be replaced
798 // by the output of the algorithm below.
// Translate the document position into an index in the copied list: the
// paragraph index is relative to the first paragraph of the range, and only
// in that first paragraph is the position shifted by the range start.
799 pit_type const pit = it.o.pit() - rp.o.from.pit();
800 pos_type const pos = pit ? it.o.pos() : it.o.pos() - rp.o.from.pos();
801 inset = pars[pit].getInset(pos);
802 LASSERT(inset, /**/);
803 diffInset(inset, it);
806 writeToDestBuffer(pars);
// Copies \c range into a paragraph list, sets the change type on every copied
// paragraph, appends the list to the destination and reports progress when
// not inside a nested inset.
// NOTE(review): the second parameter line, the declarations of `pars` and
// `size` and any further statements in the loop are not visible in this
// listing.
810 void Compare::Impl::writeToDestBuffer(DocRange const & range,
814 getParagraphList(range, pars);
819 ParagraphList::iterator it = pars.begin();
820 for (; it != pars.end(); ++it) {
821 it->setChange(Change(type));
825 writeToDestBuffer(pars);
// Progress is only reported for output at nesting level 0.
827 if (nested_inset_level_ == 0)
828 compare_.progress(size);
// Appends \c pars to the destination paragraph list and then calls
// mergeParagraph() at the index of the paragraph that was last before the
// append.
// NOTE(review): presumably this joins that paragraph with the first appended
// one - confirm. dest_pars_->size() - 1 is computed before the append; any
// guard for an empty destination list is not visible in this listing.
832 void Compare::Impl::writeToDestBuffer(ParagraphList const & pars) const
834 pit_type const pit = dest_pars_->size() - 1;
835 dest_pars_->insert(dest_pars_->end(), pars.begin(), pars.end());
837 mergeParagraph(dest_buf_->params(), *dest_pars_, pit);
841 #include "moc_Compare.cpp"