3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Vincent van Ravesteijn
8 * Full author contact details are available in file CREDITS.
15 #include "BufferParams.h"
18 #include "insets/InsetText.h"
20 #include "support/lassert.h"
22 #include <boost/next_prior.hpp>
26 using namespace lyx::support;
/// Moves \c dit by one position in \c direction, operating on the top
/// slice of the iterator (forwardPos() or backwardPos()).
38 static void step(DocIterator & dit, Direction direction)
40 if (direction == Forward)
41 dit.top().forwardPos();
// NOTE(review): presumably the non-Forward (backward) case.
43 dit.top().backwardPos();
/// Overload of step() that additionally receives \c end; callers pass the
/// boundary of the range being traversed (range.to or range.from).
/// NOTE(review): confirm how \c end limits the step.
47 static void step(DocIterator & dit, DocIterator const & end, Direction direction)
55 * A pair of two DocIterators that form a range.
/// Constructs a range from two explicit iterators.
59 DocRange(DocIterator from_, DocIterator to_)
60 : from(from_), to(to_)
/// Constructs the range running from doc_iterator_begin(buf) to
/// doc_iterator_end(buf).
63 DocRange(Buffer const * buf)
65 from = doc_iterator_begin(buf);
66 to = doc_iterator_end(buf);
/// The Text that the beginning of the range points into.
71 Text * text() const { return from.text(); }
/// True when the end does not lie after the beginning.
73 bool empty() const { return to <= from; }
/// The length of the range, computed from the sizes of its paragraphs.
75 size_t length() const;
77 /// The beginning of the range
79 /// The end of the range
/// Computes the length of the range: the paragraphs touched by the range
/// are copied, the copy is trimmed to the range boundaries, and the sizes
/// of the remaining paragraphs are summed.
84 size_t DocRange::length() const
86 pit_type startpit = from.pit();
87 pit_type endpit = to.pit();
88 ParagraphList const & ps_ = from.text()->paragraphs();
// Copy paragraphs startpit..endpit (inclusive) so that the trimming below
// only affects the local list.
90 ParagraphList pars(boost::next(ps_.begin(), startpit),
91 boost::next(ps_.begin(), endpit + 1));
93 // Remove the end of the last paragraph; afterwards, remove the
94 // beginning of the first paragraph.
95 Paragraph & back = pars.back();
96 back.eraseChars(to.pos(), back.size(), false);
97 Paragraph & front = pars.front();
98 front.eraseChars(0, from.pos(), false);
100 ParagraphList::const_iterator pit = pars.begin();
101 ParagraphList::const_iterator end_it = pars.end();
// Every paragraph counts its characters plus one for the paragraph end.
104 for (; pit != end_it; ++pit)
105 length += pit->size() + 1;
107 // The last paragraph has no paragraph-end
/// Builds a pair from a position in the old document (\c o_) and a
/// position in the new document (\c n_).
117 DocPair(DocIterator o_, DocIterator n_)
/// Inequality as needed by the algorithm: the pairs only compare unequal
/// when BOTH the old and the new position differ.
121 bool operator!=(DocPair const & rhs) {
122 // this might not be intuitive but correct for our purpose
123 return o != rhs.o && n != rhs.n;
/// Pre-increment of the pair.
/// NOTE(review): presumably advances both positions together - confirm.
127 DocPair & operator++()
/// Pre-decrement of the pair.
/// NOTE(review): presumably moves both positions back together - confirm.
134 DocPair & operator--()
147 * A pair of two DocRanges.
/// Builds the pair from a range in the old (\c o_) and in the new
/// (\c n_) document.
151 DocRangePair(DocRange o_, DocRange n_)
/// Builds the pair of ranges that starts at the pair \c from and ends at
/// the pair \c to.
155 DocRangePair(DocPair from, DocPair to)
156 : o(from.o, to.o), n(from.n, to.n)
/// Builds the pair from the old buffer (\c o_buf) and the new buffer
/// (\c n_buf).
159 DocRangePair(Buffer const * o_buf, Buffer const * n_buf)
163 /// Returns the from pair, i.e. the beginnings of both ranges
164 DocPair from() const { return DocPair(o.from, n.from); }
166 /// Returns the to pair, i.e. the ends of both ranges
167 DocPair to() const { return DocPair(o.to, n.to); }
/// Builds the pair of ranges for the insets located at \c inset_location
/// in the old and new document (used by diff_inset to obtain the
/// beginning and end of the inset).
174 static DocRangePair stepIntoInset(DocPair const & inset_location)
// Start with both ends of both ranges at the inset position.
176 DocRangePair rp(inset_location, inset_location);
// Move the beginnings one position forward.
177 rp.o.from.forwardPos();
178 rp.n.from.forwardPos();
// Move the ends one step forward on the top slice and then one position
// back. NOTE(review): presumably this lands on the last position inside
// the inset - confirm.
179 step(rp.o.to, Forward);
180 step(rp.n.to, Forward);
181 rp.o.to.backwardPos();
182 rp.n.to.backwardPos();
188 * This class is designed to hold a vector that has both positive and
189 * negative indices. It is internally represented as two vectors, one
190 * for non-negative indices and one for negative indices. In this way, the
191 * vector can grow in both directions.
192 * If an index is not available in the vector, the default value is
193 * returned. If an object is put in the vector beyond its size, the
194 * empty spots in between are also filled with the default value.
/// Resets the vector.
/// NOTE(review): presumably \c def becomes the new default value - confirm.
201 void reset(T const & def)
209 /// Gets the value at index. If it is not in the vector
210 /// the default value is returned.
// Non-negative indices are stored in Vp_[index]; a negative index is
// stored in Vn_[-index-1].
212 if (-index <= int(Vn_.size()) && index < int(Vp_.size()))
213 return index >= 0 ? Vp_[index] : Vn_[-index-1];
218 /// Sets the value at index if it already
219 /// is in the vector. Otherwise it will be added to the
220 /// end padded with the default value.
221 void set(int index, T const & t) {
// The index is already covered by one of the two vectors.
222 if (index >= -int(Vn_.size()) && index < int(Vp_.size())) {
// Pad the non-negative side with defaults until the next free slot is
// the requested index.
228 while (index > int(Vp_.size()))
229 Vp_.push_back(default_);
// Same padding for the negative side (index i maps to slot -i-1).
230 while (index < -int(Vn_.size()) - 1)
231 Vn_.push_back(default_);
241 /// The vector for non-negative indices
243 /// The vector for negative indices
245 /// The default value that is inserted in the vector
246 /// if more space is needed
252 * The implementation of the algorithm that does the comparison
253 * between two documents.
255 class Compare::Impl {
/// Creates the implementation object for \c compare; the abort flag
/// starts out cleared.
258 Impl(Compare const & compare)
259 : abort_(false), compare_(compare)
265 // Algorithm to find the shortest edit string. This algorithm
266 // only needs a linear amount of memory (linear with the sum
267 // of the number of characters in the two paragraph-lists).
268 bool diff(Buffer const * new_buf, Buffer const * old_buf,
269 Buffer const * dest_buf);
271 /// Set to true to cancel the algorithm
275 /// Finds the middle snake and returns the length of the
276 /// shortest edit script.
277 int find_middle_snake(DocRangePair const & rp, DocPair & middle_snake);
285 /// Retrieve the middle snake when there is overlap between
286 /// the forward and backward path.
287 SnakeResult retrieve_middle_snake(int k, int D, Direction direction,
288 DocPair & middle_snake);
290 /// Find the furthest reaching D-path (number of horizontal
291 /// and vertical steps; differences between the old and new
292 /// document) in the k-diagonal (vertical minus horizontal steps).
293 void furthest_Dpath_kdiagonal(int D, int k,
294 DocRangePair const & rp, Direction direction);
296 /// Is there overlap between the forward and backward path
297 bool overlap(int k, int D);
299 /// This function is called recursively by a divide and conquer
300 /// algorithm. Each time, the string is divided into two parts, split
301 /// around the middle snake.
302 void diff_i(DocRangePair const & rp);
304 /// Processes the split chunks. It either adds them as deleted,
305 /// as added, or calls diff_i for further processing.
306 void diff_part(DocRangePair const & rp);
308 /// Runs the algorithm on the insets located at \c p in the old and new
309 /// document and writes the result into the paragraphs of \c inset.
310 void diff_inset(Inset * inset, DocPair const & p);
312 /// Adds the snake to the destination buffer. The algorithm will
313 /// recursively be applied to any InsetTexts that are within the snake.
314 void process_snake(DocRangePair const & rp);
316 /// Writes the range to the destination buffer; the paragraphs are
/// marked with the change type \c type.
317 void writeToDestBuffer(DocRange const & range,
318 Change::Type type = Change::UNCHANGED);
320 /// Writes the paragraph list to the destination buffer
321 void writeToDestBuffer(ParagraphList const & copy_pars) const;
323 /// The length of the old chunk currently processed
325 /// The length of the new chunk currently processed
327 /// The offset diagonal of the reverse path of the
328 /// currently processed chunk
329 int offset_reverse_diagonal_;
330 /// Is the offset odd or even ?
333 /// The thread object, used to emit signals to the GUI
334 Compare const & compare_;
336 /// The buffer containing text that will be marked as old
337 Buffer const * old_buf_;
338 /// The buffer containing text that will be marked as new
339 Buffer const * new_buf_;
340 /// The destination buffer that receives the result of the comparison
341 Buffer const * dest_buf_;
343 /// The paragraph list of the destination buffer
344 ParagraphList * dest_pars_;
346 /// The level of recursion
347 int recursion_level_;
349 /// The number of nested insets at this level
350 int nested_inset_level_;
352 /// The position/snake in the old/new document
353 /// of the forward/reverse search
/// Naming: first letter o/n = old/new document, second letter
/// f/r = forward/reverse search, third letter p/s = position/snake.
354 compl_vector<DocIterator> ofp;
355 compl_vector<DocIterator> nfp;
356 compl_vector<DocIterator> ofs;
357 compl_vector<DocIterator> nfs;
358 compl_vector<DocIterator> orp;
359 compl_vector<DocIterator> nrp;
360 compl_vector<DocIterator> ors;
361 compl_vector<DocIterator> nrs;
364 /////////////////////////////////////////////////////////////////////
368 /////////////////////////////////////////////////////////////////////
/// Stores the three buffers and the options, and creates the
/// implementation object.
/// \param new_buf  buffer with the new version of the document
/// \param old_buf  buffer with the old version of the document
/// \param dest_buf buffer that receives the result
/// \param options  comparison options
370 Compare::Compare(Buffer const * new_buf, Buffer const * old_buf,
371 Buffer * const dest_buf, CompareOptions const & options)
372 : new_buffer(new_buf), old_buffer(old_buf), dest_buffer(dest_buf),
373 options_(options), pimpl_(new Impl(*this))
// NOTE(review): confirm which function the statements below belong to.
// Guard against missing buffers.
380 if (!dest_buffer || !new_buffer || !old_buffer)
383 // Copy the buffer params to the new buffer
// options_.settings_from_new selects whether the params of the new or of
// the old buffer are used.
384 dest_buffer->params() = options_.settings_from_new
385 ? new_buffer->params() : old_buffer->params();
// Calls finished() with the current value of the abort flag.
391 finished(pimpl_->abort_);
/// Runs the comparison by delegating to Impl::diff(). Impl::diff() is
/// declared to return bool; the result is returned here as int.
396 int Compare::doCompare()
398 return pimpl_->diff(new_buffer, old_buffer, dest_buffer);
/// Requests cancellation of a running comparison: raises the abort flag,
/// wakes one waiter on condition_ and clears the flag again afterwards.
/// NOTE(review): confirm what happens between wake-up and clearing the flag.
402 void Compare::abort()
404 pimpl_->abort_ = true;
405 condition_.wakeOne();
407 pimpl_->abort_ = false;
/// Copies the paragraphs covered by \c range, trims the copy to the exact
/// range boundaries and inserts the result at the beginning of \c pars.
411 static void get_paragraph_list(DocRange const & range,
412 ParagraphList & pars)
414 // Clone the paragraphs within the selection.
415 pit_type startpit = range.from.pit();
416 pit_type endpit = range.to.pit();
417 ParagraphList const & ps_ = range.text()->paragraphs();
418 ParagraphList tmp_pars(boost::next(ps_.begin(), startpit),
419 boost::next(ps_.begin(), endpit + 1));
421 // Remove the end of the last paragraph; afterwards, remove the
422 // beginning of the first paragraph. Keep this order - there may only
// be one paragraph, in which case erasing the beginning first would
// shift the end position used below.
424 Paragraph & back = tmp_pars.back();
425 back.eraseChars(range.to.pos(), back.size(), false);
426 Paragraph & front = tmp_pars.front();
427 front.eraseChars(0, range.from.pos(), false);
// Prepend the trimmed copy to the output list.
429 pars.insert(pars.begin(), tmp_pars.begin(), tmp_pars.end());
/// Decides whether the inset \c i_o from the old document and the inset
/// \c i_n from the new document are considered equal.
433 static bool equal(Inset const * i_o, Inset const * i_n)
438 // Different types of insets
439 if (i_o->lyxCode() != i_n->lyxCode())
442 // Editable insets are assumed to be the same as they are of the
443 // same type. If we later on decide that we insert them in the
444 // document as being unchanged, we will run the algorithm on the
445 // contents of the two insets.
446 // FIXME: This fails if the parameters of the insets differ.
447 // FIXME: We do not recurse into InsetTabulars.
448 // FIXME: We need methods inset->equivalent(inset).
449 if (i_o->editable() && !i_o->asInsetMath()
450 && i_o->asInsetText())
// Otherwise compare the string contents of the streams o_os and n_os.
// NOTE(review): confirm what is written to these streams.
457 return o_os.str() == n_os.str();
/// Compares the element at position \c o in the old document with the
/// element at position \c n in the new document.
461 static bool equal(DocIterator & o, DocIterator & n) {
462 Paragraph const & old_par = o.text()->getPar(o.pit());
463 Paragraph const & new_par = n.text()->getPar(n.pit());
// The characters at both positions.
465 char_type const c_o = old_par.getChar(o.pos());
466 char_type const c_n = new_par.getChar(n.pos());
// If the old position holds an inset, the decision is delegated to the
// inset comparison above.
470 if (old_par.isInset(o.pos())) {
471 Inset const * i_o = old_par.getInset(o.pos());
472 Inset const * i_n = new_par.getInset(n.pos());
475 return equal(i_o, i_n);
// Font settings at both positions, each resolved with the params of its
// own buffer.
478 Font fo = old_par.getFontSettings(o.buffer()->params(), o.pos());
479 Font fn = new_par.getFontSettings(n.buffer()->params(), n.pos());
484 /// Traverses a snake in a certain direction. p points to a
485 /// position in the old and new file and they are synchronously
486 /// moved along the snake. The function returns true if a snake
/// was found. (NOTE(review): end of sentence reconstructed - confirm.)
488 static bool traverse_snake(DocPair & p, DocRangePair const & range,
// The traversal ends at the end of the range when moving forward and at
// its beginning when moving backward.
492 DocPair const & p_end =
493 direction == Forward ? range.to() : range.from();
496 if (direction == Backward)
// A pair of unequal elements ends the snake.
498 if (!equal(p.o, p.n)) {
499 if (direction == Backward)
503 if (direction == Forward)
511 /////////////////////////////////////////////////////////////////////
515 /////////////////////////////////////////////////////////////////////
/// Extends the search in \c direction to the furthest reaching D-path on
/// diagonal \c k within the range pair \c rp.
518 void Compare::Impl::furthest_Dpath_kdiagonal(int D, int k,
519 DocRangePair const & rp, Direction direction)
// Select the position (op/np) and snake (os/ns) vectors that belong to the
// search direction.
521 compl_vector<DocIterator> * op = direction == Forward ? &ofp : &orp;
522 compl_vector<DocIterator> * np = direction == Forward ? &nfp : &nrp;
523 compl_vector<DocIterator> * os = direction == Forward ? &ofs : &ors;
524 compl_vector<DocIterator> * ns = direction == Forward ? &nfs : &nrs;
526 // A vertical step means stepping one character in the new document.
// On the outermost diagonals the kind of step is forced (k == -D: vertical,
// k == D: horizontal); otherwise the positions stored for the two
// neighbouring diagonals are compared.
527 bool vertical_step = k == -D;
528 if (!vertical_step && k != D) {
529 vertical_step = direction == Forward
530 ? op->get(k - 1) < op->get(k + 1)
531 : op->get(k - 1) > op->get(k + 1);
534 // Where do we take the step from ?
535 int const kk = vertical_step ? k + 1 : k - 1;
536 DocPair p(op->get(kk), np->get(kk));
538 // If D==0 we simulate a vertical step from (0,-1) by doing nothing.
// A vertical step moves the new-document position, a horizontal step the
// old-document position; the boundary passed to step() is the range end
// (forward) or the range beginning (backward).
541 if (vertical_step && direction == Forward)
542 step(p.n, rp.n.to, direction);
543 else if (vertical_step && direction == Backward)
544 step(p.n, rp.n.from, direction);
545 else if (!vertical_step && direction == Forward)
546 step(p.o, rp.o.to, direction);
547 else if (!vertical_step && direction == Backward)
548 step(p.o, rp.o.from, direction);
// Follow the snake starting at the new position.
552 if (traverse_snake(p, rp, direction)) {
557 // Copy last snake from the previous step
558 os->set(k, os->get(kk));
559 ns->set(k, ns->get(kk));
562 //Record new position
/// Checks whether the forward and the reverse path overlap on diagonal
/// \c k, where \c D is the number of steps of the opposite search.
568 bool Compare::Impl::overlap(int k, int D)
570 // To generalize for the forward and reverse checks
571 int kk = offset_reverse_diagonal_ - k;
573 // Can we have overlap ?
// The mirrored diagonal must be reachable with D steps.
574 if (kk <= D && kk >= -D) {
575 // Do we have overlap ?
// Overlap means the forward position is not before the reverse position in
// both the old and the new document; which of k/kk indexes the forward
// vectors differs between the two variants below.
577 return ofp.get(k) >= orp.get(kk) && nfp.get(k) >= nrp.get(kk);
579 return ofp.get(kk) >= orp.get(k) && nfp.get(kk) >= nrp.get(k);
/// Stores in \c middle_snake the snake found on diagonal \c k, taken from
/// the search in \c direction or, if that one has none, from the opposite
/// search.
585 Compare::Impl::SnakeResult Compare::Impl::retrieve_middle_snake(
586 int k, int D, Direction direction, DocPair & middle_snake)
// os/ns: snakes of the current direction; os_r/ns_r: snakes of the
// opposite direction.
588 compl_vector<DocIterator> * os = direction == Forward ? &ofs : &ors;
589 compl_vector<DocIterator> * ns = direction == Forward ? &nfs : &nrs;
590 compl_vector<DocIterator> * os_r = direction == Forward ? &ors : &ofs;
591 compl_vector<DocIterator> * ns_r = direction == Forward ? &nrs : &nfs;
593 // The diagonal while doing the backward search
594 int kk = -k + offset_reverse_diagonal_;
596 // Did we find a snake ?
597 if (os->get(k).empty() && os_r->get(kk).empty()) {
598 // No, there is no snake at all, in which case
599 // the length of the shortest edit script is M+N.
600 LASSERT(2 * D - odd_offset_ == M_ + N_, /**/);
604 if (os->get(k).empty()) {
605 // Yes, but there is only 1 snake and we found it in the
// opposite search direction (os_r/ns_r).
607 middle_snake.o = os_r->get(kk);
608 middle_snake.n = ns_r->get(kk);
// Otherwise use the snake of the current search direction.
612 middle_snake.o = os->get(k);
613 middle_snake.n = ns->get(k);
/// Searches forward and backward through the range pair \c rp until both
/// searches overlap, stores the snake found there in \c middle_snake and
/// returns the length of the shortest edit script.
618 int Compare::Impl::find_middle_snake(DocRangePair const & rp,
619 DocPair & middle_snake)
621 // The lengths of the old and new chunks.
625 // Forward paths are centered around the 0-diagonal; reverse paths
626 // are centered around the diagonal N - M. (Delta in the article)
627 offset_reverse_diagonal_ = N_ - M_;
629 // If the offset is odd, only check for overlap while extending forward
630 // paths, otherwise only check while extending reverse paths.
631 odd_offset_ = (offset_reverse_diagonal_ % 2 != 0);
// The forward positions default to the beginning of the ranges; the snake
// vectors default to an empty iterator, meaning "no snake found".
633 ofp.reset(rp.o.from);
634 nfp.reset(rp.n.from);
635 ofs.reset(DocIterator());
636 nfs.reset(DocIterator());
639 ors.reset(DocIterator());
640 nrs.reset(DocIterator());
642 // D is the number of horizontal and vertical steps, i.e.
643 // different characters in the old and new chunk.
// Each search needs at most half of the total length, rounded up.
644 int const D_max = ceil(((double)M_ + N_)/2);
645 for (int D = 0; D <= D_max; ++D) {
647 // Forward and reverse paths
648 for (int f = 0; f < 2; ++f) {
649 Direction direction = f == 0 ? Forward : Backward;
651 // Diagonals between -D and D can be reached by a D-path
652 for (int k = -D; k <= D; k += 2) {
653 // Find the furthest reaching D-path on this diagonal
654 furthest_Dpath_kdiagonal(D, k, rp, direction);
656 // Only check for overlap for forward paths if the offset is odd
657 // and only for reverse paths if the offset is even.
658 if (odd_offset_ == (direction == Forward)) {
660 // Do the forward and backward paths overlap ?
// With an odd offset the opposite search is one step behind (D - 1).
661 if (overlap(k, D - odd_offset_)) {
662 retrieve_middle_snake(k, D, direction, middle_snake);
// Total length: D steps of this search plus D (or D - 1) of the other.
663 return 2 * D - odd_offset_;
669 // This should never be reached
/// Entry point of the comparison: compares \c old_buf with \c new_buf and
/// writes the result into the paragraphs of \c dest_buf.
674 bool Compare::Impl::diff(Buffer const * new_buf, Buffer const * old_buf,
675 Buffer const * dest_buf)
// All three buffers are required.
677 if (!new_buf || !old_buf || !dest_buf)
682 dest_buf_ = dest_buf;
// Output goes to the paragraph list of the destination buffer's InsetText.
683 dest_pars_ = &dest_buf->inset().asInsetText()->paragraphs();
686 recursion_level_ = 0;
687 nested_inset_level_ = 0;
// The ranges covering the whole old and new buffer.
689 DocRangePair rp(old_buf_, new_buf_);
// Handle the common beginning of both documents first: follow the snake
// from the start and write it out.
691 DocPair from = rp.from();
692 traverse_snake(from, rp, Forward);
693 DocRangePair const snake(rp.from(), from);
694 process_snake(snake);
696 // Start the recursive algorithm
// Attach every resulting paragraph to the destination buffer and its inset.
699 for (pit_type p = 0; p < (pit_type)dest_pars_->size(); ++p) {
700 (*dest_pars_)[p].setBuffer(const_cast<Buffer &>(*dest_buf));
701 (*dest_pars_)[p].setInsetOwner(&dest_buf_->inset());
/// Recursive step of the divide and conquer algorithm: finds the middle
/// snake of \c rp and processes the part in front of it, the snake itself
/// and the part behind it.
708 void Compare::Impl::diff_i(DocRangePair const & rp)
711 DocPair middle_snake;
713 // Divides the problem into two smaller problems, split around
714 // the snake in the middle.
715 int const L_ses = find_middle_snake(rp, middle_snake);
717 // Set maximum of progress bar
// Only the outermost call (level 1 after the increment) does this.
718 if (++recursion_level_ == 1)
719 compare_.progressMax(L_ses);
721 // There are now three possibilities: the strings were the same,
722 // the strings were completely different, or we found a middle
723 // snake and we can split the string into two parts to process.
725 // Two identical strings (this must be a very rare case, because
726 // usually this will be part of a snake adjacent to these strings).
727 writeToDestBuffer(rp.o);
729 else if (middle_snake.o.empty()) {
730 // Two totally different strings
731 writeToDestBuffer(rp.o, Change::DELETED);
732 writeToDestBuffer(rp.n, Change::INSERTED);
735 // Retrieve the complete snake
// Extend the snake backward to find where the first part ends ...
736 DocPair first_part_end = middle_snake;
737 traverse_snake(first_part_end, rp, Backward);
738 DocRangePair first_part(rp.from(), first_part_end);
// ... and forward to find where the second part begins.
740 DocPair second_part_begin = middle_snake;
741 traverse_snake(second_part_begin, rp, Forward);
742 DocRangePair second_part(second_part_begin, rp.to());
744 // Split the string in three parts:
745 // 1. in front of the snake
746 diff_part(first_part);
748 // 2. the snake itself, and
749 DocRangePair const snake(first_part.to(), second_part.from());
750 process_snake(snake);
752 // 3. behind the snake.
753 diff_part(second_part);
/// Processes one chunk \c rp: a chunk that only exists in the old document
/// is written as deleted, one that only exists in the new document as
/// inserted.
759 void Compare::Impl::diff_part(DocRangePair const & rp)
761 // Is there a finite length string in both buffers, if not there
762 // is an empty string and we write the other one to the buffer.
// Both sides are non-empty.
// NOTE(review): presumably the chunk is handed to diff_i here - confirm.
763 if (!rp.o.empty() && !rp.n.empty())
766 else if (!rp.o.empty())
767 writeToDestBuffer(rp.o, Change::DELETED);
769 else if (!rp.n.empty())
770 writeToDestBuffer(rp.n, Change::INSERTED);
/// Compares the contents of the insets located at \c p in the old and new
/// document; while doing so the output is redirected to the paragraph
/// list of \c inset.
774 void Compare::Impl::diff_inset(Inset * inset, DocPair const & p)
776 // Find the dociterators for the beginning and the
777 // end of the inset, for the old and new document.
778 DocRangePair const rp = stepIntoInset(p);
780 // Recurse into the inset. Temporarily replace the dest_pars
781 // paragraph list by the paragraph list of the nested inset.
782 ParagraphList * backup_dest_pars = dest_pars_;
783 dest_pars_ = &inset->asInsetText()->text().paragraphs();
// Track the nesting depth while inside the inset.
786 ++nested_inset_level_;
788 --nested_inset_level_;
// Restore the previous destination paragraph list.
790 dest_pars_ = backup_dest_pars;
/// Writes the snake \c rp (text common to both documents) to the
/// destination; the algorithm is applied to every editable text inset
/// found inside the snake.
794 void Compare::Impl::process_snake(DocRangePair const & rp)
// Copy the paragraphs of the snake from the old document.
797 get_paragraph_list(rp.o, pars);
799 // Find insets in this paragraph list
800 DocPair it = rp.from();
801 for (; it.o < rp.o.to; ++it) {
802 Inset * inset = it.o.text()->getPar(it.o.pit()).getInset(it.o.pos());
803 if (inset && inset->editable() && inset->asInsetText()) {
804 // Find the inset in the paragraph list that will be pasted into
805 // the final document. The contents of the inset will be replaced
806 // by the output of the algorithm below.
// Paragraph index relative to the copied list.
807 pit_type const pit = it.o.pit() - rp.o.from.pit();
// In the first copied paragraph the beginning was erased, so the position
// is relative to the start of the range.
808 pos_type const pos = pit ? it.o.pos() : it.o.pos() - rp.o.from.pos();
809 inset = pars[pit].getInset(pos);
810 LASSERT(inset, /**/);
811 diff_inset(inset, it);
814 writeToDestBuffer(pars);
/// Copies the paragraphs of \c range, marks each of them with a change of
/// the given type and writes them to the destination.
818 void Compare::Impl::writeToDestBuffer(DocRange const & range,
822 get_paragraph_list(range, pars);
// Mark every copied paragraph with the requested change type.
827 ParagraphList::iterator it = pars.begin();
828 for (; it != pars.end(); ++it) {
829 it->setChange(Change(type));
833 writeToDestBuffer(pars);
// Progress is only reported for text outside nested insets.
835 if (nested_inset_level_ == 0)
836 compare_.progress(size);
/// Appends \c pars to the destination paragraph list.
840 void Compare::Impl::writeToDestBuffer(ParagraphList const & pars) const
// Index of the last destination paragraph before appending.
842 pit_type const pit = dest_pars_->size() - 1;
843 dest_pars_->insert(dest_pars_->end(), pars.begin(), pars.end());
// NOTE(review): presumably joins paragraph pit with the first appended
// paragraph - confirm, including the case of an empty destination list.
845 mergeParagraph(dest_buf_->params(), *dest_pars_, pit);
849 #include "moc_Compare.cpp"