3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Vincent van Ravesteijn
8 * Full author contact details are available in file CREDITS.
15 #include "BufferParams.h"
18 #include "insets/InsetText.h"
20 #include "support/lassert.h"
22 #include <boost/next_prior.hpp>
27 using namespace lyx::support;
39 static void step(DocIterator & dit, Direction direction)
41 if (direction == Forward)
42 dit.top().forwardPos();
44 dit.top().backwardPos();
48 static void step(DocIterator & dit, DocIterator const & end, Direction direction)
56 * A pair of two DocIterators that form a range.
60 DocRange(DocIterator from_, DocIterator to_)
61 : from(from_), to(to_)
64 DocRange(Buffer const * buf)
66 from = doc_iterator_begin(buf);
67 to = doc_iterator_end(buf);
72 Text * text() const { return from.text(); }
74 bool empty() const { return to <= from; }
76 size_t length() const;
78 /// The begin of the range
80 /// The end of the range
85 size_t DocRange::length() const
87 ParagraphList const & ps = from.text()->paragraphs();
89 pit_type pit = from.pit();
90 pit_type const endpit = to.pit();
91 for (; pit < endpit; ++pit)
92 length += ps[pit].size() + 1;
93 length += to.pos() - from.pos();
102 DocPair(DocIterator o_, DocIterator n_)
106 bool operator!=(DocPair const & rhs) {
107 // this might not be intuitive but correct for our purpose
108 return o != rhs.o && n != rhs.n;
112 DocPair & operator++()
119 DocPair & operator--()
132 * A pair of two DocRanges.
136 DocRangePair(DocRange o_, DocRange n_)
140 DocRangePair(DocPair from, DocPair to)
141 : o(from.o, to.o), n(from.n, to.n)
144 DocRangePair(Buffer const * o_buf, Buffer const * n_buf)
148 /// Returns the from pair
149 DocPair from() const { return DocPair(o.from, n.from); }
151 /// Returns the to pair
152 DocPair to() const { return DocPair(o.to, n.to); }
159 static DocRangePair stepIntoInset(DocPair const & inset_location)
161 DocRangePair rp(inset_location, inset_location);
162 rp.o.from.forwardPos();
163 rp.n.from.forwardPos();
164 step(rp.o.to, Forward);
165 step(rp.n.to, Forward);
166 rp.o.to.backwardPos();
167 rp.n.to.backwardPos();
173 * This class is designed to hold a vector that has both positive as
174 * negative indices. It is internally represented as two vectors, one
175 * for non-zero indices and one for negative indices. In this way, the
176 * vector can grow in both directions.
177 * If an index is not available in the vector, the default value is
178 * returned. If an object is put in the vector beyond its size, the
179 * empty spots in between are also filled with the default value.
186 void reset(T const & def)
193 /// Gets the value at index. If it is not in the vector
194 /// the default value is inserted and returned.
195 T & operator[](int index) {
196 vector<T> & V = index >= 0 ? Vp_ : Vn_;
197 unsigned int const ii = index >= 0 ? index : -index - 1;
198 while (ii >= V.size())
199 V.push_back(default_);
204 /// The vector for positive indices
206 /// The vector for negative indices
208 /// The default value that is inserted in the vector
209 /// if more space is needed
215 * The implementation of the algorithm that does the comparison
216 * between two documents.
218 class Compare::Impl {
221 Impl(Compare const & compare)
222 : abort_(false), compare_(compare)
228 // Algorithm to find the shortest edit string. This algorithm
229 // only needs a linear amount of memory (linear with the sum
230 // of the number of characters in the two paragraph-lists).
231 bool diff(Buffer const * new_buf, Buffer const * old_buf,
232 Buffer const * dest_buf);
234 /// Set to true to cancel the algorithm
238 /// Finds the middle snake and returns the length of the
239 /// shortest edit script.
240 int find_middle_snake(DocRangePair const & rp, DocPair & middle_snake);
248 /// Retrieve the middle snake when there is overlap between
249 /// the forward and backward path.
250 SnakeResult retrieve_middle_snake(int k, int D, Direction direction,
251 DocPair & middle_snake);
253 /// Find the the furthest reaching D-path (number of horizontal
254 /// and vertical steps; differences between the old and new
255 /// document) in the k-diagonal (vertical minus horizontal steps).
256 void furthest_Dpath_kdiagonal(int D, int k,
257 DocRangePair const & rp, Direction direction);
259 /// Is there overlap between the forward and backward path
260 bool overlap(int k, int D);
262 /// This function is called recursively by a divide and conquer
263 /// algorithm. Each time, the string is divided into two split
264 /// around the middle snake.
265 void diff_i(DocRangePair const & rp);
267 /// Processes the splitted chunks. It either adds them as deleted,
268 /// as added, or call diff_i for further processing.
269 void diff_part(DocRangePair const & rp);
271 /// Runs the algorithm for the inset located at /c it and /c it_n
272 /// and adds the result to /c pars.
273 void diff_inset(Inset * inset, DocPair const & p);
275 /// Adds the snake to the destination buffer. The algorithm will
276 /// recursively be applied to any InsetTexts that are within the snake.
277 void process_snake(DocRangePair const & rp);
279 /// Writes the range to the destination buffer
280 void writeToDestBuffer(DocRange const & range,
281 Change::Type type = Change::UNCHANGED);
283 /// Writes the paragraph list to the destination buffer
284 void writeToDestBuffer(ParagraphList const & copy_pars) const;
286 /// The length of the old chunk currently processed
288 /// The length of the new chunk currently processed
290 /// The offset diagonal of the reverse path of the
291 /// currently processed chunk
292 int offset_reverse_diagonal_;
293 /// Is the offset odd or even ?
296 /// The thread object, used to emit signals to the GUI
297 Compare const & compare_;
299 /// The buffer containing text that will be marked as old
300 Buffer const * old_buf_;
301 /// The buffer containing text that will be marked as new
302 Buffer const * new_buf_;
303 /// The buffer containing text that will be marked as new
304 Buffer const * dest_buf_;
306 /// The paragraph list of the destination buffer
307 ParagraphList * dest_pars_;
309 /// The level of recursion
310 int recursion_level_;
312 /// The number of nested insets at this level
313 int nested_inset_level_;
315 /// The position/snake in the old/new document
316 /// of the forward/reverse search
317 compl_vector<DocIterator> ofp;
318 compl_vector<DocIterator> nfp;
319 compl_vector<DocIterator> ofs;
320 compl_vector<DocIterator> nfs;
321 compl_vector<DocIterator> orp;
322 compl_vector<DocIterator> nrp;
323 compl_vector<DocIterator> ors;
324 compl_vector<DocIterator> nrs;
327 /////////////////////////////////////////////////////////////////////
331 /////////////////////////////////////////////////////////////////////
333 Compare::Compare(Buffer const * new_buf, Buffer const * old_buf,
334 Buffer * const dest_buf, CompareOptions const & options)
335 : new_buffer(new_buf), old_buffer(old_buf), dest_buffer(dest_buf),
336 options_(options), pimpl_(new Impl(*this))
343 if (!dest_buffer || !new_buffer || !old_buffer)
346 // Copy the buffer params to the new buffer
347 dest_buffer->params() = options_.settings_from_new
348 ? new_buffer->params() : old_buffer->params();
354 finished(pimpl_->abort_);
359 int Compare::doCompare()
361 return pimpl_->diff(new_buffer, old_buffer, dest_buffer);
365 void Compare::abort()
367 pimpl_->abort_ = true;
368 condition_.wakeOne();
370 pimpl_->abort_ = false;
374 static void get_paragraph_list(DocRange const & range,
375 ParagraphList & pars)
377 // Clone the paragraphs within the selection.
378 pit_type startpit = range.from.pit();
379 pit_type endpit = range.to.pit();
380 ParagraphList const & ps_ = range.text()->paragraphs();
381 ParagraphList tmp_pars(boost::next(ps_.begin(), startpit),
382 boost::next(ps_.begin(), endpit + 1));
384 // Remove the end of the last paragraph; afterwards, remove the
385 // beginning of the first paragraph. Keep this order - there may only
387 Paragraph & back = tmp_pars.back();
388 back.eraseChars(range.to.pos(), back.size(), false);
389 Paragraph & front = tmp_pars.front();
390 front.eraseChars(0, range.from.pos(), false);
392 pars.insert(pars.begin(), tmp_pars.begin(), tmp_pars.end());
396 static bool equal(Inset const * i_o, Inset const * i_n)
401 // Different types of insets
402 if (i_o->lyxCode() != i_n->lyxCode())
405 // Editable insets are assumed to be the same as they are of the
406 // same type. If we later on decide that we insert them in the
407 // document as being unchanged, we will run the algorithm on the
408 // contents of the two insets.
409 // FIXME: This fails if the parameters of the insets differ.
410 // FIXME: We do not recurse into InsetTabulars.
411 // FIXME: We need methods inset->equivalent(inset).
412 if (i_o->editable() && !i_o->asInsetMath()
413 && i_o->asInsetText())
420 return o_os.str() == n_os.str();
424 static bool equal(DocIterator & o, DocIterator & n) {
425 Paragraph const & old_par = o.text()->getPar(o.pit());
426 Paragraph const & new_par = n.text()->getPar(n.pit());
428 char_type const c_o = old_par.getChar(o.pos());
429 char_type const c_n = new_par.getChar(n.pos());
433 if (old_par.isInset(o.pos())) {
434 Inset const * i_o = old_par.getInset(o.pos());
435 Inset const * i_n = new_par.getInset(n.pos());
438 return equal(i_o, i_n);
441 Font fo = old_par.getFontSettings(o.buffer()->params(), o.pos());
442 Font fn = new_par.getFontSettings(n.buffer()->params(), n.pos());
447 /// Traverses a snake in a certain direction. p points to a
448 /// position in the old and new file and they are synchronously
449 /// moved along the snake. The function returns true if a snake
451 static bool traverse_snake(DocPair & p, DocRangePair const & range,
455 DocPair const & p_end =
456 direction == Forward ? range.to() : range.from();
459 if (direction == Backward)
461 if (!equal(p.o, p.n)) {
462 if (direction == Backward)
466 if (direction == Forward)
474 /////////////////////////////////////////////////////////////////////
478 /////////////////////////////////////////////////////////////////////
481 void Compare::Impl::furthest_Dpath_kdiagonal(int D, int k,
482 DocRangePair const & rp, Direction direction)
484 compl_vector<DocIterator> & op = direction == Forward ? ofp : orp;
485 compl_vector<DocIterator> & np = direction == Forward ? nfp : nrp;
486 compl_vector<DocIterator> & os = direction == Forward ? ofs : ors;
487 compl_vector<DocIterator> & ns = direction == Forward ? nfs : nrs;
489 // A vertical step means stepping one character in the new document.
490 bool vertical_step = k == -D;
491 if (!vertical_step && k != D) {
492 vertical_step = direction == Forward
493 ? op[k - 1] < op[k + 1] : op[k - 1] > op[k + 1];
496 // Where do we take the step from ?
497 int const kk = vertical_step ? k + 1 : k - 1;
498 DocPair p(op[kk], np[kk]);
500 // If D==0 we simulate a vertical step from (0,-1) by doing nothing.
503 if (vertical_step && direction == Forward)
504 step(p.n, rp.n.to, direction);
505 else if (vertical_step && direction == Backward)
506 step(p.n, rp.n.from, direction);
507 else if (!vertical_step && direction == Forward)
508 step(p.o, rp.o.to, direction);
509 else if (!vertical_step && direction == Backward)
510 step(p.o, rp.o.from, direction);
514 if (traverse_snake(p, rp, direction)) {
519 // Copy last snake from the previous step
524 //Record new position
530 bool Compare::Impl::overlap(int k, int D)
532 // To generalize for the forward and reverse checks
533 int kk = offset_reverse_diagonal_ - k;
535 // Can we have overlap ?
536 if (kk <= D && kk >= -D) {
537 // Do we have overlap ?
539 return ofp[k] >= orp[kk] && nfp[k] >= nrp[kk];
541 return ofp[kk] >= orp[k] && nfp[kk] >= nrp[k];
547 Compare::Impl::SnakeResult Compare::Impl::retrieve_middle_snake(
548 int k, int D, Direction direction, DocPair & middle_snake)
550 compl_vector<DocIterator> & os = direction == Forward ? ofs : ors;
551 compl_vector<DocIterator> & ns = direction == Forward ? nfs : nrs;
552 compl_vector<DocIterator> & os_r = direction == Forward ? ors : ofs;
553 compl_vector<DocIterator> & ns_r = direction == Forward ? nrs : nfs;
555 // The diagonal while doing the backward search
556 int kk = -k + offset_reverse_diagonal_;
558 // Did we find a snake ?
559 if (os[k].empty() && os_r[kk].empty()) {
560 // No, there is no snake at all, in which case
561 // the length of the shortest edit script is M+N.
562 LASSERT(2 * D - odd_offset_ == M_ + N_, /**/);
567 // Yes, but there is only 1 snake and we found it in the
569 middle_snake.o = os_r[kk];
570 middle_snake.n = ns_r[kk];
574 middle_snake.o = os[k];
575 middle_snake.n = ns[k];
580 int Compare::Impl::find_middle_snake(DocRangePair const & rp,
581 DocPair & middle_snake)
583 // The lengths of the old and new chunks.
587 // Forward paths are centered around the 0-diagonal; reverse paths
588 // are centered around the diagonal N - M. (Delta in the article)
589 offset_reverse_diagonal_ = N_ - M_;
591 // If the offset is odd, only check for overlap while extending forward
592 // paths, otherwise only check while extending reverse paths.
593 odd_offset_ = (offset_reverse_diagonal_ % 2 != 0);
595 ofp.reset(rp.o.from);
596 nfp.reset(rp.n.from);
597 ofs.reset(DocIterator());
598 nfs.reset(DocIterator());
601 ors.reset(DocIterator());
602 nrs.reset(DocIterator());
604 // D is the number of horizontal and vertical steps, i.e.
605 // different characters in the old and new chunk.
606 int const D_max = ceil(((double)M_ + N_)/2);
607 for (int D = 0; D <= D_max; ++D) {
609 // Forward and reverse paths
610 for (int f = 0; f < 2; ++f) {
611 Direction direction = f == 0 ? Forward : Backward;
613 // Diagonals between -D and D can be reached by a D-path
614 for (int k = -D; k <= D; k += 2) {
615 // Find the furthest reaching D-path on this diagonal
616 furthest_Dpath_kdiagonal(D, k, rp, direction);
618 // Only check for overlap for forward paths if the offset is odd
619 // and only for reverse paths if the offset is even.
620 if (odd_offset_ == (direction == Forward)) {
622 // Do the forward and backward paths overlap ?
623 if (overlap(k, D - odd_offset_)) {
624 retrieve_middle_snake(k, D, direction, middle_snake);
625 return 2 * D - odd_offset_;
631 // This should never be reached
636 bool Compare::Impl::diff(Buffer const * new_buf, Buffer const * old_buf,
637 Buffer const * dest_buf)
639 if (!new_buf || !old_buf || !dest_buf)
644 dest_buf_ = dest_buf;
645 dest_pars_ = &dest_buf->inset().asInsetText()->paragraphs();
648 recursion_level_ = 0;
649 nested_inset_level_ = 0;
651 DocRangePair rp(old_buf_, new_buf_);
653 DocPair from = rp.from();
654 traverse_snake(from, rp, Forward);
655 DocRangePair const snake(rp.from(), from);
656 process_snake(snake);
658 // Start the recursive algorithm
661 for (pit_type p = 0; p < (pit_type)dest_pars_->size(); ++p) {
662 (*dest_pars_)[p].setBuffer(const_cast<Buffer &>(*dest_buf));
663 (*dest_pars_)[p].setInsetOwner(&dest_buf_->inset());
670 void Compare::Impl::diff_i(DocRangePair const & rp)
673 DocPair middle_snake;
675 // Divides the problem into two smaller problems, split around
676 // the snake in the middle.
677 int const L_ses = find_middle_snake(rp, middle_snake);
679 // Set maximum of progress bar
680 if (++recursion_level_ == 1)
681 compare_.progressMax(L_ses);
683 // There are now three possibilities: the strings were the same,
684 // the strings were completely different, or we found a middle
685 // snake and we can split the string into two parts to process.
687 // Two the same strings (this must be a very rare case, because
688 // usually this will be part of a snake adjacent to these strings).
689 writeToDestBuffer(rp.o);
691 else if (middle_snake.o.empty()) {
692 // Two totally different strings
693 writeToDestBuffer(rp.o, Change::DELETED);
694 writeToDestBuffer(rp.n, Change::INSERTED);
697 // Retrieve the complete snake
698 DocPair first_part_end = middle_snake;
699 traverse_snake(first_part_end, rp, Backward);
700 DocRangePair first_part(rp.from(), first_part_end);
702 DocPair second_part_begin = middle_snake;
703 traverse_snake(second_part_begin, rp, Forward);
704 DocRangePair second_part(second_part_begin, rp.to());
706 // Split the string in three parts:
707 // 1. in front of the snake
708 diff_part(first_part);
710 // 2. the snake itself, and
711 DocRangePair const snake(first_part.to(), second_part.from());
712 process_snake(snake);
714 // 3. behind the snake.
715 diff_part(second_part);
721 void Compare::Impl::diff_part(DocRangePair const & rp)
723 // Is there a finite length string in both buffers, if not there
724 // is an empty string and we write the other one to the buffer.
725 if (!rp.o.empty() && !rp.n.empty())
728 else if (!rp.o.empty())
729 writeToDestBuffer(rp.o, Change::DELETED);
731 else if (!rp.n.empty())
732 writeToDestBuffer(rp.n, Change::INSERTED);
736 void Compare::Impl::diff_inset(Inset * inset, DocPair const & p)
738 // Find the dociterators for the beginning and the
739 // end of the inset, for the old and new document.
740 DocRangePair const rp = stepIntoInset(p);
742 // Recurse into the inset. Temporarily replace the dest_pars
743 // paragraph list by the paragraph list of the nested inset.
744 ParagraphList * backup_dest_pars = dest_pars_;
745 dest_pars_ = &inset->asInsetText()->text().paragraphs();
748 ++nested_inset_level_;
750 --nested_inset_level_;
752 dest_pars_ = backup_dest_pars;
756 void Compare::Impl::process_snake(DocRangePair const & rp)
759 get_paragraph_list(rp.o, pars);
761 // Find insets in this paragaph list
762 DocPair it = rp.from();
763 for (; it.o < rp.o.to; ++it) {
764 Inset * inset = it.o.text()->getPar(it.o.pit()).getInset(it.o.pos());
765 if (inset && inset->editable() && inset->asInsetText()) {
766 // Find the inset in the paragraph list that will be pasted into
767 // the final document. The contents of the inset will be replaced
768 // by the output of the algorithm below.
769 pit_type const pit = it.o.pit() - rp.o.from.pit();
770 pos_type const pos = pit ? it.o.pos() : it.o.pos() - rp.o.from.pos();
771 inset = pars[pit].getInset(pos);
772 LASSERT(inset, /**/);
773 diff_inset(inset, it);
776 writeToDestBuffer(pars);
780 void Compare::Impl::writeToDestBuffer(DocRange const & range,
784 get_paragraph_list(range, pars);
789 ParagraphList::iterator it = pars.begin();
790 for (; it != pars.end(); ++it) {
791 it->setChange(Change(type));
795 writeToDestBuffer(pars);
797 if (nested_inset_level_ == 0)
798 compare_.progress(size);
802 void Compare::Impl::writeToDestBuffer(ParagraphList const & pars) const
804 pit_type const pit = dest_pars_->size() - 1;
805 dest_pars_->insert(dest_pars_->end(), pars.begin(), pars.end());
807 mergeParagraph(dest_buf_->params(), *dest_pars_, pit);
811 #include "moc_Compare.cpp"