3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Vincent van Ravesteijn
8 * Full author contact details are available in file CREDITS.
15 #include "BufferParams.h"
18 #include "insets/InsetText.h"
20 #include "support/lassert.h"
22 #include <boost/next_prior.hpp>
25 using namespace lyx::support;
37 static void step(DocIterator & dit, Direction direction)
39 if (direction == Forward)
40 dit.top().forwardPos();
42 dit.top().backwardPos();
46 static void step(DocIterator & dit, DocIterator const & end, Direction direction)
54 * A pair of two DocIterators that form a range.
58 DocRange(DocIterator from_, DocIterator to_)
59 : from(from_), to(to_)
62 DocRange(Buffer const * buf)
64 from = doc_iterator_begin(buf);
65 to = doc_iterator_end(buf);
70 Text * text() const { return from.text(); }
72 bool empty() const { return to <= from; }
74 size_t length() const;
76 /// The beginning of the range
78 /// The end of the range
83 size_t DocRange::length() const
85 pit_type startpit = from.pit();
86 pit_type endpit = to.pit();
87 ParagraphList const & ps_ = from.text()->paragraphs();
89 ParagraphList pars(boost::next(ps_.begin(), startpit),
90 boost::next(ps_.begin(), endpit + 1));
92 // Remove the end of the last paragraph; afterwards, remove the
93 // beginning of the first paragraph.
94 Paragraph & back = pars.back();
95 back.eraseChars(to.pos(), back.size(), false);
96 Paragraph & front = pars.front();
97 front.eraseChars(0, from.pos(), false);
99 ParagraphList::const_iterator pit = pars.begin();
100 ParagraphList::const_iterator end_it = pars.end();
103 for (; pit != end_it; ++pit)
104 length += pit->size() + 1;
106 // The last paragraph has no paragraph-end
116 DocPair(DocIterator o_, DocIterator n_)
120 bool operator!=(DocPair const & rhs) {
121 // This might not be intuitive, but it is correct for our purpose: two pairs only compare as different when both the old and the new iterators differ.
122 return o != rhs.o && n != rhs.n;
126 DocPair & operator++()
133 DocPair & operator--()
146 * A pair of two DocRanges.
150 DocRangePair(DocRange o_, DocRange n_)
154 DocRangePair(DocPair from, DocPair to)
155 : o(from.o, to.o), n(from.n, to.n)
158 DocRangePair(Buffer const * o_buf, Buffer const * n_buf)
162 /// Returns the from pair
163 DocPair from() const { return DocPair(o.from, n.from); }
165 /// Returns the to pair
166 DocPair to() const { return DocPair(o.to, n.to); }
173 static DocRangePair stepIntoInset(DocPair const & inset_location)
175 DocRangePair rp(inset_location, inset_location);
176 rp.o.from.forwardPos();
177 rp.n.from.forwardPos();
178 step(rp.o.to, Forward);
179 step(rp.n.to, Forward);
180 rp.o.to.backwardPos();
181 rp.n.to.backwardPos();
187 * The implementation of the algorithm that does the comparison
188 * between two documents.
190 class Compare::Impl {
193 Impl(Compare const & compare)
194 : abort_(false), compare_(compare)
200 // Algorithm to find the shortest edit script. This algorithm
201 // only needs a linear amount of memory (linear with the sum
202 // of the number of characters in the two paragraph-lists).
203 bool diff(Buffer const * new_buf, Buffer const * old_buf,
204 Buffer const * dest_buf);
206 /// Set to true to cancel the algorithm
210 /// Finds the middle snake and returns the length of the
211 /// shortest edit script.
212 int find_middle_snake(DocRangePair const & rp, DocPair & middle_snake);
214 /// This function is called recursively by a divide and conquer
215 /// algorithm. Each time, the string is split into two parts
216 /// around the middle snake.
217 void diff_i(DocRangePair const & rp);
219 /// Processes the split chunks. It either adds them as deleted
220 /// or as added, or calls diff_i for further processing.
221 void diff_part(DocRangePair const & rp);
223 /// Runs the algorithm for the inset located at \c p and writes
224 /// the result into the paragraph list of \c inset.
225 void diff_inset(Inset * inset, DocPair const & p);
227 /// Adds the snake to the destination buffer. The algorithm will
228 /// recursively be applied to any InsetTexts that are within the snake.
229 void process_snake(DocRangePair const & rp);
231 /// Writes the range to the destination buffer
232 void writeToDestBuffer(DocRange const & range,
233 Change::Type type = Change::UNCHANGED);
235 /// Writes the paragraph list to the destination buffer
236 void writeToDestBuffer(ParagraphList const & copy_pars) const;
238 /// The length of the old chunk currently processed
240 /// The length of the new chunk currently processed
243 /// The thread object, used to emit signals to the GUI
244 Compare const & compare_;
246 /// The buffer containing text that will be marked as old
247 Buffer const * old_buf_;
248 /// The buffer containing text that will be marked as new
249 Buffer const * new_buf_;
250 /// The destination buffer that receives the result of the comparison
251 Buffer const * dest_buf_;
253 /// The paragraph list of the destination buffer
254 ParagraphList * dest_pars_;
256 /// The level of recursion
257 int recursion_level_;
259 /// The number of nested insets at this level
260 int nested_inset_level_;
263 /////////////////////////////////////////////////////////////////////
267 /////////////////////////////////////////////////////////////////////
269 Compare::Compare(Buffer const * new_buf, Buffer const * old_buf,
270 Buffer * const dest_buf, CompareOptions const & options)
271 : new_buffer(new_buf), old_buffer(old_buf), dest_buffer(dest_buf),
272 options_(options), pimpl_(new Impl(*this))
279 if (!dest_buffer || !new_buffer || !old_buffer)
282 // Copy the buffer params to the new buffer
283 dest_buffer->params() = options_.settings_from_new
284 ? new_buffer->params() : old_buffer->params();
290 finished(pimpl_->abort_);
295 int Compare::doCompare()
297 return pimpl_->diff(new_buffer, old_buffer, dest_buffer);
301 void Compare::abort()
303 pimpl_->abort_ = true;
304 condition_.wakeOne();
306 pimpl_->abort_ = false;
310 static void get_paragraph_list(DocRange const & range,
311 ParagraphList & pars)
313 // Clone the paragraphs within the selection.
314 pit_type startpit = range.from.pit();
315 pit_type endpit = range.to.pit();
316 ParagraphList const & ps_ = range.text()->paragraphs();
317 ParagraphList tmp_pars(boost::next(ps_.begin(), startpit),
318 boost::next(ps_.begin(), endpit + 1));
320 // Remove the end of the last paragraph; afterwards, remove the
321 // beginning of the first paragraph. Keep this order - there may only
323 Paragraph & back = tmp_pars.back();
324 back.eraseChars(range.to.pos(), back.size(), false);
325 Paragraph & front = tmp_pars.front();
326 front.eraseChars(0, range.from.pos(), false);
328 pars.insert(pars.begin(), tmp_pars.begin(), tmp_pars.end());
332 static bool equal(Inset const * i_o, Inset const * i_n)
337 // Different types of insets
338 if (i_o->lyxCode() != i_n->lyxCode())
341 // Editable insets are assumed to be the same as they are of the
342 // same type. If we later on decide that we insert them in the
343 // document as being unchanged, we will run the algorithm on the
344 // contents of the two insets.
345 // FIXME: This fails if the parameters of the insets differ.
346 // FIXME: We do not recurse into InsetTabulars.
347 // FIXME: We need methods inset->equivalent(inset).
348 if (i_o->editable() && !i_o->asInsetMath()
349 && i_o->asInsetText())
356 return o_os.str() == n_os.str();
360 static bool equal(DocIterator & o, DocIterator & n) {
361 Paragraph const & old_par = o.text()->getPar(o.pit());
362 Paragraph const & new_par = n.text()->getPar(n.pit());
364 char_type const c_o = old_par.getChar(o.pos());
365 char_type const c_n = new_par.getChar(n.pos());
369 if (old_par.isInset(o.pos())) {
370 Inset const * i_o = old_par.getInset(o.pos());
371 Inset const * i_n = new_par.getInset(n.pos());
374 return equal(i_o, i_n);
377 Font fo = old_par.getFontSettings(o.buffer()->params(), o.pos());
378 Font fn = new_par.getFontSettings(n.buffer()->params(), n.pos());
383 /// Traverses a snake in a certain direction. p points to a
384 /// position in the old and new file and they are synchronously
385 /// moved along the snake. The function returns true if a snake
387 static bool traverse_snake(DocPair & p, DocRangePair const & range,
391 DocPair const & p_end =
392 direction == Forward ? range.to() : range.from();
395 if (direction == Backward)
397 if (!equal(p.o, p.n)) {
398 if (direction == Backward)
402 if (direction == Forward)
410 /////////////////////////////////////////////////////////////////////
414 /////////////////////////////////////////////////////////////////////
416 int Compare::Impl::find_middle_snake(DocRangePair const & rp,
425 bool Compare::Impl::diff(Buffer const * new_buf, Buffer const * old_buf,
426 Buffer const * dest_buf)
428 if (!new_buf || !old_buf || !dest_buf)
433 dest_buf_ = dest_buf;
434 dest_pars_ = &dest_buf->inset().asInsetText()->paragraphs();
437 recursion_level_ = 0;
438 nested_inset_level_ = 0;
440 DocRangePair rp(old_buf_, new_buf_);
442 DocPair from = rp.from();
443 traverse_snake(from, rp, Forward);
444 DocRangePair const snake(rp.from(), from);
445 process_snake(snake);
447 // Start the recursive algorithm
450 for (pit_type p = 0; p < (pit_type)dest_pars_->size(); ++p) {
451 (*dest_pars_)[p].setBuffer(const_cast<Buffer &>(*dest_buf));
452 (*dest_pars_)[p].setInsetOwner(&dest_buf_->inset());
459 void Compare::Impl::diff_i(DocRangePair const & rp)
462 DocPair middle_snake;
464 // Divides the problem into two smaller problems, split around
465 // the snake in the middle.
466 int const L_ses = find_middle_snake(rp, middle_snake);
468 // Set maximum of progress bar
469 if (++recursion_level_ == 1)
470 compare_.progressMax(L_ses);
472 // There are now three possibilities: the strings were the same,
473 // the strings were completely different, or we found a middle
474 // snake and we can split the string into two parts to process.
476 // Two identical strings (this must be a very rare case, because
477 // usually this will be part of a snake adjacent to these strings).
478 writeToDestBuffer(rp.o);
480 else if (middle_snake.o.empty()) {
481 // Two totally different strings
482 writeToDestBuffer(rp.o, Change::DELETED);
483 writeToDestBuffer(rp.n, Change::INSERTED);
486 // Retrieve the complete snake
487 DocPair first_part_end = middle_snake;
488 traverse_snake(first_part_end, rp, Backward);
489 DocRangePair first_part(rp.from(), first_part_end);
491 DocPair second_part_begin = middle_snake;
492 traverse_snake(second_part_begin, rp, Forward);
493 DocRangePair second_part(second_part_begin, rp.to());
495 // Split the string into three parts:
496 // 1. in front of the snake
497 diff_part(first_part);
499 // 2. the snake itself, and
500 DocRangePair const snake(first_part.to(), second_part.from());
501 process_snake(snake);
503 // 3. behind the snake.
504 diff_part(second_part);
510 void Compare::Impl::diff_part(DocRangePair const & rp)
512 // Is there a non-empty string in both buffers? If not, at least one
513 // of them is empty and we write the other one to the destination buffer.
514 if (!rp.o.empty() && !rp.n.empty())
517 else if (!rp.o.empty())
518 writeToDestBuffer(rp.o, Change::DELETED);
520 else if (!rp.n.empty())
521 writeToDestBuffer(rp.n, Change::INSERTED);
525 void Compare::Impl::diff_inset(Inset * inset, DocPair const & p)
527 // Find the dociterators for the beginning and the
528 // end of the inset, for the old and new document.
529 DocRangePair const rp = stepIntoInset(p);
531 // Recurse into the inset. Temporarily replace the dest_pars
532 // paragraph list by the paragraph list of the nested inset.
533 ParagraphList * backup_dest_pars = dest_pars_;
534 dest_pars_ = &inset->asInsetText()->text().paragraphs();
537 ++nested_inset_level_;
539 --nested_inset_level_;
541 dest_pars_ = backup_dest_pars;
545 void Compare::Impl::process_snake(DocRangePair const & rp)
548 get_paragraph_list(rp.o, pars);
550 // Find insets in this paragraph list
551 DocPair it = rp.from();
552 for (; it.o < rp.o.to; ++it) {
553 Inset * inset = it.o.text()->getPar(it.o.pit()).getInset(it.o.pos());
554 if (inset && inset->editable() && inset->asInsetText()) {
555 // Find the inset in the paragraph list that will be pasted into
556 // the final document. The contents of the inset will be replaced
557 // by the output of the algorithm below.
558 pit_type const pit = it.o.pit() - rp.o.from.pit();
559 pos_type const pos = pit ? it.o.pos() : it.o.pos() - rp.o.from.pos();
560 inset = pars[pit].getInset(pos);
561 LASSERT(inset, /**/);
562 diff_inset(inset, it);
565 writeToDestBuffer(pars);
569 void Compare::Impl::writeToDestBuffer(DocRange const & range,
573 get_paragraph_list(range, pars);
578 ParagraphList::iterator it = pars.begin();
579 for (; it != pars.end(); ++it) {
580 it->setChange(Change(type));
584 writeToDestBuffer(pars);
586 if (nested_inset_level_ == 0)
587 compare_.progress(size);
591 void Compare::Impl::writeToDestBuffer(ParagraphList const & pars) const
593 pit_type const pit = dest_pars_->size() - 1;
594 dest_pars_->insert(dest_pars_->end(), pars.begin(), pars.end());
596 mergeParagraph(dest_buf_->params(), *dest_pars_, pit);
600 #include "moc_Compare.cpp"