X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FCompare.cpp;h=709a25c39537a9e07f61b7833a5d029a096b5ec2;hb=2098f1d8c20d51e63e670bcdc9da8996068975bf;hp=745b3218a3bf84aa8a39e0916e4ce9d667a74434;hpb=1ff6b1122b064912725fcf58a711bce8d510e0a6;p=lyx.git

diff --git a/src/Compare.cpp b/src/Compare.cpp
index 745b3218a3..709a25c395 100644
--- a/src/Compare.cpp
+++ b/src/Compare.cpp
@@ -14,7 +14,13 @@
 
 #include "Buffer.h"
 #include "BufferParams.h"
+#include "Changes.h"
 
+#include "insets/InsetText.h"
+
+#include "support/lassert.h"	
+
+#include <boost/next_prior.hpp>
 
 using namespace std;
 using namespace lyx::support;
@@ -22,6 +28,155 @@ using namespace lyx::support;
 
 namespace lyx {
 
+void step_forward(DocIterator & dit)
+{
+	dit.top().forwardPos();
+}
+
+
+void step_backward(DocIterator & dit)
+{
+	dit.top().backwardPos();
+}
+
+
+bool step_forward(DocIterator & dit, DocIterator const & end)
+{
+	if (dit == end)
+		return false;
+	step_forward(dit);
+	return true;
+}
+
+
+bool step_backward(DocIterator & dit, DocIterator const & beg)
+{
+	if (dit == beg)
+		return false;
+	step_backward(dit);
+	return true;
+}
+
+/**
+ * A pair of two DocIterators that form a range.
+ */
+class DocRange {
+public:
+	DocRange(DocIterator from_, DocIterator to_)
+		: from(from_), to(to_)
+	{}
+
+	DocRange(Buffer const * buf)
+	{
+		from = doc_iterator_begin(buf);
+		to = doc_iterator_end(buf);
+		to.backwardPos();
+	}
+
+	///
+	Text * text() const { return from.text(); }
+	///
+	bool empty() const { return to <= from;	}
+	///
+	size_t length() const;
+
+	/// The begin of the range
+	DocIterator from;
+	/// The end of the range
+	DocIterator to;
+};
+
+
+size_t DocRange::length() const
+{
+	pit_type startpit = from.pit();
+	pit_type endpit = to.pit();
+	ParagraphList const & ps_ = from.text()->paragraphs();
+
+	ParagraphList pars(boost::next(ps_.begin(), startpit),
+				boost::next(ps_.begin(), endpit + 1));
+
+	// Remove the end of the last paragraph; afterwards, remove the
+	// beginning of the first paragraph.
+	Paragraph & back = pars.back();
+	back.eraseChars(to.pos(), back.size(), false);
+	Paragraph & front = pars.front();
+	front.eraseChars(0, from.pos(), false);
+
+	ParagraphList::const_iterator pit = pars.begin();
+	ParagraphList::const_iterator end_it = pars.end();
+
+	size_t length = 0;
+	for (; pit != end_it; ++pit)
+		length += pit->size() + 1;
+
+	// The last paragraph has no paragraph-end
+	--length;
+	return length;	
+}
+
+
+class DocPair {
+public:
+	DocPair() {}
+
+	DocPair(DocIterator o_, DocIterator n_)
+		: o(o_), n(n_)
+	{}
+
+	DocPair & operator++()
+	{
+		step_forward(o);
+		step_forward(n);
+		return *this;
+	}
+	///
+	DocIterator o;
+	///
+	DocIterator n;
+};
+	
+/**
+ * A pair of two DocRanges.
+ */
+class DocRangePair {
+public:
+	DocRangePair(DocRange o_, DocRange n_)
+		: o(o_), n(n_)
+	{}
+	
+	DocRangePair(DocPair from, DocPair to)
+		: o(from.o, to.o), n(from.n, to.n)
+	{}
+
+	DocRangePair(Buffer const * o_buf, Buffer const * n_buf)
+		: o(o_buf), n(n_buf)
+	{}
+
+	/// Returns the from pair
+	DocPair from() const { return DocPair(o.from, n.from); }
+
+	/// Returns the to pair
+	DocPair to() const { return DocPair(o.to, n.to); }
+
+	DocRange o;
+	DocRange n;
+};
+
+
+DocRangePair stepIntoInset(DocPair const & inset_location)
+{
+	DocRangePair rp(inset_location, inset_location);
+	rp.o.from.forwardPos();
+	rp.n.from.forwardPos();
+	step_forward(rp.o.to);
+	step_forward(rp.n.to);
+	rp.o.to.backwardPos();
+	rp.n.to.backwardPos();
+	return rp;
+}
+
+
 /**
  * The implementation of the algorithm that does the comparison
  * between two documents.
@@ -36,14 +191,74 @@ public:
 	///
 	~Impl() {}
 
-	/// Set to true to abort the algorithm
+	// Algorithm to find the shortest edit string. This algorithm 
+	// only needs a linear amount of memory (linear with the sum
+	// of the number of characters in the two paragraph-lists).
+	bool diff(Buffer const * new_buf, Buffer const * old_buf,
+		Buffer const * dest_buf);
+
+	/// Set to true to cancel the algorithm
 	bool abort_;
 
 private:
+	// Finds the middle snake and returns the length of the
+	// shortest edit script.
+	int find_middle_snake(DocRangePair const & rp, DocPair & middle_snake);
+
+	// This function is called recursively by a divide and conquer
+	// algorithm. Each time, the string is divided into two split
+	// around the middle snake.
+	void diff_i(DocRangePair const & rp);
+
+	/// Processes the splitted chunks. It either adds them as deleted,
+	/// as added, or call diff_i for further processing.
+	void diff_part(DocRangePair const & rp);
+
+	/// Runs the algorithm for the inset located at /c it and /c it_n 
+	/// and adds the result to /c pars.
+	void diff_inset(Inset * inset, DocPair const & p);
+
+	// Adds the snake to the destination buffer. The algorithm will
+	// recursively be applied to any InsetTexts that are within the snake.
+	void process_snake(DocRangePair const & rp);
+
+	/// Writes the range to the destination buffer
+	void writeToDestBuffer(DocRange const & range,
+		Change::Type type = Change::UNCHANGED);
+	
+	/// Writes the paragraph list to the destination buffer
+	void writeToDestBuffer(ParagraphList const & copy_pars) const;
+
+	/// The length of the first chunk currently processed
+	int N;
+	/// The length of the second chunk currently processed
+	int M;
+
 	/// The thread object, used to emit signals to the GUI
 	Compare const & compare_;
+
+	/// The buffer containing text that will be marked as old
+	Buffer const * old_buf_;
+	/// The buffer containing text that will be marked as new
+	Buffer const * new_buf_;
+	/// The buffer containing text that will be marked as new
+	Buffer const * dest_buf_;
+
+	/// The paragraph list of the destination buffer
+	ParagraphList * dest_pars_;
+
+	/// The level of recursion
+	int recursion_level_;
+
+	/// The number of nested insets at this level
+	int nested_inset_level_;
 };
 
+/////////////////////////////////////////////////////////////////////
+//
+// Compare
+//
+/////////////////////////////////////////////////////////////////////
 
 Compare::Compare(Buffer const * new_buf, Buffer const * old_buf,
 	Buffer * const dest_buf, CompareOptions const & options)
@@ -55,10 +270,8 @@ Compare::Compare(Buffer const * new_buf, Buffer const * old_buf,
 
 void Compare::run()
 {
-	if (!dest_buffer || !new_buffer || !old_buffer) {
-		error();
+	if (!dest_buffer || !new_buffer || !old_buffer)
 		return;
-	}
 
 	// Copy the buffer params to the new buffer
 	dest_buffer->params() = options_.settings_from_new
@@ -66,13 +279,19 @@ void Compare::run()
 	
 	// do the real work
 	if (!doCompare())
-		error();
-	else
-		finished(pimpl_->abort_);
+		return;
+	
+	finished(pimpl_->abort_);
 	return;
 }
 
 
+int Compare::doCompare()
+{
+	return pimpl_->diff(new_buffer, old_buffer, dest_buffer);
+}
+
+
 void Compare::abort()
 {
 	pimpl_->abort_ = true;
@@ -82,9 +301,293 @@ void Compare::abort()
 }
 
 
-int Compare::doCompare()
+void get_paragraph_list(DocRange const & range,
+	ParagraphList & pars)
+{
+	// Clone the paragraphs within the selection.
+	pit_type startpit = range.from.pit();
+	pit_type endpit = range.to.pit();
+	ParagraphList const & ps_ = range.text()->paragraphs();
+	ParagraphList tmp_pars(boost::next(ps_.begin(), startpit),
+		boost::next(ps_.begin(), endpit + 1));
+
+	// Remove the end of the last paragraph; afterwards, remove the
+	// beginning of the first paragraph. Keep this order - there may only
+	// be one paragraph!
+	Paragraph & back = tmp_pars.back();
+	back.eraseChars(range.to.pos(), back.size(), false);
+	Paragraph & front = tmp_pars.front();
+	front.eraseChars(0, range.from.pos(), false);
+
+	pars.insert(pars.begin(), tmp_pars.begin(), tmp_pars.end());
+}
+
+
+bool equal(Inset const * i_o, Inset const * i_n)
+{
+	if (!i_o || !i_n)
+		return false;
+
+	// Different types of insets
+	if (i_o->lyxCode() != i_n->lyxCode())
+		return false;
+
+	// Editable insets are assumed to be the same as they are of the 
+	// same type. If we later on decide that we insert them in the
+	// document as being unchanged, we will run the algorithm on the
+	// contents of the two insets.
+	// FIXME: This fails if the parameters of the insets differ.
+	// FIXME: We do not recurse into InsetTabulars.
+	// FIXME: We need methods inset->equivalent(inset).
+	if (i_o->editable() && !i_o->asInsetMath()
+		  && i_o->asInsetText())
+		return true;
+
+	ostringstream o_os;
+	ostringstream n_os;
+	i_o->write(o_os);
+	i_n->write(n_os);
+	return o_os.str() == n_os.str();
+}
+
+
+bool equal(DocIterator & o, DocIterator & n) {
+	Paragraph const & old_par = o.text()->getPar(o.pit());
+	Paragraph const & new_par = n.text()->getPar(n.pit());
+
+	Inset const * i_o = old_par.getInset(o.pos());
+	Inset const * i_n = new_par.getInset(n.pos());
+
+	if (i_o && i_n)
+		return equal(i_o, i_n);
+	
+	char_type c_o = old_par.getChar(o.pos());
+	char_type c_n = new_par.getChar(n.pos());
+	Font fo = old_par.getFontSettings(o.buffer()->params(), o.pos());
+	Font fn = new_par.getFontSettings(n.buffer()->params(), n.pos());
+	return c_o == c_n && fo == fn;
+}
+
+
+void traverse_snake_back(DocRangePair & rp)
+{
+	while (true) {
+		// Traverse snake
+		if (!step_backward(rp.o.to, rp.o.from))
+			break;
+
+		if (!step_backward(rp.n.to, rp.n.from)) {
+			step_forward(rp.o.to);
+			break;
+		}
+
+		if (!equal(rp.o.to, rp.n.to)) {
+			step_forward(rp.o.to);
+			step_forward(rp.n.to);
+			break;
+		}
+	}
+}
+
+
+void traverse_snake_forw(DocRangePair & rp)
+{
+	while (equal(rp.o.from, rp.n.from)) {
+		if (!step_forward(rp.o.from, rp.o.to))
+			break;
+
+		if (!step_forward(rp.n.from, rp.n.to)) {
+			step_backward(rp.o.from);
+			break;
+		}
+	}
+}
+
+/////////////////////////////////////////////////////////////////////
+//
+// Compare::Impl
+//
+/////////////////////////////////////////////////////////////////////
+
+int Compare::Impl::find_middle_snake(DocRangePair const & rp,
+	DocPair &)
+{
+	N = rp.o.length();
+	M = rp.n.length();
+	return M+N;
+}
+
+
+bool Compare::Impl::diff(Buffer const * new_buf, Buffer const * old_buf,
+	Buffer const * dest_buf)
+{
+	if (!new_buf || !old_buf || !dest_buf)
+		return false;
+
+	old_buf_ = old_buf;
+	new_buf_ = new_buf;
+	dest_buf_ = dest_buf;
+	dest_pars_ = &dest_buf->inset().asInsetText()->paragraphs();
+	dest_pars_->clear();
+
+	recursion_level_ = 0;
+	nested_inset_level_ = 0;
+
+	DocRangePair rp(old_buf_, new_buf_);
+
+	DocPair from = rp.from();
+	traverse_snake_forw(rp);
+	DocRangePair const snake(from, rp.from());
+	process_snake(snake);
+	
+	// Start the recursive algorithm
+	diff_i(rp);
+
+	for (pit_type p = 0; p < (pit_type)dest_pars_->size(); ++p) {
+		(*dest_pars_)[p].setBuffer(const_cast<Buffer &>(*dest_buf));
+		(*dest_pars_)[p].setInsetOwner(&dest_buf_->inset());
+	}
+
+	return true;
+}
+
+
+void Compare::Impl::diff_i(DocRangePair const & rp)
+{
+	// The middle snake
+	DocPair middle_snake;
+
+	// Divides the problem into two smaller problems, split around
+	// the snake in the middle.
+	int const L_ses = find_middle_snake(rp, middle_snake);
+
+	// Set maximum of progress bar
+	if (++recursion_level_ == 1)
+		compare_.progressMax(L_ses);
+
+	// There are now three possibilities: the strings were the same,
+	// the strings were completely different, or we found a middle
+	// snake and we can split the string into two parts to process.
+	if (L_ses == 0)
+		// Two the same strings (this must be a very rare case, because
+		// usually this will be part of a snake adjacent to these strings).
+		writeToDestBuffer(rp.o);
+
+	else if (middle_snake.o.empty()) {
+		// Two totally different strings
+		writeToDestBuffer(rp.o, Change::DELETED);
+		writeToDestBuffer(rp.n, Change::INSERTED);
+
+	} else {
+		// Retrieve the complete snake
+		DocRangePair first_part(rp.from(), middle_snake);
+		traverse_snake_back(first_part);
+			
+		DocRangePair second_part(middle_snake, rp.to());
+		traverse_snake_forw(second_part);
+			
+		// Split the string in three parts:
+		// 1. in front of the snake
+		diff_part(first_part);
+
+		// 2. the snake itself, and
+		DocRangePair const snake(first_part.to(), second_part.from());
+		process_snake(snake);
+
+		// 3. behind the snake.
+		diff_part(second_part);
+	}
+	--recursion_level_;
+}
+
+
+void Compare::Impl::diff_part(DocRangePair const & rp)
+{
+	// Is there a finite length string in both buffers, if not there
+	// is an empty string and we write the other one to the buffer.
+	if (!rp.o.empty() && !rp.n.empty())
+		diff_i(rp);
+	
+	else if (!rp.o.empty())
+		writeToDestBuffer(rp.o, Change::DELETED);
+
+	else if (!rp.n.empty())
+		writeToDestBuffer(rp.n, Change::INSERTED);
+}
+
+
+void Compare::Impl::diff_inset(Inset * inset, DocPair const & p)
+{
+	// Find the dociterators for the beginning and the
+	// end of the inset, for the old and new document.
+	DocRangePair const rp = stepIntoInset(p);
+
+	// Recurse into the inset. Temporarily replace the dest_pars
+	// paragraph list by the paragraph list of the nested inset.
+	ParagraphList * backup_dest_pars = dest_pars_;
+	dest_pars_ = &inset->asInsetText()->text().paragraphs();
+	dest_pars_->clear();
+	
+	++nested_inset_level_;
+	diff_i(rp);
+	--nested_inset_level_;
+
+	dest_pars_ = backup_dest_pars;
+}
+
+
+void Compare::Impl::process_snake(DocRangePair const & rp)
+{
+	ParagraphList pars;
+	get_paragraph_list(rp.o, pars);
+
+	// Find insets in this paragaph list
+	DocPair it = rp.from();
+	for (; it.o < rp.o.to; ++it) {
+		Inset * inset = it.o.text()->getPar(it.o.pit()).getInset(it.o.pos());
+		if (inset && inset->editable() && inset->asInsetText()) {
+			// Find the inset in the paragraph list that will be pasted into
+			// the final document. The contents of the inset will be replaced
+			// by the output of the algorithm below.
+			pit_type const pit = it.o.pit() - rp.o.from.pit();
+			pos_type const pos = pit ? it.o.pos() : it.o.pos() - rp.o.from.pos();
+			inset = pars[pit].getInset(pos);
+			LASSERT(inset, /**/);
+			diff_inset(inset, it);
+		}
+	}
+	writeToDestBuffer(pars);
+}
+
+
+void Compare::Impl::writeToDestBuffer(DocRange const & range,
+	Change::Type type)
+{
+	ParagraphList pars;
+	get_paragraph_list(range, pars);
+
+	pos_type size = 0;
+
+	// Set the change
+	ParagraphList::iterator it = pars.begin();
+	for (; it != pars.end(); ++it) {
+		it->setChange(Change(type));
+		size += it->size();
+	}
+
+	writeToDestBuffer(pars);
+
+	if (nested_inset_level_ == 0)
+		compare_.progress(size);
+}
+
+
+void Compare::Impl::writeToDestBuffer(ParagraphList const & pars) const
 {
-	return 0;
+	pit_type const pit = dest_pars_->size() - 1;
+	dest_pars_->insert(dest_pars_->end(), pars.begin(), pars.end());
+	if (pit >= 0)
+		mergeParagraph(dest_buf_->params(), *dest_pars_, pit);
 }