X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FCompare.cpp;h=82236aa8de7f5eea7af0d69ec15d2ff8bf2b6290;hb=4ed0312c51704780af1c452d3a82a84171b3725a;hp=745b3218a3bf84aa8a39e0916e4ce9d667a74434;hpb=1ff6b1122b064912725fcf58a711bce8d510e0a6;p=lyx.git diff --git a/src/Compare.cpp b/src/Compare.cpp index 745b3218a3..82236aa8de 100644 --- a/src/Compare.cpp +++ b/src/Compare.cpp @@ -12,9 +12,19 @@ #include "Compare.h" +#include "Author.h" #include "Buffer.h" #include "BufferParams.h" +#include "Changes.h" +#include "CutAndPaste.h" +#include "ErrorList.h" +#include "Font.h" +#include "insets/InsetText.h" + +#include "support/docstream.h" +#include "support/lassert.h" +#include "support/qstring_helpers.h" using namespace std; using namespace lyx::support; @@ -22,6 +32,197 @@ using namespace lyx::support; namespace lyx { + +enum Direction { + Forward = 0, + Backward +}; + + +static void step(DocIterator & dit, Direction direction) +{ + if (direction == Forward) + dit.top().forwardPos(); + else + dit.top().backwardPos(); +} + + +static void step(DocIterator & dit, DocIterator const & end, Direction direction) +{ + if (dit != end) + step(dit, direction); +} + + +/** + * A pair of two DocIterators that form a range. 
+ */ +class DocRange { +public: + DocRange(DocIterator const & from_, DocIterator const & to_) + : from(from_), to(to_) + {} + + DocRange(Buffer const * buf) : + from(doc_iterator_begin(buf)), + to(doc_iterator_end(buf)) + { + to.backwardPos(); + } + + /// + Text * text() const { return from.text(); } + /// + bool empty() const { return to <= from; } + /// + size_t length() const; + + /// The begin of the range + DocIterator from; + /// The end of the range + DocIterator to; +}; + + +size_t DocRange::length() const +{ + ParagraphList const & ps = from.text()->paragraphs(); + size_t length = 0; + pit_type pit = from.pit(); + pit_type const endpit = to.pit(); + for (; pit < endpit; ++pit) + length += ps[pit].size() + 1; + length += to.pos() - from.pos(); + return length; +} + + +class DocPair { +public: + DocPair() + {} + + DocPair(DocIterator const & o_, DocIterator const & n_) + : o(o_), n(n_) + {} + + bool operator!=(DocPair const & rhs) const + { + // this might not be intuitive but correct for our purpose + return o != rhs.o && n != rhs.n; + } + + + DocPair & operator++() + { + step(o, Forward); + step(n, Forward); + return *this; + } + + DocPair & operator--() + { + step(o, Backward); + step(n, Backward); + return *this; + } + /// + DocIterator o; + /// + DocIterator n; +}; + +/** + * A pair of two DocRanges. 
+ */ +class DocRangePair { +public: + DocRangePair(DocRange const & o_, DocRange const & n_) + : o(o_), n(n_) + {} + + DocRangePair(DocPair const & from, DocPair const & to) + : o(from.o, to.o), n(from.n, to.n) + {} + + DocRangePair(Buffer const * o_buf, Buffer const * n_buf) + : o(o_buf), n(n_buf) + {} + + /// Returns the from pair + DocPair from() const + { + return DocPair(o.from, n.from); + } + + /// Returns the to pair + DocPair to() const + { + return DocPair(o.to, n.to); + } + + DocRange o; + DocRange n; +}; + + +static DocRangePair stepIntoInset(DocPair const & inset_location) +{ + DocRangePair rp(inset_location, inset_location); + rp.o.from.forwardPos(); + rp.n.from.forwardPos(); + step(rp.o.to, Forward); + step(rp.n.to, Forward); + rp.o.to.backwardPos(); + rp.n.to.backwardPos(); + return rp; +} + + +/** + * This class is designed to hold a vector that has both positive and + * negative indices. It is internally represented as two vectors, one + * for non-negative indices and one for negative indices. In this way, the + * vector can grow in both directions. + * If an index is not available in the vector, the default value is + * returned. If an object is put in the vector beyond its size, the + * empty spots in between are also filled with the default value. + */ +template +class compl_vector { +public: + compl_vector() + {} + + void reset(T const & def) + { + default_ = def; + Vp_.clear(); + Vn_.clear(); + } + + /// Gets the value at index. If it is not in the vector + /// the default value is inserted and returned. + T & operator[](int index) { + vector & V = index >= 0 ? Vp_ : Vn_; + unsigned int const ii = index >= 0 ? 
index : -index - 1; + while (ii >= V.size()) + V.push_back(default_); + return V[ii]; + } + +private: + /// The vector for positive indices + vector Vp_; + /// The vector for negative indices + vector Vn_; + /// The default value that is inserted in the vector + /// if more space is needed + T default_; +}; + + /** * The implementation of the algorithm that does the comparison * between two documents. @@ -29,47 +230,194 @@ namespace lyx { class Compare::Impl { public: /// - Impl(Compare const & compare) - : abort_(false), compare_(compare) + Impl(Compare const & compare) + : abort_(false), n_(0), m_(0), offset_reverse_diagonal_(0), + odd_offset_(false), compare_(compare), + old_buf_(nullptr), new_buf_(nullptr), dest_buf_(nullptr), + dest_pars_(nullptr), recursion_level_(0), nested_inset_level_(0), D_(0) {} /// - ~Impl() {} + ~Impl() + {} + + // Algorithm to find the shortest edit string. This algorithm + // only needs a linear amount of memory (linear with the sum + // of the number of characters in the two paragraph-lists). + bool diff(Buffer const * new_buf, Buffer const * old_buf, + Buffer const * dest_buf); - /// Set to true to abort the algorithm + /// Set to true to cancel the algorithm bool abort_; + /// + QString status() + { + QString status; + status += toqstr("recursion level:") + " " + QString::number(recursion_level_) + + " " + toqstr("differences:") + " " + QString::number(D_); + return status; + } + private: + /// Finds the middle snake and returns the length of the + /// shortest edit script. + int findMiddleSnake(DocRangePair const & rp, DocPair & middle_snake); + + enum SnakeResult { + NoSnake, + SingleSnake, + NormalSnake + }; + + /// Retrieve the middle snake when there is overlap between + /// the forward and backward path. 
+ SnakeResult retrieveMiddleSnake(int k, int D, Direction direction, + DocPair & middle_snake); + + /// Find the furthest reaching D-path (number of horizontal + /// and vertical steps; differences between the old and new + /// document) in the k-diagonal (horizontal minus vertical steps). + void furthestDpathKdiagonal(int D, int k, + DocRangePair const & rp, Direction direction); + + /// Is there overlap between the forward and backward path + bool overlap(int k, int D); + + /// This function is called recursively by a divide and conquer + /// algorithm. Each time, the string is divided into two parts, split + /// around the middle snake. + void diff_i(DocRangePair const & rp); + + /// Processes the split chunks. It either adds them as deleted, + /// as added, or calls diff_i for further processing. + void diffPart(DocRangePair const & rp); + + /// Runs the algorithm for the inset located at \c p in the old and new + /// documents and writes the result into \c inset. + void diffInset(Inset * inset, DocPair const & p); + + /// Adds the snake to the destination buffer. The algorithm will + /// recursively be applied to any InsetTexts that are within the snake. + void processSnake(DocRangePair const & rp); + + /// Writes the range to the destination buffer + void writeToDestBuffer(DocRange const & range, + Change::Type type = Change::UNCHANGED); + + /// Writes the paragraph list to the destination buffer + void writeToDestBuffer(ParagraphList const & copy_pars) const; + + /// The length of the old chunk currently processed + int n_; + /// The length of the new chunk currently processed + int m_; + /// The offset diagonal of the reverse path of the + /// currently processed chunk + int offset_reverse_diagonal_; + /// Is the offset odd or even ? 
+ bool odd_offset_; + /// The thread object, used to emit signals to the GUI Compare const & compare_; + + /// The buffer containing text that will be marked as old + Buffer const * old_buf_; + /// The buffer containing text that will be marked as new + Buffer const * new_buf_; + /// The destination buffer that receives the result of the comparison + Buffer const * dest_buf_; + + /// The paragraph list of the destination buffer + ParagraphList * dest_pars_; + + /// The level of recursion + int recursion_level_; + + /// The number of nested insets at this level + int nested_inset_level_; + + /// The position/snake in the old/new document + /// of the forward/reverse search + compl_vector ofp; + compl_vector nfp; + compl_vector ofs; + compl_vector nfs; + compl_vector orp; + compl_vector nrp; + compl_vector ors; + compl_vector nrs; + + /// The number of differences in the path the algorithm + /// is currently processing. + int D_; }; +///////////////////////////////////////////////////////////////////// +// +// Compare +// +///////////////////////////////////////////////////////////////////// Compare::Compare(Buffer const * new_buf, Buffer const * old_buf, Buffer * const dest_buf, CompareOptions const & options) : new_buffer(new_buf), old_buffer(old_buf), dest_buffer(dest_buf), options_(options), pimpl_(new Impl(*this)) { + connect(&status_timer_, SIGNAL(timeout()), + this, SLOT(doStatusMessage())); + status_timer_.start(1000); +} + + +void Compare::doStatusMessage() +{ + statusMessage(pimpl_->status()); } void Compare::run() { - if (!dest_buffer || !new_buffer || !old_buffer) { - error(); + if (!dest_buffer || !new_buffer || !old_buffer) return; - } - // Copy the buffer params to the new buffer + // Copy the buffer params to the destination buffer dest_buffer->params() = options_.settings_from_new ? 
new_buffer->params() : old_buffer->params(); - - // do the real work + // Copy extra authors to the destination buffer + AuthorList const & extra_authors = options_.settings_from_new ? + old_buffer->params().authors() : new_buffer->params().authors(); + AuthorList::Authors::const_iterator it = extra_authors.begin(); + for (; it != extra_authors.end(); ++it) + dest_buffer->params().authors().record(*it); + + // We will need this later + DocumentClassConstPtr const olddc = + dest_buffer->params().documentClassPtr(); + // We do not want to share the DocumentClass with the other Buffer. + // See bug #10295. + dest_buffer->params().makeDocumentClass(dest_buffer->isClone(), dest_buffer->isInternal()); + + doStatusMessage(); + // Do the real work if (!doCompare()) - error(); - else - finished(pimpl_->abort_); - return; + return; + + // The comparison routine simply copies the paragraphs over into the + // new buffer with the document class from wherever they came from. + // So we need to reset the document class of all the paragraphs. + // See bug #10295. + cap::switchBetweenClasses( + olddc, dest_buffer->params().documentClassPtr(), + static_cast(dest_buffer->inset())); + + finished(pimpl_->abort_); +} + + +int Compare::doCompare() +{ + return pimpl_->diff(new_buffer, old_buffer, dest_buffer); } @@ -82,9 +430,459 @@ void Compare::abort() } -int Compare::doCompare() +static void getParagraphList(DocRange const & range, + ParagraphList & pars) +{ + // Clone the paragraphs within the selection. + pit_type startpit = range.from.pit(); + pit_type endpit = range.to.pit(); + ParagraphList const & ps_ = range.text()->paragraphs(); + ParagraphList tmp_pars(ps_.iterator_at(startpit), + ps_.iterator_at(endpit + 1)); + + // Remove the end of the last paragraph; afterwards, remove the + // beginning of the first paragraph. Keep this order - there may only + // be one paragraph! 
+ Paragraph & back = tmp_pars.back(); + back.eraseChars(range.to.pos(), back.size(), false); + Paragraph & front = tmp_pars.front(); + front.eraseChars(0, range.from.pos(), false); + + pars.insert(pars.begin(), tmp_pars.begin(), tmp_pars.end()); +} + + +static bool equal(Inset const * i_o, Inset const * i_n) +{ + if (!i_o || !i_n) + return false; + + // Different types of insets + if (i_o->lyxCode() != i_n->lyxCode()) + return false; + + // Editable insets are assumed to be the same as they are of the + // same type. If we later on decide that we insert them in the + // document as being unchanged, we will run the algorithm on the + // contents of the two insets. + // FIXME: This fails if the parameters of the insets differ. + // FIXME: We do not recurse into InsetTabulars. + // FIXME: We need methods inset->equivalent(inset). + if (i_o->editable() && !i_o->asInsetMath() + && i_o->asInsetText()) + return true; + + ostringstream o_os; + ostringstream n_os; + i_o->write(o_os); + i_n->write(n_os); + return o_os.str() == n_os.str(); +} + + +static bool equal(DocIterator & o, DocIterator & n) +{ + // Explicitly check for this, so we won't call + // Paragraph::getChar for the last pos. 
+ bool const o_lastpos = o.pos() == o.lastpos(); + bool const n_lastpos = n.pos() == n.lastpos(); + if (o_lastpos || n_lastpos) + return o_lastpos && n_lastpos; + + Paragraph const & old_par = o.text()->getPar(o.pit()); + Paragraph const & new_par = n.text()->getPar(n.pit()); + + char_type const c_o = old_par.getChar(o.pos()); + char_type const c_n = new_par.getChar(n.pos()); + if (c_o != c_n) + return false; + + if (old_par.isInset(o.pos())) { + Inset const * i_o = old_par.getInset(o.pos()); + Inset const * i_n = new_par.getInset(n.pos()); + + if (i_o && i_n) + return equal(i_o, i_n); + } + + Font fo = old_par.getFontSettings(o.buffer()->params(), o.pos()); + Font fn = new_par.getFontSettings(n.buffer()->params(), n.pos()); + return fo == fn; +} + + +/// Traverses a snake in a certain direction. p points to a +/// position in the old and new file and they are synchronously +/// moved along the snake. The function returns true if a snake +/// was found. +static bool traverseSnake(DocPair & p, DocRangePair const & range, + Direction direction) +{ + bool ret = false; + DocPair const & p_end = + direction == Forward ? range.to() : range.from(); + + while (p != p_end) { + if (direction == Backward) + --p; + if (!equal(p.o, p.n)) { + if (direction == Backward) + ++p; + return ret; + } + if (direction == Forward) + ++p; + ret = true; + } + return ret; +} + + +///////////////////////////////////////////////////////////////////// +// +// Compare::Impl +// +///////////////////////////////////////////////////////////////////// + + +void Compare::Impl::furthestDpathKdiagonal(int D, int k, + DocRangePair const & rp, Direction direction) +{ + compl_vector & op = direction == Forward ? ofp : orp; + compl_vector & np = direction == Forward ? nfp : nrp; + compl_vector & os = direction == Forward ? ofs : ors; + compl_vector & ns = direction == Forward ? nfs : nrs; + + // A vertical step means stepping one character in the new document. 
+ bool vertical_step = k == -D; + if (!vertical_step && k != D) { + vertical_step = direction == Forward + ? op[k - 1] < op[k + 1] : op[k - 1] > op[k + 1]; + } + + // Where do we take the step from ? + int const kk = vertical_step ? k + 1 : k - 1; + DocPair p(op[kk], np[kk]); + DocPair const s(os[kk], ns[kk]); + + // If D==0 we simulate a vertical step from (0,-1) by doing nothing. + if (D != 0) { + // Take a step + if (vertical_step && direction == Forward) + step(p.n, rp.n.to, direction); + else if (vertical_step && direction == Backward) + step(p.n, rp.n.from, direction); + else if (!vertical_step && direction == Forward) + step(p.o, rp.o.to, direction); + else if (!vertical_step && direction == Backward) + step(p.o, rp.o.from, direction); + } + + // Traverse snake + if (traverseSnake(p, rp, direction)) { + // Record last snake + os[k] = p.o; + ns[k] = p.n; + } else { + // Copy last snake from the previous step + os[k] = s.o; + ns[k] = s.n; + } + + //Record new position + op[k] = p.o; + np[k] = p.n; +} + + +bool Compare::Impl::overlap(int k, int D) +{ + // To generalize for the forward and reverse checks + int kk = offset_reverse_diagonal_ - k; + + // Can we have overlap ? + if (kk <= D && kk >= -D) { + // Do we have overlap ? + if (odd_offset_) + return ofp[k] >= orp[kk] && nfp[k] >= nrp[kk]; + else + return ofp[kk] >= orp[k] && nfp[kk] >= nrp[k]; + } + return false; +} + + +Compare::Impl::SnakeResult Compare::Impl::retrieveMiddleSnake( + int k, int D, Direction direction, DocPair & middle_snake) +{ + compl_vector & os = direction == Forward ? ofs : ors; + compl_vector & ns = direction == Forward ? nfs : nrs; + compl_vector & os_r = direction == Forward ? ors : ofs; + compl_vector & ns_r = direction == Forward ? nrs : nfs; + + // The diagonal while doing the backward search + int kk = -k + offset_reverse_diagonal_; + + // Did we find a snake ? 
+ if (os[k].empty() && os_r[kk].empty()) { + // No, there is no snake at all, in which case + // the length of the shortest edit script is M+N. + LATTEST(2 * D - odd_offset_ == m_ + n_); + return NoSnake; + } + + if (os[k].empty()) { + // Yes, but there is only 1 snake and we found it in the + // reverse path. + middle_snake.o = os_r[kk]; + middle_snake.n = ns_r[kk]; + return SingleSnake; + } + + middle_snake.o = os[k]; + middle_snake.n = ns[k]; + return NormalSnake; +} + + +int Compare::Impl::findMiddleSnake(DocRangePair const & rp, + DocPair & middle_snake) +{ + // The lengths of the old and new chunks. + n_ = rp.o.length(); + m_ = rp.n.length(); + + // Forward paths are centered around the 0-diagonal; reverse paths + // are centered around the diagonal N - M. (Delta in the article) + offset_reverse_diagonal_ = n_ - m_; + + // If the offset is odd, only check for overlap while extending forward + // paths, otherwise only check while extending reverse paths. + odd_offset_ = (offset_reverse_diagonal_ % 2 != 0); + + ofp.reset(rp.o.from); + nfp.reset(rp.n.from); + ofs.reset(DocIterator()); + nfs.reset(DocIterator()); + orp.reset(rp.o.to); + nrp.reset(rp.n.to); + ors.reset(DocIterator()); + nrs.reset(DocIterator()); + + // In the formula below, the "+ 1" ensures we round like ceil() + int const D_max = (m_ + n_ + 1)/2; + // D is the number of horizontal and vertical steps, i.e. + // different characters in the old and new chunk. + for (int D = 0; D <= D_max; ++D) { + // to be used in the status messages + D_ = D; + + // Forward and reverse paths + for (int f = 0; f < 2; ++f) { + Direction direction = f == 0 ? Forward : Backward; + + // Diagonals between -D and D can be reached by a D-path + for (int k = -D; k <= D; k += 2) { + // Find the furthest reaching D-path on this diagonal + furthestDpathKdiagonal(D, k, rp, direction); + + // Only check for overlap for forward paths if the offset is odd + // and only for reverse paths if the offset is even. 
+ if (odd_offset_ == (direction == Forward)) { + + // Do the forward and backward paths overlap ? + if (overlap(k, D - odd_offset_)) { + retrieveMiddleSnake(k, D, direction, middle_snake); + return 2 * D - odd_offset_; + } + } + if (abort_) + return 0; + } + } + } + // This should never be reached + return -2; +} + + +bool Compare::Impl::diff(Buffer const * new_buf, Buffer const * old_buf, + Buffer const * dest_buf) +{ + if (!new_buf || !old_buf || !dest_buf) + return false; + + old_buf_ = old_buf; + new_buf_ = new_buf; + dest_buf_ = dest_buf; + dest_pars_ = &dest_buf->inset().asInsetText()->paragraphs(); + dest_pars_->clear(); + + recursion_level_ = 0; + nested_inset_level_ = 0; + + DocRangePair rp(old_buf_, new_buf_); + + DocPair from = rp.from(); + traverseSnake(from, rp, Forward); + DocRangePair const snake(rp.from(), from); + processSnake(snake); + + // Start the recursive algorithm + DocRangePair rp_new(from, rp.to()); + if (!rp_new.o.empty() || !rp_new.n.empty()) + diff_i(rp_new); + + for (pit_type p = 0; p < (pit_type)dest_pars_->size(); ++p) { + (*dest_pars_)[p].setInsetBuffers(const_cast(*dest_buf)); + (*dest_pars_)[p].setInsetOwner(&dest_buf_->inset()); + } + + return true; +} + + +void Compare::Impl::diff_i(DocRangePair const & rp) +{ + if (abort_) + return; + + // The middle snake + DocPair middle_snake; + + // Divides the problem into two smaller problems, split around + // the snake in the middle. + int const L_ses = findMiddleSnake(rp, middle_snake); + + // Set maximum of progress bar + if (++recursion_level_ == 1) + compare_.progressMax(L_ses); + + // There are now three possibilities: the strings were the same, + // the strings were completely different, or we found a middle + // snake and we can split the string into two parts to process. + if (L_ses == 0) + // Two identical strings (this must be a very rare case, because + // usually this will be part of a snake adjacent to these strings). 
+ writeToDestBuffer(rp.o); + + else if (middle_snake.o.empty()) { + // Two totally different strings + writeToDestBuffer(rp.o, Change::DELETED); + writeToDestBuffer(rp.n, Change::INSERTED); + + } else { + // Retrieve the complete snake + DocPair first_part_end = middle_snake; + traverseSnake(first_part_end, rp, Backward); + DocRangePair first_part(rp.from(), first_part_end); + + DocPair second_part_begin = middle_snake; + traverseSnake(second_part_begin, rp, Forward); + DocRangePair second_part(second_part_begin, rp.to()); + + // Split the string in three parts: + // 1. in front of the snake + diffPart(first_part); + + // 2. the snake itself, and + DocRangePair const snake(first_part.to(), second_part.from()); + processSnake(snake); + + // 3. behind the snake. + diffPart(second_part); + } + --recursion_level_; +} + + +void Compare::Impl::diffPart(DocRangePair const & rp) +{ + // Is there a finite length string in both buffers, if not there + // is an empty string and we write the other one to the buffer. + if (!rp.o.empty() && !rp.n.empty()) + diff_i(rp); + + else if (!rp.o.empty()) + writeToDestBuffer(rp.o, Change::DELETED); + + else if (!rp.n.empty()) + writeToDestBuffer(rp.n, Change::INSERTED); +} + + +void Compare::Impl::diffInset(Inset * inset, DocPair const & p) +{ + // Find the dociterators for the beginning and the + // end of the inset, for the old and new document. + DocRangePair const rp = stepIntoInset(p); + + // Recurse into the inset. Temporarily replace the dest_pars + // paragraph list by the paragraph list of the nested inset. 
+ ParagraphList * backup_dest_pars = dest_pars_; + dest_pars_ = &inset->asInsetText()->text().paragraphs(); + dest_pars_->clear(); + + ++nested_inset_level_; + diff_i(rp); + --nested_inset_level_; + + dest_pars_ = backup_dest_pars; +} + + +void Compare::Impl::processSnake(DocRangePair const & rp) +{ + ParagraphList pars; + getParagraphList(rp.o, pars); + + // Find insets in this paragraph list + DocPair it = rp.from(); + for (; it.o < rp.o.to; ++it) { + Inset * inset = it.o.text()->getPar(it.o.pit()).getInset(it.o.pos()); + if (inset && inset->editable() && inset->asInsetText()) { + // Find the inset in the paragraph list that will be pasted into + // the final document. The contents of the inset will be replaced + // by the output of the algorithm below. + pit_type const pit = it.o.pit() - rp.o.from.pit(); + pos_type const pos = pit ? it.o.pos() : it.o.pos() - rp.o.from.pos(); + inset = pars[pit].getInset(pos); + LASSERT(inset, continue); + diffInset(inset, it); + } + } + writeToDestBuffer(pars); +} + + +void Compare::Impl::writeToDestBuffer(DocRange const & range, + Change::Type type) +{ + ParagraphList pars; + getParagraphList(range, pars); + + pos_type size = 0; + + // Set the change + ParagraphList::iterator it = pars.begin(); + for (; it != pars.end(); ++it) { + it->setChange(Change(type, compare_.options_.author)); + size += it->size(); + } + + writeToDestBuffer(pars); + + if (nested_inset_level_ == 0) + compare_.progress(size); +} + + +void Compare::Impl::writeToDestBuffer(ParagraphList const & pars) const { - return 0; + pit_type const pit = dest_pars_->size() - 1; + dest_pars_->insert(dest_pars_->end(), pars.begin(), pars.end()); + if (pit >= 0) + mergeParagraph(dest_buf_->params(), *dest_pars_, pit); }