git.lyx.org Git - lyx.git/blobdiff - src/Paragraph.cpp
Remove extra whitespace.
[lyx.git] / src / Paragraph.cpp
index f3ba6921cf628c5478f0337cb603ccd3dcb66a3b..df8de4c39f09ced1bece8559b5f0405d4eb102b3 100644 (file)
@@ -43,7 +43,6 @@
 #include "TextClass.h"
 #include "TexRow.h"
 #include "Text.h"
-#include "VSpace.h"
 #include "WordLangTuple.h"
 #include "WordList.h"
 
@@ -51,6 +50,7 @@
 
 #include "insets/InsetBibitem.h"
 #include "insets/InsetLabel.h"
+#include "insets/InsetSpecialChar.h"
 
 #include "support/debug.h"
 #include "support/docstring_list.h"
@@ -71,6 +71,199 @@ namespace lyx {
 namespace {
 /// Inset identifier (above 0x10ffff, for ucs-4)
 char_type const META_INSET = 0x200001;
+}
+
+
+/////////////////////////////////////////////////////////////////////
+//
+// SpellResultRange
+//
+/////////////////////////////////////////////////////////////////////
+
+class SpellResultRange { // one span of paragraph positions plus the spell result stored for it
+public:
+       SpellResultRange(FontSpan range, SpellChecker::Result result)
+       : range_(range), result_(result)
+       {}
+       /// \return the span of positions this result applies to.
+       FontSpan const & range() const { return range_; }
+       /// Replace the stored span with \p r.
+       void range(FontSpan const & r) { range_ = r; }
+       /// \return the spell-check result stored for the span.
+       SpellChecker::Result result() const { return result_; }
+       /// Replace the stored result with \p r.
+       void result(SpellChecker::Result r) { result_ = r; }
+       /// \return whether \p pos lies inside the stored span.
+       bool inside(pos_type pos) const { return range_.inside(pos); }
+       /// \return whether the stored span and \p r overlap (an end point of one lies inside the other).
+       bool covered(FontSpan const & r) const
+       {
+               // 1. first of new range inside current range or
+               // 2. last of new range inside current range or
+               // 3. first of current range inside new range or
+               // 4. last of current range inside new range
+               return range_.inside(r.first) || range_.inside(r.last) ||
+                       r.inside(range_.first) || r.inside(range_.last);
+       }
+       /// Adjust the span by \p offset after an edit at \p pos (used when characters are inserted or erased).
+       void shift(pos_type pos, int offset)
+       {
+               if (range_.first > pos) { // span starts after pos: move it as a whole
+                       range_.first += offset;
+                       range_.last += offset;
+               } else if (range_.last >= pos) { // pos is within the span: only its end moves
+                       range_.last += offset;
+               }
+       }
+private:
+       FontSpan range_ ;
+       SpellChecker::Result result_ ;
+};
+
+
+/////////////////////////////////////////////////////////////////////
+//
+// SpellCheckerState
+//
+/////////////////////////////////////////////////////////////////////
+
+class SpellCheckerState { // stored spell results of a paragraph plus the area still waiting for a check
+public:
+       SpellCheckerState() {
+               needs_refresh_ = true; // a new state starts with a pending check
+               current_change_number_ = 0;
+       }
+       /// Store \p state for \p fp; overlapping ranges are dropped, or trimmed/split if \p state is WORD_OK.
+       void setRange(FontSpan const fp, SpellChecker::Result state)
+       {
+               Ranges result;
+               RangesIterator et = ranges_.end();
+               RangesIterator it = ranges_.begin();
+               for (; it != et; ++it) {
+                       if (!it->covered(fp)) // no overlap with fp: keep unchanged
+                               result.push_back(SpellResultRange(it->range(), it->result()));
+                       else if (state == SpellChecker::WORD_OK) {
+                               // trim or split the current misspelled range
+                               // store misspelled ranges only
+                               FontSpan range = it->range();
+                               if (fp.first > range.first) {
+                                       // misspelled area in front of WORD_OK
+                                       range.last = fp.first - 1;
+                                       result.push_back(SpellResultRange(range, it->result()));
+                                       range = it->range(); // restore the full span for the test below
+                               }
+                               if (fp.last < range.last) {
+                                       // misspelled area after WORD_OK range
+                                       range.first = fp.last + 1;
+                                       result.push_back(SpellResultRange(range, it->result()));
+                               }
+                       }
+               }
+               ranges_ = result;
+               if (state != SpellChecker::WORD_OK) // WORD_OK spans are never stored themselves
+                       ranges_.push_back(SpellResultRange(fp, state));
+       }
+       /// Shift the stored ranges by +1 relative to \p pos and request a refresh at \p pos.
+       void increasePosAfterPos(pos_type pos)
+       {
+               correctRangesAfterPos(pos, 1);
+               needsRefresh(pos);
+       }
+       /// Shift the stored ranges by -1 relative to \p pos and request a refresh at \p pos.
+       void decreasePosAfterPos(pos_type pos)
+       {
+               correctRangesAfterPos(pos, -1);
+               needsRefresh(pos);
+       }
+       /// Lower the end of the pending refresh area to \p pos if it currently lies beyond it.
+       void refreshLast(pos_type pos)
+       {
+               if (pos < refresh_.last)
+                       refresh_.last = pos;
+       }
+       /// \return the result of the first stored range containing \p pos, or WORD_OK if there is none.
+       SpellChecker::Result getState(pos_type pos) const
+       {
+               SpellChecker::Result result = SpellChecker::WORD_OK;
+               RangesIterator et = ranges_.end();
+               RangesIterator it = ranges_.begin();
+               for (; it != et; ++it) {
+                       if(it->inside(pos)) {
+                               return it->result();
+                       }
+               }
+               return result;
+       }
+       /// \return the span of the first stored range containing \p pos, or a default-constructed span.
+       FontSpan const & getRange(pos_type pos) const
+       {
+               /// empty span to indicate mismatch
+               static FontSpan empty_;
+               RangesIterator et = ranges_.end();
+               RangesIterator it = ranges_.begin();
+               for (; it != et; ++it) {
+                       if(it->inside(pos)) {
+                               return it->range();
+                       }
+               }
+               return empty_;
+       }
+       /// \return whether a refresh has been requested and not yet cleared.
+       bool needsRefresh() const {
+               return needs_refresh_;
+       }
+       /// \return the change number recorded by the last needsCompleteRefresh() call (0 initially).
+       SpellChecker::ChangeNumber currentChangeNumber() const {
+               return current_change_number_;
+       }
+       /// Copy the bounds of the pending refresh area into \p first and \p last.
+       void refreshRange(pos_type & first, pos_type & last) const {
+               first = refresh_.first;
+               last = refresh_.last;
+       }
+       /// Request a refresh at \p pos, growing or initialising the pending area; \p pos == -1 clears the request.
+       void needsRefresh(pos_type pos) {
+               if (needs_refresh_ && pos != -1) { // already pending: widen the area to include pos
+                       if (pos < refresh_.first)
+                               refresh_.first = pos;
+                       if (pos > refresh_.last)
+                               refresh_.last = pos;
+               } else if (pos != -1) {
+                       // init request check for neighbour positions too
+                       refresh_.first = pos > 0 ? pos - 1 : 0;
+                       // no need for special end of paragraph check
+                       refresh_.last = pos + 1;
+               }
+               needs_refresh_ = pos != -1; // -1 leaves the area untouched and only resets the flag
+       }
+       /// Request a refresh with area first = 0, last = -1 and record \p change_number.
+       void needsCompleteRefresh(SpellChecker::ChangeNumber change_number) {
+               needs_refresh_ = true;
+               refresh_.first = 0;
+               refresh_.last = -1;
+               current_change_number_ = change_number;
+       }
+
+private:
+       typedef vector<SpellResultRange> Ranges;
+       typedef Ranges::const_iterator RangesIterator;
+       Ranges ranges_; // stored results; setRange() never adds a WORD_OK entry
+       /// the area of the paragraph with pending spell check
+       FontSpan refresh_;
+       bool needs_refresh_;
+       /// spell state cache version number
+       SpellChecker::ChangeNumber current_change_number_;
+
+       /// Apply SpellResultRange::shift(pos, offset) to every stored range.
+       void correctRangesAfterPos(pos_type pos, int offset)
+       {
+               RangesIterator et = ranges_.end();
+               Ranges::iterator it = ranges_.begin();
+               for (; it != et; ++it) {
+                       it->shift(pos, offset);
+               }
+       }
+
 };
 
 /////////////////////////////////////////////////////////////////////
@@ -94,14 +287,14 @@ public:
 
        /// Output the surrogate pair formed by \p c and \p next to \p os.
        /// \return the number of characters written.
-       int latexSurrogatePair(odocstream & os, char_type c, char_type next,
+       int latexSurrogatePair(otexstream & os, char_type c, char_type next,
                               OutputParams const &);
 
        /// Output a space in appropriate formatting (or a surrogate pair
        /// if the next character is a combining character).
        /// \return whether a surrogate pair was output.
        bool simpleTeXBlanks(OutputParams const &,
-                            odocstream &, TexRow & texrow,
+                            otexstream &,
                             pos_type i,
                             unsigned int & column,
                             Font const & font,
@@ -110,21 +303,21 @@ public:
        /// Output consecutive unicode chars, belonging to the same script as
        /// specified by the latex macro \p ltx, to \p os starting from \p i.
        /// \return the number of characters written.
-       int writeScriptChars(odocstream & os, docstring const & ltx,
+       int writeScriptChars(otexstream & os, docstring const & ltx,
                           Change const &, Encoding const &, pos_type & i);
 
        /// This could go to ParagraphParameters if we want to.
-       int startTeXParParams(BufferParams const &, odocstream &, TexRow &,
+       int startTeXParParams(BufferParams const &, otexstream &,
                              OutputParams const &) const;
 
        /// This could go to ParagraphParameters if we want to.
-       int endTeXParParams(BufferParams const &, odocstream &, TexRow &,
-                           OutputParams const &) const;
+       bool endTeXParParams(BufferParams const &, otexstream &,
+                            OutputParams const &) const;
 
        ///
        void latexInset(BufferParams const &,
-                                  odocstream &,
-                                  TexRow & texrow, OutputParams &,
+                                  otexstream &,
+                                  OutputParams &,
                                   Font & running_font,
                                   Font & basefont,
                                   Font const & outerfont,
@@ -136,30 +329,32 @@ public:
 
        ///
        void latexSpecialChar(
-                                  odocstream & os,
+                                  otexstream & os,
                                   OutputParams const & runparams,
                                   Font const & running_font,
                                   Change const & running_change,
                                   Layout const & style,
                                   pos_type & i,
+                                  pos_type end_pos,
                                   unsigned int & column);
 
        ///
        bool latexSpecialT1(
                char_type const c,
-               odocstream & os,
+               otexstream & os,
                pos_type i,
                unsigned int & column);
        ///
        bool latexSpecialTypewriter(
                char_type const c,
-               odocstream & os,
+               otexstream & os,
                pos_type i,
                unsigned int & column);
        ///
        bool latexSpecialPhrase(
-               odocstream & os,
+               otexstream & os,
                pos_type & i,
+               pos_type end_pos,
                unsigned int & column,
                OutputParams const & runparams);
 
@@ -173,12 +368,91 @@ public:
        /// match a string against a particular point in the paragraph
        bool isTextAt(string const & str, pos_type pos) const;
 
+       /// a vector of speller skip positions
+       typedef vector<FontSpan> SkipPositions;
+       typedef SkipPositions::const_iterator SkipPositionsIterator;
+
+       void appendSkipPosition(SkipPositions & skips, pos_type const pos) const;
+       
+       Language * getSpellLanguage(pos_type const from) const;
+
+       Language * locateSpellRange(pos_type & from, pos_type & to,
+                                                               SkipPositions & skips) const;
+
+       bool hasSpellerChange() const { // has the speller's change number moved past the cached one?
+               SpellChecker::ChangeNumber speller_change_number = 0; // stays 0 when theSpellChecker() is null
+               if (theSpellChecker())
+                       speller_change_number = theSpellChecker()->changeNumber();
+               return speller_change_number > speller_state_.currentChangeNumber();
+       }
+
+       bool ignoreWord(docstring const & word) const ;
+       
+       void setMisspelled(pos_type from, pos_type to, SpellChecker::Result state) // store state for positions [from, to)
+       {
+               pos_type textsize = owner_->size();
+               // check for sane arguments: ignore empty or reversed ranges and ranges starting past the text
+               if (to <= from || from >= textsize)
+                       return;
+               FontSpan fp = FontSpan(from, to - 1); // to is exclusive, the span passed on ends at to - 1
+               speller_state_.setRange(fp, state);
+       }
+
+       void requestSpellCheck(pos_type pos) { // ask for a spell check at pos, or a complete one if pos is -1
+               if (pos == -1) // complete refresh, keeping the currently recorded change number
+                       speller_state_.needsCompleteRefresh(speller_state_.currentChangeNumber());
+               else // only extend the pending area to include pos
+                       speller_state_.needsRefresh(pos);
+       }
+
+       void readySpellCheck() { // mark the pending spell check as done
+               speller_state_.needsRefresh(-1); // -1 resets the needs-refresh flag
+       }
+
+       bool needsSpellCheck() const // is a refresh request pending in speller_state_?
+       {
+               return speller_state_.needsRefresh();
+       }
+
+       void rangeOfSpellCheck(pos_type & first, pos_type & last) const // report the area that needs checking
+       {
+               speller_state_.refreshRange(first, last);
+               if (last == -1) { // -1 means: up to the end of the paragraph
+                       last = owner_->size();
+                       return;
+               }
+               pos_type endpos = last;
+               owner_->locateWord(first, endpos, WHOLE_WORD); // NOTE(review): presumably widens [first, endpos) to word boundaries - confirm in locateWord
+               if (endpos < last) { // the located end is still before last: locate again starting at last
+                       endpos = last;
+                       owner_->locateWord(last, endpos, WHOLE_WORD);
+               }
+               last = endpos;
+       }
+
+       int countSkips(SkipPositionsIterator & it, SkipPositionsIterator const et, // consume skip spans starting before start
+                           int & start) const
+       {
+               int numskips = 0; // total number of skipped positions, returned to the caller
+               while (it != et && it->first < start) {
+                       int skip = it->last - it->first + 1; // span bounds are inclusive
+                       start += skip; // start is pushed forward by every consumed span
+                       numskips += skip;
+                       ++it; // the caller's iterator is left at the first unconsumed span
+               }
+               return numskips;
+       }
+
+       void markMisspelledWords(pos_type const & first, pos_type const & last,
+                                                        SpellChecker::Result result,
+                                                        docstring const & word,
+                                                        SkipPositions const & skips);
 
        InsetCode ownerCode() const // lyxCode() of the owning inset
        {
                return inset_owner_ ? inset_owner_->lyxCode() : NO_CODE; // NO_CODE when inset_owner_ is null
        }
-       
+
        /// Which Paragraph owns us?
        Paragraph * owner_;
 
@@ -189,9 +463,8 @@ public:
        FontList fontlist_;
 
        ///
-       unsigned int id_;
-       ///
-       static unsigned int paragraph_id;
+       int id_;
+
        ///
        ParagraphParameters params_;
 
@@ -207,18 +480,18 @@ public:
        typedef docstring TextContainer;
        ///
        TextContainer text_;
-       
-       typedef std::set<docstring> Words;
+
+       typedef set<docstring> Words;
+       typedef map<Language, Words> LangWordsMap;
        ///
-       Words words_;
+       LangWordsMap words_;
        ///
        Layout const * layout_;
+       ///
+       SpellCheckerState speller_state_;
 };
 
 
-// Initialization of the counter for the paragraph id's,
-unsigned int Paragraph::Private::paragraph_id = 0;
-
 namespace {
 
 struct special_phrase {
@@ -240,20 +513,27 @@ size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase);
 
 
 Paragraph::Private::Private(Paragraph * owner, Layout const & layout)
-       : owner_(owner), inset_owner_(0), begin_of_body_(0), layout_(&layout)
+       : owner_(owner), inset_owner_(0), id_(-1), begin_of_body_(0), layout_(&layout)
 {
-       id_ = paragraph_id++;
        text_.reserve(100);
 }
 
 
+// Initialization of the counter for the paragraph ids.
+//
+// FIXME: There should be a more intelligent way to generate and use the
+// paragraph ids per buffer instead of a global static counter for all InsetText
+// in the running program.
+static int paragraph_id = -1;
+
 Paragraph::Private::Private(Private const & p, Paragraph * owner)
-       : owner_(owner), inset_owner_(p.inset_owner_), fontlist_(p.fontlist_), 
+       : owner_(owner), inset_owner_(p.inset_owner_), fontlist_(p.fontlist_),
          params_(p.params_), changes_(p.changes_), insetlist_(p.insetlist_),
          begin_of_body_(p.begin_of_body_), text_(p.text_), words_(p.words_),
          layout_(p.layout_)
 {
-       id_ = paragraph_id++;
+       id_ = ++paragraph_id;
+       requestSpellCheck(p.text_.size());
 }
 
 
@@ -265,7 +545,7 @@ Paragraph::Private::Private(Private const & p, Paragraph * owner,
          begin_of_body_(p.begin_of_body_), words_(p.words_),
          layout_(p.layout_)
 {
-       id_ = paragraph_id++;
+       id_ = ++paragraph_id;
        if (beg >= pos_type(p.text_.size()))
                return;
        text_ = p.text_.substr(beg, end - beg);
@@ -283,6 +563,7 @@ Paragraph::Private::Private(Private const & p, Paragraph * owner,
                // Add a new entry in the fontlist_.
                fontlist_.set(fcit->pos() - beg, fcit->font());
        }
+       requestSpellCheck(p.text_.size());
 }
 
 
@@ -450,6 +731,8 @@ void Paragraph::Private::insertChar(pos_type pos, char_type c,
        if (pos == pos_type(text_.size())) {
                // when appending characters, no need to update tables
                text_.push_back(c);
+               // but we want spell checking
+               requestSpellCheck(pos);
                return;
        }
 
@@ -460,6 +743,9 @@ void Paragraph::Private::insertChar(pos_type pos, char_type c,
 
        // Update the insets
        insetlist_.increasePosAfterPos(pos);
+
+       // Update list of misspelled positions
+       speller_state_.increasePosAfterPos(pos);
 }
 
 
@@ -479,6 +765,9 @@ bool Paragraph::insertInset(pos_type pos, Inset * inset,
 
        // Add a new entry in the insetlist_.
        d->insetlist_.insert(inset, pos);
+
+       // Some insets require run of spell checker
+       requestSpellCheck(pos);
        return true;
 }
 
@@ -499,6 +788,8 @@ bool Paragraph::eraseChar(pos_type pos, bool trackChanges)
                if (!change.changed() ||
                      (change.inserted() && !change.currentAuthor())) {
                        setChange(pos, Change(Change::DELETED));
+                       // request run of spell checker
+                       requestSpellCheck(pos);
                        return false;
                }
 
@@ -528,6 +819,10 @@ bool Paragraph::eraseChar(pos_type pos, bool trackChanges)
        // Update the insetlist_
        d->insetlist_.decreasePosAfterPos(pos);
 
+       // Update list of misspelled positions
+       d->speller_state_.decreasePosAfterPos(pos);
+       d->speller_state_.refreshLast(size());
+
        return true;
 }
 
@@ -546,7 +841,7 @@ int Paragraph::eraseChars(pos_type start, pos_type end, bool trackChanges)
 }
 
 
-int Paragraph::Private::latexSurrogatePair(odocstream & os, char_type c,
+int Paragraph::Private::latexSurrogatePair(otexstream & os, char_type c,
                char_type next, OutputParams const & runparams)
 {
        // Writing next here may circumvent a possible font change between
@@ -575,13 +870,13 @@ int Paragraph::Private::latexSurrogatePair(odocstream & os, char_type c,
 
 
 bool Paragraph::Private::simpleTeXBlanks(OutputParams const & runparams,
-                                      odocstream & os, TexRow & texrow,
+                                      otexstream & os,
                                       pos_type i,
                                       unsigned int & column,
                                       Font const & font,
                                       Layout const & style)
 {
-       if (style.pass_thru || runparams.verbatim)
+       if (style.pass_thru || runparams.pass_thru)
                return false;
 
        if (i + 1 < int(text_.size())) {
@@ -608,8 +903,7 @@ bool Paragraph::Private::simpleTeXBlanks(OutputParams const & runparams,
                     || text_[i - 1] == ':'
                     || text_[i - 1] == '!'))) {
                os << '\n';
-               texrow.newline();
-               texrow.start(owner_->id(), i + 1);
+               os.texrow().start(owner_->id(), i + 1);
                column = 0;
        } else if (style.free_spacing) {
                os << '~';
@@ -620,7 +914,7 @@ bool Paragraph::Private::simpleTeXBlanks(OutputParams const & runparams,
 }
 
 
-int Paragraph::Private::writeScriptChars(odocstream & os,
+int Paragraph::Private::writeScriptChars(otexstream & os,
                                         docstring const & ltx,
                                         Change const & runningChange,
                                         Encoding const & encoding,
@@ -713,25 +1007,23 @@ bool Paragraph::Private::isTextAt(string const & str, pos_type pos) const
 }
 
 
-void Paragraph::Private::latexInset(
-                                            BufferParams const & bparams,
-                                            odocstream & os,
-                                            TexRow & texrow,
-                                            OutputParams & runparams,
-                                            Font & running_font,
-                                            Font & basefont,
-                                            Font const & outerfont,
-                                            bool & open_font,
-                                            Change & running_change,
-                                            Layout const & style,
-                                            pos_type & i,
-                                            unsigned int & column)
+void Paragraph::Private::latexInset(BufferParams const & bparams,
+                                   otexstream & os,
+                                   OutputParams & runparams,
+                                   Font & running_font,
+                                   Font & basefont,
+                                   Font const & outerfont,
+                                   bool & open_font,
+                                   Change & running_change,
+                                   Layout const & style,
+                                   pos_type & i,
+                                   unsigned int & column)
 {
        Inset * inset = owner_->getInset(i);
        LASSERT(inset, /**/);
 
        if (style.pass_thru) {
-               inset->plaintext(os, runparams);
+               inset->plaintext(os.os(), runparams);
                return;
        }
 
@@ -759,8 +1051,7 @@ void Paragraph::Private::latexInset(
                                os << "\\protect ";
 
                }
-               texrow.newline();
-               texrow.start(owner_->id(), i + 1);
+               os.texrow().start(owner_->id(), i + 1);
                column = 0;
        }
 
@@ -776,14 +1067,14 @@ void Paragraph::Private::latexInset(
        }
 
        bool close = false;
-       odocstream::pos_type const len = os.tellp();
+       odocstream::pos_type const len = os.os().tellp();
 
        if (inset->forceLTR()
            && running_font.isRightToLeft()
            // ERT is an exception, it should be output with no
            // decorations at all
            && inset->lyxCode() != ERT_CODE) {
-               if (running_font.language()->lang() == "farsi")
+               if (running_font.language()->lang() == "farsi")
                        os << "\\beginL{}";
                else
                        os << "\\L{";
@@ -802,16 +1093,16 @@ void Paragraph::Private::latexInset(
        // ArabTeX, though, cannot handle this special behavior, it seems.
        bool arabtex = basefont.language()->lang() == "arabic_arabtex"
                || running_font.language()->lang() == "arabic_arabtex";
-       if (open_font && inset->noFontChange()) {
+       if (open_font && !inset->inheritFont()) {
                bool closeLanguage = arabtex
                        || basefont.isRightToLeft() == running_font.isRightToLeft();
                unsigned int count = running_font.latexWriteEndChanges(os,
                        bparams, runparams, basefont, basefont, closeLanguage);
                column += count;
-               // if any font properties were closed, update the running_font, 
+               // if any font properties were closed, update the running_font,
                // making sure, however, to leave the language as it was
                if (count > 0) {
-                       // FIXME: probably a better way to keep track of the old 
+                       // FIXME: probably a better way to keep track of the old
                        // language, than copying the entire font?
                        Font const copy_font(running_font);
                        basefont = owner_->getLayoutFont(bparams, outerfont);
@@ -825,10 +1116,12 @@ void Paragraph::Private::latexInset(
                }
        }
 
-       int tmp;
+       int prev_rows = os.texrow().rows();
 
        try {
-               tmp = inset->latex(os, runparams);
+               runparams.lastid = id_;
+               runparams.lastpos = i;
+               inset->latex(os, runparams);
        } catch (EncodingException & e) {
                // add location information and throw again.
                e.par_id = id_;
@@ -843,14 +1136,11 @@ void Paragraph::Private::latexInset(
                                os << '}';
        }
 
-       if (tmp) {
-               for (int j = 0; j < tmp; ++j)
-                       texrow.newline();
-
-               texrow.start(owner_->id(), i + 1);
+       if (os.texrow().rows() > prev_rows) {
+               os.texrow().start(owner_->id(), i + 1);
                column = 0;
        } else {
-               column += os.tellp() - len;
+               column += (unsigned int)(os.os().tellp() - len);
        }
 
        if (owner_->isDeleted(i))
@@ -858,29 +1148,24 @@ void Paragraph::Private::latexInset(
 }
 
 
-void Paragraph::Private::latexSpecialChar(
-                                            odocstream & os,
-                                            OutputParams const & runparams,
-                                            Font const & running_font,
-                                            Change const & running_change,
-                                            Layout const & style,
-                                            pos_type & i,
-                                            unsigned int & column)
+void Paragraph::Private::latexSpecialChar(otexstream & os,
+                                         OutputParams const & runparams,
+                                         Font const & running_font,
+                                         Change const & running_change,
+                                         Layout const & style,
+                                         pos_type & i,
+                                         pos_type end_pos,
+                                         unsigned int & column)
 {
        char_type const c = text_[i];
 
-       if (style.pass_thru) {
-               if (c != '\0')
-                       // FIXME UNICODE: This can fail if c cannot
-                       // be encoded in the current encoding.
+       if (style.pass_thru || runparams.pass_thru) {
+               if (c != '\0') {
+                       Encoding const * const enc = runparams.encoding;
+                       if (enc && enc->latexChar(c, true).empty())
+                               throw EncodingException(c);
                        os.put(c);
-               return;
-       }
-
-       if (runparams.verbatim) {
-               // FIXME UNICODE: This can fail if c cannot
-               // be encoded in the current encoding.
-               os.put(c);
+               }
                return;
        }
 
@@ -961,7 +1246,7 @@ void Paragraph::Private::latexSpecialChar(
 
        default:
                // LyX, LaTeX etc.
-               if (latexSpecialPhrase(os, i, column, runparams))
+               if (latexSpecialPhrase(os, i, end_pos, column, runparams))
                        return;
 
                if (c == '\0')
@@ -997,7 +1282,7 @@ void Paragraph::Private::latexSpecialChar(
 }
 
 
-bool Paragraph::Private::latexSpecialT1(char_type const c, odocstream & os,
+bool Paragraph::Private::latexSpecialT1(char_type const c, otexstream & os,
        pos_type i, unsigned int & column)
 {
        switch (c) {
@@ -1025,7 +1310,7 @@ bool Paragraph::Private::latexSpecialT1(char_type const c, odocstream & os,
 }
 
 
-bool Paragraph::Private::latexSpecialTypewriter(char_type const c, odocstream & os,
+bool Paragraph::Private::latexSpecialTypewriter(char_type const c, otexstream & os,
        pos_type i, unsigned int & column)
 {
        switch (c) {
@@ -1047,7 +1332,10 @@ bool Paragraph::Private::latexSpecialTypewriter(char_type const c, odocstream &
 }
 
 
-bool Paragraph::Private::latexSpecialPhrase(odocstream & os, pos_type & i,
+/// \param end_pos
+///   If [i, end_pos) does not contain the whole special phrase, the macro
+///   transformation is not applied; an end_pos of -1 disables this check.
+bool Paragraph::Private::latexSpecialPhrase(otexstream & os, pos_type & i, pos_type end_pos,
        unsigned int & column, OutputParams const & runparams)
 {
        // FIXME: if we have "LaTeX" with a font
@@ -1057,7 +1345,8 @@ bool Paragraph::Private::latexSpecialPhrase(odocstream & os, pos_type & i,
        // "words" for some definition of word
 
        for (size_t pnr = 0; pnr < phrases_nr; ++pnr) {
-               if (!isTextAt(special_phrases[pnr].phrase, i))
+               if (!isTextAt(special_phrases[pnr].phrase, i)
+                   || (end_pos != -1 && i + int(special_phrases[pnr].phrase.size()) > end_pos))
                        continue;
                if (runparams.moving_arg)
                        os << "\\protect";
@@ -1073,17 +1362,44 @@ bool Paragraph::Private::latexSpecialPhrase(odocstream & os, pos_type & i,
 void Paragraph::Private::validate(LaTeXFeatures & features) const
 {
        if (layout_->inpreamble && inset_owner_) {
+               bool const is_command = layout_->latextype == LATEX_COMMAND;
                Buffer const & buf = inset_owner_->buffer();
                BufferParams const & bp = buf.params();
                Font f;
-               TexRow tr;
+               TexRow texrow;
+               // Using a string stream here circumvents the encoding
+               // switching machinery of odocstream. Therefore the
+               // output is wrong if this paragraph contains content
+               // that needs to switch encoding.
                odocstringstream ods;
-               owner_->latex(bp, f, ods, tr, features.runparams());
-               docstring d = ods.str();
-               if (!d.empty())
-                       features.addPreambleSnippet(to_utf8(d));
+               otexstream os(ods, texrow);
+               if (is_command) {
+                       os << '\\' << from_ascii(layout_->latexname());
+                       // we have to provide all the optional arguments here, even though
+                       // the last one is the only one we care about.
+                       // Separate handling of optional argument inset.
+                       if (layout_->optargs != 0 || layout_->reqargs != 0)
+                               latexArgInsets(*owner_, os, features.runparams(),
+                                       layout_->reqargs, layout_->optargs);
+                       else
+                               os << from_ascii(layout_->latexparam());
+               }
+               docstring::size_type const length = ods.str().length();
+               // this will output "{" at the beginning, but not at the end
+               owner_->latex(bp, f, os, features.runparams(), 0, -1, true);
+               if (ods.str().length() > length) {
+                       if (is_command)
+                               ods << '}';
+                       string const snippet = to_utf8(ods.str());
+                       features.addPreambleSnippet(snippet);
+               }
        }
-       
+
+       if (features.runparams().flavor == OutputParams::HTML
+           && layout_->htmltitle()) {
+               features.setHTMLTitle(owner_->asString(AS_STR_INSETS));
+       }
+
        // check the params.
        if (!params_.spacing().isDefault())
                features.require("setspace");
@@ -1133,7 +1449,7 @@ namespace {
        Layout const emptyParagraphLayout;
 }
 
-Paragraph::Paragraph() 
+Paragraph::Paragraph()
        : d(new Paragraph::Private(this, emptyParagraphLayout))
 {
        itemdepth = 0;
@@ -1234,9 +1550,8 @@ void Paragraph::write(ostream & os, BufferParams const & bparams,
                if (i == size())
                        break;
 
-               // Write font changes (ignore spelling markers)
+               // Write font changes
                Font font2 = getFontSettings(bparams, i);
-               font2.setMisspelled(false);
                if (font2 != font1) {
                        flushString(os, write_buffer);
                        font2.lyxWriteChanges(font1, os);
@@ -1322,6 +1637,7 @@ void Paragraph::appendChar(char_type c, Font const & font,
        // when appending characters, no need to update tables
        d->text_.push_back(c);
        setFont(d->text_.size() - 1, font);
+       d->requestSpellCheck(d->text_.size() - 1);
 }
 
 
@@ -1342,6 +1658,7 @@ void Paragraph::appendString(docstring const & s, Font const & font,
        for (size_t i = oldsize; i != newsize; ++i) {
                // track change
                d->changes_.insert(change, i);
+               d->requestSpellCheck(i);
        }
        d->fontlist_.set(oldsize, font);
        d->fontlist_.set(newsize - 1, font);
@@ -1564,7 +1881,7 @@ void Paragraph::setFont(pos_type pos, Font const & font)
        // First, reduce font against layout/label font
        // Update: The setCharFont() routine in text2.cpp already
        // reduces font, so we don't need to do that here. (Asger)
-       
+
        d->fontlist_.set(pos, font);
 }
 
@@ -1650,6 +1967,20 @@ void Paragraph::setLabelWidthString(docstring const & s)
 
 
 docstring Paragraph::expandLabel(Layout const & layout,
+               BufferParams const & bparams) const
+{
+       return expandParagraphLabel(layout, bparams, true); // process_appendix = true
+}
+
+
+docstring Paragraph::expandDocBookLabel(Layout const & layout,
+               BufferParams const & bparams) const
+{
+       return expandParagraphLabel(layout, bparams, false); // process_appendix = false
+}
+
+
+docstring Paragraph::expandParagraphLabel(Layout const & layout,
                BufferParams const & bparams, bool process_appendix) const
 {
        DocumentClass const & tclass = bparams.documentClass();
@@ -1657,7 +1988,7 @@ docstring Paragraph::expandLabel(Layout const & layout,
        bool const in_appendix = process_appendix && d->params_.appendix();
        docstring fmt = translateIfPossible(layout.labelstring(in_appendix), lang);
 
-       if (fmt.empty() && layout.labeltype == LABEL_COUNTER 
+       if (fmt.empty() && layout.labeltype == LABEL_COUNTER
            && !layout.counter.empty())
                return tclass.counters().theCounter(layout.counter, lang);
 
@@ -1670,9 +2001,9 @@ docstring Paragraph::expandLabel(Layout const & layout,
                        docstring parent(fmt, i + 1, j - i - 1);
                        docstring label = from_ascii("??");
                        if (tclass.hasLayout(parent))
-                               docstring label = expandLabel(tclass[parent], bparams,
+                               docstring label = expandParagraphLabel(tclass[parent], bparams,
                                                      process_appendix);
-                       fmt = docstring(fmt, 0, i) + label 
+                       fmt = docstring(fmt, 0, i) + label
                                + docstring(fmt, j + 1, docstring::npos);
                }
        }
@@ -1685,9 +2016,9 @@ void Paragraph::applyLayout(Layout const & new_layout)
 {
        d->layout_ = &new_layout;
        LyXAlignment const oldAlign = d->params_.align();
-       
+
        if (!(oldAlign & d->layout_->alignpossible)) {
-               frontend::Alert::warning(_("Alignment not permitted"), 
+               frontend::Alert::warning(_("Alignment not permitted"),
                        _("The new layout does not permit the alignment previously used.\nSetting to default."));
                d->params_.align(LYX_ALIGN_LAYOUT);
        }
@@ -1747,6 +2078,11 @@ bool Paragraph::usePlainLayout() const
 }
 
 
+bool Paragraph::isPassThru() const // true when either layout involved requests pass-through
+{
+       return inInset().getLayout().isPassThru() || d->layout_->pass_thru; // enclosing inset layout OR this paragraph's own layout
+}
+
 namespace {
 
 // paragraphs inside floats need different alignment tags to avoid
@@ -1794,13 +2130,12 @@ string const corrected_env(string const & suffix, string const & env,
 }
 
 
-void adjust_row_column(string const & str, TexRow & texrow, int & column)
+void adjust_column(string const & str, int & column) // updates the output column only; the TexRow argument and its newline() call are dropped
 {
        if (!contains(str, "\n"))
                column += str.size();
        else {
                string tmp;
-               texrow.newline();
                column = rsplit(str, tmp, '\n').size();
        }
 }
@@ -1809,16 +2144,15 @@ void adjust_row_column(string const & str, TexRow & texrow, int & column)
 
 
 int Paragraph::Private::startTeXParParams(BufferParams const & bparams,
-                                odocstream & os, TexRow & texrow,
-                                OutputParams const & runparams) const
+                       otexstream & os, OutputParams const & runparams) const
 {
        int column = 0;
 
-       if (params_.noindent()) {
+       if (params_.noindent() && !layout_->pass_thru) {
                os << "\\noindent ";
                column += 10;
        }
-       
+
        LyXAlignment const curAlign = params_.align();
 
        if (curAlign == layout_->align)
@@ -1829,6 +2163,7 @@ int Paragraph::Private::startTeXParParams(BufferParams const & bparams,
        case LYX_ALIGN_BLOCK:
        case LYX_ALIGN_LAYOUT:
        case LYX_ALIGN_SPECIAL:
+       case LYX_ALIGN_DECIMAL:
                break;
        case LYX_ALIGN_LEFT:
        case LYX_ALIGN_RIGHT:
@@ -1849,6 +2184,7 @@ int Paragraph::Private::startTeXParParams(BufferParams const & bparams,
        case LYX_ALIGN_BLOCK:
        case LYX_ALIGN_LAYOUT:
        case LYX_ALIGN_SPECIAL:
+       case LYX_ALIGN_DECIMAL:
                break;
        case LYX_ALIGN_LEFT: {
                string output;
@@ -1857,7 +2193,7 @@ int Paragraph::Private::startTeXParParams(BufferParams const & bparams,
                else
                        output = corrected_env(begin_tag, "flushright", code, lastpar);
                os << from_ascii(output);
-               adjust_row_column(output, texrow, column);
+               adjust_column(output, column);
                break;
        } case LYX_ALIGN_RIGHT: {
                string output;
@@ -1866,13 +2202,13 @@ int Paragraph::Private::startTeXParParams(BufferParams const & bparams,
                else
                        output = corrected_env(begin_tag, "flushleft", code, lastpar);
                os << from_ascii(output);
-               adjust_row_column(output, texrow, column);
+               adjust_column(output, column);
                break;
        } case LYX_ALIGN_CENTER: {
                string output;
                output = corrected_env(begin_tag, "center", code, lastpar);
                os << from_ascii(output);
-               adjust_row_column(output, texrow, column);
+               adjust_column(output, column);
                break;
        }
        }
@@ -1881,33 +2217,30 @@ int Paragraph::Private::startTeXParParams(BufferParams const & bparams,
 }
 
 
-int Paragraph::Private::endTeXParParams(BufferParams const & bparams,
-                              odocstream & os, TexRow & texrow,
-                              OutputParams const & runparams) const
+bool Paragraph::Private::endTeXParParams(BufferParams const & bparams,
+                       otexstream & os, OutputParams const & runparams) const
 {
-       int column = 0;
-
        LyXAlignment const curAlign = params_.align();
 
        if (curAlign == layout_->align)
-               return column;
+               return false;
 
        switch (curAlign) {
        case LYX_ALIGN_NONE:
        case LYX_ALIGN_BLOCK:
        case LYX_ALIGN_LAYOUT:
        case LYX_ALIGN_SPECIAL:
+       case LYX_ALIGN_DECIMAL:
                break;
        case LYX_ALIGN_LEFT:
        case LYX_ALIGN_RIGHT:
        case LYX_ALIGN_CENTER:
-               if (runparams.moving_arg) {
+               if (runparams.moving_arg)
                        os << "\\protect";
-                       column = 8;
-               }
                break;
        }
 
+       string output;
        string const end_tag = "\n\\par\\end";
        InsetCode code = ownerCode();
        bool const lastpar = runparams.isLastPar;
@@ -1917,59 +2250,52 @@ int Paragraph::Private::endTeXParParams(BufferParams const & bparams,
        case LYX_ALIGN_BLOCK:
        case LYX_ALIGN_LAYOUT:
        case LYX_ALIGN_SPECIAL:
+       case LYX_ALIGN_DECIMAL:
                break;
        case LYX_ALIGN_LEFT: {
-               string output;
                if (owner_->getParLanguage(bparams)->babel() != "hebrew")
                        output = corrected_env(end_tag, "flushleft", code, lastpar);
                else
                        output = corrected_env(end_tag, "flushright", code, lastpar);
                os << from_ascii(output);
-               adjust_row_column(output, texrow, column);
                break;
        } case LYX_ALIGN_RIGHT: {
-               string output;
                if (owner_->getParLanguage(bparams)->babel() != "hebrew")
                        output = corrected_env(end_tag, "flushright", code, lastpar);
                else
                        output = corrected_env(end_tag, "flushleft", code, lastpar);
                os << from_ascii(output);
-               adjust_row_column(output, texrow, column);
                break;
        } case LYX_ALIGN_CENTER: {
-               string output;
                output = corrected_env(end_tag, "center", code, lastpar);
                os << from_ascii(output);
-               adjust_row_column(output, texrow, column);
                break;
        }
        }
 
-       return column;
+       return !output.empty() || lastpar;
 }
 
 
 // This one spits out the text of the paragraph
-bool Paragraph::latex(BufferParams const & bparams,
+void Paragraph::latex(BufferParams const & bparams,
        Font const & outerfont,
-       odocstream & os, TexRow & texrow,
+       otexstream & os,
        OutputParams const & runparams,
-       int start_pos, int end_pos) const
+       int start_pos, int end_pos, bool force) const
 {
        LYXERR(Debug::LATEX, "Paragraph::latex...     " << this);
 
-       if (layout().inpreamble)
-               return true;
-
-       bool return_value = false;
-
-       bool const allowcust = allowParagraphCustomization();
-
        // FIXME This check should not be needed. Perhaps issue an
        // error if it triggers.
        Layout const & style = inInset().forcePlainLayout() ?
                bparams.documentClass().plainLayout() : *d->layout_;
 
+       if (!force && style.inpreamble)
+               return;
+
+       bool const allowcust = allowParagraphCustomization();
+
        // Current base font for all inherited font changes, without any
        // change caused by an individual character, except for the language:
        // It is set to the language of the first character.
@@ -1983,8 +2309,14 @@ bool Paragraph::latex(BufferParams const & bparams,
        unsigned int column = 0;
 
        if (body_pos > 0) {
-               os << '[';
-               column += 1;
+               // the optional argument is kept in curly brackets in
+               // case it contains a ']'
+               // This is not strictly needed, but if this is changed it
+               // would be a file format change, and tex2lyx would need
+               // to be adjusted, since it unconditionally removes the
+               // braces when it parses \item.
+               os << "[{";
+               column += 2;
                basefont = getLabelFont(bparams, outerfont);
        } else {
                basefont = getLayoutFont(bparams, outerfont);
@@ -1999,7 +2331,7 @@ bool Paragraph::latex(BufferParams const & bparams,
 
        Encoding const * const prev_encoding = runparams.encoding;
 
-       texrow.start(id(), 0);
+       os.texrow().start(id(), 0);
 
        // if the paragraph is empty, the loop will not be entered at all
        if (empty()) {
@@ -2008,8 +2340,7 @@ bool Paragraph::latex(BufferParams const & bparams,
                        ++column;
                }
                if (allowcust)
-                       column += d->startTeXParParams(bparams, os, texrow,
-                                                   runparams);
+                       column += d->startTeXParParams(bparams, os, runparams);
        }
 
        for (pos_type i = 0; i < size(); ++i) {
@@ -2030,8 +2361,8 @@ bool Paragraph::latex(BufferParams const & bparams,
                                                runparams);
                                runningChange = Change(Change::UNCHANGED);
 
-                               os << "] ";
-                               column +=2;
+                               os << "}] ";
+                               column +=3;
                        }
                        if (style.isCommand()) {
                                os << '{';
@@ -2040,12 +2371,11 @@ bool Paragraph::latex(BufferParams const & bparams,
 
                        if (allowcust)
                                column += d->startTeXParParams(bparams, os,
-                                                           texrow,
                                                            runparams);
                }
 
-               Change const & change = runparams.inDeletedInset ? runparams.changeOfDeletedInset
-                                                                : lookupChange(i);
+               Change const & change = runparams.inDeletedInset
+                       ? runparams.changeOfDeletedInset : lookupChange(i);
 
                if (bparams.outputChanges && runningChange != change) {
                        if (open_font) {
@@ -2086,22 +2416,27 @@ bool Paragraph::latex(BufferParams const & bparams,
                        open_font = false;
                }
 
+               string const running_lang = runparams.use_polyglossia ?
+                       running_font.language()->polyglossia() : running_font.language()->babel();
                // close babel's font environment before opening CJK.
-               if (!running_font.language()->babel().empty() &&
+               string const lang_end_command = runparams.use_polyglossia ?
+                       "\\end{$$lang}" : lyxrc.language_command_end;
+               if (!running_lang.empty() &&
                    font.language()->encoding()->package() == Encoding::CJK) {
-                               string end_tag = subst(lyxrc.language_command_end,
+                               string end_tag = subst(lang_end_command,
                                                        "$$lang",
-                                                       running_font.language()->babel());
+                                                       running_lang);
                                os << from_ascii(end_tag);
                                column += end_tag.length();
                }
 
                // Switch file encoding if necessary (and allowed)
-               if (!runparams.verbatim && 
+               if (!runparams.pass_thru && !style.pass_thru &&
                    runparams.encoding->package() != Encoding::none &&
                    font.language()->encoding()->package() != Encoding::none) {
-                       pair<bool, int> const enc_switch = switchEncoding(os, bparams,
-                                       runparams, *(font.language()->encoding()));
+                       pair<bool, int> const enc_switch =
+                               switchEncoding(os.os(), bparams, runparams,
+                                       *(font.language()->encoding()));
                        if (enc_switch.first) {
                                column += enc_switch.second;
                                runparams.encoding = font.language()->encoding();
@@ -2130,7 +2465,7 @@ bool Paragraph::latex(BufferParams const & bparams,
                        // check if the fontchange ends with a trailing blank
                        // (like "\small " (see bug 3382)
                        else if (suffixIs(fontchange, ' ') && c == ' ')
-                               os << fontchange.substr(0, fontchange.size() - 1) 
+                               os << fontchange.substr(0, fontchange.size() - 1)
                                   << from_ascii("{}");
                        else
                                os << fontchange;
@@ -2144,8 +2479,7 @@ bool Paragraph::latex(BufferParams const & bparams,
                        // latexSpecialChar ignores spaces if
                        // style.pass_thru is false.
                        if (i != body_pos - 1) {
-                               if (d->simpleTeXBlanks(
-                                               runparams, os, texrow,
+                               if (d->simpleTeXBlanks(runparams, os,
                                                i, column, font, style)) {
                                        // A surrogate pair was output. We
                                        // must not call latexSpecialChar
@@ -2167,8 +2501,7 @@ bool Paragraph::latex(BufferParams const & bparams,
                // and then split to handle the two modes separately.
                if (c == META_INSET) {
                        if (i >= start_pos && (end_pos == -1 || i < end_pos)) {
-                               d->latexInset(bparams, os,
-                                               texrow, rp, running_font,
+                               d->latexInset(bparams, os, rp, running_font,
                                                basefont, outerfont, open_font,
                                                runningChange, style, i, column);
                        }
@@ -2176,7 +2509,7 @@ bool Paragraph::latex(BufferParams const & bparams,
                        if (i >= start_pos && (end_pos == -1 || i < end_pos)) {
                                try {
                                        d->latexSpecialChar(os, rp, running_font, runningChange,
-                                               style, i, column);
+                                                           style, i, end_pos, column);
                                } catch (EncodingException & e) {
                                if (runparams.dryrun) {
                                        os << "<" << _("LyX Warning: ")
@@ -2202,9 +2535,8 @@ bool Paragraph::latex(BufferParams const & bparams,
        if (open_font) {
 #ifdef FIXED_LANGUAGE_END_DETECTION
                if (next_) {
-                       running_font
-                               .latexWriteEndChanges(os, bparams, runparams,
-                                       basefont,
+                       running_font.latexWriteEndChanges(os, bparams,
+                                       runparams, basefont,
                                        next_->getFont(bparams, 0, outerfont));
                } else {
                        running_font.latexWriteEndChanges(os, bparams,
@@ -2224,19 +2556,17 @@ bool Paragraph::latex(BufferParams const & bparams,
 
        // Needed if there is an optional argument but no contents.
        if (body_pos > 0 && body_pos == size()) {
-               os << "]~";
-               return_value = false;
+               os << "}]~";
        }
 
-       if (allowcust && d->endTeXParParams(bparams, os, texrow, runparams)
+       if (allowcust && d->endTeXParParams(bparams, os, runparams)
            && runparams.encoding != prev_encoding) {
                runparams.encoding = prev_encoding;
-               if (!bparams.useXetex)
+               if (!runparams.isFullUnicode())
                        os << setEncoding(prev_encoding->iconvName());
        }
 
        LYXERR(Debug::LATEX, "Paragraph::latex... done " << this);
-       return return_value;
 }
 
 
@@ -2312,7 +2642,7 @@ pos_type Paragraph::firstWordLyXHTML(XHTMLStream & xs, OutputParams const & runp
                        char_type c = d->text_[i];
                        if (c == ' ')
                                break;
-                       xs << html::escapeChar(c);
+                       xs << c;
                }
        }
        return i;
@@ -2394,40 +2724,51 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
                                    XHTMLStream & xs,
                                    OutputParams const & runparams,
                                    Font const & outerfont,
-                                               bool fortoc,
                                    pos_type initial) const
 {
        docstring retval;
 
        bool emph_flag = false;
        bool bold_flag = false;
-       std::string closing_tag;
 
        Layout const & style = *d->layout_;
+
+       xs.startParagraph(allowEmpty());
+
+       if (!runparams.for_toc && runparams.html_make_pars) {
+               // generate a magic label for this paragraph
+               string const attr = "id='" + magicLabel() + "'";
+               xs << html::CompTag("a", attr);
+       }
+
        FontInfo font_old =
                style.labeltype == LABEL_MANUAL ? style.labelfont : style.font;
 
        // parsing main loop
        for (pos_type i = initial; i < size(); ++i) {
+               // let's not show deleted material in the output
+               if (isDeleted(i))
+                       continue;
+
                Font font = getFont(buf.params(), i, outerfont);
 
                // emphasis
                if (font_old.emph() != font.fontInfo().emph()) {
                        if (font.fontInfo().emph() == FONT_ON) {
-                               xs << StartTag("em");
+                               xs << html::StartTag("em");
                                emph_flag = true;
                        } else if (emph_flag && i != initial) {
-                               xs << EndTag("em");
+                               xs << html::EndTag("em");
                                emph_flag = false;
                        }
                }
                // bold
                if (font_old.series() != font.fontInfo().series()) {
                        if (font.fontInfo().series() == BOLD_SERIES) {
-                               xs << StartTag("strong");
+                               xs << html::StartTag("strong");
                                bold_flag = true;
                        } else if (bold_flag && i != initial) {
-                               xs << EndTag("strong");
+                               xs << html::EndTag("strong");
                                bold_flag = false;
                        }
                }
@@ -2436,11 +2777,9 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
 
                Inset const * inset = getInset(i);
                if (inset) {
-                       InsetCommand const * ic = inset->asInsetCommand();
-                       InsetLayout const & il = inset->getLayout();
-                       if (!fortoc || il.isInToc() || (ic && ic->isInToc())) {
+                       if (!runparams.for_toc || inset->isInToc()) {
                                OutputParams np = runparams;
-                               if (!il.htmlisblock())
+                               if (!inset->getLayout().htmlisblock())
                                        np.html_in_par = true;
                                retval += inset->xhtml(xs, np);
                        }
@@ -2467,7 +2806,7 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
                                // We don't want to escape the entities. Note that
                                // it is safe to do this, since str can otherwise
                                // only be "-". E.g., it can't be "<".
-                               xs << XHTMLStream::NextRaw() << str;
+                               xs << XHTMLStream::ESCAPE_NONE << str;
                        } else
                                xs << c;
                }
@@ -2475,6 +2814,7 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
        }
 
        xs.closeFontTags();
+       xs.endParagraph();
        return retval;
 }
 
@@ -2506,13 +2846,47 @@ bool Paragraph::isLineSeparator(pos_type pos) const
 
 bool Paragraph::isWordSeparator(pos_type pos) const
 {
+       if (pos == size()) // the end-of-paragraph position always counts as a separator
+               return true;
        if (Inset const * inset = getInset(pos))
                return !inset->isLetter();
+       // A hard hyphen (not part of an en- or em-dash) or an apostrophe
+       // stays inside the word that is passed to the spell checker.
+       // FIXME: this rule is subject to change; see
+       // https://bugzilla.mozilla.org/show_bug.cgi?id=355178
+       // for an impression of how complex this is.
+       if (isHardHyphenOrApostrophe(pos))
+               return false;
        char_type const c = d->text_[pos];
-       // We want to pass the ' and escape chars to the spellchecker
-       static docstring const quote = from_utf8(lyxrc.spellchecker_esc_chars + '\'');
-       return (!isLetterChar(c) && !isDigit(c) && !contains(quote, c))
-               || pos == size();
+       // Characters listed in lyxrc.spellchecker_esc_chars are kept in the word as well.
+       docstring const escape_chars = from_utf8(lyxrc.spellchecker_esc_chars);
+       return !isLetterChar(c) && !isDigitASCII(c) && !contains(escape_chars, c);
+}
+
+
+bool Paragraph::isHardHyphenOrApostrophe(pos_type pos) const // true if text_[pos] is a hyphen or apostrophe that should be kept within a word
+{
+       pos_type const psize = size();
+       if (pos >= psize) // nothing to test at or beyond the end of the paragraph
+               return false;
+       char_type const c = d->text_[pos];
+       if (c != '-' && c != '\'') // only these two characters can qualify
+               return false;
+       int nextpos = pos + 1;
+       int prevpos = pos > 0 ? pos - 1 : 0; // clamped to 0; only read below when pos > 0
+       if ((nextpos == psize || isSpace(nextpos)) // a character standing alone (paragraph boundary or space on both sides) does not qualify
+               && (pos == 0 || isSpace(prevpos)))
+               return false;
+       return c == '\'' // any remaining apostrophe qualifies
+               || ((nextpos == psize || d->text_[nextpos] != '-') // a hyphen qualifies only when neither neighbour is another hyphen
+               && (pos == 0 || d->text_[prevpos] != '-'));
+}
+
+
+bool Paragraph::isSameSpellRange(pos_type pos1, pos_type pos2) const
+{
+       return pos1 == pos2 // identical positions trivially share a range
+               || d->speller_state_.getRange(pos1) == d->speller_state_.getRange(pos2); // otherwise compare the ranges the speller state returns for each position
 }
 
 
@@ -2521,7 +2895,7 @@ bool Paragraph::isChar(pos_type pos) const
        if (Inset const * inset = getInset(pos))
                return inset->isChar();
        char_type const c = d->text_[pos];
-       return !isLetterChar(c) && !isDigit(c) && !lyx::isSpace(c);
+       return !isLetterChar(c) && !isDigitASCII(c) && !lyx::isSpace(c);
 }
 
 
@@ -2561,6 +2935,7 @@ void Paragraph::changeLanguage(BufferParams const & bparams,
                if (font.language() == from) {
                        font.setLanguage(to);
                        setFont(i, font);
+                       d->requestSpellCheck(i);
                }
        }
 }
@@ -2568,7 +2943,7 @@ void Paragraph::changeLanguage(BufferParams const & bparams,
 
 bool Paragraph::isMultiLingual(BufferParams const & bparams) const
 {
-       Language const * doc_language = bparams.language;
+       Language const * doc_language = bparams.language;
        FontList::const_iterator cit = d->fontlist_.begin();
        FontList::const_iterator end = d->fontlist_.end();
 
@@ -2581,6 +2956,20 @@ bool Paragraph::isMultiLingual(BufferParams const & bparams) const
 }
 
 
+void Paragraph::getLanguages(std::set<Language const *> & languages) const // adds the languages found in this paragraph's font list to the caller's set
+{
+       FontList::const_iterator cit = d->fontlist_.begin();
+       FontList::const_iterator end = d->fontlist_.end();
+
+       for (; cit != end; ++cit) {
+               Language const * lang = cit->font().language();
+               if (lang != ignore_language && // ignore_language and latex_language are never reported
+                   lang != latex_language)
+                       languages.insert(lang);
+       }
+}
+
+
 docstring Paragraph::asString(int options) const
 {
        return asString(0, size(), options);
@@ -2591,18 +2980,20 @@ docstring Paragraph::asString(pos_type beg, pos_type end, int options) const
 {
        odocstringstream os;
 
-       if (beg == 0 
-               && options & AS_STR_LABEL
-               && !d->params_.labelString().empty())
+       if (beg == 0
+           && options & AS_STR_LABEL
+           && !d->params_.labelString().empty())
                os << d->params_.labelString() << ' ';
 
        for (pos_type i = beg; i < end; ++i) {
+               if ((options & AS_STR_SKIPDELETE) && isDeleted(i))
+                       continue;
                char_type const c = d->text_[i];
                if (isPrintable(c) || c == '\t'
                    || (c == '\n' && (options & AS_STR_NEWLINES)))
                        os.put(c);
                else if (c == META_INSET && (options & AS_STR_INSETS)) {
-                       getInset(i)->tocString(os);
+                       getInset(i)->toString(os);
                        if (getInset(i)->asInsetMath())
                                os << " ";
                }
@@ -2612,11 +3003,29 @@ docstring Paragraph::asString(pos_type beg, pos_type end, int options) const
 }
 
 
+void Paragraph::forToc(docstring & os, size_t maxlen) const // appends a plain-text rendering of this paragraph to os
+{
+       if (!d->params_.labelString().empty()) // a non-empty label string goes first, followed by one space
+               os += d->params_.labelString() + ' ';
+       for (pos_type i = 0; i < size() && os.length() < maxlen; ++i) { // the length limit is tested before each position is processed
+               if (isDeleted(i)) // deleted text is left out
+                       continue;
+               char_type const c = d->text_[i];
+               if (isPrintable(c))
+                       os += c;
+               else if (c == '\t' || c == '\n') // tabs and newlines are replaced by a single space
+                       os += ' ';
+               else if (c == META_INSET)
+                       getInset(i)->forToc(os, maxlen); // delegate to the inset, handing on the same limit
+       }
+}
+
+
 docstring Paragraph::stringify(pos_type beg, pos_type end, int options, OutputParams & runparams) const
 {
        odocstringstream os;
 
-       if (beg == 0 
+       if (beg == 0
                && options & AS_STR_LABEL
                && !d->params_.labelString().empty())
                os << d->params_.labelString() << ' ';
@@ -2647,6 +3056,12 @@ int Paragraph::id() const
 }
 
 
+void Paragraph::setId(int id)
+{
+       d->id_ = id; // overwrite the id stored in the private data
+}
+
+
 Layout const & Paragraph::layout() const
 {
        return *d->layout_;
@@ -2660,14 +3075,14 @@ void Paragraph::setLayout(Layout const & layout)
 
 
 void Paragraph::setDefaultLayout(DocumentClass const & tc)
-{ 
-       setLayout(tc.defaultLayout()); 
+{
+       setLayout(tc.defaultLayout()); // switch this paragraph to the layout returned by tc.defaultLayout()
 }
 
 
 void Paragraph::setPlainLayout(DocumentClass const & tc)
-{ 
-       setLayout(tc.plainLayout()); 
+{
+       setLayout(tc.plainLayout()); // switch this paragraph to the layout returned by tc.plainLayout()
 }
 
 
@@ -2760,7 +3175,7 @@ int Paragraph::checkBiblio(Buffer const & buffer)
 {
        // FIXME From JS:
        // This is getting more and more a mess. ...We really should clean
-       // up this bibitem issue for 1.6. See also bug 2743.
+       // up this bibitem issue for 1.6.
 
        // Add bibitem insets if necessary
        if (d->layout_->labeltype != LABEL_BIBLIO)
@@ -2786,8 +3201,8 @@ int Paragraph::checkBiblio(Buffer const & buffer)
        InsetList::iterator end = d->insetlist_.end();
        for (; it != end; ++it)
                if (it->inset->lyxCode() == BIBITEM_CODE
-                   && it->pos > 0) {
-                       InsetBibitem * olditem = static_cast<InsetBibitem *>(it->inset);
+                     && it->pos > 0) {
+                       InsetCommand * olditem = it->inset->asInsetCommand();
                        oldkey = olditem->getParam("key");
                        oldlabel = olditem->getParam("label");
                        erasedInsetPosition = it->pos;
@@ -2803,8 +3218,7 @@ int Paragraph::checkBiblio(Buffer const & buffer)
        // There was an InsetBibitem at the beginning and we did have to
        // erase one. So we give its properties to the beginning inset.
        if (hasbibitem) {
-               InsetBibitem * inset =
-                       static_cast<InsetBibitem *>(d->insetlist_.begin()->inset);
+               InsetCommand * inset = d->insetlist_.begin()->inset->asInsetCommand();
                if (!oldkey.empty())
                        inset->setParam("key", oldkey);
                inset->setParam("label", oldlabel);
@@ -2813,13 +3227,13 @@ int Paragraph::checkBiblio(Buffer const & buffer)
 
        // There was no inset at the beginning, so we need to create one with
        // the key and label of the one we erased.
-       InsetBibitem * inset = 
+       InsetBibitem * inset =
                new InsetBibitem(const_cast<Buffer *>(&buffer), InsetCommandParams(BIBITEM_CODE));
        // restore values of previously deleted item in this par.
        if (!oldkey.empty())
                inset->setParam("key", oldkey);
        inset->setParam("label", oldlabel);
-       insertInset(0, static_cast<Inset *>(inset),
+       insertInset(0, inset,
                    Change(track_changes ? Change::INSERTED : Change::UNCHANGED));
 
        return 1;
@@ -2949,36 +3363,44 @@ void Paragraph::changeCase(BufferParams const & bparams, pos_type pos,
 }
 
 
-bool Paragraph::find(docstring const & str, bool cs, bool mw,
-               pos_type pos, bool del) const
+int Paragraph::find(docstring const & str, bool cs, bool mw, // Tests whether str matches the paragraph text beginning at start_pos.
+               pos_type start_pos, bool del) const // Returns 0 on mismatch, otherwise the number of positions consumed.
 {
+       pos_type pos = start_pos; // pos advances through the text; start_pos is kept for the word test and the result
        int const strsize = str.length();
        int i = 0;
        pos_type const parsize = d->text_.size();
-       for (i = 0; pos + i < parsize; ++i) {
-               if (i >= strsize)
-                       break;
-               if (cs && str[i] != d->text_[pos + i])
+       for (i = 0; i < strsize && pos < parsize; ++i, ++pos) { // i indexes str, pos indexes the paragraph text
+               // Ignore "invisible" letters such as ligature breaks
+               // and hyphenation chars while searching
+               while (pos < parsize - 1 && isInset(pos)) { // the last position is never skipped
+                       odocstringstream os;
+                       getInset(pos)->toString(os);
+                       if (!getInset(pos)->isLetter() || !os.str().empty()) // only letter insets with empty string output are skipped
+                               break;
+                       pos++;
+               }
+               if (cs && str[i] != d->text_[pos]) // cs set: characters must be equal
                        break;
-               if (!cs && uppercase(str[i]) != uppercase(d->text_[pos + i]))
+               if (!cs && uppercase(str[i]) != uppercase(d->text_[pos])) // cs unset: compare after uppercase()
                        break;
-               if (!del && isDeleted(pos + i))
+               if (!del && isDeleted(pos)) // del unset: a deleted position ends the match
                        break;
        }
 
        if (i != strsize)
-               return false;
+               return 0; // the loop stopped before all of str was matched
 
        // if necessary, check whether string matches word
        if (mw) {
-               if (pos > 0 && !isWordSeparator(pos - 1))
-                       return false;
-               if (pos + strsize < parsize
-                       && !isWordSeparator(pos + strsize))
-                       return false;
+               if (start_pos > 0 && !isWordSeparator(start_pos - 1)) // position before the match must be a word separator
+                       return 0;
+               if (pos < parsize // pos is now the first position after the match
+                       && !isWordSeparator(pos))
+                       return 0;
        }
 
-       return true;
+       return pos - start_pos; // counts skipped insets too, so it can exceed str.length()
 }
 
 
@@ -3015,10 +3437,15 @@ bool Paragraph::isSeparator(pos_type pos) const
 
 void Paragraph::deregisterWords()
 {
-       Private::Words::const_iterator it;
-       WordList & wl = theWordList();
-       for (it = d->words_.begin(); it != d->words_.end(); ++it)
-               wl.remove(*it);
+       Private::LangWordsMap::const_iterator itl = d->words_.begin(); // d->words_ is walked as a map: key in ->first, word set in ->second
+       Private::LangWordsMap::const_iterator ite = d->words_.end();
+       for (; itl != ite; ++itl) {
+               WordList * wl = theWordList(itl->first); // word list selected by the map key
+               Private::Words::const_iterator it = (itl->second).begin();
+               Private::Words::const_iterator et = (itl->second).end();
+               for (; it != et; ++it)
+                       wl->remove(*it); // NOTE(review): wl is dereferenced unchecked - confirm theWordList() cannot return null
+       }
        d->words_.clear();
 }
 
@@ -3062,15 +3489,22 @@ void Paragraph::locateWord(pos_type & from, pos_type & to,
 
 void Paragraph::collectWords()
 {
+       // This is the value that needs to be exposed in the preferences
+       // to resolve bug #6760.
+       static int minlength = 6; // smallest (pos - from) span that is stored as a word
        pos_type n = size();
        for (pos_type pos = 0; pos < n; ++pos) {
                if (isWordSeparator(pos))
                        continue;
                pos_type from = pos;
                locateWord(from, pos, WHOLE_WORD);
-               if (pos - from >= 6) {
+               if (pos - from >= minlength) {
                        docstring word = asString(from, pos, AS_STR_NONE);
-                       d->words_.insert(word);
+                       FontList::const_iterator cit = d->fontlist_.fontIterator(pos); // NOTE(review): looked up at pos (as left by locateWord), not at from - confirm
+                       if (cit == d->fontlist_.end())
+                               return; // NOTE(review): leaves the function, so later words are not collected - confirm intended
+                       Language const * lang = cit->font().language();
+                       d->words_[*lang].insert(word); // store the word in the set keyed by its language
                }
        }
 }
@@ -3078,10 +3512,15 @@ void Paragraph::collectWords()
 
 void Paragraph::registerWords()
 {
-       Private::Words::const_iterator it;
-       WordList & wl = theWordList();
-       for (it = d->words_.begin(); it != d->words_.end(); ++it)
-               wl.insert(*it);
+       Private::LangWordsMap::const_iterator itl = d->words_.begin(); // d->words_ is walked as a map: key in ->first, word set in ->second
+       Private::LangWordsMap::const_iterator ite = d->words_.end();
+       for (; itl != ite; ++itl) {
+               WordList * wl = theWordList(itl->first); // word list selected by the map key
+               Private::Words::const_iterator it = (itl->second).begin();
+               Private::Words::const_iterator et = (itl->second).end();
+               for (; it != et; ++it)
+                       wl->insert(*it); // NOTE(review): wl is dereferenced unchecked - confirm theWordList() cannot return null
+       }
 }
 
 
@@ -3093,51 +3532,287 @@ void Paragraph::updateWords()
 }
 
 
-bool Paragraph::spellCheck(pos_type & from, pos_type & to, WordLangTuple & wl,
-       docstring_list & suggestions, bool do_suggestion) const
+void Paragraph::Private::appendSkipPosition(SkipPositions & skips, pos_type const pos) const // Records pos in skips, growing the last span when pos directly follows it.
 {
+       SkipPositionsIterator begin = skips.begin();
+       SkipPositions::iterator end = skips.end();
+       if (pos > 0 && begin < end) { // begin < end holds only when skips has entries
+               --end; // end now refers to the last stored span
+               if (end->last == pos - 1) { // pos is adjacent to that span
+                       end->last = pos; // extend it rather than adding a new span
+                       return;
+               }
+       }
+       skips.insert(end, FontSpan(pos, pos)); // NOTE(review): when --end ran, this inserts before the last span rather than appending - confirm
+}
+
+
+Language * Paragraph::Private::locateSpellRange( // Adjusts from/to to one run for spell checking; returns its language, or 0 if nothing is left.
+       pos_type & from, pos_type & to,
+       SkipPositions & skips) const // skips receives inset and deleted positions met while scanning
+{
+       // skip leading white space
+       while (from < to && owner_->isWordSeparator(from))
+               ++from;
+       // don't check empty range
+       if (from >= to)
+               return 0;
+       // get current language
+       Language * lang = getSpellLanguage(from);
+       pos_type last = from;
+       bool samelang = true;
+       bool sameinset = true;
+       while (last < to && samelang && sameinset) {
+               // hop to end of word
+               while (last < to && !owner_->isWordSeparator(last)) {
+                       if (owner_->getInset(last)) { // an inset sits at this non-separator position
+                               appendSkipPosition(skips, last);
+                       } else if (owner_->isDeleted(last)) { // a deleted position inside the word
+                               appendSkipPosition(skips, last);
+                       }
+                       ++last;
+               }
+               // hop to next word while checking for insets
+               while (sameinset && last < to && owner_->isWordSeparator(last)) {
+                       if (Inset const * inset = owner_->getInset(last))
+                               sameinset = inset->isChar() && inset->isLetter(); // any other inset stops the scan at this position
+                       if (sameinset && owner_->isDeleted(last)) {
+                               appendSkipPosition(skips, last);
+                       }
+                       if (sameinset)
+                               last++;
+               }
+               if (sameinset && last < to) {
+                       // now check for language change
+                       samelang = lang == getSpellLanguage(last); // pointer comparison of the two Language objects
+               }
+       }
+       // if language change detected backstep is needed
+       if (!samelang)
+               --last;
+       to = last; // report the end of the located run through the to reference
+       return lang;
+}
+
+
+Language * Paragraph::Private::getSpellLanguage(pos_type const from) const // Language object used for spell checking at position from.
+{
+       Language * lang =
+               const_cast<Language *>(owner_->getFontSettings( // constness of the font's language is cast away
+                       inset_owner_->buffer().params(), from).language());
+       if (lang == inset_owner_->buffer().params().language // same object as the buffer's language ...
+               && !lyxrc.spellchecker_alt_lang.empty()) { // ... and an alternative spell language is configured
+               string lang_code;
+               string const lang_variety =
+                       split(lyxrc.spellchecker_alt_lang, lang_code, '-'); // NOTE(review): presumably lang_code gets the part before '-' and the result the rest - confirm split()
+               lang->setCode(lang_code); // NOTE(review): changes the Language object itself, not a copy - confirm this is intended
+               lang->setVariety(lang_variety);
+       }
+       return lang;
+}
+
+
+void Paragraph::requestSpellCheck(pos_type pos) // Forwards the request for position pos to the private implementation.
+{
+       d->requestSpellCheck(pos);
+}
+
+
+bool Paragraph::needsSpellCheck() const // Compares the checker's change number with the stored one, then returns d->needsSpellCheck().
+{
+       SpellChecker::ChangeNumber speller_change_number = 0; // stays 0 when theSpellChecker() returns null
+       if (theSpellChecker())
+               speller_change_number = theSpellChecker()->changeNumber();
+       if (speller_change_number > d->speller_state_.currentChangeNumber()) { // checker is ahead of the number stored for this paragraph
+               d->speller_state_.needsCompleteRefresh(speller_change_number); // pass the newer number to the speller state
+       }
+       return d->needsSpellCheck();
+}
+
+
+bool Paragraph::Private::ignoreWord(docstring const & word) const // True when any character of word satisfies isNumber().
+{
+       // Ignore words with digits
+       // FIXME: make this customizable
+       // (note that some checkers ignore words with digits by default)
+       docstring::const_iterator cit = word.begin();
+       docstring::const_iterator const end = word.end();
+       for (; cit != end; ++cit) {
+               if (isNumber((*cit)))
+                       return true; // first match decides
+       }
+       return false; // also the result for an empty word
+}
+
+
+SpellChecker::Result Paragraph::spellCheck(pos_type & from, pos_type & to, // Checks the word located around from/to and returns the checker result.
+       WordLangTuple & wl, docstring_list & suggestions, // wl receives the word and language; suggestions is filled only on request
+       bool do_suggestion, bool check_learned) const // check_learned forces a fresh check even when needsSpellCheck() is false
+{
+       SpellChecker::Result result = SpellChecker::WORD_OK; // value returned by every early exit below
        SpellChecker * speller = theSpellChecker();
        if (!speller)
-               return false;
+               return result;
+
+       if (!d->layout_->spellcheck || !inInset().allowSpellCheck()) // checking is disabled by the layout or the enclosing inset
+               return result;
 
        locateWord(from, to, WHOLE_WORD);
-       if (from == to || from >= pos_type(d->text_.size()))
-               return false;
+       if (from == to || from >= size()) // empty word, or from at/after the end of the paragraph
+               return result;
+
+       docstring word = asString(from, to, AS_STR_INSETS | AS_STR_SKIPDELETE);
+       Language * lang = d->getSpellLanguage(from);
+
+       wl = WordLangTuple(word, lang);
+
+       if (!word.size())
+               return result;
+
+       if (needsSpellCheck() || check_learned) {
+               pos_type end = to; // end of the range whose state is recorded below
+               if (!d->ignoreWord(word)) {
+                       bool const trailing_dot = to < size() && d->text_[to] == '.'; // the character right after the word is a dot
+                       result = speller->check(wl);
+                       if (SpellChecker::misspelled(result) && trailing_dot) {
+                               wl = WordLangTuple(word.append(from_ascii(".")), lang); // append() modifies word, so word now ends in the dot
+                               result = speller->check(wl);
+                               if (!SpellChecker::misspelled(result)) {
+                                       LYXERR(Debug::GUI, "misspelled word is correct with dot: \"" <<
+                                          word << "\" [" <<
+                                          from << ".." << to << "]");
+                               } else {
+                                       // spell check with dot appended failed too
+                                       // restore original word/lang value
+                                       word = asString(from, to, AS_STR_INSETS | AS_STR_SKIPDELETE);
+                                       wl = WordLangTuple(word, lang);
+                               }
+                       }
+               }
+               if (!SpellChecker::misspelled(result)) {
+                       // area up to the begin of the next word is not misspelled
+                       while (end < size() && isWordSeparator(end))
+                               ++end;
+               }
+               d->setMisspelled(from, end, result); // record the result for the checked range
+       } else {
+               result = d->speller_state_.getState(from); // reuse the state already stored for position from
+       }
 
-       docstring word = asString(from, to, AS_STR_INSETS);
-       string const lang_code = lyxrc.spellchecker_alt_lang.empty()
-               ? getFontSettings(d->inset_owner_->buffer().params(), from).language()->code()
-               : lyxrc.spellchecker_alt_lang;
-       wl = WordLangTuple(word, lang_code);
-       SpellChecker::Result res = speller->check(wl);
-       // Just ignore any error that the spellchecker reports.
-       // FIXME: we should through out an exception and catch it in the GUI to
-       // display the error.
-       if (!speller->error().empty())
-               return false;
+       if (do_suggestion) // suggestions is left untouched when do_suggestion is false
+               suggestions.clear();
 
-       bool const misspelled = res != SpellChecker::OK
-               && res != SpellChecker::IGNORED_WORD;
+       if (SpellChecker::misspelled(result)) {
+               LYXERR(Debug::GUI, "misspelled word: \"" <<
+                          word << "\" [" <<
+                          from << ".." << to << "]");
+               if (do_suggestion)
+                       speller->suggest(wl, suggestions);
+       }
+       return result;
+}
 
-       if (lyxrc.spellcheck_continuously)
-               d->fontlist_.setMisspelled(from, to, misspelled);
 
-       if (misspelled && do_suggestion)
-               speller->suggest(wl, suggestions);
-       else
-               suggestions.clear();
+void Paragraph::Private::markMisspelledWords( // Records spelling state for first..last from the checker's list of misspelled words.
+       pos_type const & first, pos_type const & last,
+       SpellChecker::Result result,
+       docstring const & word, // the text that was passed to the checker
+       SkipPositions const & skips) // skipped positions, consumed through countSkips()
+{
+       if (!SpellChecker::misspelled(result)) {
+               setMisspelled(first, last, SpellChecker::WORD_OK); // whole range is fine
+               return;
+       }
+       int snext = first; // start of the next stretch not yet marked
+       SpellChecker * speller = theSpellChecker(); // NOTE(review): used below without a null check - confirm callers guarantee a checker
+       // locate and enumerate the error positions
+       int nerrors = speller->numMisspelledWords();
+       int numskipped = 0; // running total returned by countSkips()
+       SkipPositionsIterator it = skips.begin();
+       SkipPositionsIterator et = skips.end();
+       for (int index = 0; index < nerrors; ++index) {
+               int wstart; // NOTE(review): uninitialised here; presumably set by misspelledWord() - confirm
+               int wlen = 0;
+               speller->misspelledWord(index, wstart, wlen);
+               /// should not happen if speller supports range checks
+               if (!wlen) continue;
+               docstring const misspelled = word.substr(wstart, wlen); // taken while wstart is still an offset into word
+               wstart += first + numskipped; // shift the offset by first and the skips counted so far
+               if (snext < wstart) {
+                       /// mark the range of correct spelling
+                       numskipped += countSkips(it, et, wstart);
+                       setMisspelled(snext,
+                               wstart - 1, SpellChecker::WORD_OK);
+               }
+               snext = wstart + wlen;
+               numskipped += countSkips(it, et, snext);
+               /// mark the range of misspelling
+               setMisspelled(wstart, snext, result); // NOTE(review): end is snext here but wstart - 1 / (snext-1) elsewhere - confirm setMisspelled()'s end convention
+               LYXERR(Debug::GUI, "misspelled word: \"" <<
+                          misspelled << "\" [" <<
+                          wstart << ".." << (snext-1) << "]");
+               ++snext;
+       }
+       if (snext <= last) {
+               /// mark the range of correct spelling at end
+               setMisspelled(snext, last, SpellChecker::WORD_OK);
+       }
+}
+
 
-       return misspelled;
+void Paragraph::spellCheck() const // Spell-checks the range reported by rangeOfSpellCheck() and records the results.
+{
+       SpellChecker * speller = theSpellChecker();
+       if (!speller || !size() ||!needsSpellCheck()) // no checker, empty paragraph, or nothing pending
+               return;
+       pos_type start; // NOTE(review): start/endpos are uninitialised; presumably set by rangeOfSpellCheck() - confirm
+       pos_type endpos;
+       d->rangeOfSpellCheck(start, endpos);
+       if (speller->canCheckParagraph()) {
+               // loop until we leave the range
+               for (pos_type first = start; first < endpos; ) {
+                       pos_type last = endpos;
+                       Private::SkipPositions skips;
+                       Language * lang = d->locateSpellRange(first, last, skips); // takes first/last by reference and adjusts them
+                       if (first >= endpos)
+                               break;
+                       // start the spell checker on the unit of meaning
+                       docstring word = asString(first, last, AS_STR_INSETS + AS_STR_SKIPDELETE);
+                       WordLangTuple wl = WordLangTuple(word, lang);
+                       SpellChecker::Result result = word.size() ?
+                               speller->check(wl) : SpellChecker::WORD_OK; // empty text is never sent to the checker
+                       d->markMisspelledWords(first, last, result, word, skips);
+                       first = ++last; // continue one position past the range just handled
+               }
+       } else {
+               static docstring_list suggestions; // NOTE(review): function-static list shared by all calls - confirm this is safe
+               pos_type to = endpos;
+               while (start < endpos) {
+                       WordLangTuple wl;
+                       spellCheck(start, to, wl, suggestions, false); // NOTE(review): 5 arguments; presumably check_learned has a default in the declaration - confirm
+                       start = to + 1; // continue after the word the call located
+               }
+       }
+       d->readySpellCheck();
+}
+
+
+bool Paragraph::isMisspelled(pos_type pos, bool check_boundary) const // Stored spelling state at pos; optionally falls back to pos - 1 at a word end.
+{
+       bool result = SpellChecker::misspelled(d->speller_state_.getState(pos)); // NOTE(review): getState(pos) runs before the range test below - confirm it accepts any pos
+       if (result || pos <= 0 || pos > size()) // already misspelled, or no usable pos - 1
+               return result;
+       if (check_boundary && (pos == size() || isWordSeparator(pos))) // pos is at the paragraph end or on a separator
+               result = SpellChecker::misspelled(d->speller_state_.getState(pos - 1)); // use the state of the preceding position
+       return result;
 }
 
 
-bool Paragraph::isMisspelled(pos_type pos) const
+string Paragraph::magicLabel() const // Builds the string "magicparlabel-" followed by id().
 {
-       pos_type from = pos;
-       pos_type to = pos;
-       WordLangTuple wl;
-       docstring_list suggestions;
-       return spellCheck(from, to, wl, suggestions, false);
+       stringstream ss;
+       ss << "magicparlabel-" << id(); // id() is streamed right after the fixed prefix
+       return ss.str();
 }