X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FParagraph.cpp;h=991ab14a7527d65083207e171b9e34b8fb720fce;hb=ea6aed5b8bf38366aaa1eb15ce1b9f13de76987f;hp=a2253db71ad95486252b3d189b8e41dc038aaa82;hpb=4c4299d1a56435e42f7eea2f3b138197d2b67b2a;p=lyx.git

diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp
index a2253db71a..991ab14a75 100644
--- a/src/Paragraph.cpp
+++ b/src/Paragraph.cpp
@@ -19,60 +19,52 @@
 
 #include "Paragraph.h"
 
+#include "LayoutFile.h"
 #include "Buffer.h"
 #include "BufferParams.h"
 #include "Changes.h"
 #include "Counters.h"
 #include "Encoding.h"
-#include "debug.h"
-#include "gettext.h"
 #include "InsetList.h"
 #include "Language.h"
 #include "LaTeXFeatures.h"
-#include "Color.h"
 #include "Layout.h"
 #include "Length.h"
 #include "Font.h"
 #include "FontList.h"
 #include "LyXRC.h"
-#include "Messages.h"
 #include "OutputParams.h"
 #include "output_latex.h"
 #include "paragraph_funcs.h"
 #include "ParagraphParameters.h"
 #include "sgml.h"
+#include "TextClass.h"
 #include "TexRow.h"
+#include "Text.h"
 #include "VSpace.h"
+#include "WordList.h"
 
 #include "frontends/alert.h"
-#include "frontends/FontMetrics.h"
 
 #include "insets/InsetBibitem.h"
 #include "insets/InsetLabel.h"
-#include "insets/InsetOptArg.h"
 
+#include "support/lassert.h"
+#include "support/convert.h"
+#include "support/debug.h"
+#include "support/gettext.h"
 #include "support/lstrings.h"
+#include "support/Messages.h"
 #include "support/textutils.h"
-#include "support/convert.h"
-#include "support/unicode.h"
 
 #include <sstream>
 #include <vector>
 
-using std::endl;
-using std::string;
-using std::ostream;
+using namespace std;
+using namespace lyx::support;
 
 namespace lyx {
 
-using support::contains;
-using support::lowercase;
-using support::prefixIs;
-using support::suffixIs;
-using support::rsplit;
-using support::rtrim;
-using support::uppercase;
-
 namespace {
 /// Inset identifier (above 0x10ffff, for ucs-4)
 char_type const META_INSET = 0x200001;
@@ -88,9 +80,11 @@ class Paragraph::Private
 {
 public:
 	///
-	Private(Paragraph * owner);
+	Private(Paragraph * owner, Layout const & layout);
 	/// "Copy constructor"
 	Private(Private const &, Paragraph * owner);
+	/// Copy constructor for the range from \p beg (inclusive) to \p end (exclusive)
+	Private(Private const &, Paragraph * owner, pos_type beg, pos_type end);
 
 	///
 	void insertChar(pos_type pos, char_type c, Change const & change);
@@ -103,18 +97,18 @@ public:
 	/// Output a space in appropriate formatting (or a surrogate pair
 	/// if the next character is a combining character).
 	/// \return whether a surrogate pair was output.
-	bool simpleTeXBlanks(Encoding const &,
+	bool simpleTeXBlanks(OutputParams const &,
 			     odocstream &, TexRow & texrow,
 			     pos_type i,
 			     unsigned int & column,
 			     Font const & font,
 			     Layout const & style);
 
-	/// Output consecutive known unicode chars, belonging to the same
-	/// language as specified by \p preamble, to \p os starting from \p c.
+	/// Output consecutive unicode chars, belonging to the same script as
+	/// specified by the latex macro \p ltx, to \p os starting from \p i.
 	/// \return the number of characters written.
-	int knownLangChars(odocstream & os, char_type c, string & preamble,
-			   Change &, Encoding const &, pos_type &);
+	int writeScriptChars(odocstream & os, docstring const & ltx,
+			   Change &, Encoding const &, pos_type & i);
 
 	/// This could go to ParagraphParameters if we want to.
 	int startTeXParParams(BufferParams const &, odocstream &, TexRow &,
@@ -125,7 +119,7 @@ public:
 			    bool) const;
 
 	///
-	void latexInset(Buffer const &, BufferParams const &,
+	void latexInset(BufferParams const &,
 				   odocstream &,
 				   TexRow & texrow, OutputParams &,
 				   Font & running_font,
@@ -163,14 +157,19 @@ public:
 	bool latexSpecialPhrase(
 		odocstream & os,
 		pos_type & i,
-		unsigned int & column);
+		unsigned int & column,
+		OutputParams & runparams);
 
 	///
 	void validate(LaTeXFeatures & features,
 		      Layout const & layout) const;
 
+	/// Checks if the paragraph contains only text and no inset or font change.
+	bool onlyText(Buffer const & buf, Font const & outerfont,
+		      pos_type initial) const;
+
 	/// match a string against a particular point in the paragraph
-	bool isTextAt(std::string const & str, pos_type pos) const;
+	bool isTextAt(string const & str, pos_type pos) const;
 	
 	/// Which Paragraph owns us?
 	Paragraph * owner_;
@@ -194,26 +193,21 @@ public:
 	///
 	InsetList insetlist_;
 
-	///
-	LayoutPtr layout_;
-
 	/// end of label
 	pos_type begin_of_body_;
 
 	typedef docstring TextContainer;
 	///
 	TextContainer text_;
+	
+	typedef std::set<docstring> Words;
+	///
+	Words words_;
+	///
+	Layout const * layout_;
 };
 
 
-
-
-using std::endl;
-using std::upper_bound;
-using std::lower_bound;
-using std::string;
-
-
 // Initialization of the counter for the paragraph id's,
 unsigned int Paragraph::Private::paragraph_id = 0;
 
@@ -226,10 +220,10 @@ struct special_phrase {
 };
 
 special_phrase const special_phrases[] = {
-	{ "LyX", from_ascii("\\protect\\LyX{}"), false },
-	{ "TeX", from_ascii("\\protect\\TeX{}"), true },
-	{ "LaTeX2e", from_ascii("\\protect\\LaTeXe{}"), true },
-	{ "LaTeX", from_ascii("\\protect\\LaTeX{}"), true },
+	{ "LyX", from_ascii("\\LyX{}"), false },
+	{ "TeX", from_ascii("\\TeX{}"), true },
+	{ "LaTeX2e", from_ascii("\\LaTeXe{}"), true },
+	{ "LaTeX", from_ascii("\\LaTeX{}"), true },
 };
 
 size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase);
@@ -237,8 +231,8 @@ size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase);
 } // namespace anon
 
 
-Paragraph::Private::Private(Paragraph * owner)
-	: owner_(owner), inset_owner_(0), begin_of_body_(0)
+Paragraph::Private::Private(Paragraph * owner, Layout const & layout)
+	: owner_(owner), inset_owner_(0), begin_of_body_(0), layout_(&layout)
 {
 	id_ = paragraph_id++;
 	text_.reserve(100);
@@ -248,30 +242,58 @@ Paragraph::Private::Private(Paragraph * owner)
 Paragraph::Private::Private(Private const & p, Paragraph * owner)
 	: owner_(owner), inset_owner_(p.inset_owner_), fontlist_(p.fontlist_), 
 	  params_(p.params_), changes_(p.changes_), insetlist_(p.insetlist_),
-	  layout_(p.layout_), begin_of_body_(p.begin_of_body_), text_(p.text_)
+	  begin_of_body_(p.begin_of_body_), text_(p.text_), words_(p.words_),
+	  layout_(p.layout_)
 {
 	id_ = paragraph_id++;
 }
 
 
+/// Partial copy of \p p: takes characters [beg, end) and re-bases their fonts.
+Paragraph::Private::Private(Private const & p, Paragraph * owner,
+	pos_type beg, pos_type end)
+	: owner_(owner), inset_owner_(p.inset_owner_),
+	  params_(p.params_), changes_(p.changes_),
+	  insetlist_(p.insetlist_, beg, end),
+	  begin_of_body_(p.begin_of_body_), words_(p.words_),
+	  layout_(p.layout_)
+{
+	id_ = paragraph_id++;
+	if (beg >= pos_type(p.text_.size()))
+		return;	// range starts past the source text: text_ and fontlist_ stay empty
+	text_ = p.text_.substr(beg, end - beg);
+	// Read the entries of the source list p.fontlist_; our own list starts empty.
+	for (FontList::const_iterator fcit = p.fontlist_.begin();
+	     fcit != p.fontlist_.end(); ++fcit) {
+		if (fcit->pos() < beg)
+			continue;
+		if (fcit->pos() >= end) {
+			// Entry reaches past the copied range: pin it to our last char.
+			fontlist_.set(text_.size() - 1, fcit->font());
+			break;
+		}
+		// Entry lies inside the copied range: shift its position by beg.
+		fontlist_.set(fcit->pos() - beg, fcit->font());
+	}
+}
+
+
 bool Paragraph::isChanged(pos_type start, pos_type end) const
 {
-	BOOST_ASSERT(start >= 0 && start <= size());
-	BOOST_ASSERT(end > start && end <= size() + 1);
+	LASSERT(start >= 0 && start <= size(), /**/);
+	LASSERT(end > start && end <= size() + 1, /**/);
 
 	return d->changes_.isChanged(start, end);
 }
 
 
-bool Paragraph::isMergedOnEndOfParDeletion(bool trackChanges) const {
+bool Paragraph::isMergedOnEndOfParDeletion(bool trackChanges) const
+{
 	// keep the logic here in sync with the logic of eraseChars()
-
-	if (!trackChanges) {
+	if (!trackChanges)
 		return true;
-	}
-
-	Change change = d->changes_.lookup(size());
 
+	Change const change = d->changes_.lookup(size());
 	return change.type == Change::INSERTED && change.author == 0;
 }
 
@@ -295,8 +317,8 @@ void Paragraph::setChange(Change const & change)
 
 	if (change.type != Change::DELETED) {
 		for (pos_type pos = 0; pos < size(); ++pos) {
-			if (isInset(pos))
-				getInset(pos)->setChange(change);
+			if (Inset * inset = getInset(pos))
+				inset->setChange(change);
 		}
 	}
 }
@@ -304,23 +326,19 @@ void Paragraph::setChange(Change const & change)
 
 void Paragraph::setChange(pos_type pos, Change const & change)
 {
-	BOOST_ASSERT(pos >= 0 && pos <= size());
-
+	LASSERT(pos >= 0 && pos <= size(), /**/);
 	d->changes_.set(change, pos);
 
 	// see comment in setChange(Change const &) above
-
-	if (change.type != Change::DELETED &&
-	    pos < size() && isInset(pos)) {
-		getInset(pos)->setChange(change);
-	}
+	if (change.type != Change::DELETED && pos < size())
+			if (Inset * inset = getInset(pos))
+				inset->setChange(change);
 }
 
 
 Change const & Paragraph::lookupChange(pos_type pos) const
 {
-	BOOST_ASSERT(pos >= 0 && pos <= size());
-
+	LASSERT(pos >= 0 && pos <= size(), /**/);
 	return d->changes_.lookup(pos);
 }
 
@@ -328,24 +346,22 @@ Change const & Paragraph::lookupChange(pos_type pos) const
 void Paragraph::acceptChanges(BufferParams const & bparams, pos_type start,
 		pos_type end)
 {
-	BOOST_ASSERT(start >= 0 && start <= size());
-	BOOST_ASSERT(end > start && end <= size() + 1);
+	LASSERT(start >= 0 && start <= size(), /**/);
+	LASSERT(end > start && end <= size() + 1, /**/);
 
 	for (pos_type pos = start; pos < end; ++pos) {
 		switch (lookupChange(pos).type) {
 			case Change::UNCHANGED:
 				// accept changes in nested inset
-				if (pos < size() && isInset(pos))
-					getInset(pos)->acceptChanges(bparams);
-
+				if (Inset * inset = getInset(pos))
+					inset->acceptChanges(bparams);
 				break;
 
 			case Change::INSERTED:
 				d->changes_.set(Change(Change::UNCHANGED), pos);
 				// also accept changes in nested inset
-				if (pos < size() && isInset(pos)) {
-					getInset(pos)->acceptChanges(bparams);
-				}
+				if (Inset * inset = getInset(pos))
+					inset->acceptChanges(bparams);
 				break;
 
 			case Change::DELETED:
@@ -366,16 +382,15 @@ void Paragraph::acceptChanges(BufferParams const & bparams, pos_type start,
 void Paragraph::rejectChanges(BufferParams const & bparams,
 		pos_type start, pos_type end)
 {
-	BOOST_ASSERT(start >= 0 && start <= size());
-	BOOST_ASSERT(end > start && end <= size() + 1);
+	LASSERT(start >= 0 && start <= size(), /**/);
+	LASSERT(end > start && end <= size() + 1, /**/);
 
 	for (pos_type pos = start; pos < end; ++pos) {
 		switch (lookupChange(pos).type) {
 			case Change::UNCHANGED:
 				// reject changes in nested inset
-				if (pos < size() && isInset(pos)) {
-					getInset(pos)->rejectChanges(bparams);
-				}
+				if (Inset * inset = getInset(pos))
+						inset->rejectChanges(bparams);
 				break;
 
 			case Change::INSERTED:
@@ -403,14 +418,14 @@ void Paragraph::rejectChanges(BufferParams const & bparams,
 void Paragraph::Private::insertChar(pos_type pos, char_type c,
 		Change const & change)
 {
-	BOOST_ASSERT(pos >= 0 && pos <= int(text_.size()));
+	LASSERT(pos >= 0 && pos <= int(text_.size()), /**/);
 
 	// track change
 	changes_.insert(change, pos);
 
 	// This is actually very common when parsing buffers (and
 	// maybe inserting ascii text)
-	if (pos == text_.size()) {
+	if (pos == pos_type(text_.size())) {
 		// when appending characters, no need to update tables
 		text_.push_back(c);
 		return;
@@ -429,11 +444,11 @@ void Paragraph::Private::insertChar(pos_type pos, char_type c,
 void Paragraph::insertInset(pos_type pos, Inset * inset,
 				   Change const & change)
 {
-	BOOST_ASSERT(inset);
-	BOOST_ASSERT(pos >= 0 && pos <= size());
+	LASSERT(inset, /**/);
+	LASSERT(pos >= 0 && pos <= size(), /**/);
 
 	d->insertChar(pos, META_INSET, change);
-	BOOST_ASSERT(d->text_[pos] == META_INSET);
+	LASSERT(d->text_[pos] == META_INSET, /**/);
 
 	// Add a new entry in the insetlist_.
 	d->insetlist_.insert(inset, pos);
@@ -442,7 +457,7 @@ void Paragraph::insertInset(pos_type pos, Inset * inset,
 
 bool Paragraph::eraseChar(pos_type pos, bool trackChanges)
 {
-	BOOST_ASSERT(pos >= 0 && pos <= size());
+	LASSERT(pos >= 0 && pos <= size(), /**/);
 
 	// keep the logic here in sync with the logic of isMergedOnEndOfParDeletion()
 
@@ -491,8 +506,8 @@ bool Paragraph::eraseChar(pos_type pos, bool trackChanges)
 
 int Paragraph::eraseChars(pos_type start, pos_type end, bool trackChanges)
 {
-	BOOST_ASSERT(start >= 0 && start <= size());
-	BOOST_ASSERT(end >= start && end <= size() + 1);
+	LASSERT(start >= 0 && start <= size(), /**/);
+	LASSERT(end >= start && end <= size() + 1, /**/);
 
 	pos_type i = start;
 	for (pos_type count = end - start; count; --count) {
@@ -515,24 +530,30 @@ int Paragraph::Private::latexSurrogatePair(odocstream & os, char_type c,
 	// Is this correct WRT change tracking?
 	docstring const latex1 = encoding.latexChar(next);
 	docstring const latex2 = encoding.latexChar(c);
-	os << latex1 << '{' << latex2 << '}';
+	if (docstring(1, next) == latex1) {
+		// the encoding supports the combination
+		os << latex2 << latex1;
+		return latex1.length() + latex2.length();
+	} else
+		os << latex1 << '{' << latex2 << '}';
 	return latex1.length() + latex2.length() + 2;
 }
 
 
-bool Paragraph::Private::simpleTeXBlanks(Encoding const & encoding,
+bool Paragraph::Private::simpleTeXBlanks(OutputParams const & runparams,
 				       odocstream & os, TexRow & texrow,
 				       pos_type i,
 				       unsigned int & column,
 				       Font const & font,
 				       Layout const & style)
 {
-	if (style.pass_thru)
+	if (style.pass_thru || runparams.verbatim)
 		return false;
 
 	if (i + 1 < int(text_.size())) {
 		char_type next = text_[i + 1];
 		if (Encodings::isCombiningChar(next)) {
+			Encoding const & encoding = *(runparams.encoding);
 			// This space has an accent, so we must always output it.
 			column += latexSurrogatePair(os, ' ', next, encoding) - 1;
 			return true;
@@ -548,7 +569,7 @@ bool Paragraph::Private::simpleTeXBlanks(Encoding const & encoding,
 	    && !owner_->isFreeSpacing()
 	    // In typewriter mode, we want to avoid
 	    // ! . ? : at the end of a line
-	    && !(font.family() == Font::TYPEWRITER_FAMILY
+	    && !(font.fontInfo().family() == TYPEWRITER_FAMILY
 		 && (text_[i - 1] == '.'
 		     || text_[i - 1] == '?'
 		     || text_[i - 1] == ':'
@@ -566,28 +587,44 @@ bool Paragraph::Private::simpleTeXBlanks(Encoding const & encoding,
 }
 
 
-int Paragraph::Private::knownLangChars(odocstream & os,
-				     char_type c,
-				     string & preamble,
-				     Change & runningChange,
-				     Encoding const & encoding,
-				     pos_type & i)
-{
-	// When the character is marked by the proper language, we simply
-	// get its code point in some encoding, otherwise we get the
-	// translation specified in the unicodesymbols file, which is
-	// something like "\textLANG{<spec>}". So, we have to retain
-	// "\textLANG{<spec>" for the first char but only "<spec>" for
-	// all subsequent chars.
-	docstring const latex1 = rtrim(encoding.latexChar(c), "}");
-	int length = latex1.length();
-	os << latex1;
+int Paragraph::Private::writeScriptChars(odocstream & os,
+					 docstring const & ltx,
+					 Change & runningChange,
+					 Encoding const & encoding,
+					 pos_type & i)
+{
+	// FIXME: modifying i here is not very nice...
+
+	// We only arrive here when a proper language for character text_[i] has
+	// not been specified (i.e., it could not be translated in the current
+	// latex encoding) or its latex translation has been forced, and it
+	// belongs to a known script.
+	// Parameter ltx contains the latex translation of text_[i] as specified
+	// in the unicodesymbols file and is something like "\textXXX{<spec>}".
+	// The latex macro name "textXXX" specifies the script to which text_[i]
+	// belongs and we use it in order to check whether characters from the
+	// same script immediately follow, such that we can collect them in a
+	// single "\textXXX" macro. So, we have to retain "\textXXX{<spec>"
+	// for the first char but only "<spec>" for all subsequent chars.
+	docstring::size_type const brace1 = ltx.find_first_of(from_ascii("{"));
+	docstring::size_type const brace2 = ltx.find_last_of(from_ascii("}"));
+	string script = to_ascii(ltx.substr(1, brace1 - 1));
+	int pos = 0;
+	int length = brace2;
+	bool closing_brace = true;
+	if (script == "textgreek" && encoding.latexName() == "iso-8859-7") {
+		// Correct encoding is being used, so we can avoid \textgreek.
+		pos = brace1 + 1;
+		length -= pos;
+		closing_brace = false;
+	}
+	os << ltx.substr(pos, length);
 	int size = text_.size();
 	while (i + 1 < size) {
-		char_type next = text_[i + 1];
-		// Stop here if next character belongs to another
-		// language or there is a change tracking status.
-		if (!Encodings::isKnownLangChar(next, preamble) ||
+		char_type const next = text_[i + 1];
+		// Stop here if next character belongs to another script
+		// or there is a change in change tracking status.
+		if (!Encodings::isKnownScriptChar(next, script) ||
 		    runningChange != owner_->lookupChange(i + 1))
 			break;
 		Font prev_font;
@@ -602,24 +639,20 @@ int Paragraph::Private::knownLangChars(odocstream & os,
 			if (cit->pos() >= i + 1)
 				break;
 		}
-		// Stop here if there is a font attribute change.
+		// Stop here if there is a font attribute or encoding change.
 		if (found && cit != end && prev_font != cit->font())
 			break;
-		docstring const latex = rtrim(encoding.latexChar(next), "}");
-		docstring::size_type const j =
+		docstring const latex = encoding.latexChar(next);
+		docstring::size_type const b1 =
 					latex.find_first_of(from_ascii("{"));
-		if (j == docstring::npos) {
-			os << latex;
-			length += latex.length();
-		} else {
-			os << latex.substr(j + 1);
-			length += latex.substr(j + 1).length();
-		}
+		docstring::size_type const b2 =
+					latex.find_last_of(from_ascii("}"));
+		int const len = b2 - b1 - 1;
+		os << latex.substr(b1 + 1, len);
+		length += len;
 		++i;
 	}
-	// When the proper language is set, we are simply passed a code
-	// point, so we should not try to close the \textLANG command.
-	if (prefixIs(latex1, from_ascii("\\" + preamble))) {
+	if (closing_brace) {
 		os << '}';
 		++length;
 	}
@@ -647,7 +680,7 @@ bool Paragraph::Private::isTextAt(string const & str, pos_type pos) const
 }
 
 
-void Paragraph::Private::latexInset(Buffer const & buf,
+void Paragraph::Private::latexInset(
 					     BufferParams const & bparams,
 					     odocstream & os,
 					     TexRow & texrow,
@@ -662,10 +695,10 @@ void Paragraph::Private::latexInset(Buffer const & buf,
 					     unsigned int & column)
 {
 	Inset * inset = owner_->getInset(i);
-	BOOST_ASSERT(inset);
+	LASSERT(inset, /**/);
 
 	if (style.pass_thru) {
-		inset->plaintext(buf, os, runparams);
+		inset->plaintext(os, runparams);
 		return;
 	}
 
@@ -683,7 +716,7 @@ void Paragraph::Private::latexInset(Buffer const & buf,
 				open_font = false;
 			}
 
-			if (running_font.family() == Font::TYPEWRITER_FAMILY)
+			if (running_font.fontInfo().family() == TYPEWRITER_FAMILY)
 				os << '~';
 
 			basefont = owner_->getLayoutFont(bparams, outerfont);
@@ -692,7 +725,6 @@ void Paragraph::Private::latexInset(Buffer const & buf,
 			if (runparams.moving_arg)
 				os << "\\protect ";
 
-			os << "\\\\\n";
 		}
 		texrow.newline();
 		texrow.start(owner_->id(), i + 1);
@@ -713,10 +745,10 @@ void Paragraph::Private::latexInset(Buffer const & buf,
 	bool close = false;
 	odocstream::pos_type const len = os.tellp();
 
-	if ((inset->lyxCode() == GRAPHICS_CODE
-	     || inset->lyxCode() == MATH_CODE
-	     || inset->lyxCode() == HYPERLINK_CODE)
-	    && running_font.isRightToLeft()) {
+	if (inset->forceLTR() 
+	    && running_font.isRightToLeft()
+		// ERT is an exception, it should be output with no decorations at all
+		&& inset->lyxCode() != ERT_CODE) {
 	    	if (running_font.language()->lang() == "farsi")
 			os << "\\beginL{}";
 		else
@@ -759,7 +791,16 @@ void Paragraph::Private::latexInset(Buffer const & buf,
 		}
 	}
 
-	int tmp = inset->latex(buf, os, runparams);
+	int tmp;
+
+	try {
+		tmp = inset->latex(os, runparams);
+	} catch (EncodingException & e) {
+		// add location information and throw again.
+		e.par_id = id_;
+		e.pos = i;
+		throw(e);
+	}
 
 	if (close) {
     	if (running_font.language()->lang() == "farsi")
@@ -803,6 +844,8 @@ void Paragraph::Private::latexSpecialChar(
 	}
 
 	if (runparams.verbatim) {
+		// FIXME UNICODE: This can fail if c cannot
+		// be encoded in the current encoding.
 		os.put(c);
 		return;
 	}
@@ -810,7 +853,7 @@ void Paragraph::Private::latexSpecialChar(
 	if (lyxrc.fontenc == "T1" && latexSpecialT1(c, os, i, column))
 		return;
 
-	if (running_font.family() == Font::TYPEWRITER_FAMILY
+	if (running_font.fontInfo().family() == TYPEWRITER_FAMILY
 		&& latexSpecialTypewriter(c, os, i, column))
 		return;
 
@@ -875,9 +918,8 @@ void Paragraph::Private::latexSpecialChar(
 		break;
 
 	default:
-
 		// LyX, LaTeX etc.
-		if (latexSpecialPhrase(os, i, column))
+		if (latexSpecialPhrase(os, i, column, runparams))
 			return;
 
 		if (c == '\0')
@@ -892,14 +934,13 @@ void Paragraph::Private::latexSpecialChar(
 				break;
 			}
 		}
-		string preamble;
-		if (Encodings::isKnownLangChar(c, preamble)) {
-			column += knownLangChars(os, c, preamble, running_change,
-				encoding, i) - 1;
-			break;
-		}
+		string script;
 		docstring const latex = encoding.latexChar(c);
-		if (latex.length() > 1 && latex[latex.length() - 1] != '}') {
+		if (Encodings::isKnownScriptChar(c, script)
+		    && prefixIs(latex, from_ascii("\\" + script)))
+			column += writeScriptChars(os, latex,
+					running_change, encoding, i) - 1;
+		else if (latex.length() > 1 && latex[latex.length() - 1] != '}') {
 			// Prevent eating of a following
 			// space or command corruption by
 			// following characters
@@ -923,13 +964,10 @@ bool Paragraph::Private::latexSpecialT1(char_type const c, odocstream & os,
 		os.put(c);
 		// In T1 encoding, these characters exist
 		// but we should avoid ligatures
-		if (i + 1 > int(text_.size()) || text_[i + 1] != c)
+		if (i + 1 >= int(text_.size()) || text_[i + 1] != c)
 			return true;
-		os << "\\,{}";
-		column += 3;
-		// Alternative code:
-		//os << "\\textcompwordmark{}";
-		//column += 19;
+		os << "\\textcompwordmark{}";
+		column += 19;
 		return true;
 	case '|':
 		os.put(c);
@@ -945,43 +983,25 @@ bool Paragraph::Private::latexSpecialTypewriter(char_type const c, odocstream &
 {
 	switch (c) {
 	case '-':
+		// within \ttfamily, "--" is merged to "-" (no endash)
+		// so we avoid this rather irritating ligature
 		if (i + 1 < int(text_.size()) && text_[i + 1] == '-') {
-			// "--" in Typewriter mode -> "-{}-"
 			os << "-{}";
 			column += 2;
 		} else
 			os << '-';
 		return true;
 
-	// I assume this is hack treating typewriter as verbatim
-	// FIXME UNICODE: This can fail if c cannot be encoded
-	// in the current encoding.
-
-	case '\0':
-		return true;
-
-	// Those characters are not directly supported.
-	case '\\':
-	case '\"':
-	case '$': case '&':
-	case '%': case '#': case '{':
-	case '}': case '_':
-	case '~':
-	case '^':
-	case '*': case '[':
-	case ' ':
-		return false;
-
+	// everything else has to be checked separately
+	// (depending on the encoding)
 	default:
-		// With Typewriter font, these characters exist.
-		os.put(c);
-		return true;
+		return false;
 	}
 }
 
 
 bool Paragraph::Private::latexSpecialPhrase(odocstream & os, pos_type & i,
-	unsigned int & column)
+	unsigned int & column, OutputParams & runparams)
 {
 	// FIXME: if we have "LaTeX" with a font
 	// change in the middle (before the 'T', then
@@ -992,6 +1012,8 @@ bool Paragraph::Private::latexSpecialPhrase(odocstream & os, pos_type & i,
 	for (size_t pnr = 0; pnr < phrases_nr; ++pnr) {
 		if (!isTextAt(special_phrases[pnr].phrase, i))
 			continue;
+		if (runparams.moving_arg)
+			os << "\\protect";
 		os << special_phrases[pnr].macro;
 		i += special_phrases[pnr].phrase.length() - 1;
 		column += special_phrases[pnr].macro.length() - 1;
@@ -1049,8 +1071,12 @@ void Paragraph::Private::validate(LaTeXFeatures & features,
 //
 /////////////////////////////////////////////////////////////////////
 
-Paragraph::Paragraph()
-	: d(new Paragraph::Private(this))
+namespace {
+	Layout const emptyParagraphLayout;
+}
+
+Paragraph::Paragraph() 
+	: d(new Paragraph::Private(this, emptyParagraphLayout))
 {
 	itemdepth = 0;
 	d->params_.clear();
@@ -1061,6 +1087,15 @@ Paragraph::Paragraph(Paragraph const & par)
 	: itemdepth(par.itemdepth),
 	d(new Paragraph::Private(*par.d, this))
 {
+	registerWords();
+}
+
+
+Paragraph::Paragraph(Paragraph const & par, pos_type beg, pos_type end)
+	: itemdepth(par.itemdepth),
+	d(new Paragraph::Private(*par.d, this, beg, end))
+{
+	registerWords();
 }
 
 
@@ -1070,8 +1105,10 @@ Paragraph & Paragraph::operator=(Paragraph const & par)
 	if (&par != this) {
 		itemdepth = par.itemdepth;
 
+		deregisterWords();
 		delete d;
 		d = new Private(*par.d, this);
+		registerWords();
 	}
 	return *this;
 }
@@ -1079,23 +1116,23 @@ Paragraph & Paragraph::operator=(Paragraph const & par)
 
 Paragraph::~Paragraph()
 {
+	deregisterWords();
 	delete d;
 }
 
 
-void Paragraph::write(Buffer const & buf, ostream & os,
-			  BufferParams const & bparams,
-			  depth_type & dth) const
+void Paragraph::write(ostream & os, BufferParams const & bparams,
+	depth_type & dth) const
 {
 	// The beginning or end of a deeper (i.e. nested) area?
-	if (dth != params().depth()) {
-		if (params().depth() > dth) {
-			while (params().depth() > dth) {
+	if (dth != d->params_.depth()) {
+		if (d->params_.depth() > dth) {
+			while (d->params_.depth() > dth) {
 				os << "\n\\begin_deeper";
 				++dth;
 			}
 		} else {
-			while (params().depth() < dth) {
+			while (d->params_.depth() < dth) {
 				os << "\n\\end_deeper";
 				--dth;
 			}
@@ -1103,11 +1140,11 @@ void Paragraph::write(Buffer const & buf, ostream & os,
 	}
 
 	// First write the layout
-	os << "\n\\begin_layout " << to_utf8(layout()->name()) << '\n';
+	os << "\n\\begin_layout " << to_utf8(d->layout_->name()) << '\n';
 
-	params().write(os);
+	d->params_.write(os);
 
-	Font font1(Font::ALL_INHERIT, bparams.language);
+	Font font1(inherit_font, bparams.language);
 
 	Change running_change = Change(Change::UNCHANGED);
 
@@ -1132,24 +1169,22 @@ void Paragraph::write(Buffer const & buf, ostream & os,
 		char_type const c = d->text_[i];
 		switch (c) {
 		case META_INSET:
-		{
-			Inset const * inset = getInset(i);
-			if (inset)
+			if (Inset const * inset = getInset(i)) {
 				if (inset->directWrite()) {
 					// international char, let it write
 					// code directly so it's shorter in
 					// the file
-					inset->write(buf, os);
+					inset->write(os);
 				} else {
 					if (i)
 						os << '\n';
 					os << "\\begin_inset ";
-					inset->write(buf, os);
+					inset->write(os);
 					os << "\n\\end_inset\n\n";
 					column = 0;
 				}
-		}
-		break;
+			}
+			break;
 		case '\\':
 			os << "\n\\backslash\n";
 			column = 0;
@@ -1169,13 +1204,10 @@ void Paragraph::write(Buffer const & buf, ostream & os,
 			}
 			// this check is to amend a bug. LyX sometimes
 			// inserts '\0' this could cause problems.
-			if (c != '\0') {
-				std::vector<char> tmp = ucs4_to_utf8(c);
-				tmp.push_back('\0');
-				os << &tmp[0];
-			} else
-				lyxerr << "ERROR (Paragraph::writeFile):"
-					" NULL char in structure." << endl;
+			if (c != '\0')
+				os << to_utf8(docstring(1, c));
+			else
+				LYXERR0("NUL char in structure.");
 			++column;
 			break;
 		}
@@ -1187,7 +1219,7 @@ void Paragraph::write(Buffer const & buf, ostream & os,
 
 void Paragraph::validate(LaTeXFeatures & features) const
 {
-	d->validate(features, *layout());
+	d->validate(features, *d->layout_);
 }
 
 
@@ -1213,22 +1245,23 @@ void Paragraph::appendChar(char_type c, Font const & font,
 void Paragraph::appendString(docstring const & s, Font const & font,
 		Change const & change)
 {
-	size_t end = s.size();
+	pos_type end = s.size();
 	size_t oldsize = d->text_.size();
 	size_t newsize = oldsize + end;
 	size_t capacity = d->text_.capacity();
 	if (newsize >= capacity)
-		d->text_.reserve(std::max(capacity + 100, newsize));
+		d->text_.reserve(max(capacity + 100, newsize));
 
 	// when appending characters, no need to update tables
 	d->text_.append(s);
 
 	// FIXME: Optimize this!
-	for (pos_type i = 0; i != end; ++i) {
+	for (pos_type i = oldsize; i != newsize; ++i) {
 		// track change
 		d->changes_.insert(change, i);
 	}
-	d->fontlist_.setRange(oldsize, newsize, font);
+	d->fontlist_.set(oldsize, font);
+	d->fontlist_.set(newsize - 1, font);
 }
 
 
@@ -1272,13 +1305,20 @@ bool Paragraph::insetAllowed(InsetCode code)
 }
 
 
+void Paragraph::resetFonts(Font const & font)
+{
+	d->fontlist_.clear();
+	d->fontlist_.set(0, font);
+	d->fontlist_.set(d->text_.size() - 1, font);
+}
+
 // Gets uninstantiated font setting at position.
 Font const Paragraph::getFontSettings(BufferParams const & bparams,
 					 pos_type pos) const
 {
 	if (pos > size()) {
-		lyxerr << " pos: " << pos << " size: " << size() << endl;
-		BOOST_ASSERT(pos <= size());
+		LYXERR0("pos: " << pos << " size: " << size());
+		LASSERT(pos <= size(), /**/);
 	}
 
 	FontList::const_iterator cit = d->fontlist_.fontIterator(pos);
@@ -1288,13 +1328,13 @@ Font const Paragraph::getFontSettings(BufferParams const & bparams,
 	if (pos == size() && !empty())
 		return getFontSettings(bparams, pos - 1);
 
-	return Font(Font::ALL_INHERIT, getParLanguage(bparams));
+	return Font(inherit_font, getParLanguage(bparams));
 }
 
 
 FontSpan Paragraph::fontSpan(pos_type pos) const
 {
-	BOOST_ASSERT(pos <= size());
+	LASSERT(pos <= size(), /**/);
 	pos_type start = 0;
 
 	FontList::const_iterator cit = d->fontlist_.begin();
@@ -1302,19 +1342,18 @@ FontSpan Paragraph::fontSpan(pos_type pos) const
 	for (; cit != end; ++cit) {
 		if (cit->pos() >= pos) {
 			if (pos >= beginOfBody())
-				return FontSpan(std::max(start, beginOfBody()),
+				return FontSpan(max(start, beginOfBody()),
 						cit->pos());
 			else
 				return FontSpan(start,
-						std::min(beginOfBody() - 1,
+						min(beginOfBody() - 1,
 							 cit->pos()));
 		}
 		start = cit->pos() + 1;
 	}
 
 	// This should not happen, but if so, we take no chances.
-	//lyxerr << "Paragraph::getEndPosOfFontSpan: This should not happen!"
-	//      << endl;
+	// LYXERR0("Paragraph::getEndPosOfFontSpan: This should not happen!");
 	return FontSpan(pos, pos);
 }
 
@@ -1325,7 +1364,7 @@ Font const Paragraph::getFirstFontSettings(BufferParams const & bparams) const
 	if (!empty() && !d->fontlist_.empty())
 		return d->fontlist_.begin()->font();
 
-	return Font(Font::ALL_INHERIT, bparams.language);
+	return Font(inherit_font, bparams.language);
 }
 
 
@@ -1337,18 +1376,18 @@ Font const Paragraph::getFirstFontSettings(BufferParams const & bparams) const
 Font const Paragraph::getFont(BufferParams const & bparams, pos_type pos,
 				 Font const & outerfont) const
 {
-	BOOST_ASSERT(pos >= 0);
+	LASSERT(pos >= 0, /**/);
 
 	Font font = getFontSettings(bparams, pos);
 
 	pos_type const body_pos = beginOfBody();
 	if (pos < body_pos)
-		font.realize(d->layout_->labelfont);
+		font.fontInfo().realize(d->layout_->labelfont);
 	else
-		font.realize(d->layout_->font);
+		font.fontInfo().realize(d->layout_->font);
 
-	font.realize(outerfont);
-	font.realize(bparams.getFont());
+	font.fontInfo().realize(outerfont.fontInfo());
+	font.fontInfo().realize(bparams.getFont().fontInfo());
 
 	return font;
 }
@@ -1357,35 +1396,32 @@ Font const Paragraph::getFont(BufferParams const & bparams, pos_type pos,
 Font const Paragraph::getLabelFont
 	(BufferParams const & bparams, Font const & outerfont) const
 {
-	Font tmpfont = layout()->labelfont;
-	tmpfont.setLanguage(getParLanguage(bparams));
-	tmpfont.realize(outerfont);
-	tmpfont.realize(bparams.getFont());
-	return tmpfont;
+	FontInfo tmpfont = d->layout_->labelfont;
+	tmpfont.realize(outerfont.fontInfo());
+	tmpfont.realize(bparams.getFont().fontInfo());
+	return Font(tmpfont, getParLanguage(bparams));
 }
 
 
 Font const Paragraph::getLayoutFont
 	(BufferParams const & bparams, Font const & outerfont) const
 {
-	Font tmpfont = layout()->font;
-	tmpfont.setLanguage(getParLanguage(bparams));
-	tmpfont.realize(outerfont);
-	tmpfont.realize(bparams.getFont());
-	return tmpfont;
+	FontInfo tmpfont = d->layout_->font;
+	tmpfont.realize(outerfont.fontInfo());
+	tmpfont.realize(bparams.getFont().fontInfo());
+	return Font(tmpfont, getParLanguage(bparams));
 }
 
 
 /// Returns the height of the highest font in range
-Font_size Paragraph::highestFontInRange
-	(pos_type startpos, pos_type endpos, Font_size def_size) const
+FontSize Paragraph::highestFontInRange
+	(pos_type startpos, pos_type endpos, FontSize def_size) const
 {
 	return d->fontlist_.highestInRange(startpos, endpos, def_size);
 }
 
 
-char_type
-Paragraph::getUChar(BufferParams const & bparams, pos_type pos) const
+char_type Paragraph::getUChar(BufferParams const & bparams, pos_type pos) const
 {
 	char_type c = d->text_[pos];
 	if (!lyxrc.rtl_support)
@@ -1420,14 +1456,13 @@ Paragraph::getUChar(BufferParams const & bparams, pos_type pos) const
 	}
 	if (uc != c && getFontSettings(bparams, pos).isRightToLeft())
 		return uc;
-	else
-		return c;
+	return c;
 }
 
 
 void Paragraph::setFont(pos_type pos, Font const & font)
 {
-	BOOST_ASSERT(pos <= size());
+	LASSERT(pos <= size(), /**/);
 
 	// First, reduce font against layout/label font
 	// Update: The setCharFont() routine in text2.cpp already
@@ -1439,9 +1474,8 @@ void Paragraph::setFont(pos_type pos, Font const & font)
 
 void Paragraph::makeSameLayout(Paragraph const & par)
 {
-	layout(par.layout());
-	// move to pimpl?
-	d->params_ = par.params();
+	d->layout_ = par.d->layout_;
+	d->params_ = par.d->params_;
 }
 
 
@@ -1466,45 +1500,46 @@ bool Paragraph::stripLeadingSpaces(bool trackChanges)
 
 bool Paragraph::hasSameLayout(Paragraph const & par) const
 {
-	return par.layout() == layout() && d->params_.sameLayout(par.params());
+	return par.d->layout_ == d->layout_
+		&& d->params_.sameLayout(par.d->params_);
 }
 
 
 depth_type Paragraph::getDepth() const
 {
-	return params().depth();
+	return d->params_.depth();
 }
 
 
 depth_type Paragraph::getMaxDepthAfter() const
 {
-	if (layout()->isEnvironment())
-		return params().depth() + 1;
+	if (d->layout_->isEnvironment())
+		return d->params_.depth() + 1;
 	else
-		return params().depth();
+		return d->params_.depth();
 }
 
 
 char Paragraph::getAlign() const
 {
-	if (params().align() == LYX_ALIGN_LAYOUT)
-		return layout()->align;
+	if (d->params_.align() == LYX_ALIGN_LAYOUT)
+		return d->layout_->align;
 	else
-		return params().align();
+		return d->params_.align();
 }
 
 
-docstring const & Paragraph::getLabelstring() const
+docstring const & Paragraph::labelString() const
 {
-	return params().labelString();
+	return d->params_.labelString();
 }
 
 
 // the next two functions are for the manual labels
 docstring const Paragraph::getLabelWidthString() const
 {
-	if (layout()->margintype == MARGIN_MANUAL)
-		return params().labelWidthString();
+	if (d->layout_->margintype == MARGIN_MANUAL)
+		return d->params_.labelWidthString();
 	else
 		return _("Senseless with this layout!");
 }
@@ -1512,14 +1547,14 @@ docstring const Paragraph::getLabelWidthString() const
 
 void Paragraph::setLabelWidthString(docstring const & s)
 {
-	params().labelWidthString(s);
+	d->params_.labelWidthString(s);
 }
 
 
 docstring const Paragraph::translateIfPossible(docstring const & s,
 		BufferParams const & bparams) const
 {
-	if (!support::isAscii(s) || s.empty()) {
+	if (!isAscii(s) || s.empty()) {
 		// This must be a user defined layout. We cannot translate
 		// this, since gettext accepts only ascii keys.
 		return s;
@@ -1530,21 +1565,21 @@ docstring const Paragraph::translateIfPossible(docstring const & s,
 }
 
 
-docstring Paragraph::expandLabel(LayoutPtr const & layout,
+docstring Paragraph::expandLabel(Layout const & layout,
 		BufferParams const & bparams, bool process_appendix) const
 {
-	TextClass const & tclass = bparams.getTextClass();
+	DocumentClass const & tclass = bparams.documentClass();
 
 	docstring fmt;
-	if (process_appendix && params().appendix())
-		fmt = translateIfPossible(layout->labelstring_appendix(),
+	if (process_appendix && d->params_.appendix())
+		fmt = translateIfPossible(layout.labelstring_appendix(),
 			bparams);
 	else
-		fmt = translateIfPossible(layout->labelstring(), bparams);
+		fmt = translateIfPossible(layout.labelstring(), bparams);
 
-	if (fmt.empty() && layout->labeltype == LABEL_COUNTER 
-	    && !layout->counter.empty())
-		fmt = "\\the" + layout->counter;
+	if (fmt.empty() && layout.labeltype == LABEL_COUNTER 
+	    && !layout.counter.empty())
+		fmt = "\\the" + layout.counter;
 
 	// handle 'inherited level parts' in 'fmt',
 	// i.e. the stuff between '@' in   '@Section@.\arabic{subsection}'
@@ -1566,15 +1601,15 @@ docstring Paragraph::expandLabel(LayoutPtr const & layout,
 }
 
 
-void Paragraph::applyLayout(LayoutPtr const & new_layout)
+void Paragraph::applyLayout(Layout const & new_layout)
 {
-	layout(new_layout);
-	LyXAlignment const oldAlign = params().align();
+	d->layout_ = &new_layout;
+	LyXAlignment const oldAlign = d->params_.align();
 	
-	if (!(oldAlign & layout()->alignpossible)) {
+	if (!(oldAlign & d->layout_->alignpossible)) {
 		frontend::Alert::warning(_("Alignment not permitted"), 
 			_("The new layout does not permit the alignment previously used.\nSetting to default."));
-		params().align(LYX_ALIGN_LAYOUT);
+		d->params_.align(LYX_ALIGN_LAYOUT);
 	}
 }
 
@@ -1587,7 +1622,7 @@ pos_type Paragraph::beginOfBody() const
 
 void Paragraph::setBeginOfBody()
 {
-	if (layout()->labeltype != LABEL_MANUAL) {
+	if (d->layout_->labeltype != LABEL_MANUAL) {
 		d->begin_of_body_ = 0;
 		return;
 	}
@@ -1620,20 +1655,30 @@ void Paragraph::setBeginOfBody()
 }
 
 
-InsetBibitem * Paragraph::bibitem() const
+bool Paragraph::forcePlainLayout() const
 {
-	if (!d->insetlist_.empty()) {
-		Inset * inset = d->insetlist_.begin()->inset;
-		if (inset->lyxCode() == BIBITEM_CODE)
-			return static_cast<InsetBibitem *>(inset);
-	}
-	return 0;
+	Inset const * const inset = inInset();
+	if (!inset)
+		return true;
+	return inset->forcePlainLayout();
+}
+
+
+bool Paragraph::allowParagraphCustomization() const
+{
+	Inset const * const inset = inInset();
+	if (!inset)
+		return true;
+	return inset->allowParagraphCustomization();
 }
 
 
-bool Paragraph::forceDefaultParagraphs() const
+bool Paragraph::usePlainLayout() const
 {
-	return inInset() && inInset()->forceDefaultParagraphs(0);
+	Inset const * const inset = inInset();
+	if (!inset)
+		return false;
+	return inset->usePlainLayout();
 }
 
 
@@ -1818,30 +1863,20 @@ int Paragraph::Private::endTeXParParams(BufferParams const & bparams,
 
 
 // This one spits out the text of the paragraph
-bool Paragraph::latex(Buffer const & buf,
-				BufferParams const & bparams,
+bool Paragraph::latex(BufferParams const & bparams,
 				Font const & outerfont,
 				odocstream & os, TexRow & texrow,
 				OutputParams const & runparams) const
 {
-	LYXERR(Debug::LATEX) << "SimpleTeXOnePar...     " << this << endl;
+	LYXERR(Debug::LATEX, "SimpleTeXOnePar...     " << this);
 
 	bool return_value = false;
 
-	LayoutPtr style;
-
-	// well we have to check if we are in an inset with unlimited
-	// length (all in one row) if that is true then we don't allow
-	// any special options in the paragraph and also we don't allow
-	// any environment other than the default layout of the text class
-	// to be valid!
-	bool asdefault = forceDefaultParagraphs();
+	bool asdefault = forcePlainLayout();
 
-	if (asdefault) {
-		style = bparams.getTextClass().defaultLayout();
-	} else {
-		style = layout();
-	}
+	Layout const & style = asdefault ?
+		bparams.documentClass().plainLayout() :
+		*d->layout_;
 
 	// Current base font for all inherited font changes, without any
 	// change caused by an individual character, except for the language:
@@ -1876,7 +1911,7 @@ bool Paragraph::latex(Buffer const & buf,
 
 	// if the paragraph is empty, the loop will not be entered at all
 	if (empty()) {
-		if (style->isCommand()) {
+		if (style.isCommand()) {
 			os << '{';
 			++column;
 		}
@@ -1905,7 +1940,7 @@ bool Paragraph::latex(Buffer const & buf,
 				os << "}] ";
 				column +=3;
 			}
-			if (style->isCommand()) {
+			if (style.isCommand()) {
 				os << '{';
 				++column;
 			}
@@ -1957,13 +1992,22 @@ bool Paragraph::latex(Buffer const & buf,
 			open_font = false;
 		}
 
+		// close babel's font environment before opening CJK.
+		if (!running_font.language()->babel().empty() &&
+		    font.language()->encoding()->package() == Encoding::CJK) {
+				string end_tag = subst(lyxrc.language_command_end,
+							"$$lang",
+							running_font.language()->babel());
+				os << from_ascii(end_tag);
+				column += end_tag.length();
+		}
+
 		// Switch file encoding if necessary (and allowed)
 		if (!runparams.verbatim && 
-		    runparams.encoding->package() == Encoding::inputenc &&
-		    font.language()->encoding()->package() == Encoding::inputenc) {
-			std::pair<bool, int> const enc_switch = switchEncoding(os, bparams,
-					runparams.moving_arg, *(runparams.encoding),
-					*(font.language()->encoding()));
+		    runparams.encoding->package() != Encoding::none &&
+		    font.language()->encoding()->package() != Encoding::none) {
+			pair<bool, int> const enc_switch = switchEncoding(os, bparams,
+					runparams, *(font.language()->encoding()));
 			if (enc_switch.first) {
 				column += enc_switch.second;
 				runparams.encoding = font.language()->encoding();
@@ -1996,13 +2040,13 @@ bool Paragraph::latex(Buffer const & buf,
 		if (c == ' ') {
 			// FIXME: integrate this case in latexSpecialChar
 			// Do not print the separation of the optional argument
-			// if style->pass_thru is false. This works because
+			// if style.pass_thru is false. This works because
 			// latexSpecialChar ignores spaces if
-			// style->pass_thru is false.
+			// style.pass_thru is false.
 			if (i != body_pos - 1) {
 				if (d->simpleTeXBlanks(
-						*(runparams.encoding), os, texrow,
-						i, column, font, *style)) {
+						runparams, os, texrow,
+						i, column, font, style)) {
 					// A surrogate pair was output. We
 					// must not call latexSpecialChar
 					// in this iteration, since it would output
@@ -2014,23 +2058,38 @@ bool Paragraph::latex(Buffer const & buf,
 		}
 
 		OutputParams rp = runparams;
-		rp.free_spacing = style->free_spacing;
+		rp.free_spacing = style.free_spacing;
 		rp.local_font = &font;
-		rp.intitle = style->intitle;
+		rp.intitle = style.intitle;
 
 		// Two major modes:  LaTeX or plain
 		// Handle here those cases common to both modes
 		// and then split to handle the two modes separately.
 		if (c == META_INSET)
-			d->latexInset(buf, bparams, os,
+			d->latexInset(bparams, os,
 					texrow, rp, running_font,
 					basefont, outerfont, open_font,
-					runningChange, *style, i, column);
-		else
-			d->latexSpecialChar(os, rp, running_font, runningChange,
-				*style, i, column);
+					runningChange, style, i, column);
+		else {
+			try {
+				d->latexSpecialChar(os, rp, running_font, runningChange,
+					style, i, column);
+			} catch (EncodingException & e) {
+				if (runparams.dryrun) {
+					os << "<" << _("LyX Warning: ")
+					   << _("uncodable character") << " '";
+					os.put(c);
+					os << "'>";
+				} else {
+					// add location information and throw again.
+					e.par_id = id();
+					e.pos = i;
+					throw(e);
+				}
+			}
+		}
 
-		// Set the encoding to that returned from simpleTeXSpecialChars (see
+		// Set the encoding to that returned from latexSpecialChar (see
 		// comment for encoding member in OutputParams.h)
 		runparams.encoding = rp.encoding;
 	}
@@ -2069,59 +2128,15 @@ bool Paragraph::latex(Buffer const & buf,
 					  runparams.moving_arg);
 	}
 
-	LYXERR(Debug::LATEX) << "SimpleTeXOnePar...done " << this << endl;
+	LYXERR(Debug::LATEX, "SimpleTeXOnePar...done " << this);
 	return return_value;
 }
 
 
-namespace {
-
-enum PAR_TAG {
-	PAR_NONE=0,
-	TT = 1,
-	SF = 2,
-	BF = 4,
-	IT = 8,
-	SL = 16,
-	EM = 32
-};
-
-
-string tag_name(PAR_TAG const & pt) {
-	switch (pt) {
-	case PAR_NONE: return "!-- --";
-	case TT: return "tt";
-	case SF: return "sf";
-	case BF: return "bf";
-	case IT: return "it";
-	case SL: return "sl";
-	case EM: return "em";
-	}
-	return "";
-}
-
-
-inline
-void operator|=(PAR_TAG & p1, PAR_TAG const & p2)
-{
-	p1 = static_cast<PAR_TAG>(p1 | p2);
-}
-
-
-inline
-void reset(PAR_TAG & p1, PAR_TAG const & p2)
-{
-	p1 = static_cast<PAR_TAG>(p1 & ~p2);
-}
-
-} // anon
-
-
 bool Paragraph::emptyTag() const
 {
 	for (pos_type i = 0; i < size(); ++i) {
-		if (isInset(i)) {
-			Inset const * inset = getInset(i);
+		if (Inset const * inset = getInset(i)) {
 			InsetCode lyx_code = inset->lyxCode();
 			if (lyx_code != TOC_CODE &&
 			    lyx_code != INCLUDE_CODE &&
@@ -2142,11 +2157,11 @@ bool Paragraph::emptyTag() const
 }
 
 
-string Paragraph::getID(Buffer const & buf, OutputParams const & runparams) const
+string Paragraph::getID(Buffer const & buf, OutputParams const & runparams)
+	const
 {
 	for (pos_type i = 0; i < size(); ++i) {
-		if (isInset(i)) {
-			Inset const * inset = getInset(i);
+		if (Inset const * inset = getInset(i)) {
 			InsetCode lyx_code = inset->lyxCode();
 			if (lyx_code == LABEL_CODE) {
 				InsetLabel const * const il = static_cast<InsetLabel const *>(inset);
@@ -2154,19 +2169,18 @@ string Paragraph::getID(Buffer const & buf, OutputParams const & runparams) cons
 				return "id='" + to_utf8(sgml::cleanID(buf, runparams, id)) + "'";
 			}
 		}
-
 	}
 	return string();
 }
 
 
-pos_type Paragraph::getFirstWord(Buffer const & buf, odocstream & os, OutputParams const & runparams) const
+pos_type Paragraph::firstWord(odocstream & os, OutputParams const & runparams)
+	const
 {
 	pos_type i;
 	for (i = 0; i < size(); ++i) {
-		if (isInset(i)) {
-			Inset const * inset = getInset(i);
-			inset->docbook(buf, os, runparams);
+		if (Inset const * inset = getInset(i)) {
+			inset->docbook(os, runparams);
 		} else {
 			char_type c = d->text_[i];
 			if (c == ' ')
@@ -2178,13 +2192,13 @@ pos_type Paragraph::getFirstWord(Buffer const & buf, odocstream & os, OutputPara
 }
 
 
-bool Paragraph::onlyText(Buffer const & buf, Font const & outerfont, pos_type initial) const
+bool Paragraph::Private::onlyText(Buffer const & buf, Font const & outerfont, pos_type initial) const
 {
 	Font font_old;
-
-	for (pos_type i = initial; i < size(); ++i) {
-		Font font = getFont(buf.params(), i, outerfont);
-		if (isInset(i))
+	pos_type size = text_.size();
+	for (pos_type i = initial; i < size; ++i) {
+		Font font = owner_->getFont(buf.params(), i, outerfont);
+		if (text_[i] == META_INSET)
 			return false;
 		if (i != initial && font != font_old)
 			return false;
@@ -2203,11 +2217,11 @@ void Paragraph::simpleDocBookOnePar(Buffer const & buf,
 {
 	bool emph_flag = false;
 
-	LayoutPtr const & style = layout();
-	Font font_old =
-		style->labeltype == LABEL_MANUAL ? style->labelfont : style->font;
+	Layout const & style = *d->layout_;
+	FontInfo font_old =
+		style.labeltype == LABEL_MANUAL ? style.labelfont : style.font;
 
-	if (style->pass_thru && !onlyText(buf, outerfont, initial))
+	if (style.pass_thru && !d->onlyText(buf, outerfont, initial))
 		os << "]]>";
 
 	// parsing main loop
@@ -2215,8 +2229,8 @@ void Paragraph::simpleDocBookOnePar(Buffer const & buf,
 		Font font = getFont(buf.params(), i, outerfont);
 
 		// handle <emphasis> tag
-		if (font_old.emph() != font.emph()) {
-			if (font.emph() == Font::ON) {
+		if (font_old.emph() != font.fontInfo().emph()) {
+			if (font.fontInfo().emph() == FONT_ON) {
 				os << "<emphasis>";
 				emph_flag = true;
 			} else if (i != initial) {
@@ -2225,63 +2239,80 @@ void Paragraph::simpleDocBookOnePar(Buffer const & buf,
 			}
 		}
 
-		if (isInset(i)) {
-			Inset const * inset = getInset(i);
-			inset->docbook(buf, os, runparams);
+		if (Inset const * inset = getInset(i)) {
+			inset->docbook(os, runparams);
 		} else {
 			char_type c = d->text_[i];
 
-			if (style->pass_thru)
+			if (style.pass_thru)
 				os.put(c);
 			else
 				os << sgml::escapeChar(c);
 		}
-		font_old = font;
+		font_old = font.fontInfo();
 	}
 
 	if (emph_flag) {
 		os << "</emphasis>";
 	}
 
-	if (style->free_spacing)
+	if (style.free_spacing)
 		os << '\n';
-	if (style->pass_thru && !onlyText(buf, outerfont, initial))
+	if (style.pass_thru && !d->onlyText(buf, outerfont, initial))
 		os << "<![CDATA[";
 }
 
 
 bool Paragraph::isHfill(pos_type pos) const
 {
-	return isInset(pos)
-		&& getInset(pos)->lyxCode() == HFILL_CODE;
+	Inset const * inset = getInset(pos);
+	return inset && (inset->lyxCode() == SPACE_CODE &&
+			 inset->isStretchableSpace());
 }
 
 
 bool Paragraph::isNewline(pos_type pos) const
 {
-	return isInset(pos)
-		&& getInset(pos)->lyxCode() == NEWLINE_CODE;
+	Inset const * inset = getInset(pos);
+	return inset && inset->lyxCode() == NEWLINE_CODE;
 }
 
 
 bool Paragraph::isLineSeparator(pos_type pos) const
 {
 	char_type const c = d->text_[pos];
-	return isLineSeparatorChar(c)
-		|| (c == META_INSET && getInset(pos) &&
-		getInset(pos)->isLineSeparator());
+	if (isLineSeparatorChar(c))
+		return true;
+	Inset const * inset = getInset(pos);
+	return inset && inset->isLineSeparator();
 }
 
 
 /// Used by the spellchecker
 bool Paragraph::isLetter(pos_type pos) const
 {
-	if (isInset(pos))
-		return getInset(pos)->isLetter();
-	else {
-		char_type const c = d->text_[pos];
-		return isLetterChar(c) || isDigit(c);
-	}
+	if (Inset const * inset = getInset(pos))
+		return inset->isLetter();
+	char_type const c = d->text_[pos];
+	return isLetterChar(c) || isDigit(c);
+}
+
+
+bool Paragraph::isChar(pos_type pos) const
+{
+	if (Inset const * inset = getInset(pos))
+		return inset->isChar();
+	char_type const c = d->text_[pos];
+	return !isLetterChar(c) && !isDigit(c) && !lyx::isSpace(c);
+}
+
+
+bool Paragraph::isSpace(pos_type pos) const
+{
+	if (Inset const * inset = getInset(pos))
+		return inset->isSpace();
+	char_type const c = d->text_[pos];
+	return lyx::isSpace(c);
 }
 
 
@@ -2333,29 +2364,27 @@ bool Paragraph::isMultiLingual(BufferParams const & bparams) const
 }
 
 
-// Convert the paragraph to a string.
-// Used for building the table of contents
-docstring const Paragraph::asString(Buffer const & buffer, bool label) const
+docstring Paragraph::asString(int options) const
 {
-	return asString(buffer, 0, size(), label);
+	return asString(0, size(), options);
 }
 
 
-docstring const Paragraph::asString(Buffer const & buffer,
-				 pos_type beg, pos_type end, bool label) const
+docstring Paragraph::asString(pos_type beg, pos_type end, int options) const
 {
-
 	odocstringstream os;
 
-	if (beg == 0 && label && !params().labelString().empty())
-		os << params().labelString() << ' ';
+	if (beg == 0 
+		&& options & AS_STR_LABEL
+		&& !d->params_.labelString().empty())
+		os << d->params_.labelString() << ' ';
 
 	for (pos_type i = beg; i < end; ++i) {
 		char_type const c = d->text_[i];
 		if (isPrintable(c))
 			os.put(c);
-		else if (c == META_INSET)
-			getInset(i)->textString(buffer, os);
+		else if (c == META_INSET && options & AS_STR_INSETS)
+			getInset(i)->textString(os);
 	}
 
 	return os.str();
@@ -2374,15 +2403,24 @@ int Paragraph::id() const
 }
 
 
-LayoutPtr const & Paragraph::layout() const
+Layout const & Paragraph::layout() const
+{
+	return *d->layout_;
+}
+
+
+void Paragraph::setLayout(Layout const & layout)
 {
-	return d->layout_;
+	d->layout_ = &layout;
 }
 
 
-void Paragraph::layout(LayoutPtr const & new_layout)
+void Paragraph::setPlainOrDefaultLayout(DocumentClass const & tclass)
 {
-	d->layout_ = new_layout;
+	if (usePlainLayout())
+		setLayout(tclass.plainLayout());
+	else
+		setLayout(tclass.defaultLayout());
 }
 
 
@@ -2394,8 +2432,7 @@ Inset * Paragraph::inInset() const
 
 InsetCode Paragraph::ownerCode() const
 {
-	return d->inset_owner_ ?
-		d->inset_owner_->lyxCode() : NO_CODE;
+	return d->inset_owner_ ? d->inset_owner_->lyxCode() : NO_CODE;
 }
 
 
@@ -2413,26 +2450,23 @@ ParagraphParameters const & Paragraph::params() const
 
 bool Paragraph::isFreeSpacing() const
 {
-	if (layout()->free_spacing)
+	if (d->layout_->free_spacing)
 		return true;
-
-	// for now we just need this, later should we need this in some
-	// other way we can always add a function to Inset too.
-	return ownerCode() == ERT_CODE || ownerCode() == LISTINGS_CODE;
+	return d->inset_owner_ && d->inset_owner_->isFreeSpacing();
 }
 
 
 bool Paragraph::allowEmpty() const
 {
-	if (layout()->keepempty)
+	if (d->layout_->keepempty)
 		return true;
-	return ownerCode() == ERT_CODE || ownerCode() == LISTINGS_CODE;
+	return d->inset_owner_ && d->inset_owner_->allowEmpty();
 }
 
 
 char_type Paragraph::transformChar(char_type c, pos_type pos) const
 {
-	if (!Encodings::is_arabic(c))
+	if (!Encodings::isArabicChar(c))
 		return c;
 
 	char_type prev_char = ' ';
@@ -2440,7 +2474,7 @@ char_type Paragraph::transformChar(char_type c, pos_type pos) const
 
 	for (pos_type i = pos - 1; i >= 0; --i) {
 		char_type const par_char = d->text_[i];
-		if (!Encodings::isComposeChar_arabic(par_char)) {
+		if (!Encodings::isArabicComposeChar(par_char)) {
 			prev_char = par_char;
 			break;
 		}
@@ -2448,21 +2482,21 @@ char_type Paragraph::transformChar(char_type c, pos_type pos) const
 
 	for (pos_type i = pos + 1, end = size(); i < end; ++i) {
 		char_type const par_char = d->text_[i];
-		if (!Encodings::isComposeChar_arabic(par_char)) {
+		if (!Encodings::isArabicComposeChar(par_char)) {
 			next_char = par_char;
 			break;
 		}
 	}
 
-	if (Encodings::is_arabic(next_char)) {
-		if (Encodings::is_arabic(prev_char) &&
-			!Encodings::is_arabic_special(prev_char))
+	if (Encodings::isArabicChar(next_char)) {
+		if (Encodings::isArabicChar(prev_char) &&
+			!Encodings::isArabicSpecialChar(prev_char))
 			return Encodings::transformChar(c, Encodings::FORM_MEDIAL);
 		else
 			return Encodings::transformChar(c, Encodings::FORM_INITIAL);
 	} else {
-		if (Encodings::is_arabic(prev_char) &&
-			!Encodings::is_arabic_special(prev_char))
+		if (Encodings::isArabicChar(prev_char) &&
+			!Encodings::isArabicSpecialChar(prev_char))
 			return Encodings::transformChar(c, Encodings::FORM_FINAL);
 		else
 			return Encodings::transformChar(c, Encodings::FORM_ISOLATED);
@@ -2470,14 +2504,14 @@ char_type Paragraph::transformChar(char_type c, pos_type pos) const
 }
 
 
-int Paragraph::checkBiblio(bool track_changes)
+int Paragraph::checkBiblio(Buffer const & buffer)
 {
-	//FIXME From JS:
-	//This is getting more and more a mess. ...We really should clean
-	//up this bibitem issue for 1.6. See also bug 2743.
+	// FIXME From JS:
+	// This is getting more and more a mess. ...We really should clean
+	// up this bibitem issue for 1.6. See also bug 2743.
 
 	// Add bibitem insets if necessary
-	if (layout()->labeltype != LABEL_BIBLIO)
+	if (d->layout_->labeltype != LABEL_BIBLIO)
 		return 0;
 
 	bool hasbibitem = !d->insetlist_.empty()
@@ -2485,6 +2519,8 @@ int Paragraph::checkBiblio(bool track_changes)
 		&& d->text_[0] == META_INSET
 		&& d->insetlist_.begin()->inset->lyxCode() == BIBITEM_CODE;
 
+	bool track_changes = buffer.params().trackChanges;
+
 	docstring oldkey;
 	docstring oldlabel;
 
@@ -2507,13 +2543,13 @@ int Paragraph::checkBiblio(bool track_changes)
 			break;
 	}
 
-	//There was an InsetBibitem at the beginning, and we didn't
-	//have to erase one.
+	// There was an InsetBibitem at the beginning, and we didn't
+	// have to erase one.
 	if (hasbibitem && erasedInsetPosition < 0)
 			return 0;
 
-	//There was an InsetBibitem at the beginning and we did have to
-	//erase one. So we give its properties to the beginning inset.
+	// There was an InsetBibitem at the beginning and we did have to
+	// erase one. So we give its properties to the beginning inset.
 	if (hasbibitem) {
 		InsetBibitem * inset =
 			static_cast<InsetBibitem *>(d->insetlist_.begin()->inset);
@@ -2523,9 +2559,10 @@ int Paragraph::checkBiblio(bool track_changes)
 		return -erasedInsetPosition;
 	}
 
-	//There was no inset at the beginning, so we need to create one with
-	//the key and label of the one we erased.
-	InsetBibitem * inset(new InsetBibitem(InsetCommandParams(BIBITEM_CODE)));
+	// There was no inset at the beginning, so we need to create one with
+	// the key and label of the one we erased.
+	InsetBibitem * inset = 
+		new InsetBibitem(buffer, InsetCommandParams(BIBITEM_CODE));
 	// restore values of previously deleted item in this par.
 	if (!oldkey.empty())
 		inset->setParam("key", oldkey);
@@ -2567,6 +2604,12 @@ InsetList const & Paragraph::insetList() const
 }
 
 
+void Paragraph::setBuffer(Buffer & b)
+{
+	d->insetlist_.setBuffer(b);
+}
+
+
 Inset * Paragraph::releaseInset(pos_type pos)
 {
 	Inset * inset = d->insetlist_.release(pos);
@@ -2578,31 +2621,20 @@ Inset * Paragraph::releaseInset(pos_type pos)
 
 Inset * Paragraph::getInset(pos_type pos)
 {
-	return d->insetlist_.get(pos);
+	return (pos < pos_type(d->text_.size()) && d->text_[pos] == META_INSET)
+		 ? d->insetlist_.get(pos) : 0;
 }
 
 
 Inset const * Paragraph::getInset(pos_type pos) const
 {
-	return d->insetlist_.get(pos);
-}
-
-
-int Paragraph::numberOfOptArgs() const
-{
-	int num = 0;
-	InsetList::const_iterator it = insetList().begin();
-	InsetList::const_iterator end = insetList().end();
-	for (; it != end ; ++it) {
-		if (it->inset->lyxCode() == OPTARG_CODE)
-			++num;
-	}
-	return num;
+	return (pos < pos_type(d->text_.size()) && d->text_[pos] == META_INSET)
+		 ? d->insetlist_.get(pos) : 0;
 }
 
 
 void Paragraph::changeCase(BufferParams const & bparams, pos_type pos,
-		pos_type right, TextCase action)
+		pos_type & right, TextCase action)
 {
 	// process sequences of modified characters; in change
 	// tracking mode, this approach results in much better
@@ -2730,4 +2762,66 @@ bool Paragraph::isSeparator(pos_type pos) const
 }
 
 
+void Paragraph::deregisterWords()
+{
+	Private::Words::const_iterator it;
+	WordList & wl = theWordList();
+	for (it = d->words_.begin(); it != d->words_.end(); ++it)
+		wl.remove(*it);
+	d->words_.clear();
+}
+
+
+void Paragraph::collectWords(CursorSlice const & sl)
+{
+	// find new words
+	bool inword = false;
+
+	//lyxerr << "Words: ";
+	pos_type n = size();
+	for (pos_type pos = 0; pos != n; ++pos) {
+		if (isDeleted(pos))
+			continue;
+
+		if (!isLetter(pos)) {
+			inword = false;
+			continue;
+		}
+
+		if (inword)
+			continue;
+
+		inword = true;
+		CursorSlice from = sl;
+		CursorSlice to = sl;
+		from.pos() = pos;
+		to.pos() = pos;
+		from.text()->getWord(from, to, WHOLE_WORD);
+		if (to.pos() - from.pos() < 6)
+			continue;
+		docstring word = asString(from.pos(), to.pos(), false);
+		d->words_.insert(word);
+		//lyxerr << word << " ";
+	}
+	//lyxerr << std::endl;
+}
+
+
+void Paragraph::registerWords()
+{
+	Private::Words::const_iterator it;
+	WordList & wl = theWordList();
+	for (it = d->words_.begin(); it != d->words_.end(); ++it)
+		wl.insert(*it);
+}
+
+
+void Paragraph::updateWords(CursorSlice const & sl)
+{
+	LASSERT(&sl.paragraph() == this, /**/);
+	deregisterWords();
+	collectWords(sl);
+	registerWords();
+}
+
 } // namespace lyx