X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FParagraph.cpp;h=b5f31281f4da9ee0dff6d11b8825bdb8a68a63f1;hb=3256dbc78a76f947080dc8e3414abc60e4b5c5af;hp=8b3d7df38f039a435d5356c85d083b6f0dcab784;hpb=75febee877693cdf229eadc6326579779bfddfbe;p=lyx.git

diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp
index 8b3d7df38f..b5f31281f4 100644
--- a/src/Paragraph.cpp
+++ b/src/Paragraph.cpp
@@ -24,23 +24,20 @@
 #include "Changes.h"
 #include "Counters.h"
 #include "Encoding.h"
-#include "debug.h"
-#include "gettext.h"
 #include "InsetList.h"
 #include "Language.h"
 #include "LaTeXFeatures.h"
-#include "Color.h"
 #include "Layout.h"
 #include "Length.h"
 #include "Font.h"
 #include "FontList.h"
 #include "LyXRC.h"
-#include "Messages.h"
 #include "OutputParams.h"
 #include "output_latex.h"
 #include "paragraph_funcs.h"
 #include "ParagraphParameters.h"
 #include "sgml.h"
+#include "TextClass.h"
 #include "TexRow.h"
 #include "VSpace.h"
 
@@ -49,30 +46,22 @@
 
 #include "insets/InsetBibitem.h"
 #include "insets/InsetLabel.h"
-#include "insets/InsetOptArg.h"
 
+#include "support/convert.h"
+#include "support/debug.h"
+#include "support/gettext.h"
 #include "support/lstrings.h"
+#include "support/Messages.h"
 #include "support/textutils.h"
-#include "support/convert.h"
-#include "support/unicode.h"
 
 #include <sstream>
 #include <vector>
 
-using std::endl;
-using std::string;
-using std::ostream;
+using namespace std;
+using namespace lyx::support;
 
 namespace lyx {
 
-using support::contains;
-using support::lowercase;
-using support::prefixIs;
-using support::suffixIs;
-using support::rsplit;
-using support::rtrim;
-using support::uppercase;
-
 namespace {
 /// Inset identifier (above 0x10ffff, for ucs-4)
 char_type const META_INSET = 0x200001;
@@ -103,18 +92,18 @@ public:
 	/// Output a space in appropriate formatting (or a surrogate pair
 	/// if the next character is a combining character).
 	/// \return whether a surrogate pair was output.
-	bool simpleTeXBlanks(Encoding const &,
+	bool simpleTeXBlanks(OutputParams const &,
 			     odocstream &, TexRow & texrow,
 			     pos_type i,
 			     unsigned int & column,
 			     Font const & font,
 			     Layout const & style);
 
-	/// Output consecutive known unicode chars, belonging to the same
-	/// language as specified by \p preamble, to \p os starting from \p c.
+	/// Output consecutive unicode chars, belonging to the same script as
+	/// specified by the latex macro \p ltx, to \p os starting from \p i.
 	/// \return the number of characters written.
-	int knownLangChars(odocstream & os, char_type c, string & preamble,
-			   Change &, Encoding const &, pos_type &);
+	int writeScriptChars(odocstream & os, docstring const & ltx,
+			   Change &, Encoding const &, pos_type & i);
 
 	/// This could go to ParagraphParameters if we want to.
 	int startTeXParParams(BufferParams const &, odocstream &, TexRow &,
@@ -163,17 +152,19 @@ public:
 	bool latexSpecialPhrase(
 		odocstream & os,
 		pos_type & i,
-		unsigned int & column);
+		unsigned int & column,
+		OutputParams & runparams);
 
 	///
 	void validate(LaTeXFeatures & features,
 		      Layout const & layout) const;
 
-	///
-	pos_type size() const { return owner_->size(); }
+	/// Checks if the paragraph contains only text and no inset or font change.
+	bool onlyText(Buffer const & buf, Font const & outerfont,
+		      pos_type initial) const;
 
 	/// match a string against a particular point in the paragraph
-	bool isTextAt(std::string const & str, pos_type pos) const;
+	bool isTextAt(string const & str, pos_type pos) const;
 	
 	/// Which Paragraph owns us?
 	Paragraph * owner_;
@@ -209,14 +200,6 @@ public:
 };
 
 
-
-
-using std::endl;
-using std::upper_bound;
-using std::lower_bound;
-using std::string;
-
-
 // Initialization of the counter for the paragraph id's,
 unsigned int Paragraph::Private::paragraph_id = 0;
 
@@ -229,10 +212,10 @@ struct special_phrase {
 };
 
 special_phrase const special_phrases[] = {
-	{ "LyX", from_ascii("\\protect\\LyX{}"), false },
-	{ "TeX", from_ascii("\\protect\\TeX{}"), true },
-	{ "LaTeX2e", from_ascii("\\protect\\LaTeXe{}"), true },
-	{ "LaTeX", from_ascii("\\protect\\LaTeX{}"), true },
+	{ "LyX", from_ascii("\\LyX{}"), false },
+	{ "TeX", from_ascii("\\TeX{}"), true },
+	{ "LaTeX2e", from_ascii("\\LaTeXe{}"), true },
+	{ "LaTeX", from_ascii("\\LaTeX{}"), true },
 };
 
 size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase);
@@ -266,15 +249,13 @@ bool Paragraph::isChanged(pos_type start, pos_type end) const
 }
 
 
-bool Paragraph::isMergedOnEndOfParDeletion(bool trackChanges) const {
+bool Paragraph::isMergedOnEndOfParDeletion(bool trackChanges) const
+{
 	// keep the logic here in sync with the logic of eraseChars()
-
-	if (!trackChanges) {
+	if (!trackChanges) // without change tracking the answer is always "merged"
 		return true;
-	}
-
-	Change change = d->changes_.lookup(size());
 
+	Change const change = d->changes_.lookup(size()); // change stored at size(), presumably the paragraph end
 	return change.type == Change::INSERTED && change.author == 0;
 }
 
@@ -298,8 +279,8 @@ void Paragraph::setChange(Change const & change)
 
 	if (change.type != Change::DELETED) {
 		for (pos_type pos = 0; pos < size(); ++pos) {
-			if (isInset(pos))
-				getInset(pos)->setChange(change);
+			if (Inset * inset = getInset(pos))
+				inset->setChange(change);
 		}
 	}
 }
@@ -308,22 +289,18 @@ void Paragraph::setChange(Change const & change)
 void Paragraph::setChange(pos_type pos, Change const & change)
 {
 	BOOST_ASSERT(pos >= 0 && pos <= size());
-
 	d->changes_.set(change, pos);
 
 	// see comment in setChange(Change const &) above
-
-	if (change.type != Change::DELETED &&
-	    pos < size() && isInset(pos)) {
-		getInset(pos)->setChange(change);
-	}
+	if (change.type != Change::DELETED && pos < size())
+		if (Inset * inset = getInset(pos)) // null when pos holds no inset
+			inset->setChange(change); // propagate into the nested inset
 }
 
 
 Change const & Paragraph::lookupChange(pos_type pos) const
 {
 	BOOST_ASSERT(pos >= 0 && pos <= size());
-
 	return d->changes_.lookup(pos);
 }
 
@@ -338,17 +315,15 @@ void Paragraph::acceptChanges(BufferParams const & bparams, pos_type start,
 		switch (lookupChange(pos).type) {
 			case Change::UNCHANGED:
 				// accept changes in nested inset
-				if (pos < size() && isInset(pos))
-					getInset(pos)->acceptChanges(bparams);
-
+				if (Inset * inset = getInset(pos))
+					inset->acceptChanges(bparams);
 				break;
 
 			case Change::INSERTED:
 				d->changes_.set(Change(Change::UNCHANGED), pos);
 				// also accept changes in nested inset
-				if (pos < size() && isInset(pos)) {
-					getInset(pos)->acceptChanges(bparams);
-				}
+				if (Inset * inset = getInset(pos))
+					inset->acceptChanges(bparams);
 				break;
 
 			case Change::DELETED:
@@ -376,9 +351,8 @@ void Paragraph::rejectChanges(BufferParams const & bparams,
 		switch (lookupChange(pos).type) {
 			case Change::UNCHANGED:
 				// reject changes in nested inset
-				if (pos < size() && isInset(pos)) {
-					getInset(pos)->rejectChanges(bparams);
-				}
+				if (Inset * inset = getInset(pos))
+						inset->rejectChanges(bparams);
 				break;
 
 			case Change::INSERTED:
@@ -406,14 +380,14 @@ void Paragraph::rejectChanges(BufferParams const & bparams,
 void Paragraph::Private::insertChar(pos_type pos, char_type c,
 		Change const & change)
 {
-	BOOST_ASSERT(pos >= 0 && pos <= size());
+	BOOST_ASSERT(pos >= 0 && pos <= int(text_.size()));
 
 	// track change
 	changes_.insert(change, pos);
 
 	// This is actually very common when parsing buffers (and
 	// maybe inserting ascii text)
-	if (pos == size()) {
+	if (pos == pos_type(text_.size())) {
 		// when appending characters, no need to update tables
 		text_.push_back(c);
 		return;
@@ -523,19 +497,20 @@ int Paragraph::Private::latexSurrogatePair(odocstream & os, char_type c,
 }
 
 
-bool Paragraph::Private::simpleTeXBlanks(Encoding const & encoding,
+bool Paragraph::Private::simpleTeXBlanks(OutputParams const & runparams,
 				       odocstream & os, TexRow & texrow,
 				       pos_type i,
 				       unsigned int & column,
 				       Font const & font,
 				       Layout const & style)
 {
-	if (style.pass_thru)
+	if (style.pass_thru || runparams.verbatim)
 		return false;
 
-	if (i + 1 < size()) {
+	if (i + 1 < int(text_.size())) {
 		char_type next = text_[i + 1];
 		if (Encodings::isCombiningChar(next)) {
+			Encoding const & encoding = *(runparams.encoding);
 			// This space has an accent, so we must always output it.
 			column += latexSurrogatePair(os, ' ', next, encoding) - 1;
 			return true;
@@ -546,12 +521,12 @@ bool Paragraph::Private::simpleTeXBlanks(Encoding const & encoding,
 	    && column > lyxrc.plaintext_linelen
 	    && i
 	    && text_[i - 1] != ' '
-	    && (i + 1 < size())
+	    && (i + 1 < int(text_.size()))
 	    // same in FreeSpacing mode
 	    && !owner_->isFreeSpacing()
 	    // In typewriter mode, we want to avoid
 	    // ! . ? : at the end of a line
-	    && !(font.family() == Font::TYPEWRITER_FAMILY
+	    && !(font.fontInfo().family() == TYPEWRITER_FAMILY
 		 && (text_[i - 1] == '.'
 		     || text_[i - 1] == '?'
 		     || text_[i - 1] == ':'
@@ -569,27 +544,35 @@ bool Paragraph::Private::simpleTeXBlanks(Encoding const & encoding,
 }
 
 
-int Paragraph::Private::knownLangChars(odocstream & os,
-				     char_type c,
-				     string & preamble,
-				     Change & runningChange,
-				     Encoding const & encoding,
-				     pos_type & i)
-{
-	// When the character is marked by the proper language, we simply
-	// get its code point in some encoding, otherwise we get the
-	// translation specified in the unicodesymbols file, which is
-	// something like "\textLANG{<spec>}". So, we have to retain
-	// "\textLANG{<spec>" for the first char but only "<spec>" for
-	// all subsequent chars.
-	docstring const latex1 = rtrim(encoding.latexChar(c), "}");
-	int length = latex1.length();
-	os << latex1;
-	while (i + 1 < size()) {
-		char_type next = text_[i + 1];
-		// Stop here if next character belongs to another
-		// language or there is a change tracking status.
-		if (!Encodings::isKnownLangChar(next, preamble) ||
+int Paragraph::Private::writeScriptChars(odocstream & os,
+					 docstring const & ltx,
+					 Change & runningChange,
+					 Encoding const & encoding,
+					 pos_type & i)
+{
+	// FIXME: modifying i here is not very nice...
+
+	// We only arrive here when a proper language for character text_[i] has
+	// not been specified (i.e., it could not be translated in the current
+	// latex encoding) and it belongs to a known script.
+	// Parameter ltx contains the latex translation of text_[i] as specified in
+	// the unicodesymbols file and is something like "\textXXX{<spec>}".
+	// The latex macro name "textXXX" specifies the script to which text_[i]
+	// belongs and we use it in order to check whether characters from the
+	// same script immediately follow, such that we can collect them in a
+	// single "\textXXX" macro. So, we have to retain "\textXXX{<spec>"
+	// for the first char but only "<spec>" for all subsequent chars.
+	docstring::size_type const brace1 = ltx.find_first_of(from_ascii("{"));
+	docstring::size_type const brace2 = ltx.find_last_of(from_ascii("}"));
+	string script = to_ascii(ltx.substr(1, brace1 - 1));
+	int length = ltx.substr(0, brace2).length();
+	os << ltx.substr(0, brace2);
+	int size = text_.size();
+	while (i + 1 < size) {
+		char_type const next = text_[i + 1];
+		// Stop here if next character belongs to another script
+		// or there is a change in change tracking status.
+		if (!Encodings::isKnownScriptChar(next, script) ||
 		    runningChange != owner_->lookupChange(i + 1))
 			break;
 		Font prev_font;
@@ -604,27 +587,21 @@ int Paragraph::Private::knownLangChars(odocstream & os,
 			if (cit->pos() >= i + 1)
 				break;
 		}
-		// Stop here if there is a font attribute change.
+		// Stop here if there is a font attribute or encoding change.
 		if (found && cit != end && prev_font != cit->font())
 			break;
-		docstring const latex = rtrim(encoding.latexChar(next), "}");
-		docstring::size_type const j =
+		docstring const latex = encoding.latexChar(next);
+		docstring::size_type const b1 =
 					latex.find_first_of(from_ascii("{"));
-		if (j == docstring::npos) {
-			os << latex;
-			length += latex.length();
-		} else {
-			os << latex.substr(j + 1);
-			length += latex.substr(j + 1).length();
-		}
+		docstring::size_type const b2 =
+					latex.find_last_of(from_ascii("}"));
+		int const len = b2 - b1 - 1;
+		os << latex.substr(b1 + 1, len);
+		length += len;
 		++i;
 	}
-	// When the proper language is set, we are simply passed a code
-	// point, so we should not try to close the \textLANG command.
-	if (prefixIs(latex1, from_ascii("\\" + preamble))) {
-		os << '}';
-		++length;
-	}
+	os << '}';
+	++length;
 	return length;
 }
 
@@ -634,7 +611,7 @@ bool Paragraph::Private::isTextAt(string const & str, pos_type pos) const
 	pos_type const len = str.length();
 
 	// is the paragraph large enough?
-	if (pos + len > size())
+	if (pos + len > int(text_.size()))
 		return false;
 
 	// does the wanted text start at point?
@@ -685,7 +662,7 @@ void Paragraph::Private::latexInset(Buffer const & buf,
 				open_font = false;
 			}
 
-			if (running_font.family() == Font::TYPEWRITER_FAMILY)
+			if (running_font.fontInfo().family() == TYPEWRITER_FAMILY)
 				os << '~';
 
 			basefont = owner_->getLayoutFont(bparams, outerfont);
@@ -694,7 +671,6 @@ void Paragraph::Private::latexInset(Buffer const & buf,
 			if (runparams.moving_arg)
 				os << "\\protect ";
 
-			os << "\\\\\n";
 		}
 		texrow.newline();
 		texrow.start(owner_->id(), i + 1);
@@ -715,10 +691,10 @@ void Paragraph::Private::latexInset(Buffer const & buf,
 	bool close = false;
 	odocstream::pos_type const len = os.tellp();
 
-	if ((inset->lyxCode() == GRAPHICS_CODE
-	     || inset->lyxCode() == MATH_CODE
-	     || inset->lyxCode() == HYPERLINK_CODE)
-	    && running_font.isRightToLeft()) {
+	if (inset->forceLTR() 
+	    && running_font.isRightToLeft()
+		// ERT is an exception, it should be output with no decorations at all
+		&& inset->lyxCode() != ERT_CODE) {
 	    	if (running_font.language()->lang() == "farsi")
 			os << "\\beginL{}";
 		else
@@ -812,7 +788,7 @@ void Paragraph::Private::latexSpecialChar(
 	if (lyxrc.fontenc == "T1" && latexSpecialT1(c, os, i, column))
 		return;
 
-	if (running_font.family() == Font::TYPEWRITER_FAMILY
+	if (running_font.fontInfo().family() == TYPEWRITER_FAMILY
 		&& latexSpecialTypewriter(c, os, i, column))
 		return;
 
@@ -879,14 +855,14 @@ void Paragraph::Private::latexSpecialChar(
 	default:
 
 		// LyX, LaTeX etc.
-		if (latexSpecialPhrase(os, i, column))
+		if (latexSpecialPhrase(os, i, column, runparams))
 			return;
 
 		if (c == '\0')
 			return;
 
 		Encoding const & encoding = *(runparams.encoding);
-		if (i + 1 < size()) {
+		if (i + 1 < int(text_.size())) {
 			char_type next = text_[i + 1];
 			if (Encodings::isCombiningChar(next)) {
 				column += latexSurrogatePair(os, c, next, encoding) - 1;
@@ -894,14 +870,13 @@ void Paragraph::Private::latexSpecialChar(
 				break;
 			}
 		}
-		string preamble;
-		if (Encodings::isKnownLangChar(c, preamble)) {
-			column += knownLangChars(os, c, preamble, running_change,
-				encoding, i) - 1;
-			break;
-		}
+		string script;
 		docstring const latex = encoding.latexChar(c);
-		if (latex.length() > 1 && latex[latex.length() - 1] != '}') {
+		if (Encodings::isKnownScriptChar(c, script)
+		    && prefixIs(latex, from_ascii("\\" + script)))
+			column += writeScriptChars(os, latex,
+					running_change, encoding, i) - 1;
+		else if (latex.length() > 1 && latex[latex.length() - 1] != '}') {
 			// Prevent eating of a following
 			// space or command corruption by
 			// following characters
@@ -925,7 +900,7 @@ bool Paragraph::Private::latexSpecialT1(char_type const c, odocstream & os,
 		os.put(c);
 		// In T1 encoding, these characters exist
 		// but we should avoid ligatures
-		if (i + 1 > size() || text_[i + 1] != c)
+		if (i + 1 >= int(text_.size()) || text_[i + 1] != c)
 			return true;
 		os << "\\,{}";
 		column += 3;
@@ -947,7 +922,7 @@ bool Paragraph::Private::latexSpecialTypewriter(char_type const c, odocstream &
 {
 	switch (c) {
 	case '-':
-		if (i + 1 < size() && text_[i + 1] == '-') {
+		if (i + 1 < int(text_.size()) && text_[i + 1] == '-') {
 			// "--" in Typewriter mode -> "-{}-"
 			os << "-{}";
 			column += 2;
@@ -983,7 +958,7 @@ bool Paragraph::Private::latexSpecialTypewriter(char_type const c, odocstream &
 
 
 bool Paragraph::Private::latexSpecialPhrase(odocstream & os, pos_type & i,
-	unsigned int & column)
+	unsigned int & column, OutputParams & runparams)
 {
 	// FIXME: if we have "LaTeX" with a font
 	// change in the middle (before the 'T', then
@@ -994,6 +969,8 @@ bool Paragraph::Private::latexSpecialPhrase(odocstream & os, pos_type & i,
 	for (size_t pnr = 0; pnr < phrases_nr; ++pnr) {
 		if (!isTextAt(special_phrases[pnr].phrase, i))
 			continue;
+		if (runparams.moving_arg)
+			os << "\\protect";
 		os << special_phrases[pnr].macro;
 		i += special_phrases[pnr].phrase.length() - 1;
 		column += special_phrases[pnr].macro.length() - 1;
@@ -1033,7 +1010,7 @@ void Paragraph::Private::validate(LaTeXFeatures & features,
 	}
 
 	// then the contents
-	for (pos_type i = 0; i < size() ; ++i) {
+	for (pos_type i = 0; i < int(text_.size()) ; ++i) {
 		for (size_t pnr = 0; pnr < phrases_nr; ++pnr) {
 			if (!special_phrases[pnr].builtin
 			    && isTextAt(special_phrases[pnr].phrase, i)) {
@@ -1109,7 +1086,7 @@ void Paragraph::write(Buffer const & buf, ostream & os,
 
 	params().write(os);
 
-	Font font1(Font::ALL_INHERIT, bparams.language);
+	Font font1(inherit_font, bparams.language);
 
 	Change running_change = Change(Change::UNCHANGED);
 
@@ -1134,9 +1111,7 @@ void Paragraph::write(Buffer const & buf, ostream & os,
 		char_type const c = d->text_[i];
 		switch (c) {
 		case META_INSET:
-		{
-			Inset const * inset = getInset(i);
-			if (inset)
+			if (Inset const * inset = getInset(i)) {
 				if (inset->directWrite()) {
 					// international char, let it write
 					// code directly so it's shorter in
@@ -1150,8 +1125,8 @@ void Paragraph::write(Buffer const & buf, ostream & os,
 					os << "\n\\end_inset\n\n";
 					column = 0;
 				}
-		}
-		break;
+			}
+			break;
 		case '\\':
 			os << "\n\\backslash\n";
 			column = 0;
@@ -1171,11 +1146,9 @@ void Paragraph::write(Buffer const & buf, ostream & os,
 			}
 			// this check is to amend a bug. LyX sometimes
 			// inserts '\0' this could cause problems.
-			if (c != '\0') {
-				std::vector<char> tmp = ucs4_to_utf8(c);
-				tmp.push_back('\0');
-				os << &tmp[0];
-			} else
+			if (c != '\0')
+				os << to_utf8(docstring(1, c));
+			else
 				lyxerr << "ERROR (Paragraph::writeFile):"
 					" NULL char in structure." << endl;
 			++column;
@@ -1215,12 +1188,12 @@ void Paragraph::appendChar(char_type c, Font const & font,
 void Paragraph::appendString(docstring const & s, Font const & font,
 		Change const & change)
 {
-	size_t end = s.size();
+	pos_type end = s.size();
 	size_t oldsize = d->text_.size();
 	size_t newsize = oldsize + end;
 	size_t capacity = d->text_.capacity();
 	if (newsize >= capacity)
-		d->text_.reserve(std::max(capacity + 100, newsize));
+		d->text_.reserve(max(capacity + 100, newsize));
 
 	// when appending characters, no need to update tables
 	d->text_.append(s);
@@ -1230,7 +1203,8 @@ void Paragraph::appendString(docstring const & s, Font const & font,
 		// track change
 		d->changes_.insert(change, i);
 	}
-	d->fontlist_.setRange(oldsize, newsize, font);
+	d->fontlist_.set(oldsize, font);
+	d->fontlist_.set(newsize - 1, font);
 }
 
 
@@ -1274,6 +1248,13 @@ bool Paragraph::insetAllowed(InsetCode code)
 }
 
 
+void Paragraph::resetFonts(Font const & font)
+{
+	d->fontlist_.clear(); // drop all font spans, then mark first and last position
+	d->fontlist_.set(0, font);
+	d->fontlist_.set(max<pos_type>(d->text_.size(), 1) - 1, font); // last pos; 0 if empty
+}
+
 // Gets uninstantiated font setting at position.
 Font const Paragraph::getFontSettings(BufferParams const & bparams,
 					 pos_type pos) const
@@ -1290,7 +1271,7 @@ Font const Paragraph::getFontSettings(BufferParams const & bparams,
 	if (pos == size() && !empty())
 		return getFontSettings(bparams, pos - 1);
 
-	return Font(Font::ALL_INHERIT, getParLanguage(bparams));
+	return Font(inherit_font, getParLanguage(bparams));
 }
 
 
@@ -1304,11 +1285,11 @@ FontSpan Paragraph::fontSpan(pos_type pos) const
 	for (; cit != end; ++cit) {
 		if (cit->pos() >= pos) {
 			if (pos >= beginOfBody())
-				return FontSpan(std::max(start, beginOfBody()),
+				return FontSpan(max(start, beginOfBody()),
 						cit->pos());
 			else
 				return FontSpan(start,
-						std::min(beginOfBody() - 1,
+						min(beginOfBody() - 1,
 							 cit->pos()));
 		}
 		start = cit->pos() + 1;
@@ -1327,7 +1308,7 @@ Font const Paragraph::getFirstFontSettings(BufferParams const & bparams) const
 	if (!empty() && !d->fontlist_.empty())
 		return d->fontlist_.begin()->font();
 
-	return Font(Font::ALL_INHERIT, bparams.language);
+	return Font(inherit_font, bparams.language);
 }
 
 
@@ -1345,12 +1326,12 @@ Font const Paragraph::getFont(BufferParams const & bparams, pos_type pos,
 
 	pos_type const body_pos = beginOfBody();
 	if (pos < body_pos)
-		font.realize(d->layout_->labelfont);
+		font.fontInfo().realize(d->layout_->labelfont);
 	else
-		font.realize(d->layout_->font);
+		font.fontInfo().realize(d->layout_->font);
 
-	font.realize(outerfont);
-	font.realize(bparams.getFont());
+	font.fontInfo().realize(outerfont.fontInfo());
+	font.fontInfo().realize(bparams.getFont().fontInfo());
 
 	return font;
 }
@@ -1359,28 +1340,26 @@ Font const Paragraph::getFont(BufferParams const & bparams, pos_type pos,
 Font const Paragraph::getLabelFont
 	(BufferParams const & bparams, Font const & outerfont) const
 {
-	Font tmpfont = layout()->labelfont;
-	tmpfont.setLanguage(getParLanguage(bparams));
-	tmpfont.realize(outerfont);
-	tmpfont.realize(bparams.getFont());
-	return tmpfont;
+	FontInfo tmpfont = layout()->labelfont; // start from the layout's label font
+	tmpfont.realize(outerfont.fontInfo()); // realize against the outer font first
+	tmpfont.realize(bparams.getFont().fontInfo()); // then against the buffer font
+	return Font(tmpfont, getParLanguage(bparams)); // language comes from the paragraph
 }
 
 
 Font const Paragraph::getLayoutFont
 	(BufferParams const & bparams, Font const & outerfont) const
 {
-	Font tmpfont = layout()->font;
-	tmpfont.setLanguage(getParLanguage(bparams));
-	tmpfont.realize(outerfont);
-	tmpfont.realize(bparams.getFont());
-	return tmpfont;
+	FontInfo tmpfont = layout()->font; // start from the layout's body font
+	tmpfont.realize(outerfont.fontInfo()); // realize against the outer font first
+	tmpfont.realize(bparams.getFont().fontInfo()); // then against the buffer font
+	return Font(tmpfont, getParLanguage(bparams)); // language comes from the paragraph
 }
 
 
 /// Returns the height of the highest font in range
-Font_size Paragraph::highestFontInRange
-	(pos_type startpos, pos_type endpos, Font_size def_size) const
+FontSize Paragraph::highestFontInRange
+	(pos_type startpos, pos_type endpos, FontSize def_size) const
 {
 	return d->fontlist_.highestInRange(startpos, endpos, def_size);
 }
@@ -1521,7 +1500,7 @@ void Paragraph::setLabelWidthString(docstring const & s)
 docstring const Paragraph::translateIfPossible(docstring const & s,
 		BufferParams const & bparams) const
 {
-	if (!support::isAscii(s) || s.empty()) {
+	if (!isAscii(s) || s.empty()) {
 		// This must be a user defined layout. We cannot translate
 		// this, since gettext accepts only ascii keys.
 		return s;
@@ -1622,17 +1601,6 @@ void Paragraph::setBeginOfBody()
 }
 
 
-InsetBibitem * Paragraph::bibitem() const
-{
-	if (!d->insetlist_.empty()) {
-		Inset * inset = d->insetlist_.begin()->inset;
-		if (inset->lyxCode() == BIBITEM_CODE)
-			return static_cast<InsetBibitem *>(inset);
-	}
-	return 0;
-}
-
-
 bool Paragraph::forceDefaultParagraphs() const
 {
 	return inInset() && inInset()->forceDefaultParagraphs(0);
@@ -1826,7 +1794,7 @@ bool Paragraph::latex(Buffer const & buf,
 				odocstream & os, TexRow & texrow,
 				OutputParams const & runparams) const
 {
-	LYXERR(Debug::LATEX) << "SimpleTeXOnePar...     " << this << endl;
+	LYXERR(Debug::LATEX, "SimpleTeXOnePar...     " << this);
 
 	bool return_value = false;
 
@@ -1959,13 +1927,22 @@ bool Paragraph::latex(Buffer const & buf,
 			open_font = false;
 		}
 
+		// close babel's font environment before opening CJK.
+		if (!running_font.language()->babel().empty() &&
+		    font.language()->encoding()->package() == Encoding::CJK) {
+				string end_tag = subst(lyxrc.language_command_end,
+							"$$lang",
+							running_font.language()->babel());
+				os << from_ascii(end_tag);
+				column += end_tag.length();
+		}
+
 		// Switch file encoding if necessary (and allowed)
 		if (!runparams.verbatim && 
-		    runparams.encoding->package() == Encoding::inputenc &&
-		    font.language()->encoding()->package() == Encoding::inputenc) {
-			std::pair<bool, int> const enc_switch = switchEncoding(os, bparams,
-					runparams.moving_arg, *(runparams.encoding),
-					*(font.language()->encoding()));
+		    runparams.encoding->package() == Encoding::none &&
+		    font.language()->encoding()->package() == Encoding::none) {
+			pair<bool, int> const enc_switch = switchEncoding(os, bparams,
+					runparams, *(font.language()->encoding()));
 			if (enc_switch.first) {
 				column += enc_switch.second;
 				runparams.encoding = font.language()->encoding();
@@ -2003,7 +1980,7 @@ bool Paragraph::latex(Buffer const & buf,
 			// style->pass_thru is false.
 			if (i != body_pos - 1) {
 				if (d->simpleTeXBlanks(
-						*(runparams.encoding), os, texrow,
+						runparams, os, texrow,
 						i, column, font, *style)) {
 					// A surrogate pair was output. We
 					// must not call latexSpecialChar
@@ -2028,9 +2005,24 @@ bool Paragraph::latex(Buffer const & buf,
 					texrow, rp, running_font,
 					basefont, outerfont, open_font,
 					runningChange, *style, i, column);
-		else
-			d->latexSpecialChar(os, rp, running_font, runningChange,
-				*style, i, column);
+		else {
+			try {
+				d->latexSpecialChar(os, rp, running_font, runningChange,
+					*style, i, column);
+			} catch (EncodingException & e) {
+				if (runparams.dryrun) {
+					os << "<" << _("LyX Warning: ")
+					   << _("uncodable character") << " '";
+					os.put(c);
+					os << "'>";
+				} else {
+					// add location information and throw again.
+					e.par_id = id();
+					e.pos = i;
+					throw(e);
+				}
+			}
+		}
 
 		// Set the encoding to that returned from simpleTeXSpecialChars (see
 		// comment for encoding member in OutputParams.h)
@@ -2071,59 +2063,15 @@ bool Paragraph::latex(Buffer const & buf,
 					  runparams.moving_arg);
 	}
 
-	LYXERR(Debug::LATEX) << "SimpleTeXOnePar...done " << this << endl;
+	LYXERR(Debug::LATEX, "SimpleTeXOnePar...done " << this);
 	return return_value;
 }
 
 
-namespace {
-
-enum PAR_TAG {
-	PAR_NONE=0,
-	TT = 1,
-	SF = 2,
-	BF = 4,
-	IT = 8,
-	SL = 16,
-	EM = 32
-};
-
-
-string tag_name(PAR_TAG const & pt) {
-	switch (pt) {
-	case PAR_NONE: return "!-- --";
-	case TT: return "tt";
-	case SF: return "sf";
-	case BF: return "bf";
-	case IT: return "it";
-	case SL: return "sl";
-	case EM: return "em";
-	}
-	return "";
-}
-
-
-inline
-void operator|=(PAR_TAG & p1, PAR_TAG const & p2)
-{
-	p1 = static_cast<PAR_TAG>(p1 | p2);
-}
-
-
-inline
-void reset(PAR_TAG & p1, PAR_TAG const & p2)
-{
-	p1 = static_cast<PAR_TAG>(p1 & ~p2);
-}
-
-} // anon
-
-
 bool Paragraph::emptyTag() const
 {
 	for (pos_type i = 0; i < size(); ++i) {
-		if (isInset(i)) {
-			Inset const * inset = getInset(i);
+		if (Inset const * inset = getInset(i)) {
 			InsetCode lyx_code = inset->lyxCode();
 			if (lyx_code != TOC_CODE &&
 			    lyx_code != INCLUDE_CODE &&
@@ -2147,8 +2095,7 @@ bool Paragraph::emptyTag() const
 string Paragraph::getID(Buffer const & buf, OutputParams const & runparams) const
 {
 	for (pos_type i = 0; i < size(); ++i) {
-		if (isInset(i)) {
-			Inset const * inset = getInset(i);
+		if (Inset const * inset = getInset(i)) {
 			InsetCode lyx_code = inset->lyxCode();
 			if (lyx_code == LABEL_CODE) {
 				InsetLabel const * const il = static_cast<InsetLabel const *>(inset);
@@ -2156,7 +2103,6 @@ string Paragraph::getID(Buffer const & buf, OutputParams const & runparams) cons
 				return "id='" + to_utf8(sgml::cleanID(buf, runparams, id)) + "'";
 			}
 		}
-
 	}
 	return string();
 }
@@ -2166,8 +2112,7 @@ pos_type Paragraph::getFirstWord(Buffer const & buf, odocstream & os, OutputPara
 {
 	pos_type i;
 	for (i = 0; i < size(); ++i) {
-		if (isInset(i)) {
-			Inset const * inset = getInset(i);
+		if (Inset const * inset = getInset(i)) {
 			inset->docbook(buf, os, runparams);
 		} else {
 			char_type c = d->text_[i];
@@ -2180,13 +2125,13 @@ pos_type Paragraph::getFirstWord(Buffer const & buf, odocstream & os, OutputPara
 }
 
 
-bool Paragraph::onlyText(Buffer const & buf, Font const & outerfont, pos_type initial) const
+bool Paragraph::Private::onlyText(Buffer const & buf, Font const & outerfont, pos_type initial) const
 {
 	Font font_old;
-
-	for (pos_type i = initial; i < size(); ++i) {
-		Font font = getFont(buf.params(), i, outerfont);
-		if (isInset(i))
+	pos_type size = text_.size();
+	for (pos_type i = initial; i < size; ++i) {
+		Font font = owner_->getFont(buf.params(), i, outerfont);
+		if (text_[i] == META_INSET)
 			return false;
 		if (i != initial && font != font_old)
 			return false;
@@ -2206,10 +2151,10 @@ void Paragraph::simpleDocBookOnePar(Buffer const & buf,
 	bool emph_flag = false;
 
 	LayoutPtr const & style = layout();
-	Font font_old =
+	FontInfo font_old =
 		style->labeltype == LABEL_MANUAL ? style->labelfont : style->font;
 
-	if (style->pass_thru && !onlyText(buf, outerfont, initial))
+	if (style->pass_thru && !d->onlyText(buf, outerfont, initial))
 		os << "]]>";
 
 	// parsing main loop
@@ -2217,8 +2162,8 @@ void Paragraph::simpleDocBookOnePar(Buffer const & buf,
 		Font font = getFont(buf.params(), i, outerfont);
 
 		// handle <emphasis> tag
-		if (font_old.emph() != font.emph()) {
-			if (font.emph() == Font::ON) {
+		if (font_old.emph() != font.fontInfo().emph()) {
+			if (font.fontInfo().emph() == FONT_ON) {
 				os << "<emphasis>";
 				emph_flag = true;
 			} else if (i != initial) {
@@ -2227,8 +2172,7 @@ void Paragraph::simpleDocBookOnePar(Buffer const & buf,
 			}
 		}
 
-		if (isInset(i)) {
-			Inset const * inset = getInset(i);
+		if (Inset const * inset = getInset(i)) {
 			inset->docbook(buf, os, runparams);
 		} else {
 			char_type c = d->text_[i];
@@ -2238,7 +2182,7 @@ void Paragraph::simpleDocBookOnePar(Buffer const & buf,
 			else
 				os << sgml::escapeChar(c);
 		}
-		font_old = font;
+		font_old = font.fontInfo();
 	}
 
 	if (emph_flag) {
@@ -2247,43 +2191,42 @@ void Paragraph::simpleDocBookOnePar(Buffer const & buf,
 
 	if (style->free_spacing)
 		os << '\n';
-	if (style->pass_thru && !onlyText(buf, outerfont, initial))
+	if (style->pass_thru && !d->onlyText(buf, outerfont, initial))
 		os << "<![CDATA[";
 }
 
 
 bool Paragraph::isHfill(pos_type pos) const
 {
-	return isInset(pos)
-		&& getInset(pos)->lyxCode() == HFILL_CODE;
+	Inset const * inset = getInset(pos); // 0 when pos holds no inset
+	return inset && inset->lyxCode() == HFILL_CODE; // true only for an hfill inset
 }
 
 
 bool Paragraph::isNewline(pos_type pos) const
 {
-	return isInset(pos)
-		&& getInset(pos)->lyxCode() == NEWLINE_CODE;
+	Inset const * inset = getInset(pos); // 0 when pos holds no inset
+	return inset && inset->lyxCode() == NEWLINE_CODE; // true only for a newline inset
 }
 
 
 bool Paragraph::isLineSeparator(pos_type pos) const
 {
 	char_type const c = d->text_[pos];
-	return isLineSeparatorChar(c)
-		|| (c == META_INSET && getInset(pos) &&
-		getInset(pos)->isLineSeparator());
+	if (isLineSeparatorChar(c)) // plain separator character
+		return true;
+	Inset const * inset = getInset(pos); // 0 unless c is META_INSET
+	return inset && inset->isLineSeparator(); // otherwise let the inset decide
 }
 
 
 /// Used by the spellchecker
 bool Paragraph::isLetter(pos_type pos) const
 {
-	if (isInset(pos))
-		return getInset(pos)->isLetter();
-	else {
-		char_type const c = d->text_[pos];
-		return isLetterChar(c) || isDigit(c);
-	}
+	if (Inset const * inset = getInset(pos)) // an inset answers for itself
+		return inset->isLetter();
+	char_type const c = d->text_[pos];
+	return isLetterChar(c) || isDigit(c); // digits are treated as letters here
 }
 
 
@@ -2396,8 +2339,7 @@ Inset * Paragraph::inInset() const
 
 InsetCode Paragraph::ownerCode() const
 {
-	return d->inset_owner_ ?
-		d->inset_owner_->lyxCode() : NO_CODE;
+	return d->inset_owner_ ? d->inset_owner_->lyxCode() : NO_CODE; // NO_CODE without an owner inset
 }
 
 
@@ -2417,10 +2359,7 @@ bool Paragraph::isFreeSpacing() const
 {
 	if (layout()->free_spacing)
 		return true;
-
-	// for now we just need this, later should we need this in some
-	// other way we can always add a function to Inset too.
-	return ownerCode() == ERT_CODE || ownerCode() == LISTINGS_CODE;
+	return d->inset_owner_ && d->inset_owner_->isFreeSpacing();
 }
 
 
@@ -2428,7 +2367,7 @@ bool Paragraph::allowEmpty() const
 {
 	if (layout()->keepempty)
 		return true;
-	return ownerCode() == ERT_CODE || ownerCode() == LISTINGS_CODE;
+	return d->inset_owner_ && d->inset_owner_->allowEmpty();
 }
 
 
@@ -2580,31 +2519,20 @@ Inset * Paragraph::releaseInset(pos_type pos)
 
 Inset * Paragraph::getInset(pos_type pos)
 {
-	return d->insetlist_.get(pos);
+	bool const ok = pos >= 0 && pos < pos_type(d->text_.size()); // never index text_ out of range
+	return ok && d->text_[pos] == META_INSET ? d->insetlist_.get(pos) : 0; // 0: no inset at pos
 }
 
 
 Inset const * Paragraph::getInset(pos_type pos) const
 {
-	return d->insetlist_.get(pos);
-}
-
-
-int Paragraph::numberOfOptArgs() const
-{
-	int num = 0;
-	InsetList::const_iterator it = insetList().begin();
-	InsetList::const_iterator end = insetList().end();
-	for (; it != end ; ++it) {
-		if (it->inset->lyxCode() == OPTARG_CODE)
-			++num;
-	}
-	return num;
+	bool const ok = pos >= 0 && pos < pos_type(d->text_.size()); // never index text_ out of range
+	return ok && d->text_[pos] == META_INSET ? d->insetlist_.get(pos) : 0; // 0: no inset at pos
 }
 
 
 void Paragraph::changeCase(BufferParams const & bparams, pos_type pos,
-		pos_type right, TextCase action)
+		pos_type & right, TextCase action)
 {
 	// process sequences of modified characters; in change
 	// tracking mode, this approach results in much better
@@ -2620,7 +2548,7 @@ void Paragraph::changeCase(BufferParams const & bparams, pos_type pos,
 		char_type newChar = oldChar;
 
 		// ignore insets and don't play with deleted text!
-		if (isInset(pos) && !isDeleted(pos)) {
+		if (oldChar != META_INSET && !isDeleted(pos)) {
 			switch (action) {
 				case text_lowercase:
 					newChar = lowercase(oldChar);