Sort the language nesting mess with polyglossia

[lyx.git] / src / Paragraph.cpp
diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp

index 75f0171bf1a4863b7059e57b1784462541ede7f3..94cf9313d53d6eebc9e901114221cadf0ab56db6 100644 (file)
--- a/src/Paragraph.cpp
+++ b/src/Paragraph.cpp
@@ -40,6 +40,7 @@
  #include "ParagraphParameters.h"
  #include "SpellChecker.h"
  #include "sgml.h"
+#include "texstream.h"
  #include "TextClass.h"
  #include "TexRow.h"
  #include "Text.h"
@@ -60,6 +61,7 @@
  #include "support/lstrings.h"
  #include "support/textutils.h"
  
+#include <atomic>
  #include <sstream>
  #include <vector>
  
@@ -96,7 +98,7 @@ public:
         ///
         void result(SpellChecker::Result r) { result_ = r; }
         ///
-       bool inside(pos_type pos) const { return range_.inside(pos); }
+       bool contains(pos_type pos) const { return range_.contains(pos); }
         ///
         bool covered(FontSpan const & r) const
         {
@@ -104,8 +106,9 @@ public:
                 // 2. last of new range inside current range or
                 // 3. first of current range inside new range or
                 // 4. last of current range inside new range
-               return range_.inside(r.first) || range_.inside(r.last) ||
-                       r.inside(range_.first) || r.inside(range_.last);
+               //FIXME: is this the same as !range_.intersect(r).empty() ?
+               return range_.contains(r.first) || range_.contains(r.last) ||
+                       r.contains(range_.first) || r.contains(range_.last);
         }
         ///
         void shift(pos_type pos, int offset)
@@ -191,7 +194,7 @@ public:
                 RangesIterator et = ranges_.end();
                 RangesIterator it = ranges_.begin();
                 for (; it != et; ++it) {
-                       if(it->inside(pos)) {
+                       if(it->contains(pos)) {
                                 return it->result();
                         }
                 }
@@ -205,7 +208,7 @@ public:
                 RangesIterator et = ranges_.end();
                 RangesIterator it = ranges_.begin();
                 for (; it != et; ++it) {
-                       if(it->inside(pos)) {
+                       if(it->contains(pos)) {
                                 return it->range();
                         }
                 }
@@ -281,6 +284,11 @@ private:
  
  class Paragraph::Private
  {
+       // Enforce our own "copy" constructor
+       Private(Private const &) = delete;
+       Private & operator=(Private const &) = delete;
+       // Unique ID generator
+       static int make_id();
  public:
         ///
         Private(Paragraph * owner, Layout const & layout);
@@ -358,19 +366,6 @@ public:
                 otexstream & os,
                 pos_type i,
                 unsigned int & column);
-       ///
-       bool latexSpecialTypewriter(
-               char_type const c,
-               otexstream & os,
-               pos_type i,
-               unsigned int & column);
-       ///
-       bool latexSpecialPhrase(
-               otexstream & os,
-               pos_type & i,
-               pos_type end_pos,
-               unsigned int & column,
-               OutputParams const & runparams);
  
         ///
         void validate(LaTeXFeatures & features) const;
@@ -379,9 +374,6 @@ public:
         bool onlyText(Buffer const & buf, Font const & outerfont,
                       pos_type initial) const;
  
-       /// match a string against a particular point in the paragraph
-       bool isTextAt(string const & str, pos_type pos) const;
-
         /// a vector of speller skip positions
         typedef vector<FontSpan> SkipPositions;
         typedef SkipPositions::const_iterator SkipPositionsIterator;
@@ -509,26 +501,6 @@ public:
  };
  
  
-namespace {
-
-struct special_phrase {
-       string phrase;
-       docstring macro;
-       bool builtin;
-};
-
-special_phrase const special_phrases[] = {
-       { "LyX", from_ascii("\\LyX{}"), false },
-       { "TeX", from_ascii("\\TeX{}"), true },
-       { "LaTeX2e", from_ascii("\\LaTeXe{}"), true },
-       { "LaTeX", from_ascii("\\LaTeX{}"), true },
-};
-
-size_t const phrases_nr = sizeof(special_phrases)/sizeof(special_phrase);
-
-} // namespace anon
-
-
  Paragraph::Private::Private(Paragraph * owner, Layout const & layout)
         : owner_(owner), inset_owner_(0), id_(-1), begin_of_body_(0), layout_(&layout)
  {
@@ -536,33 +508,37 @@ Paragraph::Private::Private(Paragraph * owner, Layout const & layout)
  }
  
  
-// Initialization of the counter for the paragraph id's,
-//
-// FIXME: There should be a more intelligent way to generate and use the
-// paragraph ids per buffer instead a global static counter for all InsetText
-// in the running program.
-static int paragraph_id = -1;
+//static
+int Paragraph::Private::make_id()
+{
+       // The id is unique per session across buffers because it is used in
+       // LFUN_PARAGRAPH_GOTO to switch to a different buffer, for instance in the
+       // outliner.
+       // (thread-safe: the counter is an atomic that is incremented atomically)
+       static atomic_uint next_id(0);
+       return next_id++;
+}
+
  
  Paragraph::Private::Private(Private const & p, Paragraph * owner)
         : owner_(owner), inset_owner_(p.inset_owner_), fontlist_(p.fontlist_),
+         id_(make_id()),
           params_(p.params_), changes_(p.changes_), insetlist_(p.insetlist_),
           begin_of_body_(p.begin_of_body_), text_(p.text_), words_(p.words_),
           layout_(p.layout_)
  {
-       id_ = ++paragraph_id;
         requestSpellCheck(p.text_.size());
  }
  
  
  Paragraph::Private::Private(Private const & p, Paragraph * owner,
         pos_type beg, pos_type end)
-       : owner_(owner), inset_owner_(p.inset_owner_),
+       : owner_(owner), inset_owner_(p.inset_owner_), id_(make_id()),
           params_(p.params_), changes_(p.changes_),
           insetlist_(p.insetlist_, beg, end),
           begin_of_body_(p.begin_of_body_), words_(p.words_),
           layout_(p.layout_)
  {
-       id_ = ++paragraph_id;
         if (beg >= pos_type(p.text_.size()))
                 return;
         text_ = p.text_.substr(beg, end - beg);
@@ -591,6 +567,18 @@ void Paragraph::addChangesToToc(DocIterator const & cdit,
  }
  
  
+void Paragraph::addChangesToBuffer(Buffer const & buf) const
+{
+       d->changes_.updateBuffer(buf);
+}
+
+
+bool Paragraph::isChangeUpdateRequired() const
+{
+       return d->changes_.isUpdateRequired();
+}
+
+
  bool Paragraph::isDeleted(pos_type start, pos_type end) const
  {
         LASSERT(start >= 0 && start <= size(), return false);
@@ -615,7 +603,7 @@ bool Paragraph::isMergedOnEndOfParDeletion(bool trackChanges) const
         if (!trackChanges)
                 return true;
  
-       Change const change = d->changes_.lookup(size());
+       Change const & change = d->changes_.lookup(size());
         return change.inserted() && change.currentAuthor();
  }
  
@@ -946,10 +934,13 @@ int Paragraph::Private::writeScriptChars(otexstream & os,
  {
         // FIXME: modifying i here is not very nice...
  
-       // We only arrive here when a proper language for character text_[i] has
-       // not been specified (i.e., it could not be translated in the current
-       // latex encoding) or its latex translation has been forced, and it
-       // belongs to a known script.
+       // We only arrive here when character text_[i] could not be translated
+       // into the current latex encoding (or its latex translation has been forced),
+       // and it belongs to a known script.
+       // TODO: We need \textcyr and \textgreek wrappers also for characters
+       //       that can be encoded in the "LaTeX encoding" but not in the
+       //       current *font encoding*.
+       //       (See #9681 for details and test)
         // Parameter ltx contains the latex translation of text_[i] as specified
         // in the unicodesymbols file and is something like "\textXXX{<spec>}".
         // The latex macro name "textXXX" specifies the script to which text_[i]
@@ -965,6 +956,10 @@ int Paragraph::Private::writeScriptChars(otexstream & os,
         bool closing_brace = true;
         if (script == "textgreek" && encoding.latexName() == "iso-8859-7") {
                 // Correct encoding is being used, so we can avoid \textgreek.
+               // TODO: wrong test: we need to check the *font encoding*
+               //       (i.e. the active language and its FontEncoding tag)
+               //       instead of the LaTeX *input encoding*!
+               //       See #9637 for details and test-cases.
                 pos = brace1 + 1;
                 length -= pos;
                 closing_brace = false;
@@ -1011,26 +1006,6 @@ int Paragraph::Private::writeScriptChars(otexstream & os,
  }
  
  
-bool Paragraph::Private::isTextAt(string const & str, pos_type pos) const
-{
-       pos_type const len = str.length();
-
-       // is the paragraph large enough?
-       if (pos + len > int(text_.size()))
-               return false;
-
-       // does the wanted text start at point?
-       for (string::size_type i = 0; i < str.length(); ++i) {
-               // Caution: direct comparison of characters works only
-               // because str is pure ASCII.
-               if (str[i] != text_[pos + i])
-                       return false;
-       }
-
-       return fontlist_.hasChangeInRange(pos, len);
-}
-
-
  void Paragraph::Private::latexInset(BufferParams const & bparams,
                                     otexstream & os,
                                     OutputParams & runparams,
@@ -1190,7 +1165,9 @@ void Paragraph::Private::latexSpecialChar(otexstream & os,
         char_type const c = (runparams.use_polyglossia) ?
                 owner_->getUChar(bparams, i) : text_[i];
  
-       if (style.pass_thru || runparams.pass_thru) {
+       if (style.pass_thru || runparams.pass_thru
+           || contains(style.pass_thru_chars, c)
+           || contains(runparams.pass_thru_chars, c)) {
                 if (c != '\0') {
                         Encoding const * const enc = runparams.encoding;
                         if (enc && !enc->encodable(c))
@@ -1205,16 +1182,11 @@ void Paragraph::Private::latexSpecialChar(otexstream & os,
                 return;
         // If T1 font encoding is used, use the special
         // characters it provides.
-       // NOTE: some languages reset the font encoding
-       // internally
+       // NOTE: Some languages reset the font encoding internally to a
+       //       non-standard font encoding. If we are using such a language,
+       //       we do not output special T1 chars.
         if (!runparams.inIPA && !running_font.language()->internalFontEncoding()
-           && lyxrc.fontenc == "T1" && latexSpecialT1(c, os, i, column))
-               return;
-
-       // \tt font needs special treatment
-       if (!runparams.inIPA
-            && running_font.fontInfo().family() == TYPEWRITER_FAMILY
-            && latexSpecialTypewriter(c, os, i, column))
+           && bparams.font_encoding() == "T1" && latexSpecialT1(c, os, i, column))
                 return;
  
         // Otherwise, we use what LaTeX provides us.
@@ -1237,6 +1209,16 @@ void Paragraph::Private::latexSpecialChar(otexstream & os,
                 break;
         case '-':
                 os << '-';
+               if (i + 1 < static_cast<pos_type>(text_.size()) &&
+                   (end_pos == -1 || i + 1 < end_pos) &&
+                   text_[i+1] == '-') {
+                       // Prevent "--" becoming an endash and "---" becoming
+                       // an emdash.
+                       // Within \ttfamily, "--" is merged to "-" (no endash)
+                       // so we avoid this rather irritating ligature as well
+                       os << "{}";
+                       column += 2;
+               }
                 break;
         case '\"':
                 os << "\\char`\\\"{}";
@@ -1280,10 +1262,6 @@ void Paragraph::Private::latexSpecialChar(otexstream & os,
                 break;
  
         default:
-               // LyX, LaTeX etc.
-               if (latexSpecialPhrase(os, i, end_pos, column, runparams))
-                       return;
-
                 if (c == '\0')
                         return;
  
@@ -1396,55 +1374,6 @@ bool Paragraph::Private::latexSpecialT3(char_type const c, otexstream & os,
  }
  
  
-bool Paragraph::Private::latexSpecialTypewriter(char_type const c, otexstream & os,
-       pos_type i, unsigned int & column)
-{
-       switch (c) {
-       case '-':
-               // within \ttfamily, "--" is merged to "-" (no endash)
-               // so we avoid this rather irritating ligature
-               if (i + 1 < int(text_.size()) && text_[i + 1] == '-') {
-                       os << "-{}";
-                       column += 2;
-               } else
-                       os << '-';
-               return true;
-
-       // everything else has to be checked separately
-       // (depending on the encoding)
-       default:
-               return false;
-       }
-}
-
-
-/// \param end_pos
-///   If [start_pos, end_pos) does not include entirely the special phrase, then
-///   do not apply the macro transformation.
-bool Paragraph::Private::latexSpecialPhrase(otexstream & os, pos_type & i, pos_type end_pos,
-       unsigned int & column, OutputParams const & runparams)
-{
-       // FIXME: if we have "LaTeX" with a font
-       // change in the middle (before the 'T', then
-       // the "TeX" part is still special cased.
-       // Really we should only operate this on
-       // "words" for some definition of word
-
-       for (size_t pnr = 0; pnr < phrases_nr; ++pnr) {
-               if (!isTextAt(special_phrases[pnr].phrase, i)
-                   || (end_pos != -1 && i + int(special_phrases[pnr].phrase.size()) > end_pos))
-                       continue;
-               if (runparams.moving_arg)
-                       os << "\\protect";
-               os << special_phrases[pnr].macro;
-               i += special_phrases[pnr].phrase.length() - 1;
-               column += special_phrases[pnr].macro.length() - 1;
-               return true;
-       }
-       return false;
-}
-
-
  void Paragraph::Private::validate(LaTeXFeatures & features) const
  {
         if (layout_->inpreamble && inset_owner_) {
@@ -1453,13 +1382,12 @@ void Paragraph::Private::validate(LaTeXFeatures & features) const
                 BufferParams const & bp = features.runparams().is_child
                         ? buf.masterParams() : buf.params();
                 Font f;
-               TexRow texrow;
                 // Using a string stream here circumvents the encoding
                 // switching machinery of odocstream. Therefore the
                 // output is wrong if this paragraph contains content
                 // that needs to switch encoding.
                 odocstringstream ods;
-               otexstream os(ods, texrow);
+               otexstream os(ods);
                 if (is_command) {
                         os << '\\' << from_ascii(layout_->latexname());
                         // we have to provide all the optional arguments here, even though
@@ -1485,7 +1413,7 @@ void Paragraph::Private::validate(LaTeXFeatures & features) const
                                 }
                         }
                         string const snippet = to_utf8(ods.str());
-                       features.addPreambleSnippet(snippet);
+                       features.addPreambleSnippet(snippet, true);
                 }
         }
  
@@ -1522,13 +1450,6 @@ void Paragraph::Private::validate(LaTeXFeatures & features) const
  
         // then the contents
         for (pos_type i = 0; i < int(text_.size()) ; ++i) {
-               for (size_t pnr = 0; pnr < phrases_nr; ++pnr) {
-                       if (!special_phrases[pnr].builtin
-                           && isTextAt(special_phrases[pnr].phrase, i)) {
-                               features.require(special_phrases[pnr].phrase);
-                               break;
-                       }
-               }
                 BufferEncodings::validate(text_[i], features);
         }
  }
@@ -1635,7 +1556,7 @@ void Paragraph::write(ostream & os, BufferParams const & bparams,
         int column = 0;
         for (pos_type i = 0; i <= size(); ++i) {
  
-               Change const change = lookupChange(i);
+               Change const & change = lookupChange(i);
                 if (change != running_change)
                         flushString(os, write_buffer);
                 Changes::lyxMarkChange(os, bparams, column, running_change, change);
@@ -1671,6 +1592,9 @@ void Paragraph::write(ostream & os, BufferParams const & bparams,
                                         os << "\n\\end_inset\n\n";
                                         column = 0;
                                 }
+                               // FIXME This can be removed again once the mystery
+                               // crash has been resolved.
+                               os << flush;
                         }
                         break;
                 case '\\':
@@ -1706,6 +1630,9 @@ void Paragraph::write(ostream & os, BufferParams const & bparams,
  
         flushString(os, write_buffer);
         os << "\n\\end_layout\n";
+       // FIXME This can be removed again once the mystery
+       // crash has been resolved.
+       os << flush;
  }
  
  
@@ -1822,7 +1749,10 @@ Font const & Paragraph::getFontSettings(BufferParams const & bparams,
  
  FontSpan Paragraph::fontSpan(pos_type pos) const
  {
-       LBUFERR(pos < size());
+       LBUFERR(pos <= size());
+
+       if (pos == size())
+               return FontSpan(pos, pos);
  
         pos_type start = 0;
         FontList::const_iterator cit = d->fontlist_.begin();
@@ -1911,18 +1841,10 @@ Font const Paragraph::getLayoutFont
  }
  
  
-/// Returns the height of the highest font in range
-FontSize Paragraph::highestFontInRange
-       (pos_type startpos, pos_type endpos, FontSize def_size) const
-{
-       return d->fontlist_.highestInRange(startpos, endpos, def_size);
-}
-
-
  char_type Paragraph::getUChar(BufferParams const & bparams, pos_type pos) const
  {
         char_type c = d->text_[pos];
-       if (!lyxrc.rtl_support || !getFontSettings(bparams, pos).isRightToLeft())
+       if (!getFontSettings(bparams, pos).isRightToLeft())
                 return c;
  
         // FIXME: The arabic special casing is due to the difference of arabic
@@ -2090,7 +2012,7 @@ docstring Paragraph::expandParagraphLabel(Layout const & layout,
                         docstring parent(fmt, i + 1, j - i - 1);
                         docstring label = from_ascii("??");
                         if (tclass.hasLayout(parent))
-                               docstring label = expandParagraphLabel(tclass[parent], bparams,
+                               label = expandParagraphLabel(tclass[parent], bparams,
                                                       process_appendix);
                         fmt = docstring(fmt, 0, i) + label
                                 + docstring(fmt, j + 1, docstring::npos);
@@ -2134,14 +2056,12 @@ void Paragraph::setBeginOfBody()
         pos_type end = size();
         if (i < end && !(isNewline(i) || isEnvSeparator(i))) {
                 ++i;
-               char_type previous_char = 0;
-               char_type temp = 0;
                 if (i < end) {
-                       previous_char = d->text_[i];
+                       char_type previous_char = d->text_[i];
                         if (!(isNewline(i) || isEnvSeparator(i))) {
                                 ++i;
                                 while (i < end && previous_char != ' ') {
-                                       temp = d->text_[i];
+                                       char_type temp = d->text_[i];
                                         if (isNewline(i) || isEnvSeparator(i))
                                                 break;
                                         ++i;
@@ -2197,36 +2117,37 @@ string correction(string const & orig)
  }
  
  
-string const corrected_env(string const & suffix, string const & env,
-       InsetCode code, bool const lastpar)
+bool corrected_env(otexstream & os, string const & suffix, string const & env,
+       InsetCode code, bool const lastpar, int & col)
  {
-       string output = suffix + "{";
+       string macro = suffix + "{";
         if (noTrivlistCentering(code)) {
                 if (lastpar) {
                         // the last paragraph in non-trivlist-aligned
                         // context is special (to avoid unwanted whitespace)
-                       if (suffix == "\\begin")
-                               return "\\" + correction(env) + "{}";
-                       return string();
+                       if (suffix == "\\begin") {
+                               macro = "\\" + correction(env) + "{}";
+                               os << from_ascii(macro);
+                               col += macro.size();
+                               return true;
+                       }
+                       return false;
                 }
-               output += correction(env);
+               macro += correction(env);
         } else
-               output += env;
-       output += "}";
-       if (suffix == "\\begin")
-               output += "\n";
-       return output;
-}
-
-
-void adjust_column(string const & str, int & column)
-{
-       if (!contains(str, "\n"))
-               column += str.size();
-       else {
-               string tmp;
-               column = rsplit(str, tmp, '\n').size();
+               macro += env;
+       macro += "}";
+       if (suffix == "\\par\\end") {
+               os << breakln;
+               col = 0;
+       }
+       os << from_ascii(macro);
+       col += macro.size();
+       if (suffix == "\\begin") {
+               os << breakln;
+               col = 0;
         }
+       return true;
  }
  
  } // namespace anon
@@ -2237,8 +2158,12 @@ int Paragraph::Private::startTeXParParams(BufferParams const & bparams,
  {
         int column = 0;
  
-       if (params_.noindent() && !layout_->pass_thru
-           && (layout_->toggle_indent != ITOGGLE_NEVER)) {
+       bool canindent =
+               (bparams.paragraph_separation == BufferParams::ParagraphIndentSeparation) ?
+                       (layout_->toggle_indent != ITOGGLE_NEVER) :
+                       (layout_->toggle_indent == ITOGGLE_ALWAYS);
+
+       if (canindent && params_.noindent() && !layout_->pass_thru) {
                 os << "\\noindent ";
                 column += 10;
         }
@@ -2277,28 +2202,19 @@ int Paragraph::Private::startTeXParParams(BufferParams const & bparams,
         case LYX_ALIGN_DECIMAL:
                 break;
         case LYX_ALIGN_LEFT: {
-               string output;
                 if (owner_->getParLanguage(bparams)->babel() != "hebrew")
-                       output = corrected_env(begin_tag, "flushleft", code, lastpar);
+                       corrected_env(os, begin_tag, "flushleft", code, lastpar, column);
                 else
-                       output = corrected_env(begin_tag, "flushright", code, lastpar);
-               os << from_ascii(output);
-               adjust_column(output, column);
+                       corrected_env(os, begin_tag, "flushright", code, lastpar, column);
                 break;
         } case LYX_ALIGN_RIGHT: {
-               string output;
                 if (owner_->getParLanguage(bparams)->babel() != "hebrew")
-                       output = corrected_env(begin_tag, "flushright", code, lastpar);
+                       corrected_env(os, begin_tag, "flushright", code, lastpar, column);
                 else
-                       output = corrected_env(begin_tag, "flushleft", code, lastpar);
-               os << from_ascii(output);
-               adjust_column(output, column);
+                       corrected_env(os, begin_tag, "flushleft", code, lastpar, column);
                 break;
         } case LYX_ALIGN_CENTER: {
-               string output;
-               output = corrected_env(begin_tag, "center", code, lastpar);
-               os << from_ascii(output);
-               adjust_column(output, column);
+               corrected_env(os, begin_tag, "center", code, lastpar, column);
                 break;
         }
         }
@@ -2330,8 +2246,9 @@ bool Paragraph::Private::endTeXParParams(BufferParams const & bparams,
                 break;
         }
  
-       string output;
-       string const end_tag = "\n\\par\\end";
+       bool output = false;
+       int col = 0;
+       string const end_tag = "\\par\\end";
         InsetCode code = ownerCode();
         bool const lastpar = runparams.isLastPar;
  
@@ -2344,26 +2261,23 @@ bool Paragraph::Private::endTeXParParams(BufferParams const & bparams,
                 break;
         case LYX_ALIGN_LEFT: {
                 if (owner_->getParLanguage(bparams)->babel() != "hebrew")
-                       output = corrected_env(end_tag, "flushleft", code, lastpar);
+                       output = corrected_env(os, end_tag, "flushleft", code, lastpar, col);
                 else
-                       output = corrected_env(end_tag, "flushright", code, lastpar);
-               os << from_ascii(output);
+                       output = corrected_env(os, end_tag, "flushright", code, lastpar, col);
                 break;
         } case LYX_ALIGN_RIGHT: {
                 if (owner_->getParLanguage(bparams)->babel() != "hebrew")
-                       output = corrected_env(end_tag, "flushright", code, lastpar);
+                       output = corrected_env(os, end_tag, "flushright", code, lastpar, col);
                 else
-                       output = corrected_env(end_tag, "flushleft", code, lastpar);
-               os << from_ascii(output);
+                       output = corrected_env(os, end_tag, "flushleft", code, lastpar, col);
                 break;
         } case LYX_ALIGN_CENTER: {
-               output = corrected_env(end_tag, "center", code, lastpar);
-               os << from_ascii(output);
+               corrected_env(os, end_tag, "center", code, lastpar, col);
                 break;
         }
         }
  
-       return !output.empty() || lastpar;
+       return output || lastpar;
  }
  
  
@@ -2499,18 +2413,18 @@ void Paragraph::latex(BufferParams const & bparams,
                 ++column;
  
                 // Fully instantiated font
-               Font const font = getFont(bparams, i, outerfont);
+               Font const current_font = getFont(bparams, i, outerfont);
  
                 Font const last_font = running_font;
  
                 // Do we need to close the previous font?
                 if (open_font &&
-                   (font != running_font ||
-                    font.language() != running_font.language()))
+                   (current_font != running_font ||
+                    current_font.language() != running_font.language()))
                 {
                         column += running_font.latexWriteEndChanges(
                                         os, bparams, runparams, basefont,
-                                       (i == body_pos-1) ? basefont : font);
+                                       (i == body_pos-1) ? basefont : current_font);
                         running_font = basefont;
                         open_font = false;
                 }
@@ -2521,7 +2435,7 @@ void Paragraph::latex(BufferParams const & bparams,
                 string const lang_end_command = runparams.use_polyglossia ?
                         "\\end{$$lang}" : lyxrc.language_command_end;
                 if (!running_lang.empty() &&
-                   font.language()->encoding()->package() == Encoding::CJK) {
+                   current_font.language()->encoding()->package() == Encoding::CJK) {
                                 string end_tag = subst(lang_end_command,
                                                         "$$lang",
                                                         running_lang);
@@ -2532,28 +2446,28 @@ void Paragraph::latex(BufferParams const & bparams,
                 // Switch file encoding if necessary (and allowed)
                 if (!runparams.pass_thru && !style.pass_thru &&
                     runparams.encoding->package() != Encoding::none &&
-                   font.language()->encoding()->package() != Encoding::none) {
+                   current_font.language()->encoding()->package() != Encoding::none) {
                         pair<bool, int> const enc_switch =
                                 switchEncoding(os.os(), bparams, runparams,
-                                       *(font.language()->encoding()));
+                                       *(current_font.language()->encoding()));
                         if (enc_switch.first) {
                                 column += enc_switch.second;
-                               runparams.encoding = font.language()->encoding();
+                               runparams.encoding = current_font.language()->encoding();
                         }
                 }
  
                 char_type const c = d->text_[i];
  
                 // Do we need to change font?
-               if ((font != running_font ||
-                    font.language() != running_font.language()) &&
+               if ((current_font != running_font ||
+                    current_font.language() != running_font.language()) &&
                         i != body_pos - 1)
                 {
                         odocstringstream ods;
-                       column += font.latexWriteStartChanges(ods, bparams,
+                       column += current_font.latexWriteStartChanges(ods, bparams,
                                                               runparams, basefont,
                                                               last_font);
-                       running_font = font;
+                       running_font = current_font;
                         open_font = true;
                         docstring fontchange = ods.str();
                         // check whether the fontchange ends with a \\textcolor
@@ -2579,7 +2493,7 @@ void Paragraph::latex(BufferParams const & bparams,
                         // style.pass_thru is false.
                         if (i != body_pos - 1) {
                                 if (d->simpleTeXBlanks(runparams, os,
-                                               i, column, font, style)) {
+                                               i, column, current_font, style)) {
                                         // A surrogate pair was output. We
                                         // must not call latexSpecialChar
                                         // in this iteration, since it would output
@@ -2592,7 +2506,7 @@ void Paragraph::latex(BufferParams const & bparams,
  
                 OutputParams rp = runparams;
                 rp.free_spacing = style.free_spacing;
-               rp.local_font = &font;
+               rp.local_font = &current_font;
                 rp.intitle = style.intitle;
  
                 // Two major modes:  LaTeX or plain
@@ -2666,8 +2580,7 @@ void Paragraph::latex(BufferParams const & bparams,
         if (allowcust && d->endTeXParParams(bparams, os, runparams)
             && runparams.encoding != prev_encoding) {
                 runparams.encoding = prev_encoding;
-               if (!runparams.isFullUnicode())
-                       os << setEncoding(prev_encoding->iconvName());
+               os << setEncoding(prev_encoding->iconvName());
         }
  
         LYXERR(Debug::LATEX, "Paragraph::latex... done " << this);
@@ -2844,6 +2757,7 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
                                     XHTMLStream & xs,
                                     OutputParams const & runparams,
                                     Font const & outerfont,
+                                   bool start_paragraph, bool close_paragraph,
                                     pos_type initial) const
  {
         docstring retval;
@@ -2865,7 +2779,8 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
  
         Layout const & style = *d->layout_;
  
-       xs.startParagraph(allowEmpty());
+       if (start_paragraph)
+               xs.startDivision(allowEmpty());
  
         FontInfo font_old =
                 style.labeltype == LABEL_MANUAL ? style.labelfont : style.font;
@@ -3151,42 +3066,26 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
                         if (!runparams.for_toc || inset->isInToc()) {
                                 OutputParams np = runparams;
                                 np.local_font = &font;
-                               if (!inset->getLayout().htmlisblock())
+                               // If the paragraph has size 1, then we are in the "special
+                               // case" where we do not output the containing paragraph info
+                               if (!inset->getLayout().htmlisblock() && size() != 1)
                                         np.html_in_par = true;
                                 retval += inset->xhtml(xs, np);
                         }
                 } else {
                         char_type c = getUChar(buf.masterBuffer()->params(), i);
-
-                       if (style.pass_thru || runparams.pass_thru)
-                               xs << c;
-                       else if (c == '-') {
-                               docstring str;
-                               int j = i + 1;
-                               if (j < size() && d->text_[j] == '-') {
-                                       j += 1;
-                                       if (j < size() && d->text_[j] == '-') {
-                                               str += from_ascii("&mdash;");
-                                               i += 2;
-                                       } else {
-                                               str += from_ascii("&ndash;");
-                                               i += 1;
-                                       }
-                               }
-                               else
-                                       str += c;
-                               // We don't want to escape the entities. Note that
-                               // it is safe to do this, since str can otherwise
-                               // only be "-". E.g., it can't be "<".
-                               xs << XHTMLStream::ESCAPE_NONE << str;
-                       } else
-                               xs << c;
+                       xs << c;
                 }
                 font_old = font.fontInfo();
         }
  
+       // FIXME XHTML
+       // I'm worried about what happens if a branch, say, is itself
+       // wrapped in some font stuff. I think that will not work.
         xs.closeFontTags();
-       xs.endParagraph();
+       if (close_paragraph)
+               xs.endDivision();
+
         return retval;
  }
  
@@ -3194,8 +3093,7 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
  bool Paragraph::isHfill(pos_type pos) const
  {
         Inset const * inset = getInset(pos);
-       return inset && (inset->lyxCode() == SPACE_CODE &&
-                        inset->isStretchableSpace());
+       return inset && inset->isHfill();
  }
  
  
@@ -3256,16 +3154,13 @@ bool Paragraph::isHardHyphenOrApostrophe(pos_type pos) const
         if ((nextpos == psize || isSpace(nextpos))
                 && (pos == 0 || isSpace(prevpos)))
                 return false;
-       return c == '\''
-               || ((nextpos == psize || d->text_[nextpos] != '-')
-               && (pos == 0 || d->text_[prevpos] != '-'));
+       return true;
  }
  
  
-bool Paragraph::isSameSpellRange(pos_type pos1, pos_type pos2) const
+FontSpan const & Paragraph::getSpellRange(pos_type pos) const
  {
-       return pos1 == pos2
-               || d->speller_state_.getRange(pos1) == d->speller_state_.getRange(pos2);
+       return d->speller_state_.getRange(pos);
  }
  
  
@@ -3299,8 +3194,7 @@ Paragraph::getParLanguage(BufferParams const & bparams) const
  
  bool Paragraph::isRTL(BufferParams const & bparams) const
  {
-       return lyxrc.rtl_support
-               && getParLanguage(bparams)->rightToLeft()
+       return getParLanguage(bparams)->rightToLeft()
                 && !inInset().getLayout().forceLTR();
  }
  
@@ -3377,8 +3271,6 @@ docstring Paragraph::asString(pos_type beg, pos_type end, int options, const Out
                                 getInset(i)->plaintext(os, *runparams);
                         } else {
                                 getInset(i)->toString(os);
-                               if (getInset(i)->asInsetMath())
-                                       os << " ";
                         }
                 }
         }
@@ -3387,21 +3279,23 @@ docstring Paragraph::asString(pos_type beg, pos_type end, int options, const Out
  }
  
  
-void Paragraph::forOutliner(docstring & os, size_t maxlen) const
+void Paragraph::forOutliner(docstring & os, size_t const maxlen,
+                                                       bool const shorten) const
  {
+       size_t tmplen = shorten ? maxlen + 1 : maxlen;
         if (!d->params_.labelString().empty())
                 os += d->params_.labelString() + ' ';
-       for (pos_type i = 0; i < size() && os.length() < maxlen; ++i) {
+       for (pos_type i = 0; i < size() && os.length() < tmplen; ++i) {
                 if (isDeleted(i))
                         continue;
                 char_type const c = d->text_[i];
                 if (isPrintable(c))
                         os += c;
-               else if (c == '\t' || c == '\n')
-                       os += ' ';
                 else if (c == META_INSET)
-                       getInset(i)->forOutliner(os, maxlen);
+                       getInset(i)->forOutliner(os, tmplen, false);
         }
+       if (shorten)
+               Text::shortenForOutliner(os, maxlen);
  }
  
  
@@ -3491,46 +3385,6 @@ bool Paragraph::allowEmpty() const
  }
  
  
-char_type Paragraph::transformChar(char_type c, pos_type pos) const
-{
-       if (!Encodings::isArabicChar(c))
-               return c;
-
-       char_type prev_char = ' ';
-       char_type next_char = ' ';
-
-       for (pos_type i = pos - 1; i >= 0; --i) {
-               char_type const par_char = d->text_[i];
-               if (!Encodings::isArabicComposeChar(par_char)) {
-                       prev_char = par_char;
-                       break;
-               }
-       }
-
-       for (pos_type i = pos + 1, end = size(); i < end; ++i) {
-               char_type const par_char = d->text_[i];
-               if (!Encodings::isArabicComposeChar(par_char)) {
-                       next_char = par_char;
-                       break;
-               }
-       }
-
-       if (Encodings::isArabicChar(next_char)) {
-               if (Encodings::isArabicChar(prev_char) &&
-                       !Encodings::isArabicSpecialChar(prev_char))
-                       return Encodings::transformChar(c, Encodings::FORM_MEDIAL);
-               else
-                       return Encodings::transformChar(c, Encodings::FORM_INITIAL);
-       } else {
-               if (Encodings::isArabicChar(prev_char) &&
-                       !Encodings::isArabicSpecialChar(prev_char))
-                       return Encodings::transformChar(c, Encodings::FORM_FINAL);
-               else
-                       return Encodings::transformChar(c, Encodings::FORM_ISOLATED);
-       }
-}
-
-
  bool Paragraph::brokenBiblio() const
  {
         // there is a problem if there is no bibitem at position 0 or
@@ -3846,7 +3700,16 @@ void Paragraph::collectWords()
                         continue;
                 pos_type from = pos;
                 locateWord(from, pos, WHOLE_WORD);
-               if (pos < from + lyxrc.completion_minlength)
+               // Work around MSVC warning: The statement
+               // if (pos < from + lyxrc.completion_minlength)
+               // triggers a signed vs. unsigned warning.
+               // I don't know why this happens; it could be an MSVC bug, or
+               // related to LLP64 (Windows) vs. LP64 (Unix) programming
+               // model, or the C++ standard might be ambiguous in the section
+               // defining the "usual arithmetic conversions". However, using
+               // a temporary variable is safe and works on all compilers.
+               pos_type const endpos = from + lyxrc.completion_minlength;
+               if (pos < endpos)
                         continue;
                 FontList::const_iterator cit = d->fontlist_.fontIterator(from);
                 if (cit == d->fontlist_.end())