Small tweaks

author Enrico Forestieri <forenr@lyx.org>

Wed, 24 Oct 2007 17:22:57 +0000 (17:22 +0000)

committer Enrico Forestieri <forenr@lyx.org>

Wed, 24 Oct 2007 17:22:57 +0000 (17:22 +0000)
author Enrico Forestieri <forenr@lyx.org>
Wed, 24 Oct 2007 17:22:57 +0000 (17:22 +0000)
committer Enrico Forestieri <forenr@lyx.org>
Wed, 24 Oct 2007 17:22:57 +0000 (17:22 +0000)
diff --git a/src/Encoding.cpp b/src/Encoding.cpp

index 9e7f139cd3aa70ac6df69ff1c7fc95ed8ffdf277..ca896d4c433bfc1fc15fc15192add67f29ea0a6d 100644 (file)
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@@ -401,7 +401,7 @@ bool Encodings::isCombiningChar(char_type c)
  }
  
  
-bool Encodings::isKnownLangChar(char_type c, string & preamble)
+bool Encodings::isKnownScriptChar(char_type const c, string & preamble)
  {
         CharInfoMap::const_iterator const it = unicodesymbols.find(c);
  
diff --git a/src/Encoding.h b/src/Encoding.h

index 99afe269e5b7f7e22416f1967ae310fd3d934fb4..b4e728ea67bcc6757df3640ec902efcf2542c449 100644 (file)
--- a/src/Encoding.h
+++ b/src/Encoding.h
@@ -145,12 +145,12 @@ public:
         /**
          * Is this a known char from some language?
          * If \p preamble is empty and code point \p c is known to belong
-        * to a supported language, true is returned and \p preamble is set
+        * to a supported script, true is returned and \p preamble is set
          * to the corresponding entry in the unicodesymbols file.
          * If \p preamble is not empty, a check is made whether code point
          * \p c is a known character matching the preamble entry.
          */
-       static bool isKnownLangChar(char_type c, std::string & preamble);
+       static bool isKnownScriptChar(char_type const c, std::string & preamble);
         /**
          * Add the preamble snippet needed for the output of \p c to
          * \p features.
diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp

index 326b47d17d7700cb324edeeb7ec03723278e87fe..56a19927b8e08cb74c31fee282e7a1d95e147e19 100644 (file)
--- a/src/Paragraph.cpp
+++ b/src/Paragraph.cpp
@@ -70,7 +70,6 @@ using support::lowercase;
  using support::prefixIs;
  using support::suffixIs;
  using support::rsplit;
-using support::rtrim;
  using support::uppercase;
  
  namespace {
@@ -110,10 +109,10 @@ public:
                              Font const & font,
                              Layout const & style);
  
-       /// Output consecutive known unicode chars, belonging to the same
-       /// language as specified by \p preamble, to \p os starting from \p c.
+       /// Output consecutive unicode chars, belonging to the same script as
+       /// specified by the latex macro \p ltx, to \p os starting from \p c.
         /// \return the number of characters written.
-       int knownLangChars(odocstream & os, char_type c, string & preamble,
+       int writeScriptChars(odocstream & os, char_type c, docstring const & ltx,
                            Change &, Encoding const &, pos_type &);
  
         /// This could go to ParagraphParameters if we want to.
@@ -570,28 +569,34 @@ bool Paragraph::Private::simpleTeXBlanks(Encoding const & encoding,
  }
  
  
-int Paragraph::Private::knownLangChars(odocstream & os,
-                                    char_type c,
-                                    string & preamble,
-                                    Change & runningChange,
-                                    Encoding const & encoding,
-                                    pos_type & i)
-{
-       // When the character is marked by the proper language, we simply
-       // get its code point in some encoding, otherwise we get the
-       // translation specified in the unicodesymbols file, which is
-       // something like "\textLANG{<spec>}". So, we have to retain
-       // "\textLANG{<spec>" for the first char but only "<spec>" for
-       // all subsequent chars.
-       docstring const latex1 = rtrim(encoding.latexChar(c), "}");
-       int length = latex1.length();
-       os << latex1;
+int Paragraph::Private::writeScriptChars(odocstream & os,
+                                        char_type c,
+                                        docstring const & ltx,
+                                        Change & runningChange,
+                                        Encoding const & encoding,
+                                        pos_type & i)
+{
+       // We only arrive here when a proper language for character c has not
+       // been specified (i.e., it could not be translated in the current
+       // latex encoding) and it belongs to a known script.
+       // Parameter ltx contains the latex translation of c as specified in
+       // the unicodesymbols file and is something like "\textXXX{<spec>}".
+       // The latex macro name "textXXX" specifies the script to which c
+       // belongs and we use it in order to check whether characters from the
+       // same script immediately follow, such that we can collect them in a
+       // single "\textXXX" macro. So, we have to retain "\textXXX{<spec>"
+       // for the first char but only "<spec>" for all subsequent chars.
+       docstring::size_type const brace1 = ltx.find_first_of(from_ascii("{"));
+       docstring::size_type const brace2 = ltx.find_last_of(from_ascii("}"));
+       string script = to_ascii(ltx.substr(1, brace1 - 1));
+       int length = ltx.substr(0, brace2).length();
+       os << ltx.substr(0, brace2);
         int size = text_.size();
         while (i + 1 < size) {
-               char_type next = text_[i + 1];
-               // Stop here if next character belongs to another
-               // language or there is a change tracking status.
-               if (!Encodings::isKnownLangChar(next, preamble) ||
+               char_type const next = text_[i + 1];
+               // Stop here if next character belongs to another script
+               // or there is a change in change tracking status.
+               if (!Encodings::isKnownScriptChar(next, script) ||
                     runningChange != owner_->lookupChange(i + 1))
                         break;
                 Font prev_font;
@@ -606,27 +611,21 @@ int Paragraph::Private::knownLangChars(odocstream & os,
                         if (cit->pos() >= i + 1)
                                 break;
                 }
-               // Stop here if there is a font attribute change.
+               // Stop here if there is a font attribute or encoding change.
                 if (found && cit != end && prev_font != cit->font())
                         break;
-               docstring const latex = rtrim(encoding.latexChar(next), "}");
-               docstring::size_type const j =
+               docstring const latex = encoding.latexChar(next);
+               docstring::size_type const b1 =
                                         latex.find_first_of(from_ascii("{"));
-               if (j == docstring::npos) {
-                       os << latex;
-                       length += latex.length();
-               } else {
-                       os << latex.substr(j + 1);
-                       length += latex.substr(j + 1).length();
-               }
+               docstring::size_type const b2 =
+                                       latex.find_last_of(from_ascii("}"));
+               int const len = b2 - b1 - 1;
+               os << latex.substr(b1 + 1, len);
+               length += len;
                 ++i;
         }
-       // When the proper language is set, we are simply passed a code
-       // point, so we should not try to close the \textLANG command.
-       if (prefixIs(latex1, from_ascii("\\" + preamble))) {
-               os << '}';
-               ++length;
-       }
+       os << '}';
+       ++length;
         return length;
  }
  
@@ -896,14 +895,13 @@ void Paragraph::Private::latexSpecialChar(
                                 break;
                         }
                 }
-               string preamble;
-               if (Encodings::isKnownLangChar(c, preamble)) {
-                       column += knownLangChars(os, c, preamble, running_change,
-                               encoding, i) - 1;
-                       break;
-               }
+               string script;
                 docstring const latex = encoding.latexChar(c);
-               if (latex.length() > 1 && latex[latex.length() - 1] != '}') {
+               if (Encodings::isKnownScriptChar(c, script)
+                   && prefixIs(latex, from_ascii("\\" + script)))
+                       column += writeScriptChars(os, c, latex,
+                                       running_change, encoding, i) - 1;
+               else if (latex.length() > 1 && latex[latex.length() - 1] != '}') {
                         // Prevent eating of a following
                         // space or command corruption by
                         // following characters
author	Enrico Forestieri <forenr@lyx.org>
	Wed, 24 Oct 2007 17:22:57 +0000 (17:22 +0000)
committer	Enrico Forestieri <forenr@lyx.org>
	Wed, 24 Oct 2007 17:22:57 +0000 (17:22 +0000)
src/Encoding.cpp		patch \| blob \| history
src/Encoding.h		patch \| blob \| history
src/Paragraph.cpp		patch \| blob \| history