From: Enrico Forestieri Date: Wed, 24 Oct 2007 17:22:57 +0000 (+0000) Subject: Small tweaks X-Git-Tag: 1.6.10~7677 X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=f48d7caee3fab40528ceedc7a05f67004ba59d83;p=features.git Small tweaks * src/Paragraph.cpp (knownLangChars): renamed as writeScriptChars. Now only deals with characters that cannot be encoded using the current latex encoding. (latexSpecialChars): only call writeScriptChars if the character cannot be encoded. * src/Encoding.{cpp,h} (isKnownLangChar): renamed as isKnownScriptChar. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@21183 a592a061-630c-0410-9148-cb99ea01b6c8 --- diff --git a/src/Encoding.cpp b/src/Encoding.cpp index 9e7f139cd3..ca896d4c43 100644 --- a/src/Encoding.cpp +++ b/src/Encoding.cpp @@ -401,7 +401,7 @@ bool Encodings::isCombiningChar(char_type c) } -bool Encodings::isKnownLangChar(char_type c, string & preamble) +bool Encodings::isKnownScriptChar(char_type const c, string & preamble) { CharInfoMap::const_iterator const it = unicodesymbols.find(c); diff --git a/src/Encoding.h b/src/Encoding.h index 99afe269e5..b4e728ea67 100644 --- a/src/Encoding.h +++ b/src/Encoding.h @@ -145,12 +145,12 @@ public: /** * Is this a known char from some language? * If \p preamble is empty and code point \p c is known to belong - * to a supported language, true is returned and \p preamble is set + * to a supported script, true is returned and \p preamble is set * to the corresponding entry in the unicodesymbols file. * If \p preamble is not empty, a check is made whether code point * \p c is a known character matching the preamble entry. */ - static bool isKnownLangChar(char_type c, std::string & preamble); + static bool isKnownScriptChar(char_type const c, std::string & preamble); /** * Add the preamble snippet needed for the output of \p c to * \p features. 
diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp index 326b47d17d..56a19927b8 100644 --- a/src/Paragraph.cpp +++ b/src/Paragraph.cpp @@ -70,7 +70,6 @@ using support::lowercase; using support::prefixIs; using support::suffixIs; using support::rsplit; -using support::rtrim; using support::uppercase; namespace { @@ -110,10 +109,10 @@ public: Font const & font, Layout const & style); - /// Output consecutive known unicode chars, belonging to the same - /// language as specified by \p preamble, to \p os starting from \p c. + /// Output consecutive unicode chars, belonging to the same script as + /// specified by the latex macro \p ltx, to \p os starting from \p c. /// \return the number of characters written. - int knownLangChars(odocstream & os, char_type c, string & preamble, + int writeScriptChars(odocstream & os, char_type c, docstring const & ltx, Change &, Encoding const &, pos_type &); /// This could go to ParagraphParameters if we want to. @@ -570,28 +569,34 @@ bool Paragraph::Private::simpleTeXBlanks(Encoding const & encoding, } -int Paragraph::Private::knownLangChars(odocstream & os, - char_type c, - string & preamble, - Change & runningChange, - Encoding const & encoding, - pos_type & i) -{ - // When the character is marked by the proper language, we simply - // get its code point in some encoding, otherwise we get the - // translation specified in the unicodesymbols file, which is - // something like "\textLANG{<spec>}". So, we have to retain - // "\textLANG{<spec>" for the first char but only "<spec>" for - // all subsequent chars. 
- docstring const latex1 = rtrim(encoding.latexChar(c), "}"); - int length = latex1.length(); - os << latex1; +int Paragraph::Private::writeScriptChars(odocstream & os, + char_type c, + docstring const & ltx, + Change & runningChange, + Encoding const & encoding, + pos_type & i) +{ + // We only arrive here when a proper language for character c has not + // been specified (i.e., it could not be translated in the current + // latex encoding) and it belongs to a known script. + // Parameter ltx contains the latex translation of c as specified in + // the unicodesymbols file and is something like "\textXXX{<spec>}". + // The latex macro name "textXXX" specifies the script to which c + // belongs and we use it in order to check whether characters from the + // same script immediately follow, such that we can collect them in a + // single "\textXXX" macro. So, we have to retain "\textXXX{<spec>" + // for the first char but only "<spec>" for all subsequent chars. + docstring::size_type const brace1 = ltx.find_first_of(from_ascii("{")); + docstring::size_type const brace2 = ltx.find_last_of(from_ascii("}")); + string script = to_ascii(ltx.substr(1, brace1 - 1)); + int length = ltx.substr(0, brace2).length(); + os << ltx.substr(0, brace2); int size = text_.size(); while (i + 1 < size) { - char_type next = text_[i + 1]; - // Stop here if next character belongs to another - // language or there is a change tracking status. - if (!Encodings::isKnownLangChar(next, preamble) || + char_type const next = text_[i + 1]; + // Stop here if next character belongs to another script + // or there is a change in change tracking status. + if (!Encodings::isKnownScriptChar(next, script) || runningChange != owner_->lookupChange(i + 1)) break; Font prev_font; @@ -606,27 +611,21 @@ int Paragraph::Private::knownLangChars(odocstream & os, if (cit->pos() >= i + 1) break; } - // Stop here if there is a font attribute change. + // Stop here if there is a font attribute or encoding change. 
if (found && cit != end && prev_font != cit->font()) break; - docstring const latex = rtrim(encoding.latexChar(next), "}"); - docstring::size_type const j = + docstring const latex = encoding.latexChar(next); + docstring::size_type const b1 = latex.find_first_of(from_ascii("{")); - if (j == docstring::npos) { - os << latex; - length += latex.length(); - } else { - os << latex.substr(j + 1); - length += latex.substr(j + 1).length(); - } + docstring::size_type const b2 = + latex.find_last_of(from_ascii("}")); + int const len = b2 - b1 - 1; + os << latex.substr(b1 + 1, len); + length += len; ++i; } - // When the proper language is set, we are simply passed a code - // point, so we should not try to close the \textLANG command. - if (prefixIs(latex1, from_ascii("\\" + preamble))) { - os << '}'; - ++length; - } + os << '}'; + ++length; return length; } @@ -896,14 +895,13 @@ void Paragraph::Private::latexSpecialChar( break; } } - string preamble; - if (Encodings::isKnownLangChar(c, preamble)) { - column += knownLangChars(os, c, preamble, running_change, - encoding, i) - 1; - break; - } + string script; docstring const latex = encoding.latexChar(c); - if (latex.length() > 1 && latex[latex.length() - 1] != '}') { + if (Encodings::isKnownScriptChar(c, script) + && prefixIs(latex, from_ascii("\\" + script))) + column += writeScriptChars(os, c, latex, + running_change, encoding, i) - 1; + else if (latex.length() > 1 && latex[latex.length() - 1] != '}') { // Prevent eating of a following // space or command corruption by // following characters