-int Paragraph::Private::knownLangChars(odocstream & os,
- char_type c,
- string & preamble,
- Change & runningChange,
- Encoding const & encoding,
- pos_type & i)
-{
- // When the character is marked by the proper language, we simply
- // get its code point in some encoding, otherwise we get the
- // translation specified in the unicodesymbols file, which is
- // something like "\textLANG{<spec>}". So, we have to retain
- // "\textLANG{<spec>" for the first char but only "<spec>" for
- // all subsequent chars.
- docstring const latex1 = rtrim(encoding.latexChar(c), "}");
- int length = latex1.length();
- os << latex1;
- while (i + 1 < size()) {
- char_type next = text_[i + 1];
- // Stop here if next character belongs to another
- // language or there is a change tracking status.
- if (!Encodings::isKnownLangChar(next, preamble) ||
+int Paragraph::Private::writeScriptChars(odocstream & os,
+ docstring const & ltx,
+ Change & runningChange,
+ Encoding const & encoding,
+ pos_type & i)
+{
+ // FIXME: modifying i here is not very nice...
+
+ // We only arrive here when a proper language for character text_[i] has
+ // not been specified (i.e., it could not be translated in the current
+ // latex encoding) or its latex translation has been forced, and it
+ // belongs to a known script.
+ // Parameter ltx contains the latex translation of text_[i] as specified
+ // in the unicodesymbols file and is something like "\textXXX{<spec>}".
+ // The latex macro name "textXXX" specifies the script to which text_[i]
+ // belongs and we use it in order to check whether characters from the
+ // same script immediately follow, such that we can collect them in a
+ // single "\textXXX" macro. So, we have to retain "\textXXX{<spec>"
+ // for the first char but only "<spec>" for all subsequent chars.
+ docstring::size_type const brace1 = ltx.find_first_of(from_ascii("{"));
+ docstring::size_type const brace2 = ltx.find_last_of(from_ascii("}"));
+ string script = to_ascii(ltx.substr(1, brace1 - 1));
+ int pos = 0;
+ int length = brace2;
+ bool closing_brace = true;
+ if (script == "textgreek" && encoding.latexName() == "iso-8859-7") {
+ // Correct encoding is being used, so we can avoid \textgreek.
+ pos = brace1 + 1;
+ length -= pos;
+ closing_brace = false;
+ }
+ os << ltx.substr(pos, length);
+ int size = text_.size();
+ while (i + 1 < size) {
+ char_type const next = text_[i + 1];
+ // Stop here if next character belongs to another script
+ // or there is a change in change tracking status.
+ if (!Encodings::isKnownScriptChar(next, script) ||