From: Enrico Forestieri <forenr@lyx.org>
Date: Wed, 24 Oct 2007 17:22:57 +0000 (+0000)
Subject: Small tweaks
X-Git-Tag: 1.6.10~7677
X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=f48d7caee3fab40528ceedc7a05f67004ba59d83;p=features.git

Small tweaks

	* src/Paragraph.cpp (knownLangChars): renamed to writeScriptChars.
	It now only deals with characters that cannot be encoded using the
	current LaTeX encoding.
	(latexSpecialChar): only call writeScriptChars if the character
	cannot be encoded.

	* src/Encoding.{cpp,h} (isKnownLangChar): renamed to isKnownScriptChar.


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@21183 a592a061-630c-0410-9148-cb99ea01b6c8
---

diff --git a/src/Encoding.cpp b/src/Encoding.cpp
index 9e7f139cd3..ca896d4c43 100644
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@@ -401,7 +401,7 @@ bool Encodings::isCombiningChar(char_type c)
 }
 
 
-bool Encodings::isKnownLangChar(char_type c, string & preamble)
+bool Encodings::isKnownScriptChar(char_type const c, string & preamble)
 {
 	CharInfoMap::const_iterator const it = unicodesymbols.find(c);
 
diff --git a/src/Encoding.h b/src/Encoding.h
index 99afe269e5..b4e728ea67 100644
--- a/src/Encoding.h
+++ b/src/Encoding.h
@@ -145,12 +145,12 @@ public:
 	/**
 	 * Is this a known char from some language?
 	 * If \p preamble is empty and code point \p c is known to belong
-	 * to a supported language, true is returned and \p preamble is set
+	 * to a supported script, true is returned and \p preamble is set
 	 * to the corresponding entry in the unicodesymbols file.
 	 * If \p preamble is not empty, a check is made whether code point
 	 * \p c is a known character matching the preamble entry.
 	 */
-	static bool isKnownLangChar(char_type c, std::string & preamble);
+	static bool isKnownScriptChar(char_type const c, std::string & preamble);
 	/**
 	 * Add the preamble snippet needed for the output of \p c to
 	 * \p features.
diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp
index 326b47d17d..56a19927b8 100644
--- a/src/Paragraph.cpp
+++ b/src/Paragraph.cpp
@@ -70,7 +70,6 @@ using support::lowercase;
 using support::prefixIs;
 using support::suffixIs;
 using support::rsplit;
-using support::rtrim;
 using support::uppercase;
 
 namespace {
@@ -110,10 +109,10 @@ public:
 			     Font const & font,
 			     Layout const & style);
 
-	/// Output consecutive known unicode chars, belonging to the same
-	/// language as specified by \p preamble, to \p os starting from \p c.
+	/// Output consecutive unicode chars, belonging to the same script as
+	/// specified by the latex macro \p ltx, to \p os starting from \p c.
 	/// \return the number of characters written.
-	int knownLangChars(odocstream & os, char_type c, string & preamble,
+	int writeScriptChars(odocstream & os, char_type c, docstring const & ltx,
 			   Change &, Encoding const &, pos_type &);
 
 	/// This could go to ParagraphParameters if we want to.
@@ -570,28 +569,34 @@ bool Paragraph::Private::simpleTeXBlanks(Encoding const & encoding,
 }
 
 
-int Paragraph::Private::knownLangChars(odocstream & os,
-				     char_type c,
-				     string & preamble,
-				     Change & runningChange,
-				     Encoding const & encoding,
-				     pos_type & i)
-{
-	// When the character is marked by the proper language, we simply
-	// get its code point in some encoding, otherwise we get the
-	// translation specified in the unicodesymbols file, which is
-	// something like "\textLANG{<spec>}". So, we have to retain
-	// "\textLANG{<spec>" for the first char but only "<spec>" for
-	// all subsequent chars.
-	docstring const latex1 = rtrim(encoding.latexChar(c), "}");
-	int length = latex1.length();
-	os << latex1;
+int Paragraph::Private::writeScriptChars(odocstream & os,
+					 char_type c,
+					 docstring const & ltx,
+					 Change & runningChange,
+					 Encoding const & encoding,
+					 pos_type & i)
+{
+	// We only arrive here when a proper language for character c has not
+	// been specified (i.e., it could not be translated in the current
+	// latex encoding) and it belongs to a known script.
+	// Parameter ltx contains the latex translation of c as specified in
+	// the unicodesymbols file and is something like "\textXXX{<spec>}".
+	// The latex macro name "textXXX" specifies the script to which c
+	// belongs and we use it in order to check whether characters from the
+	// same script immediately follow, such that we can collect them in a
+	// single "\textXXX" macro. So, we have to retain "\textXXX{<spec>"
+	// for the first char but only "<spec>" for all subsequent chars.
+	docstring::size_type const brace1 = ltx.find_first_of(from_ascii("{"));
+	docstring::size_type const brace2 = ltx.find_last_of(from_ascii("}"));
+	string script = to_ascii(ltx.substr(1, brace1 - 1));
+	int length = ltx.substr(0, brace2).length();
+	os << ltx.substr(0, brace2);
 	int size = text_.size();
 	while (i + 1 < size) {
-		char_type next = text_[i + 1];
-		// Stop here if next character belongs to another
-		// language or there is a change tracking status.
-		if (!Encodings::isKnownLangChar(next, preamble) ||
+		char_type const next = text_[i + 1];
+		// Stop here if next character belongs to another script
+		// or there is a change in change tracking status.
+		if (!Encodings::isKnownScriptChar(next, script) ||
 		    runningChange != owner_->lookupChange(i + 1))
 			break;
 		Font prev_font;
@@ -606,27 +611,21 @@ int Paragraph::Private::knownLangChars(odocstream & os,
 			if (cit->pos() >= i + 1)
 				break;
 		}
-		// Stop here if there is a font attribute change.
+		// Stop here if there is a font attribute or encoding change.
 		if (found && cit != end && prev_font != cit->font())
 			break;
-		docstring const latex = rtrim(encoding.latexChar(next), "}");
-		docstring::size_type const j =
+		docstring const latex = encoding.latexChar(next);
+		docstring::size_type const b1 =
 					latex.find_first_of(from_ascii("{"));
-		if (j == docstring::npos) {
-			os << latex;
-			length += latex.length();
-		} else {
-			os << latex.substr(j + 1);
-			length += latex.substr(j + 1).length();
-		}
+		docstring::size_type const b2 =
+					latex.find_last_of(from_ascii("}"));
+		int const len = b2 - b1 - 1;
+		os << latex.substr(b1 + 1, len);
+		length += len;
 		++i;
 	}
-	// When the proper language is set, we are simply passed a code
-	// point, so we should not try to close the \textLANG command.
-	if (prefixIs(latex1, from_ascii("\\" + preamble))) {
-		os << '}';
-		++length;
-	}
+	os << '}';
+	++length;
 	return length;
 }
 
@@ -896,14 +895,13 @@ void Paragraph::Private::latexSpecialChar(
 				break;
 			}
 		}
-		string preamble;
-		if (Encodings::isKnownLangChar(c, preamble)) {
-			column += knownLangChars(os, c, preamble, running_change,
-				encoding, i) - 1;
-			break;
-		}
+		string script;
 		docstring const latex = encoding.latexChar(c);
-		if (latex.length() > 1 && latex[latex.length() - 1] != '}') {
+		if (Encodings::isKnownScriptChar(c, script)
+		    && prefixIs(latex, from_ascii("\\" + script)))
+			column += writeScriptChars(os, c, latex,
+					running_change, encoding, i) - 1;
+		else if (latex.length() > 1 && latex[latex.length() - 1] != '}') {
 			// Prevent eating of a following
 			// space or command corruption by
 			// following characters