X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2Ftext.cpp;h=50eee93a3d2291596ee63140d7afac665298232f;hb=8e7b5eebf0ef9742e5f6983d8f55f563c12a31ab;hp=3f65d43642b0fc361bbbf52d54a0bb498eb912a9;hpb=c1965123ed2a5390a1d719f4ba3f92c27376adff;p=lyx.git

diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp
index 3f65d43642..50eee93a3d 100644
--- a/src/tex2lyx/text.cpp
+++ b/src/tex2lyx/text.cpp
@@ -117,6 +117,62 @@ char const * const known_ref_commands[] = { "ref", "pageref", "vref",
 char const * const known_coded_ref_commands[] = { "ref", "pageref", "vref",
  "vpageref", "formatted", "eqref", 0 };
 
+/**
+ * polyglossia language names, variants included (0-terminated; kept in sync with coded_polyglossia_languages)
+ */
+const char * const polyglossia_languages[] = {
+"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi",
+"nynorsk", "syriac", "arabic", "danish", "icelandic", "occitan", "tamil",
+"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch",
+"irish", "portuges", "thai", "bahasai", "english", "italian", "romanian", "turkish",
+"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin",
+"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazil",
+"brazilian", "finnish", "lithuanian", "scottish", "usorbian", "breton", "french",
+"lsorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak",
+"welsh", "catalan", "german", "malayalam", "slovenian", "coptic", "greek",
+"marathi", "spanish",
+"american", "ancient", "australian", "british", "monotonic", "newzealand",
+"polytonic", 0};
+
+/**
+ * the LyX (.lyx file) language name for every entry of polyglossia_languages, in the same order
+ * please keep this in sync with polyglossia_languages line by line!
+ */
+const char * const coded_polyglossia_languages[] = {
+"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi",
+"nynorsk", "syriac", "arabic_arabi", "danish", "icelandic", "occitan", "tamil",
+"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch",
+"irish", "portuguese", "thai", "bahasa", "english", "italian", "romanian", "turkish",
+"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin",
+"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazilian",
+"brazilian", "finnish", "lithuanian", "scottish", "uppersorbian", "breton", "french",
+"lowersorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak",
+"welsh", "catalan", "ngerman", "malayalam", "slovene", "coptic", "greek",
+"marathi", "spanish",
+"american", "ancientgreek", "australian", "british", "greek", "newzealand",
+"polutonikogreek", 0};
+
+/**
+ * CJK encodings that CJK2lyx() can translate to a LyX language (0-terminated)
+ */
+const char * const supported_CJK_encodings[] = {
+"EUC-JP", "KS", "GB", "UTF8", 0};
+
+/**
+ * the LyX language name for every entry of supported_CJK_encodings (CJK2lyx() uses the same index in both)
+ * please keep this in sync with supported_CJK_encodings line by line!
+ */
+const char * const coded_supported_CJK_encodings[] = {
+"japanese-cjk", "korean", "chinese-simplified", "chinese-traditional", 0};
+
+string CJK2lyx(string const & encoding)
+{
+	char const * const * const hit = is_known(encoding, supported_CJK_encodings);
+	if (!hit)
+		return encoding; // not in the table: hand the name back untouched
+	return coded_supported_CJK_encodings[hit - supported_CJK_encodings];
+}
+
 /*!
  * natbib commands.
  * The starred forms are also known except for "citefullauthor",
@@ -461,7 +517,9 @@ docstring convert_unicodesymbols(docstring s)
 		set<string> req;
 		docstring parsed = encodings.fromLaTeXCommand(s,
 				Encodings::TEXT_CMD, termination, rem, &req);
-		for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+		set<string>::const_iterator it = req.begin();
+		set<string>::const_iterator en = req.end();
+		for (; it != en; ++it)
 			preamble.registerAutomaticallyLoadedPackage(*it);
 		os << parsed;
 		s = rem;
@@ -1210,6 +1268,23 @@ void parse_environment(Parser & p, ostream & os, bool outer,
 		}
 	}
 
+	else if (is_known(name, polyglossia_languages)) {
+		// polyglossia language environment: we must begin a new paragraph if not already done
+		if (! parent_context.atParagraphStart()) {
+			parent_context.check_end_layout(os);
+			parent_context.new_paragraph(os);
+		}
+		// save the LyX name of the language in the context so that it is
+		// handled by parse_text while the environment content is read
+		parent_context.font.language = polyglossia2lyx(name);
+		parse_text(p, os, FLAG_END, outer, parent_context);
+		// Just in case the environment is empty
+		parent_context.extra_stuff.erase();
+		// We must begin a new paragraph to reset the language (NOTE(review): font.language is not restored here - confirm)
+		parent_context.new_paragraph(os);
+		p.skip_spaces();
+	}
+
 	else if (unstarred_name == "tabular" || name == "longtable") {
 		eat_whitespace(p, os, parent_context, false);
 		string width = "0pt";
@@ -1379,6 +1454,57 @@ void parse_environment(Parser & p, ostream & os, bool outer,
 		os << "\n\\begin_layout Standard\n";
 	}
 
+	else if (name == "CJK") {
+		// the scheme is \begin{CJK}{encoding}{mapping}text\end{CJK}
+		// It is impossible to decide if a CJK environment was in its own paragraph or within
+		// a line. We therefore always assume a paragraph since the latter is a rare case.
+		eat_whitespace(p, os, parent_context, false);
+		parent_context.check_end_layout(os);
+		// store the encoding to be able to reset it
+		string const encoding_old = p.encoding_latex_;
+		string const encoding = p.getArg('{', '}');
+		// SJIS and BIG5 don't work with LaTeX according to the comment in unicode.cpp
+		// JIS does not work with LyX's encoding conversion
+		if (encoding != "SJIS" && encoding != "BIG5" && encoding != "JIS")
+			p.setEncoding(encoding);
+		else
+			p.setEncoding("utf8");
+		// LyX doesn't support the second argument so if
+		// this is used we need to output everything as ERT
+		string const mapping = p.getArg('{', '}');
+		if ( (!mapping.empty() && mapping != " ")
+			|| (!is_known(encoding, supported_CJK_encodings))) {
+			parent_context.check_layout(os);
+			handle_ert(os, "\\begin{" + name + "}{" + encoding + "}{" + mapping + "}",
+				       parent_context);
+			// we must parse the content as verbatim because e.g. SJIS can contain
+			// normally invalid characters
+			string const s = p.plainEnvironment("CJK");
+			// backslashes and dollar signs go to ERT, all other characters are copied as they are
+			for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
+				if (*it == '\\')
+					handle_ert(os, "\\", parent_context);
+				else if (*it == '$')
+					handle_ert(os, "$", parent_context);
+				else
+					os << *it;
+			}
+			p.skip_spaces();
+			handle_ert(os, "\\end{" + name + "}",
+				       parent_context);
+		} else {
+			string const lang = CJK2lyx(encoding);
+			// store the language because we must reset it at the end
+			string const lang_old = parent_context.font.language;
+			parent_context.font.language = lang;
+			parse_text_in_inset(p, os, FLAG_END, outer, parent_context);
+			parent_context.font.language = lang_old;
+			parent_context.new_paragraph(os);
+		}
+		p.encoding_latex_ = encoding_old; // NOTE(review): only the stored name is reset here - confirm no setEncoding() call is needed
+		p.skip_spaces();
+	}
+
 	else if (name == "lyxgreyedout") {
 		eat_whitespace(p, os, parent_context, false);
 		parent_context.check_layout(os);
@@ -1556,7 +1682,9 @@ void parse_environment(Parser & p, ostream & os, bool outer,
 		if (!preamble.titleLayoutFound())
 			preamble.titleLayoutFound(newlayout->intitle);
 		set<string> const & req = newlayout->requires();
-		for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+		set<string>::const_iterator it = req.begin();
+		set<string>::const_iterator en = req.end();
+		for (; it != en; ++it)
 			preamble.registerAutomaticallyLoadedPackage(*it);
 	}
 
@@ -1973,6 +2101,43 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 	while (p.good()) {
 		Token const & t = p.get_token();
 
+	// it is impossible to determine the correct document language if CJK is used.
+	// Therefore write a note at the beginning of the document (once: the flag is cleared afterwards)
+	if (have_CJK) {
+		context.check_layout(os);
+		begin_inset(os, "Note Note\n");
+		os << "status open\n\\begin_layout Plain Layout\n"
+		   << "\\series bold\n"
+		   << "Important information:\n"
+		   << "\\end_layout\n\n"
+		   << "\\begin_layout Plain Layout\n"
+		   << "This document contains text in Chinese, Japanese or Korean.\n"
+		   << " It was therefore impossible for tex2lyx to set the correct document language for your document."
+		   << " Please set it in the document settings by yourself!\n"
+		   << "\\end_layout\n";
+		end_inset(os);
+		have_CJK = false;
+	}
+
+	// it is impossible to determine the correct encoding for non-CJK Japanese.
+	// Therefore write a note at the beginning of the document (once: the flag is cleared afterwards)
+	if (is_nonCJKJapanese) {
+		context.check_layout(os);
+		begin_inset(os, "Note Note\n");
+		os << "status open\n\\begin_layout Plain Layout\n"
+		   << "\\series bold\n"
+		   << "Important information:\n"
+		   << "\\end_layout\n\n"
+		   << "\\begin_layout Plain Layout\n"
+		   << "This document is in Japanese (non-CJK).\n"
+		   << " It was therefore impossible for tex2lyx to determine the correct encoding."
+		   << " The encoding EUC-JP was assumed. If this is incorrect, please set the correct"
+		   << " encoding in the document settings.\n"
+		   << "\\end_layout\n";
+		end_inset(os);
+		is_nonCJKJapanese = false;
+	}
+
 #ifdef FILEDEBUG
 		debugToken(cerr, t, flags);
 #endif
@@ -2486,8 +2651,9 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 					if (!preamble.titleLayoutFound())
 						preamble.titleLayoutFound(newlayout->intitle);
 					set<string> const & req = newlayout->requires();
-					for (set<string>::const_iterator it = req.begin();
-					     it != req.end(); it++)
+					set<string>::const_iterator it = req.begin();
+					set<string>::const_iterator en = req.end();
+					for (; it != en; ++it)
 						preamble.registerAutomaticallyLoadedPackage(*it);
 				} else
 					handle_ert(os,
@@ -2508,7 +2674,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			if (!preamble.titleLayoutFound())
 				preamble.titleLayoutFound(newlayout->intitle);
 			set<string> const & req = newlayout->requires();
-			for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+			for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
 				preamble.registerAutomaticallyLoadedPackage(*it);
 		}
 
@@ -2521,7 +2687,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			if (!preamble.titleLayoutFound())
 				preamble.titleLayoutFound(newlayout->intitle);
 			set<string> const & req = newlayout->requires();
-			for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+			for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
 				preamble.registerAutomaticallyLoadedPackage(*it);
 		}
 
@@ -3396,7 +3562,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			context.check_layout(os);
 			// save the language for the case that a
 			// \foreignlanguage is used
-
 			context.font.language = babel2lyx(p.verbatim_item());
 			os << "\n\\lang " << context.font.language << "\n";
 		}
@@ -3407,6 +3572,38 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 			                      context, "\\lang",
 			                      context.font.language, lang);
 		}
+		
+		else if (t.cs().compare(0, 4, "text") == 0 && is_known(t.cs().substr(4), polyglossia_languages)) {
+			// scheme is \textLANGUAGE{text}; the "text" prefix is tested first so that substr(4) cannot throw
+			string lang;
+			// We have to output the whole command if it has an option
+			// because LyX doesn't support this yet, see bug #8214,
+			// only if there is a single option specifying a variant, we can handle it.
+			if (p.hasOpt()) {
+				string langopts = p.getOpt();
+				// check if the option contains a variant, if yes, extract it (the arithmetic assumes the form [variant=NAME])
+				string::size_type pos_var = langopts.find("variant");
+				string::size_type i = langopts.find(',');
+				if (pos_var != string::npos){
+					string variant;
+					if (i == string::npos) {
+						variant = langopts.substr(pos_var + 8, langopts.length() - pos_var - 9);
+						lang = polyglossia2lyx(variant);
+						parse_text_attributes(p, os, FLAG_ITEM, outer,
+							                  context, "\\lang",
+							                  context.font.language, lang);
+					}
+					else
+						handle_ert(os, t.asInput() + langopts, context);
+				} else
+					handle_ert(os, t.asInput() + langopts, context);
+			} else {
+				lang = polyglossia2lyx(t.cs().substr(4, string::npos));
+				parse_text_attributes(p, os, FLAG_ITEM, outer,
+					                  context, "\\lang",
+					                  context.font.language, lang);
+			}
+		}
 
 		else if (t.cs() == "inputencoding") {
 			// nothing to write here
@@ -3521,7 +3718,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 					     << ", result is " << to_utf8(s)
 					     << "+" << to_utf8(rem) << endl;
 				os << to_utf8(s);
-				for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+				for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
 					preamble.registerAutomaticallyLoadedPackage(*it);
 			} else
 				// we did not find a non-ert version
@@ -4128,7 +4325,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
 				os << to_utf8(s);
 				if (termination)
 					skip_spaces_braces(p);
-				for (set<string>::const_iterator it = req.begin(); it != req.end(); it++)
+				for (set<string>::const_iterator it = req.begin(); it != req.end(); ++it)
 					preamble.registerAutomaticallyLoadedPackage(*it);
 			}
 			//cerr << "#: " << t << " mode: " << mode << endl;