]> git.lyx.org Git - lyx.git/blobdiff - src/tex2lyx/text.cpp
tex2lyx: improve CJK handling
[lyx.git] / src / tex2lyx / text.cpp
index 6d32c8b742be4bf3d21222f11fa9145e0c53e305..62d5e8df9ebfaf80c6d3458a94bf37d5ffdba6af 100644 (file)
@@ -117,43 +117,11 @@ char const * const known_ref_commands[] = { "ref", "pageref", "vref",
 char const * const known_coded_ref_commands[] = { "ref", "pageref", "vref",
  "vpageref", "formatted", "eqref", 0 };
 
-/**
- * known polyglossia language names (including variants)
- */
-const char * const polyglossia_languages[] = {
-"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi",
-"nynorsk", "syriac", "arabic", "danish", "icelandic", "occitan", "tamil",
-"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch",
-"irish", "portuges", "thai", "bahasai", "english", "italian", "romanian", "turkish",
-"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin",
-"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazil",
-"brazilian", "finnish", "lithuanian", "scottish", "usorbian", "breton", "french",
-"lsorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak",
-"welsh", "catalan", "german", "malayalam", "slovenian", "coptic", "greek",
-"marathi", "spanish",
-"american", "ancient", "australian", "british", "monotonic", "newzealand",
-"polytonic", 0};
-
-/**
- * the same as polyglossia_languages with .lyx names
- * please keep this in sync with polyglossia_languages line by line!
- */
-const char * const coded_polyglossia_languages[] = {
-"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi",
-"nynorsk", "syriac", "arabic_arabi", "danish", "icelandic", "occitan", "tamil",
-"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch",
-"irish", "portuges", "thai", "bahasa", "english", "italian", "romanian", "turkish",
-"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin",
-"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazilian",
-"brazilian", "finnish", "lithuanian", "scottish", "uppersorbian", "breton", "french",
-"lowersorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak",
-"welsh", "catalan", "ngerman", "malayalam", "slovene", "coptic", "greek",
-"marathi", "spanish",
-"american", "ancientgreek", "australian", "british", "greek", "newzealand",
-"polutonikogreek", 0};
-
 /**
  * supported CJK encodings
+ * SJIS and Bg5 cannot be supported as they are not
+ * supported by iconv
+ * JIS does not work with LyX's encoding conversion
  */
 const char * const supported_CJK_encodings[] = {
 "EUC-JP", "KS", "GB", "UTF8", 0};
@@ -1268,7 +1236,7 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                }
        }
 
-       else if (is_known(name, polyglossia_languages)) {
+       else if (is_known(name, preamble.polyglossia_languages)) {
                // We must begin a new paragraph if not already done
                if (! parent_context.atParagraphStart()) {
                        parent_context.check_end_layout(os);
@@ -1276,7 +1244,7 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                }
                // save the language in the context so that it is
                // handled by parse_text
-               parent_context.font.language = polyglossia2lyx(name);
+               parent_context.font.language = preamble.polyglossia2lyx(name);
                parse_text(p, os, FLAG_END, outer, parent_context);
                // Just in case the environment is empty
                parent_context.extra_stuff.erase();
@@ -1461,26 +1429,25 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                eat_whitespace(p, os, parent_context, false);
                parent_context.check_end_layout(os);
                // store the encoding to be able to reset it
-               string const encoding_old = p.encoding_latex_;
+               string const encoding_old = p.getEncoding();
                string const encoding = p.getArg('{', '}');
-               // SJIS and BIG5 don't work with LaTeX according to the comment in unicode.cpp
+               // SJIS and Bg5 cannot be handled by iconv
                // JIS does not work with LyX's encoding conversion
-               if (encoding != "SJIS" && encoding != "BIG5" && encoding != "JIS")
+               if (encoding != "Bg5" && encoding != "JIS" && encoding != "SJIS")
                        p.setEncoding(encoding);
                else
                        p.setEncoding("utf8");
                // LyX doesn't support the second argument so if
                // this is used we need to output everything as ERT
                string const mapping = p.getArg('{', '}');
-               if ( (!mapping.empty() && mapping != " ")
+               if ((!mapping.empty() && mapping != " ")
                        || (!is_known(encoding, supported_CJK_encodings))) {
                        parent_context.check_layout(os);
                        handle_ert(os, "\\begin{" + name + "}{" + encoding + "}{" + mapping + "}",
                                       parent_context);
-                       // we must parse the content as verbatim because e.g. SJIS can contain
+                       // we must parse the content as verbatim because e.g. JIS can contain
                        // normally invalid characters
                        string const s = p.plainEnvironment("CJK");
-                       string::const_iterator it2 = s.begin();
                        for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) {
                                if (*it == '\\')
                                        handle_ert(os, "\\", parent_context);
@@ -1489,7 +1456,6 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                                else 
                                        os << *it;
                        }
-                       p.skip_spaces();
                        handle_ert(os, "\\end{" + name + "}",
                                       parent_context);
                } else {
@@ -1501,7 +1467,7 @@ void parse_environment(Parser & p, ostream & os, bool outer,
                        parent_context.font.language = lang_old;
                        parent_context.new_paragraph(os);
                }
-               p.encoding_latex_ = encoding_old;
+               p.setEncoding(encoding_old);
                p.skip_spaces();
        }
 
@@ -2113,7 +2079,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                   << "\\begin_layout Plain Layout\n"
                   << "This document contains text in Chinese, Japanese or Korean.\n"
                   << " It was therefore impossible for tex2lyx to set the correct document langue for your document."
-                  << " Please set in the document settings by yourself!\n"
+                  << " Please set the language manually in the document settings.\n"
                   << "\\end_layout\n";
                end_inset(os);
                have_CJK = false;
@@ -3574,7 +3540,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                }
                
                else if (prefixIs(t.cs(), "text") 
-                        && is_known(t.cs().substr(4), polyglossia_languages)) {
+                        && is_known(t.cs().substr(4), preamble.polyglossia_languages)) {
                        // scheme is \textLANGUAGE{text} where LANGUAGE is in polyglossia_languages[]
                        string lang;
                        // We have to output the whole command if it has an option
@@ -3589,14 +3555,14 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                if (pos_var != string::npos && i == string::npos) {
                                        string variant;
                                        variant = langopts.substr(k + 1, langopts.length() - k - 2);
-                                       lang = polyglossia2lyx(variant);
+                                       lang = preamble.polyglossia2lyx(variant);
                                        parse_text_attributes(p, os, FLAG_ITEM, outer,
                                                                  context, "\\lang",
                                                                  context.font.language, lang);
                                } else
                                        handle_ert(os, t.asInput() + langopts, context);
                        } else {
-                               lang = polyglossia2lyx(t.cs().substr(4, string::npos));
+                               lang = preamble.polyglossia2lyx(t.cs().substr(4, string::npos));
                                parse_text_attributes(p, os, FLAG_ITEM, outer,
                                                          context, "\\lang",
                                                          context.font.language, lang);
@@ -3945,7 +3911,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                arg += p.getFullOpt();
                                eat_whitespace(p, os, context, false);
                                handle_ert(os, arg + '{', context);
-                               eat_whitespace(p, os, context, false);
                                parse_text(p, os, FLAG_ITEM, outer, context);
                                handle_ert(os, "}", context);
                        } else {
@@ -3958,7 +3923,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                else {
                                        eat_whitespace(p, os, context, false);
                                        handle_ert(os, "\\framebox{", context);
-                                       eat_whitespace(p, os, context, false);
                                        parse_text(p, os, FLAG_ITEM, outer, context);
                                        handle_ert(os, "}", context);
                                }
@@ -3975,7 +3939,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                                arg += p.getFullOpt();
                                eat_whitespace(p, os, context, false);
                                handle_ert(os, arg + '{', context);
-                               eat_whitespace(p, os, context, false);
                                parse_text(p, os, FLAG_ITEM, outer, context);
                                handle_ert(os, "}", context);
                        } else