X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2Ftext.cpp;h=62d5e8df9ebfaf80c6d3458a94bf37d5ffdba6af;hb=fe5a4c8c81ef2e5dca844732c7f24545b5edaca9;hp=cdbc0e412a0355ed25cb93a8833a043221eaddcf;hpb=f1f63fcad0a5512dc277fd8e401ac6cd5736c699;p=lyx.git diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index cdbc0e412a..62d5e8df9e 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -118,42 +118,27 @@ char const * const known_coded_ref_commands[] = { "ref", "pageref", "vref", "vpageref", "formatted", "eqref", 0 }; /** - * known polyglossia language names (inluding synomyms) + * supported CJK encodings + * SJIS and Bg5 cannot be supported as this is not + * supported by iconv + * JIS does not work with LyX's encoding conversion */ -const char * const polyglossia_languages[] = { -"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi", -"nynorsk", "syriac", "arabic", "danish", "icelandic", "occitan", "tamil", -"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch", -"irish", "portuges", "thai", "bahasai", "english", "italian", "romanian", "turkish", -"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin", -"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazil", -"brazilian", "finnish", "lithuanian", "scottish", "usorbian", "breton", "french", -"lsorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak", -"welsh", "catalan", "german", "malayalam", "slovenian", "coptic", "greek", -"marathi", "spanish", 0}; +const char * const supported_CJK_encodings[] = { +"EUC-JP", "KS", "GB", "UTF8", 0}; /** - * the same as polyglossia_languages with .lyx names - * please keep this in sync with polyglossia_languages line by line! + * the same as supported_CJK_encodings with their corresponding LyX language name + * please keep this in sync with supported_CJK_encodings line by line! 
*/ -const char * const coded_polyglossia_languages[] = { -"albanian", "croatian", "hebrew", "norsk", "swedish", "amharic", "czech", "hindi", -"nynorsk", "syriac", "arabic_arabi", "danish", "icelandic", "occitan", "tamil", -"armenian", "divehi", "interlingua", "polish", "telugu", "asturian", "dutch", -"irish", "portuges", "thai", "bahasa", "english", "italian", "romanian", "turkish", -"bahasam", "esperanto", "lao", "russian", "turkmen", "basque", "estonian", "latin", -"samin", "ukrainian", "bengali", "farsi", "latvian", "sanskrit", "urdu", "brazilian", -"brazilian", "finnish", "lithuanian", "scottish", "uppersorbian", "breton", "french", -"lowersorbian", "serbian", "vietnamese", "bulgarian", "galician", "magyar", "slovak", -"welsh", "catalan", "ngerman", "malayalam", "slovene", "coptic", "greek", -"marathi", "spanish", 0}; - -string polyglossia2lyx(string const & language) +const char * const coded_supported_CJK_encodings[] = { +"japanese-cjk", "korean", "chinese-simplified", "chinese-traditional", 0}; + +string CJK2lyx(string const & encoding) { - char const * const * where = is_known(language, polyglossia_languages); + char const * const * where = is_known(encoding, supported_CJK_encodings); if (where) - return coded_polyglossia_languages[where - polyglossia_languages]; - return language; + return coded_supported_CJK_encodings[where - supported_CJK_encodings]; + return encoding; } /*! @@ -1251,7 +1236,7 @@ void parse_environment(Parser & p, ostream & os, bool outer, } } - else if (is_known(name, polyglossia_languages)) { + else if (is_known(name, preamble.polyglossia_languages)) { // We must begin a new paragraph if not already done if (! 
parent_context.atParagraphStart()) { parent_context.check_end_layout(os); @@ -1259,7 +1244,7 @@ void parse_environment(Parser & p, ostream & os, bool outer, } // save the language in the context so that it is // handled by parse_text - parent_context.font.language = polyglossia2lyx(name); + parent_context.font.language = preamble.polyglossia2lyx(name); parse_text(p, os, FLAG_END, outer, parent_context); // Just in case the environment is empty parent_context.extra_stuff.erase(); @@ -1437,6 +1422,55 @@ void parse_environment(Parser & p, ostream & os, bool outer, os << "\n\\begin_layout Standard\n"; } + else if (name == "CJK") { + // the scheme is \begin{CJK}{encoding}{mapping}{text} + // It is impossible to decide if a CJK environment was in its own paragraph or within + // a line. We therefore always assume a paragraph since the latter is a rare case. + eat_whitespace(p, os, parent_context, false); + parent_context.check_end_layout(os); + // store the encoding to be able to reset it + string const encoding_old = p.getEncoding(); + string const encoding = p.getArg('{', '}'); + // SJIS and Bg5 cannot be handled by iconv + // JIS does not work with LyX's encoding conversion + if (encoding != "Bg5" && encoding != "JIS" && encoding != "SJIS") + p.setEncoding(encoding); + else + p.setEncoding("utf8"); + // LyX doesn't support the second argument so if + // this is used we need to output everything as ERT + string const mapping = p.getArg('{', '}'); + if ((!mapping.empty() && mapping != " ") + || (!is_known(encoding, supported_CJK_encodings))) { + parent_context.check_layout(os); + handle_ert(os, "\\begin{" + name + "}{" + encoding + "}{" + mapping + "}", + parent_context); + // we must parse the content as verbatim because e.g. 
JIS can contain + // normally invalid characters + string const s = p.plainEnvironment("CJK"); + for (string::const_iterator it = s.begin(), et = s.end(); it != et; ++it) { + if (*it == '\\') + handle_ert(os, "\\", parent_context); + else if (*it == '$') + handle_ert(os, "$", parent_context); + else + os << *it; + } + handle_ert(os, "\\end{" + name + "}", + parent_context); + } else { + string const lang = CJK2lyx(encoding); + // store the language because we must reset it at the end + string const lang_old = parent_context.font.language; + parent_context.font.language = lang; + parse_text_in_inset(p, os, FLAG_END, outer, parent_context); + parent_context.font.language = lang_old; + parent_context.new_paragraph(os); + } + p.setEncoding(encoding_old); + p.skip_spaces(); + } + else if (name == "lyxgreyedout") { eat_whitespace(p, os, parent_context, false); parent_context.check_layout(os); @@ -2033,6 +2067,43 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, while (p.good()) { Token const & t = p.get_token(); + // it is impossible to determine the correct document language if CJK is used. + // Therefore write a note at the beginning of the document + if (have_CJK) { + context.check_layout(os); + begin_inset(os, "Note Note\n"); + os << "status open\n\\begin_layout Plain Layout\n" + << "\\series bold\n" + << "Important information:\n" + << "\\end_layout\n\n" + << "\\begin_layout Plain Layout\n" + << "This document contains text in Chinese, Japanese or Korean.\n" + << " It was therefore impossible for tex2lyx to set the correct document langue for your document." + << " Please set the language manually in the document settings.\n" + << "\\end_layout\n"; + end_inset(os); + have_CJK = false; + } + + // it is impossible to determine the correct encoding for non-CJK Japanese. 
+ // Therefore write a note at the beginning of the document + if (is_nonCJKJapanese) { + context.check_layout(os); + begin_inset(os, "Note Note\n"); + os << "status open\n\\begin_layout Plain Layout\n" + << "\\series bold\n" + << "Important information:\n" + << "\\end_layout\n\n" + << "\\begin_layout Plain Layout\n" + << "This document is in Japanese (non-CJK).\n" + << " It was therefore impossible for tex2lyx to determine the correct encoding." + << " The encoding EUC-JP was assumed. If this is incorrect, please set the correct" + << " encoding in the document settings.\n" + << "\\end_layout\n"; + end_inset(os); + is_nonCJKJapanese = false; + } + #ifdef FILEDEBUG debugToken(cerr, t, flags); #endif @@ -3468,17 +3539,34 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, context.font.language, lang); } - else if (is_known(t.cs().substr(4, string::npos), polyglossia_languages)) { + else if (prefixIs(t.cs(), "text") + && is_known(t.cs().substr(4), preamble.polyglossia_languages)) { // scheme is \textLANGUAGE{text} where LANGUAGE is in polyglossia_languages[] - string const lang = polyglossia2lyx(t.cs().substr(4, string::npos)); - // FIXME: we have to output the whole command if it has an option - // because lyX doesn't support this yet, see bug #8214 - if (p.hasOpt()) - handle_ert(os, t.asInput() + p.getOpt(), context); - else + string lang; + // We have to output the whole command if it has an option + // because LyX doesn't support this yet, see bug #8214, + // only if there is a single option specifying a variant, we can handle it. 
+ if (p.hasOpt()) { + string langopts = p.getOpt(); + // check if the option contains a variant, if yes, extract it + string::size_type pos_var = langopts.find("variant"); + string::size_type i = langopts.find(','); + string::size_type k = langopts.find('=', pos_var); + if (pos_var != string::npos && i == string::npos) { + string variant; + variant = langopts.substr(k + 1, langopts.length() - k - 2); + lang = preamble.polyglossia2lyx(variant); + parse_text_attributes(p, os, FLAG_ITEM, outer, + context, "\\lang", + context.font.language, lang); + } else + handle_ert(os, t.asInput() + langopts, context); + } else { + lang = preamble.polyglossia2lyx(t.cs().substr(4, string::npos)); parse_text_attributes(p, os, FLAG_ITEM, outer, - context, "\\lang", - context.font.language, lang); + context, "\\lang", + context.font.language, lang); + } } else if (t.cs() == "inputencoding") { @@ -3823,14 +3911,21 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, arg += p.getFullOpt(); eat_whitespace(p, os, context, false); handle_ert(os, arg + '{', context); - eat_whitespace(p, os, context, false); parse_text(p, os, FLAG_ITEM, outer, context); handle_ert(os, "}", context); } else { string special = p.getFullOpt(); special += p.getOpt(); - parse_outer_box(p, os, FLAG_ITEM, outer, - context, t.cs(), special); + // LyX does not yet support \framebox without any option + if (!special.empty()) + parse_outer_box(p, os, FLAG_ITEM, outer, + context, t.cs(), special); + else { + eat_whitespace(p, os, context, false); + handle_ert(os, "\\framebox{", context); + parse_text(p, os, FLAG_ITEM, outer, context); + handle_ert(os, "}", context); + } } } @@ -3844,7 +3939,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, arg += p.getFullOpt(); eat_whitespace(p, os, context, false); handle_ert(os, arg + '{', context); - eat_whitespace(p, os, context, false); parse_text(p, os, FLAG_ITEM, outer, context); handle_ert(os, "}", context); } else