From 9dd1b7c578153b7138f27600a93a1dbedec48af9 Mon Sep 17 00:00:00 2001 From: Julien Rioux Date: Sun, 20 Jan 2013 01:52:30 +0100 Subject: [PATCH] Use the LyX name of encodings instead of the LaTeX names. - Use the LyX name of encodings instead of the LaTeX names. The LyX name must be unique, while the name used by LaTeX not necessarily, e.g. different packages might implement support for the same encoding. - Rename koi8 to koi8-r, so that the LyX and LaTeX names match. - Rename euc-jp-plain to euc-jp-platex, jis-plain to jis-platex and shift-jis-plain to shift-jis-platex. - Add utf8-platex encoding (fixes #8408). LyX file format incremented to 463. --- development/FORMAT | 8 +++ lib/encodings | 29 +++++++--- lib/languages | 4 +- lib/lyx2lyx/lyx_2_1.py | 88 ++++++++++++++++++++++++++++++- src/BufferParams.cpp | 6 +-- src/BufferParams.h | 4 +- src/frontends/qt4/GuiDocument.cpp | 4 +- src/support/unicode.cpp | 1 - src/tex2lyx/Preamble.cpp | 35 ++++++++---- src/tex2lyx/TODO.txt | 2 +- src/tex2lyx/tex2lyx.cpp | 4 +- src/version.h | 4 +- 12 files changed, 155 insertions(+), 34 deletions(-) diff --git a/development/FORMAT b/development/FORMAT index ef67c3cc6f..5d34fd3995 100644 --- a/development/FORMAT +++ b/development/FORMAT @@ -11,6 +11,14 @@ adjustments are made to tex2lyx and bugs are fixed in lyx2lyx. ----------------------- +2013-02-18 Julien Rioux + * Format incremented to 463: + - Use the LyX name of encodings instead of the LaTeX names. + The LyX name must be unique, while the name used by LaTeX + not necessarily, e.g. different packages might implement + support for the same encoding. + - Add utf8-platex encoding. + 2013-02-15 Jürgen Spitzmüller * Format incremented to 462: Support for recent libertine LaTeX fonts a.) Support Libertine Mono: diff --git a/lib/encodings b/lib/encodings index a5bd5ccd6d..c79db405be 100644 --- a/lib/encodings +++ b/lib/encodings @@ -3,7 +3,7 @@ # Note that you can only add singlebyte encodings to this file. 
# LyX does not support the output of multibyte encodings (e.g. utf16). -# It does support singlebyte encodings with variable with (e.g. utf8). +# It does support singlebyte encodings with variable width (e.g. utf8). # These are marked with the "variable" keyword. # Fixed width encodings are marked with the "fixed" keyword. # The code points of TeX control characters like {, } and \ can occur in the @@ -11,7 +11,20 @@ # set as document encodings and are marked with the "variableunsafe" keyword. # They are only needed for proper tex2lyx import. -# Syntax: Encoding fixed|variable|variableunsafe End +# Most encodings require loading a latex package such as "inputenc" or "CJK". +# There is no "japanese" latex package, rather this keyword indicates to LyX +# to switch the buffer format and use platex instead of standard (pdf)latex. +# In this case, TeX control characters in high bytes are not a problem. +# The invocation is platex -kanji=<LaTeX name> + +# Syntax: Encoding <LyX name> <LaTeX name> <GUI name> <iconv name> <width> <package> End + +# LyX name: Name used by the file format and in lib/languages. Must be unique! +# LaTeX name: Used in the latex export or passed to platex as command-line switch. +# GUI name: Displayed in document settings. +# iconv name: Used by iconv. +# width: One of fixed, variable, or variableunsafe (see above). +# package: One of none, inputenc, CJK, or japanese (see above). # encodings used by inputenc.sty @@ -116,7 +129,7 @@ End Encoding cp1257 cp1257 "Baltic (CP 1257)" CP1257 fixed inputenc End -Encoding koi8 koi8-r "Cyrillic (KOI8-R)" KOI8-R fixed inputenc +Encoding koi8-r koi8-r "Cyrillic (KOI8-R)" KOI8-R fixed inputenc End Encoding koi8-u koi8-u "Cyrillic (KOI8-U)" KOI8-U fixed inputenc @@ -184,12 +197,13 @@ End # Traditional Japanese TeX programs require the japanese package. # that is incompatible with CJK and inputenc. 
-Encoding euc-jp-plain euc "Japanese (non-CJK) (EUC-JP)" EUC-JP variable japanese +Encoding euc-jp-platex euc "Japanese (pLaTeX) (EUC-JP)" EUC-JP variable japanese End -Encoding jis-plain jis "Japanese (non-CJK) (JIS)" ISO-2022-JP variable japanese +Encoding jis-platex jis "Japanese (pLaTeX) (JIS)" ISO-2022-JP variable japanese End -# FIXME: Should use variableunsafe (would be a file format change) -Encoding shift-jis-plain sjis "Japanese (non-CJK) (SJIS)" CP932 variable japanese +Encoding shift-jis-platex sjis "Japanese (pLaTeX) (SJIS)" CP932 variable japanese +End +Encoding utf8-platex utf8 "Japanese (pLaTeX) (UTF8)" UTF-8 variable japanese End # This one needs hardcoded support, since the inputenc package does not know @@ -207,4 +221,3 @@ End # Pure 7bit ASCII encoding (partially hardcoded in LyX) Encoding ascii ascii "ASCII" ascii fixed none End - diff --git a/lib/languages b/lib/languages index 1022f3cdbe..a0ba7b8fe7 100644 --- a/lib/languages +++ b/lib/languages @@ -576,7 +576,7 @@ End Language japanese GuiName "Japanese" BabelName japanese - Encoding jis-plain + Encoding jis-platex LangCode ja_JP AsBabelOptions true Requires japanese @@ -773,7 +773,7 @@ Language russian BabelName russian PolyglossiaName russian QuoteStyle french - Encoding koi8 + Encoding koi8-r LangCode ru_RU End diff --git a/lib/lyx2lyx/lyx_2_1.py b/lib/lyx2lyx/lyx_2_1.py index 6c0b9c837f..bc4d23ba28 100644 --- a/lib/lyx2lyx/lyx_2_1.py +++ b/lib/lyx2lyx/lyx_2_1.py @@ -3546,6 +3546,90 @@ def revert_newframes(document): document.body[i : i + 1] = subst i = j +# known encodings that do not change their names (same LyX and LaTeX names) +known_enc_tuple = ("auto", "default", "ansinew", "applemac", "armscii8", "ascii", + "cp437", "cp437de", "cp850", "cp852", "cp855", "cp858", "cp862", "cp865", "cp866", + "cp1250", "cp1251", "cp1252", "cp1255", "cp1256", "cp1257", "koi8-r", "koi8-u", + "pt154", "pt254", "tis620-0", "utf8", "utf8x", "utf8-plain") + +def convert_encodings(document): + "Use the LyX 
names of the encodings instead of the LaTeX names." + LaTeX2LyX_enc_dict = { + "8859-6": "iso8859-6", + "8859-8": "iso8859-8", + "Bg5": "big5", + "euc": "euc-jp-platex", + "EUC-JP": "euc-jp", + "EUC-TW": "euc-tw", + "GB": "euc-cn", + "GBK": "gbk", + "iso88595": "iso8859-5", + "iso-8859-7": "iso8859-7", + "JIS": "jis", + "jis": "jis-platex", + "KS": "euc-kr", + "l7xenc": "iso8859-13", + "latin1": "iso8859-1", + "latin2": "iso8859-2", + "latin3": "iso8859-3", + "latin4": "iso8859-4", + "latin5": "iso8859-9", + "latin9": "iso8859-15", + "latin10": "iso8859-16", + "SJIS": "shift-jis", + "sjis": "shift-jis-platex", + "UTF8": "utf8-cjk" + } + i = find_token(document.header, "\\inputencoding" , 0) + if i == -1: + return + val = get_value(document.header, "\\inputencoding", i) + if val in LaTeX2LyX_enc_dict.keys(): + document.header[i] = "\\inputencoding %s" % LaTeX2LyX_enc_dict[val] + elif val not in known_enc_tuple: + document.warning("Ignoring unknown input encoding: `%s'" % val) + + +def revert_encodings(document): + """Revert to using the LaTeX names of the encodings instead of the LyX names. + Also revert utf8-platex to sjis, the language default when using Japanese. 
+ """ + LyX2LaTeX_enc_dict = { + "big5": "Bg5", + "euc-cn": "GB", + "euc-kr": "KS", + "euc-jp": "EUC-JP", + "euc-jp-platex": "euc", + "euc-tw": "EUC-TW", + "gbk": "GBK", + "iso8859-1": "latin1", + "iso8859-2": "latin2", + "iso8859-3": "latin3", + "iso8859-4": "latin4", + "iso8859-5": "iso88595", + "iso8859-6": "8859-6", + "iso8859-7": "iso-8859-7", + "iso8859-8": "8859-8", + "iso8859-9": "latin5", + "iso8859-13": "l7xenc", + "iso8859-15": "latin9", + "iso8859-16": "latin10", + "jis": "JIS", + "jis-platex": "jis", + "shift-jis": "SJIS", + "shift-jis-platex": "sjis", + "utf8-cjk": "UTF8", + "utf8-platex": "sjis" + } + i = find_token(document.header, "\\inputencoding" , 0) + if i == -1: + return + val = get_value(document.header, "\\inputencoding", i) + if val in LyX2LaTeX_enc_dict.keys(): + document.header[i] = "\\inputencoding %s" % LyX2LaTeX_enc_dict[val] + elif val not in known_enc_tuple: + document.warning("Ignoring unknown input encoding: `%s'" % val) + def revert_IEEEtran_3(document): ''' @@ -3699,10 +3783,12 @@ convert = [ [459, []], [460, []], [461, []], - [462, []] + [462, []], + [463, [convert_encodings]], ] revert = [ + [462, [revert_encodings]], [461, [revert_new_libertines]], [460, [revert_kurier_fonts]], [459, [revert_IEEEtran_3]], diff --git a/src/BufferParams.cpp b/src/BufferParams.cpp index 0b0965c236..60129ab00c 100644 --- a/src/BufferParams.cpp +++ b/src/BufferParams.cpp @@ -2786,7 +2786,7 @@ void BufferParams::writeEncodingPreamble(otexstream & os, // do not load inputenc if japanese is used if (features.isRequired("japanese")) break; - os << "\\usepackage[" << from_ascii(inputenc) + os << "\\usepackage[" << from_ascii(encoding().latexName()) << "]{inputenc}\n"; break; case Encoding::CJK: @@ -2913,10 +2913,10 @@ Encoding const & BufferParams::encoding() const // This check will not work with XeTeX/LuaTeX and tex fonts. // Thus we have to reset the encoding in Buffer::makeLaTeXFile. 
if (useNonTeXFonts) - return *(encodings.fromLaTeXName("utf8-plain")); + return *(encodings.fromLyXName("utf8-plain")); if (inputenc == "auto" || inputenc == "default") return *language->encoding(); - Encoding const * const enc = encodings.fromLaTeXName(inputenc); + Encoding const * const enc = encodings.fromLyXName(inputenc); if (enc) return *enc; LYXERR0("Unknown inputenc value `" << inputenc diff --git a/src/BufferParams.h b/src/BufferParams.h index 1bff132187..b9e37510ca 100644 --- a/src/BufferParams.h +++ b/src/BufferParams.h @@ -284,10 +284,10 @@ public: IndicesList & indiceslist(); IndicesList const & indiceslist() const; /** - * The input encoding for LaTeX. This can be one of + * The LyX name of the input encoding for LaTeX. This can be one of * - \c auto: find out the input encoding from the used languages * - \c default: ditto - * - any encoding supported by the inputenc package + * - any encoding defined in the file lib/encodings * The encoding of the LyX file is always utf8 and has nothing to * do with this setting. 
* The difference between \c auto and \c default is that \c auto also diff --git a/src/frontends/qt4/GuiDocument.cpp b/src/frontends/qt4/GuiDocument.cpp index 1017d5d8f1..ff63977f00 100644 --- a/src/frontends/qt4/GuiDocument.cpp +++ b/src/frontends/qt4/GuiDocument.cpp @@ -2586,7 +2586,7 @@ void GuiDocument::applyView() for (; it != end; ++it) { if (qt_(it->guiName()) == enc_gui && !it->unsafe()) { - bp_.inputenc = it->latexName(); + bp_.inputenc = it->name(); found = true; break; } @@ -2993,7 +2993,7 @@ void GuiDocument::paramsToDialog() Encodings::const_iterator it = encodings.begin(); Encodings::const_iterator const end = encodings.end(); for (; it != end; ++it) { - if (it->latexName() == bp_.inputenc && + if (it->name() == bp_.inputenc && !it->unsafe()) { enc_gui = it->guiName(); break; diff --git a/src/support/unicode.cpp b/src/support/unicode.cpp index 36cb1800f2..95415a5538 100644 --- a/src/support/unicode.cpp +++ b/src/support/unicode.cpp @@ -377,7 +377,6 @@ int max_encoded_bytes(std::string const & encoding) // The CJK encodings use (different) multibyte representation as well. // All other encodings encode one UCS4 code point in one byte // (and can therefore only encode a subset of UCS4) - // Note that BIG5 and SJIS do not work with LaTeX (see lib/encodings). // Furthermore, all encodings that use shifting (like SJIS) do not work with // iconv_codecvt_facet. 
if (encoding == "UTF-8" || diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index aaa76841fe..9d72a5e77b 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -805,7 +805,7 @@ void Preamble::handle_package(Parser &p, string const & name, Encoding const * const enc = encodings.fromIconvName( p.getEncoding(), Encoding::japanese, false); if (enc) - h_inputencoding = enc->latexName(); + h_inputencoding = enc->name(); is_nonCJKJapanese = true; // in this case babel can be removed from the preamble registerAutomaticallyLoadedPackage("babel"); @@ -844,7 +844,7 @@ void Preamble::handle_package(Parser &p, string const & name, } else if (name == "CJKutf8") { - h_inputencoding = "UTF8"; + h_inputencoding = "utf8-cjk"; p.setEncoding("UTF-8"); registerAutomaticallyLoadedPackage("CJKutf8"); } @@ -863,14 +863,22 @@ void Preamble::handle_package(Parser &p, string const & name, // h_inputencoding is only set when there is not more than one // inputenc option because otherwise h_inputencoding must be // set to "auto" (the default encoding of the document language) - // Therefore check for the "," character. + // Therefore check that exactly one option is passed to inputenc. // It is also only set when there is not more than one babel // language option. - if (opts.find(",") == string::npos && one_language == true) - h_inputencoding = opts; - if (!options.empty()) - p.setEncoding(options.back(), Encoding::inputenc); - options.clear(); + if (!options.empty()) { + string const encoding = options.back(); + Encoding const * const enc = encodings.fromLaTeXName( + encoding, Encoding::inputenc, true); + if (!enc) + cerr << "Unknown encoding " << encoding << ". Ignoring." 
<< std::endl; + else { + if (!enc->unsafe() && options.size() == 1 && one_language == true) + h_inputencoding = enc->name(); + p.setEncoding(enc->iconvName()); + } + options.clear(); + } } else if (name == "srcltx") { @@ -1624,8 +1632,15 @@ void Preamble::parse(Parser & p, string const & forceclass, else if (t.cs() == "inputencoding") { string const encoding = p.getArg('{','}'); - h_inputencoding = encoding; - p.setEncoding(encoding, Encoding::inputenc); + Encoding const * const enc = encodings.fromLaTeXName( + encoding, Encoding::inputenc, true); + if (!enc) + cerr << "Unknown encoding " << encoding << ". Ignoring." << std::endl; + else { + if (!enc->unsafe()) + h_inputencoding = enc->name(); + p.setEncoding(enc->iconvName()); + } } else if (t.cs() == "newenvironment") { diff --git a/src/tex2lyx/TODO.txt b/src/tex2lyx/TODO.txt index 8754a29aa1..ad144387b1 100644 --- a/src/tex2lyx/TODO.txt +++ b/src/tex2lyx/TODO.txt @@ -90,10 +90,10 @@ Format LaTeX feature LyX feature \usepackage[scale|scaled=$val]{biolinum-type1} \font_sans \font_sf_scale +463 General * Use the language information provided by Language.cpp and the languages file (for babel/lyx/polyglossia name, quote style etc.) instead of hardcoding this information in Preamble.cpp. 
- diff --git a/src/tex2lyx/tex2lyx.cpp b/src/tex2lyx/tex2lyx.cpp index 3f98280cbe..9239d30984 100644 --- a/src/tex2lyx/tex2lyx.cpp +++ b/src/tex2lyx/tex2lyx.cpp @@ -840,8 +840,8 @@ bool tex2lyx(idocstream & is, ostream & os, string encoding) if (preamble.inputencoding() == "auto") encoding = "ISO8859-1"; else { - Encoding const * const enc = encodings.fromLaTeXName( - preamble.inputencoding(), Encoding::any, true); + Encoding const * const enc = encodings.fromLyXName( + preamble.inputencoding(), true); encoding = enc->iconvName(); } } diff --git a/src/version.h b/src/version.h index 8eebe850ff..ab0ee1b39a 100644 --- a/src/version.h +++ b/src/version.h @@ -30,8 +30,8 @@ extern char const * const lyx_version_info; // Do not remove the comment below, so we get merge conflict in // independent branches. Instead add your own. -#define LYX_FORMAT_LYX 462 // spitz: support for the newest libertine fonts -#define LYX_FORMAT_TEX2LYX 462 // spitz: support for the newest libertine fonts +#define LYX_FORMAT_LYX 463 // jrioux: encodings renaming +#define LYX_FORMAT_TEX2LYX 463 // jrioux: encodings renaming #if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX #ifndef _MSC_VER -- 2.39.5