From 2eea1590b1016d873608e6b86d2a8dd08f01ed65 Mon Sep 17 00:00:00 2001 From: Julien Rioux Date: Sat, 19 Jan 2013 19:47:15 +0100 Subject: [PATCH] Separation of the various names for encodings. Provide functions for translating to the LyX name of an encoding from either a LaTeX name or an Iconv name, with the possibility to specify the package. This is in anticipation of changing to use the LyX name of the encoding in the .lyx file format and allowing multiple lib/encodings entries to have the same LaTeX name (but different packages!). The tex2lyx parser needs to worry about the iconv name of the input encoding, so store that instead of the latex name. --- src/Encoding.cpp | 22 +++++++++++++++++----- src/Encoding.h | 19 ++++++++++++------- src/Layout.cpp | 1 - src/Text.cpp | 1 - src/tex2lyx/Parser.cpp | 20 +++++++++++++------- src/tex2lyx/Parser.h | 13 ++++++++----- src/tex2lyx/Preamble.cpp | 11 ++++++----- src/tex2lyx/tex2lyx.cpp | 24 ++++++++++++++++-------- src/tex2lyx/tex2lyx.h | 2 +- src/tex2lyx/text.cpp | 12 ++++++------ 10 files changed, 79 insertions(+), 46 deletions(-) diff --git a/src/Encoding.cpp b/src/Encoding.cpp index 47f97e9dfa..3a27a7bb28 100644 --- a/src/Encoding.cpp +++ b/src/Encoding.cpp @@ -37,6 +37,8 @@ using namespace lyx::support; namespace lyx { +int const Encoding::any; + Encodings encodings; Encodings::MathCommandSet Encodings::mathcmd; @@ -852,7 +854,7 @@ Encodings::fromLyXName(string const & name, bool allowUnsafe) const Encoding const * -Encodings::fromLaTeXName(string const & n, bool allowUnsafe) const +Encodings::fromLaTeXName(string const & n, int const & p, bool allowUnsafe) const { string name = n; // FIXME: if we have to test for too many of these synonyms, @@ -867,11 +869,21 @@ Encodings::fromLaTeXName(string const & n, bool allowUnsafe) const // most at the top of lib/encodings. EncodingList::const_iterator const end = encodinglist.end(); for (EncodingList::const_iterator it = encodinglist.begin(); it != end; ++it) - if (it->second.latexName() == name) { - if (!allowUnsafe && it->second.unsafe()) - return 0; + if ((it->second.latexName() == name) && (it->second.package() & p) + && (!it->second.unsafe() || allowUnsafe)) + return &it->second; + return 0; +} + + +Encoding const * +Encodings::fromIconvName(string const & n, int const & p, bool allowUnsafe) const +{ + EncodingList::const_iterator const end = encodinglist.end(); + for (EncodingList::const_iterator it = encodinglist.begin(); it != end; ++it) + if ((it->second.iconvName() == n) && (it->second.package() & p) + && (!it->second.unsafe() || allowUnsafe)) return &it->second; - } return 0; } diff --git a/src/Encoding.h b/src/Encoding.h index e0480df98c..fb36e72651 100644 --- a/src/Encoding.h +++ b/src/Encoding.h @@ -44,11 +44,13 @@ class Encoding { public: /// Which LaTeX package handles this encoding? enum Package { - none, - inputenc, - CJK, - japanese + none = 1, + inputenc = 2, + CJK = 4, + japanese = 8 }; + /// Represent any of the above packages + static int const any = -1; /// Encoding() {} /// @@ -172,9 +174,12 @@ public: /// Get encoding from LyX name \p name Encoding const * fromLyXName(std::string const & name, bool allowUnsafe = false) const; - /// Get encoding from LaTeX name \p name - Encoding const * - fromLaTeXName(std::string const & name, bool allowUnsafe = false) const; + /// Get encoding from LaTeX name \p name and package \p package + Encoding const * fromLaTeXName(std::string const & name, + int const & package = Encoding::any, bool allowUnsafe = false) const; + /// Get encoding from iconv name \p name and package \p package + Encoding const * fromIconvName(std::string const & name, + int const & package = Encoding::any, bool allowUnsafe = false) const; /// const_iterator begin() const { return encodinglist.begin(); } diff --git a/src/Layout.cpp b/src/Layout.cpp index a3f2b103c6..78485d4996 100644 --- a/src/Layout.cpp +++ b/src/Layout.cpp @@ -13,7 +13,6 @@ #include #include "Layout.h" -#include "Encoding.h" #include "FontInfo.h" #include "Language.h" #include "Lexer.h" diff --git a/src/Text.cpp b/src/Text.cpp index 48f8c82491..683a8582ac 100644 --- a/src/Text.cpp +++ b/src/Text.cpp @@ -30,7 +30,6 @@ #include "Cursor.h" #include "CutAndPaste.h" #include "DispatchResult.h" -#include "Encoding.h" #include "ErrorList.h" #include "FuncRequest.h" #include "factory.h" diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp index 6309ab4379..6f7dbf31e9 100644 --- a/src/tex2lyx/Parser.cpp +++ b/src/tex2lyx/Parser.cpp @@ -158,7 +158,7 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags) Parser::Parser(idocstream & is) - : lineno_(0), pos_(0), iss_(0), is_(is), encoding_latex_("utf8") + : lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8") { } @@ -166,7 +166,7 @@ Parser::Parser(idocstream & is) Parser::Parser(string const & s) : lineno_(0), pos_(0), iss_(new idocstringstream(from_utf8(s))), is_(*iss_), - encoding_latex_("utf8") + encoding_iconv_("UTF-8") { } @@ -177,20 +177,26 @@ Parser::~Parser() } -void Parser::setEncoding(std::string const & e) +void Parser::setEncoding(std::string const & e, int const & p) { // We may (and need to) use unsafe encodings here: Since the text is // converted to unicode while reading from is_, we never see text in // the original encoding of the parser, but operate on utf8 strings // instead. Therefore, we cannot misparse high bytes as {, } or \\. - Encoding const * enc = encodings.fromLaTeXName(e, true); + Encoding const * const enc = encodings.fromLaTeXName(e, p, true); if (!enc) { cerr << "Unknown encoding " << e << ". Ignoring." << std::endl; return; } - //cerr << "setting encoding to " << enc->iconvName() << std::endl; - is_ << lyx::setEncoding(enc->iconvName()); - encoding_latex_ = e; + setEncoding(enc->iconvName()); +} + + +void Parser::setEncoding(std::string const & e) +{ + //cerr << "setting encoding to " << e << std::endl; + is_ << lyx::setEncoding(e); + encoding_iconv_ = e; } diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h index 59a89d5a75..18a08ebe86 100644 --- a/src/tex2lyx/Parser.h +++ b/src/tex2lyx/Parser.h @@ -135,10 +135,13 @@ public: /// ~Parser(); - /// change the latex encoding of the input stream + /// change the iconv encoding of the input stream + /// according to the latex encoding and package + void setEncoding(std::string const & encoding, int const & package); + /// change the iconv encoding of the input stream void setEncoding(std::string const & encoding); - /// get the current latex encoding of the input stream - std::string getEncoding() const { return encoding_latex_; } + /// get the current iconv encoding of the input stream + std::string getEncoding() const { return encoding_iconv_; } /// int lineno() const { return lineno_; } @@ -271,8 +274,8 @@ private: idocstringstream * iss_; /// idocstream & is_; - /// latex name of the current encoding - std::string encoding_latex_; + /// iconv name of the current encoding + std::string encoding_iconv_; }; diff --git a/src/tex2lyx/Preamble.cpp b/src/tex2lyx/Preamble.cpp index 0c523c7f5d..4e976ec13f 100644 --- a/src/tex2lyx/Preamble.cpp +++ b/src/tex2lyx/Preamble.cpp @@ -16,6 +16,7 @@ #include "Preamble.h" #include "tex2lyx.h" +#include "Encoding.h" #include "LayoutFile.h" #include "Layout.h" #include "Lexer.h" @@ -651,7 +652,7 @@ void Preamble::handle_package(Parser &p, string const & name, h_use_non_tex_fonts = "true"; registerAutomaticallyLoadedPackage("fontspec"); if (h_inputencoding == "auto") - p.setEncoding("utf8"); + p.setEncoding("UTF-8"); } // roman fonts @@ -756,7 +757,7 @@ void Preamble::handle_package(Parser &p, string const & name, xetex = true; registerAutomaticallyLoadedPackage("xunicode"); if (h_inputencoding == "auto") - p.setEncoding("utf8"); + p.setEncoding("UTF-8"); } else if (name == "CJK") { @@ -769,7 +770,7 @@ void Preamble::handle_package(Parser &p, string const & name, else if (name == "CJKutf8") { h_inputencoding = "UTF8"; - p.setEncoding(h_inputencoding); + p.setEncoding("UTF-8"); registerAutomaticallyLoadedPackage("CJKutf8"); } @@ -793,7 +794,7 @@ void Preamble::handle_package(Parser &p, string const & name, if (opts.find(",") == string::npos && one_language == true) h_inputencoding = opts; if (!options.empty()) - p.setEncoding(options.back()); + p.setEncoding(options.back(), Encoding::inputenc); options.clear(); } @@ -1421,7 +1422,7 @@ void Preamble::parse(Parser & p, string const & forceclass, else if (t.cs() == "inputencoding") { string const encoding = p.getArg('{','}'); h_inputencoding = encoding; - p.setEncoding(encoding); + p.setEncoding(encoding, Encoding::inputenc); } else if (t.cs() == "newenvironment") { diff --git a/src/tex2lyx/tex2lyx.cpp b/src/tex2lyx/tex2lyx.cpp index ea20561f08..3f98280cbe 100644 --- a/src/tex2lyx/tex2lyx.cpp +++ b/src/tex2lyx/tex2lyx.cpp @@ -833,14 +833,17 @@ bool tex2lyx(idocstream & is, ostream & os, string encoding) { // Set a sensible default encoding. // This is used until an encoding command is found. - // For child documents use the encoding of the master, else latin1, - // since latin1 does not cause an iconv error if the actual encoding - // is different (bug 7509). + // For child documents use the encoding of the master, else ISO8859-1, + // (formerly known by its latex name latin1), since ISO8859-1 does not + // cause an iconv error if the actual encoding is different (bug 7509). if (encoding.empty()) { if (preamble.inputencoding() == "auto") - encoding = "latin1"; - else - encoding = preamble.inputencoding(); + encoding = "ISO8859-1"; + else { + Encoding const * const enc = encodings.fromLaTeXName( + preamble.inputencoding(), Encoding::any, true); + encoding = enc->iconvName(); + } } Parser p(is); @@ -1077,8 +1080,13 @@ int main(int argc, char * argv[]) return EXIT_FAILURE; } encodings.read(enc_path, symbols_path); - if (!default_encoding.empty() && !encodings.fromLaTeXName(default_encoding)) - error_message("Unknown LaTeX encoding `" + default_encoding + "'"); + if (!default_encoding.empty()) { + Encoding const * const enc = encodings.fromLaTeXName( + default_encoding, Encoding::any, true); + if (!enc) + error_message("Unknown LaTeX encoding `" + default_encoding + "'"); + default_encoding = enc->iconvName(); + } // Load the layouts LayoutFileList::get().read(); diff --git a/src/tex2lyx/tex2lyx.h b/src/tex2lyx/tex2lyx.h index 94f2497fac..020682cec8 100644 --- a/src/tex2lyx/tex2lyx.h +++ b/src/tex2lyx/tex2lyx.h @@ -195,7 +195,7 @@ extern bool skipChildren(); /*! * Reads tex input from \a infilename and writes lyx output to \a outfilename. - * The (latex) encoding can be provided as \a encoding. + * The iconv name of the encoding can be provided as \a encoding. * Uses some common settings for the preamble, so this should only * be used more than once for included documents. * Caution: Overwrites the existing preamble settings if the new document diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index ae44d85be5..9d90c2d2e0 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -1465,10 +1465,10 @@ void parse_environment(Parser & p, ostream & os, bool outer, // you set buggy_encoding to false for JIS. bool const buggy_encoding = encoding == "JIS"; if (!buggy_encoding) - p.setEncoding(encoding); + p.setEncoding(encoding, Encoding::CJK); else { // FIXME: This will read garbage, since the data is not encoded in utf8. - p.setEncoding("utf8"); + p.setEncoding("UTF-8"); } // LyX only supports the same mapping for all CJK // environments, so we might need to output everything as ERT @@ -3706,7 +3706,7 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, else if (t.cs() == "inputencoding") { // nothing to write here string const enc = subst(p.verbatim_item(), "\n", " "); - p.setEncoding(enc); + p.setEncoding(enc, Encoding::inputenc); } else if ((where = is_known(t.cs(), known_special_chars))) { @@ -4505,7 +4505,7 @@ string guessLanguage(Parser & p, string const & lang) if (t.cat() == catEscape) { if (t.cs() == "inputencoding") { string const enc = subst(p.verbatim_item(), "\n", " "); - p.setEncoding(enc); + p.setEncoding(enc, Encoding::inputenc); continue; } if (t.cs() != "begin") @@ -4535,9 +4535,9 @@ string guessLanguage(Parser & p, string const & lang) char const * const * const where = is_known(encoding, supported_CJK_encodings); if (where) - p.setEncoding(encoding); + p.setEncoding(encoding, Encoding::CJK); else - p.setEncoding("utf8"); + p.setEncoding("UTF-8"); string const text = p.verbatimEnvironment("CJK"); p.setEncoding(encoding_old); p.skip_spaces(); -- 2.39.2