namespace lyx {
+int const Encoding::any;
+
Encodings encodings;
Encodings::MathCommandSet Encodings::mathcmd;
+/// Look up an encoding by its LaTeX name \p n, restricted to the
+/// packages whose bits are set in \p p. Returns 0 if nothing matches.
Encoding const *
-Encodings::fromLaTeXName(string const & n, bool allowUnsafe) const
+Encodings::fromLaTeXName(string const & n, int const & p, bool allowUnsafe) const
{
string name = n;
// FIXME: if we have to test for too many of these synonyms,
// most at the top of lib/encodings.
EncodingList::const_iterator const end = encodinglist.end();
for (EncodingList::const_iterator it = encodinglist.begin(); it != end; ++it)
- if (it->second.latexName() == name) {
- if (!allowUnsafe && it->second.unsafe())
- return 0;
+ // Accept the first entry with a matching name whose package bit
+ // is set in the mask p; unsafe entries need allowUnsafe.
+ if ((it->second.latexName() == name) && (it->second.package() & p)
+ && (!it->second.unsafe() || allowUnsafe))
+ return &it->second;
+ return 0;
+}
+
+
+/// Look up an encoding by its iconv name \p n, restricted to the
+/// packages whose bits are set in \p p. Returns 0 if nothing matches.
+Encoding const *
+Encodings::fromIconvName(string const & n, int const & p, bool allowUnsafe) const
+{
+ EncodingList::const_iterator const end = encodinglist.end();
+ for (EncodingList::const_iterator it = encodinglist.begin(); it != end; ++it)
+ // Accept the first entry with a matching name whose package bit
+ // is set in the mask p; unsafe entries need allowUnsafe.
+ if ((it->second.iconvName() == n) && (it->second.package() & p)
+ && (!it->second.unsafe() || allowUnsafe))
return &it->second;
- }
return 0;
}
public:
/// Which LaTeX package handles this encoding?
+ /// Each value is a distinct bit, so a set of packages can be passed
+ /// as an int mask and tested with operator&.
enum Package {
- none,
- inputenc,
- CJK,
- japanese
+ none = 1,
+ inputenc = 2,
+ CJK = 4,
+ japanese = 8
};
+ /// Represents any of the above packages: used as a mask, it
+ /// matches every Package value.
+ static int const any = -1;
///
Encoding() {}
///
/// Get encoding from LyX name \p name
Encoding const *
fromLyXName(std::string const & name, bool allowUnsafe = false) const;
- /// Get encoding from LaTeX name \p name
- Encoding const *
- fromLaTeXName(std::string const & name, bool allowUnsafe = false) const;
+ /// Get encoding from LaTeX name \p name and package \p package.
+ /// \p package is a mask of Encoding::Package bits. Unsafe encodings
+ /// are only returned if \p allowUnsafe is true. Returns 0 if no
+ /// encoding matches.
+ Encoding const * fromLaTeXName(std::string const & name,
+ int const & package = Encoding::any, bool allowUnsafe = false) const;
+ /// Get encoding from iconv name \p name and package \p package.
+ /// \p package is a mask of Encoding::Package bits. Unsafe encodings
+ /// are only returned if \p allowUnsafe is true. Returns 0 if no
+ /// encoding matches.
+ Encoding const * fromIconvName(std::string const & name,
+ int const & package = Encoding::any, bool allowUnsafe = false) const;
///
const_iterator begin() const { return encodinglist.begin(); }
#include <config.h>
#include "Layout.h"
-#include "Encoding.h"
#include "FontInfo.h"
#include "Language.h"
#include "Lexer.h"
#include "Cursor.h"
#include "CutAndPaste.h"
#include "DispatchResult.h"
-#include "Encoding.h"
#include "ErrorList.h"
#include "FuncRequest.h"
#include "factory.h"
+/// Construct a parser that reads from the caller's stream \p is.
+/// The recorded iconv encoding name starts out as "UTF-8".
Parser::Parser(idocstream & is)
- : lineno_(0), pos_(0), iss_(0), is_(is), encoding_latex_("utf8")
+ : lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8")
{
}
+/// Construct a parser that reads \p s (converted with from_utf8())
+/// through a string stream it allocates itself.
+/// The recorded iconv encoding name starts out as "UTF-8".
Parser::Parser(string const & s)
: lineno_(0), pos_(0),
iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
- encoding_latex_("utf8")
+ encoding_iconv_("UTF-8")
{
}
}
-void Parser::setEncoding(std::string const & e)
+/// Look up the LaTeX encoding name \p e among the packages selected by
+/// the mask \p p and switch the input stream to the matching iconv
+/// encoding. An unknown name is reported on cerr and changes nothing.
+void Parser::setEncoding(std::string const & e, int const & p)
{
// We may (and need to) use unsafe encodings here: Since the text is
// converted to unicode while reading from is_, we never see text in
// the original encoding of the parser, but operate on utf8 strings
// instead. Therefore, we cannot misparse high bytes as {, } or \\.
- Encoding const * enc = encodings.fromLaTeXName(e, true);
+ Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
if (!enc) {
cerr << "Unknown encoding " << e << ". Ignoring." << std::endl;
return;
}
- //cerr << "setting encoding to " << enc->iconvName() << std::endl;
- is_ << lyx::setEncoding(enc->iconvName());
- encoding_latex_ = e;
+ // Hand over to the iconv-name overload, which switches the stream
+ // and records the name.
+ setEncoding(enc->iconvName());
+}
+
+
+/// Switch the input stream to the iconv encoding \p e and remember
+/// that name so getEncoding() can report it. No lookup or validation
+/// of \p e is done here.
+void Parser::setEncoding(std::string const & e)
+{
+ //cerr << "setting encoding to " << e << std::endl;
+ is_ << lyx::setEncoding(e);
+ encoding_iconv_ = e;
}
///
~Parser();
- /// change the latex encoding of the input stream
+ /// change the iconv encoding of the input stream
+ /// according to the latex encoding and package
+ void setEncoding(std::string const & encoding, int const & package);
+ /// change the iconv encoding of the input stream
void setEncoding(std::string const & encoding);
- /// get the current latex encoding of the input stream
- std::string getEncoding() const { return encoding_latex_; }
+ /// get the current iconv encoding of the input stream
+ std::string getEncoding() const { return encoding_iconv_; }
///
int lineno() const { return lineno_; }
idocstringstream * iss_;
///
idocstream & is_;
- /// latex name of the current encoding
- std::string encoding_latex_;
+ /// iconv name of the current encoding
+ std::string encoding_iconv_;
};
#include "Preamble.h"
#include "tex2lyx.h"
+#include "Encoding.h"
#include "LayoutFile.h"
#include "Layout.h"
#include "Lexer.h"
h_use_non_tex_fonts = "true";
registerAutomaticallyLoadedPackage("fontspec");
if (h_inputencoding == "auto")
- p.setEncoding("utf8");
+ p.setEncoding("UTF-8");
}
// roman fonts
xetex = true;
registerAutomaticallyLoadedPackage("xunicode");
if (h_inputencoding == "auto")
- p.setEncoding("utf8");
+ p.setEncoding("UTF-8");
}
else if (name == "CJK") {
else if (name == "CJKutf8") {
h_inputencoding = "UTF8";
- p.setEncoding(h_inputencoding);
+ p.setEncoding("UTF-8");
registerAutomaticallyLoadedPackage("CJKutf8");
}
if (opts.find(",") == string::npos && one_language == true)
h_inputencoding = opts;
if (!options.empty())
- p.setEncoding(options.back());
+ p.setEncoding(options.back(), Encoding::inputenc);
options.clear();
}
else if (t.cs() == "inputencoding") {
string const encoding = p.getArg('{','}');
h_inputencoding = encoding;
- p.setEncoding(encoding);
+ p.setEncoding(encoding, Encoding::inputenc);
}
else if (t.cs() == "newenvironment") {
{
// Set a sensible default encoding.
// This is used until an encoding command is found.
- // For child documents use the encoding of the master, else latin1,
- // since latin1 does not cause an iconv error if the actual encoding
- // is different (bug 7509).
+ // For child documents use the encoding of the master, else ISO8859-1
+ // (formerly known by its LaTeX name latin1), since ISO8859-1 does not
+ // cause an iconv error if the actual encoding is different (bug 7509).
if (encoding.empty()) {
if (preamble.inputencoding() == "auto")
- encoding = "latin1";
- else
- encoding = preamble.inputencoding();
+ encoding = "ISO8859-1";
+ else {
+ // The preamble stores a LaTeX name, but the parser now
+ // expects an iconv name, so translate it.
+ Encoding const * const enc = encodings.fromLaTeXName(
+ preamble.inputencoding(), Encoding::any, true);
+ // fromLaTeXName() returns 0 for names it does not know.
+ // Use the safe ISO8859-1 default in that case instead of
+ // dereferencing a null pointer.
+ if (enc)
+ encoding = enc->iconvName();
+ else
+ encoding = "ISO8859-1";
+ }
}
Parser p(is);
return EXIT_FAILURE;
}
encodings.read(enc_path, symbols_path);
- if (!default_encoding.empty() && !encodings.fromLaTeXName(default_encoding))
- error_message("Unknown LaTeX encoding `" + default_encoding + "'");
+ // The command line supplies a LaTeX encoding name; validate it and
+ // replace it by the iconv name that the parser works with.
+ if (!default_encoding.empty()) {
+ Encoding const * const enc = encodings.fromLaTeXName(
+ default_encoding, Encoding::any, true);
+ // Translate only after a successful lookup, so that a null
+ // result is never dereferenced even if error_message() returns.
+ if (!enc)
+ error_message("Unknown LaTeX encoding `" + default_encoding + "'");
+ else
+ default_encoding = enc->iconvName();
+ }
// Load the layouts
LayoutFileList::get().read();
/*!
* Reads tex input from \a infilename and writes lyx output to \a outfilename.
- * The (latex) encoding can be provided as \a encoding.
+ * The iconv name of the encoding can be provided as \a encoding.
* Uses some common settings for the preamble, so this should only
* be used more than once for included documents.
* Caution: Overwrites the existing preamble settings if the new document
// you set buggy_encoding to false for JIS.
bool const buggy_encoding = encoding == "JIS";
if (!buggy_encoding)
- p.setEncoding(encoding);
+ p.setEncoding(encoding, Encoding::CJK);
else {
// FIXME: This will read garbage, since the data is not encoded in utf8.
- p.setEncoding("utf8");
+ p.setEncoding("UTF-8");
}
// LyX only supports the same mapping for all CJK
// environments, so we might need to output everything as ERT
else if (t.cs() == "inputencoding") {
// nothing to write here
string const enc = subst(p.verbatim_item(), "\n", " ");
- p.setEncoding(enc);
+ p.setEncoding(enc, Encoding::inputenc);
}
else if ((where = is_known(t.cs(), known_special_chars))) {
if (t.cat() == catEscape) {
if (t.cs() == "inputencoding") {
string const enc = subst(p.verbatim_item(), "\n", " ");
- p.setEncoding(enc);
+ p.setEncoding(enc, Encoding::inputenc);
continue;
}
if (t.cs() != "begin")
char const * const * const where =
is_known(encoding, supported_CJK_encodings);
if (where)
- p.setEncoding(encoding);
+ p.setEncoding(encoding, Encoding::CJK);
else
- p.setEncoding("utf8");
+ p.setEncoding("UTF-8");
string const text = p.verbatimEnvironment("CJK");
p.setEncoding(encoding_old);
p.skip_spaces();