+
+/// First code point covered by the Arabic shaping tables: arabic_table
+/// and arabic_table2 are indexed with (c - arabic_start).
+char_type const arabic_start = 0xc1;
+
+
+/// Information about a single UCS4 character, as read from the unicode
+/// symbols file by Encodings::read().
+struct CharInfo {
+ /// LaTeX command for this character; Encoding::latexChar() returns it
+ /// when the character cannot be encoded
+ docstring command;
+ /// Needed LaTeX preamble (or feature); may be empty
+ string preamble;
+ /// Is this a combining character? Set by the "combining" flag.
+ bool combining;
+ /// Is \c preamble a feature known by LaTeXFeatures, or a raw LaTeX
+ /// command? Encoding::validate() passes a feature to
+ /// LaTeXFeatures::require() and a raw command to
+ /// LaTeXFeatures::addPreambleSnippet().
+ bool feature;
+ /// Always force the LaTeX command, even if the encoding contains
+ /// this character? Set by the "force" flag.
+ bool force;
+};
+
+
+/// Maps a UCS4 code point to its CharInfo entry.
+typedef std::map<char_type, CharInfo> CharInfoMap;
+/// Symbol table filled by Encodings::read() and looked up by the
+/// Encoding constructor, Encoding::latexChar(), Encoding::validate()
+/// and Encodings::isCombiningChar().
+CharInfoMap unicodesymbols;
+
+} // namespace anon
+
+
+/// Construct an encoding from its LyX name \p n, its LaTeX name \p l and
+/// its iconv name \p i, and determine which code points it can encode.
+///
+/// Afterwards every code point below start_encodable_ is treated as
+/// encodable by latexChar(), and encodable_ holds the remaining encodable
+/// code points. "ascii", "utf8" and "utf8x" are handled with fixed
+/// limits; every other encoding is probed byte by byte through iconv.
+/// Characters flagged "force" in unicodesymbols are left out of the
+/// probed set, so that latexChar() emits their LaTeX command.
+Encoding::Encoding(string const & n, string const & l, string const & i)
+	: Name_(n), LatexName_(l), iconvName_(i)
+{
+	if (n == "ascii")
+		// ASCII can encode 128 code points and nothing else
+		start_encodable_ = 128;
+	else if (n == "utf8" || n == "utf8x")
+		// UTF8 can encode all 1<<20 + 1<<16 UCS4 code points
+		start_encodable_ = 0x110000;
+	else {
+		start_encodable_ = 0;
+		// temporarily switch off lyxerr, since we will generate iconv errors
+		lyxerr.disable();
+		for (unsigned short j = 0; j < 256; ++j) {
+			char const byte = j;
+			std::vector<char_type> const ucs4 = eightbit_to_ucs4(&byte, 1, i);
+			// Only bytes that convert to exactly one code point count
+			if (ucs4.size() != 1)
+				continue;
+			char_type const codepoint = ucs4[0];
+			CharInfoMap::const_iterator const info = unicodesymbols.find(codepoint);
+			if (info == unicodesymbols.end() || !info->second.force)
+				encodable_.insert(codepoint);
+		}
+		lyxerr.enable();
+		// Move the leading contiguous run of encodable code points out
+		// of the set and represent it by start_encodable_ instead.
+		CharSet::iterator it = encodable_.find(start_encodable_);
+		while (it != encodable_.end()) {
+			encodable_.erase(it);
+			++start_encodable_;
+			it = encodable_.find(start_encodable_);
+		}
+	}
+}
+
+
+/// Return the text to use for character \p c in this encoding.
+///
+/// If \p c is encodable (below start_encodable_ or contained in
+/// encodable_), the character itself is returned. Otherwise the LaTeX
+/// command from unicodesymbols is returned. If no command is known, an
+/// error is written to lyxerr and the character itself is returned.
+docstring const Encoding::latexChar(char_type c) const
+{
+	bool const encodable = c < start_encodable_
+		|| encodable_.find(c) != encodable_.end();
+	if (encodable)
+		return docstring(1, c);
+
+	// c cannot be encoded in this encoding
+	CharInfoMap::const_iterator const it = unicodesymbols.find(c);
+	if (it != unicodesymbols.end())
+		return it->second.command;
+
+	// std::hex is sticky: switch back to decimal once the code point is
+	// printed, so later numeric output on lyxerr is not shown in hex.
+	lyxerr << "Could not find LaTeX command for character 0x"
+	       << std::hex << c << std::dec << ".\nLaTeX export will fail."
+	       << endl;
+	return docstring(1, c);
+}
+
+
+/// Register with \p features whatever preamble character \p c needs.
+/// Characters without an entry in unicodesymbols, or with an empty
+/// preamble, require nothing.
+void Encoding::validate(char_type c, LaTeXFeatures & features) const
+{
+	// Add the preamble stuff even if c can be encoded in this encoding,
+	// since the inputenc package only maps the code point c to a command,
+	// it does not make this command available.
+	CharInfoMap::const_iterator const pos = unicodesymbols.find(c);
+	if (pos == unicodesymbols.end())
+		return;
+
+	CharInfo const & info = pos->second;
+	if (info.preamble.empty())
+		return;
+
+	if (info.feature)
+		features.require(info.preamble);
+	else
+		features.addPreambleSnippet(info.preamble);
+}
+
+
+/// True for code points in 0xc0..0xd2, except 0xce and 0xd0.
+bool Encodings::isComposeChar_hebrew(char_type c)
+{
+	if (c < 0xc0 || c > 0xd2)
+		return false;
+	return !(c == 0xce || c == 0xd0);
+}
+
+
+// Special Arabic letters are the ones that do not get connected from
+// the left. They are hamza, alef_madda, alef_hamza, waw_hamza,
+// alef_hamza_under, alef, tah_marbota, dal, thal, rah, zai, waw and
+// alef_maksoura.
+
+/// True if \p c is one of the code points 0xc1..0xc5, 0xc7, 0xc9,
+/// 0xcf..0xd2, 0xe8 or 0xe9.
+bool Encodings::is_arabic_special(char_type c)
+{
+	switch (c) {
+	case 0xc1:
+	case 0xc2:
+	case 0xc3:
+	case 0xc4:
+	case 0xc5:
+	case 0xc7:
+	case 0xc9:
+	case 0xcf:
+	case 0xd0:
+	case 0xd1:
+	case 0xd2:
+	case 0xe8:
+	case 0xe9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/// True for code points in the closed range 0xeb..0xf2.
+bool Encodings::isComposeChar_arabic(char_type c)
+{
+	bool const outside = c < 0xeb || c > 0xf2;
+	return !outside;
+}
+
+
+/// True if \p c lies within the range covered by arabic_table and its
+/// first table entry is non-zero.
+bool Encodings::is_arabic(char_type c)
+{
+	// c is a full UCS4 value, so it must be checked against the end of
+	// the table as well as its start before it is used as an index.
+	// NOTE(review): assumes arabic_table is a true array (not a pointer)
+	// visible in this translation unit -- confirm.
+	char_type const arabic_count =
+		sizeof(arabic_table) / sizeof(arabic_table[0]);
+	return c >= arabic_start
+		&& c - arabic_start < arabic_count
+		&& arabic_table[c - arabic_start][0];
+}
+
+
+/// Look up the table entry for \p c selected by \p form.
+///
+/// Characters for which is_arabic() is false are returned unchanged.
+/// With an ISO_10646_1 font norm the entry comes from arabic_table2,
+/// indexed by \p form; otherwise it comes from arabic_table, indexed by
+/// \p form >> 1.
+char_type Encodings::transformChar(char_type c,
+	Encodings::Letter_Form form)
+{
+	if (!is_arabic(c))
+		return c;
+
+	if (lyxrc.font_norm_type != LyXRC::ISO_10646_1)
+		return arabic_table[c - arabic_start][form >> 1];
+
+	return arabic_table2[c - arabic_start][form];
+}
+
+
+/// True if \p c has an entry in unicodesymbols that is flagged as a
+/// combining character; false for unknown characters.
+bool Encodings::isCombiningChar(char_type c)
+{
+	CharInfoMap::const_iterator const pos = unicodesymbols.find(c);
+	return pos != unicodesymbols.end() && pos->second.combining;
+}
+
+
+/// Find the encoding stored in encodinglist under the key \p name.
+/// Returns a null pointer if there is none.
+Encoding const * Encodings::getFromLyXName(string const & name) const
+{
+	EncodingList::const_iterator const pos = encodinglist.find(name);
+	if (pos == encodinglist.end())
+		return 0;
+	return &pos->second;
+}
+
+
+/// Find the first encoding in encodinglist whose latexName() equals
+/// \p name. Returns a null pointer if there is none.
+Encoding const * Encodings::getFromLaTeXName(string const & name) const
+{
+	// We don't use std::find_if because it makes copies of the pairs in
+	// the map.
+	// This linear search is OK since we don't have many encodings.
+	// Users could even optimize it by putting the encodings they use
+	// most at the top of lib/encodings.
+	EncodingList::const_iterator cur = encodinglist.begin();
+	EncodingList::const_iterator const last = encodinglist.end();
+	while (cur != last) {
+		if (cur->second.latexName() == name)
+			return &cur->second;
+		++cur;
+	}
+	return 0;
+}
+
+
+/// The constructor does no work of its own; encodings are added to
+/// encodinglist by read().
+Encodings::Encodings() {}
+
+
+/// Fill unicodesymbols from \p symbolsfile, then fill encodinglist from
+/// \p encfile.
+///
+/// Each symbol entry consists of four tokens: the code point in hex, the
+/// LaTeX command, the preamble (or feature name) and a comma separated
+/// list of flags ("combining", "force"; unknown flags are reported and
+/// ignored). Reading symbols stops at the first entry that cannot be
+/// read completely.
+///
+/// Each encoding entry is the tag "encoding" followed by the LyX name,
+/// the LaTeX name and the iconv name, and is closed by the tag "end".
+void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
+{
+	// We must read the symbolsfile first, because the Encoding
+	// constructor depends on it.
+	LyXLex symbolslex(0, 0);
+	symbolslex.setFile(symbolsfile);
+	while (symbolslex.isOK()) {
+		char_type symbol;
+		CharInfo info;
+		string flags;
+
+		if (symbolslex.next(true)) {
+			std::istringstream is(symbolslex.getString());
+			// reading symbol directly does not work if
+			// char_type == std::wchar_t.
+			boost::uint32_t tmp;
+			if(!(is >> std::hex >> tmp))
+				break;
+			symbol = tmp;
+		} else
+			break;
+		if (symbolslex.next(true))
+			info.command = symbolslex.getDocString();
+		else
+			break;
+		if (symbolslex.next(true))
+			info.preamble = symbolslex.getString();
+		else
+			break;
+		if (symbolslex.next(true))
+			flags = symbolslex.getString();
+		else
+			break;
+
+		info.combining = false;
+		info.force = false;
+		while (!flags.empty()) {
+			string flag;
+			flags = support::split(flags, flag, ',');
+			if (flag == "combining")
+				info.combining = true;
+			else if (flag == "force")
+				info.force = true;
+			else
+				// std::hex is sticky: go back to decimal so that
+				// later numeric output on lyxerr is not in hex.
+				lyxerr << "Ignoring unknown flag `" << flag
+				       << "' for symbol `0x" << std::hex
+				       << symbol << std::dec << "'." << endl;
+		}
+
+		// A preamble that does not start with a backslash names a
+		// feature; one that does is a raw LaTeX command. The flag is
+		// always given a value (false for an empty preamble) because
+		// it is printed below and stored in the map.
+		info.feature = !info.preamble.empty()
+			&& info.preamble[0] != '\\';
+
+		lyxerr[Debug::INFO]
+			<< "Read unicode symbol " << symbol << " '"
+			<< to_utf8(info.command) << "' '" << info.preamble
+			<< "' " << info.combining << ' ' << info.feature
+			<< endl;
+		unicodesymbols[symbol] = info;
+	}
+
+	// Now read the encodings
+	enum Encodingtags {
+		et_encoding = 1,
+		et_end,
+		et_last
+	};
+
+	struct keyword_item encodingtags[et_last - 1] = {
+		{ "encoding", et_encoding },
+		{ "end", et_end }
+	};
+
+	LyXLex lex(encodingtags, et_last - 1);
+	lex.setFile(encfile);
+	while (lex.isOK()) {
+		switch (lex.lex()) {
+		case et_encoding:
+		{
+			lex.next();
+			string const name = lex.getString();
+			lex.next();
+			string const latexname = lex.getString();
+			lex.next();
+			string const iconvname = lex.getString();
+			lyxerr[Debug::INFO] << "Reading encoding " << name << endl;
+			encodinglist[name] = Encoding(name, latexname, iconvname);
+			if (lex.lex() != et_end)
+				lex.printError("Encodings::read: "
+					       "missing end");
+			break;
+		}
+		case et_end:
+			lex.printError("Encodings::read: Misplaced end");
+			break;
+		case LyXLex::LEX_FEOF:
+			break;
+		default:
+			lex.printError("Encodings::read: "
+				       "Unknown tag: `$$Token'");
+			break;
+		}
+	}
+}
+
+
+} // namespace lyx