X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FEncoding.cpp;h=3aa31d0099ab9a771afa860f54b42203706677cf;hb=2df6a6a37876dcfaa755c9aaf6a5ddc1ad58c63e;hp=3dca7b01ab4fb52020123533a7e7800f0f0336bc;hpb=9bdca85f2602576d669bbf2b2e63a45e38292e30;p=lyx.git diff --git a/src/Encoding.cpp b/src/Encoding.cpp index 3dca7b01ab..3aa31d0099 100644 --- a/src/Encoding.cpp +++ b/src/Encoding.cpp @@ -261,7 +261,7 @@ const char * EncodingException::what() const throw() Encoding::Encoding(string const & n, string const & l, string const & i, bool f, Encoding::Package p) - : Name_(n), LatexName_(l), iconvName_(i), fixedwidth_(f), package_(p) + : name_(n), latexName_(l), iconvName_(i), fixedwidth_(f), package_(p) { if (n == "ascii") { // ASCII can encode 128 code points and nothing else @@ -279,6 +279,9 @@ Encoding::Encoding(string const & n, string const & l, string const & i, void Encoding::init() const { + if (complete_) + return; + start_encodable_ = 0; // temporarily switch off lyxerr, since we will generate iconv errors lyxerr.disable(); @@ -288,12 +291,12 @@ void Encoding::init() const for (unsigned short j = 0; j < 256; ++j) { char const c = char(j); vector const ucs4 = eightbit_to_ucs4(&c, 1, iconvName_); - if (ucs4.size() == 1) { - char_type const c = ucs4[0]; - CharInfoMap::const_iterator const it = unicodesymbols.find(c); - if (it == unicodesymbols.end() || !it->second.force) - encodable_.insert(c); - } + if (ucs4.size() != 1) + continue; + char_type const uc = ucs4[0]; + CharInfoMap::const_iterator const it = unicodesymbols.find(uc); + if (it == unicodesymbols.end() || !it->second.force) + encodable_.insert(uc); } } else { // We do not know how many code points this encoding has, and @@ -320,44 +323,39 @@ void Encoding::init() const } -docstring const Encoding::latexChar(char_type c) const +docstring Encoding::latexChar(char_type c) const { // assure the used encoding is properly initialized - if (!complete_) - init(); - BOOST_ASSERT(complete_); + init(); if (c < start_encodable_) return docstring(1, c); - if (encodable_.find(c) == encodable_.end()) { - // c cannot be encoded in this encoding - CharInfoMap::const_iterator const it = unicodesymbols.find(c); - if (it == unicodesymbols.end()) - throw EncodingException(c); - else - return it->second.command; - } - return docstring(1, c); + if (encodable_.find(c) != encodable_.end()) + return docstring(1, c); + + // c cannot be encoded in this encoding + CharInfoMap::const_iterator const it = unicodesymbols.find(c); + if (it == unicodesymbols.end()) + throw EncodingException(c); + return it->second.command; } -set Encoding::getSymbolsList() +vector Encoding::symbolsList() const { // assure the used encoding is properly initialized - if (!complete_) - init(); - BOOST_ASSERT(complete_); + init(); // first all encodable characters - CharSet symbols = encodable_; + vector symbols(encodable_.begin(), encodable_.end()); // add those below start_encodable_ for (char_type c = 0; c < start_encodable_; ++c) - symbols.insert(c); + symbols.push_back(c); // now the ones from the unicodesymbols file CharInfoMap::const_iterator const end = unicodesymbols.end(); - for (CharInfoMap::const_iterator it = unicodesymbols.begin(); it != end; ++it) { - symbols.insert(it->first); - } + CharInfoMap::const_iterator it = unicodesymbols.begin(); + for (; it != end; ++it) + symbols.push_back(it->first); return symbols; } @@ -374,10 +372,9 @@ void Encodings::validate(char_type c, LaTeXFeatures & features) } -bool Encodings::isComposeChar_hebrew(char_type c) +bool Encodings::isHebrewComposeChar(char_type c) { - return c <= 0x05c2 && c >= 0x05b0 && - c != 0x05be && c != 0x05c0; + return c <= 0x05c2 && c >= 0x05b0 && c != 0x05be && c != 0x05c0; } @@ -385,36 +382,30 @@ bool Encodings::isComposeChar_hebrew(char_type c) // they are hamza, alef_madda, alef_hamza, waw_hamza, alef_hamza_under, // alef, tah_marbota, dal, thal, rah, zai, wow, alef_maksoura -bool Encodings::is_arabic_special(char_type c) +bool Encodings::isArabicSpecialChar(char_type c) { - return (c >= 0x0621 && c <= 0x0625) || - c == 0x0627 || c == 0x0629 || - c == 0x062f || c == 0x0648 || - (c >= 0x0630 && c <= 0x0632) || - c == 0x0649 || c == 0x0698; + return (c >= 0x0621 && c <= 0x0625) || (c >= 0x0630 && c <= 0x0632) + || c == 0x0627 || c == 0x0629 || c == 0x062f || c == 0x0648 + || c == 0x0649 || c == 0x0698; } -bool Encodings::isComposeChar_arabic(char_type c) +bool Encodings::isArabicComposeChar(char_type c) { return c >= 0x064b && c <= 0x0652; } -bool Encodings::is_arabic(char_type c) +bool Encodings::isArabicChar(char_type c) { - return c >= arabic_start && c <= arabic_end && - arabic_table[c-arabic_start][0]; + return c >= arabic_start && c <= arabic_end + && arabic_table[c-arabic_start][0]; } -char_type Encodings::transformChar(char_type c, - Encodings::Letter_Form form) +char_type Encodings::transformChar(char_type c, Encodings::LetterForm form) { - if (!is_arabic(c)) - return c; - - return arabic_table[c-arabic_start][form]; + return isArabicChar(c) ? arabic_table[c-arabic_start][form] : c; } @@ -434,8 +425,7 @@ bool Encodings::isKnownScriptChar(char_type const c, string & preamble) if (it == unicodesymbols.end()) return false; - if (it->second.preamble != "textgreek" && - it->second.preamble != "textcyr") + if (it->second.preamble != "textgreek" && it->second.preamble != "textcyr") return false; if (preamble.empty()) { @@ -446,17 +436,14 @@ bool Encodings::isKnownScriptChar(char_type const c, string & preamble) } -Encoding const * Encodings::getFromLyXName(string const & name) const +Encoding const * Encodings::fromLyXName(string const & name) const { - EncodingList::const_iterator it = encodinglist.find(name); - if (it != encodinglist.end()) - return &it->second; - else - return 0; + EncodingList::const_iterator const it = encodinglist.find(name); + return it != encodinglist.end() ? &it->second : 0; } -Encoding const * Encodings::getFromLaTeXName(string const & name) const +Encoding const * Encodings::fromLaTeXName(string const & name) const { // We don't use find_if because it makes copies of the pairs in // the map. @@ -487,28 +474,26 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile) CharInfo info; string flags; - if (symbolslex.next(true)) { - istringstream is(symbolslex.getString()); - // reading symbol directly does not work if - // char_type == wchar_t. - boost::uint32_t tmp; - if(!(is >> hex >> tmp)) - break; - symbol = tmp; - } else + if (!symbolslex.next(true)) break; - if (symbolslex.next(true)) - info.command = symbolslex.getDocString(); - else + + istringstream is(symbolslex.getString()); + // reading symbol directly does not work if + // char_type == wchar_t. + boost::uint32_t tmp; + if(!(is >> hex >> tmp)) break; - if (symbolslex.next(true)) - info.preamble = symbolslex.getString(); - else + symbol = tmp; + + if (!symbolslex.next(true)) break; - if (symbolslex.next(true)) - flags = symbolslex.getString(); - else + info.command = symbolslex.getDocString(); + if (!symbolslex.next(true)) break; + info.preamble = symbolslex.getString(); + if (!symbolslex.next(true)) + break; + flags = symbolslex.getString(); info.combining = false; info.feature = false; @@ -567,11 +552,10 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile) fixedwidth = true; else if (width == "variable") fixedwidth = false; - else { + else lex.printError("Encodings::read: " "Unknown width: `$$Token'"); - } - + lex.next(); string const p = lex.getString(); Encoding::Package package = Encoding::none; @@ -581,15 +565,14 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile) package = Encoding::inputenc; else if (p == "CJK") package = Encoding::CJK; - else { + else lex.printError("Encodings::read: " "Unknown package: `$$Token'"); - } - + LYXERR(Debug::INFO, "Reading encoding " << name); encodinglist[name] = Encoding(name, latexname, - iconvname, fixedwidth, - package); + iconvname, fixedwidth, package); + if (lex.lex() != et_end) lex.printError("Encodings::read: " "missing end");