git.lyx.org Git - lyx.git/blobdiff - src/Encoding.cpp
More requires --> required, for C++2a.
[lyx.git] / src / Encoding.cpp
index 73edc843ff361bfe977441284f67f10d498239e6..2379209d270cc25d80b63059377f12cc3cb9c5da 100644 (file)
 #include "support/textutils.h"
 #include "support/unicode.h"
 
-#include <boost/cstdint.hpp>
-
-#include <sstream>
 #include <algorithm>
+#include <cstdint>
+#include <iterator>
+#include <sstream>
 
 using namespace std;
 using namespace lyx::support;
@@ -58,7 +58,7 @@ MathAlphaSet mathalpha;
 /// The highest code point in UCS4 encoding (1<<20 + 1<<16)
 char_type const max_ucs4 = 0x110000;
 
-} // namespace anon
+} // namespace
 
 
 EncodingException::EncodingException(char_type c)
@@ -188,9 +188,16 @@ bool Encoding::encodable(char_type c) const
 {
        // assure the used encoding is properly initialized
        init();
-
        if (iconvName_ == "UTF-8" && package_ == none)
                return true;
+       // platex does not load inputenc: force conversion of supported characters
+       if (package_ == Encoding::japanese
+           && ((0xb7 <= c && c <= 0x05ff) // Latin-1 Supplement ... Hebrew
+                       || (0x1d00 <= c && c <= 0x218f) // Phonetic Extensions ... Number Forms
+                       || (0x2193 <= c && c <= 0x2aff) // Arrows ... Supplemental Mathematical Operators
+                       || (0xfb00 <= c && c <= 0xfb4f) // Alphabetic Presentation Forms
+                       || (0x1d400 <= c && c <= 0x1d7ff))) // Mathematical Alphanumeric Symbols
+               return false;
        if (c < start_encodable_ && !isForced(c))
                return true;
        if (encodable_.find(c) != encodable_.end())
@@ -222,8 +229,8 @@ pair<docstring, docstring> Encoding::latexString(docstring const & input, bool d
        docstring uncodable;
        bool terminate = false;
        for (size_t n = 0; n < input.size(); ++n) {
+               char_type const c = input[n];
                try {
-                       char_type const c = input[n];
                        pair<docstring, bool> latex_char = latexChar(c);
                        docstring const latex = latex_char.first;
                        if (terminate && !prefixIs(latex, '\\')
@@ -240,7 +247,8 @@ pair<docstring, docstring> Encoding::latexString(docstring const & input, bool d
                        result += latex;
                        terminate = latex_char.second;
                } catch (EncodingException & /* e */) {
-                       LYXERR0("Uncodable character in latexString!");
+                       LYXERR0("Uncodable character <" << docstring(1, c) 
+                                       << "> in latexString!");
                        if (dryrun) {
                                result += "<" + _("LyX Warning: ")
                                           + _("uncodable character") + " '";
@@ -259,16 +267,19 @@ vector<char_type> Encoding::symbolsList() const
        // assure the used encoding is properly initialized
        init();
 
-       // first all encodable characters
-       vector<char_type> symbols(encodable_.begin(), encodable_.end());
-       // add those below start_encodable_
+       // first all those below start_encodable_
+       vector<char_type> symbols;
        for (char_type c = 0; c < start_encodable_; ++c)
                symbols.push_back(c);
-       // now the ones from the unicodesymbols file
-       CharInfoMap::const_iterator const end = unicodesymbols.end();
-       CharInfoMap::const_iterator it = unicodesymbols.begin();
-       for (; it != end; ++it)
-               symbols.push_back(it->first);
+       // add all encodable characters
+       copy(encodable_.begin(), encodable_.end(), back_inserter(symbols));
+       // now the ones from the unicodesymbols file that are not already there
+       for (pair<char_type, CharInfo> const & elem : unicodesymbols) {
+               if (find(symbols.begin(), symbols.end(), elem.first) == symbols.end())
+                       symbols.push_back(elem.first);
+       }
+       // finally, sort the vector
+       sort(symbols.begin(), symbols.end());
        return symbols;
 }
 
@@ -302,10 +313,11 @@ bool Encodings::latexMathChar(char_type c, bool mathmode,
                if (!encoding || command.empty()) {
                        command = it->second.textcommand();
                        needsTermination = !it->second.textnotermination();
-                       addTextCmd(c);
                }
                if (mathmode)
                        addMathSym(c);
+               else
+                       addTextCmd(c);
        }
        return use_math;
 }
@@ -577,21 +589,29 @@ string const Encodings::TIPAShortcut(char_type c)
 }
 
 
-bool Encodings::isKnownScriptChar(char_type const c, string & preamble)
+string const Encodings::isKnownScriptChar(char_type const c) // yields c's textpreamble if it names a known script, else an empty string
 {
        CharInfoMap::const_iterator const it = unicodesymbols.find(c);
 
        if (it == unicodesymbols.end())
-               return false;
+               return string(); // c has no entry in unicodesymbols
+       // FIXME: parse complex textpreamble (may be list or alternatives,
+       //                e.g., "subscript,textgreek" or "textcomp|textgreek")
+       if (it->second.textpreamble() == "textgreek" // only an exact match counts (see FIXME above)
+               || it->second.textpreamble() == "textcyrillic")
+               return it->second.textpreamble();
+       return string(); // entry exists, but its textpreamble is neither "textgreek" nor "textcyrillic"
+}
 
-       if (it->second.textpreamble() != "textgreek" && it->second.textpreamble() != "textcyr")
-               return false;
 
-       if (preamble.empty()) {
-               preamble = it->second.textpreamble();
-               return true;
-       }
-       return it->second.textpreamble() == preamble;
+bool Encodings::fontencSupportsScript(string const & fontenc, string const & script) // is `fontenc` listed as usable for `script`?
+{
+       if (script == "textgreek") // "textgreek" is accepted with LGR or TU
+               return (fontenc == "LGR" || fontenc == "TU");
+       if (script == "textcyrillic") // "textcyrillic" is accepted with T2A, T2B, T2C, X2 or TU
+               return (fontenc == "T2A" || fontenc == "T2B" || fontenc == "T2C"
+                               || fontenc == "X2" || fontenc == "TU");
+       return false; // any other script name is reported as unsupported
 }
 
 
@@ -601,6 +621,16 @@ bool Encodings::isMathAlpha(char_type c)
 }
 
 
+bool Encodings::isUnicodeTextOnly(char_type c) // true when c is non-ASCII, not math-alpha, and has no math command
+{
+       if (isASCII(c) || isMathAlpha(c)) // ASCII and math-alpha characters are never reported as text-only
+               return false;
+
+       CharInfoMap::const_iterator const it = unicodesymbols.find(c);
+       return it == unicodesymbols.end() || it->second.mathcommand().empty(); // no unicodesymbols entry, or an entry whose mathcommand is empty
+}
+
+
 Encoding const *
 Encodings::fromLyXName(string const & name, bool allowUnsafe) const
 {
@@ -672,7 +702,7 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                istringstream is(symbolslex.getString());
                // reading symbol directly does not work if
                // char_type == wchar_t.
-               boost::uint32_t tmp;
+               uint32_t tmp;
                if(!(is >> hex >> tmp))
                        break;
                symbol = tmp;
@@ -701,16 +731,16 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                                flags |= CharInfoForce;
                                forced.insert(symbol);
                        } else if (prefixIs(flag, "force=")) {
-                               vector<string> encodings =
+                               vector<string> encs =
                                        getVectorFromString(flag.substr(6), ";");
-                               for (size_t i = 0; i < encodings.size(); ++i)
-                                       forcedselected[encodings[i]].insert(symbol);
+                               for (size_t i = 0; i < encs.size(); ++i)
+                                       forcedselected[encs[i]].insert(symbol);
                                flags |= CharInfoForceSelected;
                        } else if (prefixIs(flag, "force!=")) {
-                               vector<string> encodings =
+                               vector<string> encs =
                                        getVectorFromString(flag.substr(7), ";");
-                               for (size_t i = 0; i < encodings.size(); ++i)
-                                       forcednotselected[encodings[i]].insert(symbol);
+                               for (size_t i = 0; i < encs.size(); ++i)
+                                       forcednotselected[encs[i]].insert(symbol);
                                flags |= CharInfoForceSelected;
                        } else if (flag == "mathalpha") {
                                mathalpha.insert(symbol);