Fix cut and paste error

[features.git] / src / Encoding.cpp
diff --git a/src/Encoding.cpp b/src/Encoding.cpp

index e4bbbc044abeccaa12d9eb9fbd3bcedd6115a2c0..c7ea237179e79c966c7a700cea998fa0dda8da05 100644 (file)
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@@ -17,15 +17,17 @@
  #include "Lexer.h"
  
  #include "support/debug.h"
+#include "support/docstring.h"
  #include "support/gettext.h"
  #include "support/lstrings.h"
+#include "support/mutex.h"
  #include "support/textutils.h"
  #include "support/unicode.h"
  
-#include <boost/cstdint.hpp>
-
-#include <sstream>
  #include <algorithm>
+#include <cstdint>
+#include <iterator>
+#include <sstream>
  
  using namespace std;
  using namespace lyx::support;
@@ -57,7 +59,7 @@ MathAlphaSet mathalpha;
  /// The highest code point in UCS4 encoding (1<<20 + 1<<16)
  char_type const max_ucs4 = 0x110000;
  
-} // namespace anon
+} // namespace
  
  
  EncodingException::EncodingException(char_type c)
@@ -66,22 +68,23 @@ EncodingException::EncodingException(char_type c)
  }
  
  
-const char * EncodingException::what() const throw()
+const char * EncodingException::what() const noexcept
  {
         return "Could not find LaTeX command for a character";
  }
  
  
  CharInfo::CharInfo(
-       docstring const textcommand, docstring const mathcommand,
-       std::string const textpreamble, std::string const mathpreamble,
-       std::string const tipashortcut, unsigned int flags)
+       docstring const & textcommand, docstring const & mathcommand,
+       std::string const & textpreamble, std::string const & mathpreamble,
+       std::string const & tipashortcut, unsigned int flags)
         : textcommand_(textcommand), mathcommand_(mathcommand),
           textpreamble_(textpreamble), mathpreamble_(mathpreamble),
           tipashortcut_(tipashortcut), flags_(flags)
  {
  }
  
+
  Encoding::Encoding(string const & n, string const & l, string const & g,
                    string const & i, bool f, bool u, Encoding::Package p)
         : name_(n), latexName_(l), guiName_(g), iconvName_(i), fixedwidth_(f),
@@ -104,10 +107,28 @@ Encoding::Encoding(string const & n, string const & l, string const & g,
  
  void Encoding::init() const
  {
+       // Since the constructor is the only method which sets complete_
+       // to false, the test for complete_ is thread-safe without mutex.
         if (complete_)
                 return;
  
-       start_encodable_ = 0;
+       static Mutex mutex;
+       Mutex::Locker lock(&mutex);
+
+       // We need to test again for complete_, since another thread could
+       // have set it to true while we were waiting for the lock and we must
+       // not modify an encoding which is already complete.
+       if (complete_)
+               return;
+
+       // We do not make any member mutable so that it can be easily verified
+       // that all const methods are thread-safe: init() is the only const
+       // method which changes complete_, encodable_ and start_encodable_, and
+       // it uses a mutex to ensure thread-safety.
+       CharSet & encodable = const_cast<Encoding *>(this)->encodable_;
+       char_type & start_encodable = const_cast<Encoding *>(this)->start_encodable_;
+
+       start_encodable = 0;
         // temporarily switch off lyxerr, since we will generate iconv errors
         lyxerr.disable();
         if (fixedwidth_) {
@@ -121,10 +142,10 @@ void Encoding::init() const
                         char_type const uc = ucs4[0];
                         CharInfoMap::const_iterator const it = unicodesymbols.find(uc);
                         if (it == unicodesymbols.end())
-                               encodable_.insert(uc);
+                               encodable.insert(uc);
                         else if (!it->second.force()) {
                                 if (forced_->empty() || forced_->find(uc) == forced_->end())
-                                       encodable_.insert(uc);
+                                       encodable.insert(uc);
                         }
                 }
         } else {
@@ -137,22 +158,22 @@ void Encoding::init() const
                         if (!eightbit.empty()) {
                                 CharInfoMap::const_iterator const it = unicodesymbols.find(c);
                                 if (it == unicodesymbols.end())
-                                       encodable_.insert(c);
+                                       encodable.insert(c);
                                 else if (!it->second.force()) {
                                         if (forced_->empty() || forced_->find(c) == forced_->end())
-                                               encodable_.insert(c);
+                                               encodable.insert(c);
                                 }
                         }
                 }
         }
         lyxerr.enable();
-       CharSet::iterator it = encodable_.find(start_encodable_);
-       while (it != encodable_.end()) {
-               encodable_.erase(it);
-               ++start_encodable_;
-               it = encodable_.find(start_encodable_);
+       CharSet::iterator it = encodable.find(start_encodable);
+       while (it != encodable.end()) {
+               encodable.erase(it);
+               ++start_encodable;
+               it = encodable.find(start_encodable);
         }
-       complete_ = true;
+       const_cast<Encoding *>(this)->complete_ = true;
  }
  
  
@@ -168,9 +189,16 @@ bool Encoding::encodable(char_type c) const
  {
         // assure the used encoding is properly initialized
         init();
-
         if (iconvName_ == "UTF-8" && package_ == none)
                 return true;
+       // platex does not load inputenc: force conversion of supported characters
+       if (package_ == Encoding::japanese
+           && ((0xb7 <= c && c <= 0x05ff) // Latin-1 Supplement ... Hebrew
+                       || (0x1d00 <= c && c <= 0x218f) // Phonetic Extensions ... Number Forms
+                       || (0x2193 <= c && c <= 0x2aff) // Arrows ... Supplemental Mathematical Operators
+                       || (0xfb00 <= c && c <= 0xfb4f) // Alphabetic Presentation Forms
+                       || (0x1d400 <= c && c <= 0x1d7ff))) // Mathematical Alphanumeric Symbols
+               return false;
         if (c < start_encodable_ && !isForced(c))
                 return true;
         if (encodable_.find(c) != encodable_.end())
@@ -196,14 +224,13 @@ pair<docstring, bool> Encoding::latexChar(char_type c) const
  }
  
  
-pair<docstring, docstring> Encoding::latexString(docstring const input, bool dryrun) const
+pair<docstring, docstring> Encoding::latexString(docstring const & input, bool dryrun) const
  {
         docstring result;
         docstring uncodable;
         bool terminate = false;
-       for (size_t n = 0; n < input.size(); ++n) {
+       for (char_type const c : input) {
                 try {
-                       char_type const c = input[n];
                         pair<docstring, bool> latex_char = latexChar(c);
                         docstring const latex = latex_char.first;
                         if (terminate && !prefixIs(latex, '\\')
@@ -220,14 +247,15 @@ pair<docstring, docstring> Encoding::latexString(docstring const input, bool dry
                         result += latex;
                         terminate = latex_char.second;
                 } catch (EncodingException & /* e */) {
-                       LYXERR0("Uncodable character in latexString!");
+                       LYXERR0("Uncodable character <" << docstring(1, c) 
+                                       << "> in latexString!");
                         if (dryrun) {
                                 result += "<" + _("LyX Warning: ")
                                            + _("uncodable character") + " '";
-                               result += docstring(1, input[n]);
+                               result += docstring(1, c);
                                 result += "'>";
                         } else
-                               uncodable += input[n];
+                               uncodable += c;
                 }
         }
         return make_pair(result, uncodable);
@@ -239,16 +267,19 @@ vector<char_type> Encoding::symbolsList() const
         // assure the used encoding is properly initialized
         init();
  
-       // first all encodable characters
-       vector<char_type> symbols(encodable_.begin(), encodable_.end());
-       // add those below start_encodable_
+       // first all those below start_encodable_
+       vector<char_type> symbols;
         for (char_type c = 0; c < start_encodable_; ++c)
                 symbols.push_back(c);
-       // now the ones from the unicodesymbols file
-       CharInfoMap::const_iterator const end = unicodesymbols.end();
-       CharInfoMap::const_iterator it = unicodesymbols.begin();
-       for (; it != end; ++it)
-               symbols.push_back(it->first);
+       // add all encodable characters
+       copy(encodable_.begin(), encodable_.end(), back_inserter(symbols));
+       // now the ones from the unicodesymbols file that are not already there
+       for (auto const & elem : unicodesymbols) {
+               if (find(symbols.begin(), symbols.end(), elem.first) == symbols.end())
+                       symbols.push_back(elem.first);
+       }
+       // finally, sort the vector
+       sort(symbols.begin(), symbols.end());
         return symbols;
  }
  
@@ -282,10 +313,11 @@ bool Encodings::latexMathChar(char_type c, bool mathmode,
                 if (!encoding || command.empty()) {
                         command = it->second.textcommand();
                         needsTermination = !it->second.textnotermination();
-                       addTextCmd(c);
                 }
                 if (mathmode)
                         addMathSym(c);
+               else
+                       addTextCmd(c);
         }
         return use_math;
  }
@@ -297,6 +329,8 @@ char_type Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
         CharInfoMap::const_iterator const end = unicodesymbols.end();
         CharInfoMap::const_iterator it = unicodesymbols.begin();
         for (combining = false; it != end; ++it) {
+               if (it->second.deprecated())
+                       continue;
                 docstring const math = it->second.mathcommand();
                 docstring const text = it->second.textcommand();
                 if ((cmdtype & MATH_CMD) && math == cmd) {
@@ -382,6 +416,8 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
                 size_t unicmd_size = 0;
                 char_type c = 0;
                 for (; it != uniend; ++it) {
+                       if (it->second.deprecated())
+                               continue;
                         docstring const math = mathmode ? it->second.mathcommand()
                                                         : docstring();
                         docstring const text = textmode ? it->second.textcommand()
@@ -451,7 +487,7 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
                                    || (tmp.size() == prefix + 1 &&
                                        !isAlphaASCII(tmp[1]) &&
                                        (prefix == 1 || !isAlphaASCII(tmp[2])))
-                                  || k == cmdend 
+                                  || k == cmdend
                                    || !isAlphaASCII(cmd[k])
                                    || tmp[tmp.size() - 1] == '}'
                                  ) {
@@ -553,21 +589,29 @@ string const Encodings::TIPAShortcut(char_type c)
  }
  
  
-bool Encodings::isKnownScriptChar(char_type const c, string & preamble)
+string const Encodings::isKnownScriptChar(char_type const c)
  {
         CharInfoMap::const_iterator const it = unicodesymbols.find(c);
  
         if (it == unicodesymbols.end())
-               return false;
+               return string();
+       // FIXME: parse complex textpreamble (may be list or alternatives,
+       //                e.g., "subscript,textgreek" or "textcomp|textgreek")
+       if (it->second.textpreamble() == "textgreek"
+               || it->second.textpreamble() == "textcyrillic")
+               return it->second.textpreamble();
+       return string();
+}
  
-       if (it->second.textpreamble() != "textgreek" && it->second.textpreamble() != "textcyr")
-               return false;
  
-       if (preamble.empty()) {
-               preamble = it->second.textpreamble();
-               return true;
-       }
-       return it->second.textpreamble() == preamble;
+bool Encodings::fontencSupportsScript(string const & fontenc, string const & script)
+{
+       if (script == "textgreek")
+               return (fontenc == "LGR" || fontenc == "TU");
+       if (script == "textcyrillic")
+               return (fontenc == "T2A" || fontenc == "T2B" || fontenc == "T2C"
+                               || fontenc == "X2" || fontenc == "TU");
+       return false;
  }
  
  
@@ -577,18 +621,30 @@ bool Encodings::isMathAlpha(char_type c)
  }
  
  
+bool Encodings::isUnicodeTextOnly(char_type c)
+{
+       if (isASCII(c) || isMathAlpha(c))
+               return false;
+
+       CharInfoMap::const_iterator const it = unicodesymbols.find(c);
+       return it == unicodesymbols.end() || it->second.mathcommand().empty();
+}
+
+
  Encoding const *
  Encodings::fromLyXName(string const & name, bool allowUnsafe) const
  {
         EncodingList::const_iterator const it = encodinglist.find(name);
+       if (it == encodinglist.end())
+               return nullptr;
         if (!allowUnsafe && it->second.unsafe())
-               return 0;
-       return it != encodinglist.end() ? &it->second : 0;
+               return nullptr;
+       return &it->second;
  }
  
  
  Encoding const *
-Encodings::fromLaTeXName(string const & n, int const & p, bool allowUnsafe) const
+Encodings::fromLaTeXName(string const & n, int p, bool allowUnsafe) const
  {
         string name = n;
         // FIXME: if we have to test for too many of these synonyms,
@@ -606,19 +662,19 @@ Encodings::fromLaTeXName(string const & n, int const & p, bool allowUnsafe) cons
                 if ((it->second.latexName() == name) && (it->second.package() & p)
                                 && (!it->second.unsafe() || allowUnsafe))
                         return &it->second;
-       return 0;
+       return nullptr;
  }
  
  
  Encoding const *
-Encodings::fromIconvName(string const & n, int const & p, bool allowUnsafe) const
+Encodings::fromIconvName(string const & n, int p, bool allowUnsafe) const
  {
         EncodingList::const_iterator const end = encodinglist.end();
         for (EncodingList::const_iterator it = encodinglist.begin(); it != end; ++it)
                 if ((it->second.iconvName() == n) && (it->second.package() & p)
                                 && (!it->second.unsafe() || allowUnsafe))
                         return &it->second;
-       return 0;
+       return nullptr;
  }
  
  
@@ -646,7 +702,7 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                 istringstream is(symbolslex.getString());
                 // reading symbol directly does not work if
                 // char_type == wchar_t.
-               boost::uint32_t tmp;
+               uint32_t tmp;
                 if(!(is >> hex >> tmp))
                         break;
                 symbol = tmp;
@@ -660,7 +716,7 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                 if (!symbolslex.next(true))
                         break;
                 string sflags = symbolslex.getString();
-               
+
                 string tipashortcut;
                 int flags = 0;
  
@@ -675,16 +731,16 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                                 flags |= CharInfoForce;
                                 forced.insert(symbol);
                         } else if (prefixIs(flag, "force=")) {
-                               vector<string> encodings =
+                               vector<string> encs =
                                         getVectorFromString(flag.substr(6), ";");
-                               for (size_t i = 0; i < encodings.size(); ++i)
-                                       forcedselected[encodings[i]].insert(symbol);
+                               for (auto const & enc : encs)
+                                       forcedselected[enc].insert(symbol);
                                 flags |= CharInfoForceSelected;
                         } else if (prefixIs(flag, "force!=")) {
-                               vector<string> encodings =
+                               vector<string> encs =
                                         getVectorFromString(flag.substr(7), ";");
-                               for (size_t i = 0; i < encodings.size(); ++i)
-                                       forcednotselected[encodings[i]].insert(symbol);
+                               for (auto const & enc : encs)
+                                       forcednotselected[enc].insert(symbol);
                                 flags |= CharInfoForceSelected;
                         } else if (flag == "mathalpha") {
                                 mathalpha.insert(symbol);
@@ -700,6 +756,8 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                                 flags &= ~CharInfoMathNoTermination;
                         } else if (contains(flag, "tipashortcut=")) {
                                 tipashortcut = split(flag, '=');
+                       } else if (flag == "deprecated") {
+                               flags |= CharInfoDeprecated;
                         } else {
                                 lyxerr << "Ignoring unknown flag `" << flag
                                        << "' for symbol `0x"