git.lyx.org Git - lyx.git/blobdiff - src/Encoding.cpp
Account for old versions of Pygments
[lyx.git] / src / Encoding.cpp
index 4a8faf1f7861656d8c15d1895b1976940e684a33..7dc12af0271fb1849f1212a6a654980eb566b3c4 100644 (file)
 #include "support/debug.h"
 #include "support/gettext.h"
 #include "support/lstrings.h"
+#include "support/mutex.h"
 #include "support/textutils.h"
 #include "support/unicode.h"
 
 #include <boost/cstdint.hpp>
 
-#include <sstream>
+#include <iterator>
 #include <algorithm>
+#include <sstream>
 
 using namespace std;
 using namespace lyx::support;
@@ -82,6 +84,7 @@ CharInfo::CharInfo(
 {
 }
 
+
 Encoding::Encoding(string const & n, string const & l, string const & g,
                   string const & i, bool f, bool u, Encoding::Package p)
        : name_(n), latexName_(l), guiName_(g), iconvName_(i), fixedwidth_(f),
@@ -104,10 +107,28 @@ Encoding::Encoding(string const & n, string const & l, string const & g,
 
 void Encoding::init() const
 {
+       // Since the constructor is the only method which sets complete_
+       // to false, the test for complete_ is thread-safe without mutex.
        if (complete_)
                return;
 
-       start_encodable_ = 0;
+       static Mutex mutex;
+       Mutex::Locker lock(&mutex);
+
+       // We need to test again for complete_, since another thread could
+       // have set it to true while we were waiting for the lock and we must
+       // not modify an encoding which is already complete.
+       if (complete_)
+               return;
+
+       // We do not make any member mutable so that it can be easily verified
+       // that all const methods are thread-safe: init() is the only const
+       // method which changes complete_, encodable_ and start_encodable_, and
+       // it uses a mutex to ensure thread-safety.
+       CharSet & encodable = const_cast<Encoding *>(this)->encodable_;
+       char_type & start_encodable = const_cast<Encoding *>(this)->start_encodable_;
+
+       start_encodable = 0;
        // temporarily switch off lyxerr, since we will generate iconv errors
        lyxerr.disable();
        if (fixedwidth_) {
@@ -121,10 +142,10 @@ void Encoding::init() const
                        char_type const uc = ucs4[0];
                        CharInfoMap::const_iterator const it = unicodesymbols.find(uc);
                        if (it == unicodesymbols.end())
-                               encodable_.insert(uc);
+                               encodable.insert(uc);
                        else if (!it->second.force()) {
                                if (forced_->empty() || forced_->find(uc) == forced_->end())
-                                       encodable_.insert(uc);
+                                       encodable.insert(uc);
                        }
                }
        } else {
@@ -137,22 +158,22 @@ void Encoding::init() const
                        if (!eightbit.empty()) {
                                CharInfoMap::const_iterator const it = unicodesymbols.find(c);
                                if (it == unicodesymbols.end())
-                                       encodable_.insert(c);
+                                       encodable.insert(c);
                                else if (!it->second.force()) {
                                        if (forced_->empty() || forced_->find(c) == forced_->end())
-                                               encodable_.insert(c);
+                                               encodable.insert(c);
                                }
                        }
                }
        }
        lyxerr.enable();
-       CharSet::iterator it = encodable_.find(start_encodable_);
-       while (it != encodable_.end()) {
-               encodable_.erase(it);
-               ++start_encodable_;
-               it = encodable_.find(start_encodable_);
+       CharSet::iterator it = encodable.find(start_encodable);
+       while (it != encodable.end()) {
+               encodable.erase(it);
+               ++start_encodable;
+               it = encodable.find(start_encodable);
        }
-       complete_ = true;
+       const_cast<Encoding *>(this)->complete_ = true;
 }
 
 
@@ -239,16 +260,19 @@ vector<char_type> Encoding::symbolsList() const
        // assure the used encoding is properly initialized
        init();
 
-       // first all encodable characters
-       vector<char_type> symbols(encodable_.begin(), encodable_.end());
-       // add those below start_encodable_
+       // first all those below start_encodable_
+       vector<char_type> symbols;
        for (char_type c = 0; c < start_encodable_; ++c)
                symbols.push_back(c);
-       // now the ones from the unicodesymbols file
-       CharInfoMap::const_iterator const end = unicodesymbols.end();
-       CharInfoMap::const_iterator it = unicodesymbols.begin();
-       for (; it != end; ++it)
-               symbols.push_back(it->first);
+       // add all encodable characters
+       copy(encodable_.begin(), encodable_.end(), back_inserter(symbols));
+       // now the ones from the unicodesymbols file that are not already there
+       for (pair<char_type, CharInfo> const & elem : unicodesymbols) {
+               if (find(symbols.begin(), symbols.end(), elem.first) == symbols.end())
+                       symbols.push_back(elem.first);
+       }
+       // finally, sort the vector
+       sort(symbols.begin(), symbols.end());
        return symbols;
 }
 
@@ -297,6 +321,8 @@ char_type Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
        CharInfoMap::const_iterator const end = unicodesymbols.end();
        CharInfoMap::const_iterator it = unicodesymbols.begin();
        for (combining = false; it != end; ++it) {
+               if (it->second.deprecated())
+                       continue;
                docstring const math = it->second.mathcommand();
                docstring const text = it->second.textcommand();
                if ((cmdtype & MATH_CMD) && math == cmd) {
@@ -382,6 +408,8 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
                size_t unicmd_size = 0;
                char_type c = 0;
                for (; it != uniend; ++it) {
+                       if (it->second.deprecated())
+                               continue;
                        docstring const math = mathmode ? it->second.mathcommand()
                                                        : docstring();
                        docstring const text = textmode ? it->second.textcommand()
@@ -451,7 +479,7 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype,
                                   || (tmp.size() == prefix + 1 &&
                                       !isAlphaASCII(tmp[1]) &&
                                       (prefix == 1 || !isAlphaASCII(tmp[2])))
-                                  || k == cmdend 
+                                  || k == cmdend
                                   || !isAlphaASCII(cmd[k])
                                   || tmp[tmp.size() - 1] == '}'
                                 ) {
@@ -577,13 +605,25 @@ bool Encodings::isMathAlpha(char_type c)
 }
 
 
+bool Encodings::isUnicodeTextOnly(char_type c)
+{
+       if (isASCII(c) || isMathAlpha(c))
+               return false;
+
+       CharInfoMap::const_iterator const it = unicodesymbols.find(c);
+       return it == unicodesymbols.end() || it->second.mathcommand().empty();
+}
+
+
 Encoding const *
 Encodings::fromLyXName(string const & name, bool allowUnsafe) const
 {
        EncodingList::const_iterator const it = encodinglist.find(name);
+       if (it == encodinglist.end())
+               return 0;
        if (!allowUnsafe && it->second.unsafe())
                return 0;
-       return it != encodinglist.end() ? &it->second : 0;
+       return &it->second;
 }
 
 
@@ -660,7 +700,7 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                if (!symbolslex.next(true))
                        break;
                string sflags = symbolslex.getString();
-               
+
                string tipashortcut;
                int flags = 0;
 
@@ -700,6 +740,8 @@ void Encodings::read(FileName const & encfile, FileName const & symbolsfile)
                                flags &= ~CharInfoMathNoTermination;
                        } else if (contains(flag, "tipashortcut=")) {
                                tipashortcut = split(flag, '=');
+                       } else if (flag == "deprecated") {
+                               flags |= CharInfoDeprecated;
                        } else {
                                lyxerr << "Ignoring unknown flag `" << flag
                                       << "' for symbol `0x"