#include "Lexer.h"
#include "support/debug.h"
+#include "support/docstring.h"
#include "support/gettext.h"
#include "support/lstrings.h"
+#include "support/mutex.h"
#include "support/textutils.h"
#include "support/unicode.h"
-#include <boost/cstdint.hpp>
-
-#include <sstream>
#include <algorithm>
+#include <cstdint>
+#include <iterator>
+#include <sstream>
using namespace std;
using namespace lyx::support;
/// Upper bound of the UCS4 code space, (1<<20) + (1<<16) = 0x110000; this is one past the highest code point 0x10FFFF
char_type const max_ucs4 = 0x110000;
-} // namespace anon
+} // namespace
EncodingException::EncodingException(char_type c)
}
-const char * EncodingException::what() const throw()
+const char * EncodingException::what() const noexcept // always yields the fixed message below; the offending character is not part of it
{
return "Could not find LaTeX command for a character";
}
CharInfo::CharInfo(
- docstring const textcommand, docstring const mathcommand,
- std::string const textpreamble, std::string const mathpreamble,
- std::string const tipashortcut, unsigned int flags)
+ docstring const & textcommand, docstring const & mathcommand,
+ std::string const & textpreamble, std::string const & mathpreamble,
+ std::string const & tipashortcut, unsigned int flags) // each argument only initializes the member named after it with a trailing underscore
: textcommand_(textcommand), mathcommand_(mathcommand),
textpreamble_(textpreamble), mathpreamble_(mathpreamble),
tipashortcut_(tipashortcut), flags_(flags)
{
}
+
Encoding::Encoding(string const & n, string const & l, string const & g,
string const & i, bool f, bool u, Encoding::Package p)
: name_(n), latexName_(l), guiName_(g), iconvName_(i), fixedwidth_(f),
void Encoding::init() const
{
+ // Since the constructor is the only method which sets complete_
+ // to false, the test for complete_ is thread-safe without a mutex.
if (complete_)
return;
- start_encodable_ = 0;
+ static Mutex mutex;
+ Mutex::Locker lock(&mutex);
+
+ // We need to test again for complete_, since another thread could
+ // have set it to true while we were waiting for the lock and we must
+ // not modify an encoding which is already complete.
+ if (complete_)
+ return;
+
+ // We do not make any member mutable so that it can be easily verified
+ // that all const methods are thread-safe: init() is the only const
+ // method which changes complete_, encodable_ and start_encodable_, and
+ // it uses a mutex to ensure thread-safety.
+ CharSet & encodable = const_cast<Encoding *>(this)->encodable_;
+ char_type & start_encodable = const_cast<Encoding *>(this)->start_encodable_;
+
+ start_encodable = 0;
// temporarily switch off lyxerr, since we will generate iconv errors
lyxerr.disable();
if (fixedwidth_) {
char_type const uc = ucs4[0];
CharInfoMap::const_iterator const it = unicodesymbols.find(uc);
if (it == unicodesymbols.end())
- encodable_.insert(uc);
+ encodable.insert(uc);
else if (!it->second.force()) {
if (forced_->empty() || forced_->find(uc) == forced_->end())
- encodable_.insert(uc);
+ encodable.insert(uc);
}
}
} else {
if (!eightbit.empty()) {
CharInfoMap::const_iterator const it = unicodesymbols.find(c);
if (it == unicodesymbols.end())
- encodable_.insert(c);
+ encodable.insert(c);
else if (!it->second.force()) {
if (forced_->empty() || forced_->find(c) == forced_->end())
- encodable_.insert(c);
+ encodable.insert(c);
}
}
}
}
lyxerr.enable();
- CharSet::iterator it = encodable_.find(start_encodable_);
- while (it != encodable_.end()) {
- encodable_.erase(it);
- ++start_encodable_;
- it = encodable_.find(start_encodable_);
+ CharSet::iterator it = encodable.find(start_encodable);
+ while (it != encodable.end()) {
+ encodable.erase(it);
+ ++start_encodable;
+ it = encodable.find(start_encodable);
}
- complete_ = true;
+ const_cast<Encoding *>(this)->complete_ = true;
}
{
// ensure the used encoding is properly initialized
init();
-
if (iconvName_ == "UTF-8" && package_ == none)
return true;
+ // platex does not load inputenc: force conversion of supported characters
+ if (package_ == Encoding::japanese
+ && ((0xb7 <= c && c <= 0x05ff) // Latin-1 Supplement ... Hebrew
+ || (0x1d00 <= c && c <= 0x218f) // Phonetic Extensions ... Number Forms
+ || (0x2193 <= c && c <= 0x2aff) // Arrows ... Supplemental Mathematical Operators
+ || (0xfb00 <= c && c <= 0xfb4f) // Alphabetic Presentation Forms
+ || (0x1d400 <= c && c <= 0x1d7ff))) // Mathematical Alphanumeric Symbols
+ return false;
if (c < start_encodable_ && !isForced(c))
return true;
if (encodable_.find(c) != encodable_.end())
}
-pair<docstring, docstring> Encoding::latexString(docstring const input, bool dryrun) const
+pair<docstring, docstring> Encoding::latexString(docstring const & input, bool dryrun) const
{
docstring result;
docstring uncodable;
bool terminate = false;
- for (size_t n = 0; n < input.size(); ++n) {
+ for (char_type const c : input) {
try {
- char_type const c = input[n];
pair<docstring, bool> latex_char = latexChar(c);
docstring const latex = latex_char.first;
if (terminate && !prefixIs(latex, '\\')
result += latex;
terminate = latex_char.second;
} catch (EncodingException & /* e */) {
- LYXERR0("Uncodable character in latexString!");
+ LYXERR0("Uncodable character <" << docstring(1, c)
+ << "> in latexString!");
if (dryrun) {
result += "<" + _("LyX Warning: ")
+ _("uncodable character") + " '";
- result += docstring(1, input[n]);
+ result += docstring(1, c);
result += "'>";
} else
- uncodable += input[n];
+ uncodable += c;
}
}
return make_pair(result, uncodable);
// ensure the used encoding is properly initialized
init();
- // first all encodable characters
- vector<char_type> symbols(encodable_.begin(), encodable_.end());
- // add those below start_encodable_
+ // first all those below start_encodable_
+ vector<char_type> symbols;
for (char_type c = 0; c < start_encodable_; ++c)
symbols.push_back(c);
- // now the ones from the unicodesymbols file
- CharInfoMap::const_iterator const end = unicodesymbols.end();
- CharInfoMap::const_iterator it = unicodesymbols.begin();
- for (; it != end; ++it)
- symbols.push_back(it->first);
+ // add all encodable characters
+ copy(encodable_.begin(), encodable_.end(), back_inserter(symbols));
+ // now the ones from the unicodesymbols file that are not already there
+ for (auto const & elem : unicodesymbols) {
+ if (find(symbols.begin(), symbols.end(), elem.first) == symbols.end())
+ symbols.push_back(elem.first);
+ }
+ // finally, sort the vector
+ sort(symbols.begin(), symbols.end());
return symbols;
}
if (!encoding || command.empty()) {
command = it->second.textcommand();
needsTermination = !it->second.textnotermination();
- addTextCmd(c);
}
if (mathmode)
addMathSym(c);
+ else
+ addTextCmd(c);
}
return use_math;
}
CharInfoMap::const_iterator const end = unicodesymbols.end();
CharInfoMap::const_iterator it = unicodesymbols.begin();
for (combining = false; it != end; ++it) {
+ if (it->second.deprecated())
+ continue;
docstring const math = it->second.mathcommand();
docstring const text = it->second.textcommand();
if ((cmdtype & MATH_CMD) && math == cmd) {
size_t unicmd_size = 0;
char_type c = 0;
for (; it != uniend; ++it) {
+ if (it->second.deprecated())
+ continue;
docstring const math = mathmode ? it->second.mathcommand()
: docstring();
docstring const text = textmode ? it->second.textcommand()
|| (tmp.size() == prefix + 1 &&
!isAlphaASCII(tmp[1]) &&
(prefix == 1 || !isAlphaASCII(tmp[2])))
- || k == cmdend
+ || k == cmdend
|| !isAlphaASCII(cmd[k])
|| tmp[tmp.size() - 1] == '}'
) {
}
-bool Encodings::isKnownScriptChar(char_type const c, string & preamble)
+string const Encodings::isKnownScriptChar(char_type const c) // yields "textgreek" or "textcyrillic" when that is exactly c's text preamble, otherwise an empty string
{
CharInfoMap::const_iterator const it = unicodesymbols.find(c);
if (it == unicodesymbols.end())
- return false;
+ return string(); // c has no entry in unicodesymbols
+ // FIXME: parse complex textpreamble (may be list or alternatives,
+ // e.g., "subscript,textgreek" or "textcomp|textgreek")
+ if (it->second.textpreamble() == "textgreek"
+ || it->second.textpreamble() == "textcyrillic")
+ return it->second.textpreamble();
+ return string(); // the preamble is not one of the two script names compared above
+}
- if (it->second.textpreamble() != "textgreek" && it->second.textpreamble() != "textcyr")
- return false;
- if (preamble.empty()) {
- preamble = it->second.textpreamble();
- return true;
- }
- return it->second.textpreamble() == preamble;
+bool Encodings::fontencSupportsScript(string const & fontenc, string const & script) // true when fontenc is one of the font encodings listed below for script
+{
+ if (script == "textgreek")
+ return (fontenc == "LGR" || fontenc == "TU");
+ if (script == "textcyrillic")
+ return (fontenc == "T2A" || fontenc == "T2B" || fontenc == "T2C"
+ || fontenc == "X2" || fontenc == "TU");
+ return false; // any other script name is reported as unsupported
}
}
+bool Encodings::isUnicodeTextOnly(char_type c) // true when c has no math command recorded in unicodesymbols, subject to the exclusion below
+{
+ if (isASCII(c) || isMathAlpha(c)) // characters accepted by either predicate are never reported as text-only
+ return false;
+
+ CharInfoMap::const_iterator const it = unicodesymbols.find(c);
+ return it == unicodesymbols.end() || it->second.mathcommand().empty(); // a character without any unicodesymbols entry also counts as text-only
+}
+
+
Encoding const *
Encodings::fromLyXName(string const & name, bool allowUnsafe) const
{
EncodingList::const_iterator const it = encodinglist.find(name);
+ if (it == encodinglist.end()) // checked first so that it->second is never reached for a missing name
+ return nullptr; // no entry in encodinglist is keyed by name
if (!allowUnsafe && it->second.unsafe())
- return 0;
- return it != encodinglist.end() ? &it->second : 0;
+ return nullptr; // the entry exists but is flagged unsafe and the caller did not allow unsafe encodings
+ return &it->second; // pointer to the element stored inside encodinglist
}
Encoding const *
-Encodings::fromLaTeXName(string const & n, int const & p, bool allowUnsafe) const
+Encodings::fromLaTeXName(string const & n, int p, bool allowUnsafe) const
{
string name = n;
// FIXME: if we have to test for too many of these synonyms,
if ((it->second.latexName() == name) && (it->second.package() & p)
&& (!it->second.unsafe() || allowUnsafe))
return &it->second;
- return 0;
+ return nullptr;
}
Encoding const *
-Encodings::fromIconvName(string const & n, int const & p, bool allowUnsafe) const
+Encodings::fromIconvName(string const & n, int p, bool allowUnsafe) const // first entry in iteration order whose iconvName() equals n, whose package() shares a bit with p, and which is not unsafe unless allowUnsafe is set
{
EncodingList::const_iterator const end = encodinglist.end();
for (EncodingList::const_iterator it = encodinglist.begin(); it != end; ++it)
if ((it->second.iconvName() == n) && (it->second.package() & p)
&& (!it->second.unsafe() || allowUnsafe))
return &it->second;
- return 0;
+ return nullptr; // no entry satisfied all three conditions
}
istringstream is(symbolslex.getString());
// reading symbol directly does not work if
// char_type == wchar_t.
- boost::uint32_t tmp;
+ uint32_t tmp;
if(!(is >> hex >> tmp))
break;
symbol = tmp;
if (!symbolslex.next(true))
break;
string sflags = symbolslex.getString();
-
+
string tipashortcut;
int flags = 0;
flags |= CharInfoForce;
forced.insert(symbol);
} else if (prefixIs(flag, "force=")) {
- vector<string> encodings =
+ vector<string> encs =
getVectorFromString(flag.substr(6), ";");
- for (size_t i = 0; i < encodings.size(); ++i)
- forcedselected[encodings[i]].insert(symbol);
+ for (auto const & enc : encs)
+ forcedselected[enc].insert(symbol);
flags |= CharInfoForceSelected;
} else if (prefixIs(flag, "force!=")) {
- vector<string> encodings =
+ vector<string> encs =
getVectorFromString(flag.substr(7), ";");
- for (size_t i = 0; i < encodings.size(); ++i)
- forcednotselected[encodings[i]].insert(symbol);
+ for (auto const & enc : encs)
+ forcednotselected[enc].insert(symbol);
flags |= CharInfoForceSelected;
} else if (flag == "mathalpha") {
mathalpha.insert(symbol);
flags &= ~CharInfoMathNoTermination;
} else if (contains(flag, "tipashortcut=")) {
tipashortcut = split(flag, '=');
+ } else if (flag == "deprecated") {
+ flags |= CharInfoDeprecated;
} else {
lyxerr << "Ignoring unknown flag `" << flag
<< "' for symbol `0x"