#define ENCODING_H
#include "support/docstring.h"
+#include "support/trivstring.h"
#include "support/types.h"
#include <map>
class EncodingException : public std::exception {
public:
EncodingException(char_type c);
- virtual ~EncodingException() throw() {}
- virtual const char * what() const throw();
-
+ virtual ~EncodingException() noexcept {}
+ virtual const char * what() const noexcept;
+
char_type failed_char;
int par_id;
pos_type pos;
CharInfoMathNoTermination = 32,
///
CharInfoForceSelected = 64,
+ ///
+ CharInfoDeprecated = 128
};
public:
CharInfo() : flags_(0) {}
CharInfo(
- docstring const textcommand, docstring const mathcommand,
- std::string const textpreamble, std::string const mathpreamble,
- std::string const tipashortcut, unsigned int flags);
+ docstring const & textcommand, docstring const & mathcommand,
+ std::string const & textpreamble, std::string const & mathpreamble,
+ std::string const & tipashortcut, unsigned int flags);
// we assume that at least one command is nonempty when using unicodesymbols
bool isUnicodeSymbol() const { return !textcommand_.empty() || !mathcommand_.empty(); }
/// LaTeX command (text mode) for this character
/// Needed LaTeX preamble (or feature) for math mode
std::string mathpreamble() const { return mathpreamble_; }
/// Is this a combining character?
- bool combining() const { return flags_ & CharInfoCombining ? true : false; }
+ bool combining() const { return flags_ & CharInfoCombining; }
/// Is \c textpreamble a feature known by LaTeXFeatures, or a raw LaTeX
/// command?
- bool textfeature() const { return flags_ & CharInfoTextFeature ? true : false; }
+ bool textfeature() const { return flags_ & CharInfoTextFeature; }
/// Is \c mathpreamble a feature known by LaTeXFeatures, or a raw LaTeX
/// command?
- bool mathfeature() const { return flags_ & CharInfoMathFeature ? true : false; }
+ bool mathfeature() const { return flags_ & CharInfoMathFeature; }
/// Always force the LaTeX command, even if the encoding contains
/// this character?
- bool force() const { return flags_ & CharInfoForce ? true : false; }
+ bool force() const { return flags_ & CharInfoForce; }
/// Force the LaTeX command for some encodings?
- bool forceselected() const { return flags_ & CharInfoForceSelected ? true : false; }
+ bool forceselected() const { return flags_ & CharInfoForceSelected; }
+ /// Disable LaTeX command => char_type conversion for this deprecated symbol?
+ bool deprecated() const { return flags_ & CharInfoDeprecated; }
/// TIPA shortcut
std::string const tipashortcut() const { return tipashortcut_; }
/// \c textcommand needs no termination (such as {} or space).
- bool textnotermination() const { return flags_ & CharInfoTextNoTermination ? true : false; }
+ bool textnotermination() const { return flags_ & CharInfoTextNoTermination; }
/// \c mathcommand needs no termination (such as {} or space).
- bool mathnotermination() const { return flags_ & CharInfoMathNoTermination ? true : false; }
+ bool mathnotermination() const { return flags_ & CharInfoMathNoTermination; }
///
private:
/// LaTeX command (text mode) for this character
- docstring textcommand_;
+ trivdocstring textcommand_;
/// LaTeX command (math mode) for this character
- docstring mathcommand_;
+ trivdocstring mathcommand_;
/// Needed LaTeX preamble (or feature) for text mode
- std::string textpreamble_;
+ trivstring textpreamble_;
/// Needed LaTeX preamble (or feature) for math mode
- std::string mathpreamble_;
+ trivstring mathpreamble_;
/// TIPA shortcut
- std::string tipashortcut_;
+ trivstring tipashortcut_;
/// feature flags
unsigned int flags_;
};
-///
+/**
+ * An encoding as defined in lib/encodings.
+ * All const methods are thread-safe, so the caller does not need any locking.
+ * This property must be kept when changing the class.
+ */
class Encoding {
public:
/// Which LaTeX package handles this encoding?
/// Represent any of the above packages
static int const any;
///
- Encoding() : fixedwidth_(true), unsafe_(false), complete_(false) {}
+ Encoding() : fixedwidth_(true), unsafe_(false), forced_(nullptr),
+ start_encodable_(0), package_(none), complete_(false) {}
///
Encoding(std::string const & n, std::string const & l,
std::string const & g, std::string const & i,
///
void init() const;
///
- std::string const & name() const { return name_; }
+ std::string const name() const { return name_; }
///
- std::string const & latexName() const { return latexName_; }
+ std::string const latexName() const { return latexName_; }
///
- std::string const & guiName() const { return guiName_; }
+ std::string const guiName() const { return guiName_; }
///
- std::string const & iconvName() const { return iconvName_; }
+ std::string const iconvName() const { return iconvName_; }
///
bool hasFixedWidth() const { return fixedwidth_; }
///
* \p dryrun specifies whether the string is used within source
* preview (which yields a special warning).
*/
- std::pair<docstring, docstring> latexString(docstring const input,
+ std::pair<docstring, docstring> latexString(docstring const & input,
bool dryrun = false) const;
/// Which LaTeX package handles this encoding?
Package package() const { return package_; }
*/
bool isForced(char_type c) const;
///
- std::string name_;
+ trivstring name_;
///
- std::string latexName_;
+ trivstring latexName_;
///
- std::string guiName_;
+ trivstring guiName_;
///
- std::string iconvName_;
+ trivstring iconvName_;
/// Is this a fixed width encoding?
bool fixedwidth_;
/// Is this encoding TeX unsafe, e.g. control characters like {, }
typedef std::set<char_type> CharSet;
/// Set of UCS4 characters that we can encode (for singlebyte
/// encodings only)
- mutable CharSet encodable_;
+ CharSet encodable_;
/// Set of UCS4 characters that we can't encode
CharSet const * forced_;
/// All code points below this are encodable. This helps us to avoid
/// lokup of ASCII characters in encodable_ and gives about 1 sec
/// speedup on export of the Userguide.
- mutable char_type start_encodable_;
+ char_type start_encodable_;
/// Which LaTeX package handles this encoding?
Package package_;
/**
* This is needed especially for the multibyte encodings, if we
* complete all encoding info on startup it takes 2-3 minutes.
*/
- mutable bool complete_;
+ bool complete_;
};
class Encodings {
///
typedef std::set<char_type> MathSymbolSet;
///
- typedef std::map<std::string, Encoding> EncodingList;
+ typedef std::map<trivstring, Encoding> EncodingList;
/// iterator to iterate over all encodings.
/// We hide the fact that our encoding list is implemented as a map.
class const_iterator : public EncodingList::const_iterator {
///
const_iterator end() const { return encodinglist.end(); }
- ///
- enum LetterForm {
- ///
- FORM_ISOLATED,
- ///
- FORM_FINAL,
- ///
- FORM_INITIAL,
- ///
- FORM_MEDIAL
- };
- ///
- static bool isHebrewComposeChar(char_type c);
- ///
- static bool isHebrewChar(char_type c);
- ///
- static bool isArabicComposeChar(char_type c);
- ///
- static bool isArabicSpecialChar(char_type c);
- ///
- static bool isArabicChar(char_type c);
/// Accessor for the unicode information table.
static CharInfo const & unicodeCharInfo(char_type c);
- ///
- static char_type transformChar(char_type c, LetterForm form);
/// Is this a combining char?
static bool isCombiningChar(char_type c);
/// Return the TIPA shortcut
static std::string const TIPAShortcut(char_type c);
/**
- * Is this a known char from some language?
- * If \p preamble is empty and code point \p c is known to belong
- * to a supported script, true is returned and \p preamble is set
- * to the corresponding entry in the unicodesymbols file.
- * If \p preamble is not empty, a check is made whether code point
- * \p c is a known character matching the preamble entry.
+ * Test whether \p c is a supported Greek or Cyrillic letter.
+ * Return the script macro name, or an empty string if it is not.
*/
- static bool isKnownScriptChar(char_type const c, std::string & preamble);
+ static std::string const isKnownScriptChar(char_type const c);
+ /// Does \p fontenc support characters in \p script?
+ static bool fontencSupportsScript(std::string const & fontenc,
+ std::string const & script);
/**
* Do we have to display in italics this character when in mathmode?
* This is true if the "mathalpha" flag is set. We use this for
* letters and accented characters that are output as math commands.
*/
static bool isMathAlpha(char_type c);
+ /**
+ * Do we have to wrap this character in \text when in mathmode?
+ * This is true if \p c is not ASCII, the "mathalpha" flag is not
+ * set, and no mathcommand is defined in the unicodesymbols file.
+ */
+ static bool isUnicodeTextOnly(char_type c);
/**
* Register \p c as a mathmode command.
*/
*/
static char_type fromLaTeXCommand(docstring const & cmd, int cmdtype,
bool & combining, bool & needsTermination,
- std::set<std::string> * req = 0);
+ std::set<std::string> * req = nullptr);
///
enum LatexCmd {
///
*/
static docstring fromLaTeXCommand(docstring const & cmd, int cmdtype,
bool & needsTermination, docstring & rem,
- std::set<std::string> * req = 0);
+ std::set<std::string> * req = nullptr);
protected:
///