///
Token(docstring const & cs, CatCode cat) : cs_(to_utf8(cs)), cat_(cat) {}
- ///
+ /// Returns the token as string
std::string const & cs() const { return cs_; }
/// Returns the catcode of the token
CatCode cat() const { return cat_; }
- ///
+ /** Get the character of tokens that were constructed from a single
+ * character input or a two character input and cat_ == catEscape.
+ * FIXME: The intended usage is not clear. The Token class in
+ * ../mathed/MathParser.cpp (which is the ancestor of this
+ * class) uses a separate char member for this method. I
+ * believe that the intended usage is to not cover tokens with
+ * catEscape or catComment, e.g. \code
+ * return (cs_.empty() || cat_ == catEscape || cat_ == catComment) ? 0 : cs_[0];
+ * \endcode
+ * All usages of this method should be checked. gb 2011-01-05
+ */
char character() const { return cs_.empty() ? 0 : cs_[0]; }
- /// Returns the token as string
- std::string asString() const;
/// Returns the token verbatim
std::string asInput() const;
+ /// Is the token an alphanumerical character?
+ bool isAlnumASCII() const;
private:
///
std::ostream & operator<<(std::ostream & os, Token const & t);
+#ifdef FILEDEBUG
+extern void debugToken(std::ostream & os, Token const & t, unsigned int flags);
+#endif
+
/*!
* Actual parser class
*/
class Parser {
-
+ /// noncopyable
+ Parser(Parser const & p);
+ Parser & operator=(Parser const & p);
public:
///
Parser(idocstream & is);
///
~Parser();
- /// change the encoding of the input stream
+ /// change the latex encoding of the input stream
void setEncoding(std::string const & encoding);
+ /// get the current latex encoding of the input stream
+ std::string getEncoding() const { return encoding_latex_; }
///
int lineno() const { return lineno_; }
///
void putback();
+ /// store current position
+ void pushPosition();
+ /// restore previous position
+ void popPosition();
/// dump contents to screen
void dump() const;
+ /// Does an optional argument follow after the current token?
+ bool hasOpt();
///
typedef std::pair<bool, std::string> Arg;
/*!
* Get an argument enclosed by \p left and \p right.
+ * If \p allow_escaping is true, a right delimiter escaped by a
+ * backslash does not count as delimiter, but is included in the
+ * argument.
* \returns wether an argument was found in \p Arg.first and the
* argument in \p Arg.second. \see getArg().
*/
- Arg getFullArg(char left, char right);
+ Arg getFullArg(char left, char right, bool allow_escaping = true);
/*!
* Get an argument enclosed by \p left and \p right.
+ * If \p allow_escaping is true, a right delimiter escaped by a
+ * backslash does not count as delimiter, but is included in the
+ * argument.
* \returns the argument (without \p left and \p right) or the empty
* string if the next non-space token is not \p left. Use
* getFullArg() if you need to know wether there was an empty
* argument or no argument at all.
*/
- std::string getArg(char left, char right);
+ std::string getArg(char left, char right, bool allow_escaping = true);
/*!
- * \returns getFullArg('[', ']') including the brackets or the
- * empty string if there is no such argument.
+ * Like getOpt(), but distinguishes between a missing argument ""
+ * and an empty argument "[]".
*/
- std::string getFullOpt();
+ std::string getFullOpt(bool keepws = false);
/*!
* \returns getArg('[', ']') including the brackets or the
* empty string if there is no such argument.
+ * No whitespace is eaten if \p keepws is true and no optional
+ * argument exists. This is important if an optional argument is
+ * parsed that would go after a command in ERT: In this case the
+ * whitespace is needed to separate the ERT from the subsequent
+ * word. Without it, the ERT and the next word would be concatenated
+ * during .tex export, thus creating an invalid command.
*/
- std::string getOpt();
+ std::string getOpt(bool keepws = false);
/*!
* \returns getFullArg('(', ')') including the parentheses or the
* empty string if there is no such argument.
* is parsed but not returned.
*/
std::string const verbatimEnvironment(std::string const & name);
+ /*
+ * The same as verbatimEnvironment(std::string const & name) but
+ * \begin and \end commands inside the name environment are not parsed.
+ * This function is designed to parse verbatim environments.
+ */
+ std::string const plainEnvironment(std::string const & name);
+ /*
+ * Basically the same as plainEnvironment(std::string const & name) but
+ * instead of \begin and \end commands the parsing is started/stopped
+ * at given characters.
+ * This function is designed to parse verbatim commands.
+ */
+ std::string const plainCommand(char left, char right, std::string const & name);
/*!
* Returns the character of the current token and increments
* the token position.
///
void push_back(Token const & t);
/// The previous token.
- Token const & prev_token() const;
+ Token const prev_token() const;
/// The current token.
- Token const & curr_token() const;
+ Token const curr_token() const;
/// The next token.
- Token const & next_token();
+ Token const next_token();
+ /// The next but one token.
+ Token const next_next_token();
/// Make the next token current and return that.
- Token const & get_token();
+ Token const get_token();
/// \return whether the current token starts a new paragraph
bool isParagraph();
/// skips spaces (and comments if \p skip_comments is true)
- void skip_spaces(bool skip_comments = false);
+ /// \return whether whitespace was skipped (not comments)
+ bool skip_spaces(bool skip_comments = false);
/// puts back spaces (and comments if \p skip_comments is true)
void unskip_spaces(bool skip_comments = false);
///
///
unsigned pos_;
///
+ std::vector<unsigned> positions_;
+ ///
idocstringstream * iss_;
///
idocstream & is_;
+ /// latex name of the current encoding
+ std::string encoding_latex_;
};