#ifndef PARSER_H
#define PARSER_H
-#include <vector>
#include <string>
#include <utility>
+#include <vector>
+#include "support/docstream.h"
namespace lyx {
};
-CatCode catcode(unsigned char c);
-
-
enum {
FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
class Token {
public:
///
- Token() : cs_(), char_(0), cat_(catIgnore) {}
- ///
- Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
+ Token() : cs_(), cat_(catIgnore) {}
///
- Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {}
+ Token(docstring const & cs, CatCode cat) : cs_(to_utf8(cs)), cat_(cat) {}
- ///
+ /// Returns the token as string
std::string const & cs() const { return cs_; }
/// Returns the catcode of the token
CatCode cat() const { return cat_; }
- ///
- char character() const { return char_; }
- /// Returns the token as string
- std::string asString() const;
+ /** Get the character of tokens that were constructed from a single
+ * character input or a two-character input with cat_ == catEscape.
+ * FIXME: The intended usage is not clear. The Token class in
+ * ../mathed/MathParser.cpp (which is the ancestor of this
+ * class) uses a separate char member for this method. I
+ * believe that the intended usage is to not cover tokens with
+ * catEscape, e.g. \code
+ * return (cs_.empty() || cat_ == catEscape) ? 0 : cs_[0];
+ * \endcode
+ * All usages of this method should be checked. gb 2011-01-05
+ * \return 0 if cs_ is empty, otherwise the first char of cs_
+ * (regardless of cat_).
+ */
+ char character() const { return cs_.empty() ? 0 : cs_[0]; }
/// Returns the token verbatim
std::string asInput() const;
+ /// Is the token an alphanumerical character?
+ bool isAlnumASCII() const;
private:
///
std::string cs_;
///
- char char_;
- ///
CatCode cat_;
};
std::ostream & operator<<(std::ostream & os, Token const & t);
+#ifdef FILEDEBUG
+extern void debugToken(std::ostream & os, Token const & t, unsigned int flags);
+#endif
+
/*!
* Actual parser class
*/
class Parser {
-
+ /// noncopyable
+ Parser(Parser const & p);
+ Parser & operator=(Parser const & p);
public:
///
- Parser(std::istream & is);
+ Parser(idocstream & is);
///
Parser(std::string const & s);
///
~Parser();
+ /// change the latex encoding of the input stream
+ void setEncoding(std::string const & encoding);
+ /// get the current latex encoding of the input stream
+ std::string getEncoding() const { return encoding_latex_; }
+
///
int lineno() const { return lineno_; }
///
void putback();
+ /// store current position
+ void pushPosition();
+ /// restore previous position
+ void popPosition();
/// dump contents to screen
void dump() const;
+ /// Does an optional argument follow after the current token?
+ bool hasOpt();
///
typedef std::pair<bool, std::string> Arg;
/*!
*/
std::string getArg(char left, char right);
/*!
- * \returns getFullArg('[', ']') including the brackets or the
- * empty string if there is no such argument.
+ * Like getOpt(), but distinguishes between a missing argument ""
+ * and an empty argument "[]".
*/
- std::string getFullOpt();
+ std::string getFullOpt(bool keepws = false);
/*!
* \returns getArg('[', ']') including the brackets or the
* empty string if there is no such argument.
+ * No whitespace is eaten if \p keepws is true and no optional
+ * argument exists. This is important if an optional argument is
+ * parsed that would go after a command in ERT: In this case the
+ * whitespace is needed to separate the ERT from the subsequent
+ * word. Without it, the ERT and the next word would be concatenated
+ * during .tex export, thus creating an invalid command.
*/
- std::string getOpt();
+ std::string getOpt(bool keepws = false);
/*!
* \returns getFullArg('(', ')') including the parentheses or the
* empty string if there is no such argument.
///
void push_back(Token const & t);
/// The previous token.
- Token const & prev_token() const;
+ Token const prev_token() const;
/// The current token.
- Token const & curr_token() const;
+ Token const curr_token() const;
/// The next token.
- Token const & next_token();
+ Token const next_token();
+ /// The next but one token.
+ Token const next_next_token();
/// Make the next token current and return that.
- Token const & get_token();
+ Token const get_token();
/// \return whether the current token starts a new paragraph
bool isParagraph();
/// skips spaces (and comments if \p skip_comments is true)
- void skip_spaces(bool skip_comments = false);
+ /// \return whether whitespace was skipped (not comments)
+ bool skip_spaces(bool skip_comments = false);
/// puts back spaces (and comments if \p skip_comments is true)
void unskip_spaces(bool skip_comments = false);
///
///
unsigned pos_;
///
- std::istringstream * iss_;
+ std::vector<unsigned> positions_;
+ ///
+ idocstringstream * iss_;
///
- std::istream & is_;
+ idocstream & is_;
+ /// latex name of the current encoding
+ std::string encoding_latex_;
};