catInvalid // 15 <delete>
};
+enum cat_type { // catcode regime selector; this is the argument type of Parser::setCatcodes()
+ NORMAL_CATCODES, // parser in normal mode
+ VERBATIM_CATCODES, // parser in verbatim mode
+ UNDECIDED_CATCODES // NOTE(review): presumably "no regime selected yet" -- confirm against the implementation
+};
+
enum {
FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
* ../mathed/MathParser.cpp (which is the ancestor of this
* class) uses a separate char member for this method. I
* believe that the intended usage is to not cover tokens with
- * catEscape, e.g. \code
- * return (cs_.empty() || cat_ == catEscape) ? 0 : cs_[0];
+ * catEscape or catComment, e.g. \code
+ * return (cs_.empty() || cat_ == catEscape || cat_ == catComment) ? 0 : cs_[0];
* \endcode
* All usages of this method should be checked. gb 2011-01-05
*/
std::ostream & operator<<(std::ostream & os, Token const & t);
+#ifdef FILEDEBUG
+extern void debugToken(std::ostream & os, Token const & t, unsigned int flags);
+#endif
+
/*!
* Actual parser class
///
~Parser();
- /// change the latex encoding of the input stream
+ ///
+ CatCode catcode(char_type c) const;
+ ///
+ void setCatcode(char c, CatCode cat);
+ /// set parser to normal or verbatim mode
+ void setCatcodes(cat_type t);
+
+ /// change the iconv encoding of the input stream
+ /// according to the latex encoding and package
+ void setEncoding(std::string const & encoding, int const & package);
+ /// change the iconv encoding of the input stream
void setEncoding(std::string const & encoding);
- /// get the current latex encoding of the input stream
- std::string getEncoding() const { return encoding_latex_; }
+ /// get the current iconv encoding of the input stream
+ std::string getEncoding() const { return encoding_iconv_; }
///
int lineno() const { return lineno_; }
typedef std::pair<bool, std::string> Arg;
/*!
* Get an argument enclosed by \p left and \p right.
+ * If \p allow_escaping is true, a right delimiter escaped by a
+ * backslash does not count as delimiter, but is included in the
+ * argument.
* \returns whether an argument was found in \p Arg.first and the
* argument in \p Arg.second. \see getArg().
*/
- Arg getFullArg(char left, char right);
+ Arg getFullArg(char left, char right, bool allow_escaping = true);
/*!
* Get an argument enclosed by \p left and \p right.
+ * If \p allow_escaping is true, a right delimiter escaped by a
+ * backslash does not count as delimiter, but is included in the
+ * argument.
* \returns the argument (without \p left and \p right) or the empty
* string if the next non-space token is not \p left. Use
* getFullArg() if you need to know whether there was an empty
* argument or no argument at all.
*/
- std::string getArg(char left, char right);
+ std::string getArg(char left, char right, bool allow_escaping = true);
/*!
- * \returns getFullArg('[', ']') including the brackets or the
- * empty string if there is no such argument.
+ * Like getOpt(), but distinguishes between a missing argument ""
+ * and an empty argument "[]".
*/
- std::string getFullOpt();
+ std::string getFullOpt(bool keepws = false);
/*!
* \returns getArg('[', ']') including the brackets or the
* empty string if there is no such argument.
* during .tex export, thus creating an invalid command.
*/
std::string getOpt(bool keepws = false);
- /*!
- * the same as getOpt but without the brackets
- */
- std::string getOptContent();
/*!
* \returns getFullArg('(', ')') including the parentheses or the
* empty string if there is no such argument.
/*!
* \returns the contents of the environment \p name.
* <tt>\begin{name}</tt> must be parsed already, <tt>\end{name}</tt>
- * is parsed but not returned.
+ * is parsed but not returned. This parses nested environments properly.
+ */
+ std::string const ertEnvironment(std::string const & name);
+ /*
+ * The same as ertEnvironment(std::string const & name) but
+ * \begin and \end commands inside the name environment are not parsed.
+ * This function is designed to parse verbatim environments.
+ */
+ std::string const plainEnvironment(std::string const & name);
+ /*
+ * Basically the same as plainEnvironment(std::string const & name) but
+ * instead of \begin and \end commands the parsing is started/stopped
+ * at given characters.
+ * This function is designed to parse verbatim commands.
+ */
+ std::string const plainCommand(char left, char right, std::string const & name);
+ /*!
+ * Basically the same as plainEnvironment() but the parsing is
+ * stopped at string \p end_string. Contrary to the other
+ * methods, this uses proper catcode setting. This function is
+ * designed to parse verbatim environments and commands. The
+ * intention is to eventually replace all of its siblings.
+ */
+ std::string const verbatimStuff(std::string const & end_string);
+ /*!
+ * \returns the contents of the environment \p name.
+ * <tt>\begin{name}</tt> must be parsed already,
+ * <tt>\end{name}</tt> is parsed but not returned. The string
+ * is parsed with proper verbatim catcodes and one newline is
+ * removed from head and tail of the string if applicable.
*/
- std::string const verbatimEnvironment(std::string const & name);
+ std::string const verbatimEnvironment(std::string const & name);
+ ///
+ std::string verbatim_item();
+ ///
+ std::string verbatimOption();
/*!
* Returns the character of the current token and increments
* the token position.
char getChar();
///
void error(std::string const & msg);
- /// Parses one token from \p is
- void tokenize_one();
///
void push_back(Token const & t);
/// The previous token.
Token const curr_token() const;
/// The next token.
Token const next_token();
+ /// The next but one token.
+ Token const next_next_token();
/// Make the next token current and return that.
Token const get_token();
/// \return whether the current token starts a new paragraph
/// puts back spaces (and comments if \p skip_comments is true)
void unskip_spaces(bool skip_comments = false);
///
- void lex(std::string const & s);
- ///
bool good();
- ///
- std::string verbatim_item();
- ///
- std::string verbatimOption();
/// resets the parser to initial state
void reset();
- ///
- void setCatCode(char c, CatCode cat);
- ///
- CatCode getCatCode(char c) const;
private:
+ /// Setup catcode table
+ void catInit();
+ /// Parses one token from \p is
+ void tokenize_one();
///
int lineno_;
///
idocstringstream * iss_;
///
idocstream & is_;
- /// latex name of the current encoding
- std::string encoding_latex_;
+ /// iconv name of the current encoding
+ std::string encoding_iconv_;
+ /// Catcode table with 256 entries (presumably indexed by character value and filled by catInit() -- confirm)
+ CatCode theCatcode_[256];
+ /// NOTE(review): presumably the catcode regime last requested via setCatcodes() -- confirm
+ cat_type theCatcodesType_;
+ /// NOTE(review): purpose not evident from this header; presumably the regime currently in effect while tokenizing -- confirm
+ cat_type curr_cat_;
};