X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2FParser.h;h=c0c5685bb83fc449cf2567c39f00206b38fb3629;hb=2872f35a51a66895e65f38ccf945c89aa7540b02;hp=3ea9aa50bf7a90053ae6a28a0eed890797d5eba6;hpb=e4c9689906e05c94116feea53ff29ad00a0e5a0b;p=lyx.git diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h index 3ea9aa50bf..c0c5685bb8 100644 --- a/src/tex2lyx/Parser.h +++ b/src/tex2lyx/Parser.h @@ -12,10 +12,11 @@ #ifndef PARSER_H #define PARSER_H -#include #include #include +#include +#include "support/docstream.h" namespace lyx { @@ -46,9 +47,6 @@ enum CatCode { }; -CatCode catcode(unsigned char c); - - enum { FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process @@ -75,34 +73,44 @@ enum { class Token { public: /// - Token() : cs_(), char_(0), cat_(catIgnore) {} - /// - Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {} + Token() : cs_(), cat_(catIgnore) {} /// - Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {} + Token(docstring const & cs, CatCode cat) : cs_(to_utf8(cs)), cat_(cat) {} - /// + /// Returns the token as string std::string const & cs() const { return cs_; } /// Returns the catcode of the token CatCode cat() const { return cat_; } - /// - char character() const { return char_; } - /// Returns the token as string - std::string asString() const; + /** Get the character of tokens that were constructed from a single + * character input or a two character input and cat_ == catEscape. + * FIXME: The intended usage is not clear. The Token class in + * ../mathed/MathParser.cpp (which is the anchestor of this + * class) uses a separate char member for this method. I + * believe that the intended usage is to not cover tokens with + * catEscape, e.g. \code + * return (cs_.empty() || cat_ == catEscape) ? 0 : cs_[0]; + * \endcode + * All usages of this method should be checked. gb 2011-01-05 + */ + char character() const { return cs_.empty() ? 0 : cs_[0]; } /// Returns the token verbatim std::string asInput() const; + /// Is the token an alphanumerical character? + bool isAlnumASCII() const; private: /// std::string cs_; /// - char char_; - /// CatCode cat_; }; std::ostream & operator<<(std::ostream & os, Token const & t); +#ifdef FILEDEBUG +extern void debugToken(std::ostream & os, Token const & t, unsigned int flags); +#endif + /*! * Actual parser class @@ -116,22 +124,35 @@ std::ostream & operator<<(std::ostream & os, Token const & t); */ class Parser { - + /// noncopyable + Parser(Parser const & p); + Parser & operator=(Parser const & p); public: /// - Parser(std::istream & is); + Parser(idocstream & is); /// Parser(std::string const & s); /// ~Parser(); + /// change the latex encoding of the input stream + void setEncoding(std::string const & encoding); + /// get the current latex encoding of the input stream + std::string getEncoding() const { return encoding_latex_; } + /// int lineno() const { return lineno_; } /// void putback(); + /// store current position + void pushPosition(); + /// restore previous position + void popPosition(); /// dump contents to screen void dump() const; + /// Does an optional argument follow after the current token? + bool hasOpt(); /// typedef std::pair Arg; /*! @@ -149,15 +170,21 @@ public: */ std::string getArg(char left, char right); /*! - * \returns getFullArg('[', ']') including the brackets or the - * empty string if there is no such argument. + * Like getOpt(), but distinguishes between a missing argument "" + * and an empty argument "[]". */ - std::string getFullOpt(); + std::string getFullOpt(bool keepws = false); /*! * \returns getArg('[', ']') including the brackets or the * empty string if there is no such argument. + * No whitespace is eaten if \p keepws is true and no optional + * argument exists. This is important if an optional argument is + * parsed that would go after a command in ERT: In this case the + * whitespace is needed to separate the ERT from the subsequent + * word. Without it, the ERT and the next word would be concatenated + * during .tex export, thus creating an invalid command. */ - std::string getOpt(); + std::string getOpt(bool keepws = false); /*! * \returns getFullArg('(', ')') including the parentheses or the * empty string if there is no such argument. @@ -169,6 +196,19 @@ public: * is parsed but not returned. */ std::string const verbatimEnvironment(std::string const & name); + /* + * The same as verbatimEnvironment(std::string const & name) but + * \begin and \end commands inside the name environment are not parsed. + * This function is designed to parse verbatim environments. + */ + std::string const plainEnvironment(std::string const & name); + /* + * Basically the same as plainEnvironment(std::string const & name) but + * instead of \begin and \end commands the parsing is started/stopped + * at given characters. + * This function is designed to parse verbatim commands. + */ + std::string const plainCommand(char left, char right, std::string const & name); /*! * Returns the character of the current token and increments * the token position. @@ -178,28 +218,29 @@ public: void error(std::string const & msg); /// Parses one token from \p is void tokenize_one(); - /// Parses \p is into tokens - void tokenize(); /// void push_back(Token const & t); /// The previous token. - Token const & prev_token() const; + Token const prev_token() const; /// The current token. - Token const & curr_token() const; + Token const curr_token() const; /// The next token. - Token const & next_token() const; + Token const next_token(); + /// The next but one token. + Token const next_next_token(); /// Make the next token current and return that. - Token const & get_token(); + Token const get_token(); /// \return whether the current token starts a new paragraph - bool isParagraph() const; + bool isParagraph(); /// skips spaces (and comments if \p skip_comments is true) - void skip_spaces(bool skip_comments = false); + /// \return whether whitespace was skipped (not comments) + bool skip_spaces(bool skip_comments = false); /// puts back spaces (and comments if \p skip_comments is true) void unskip_spaces(bool skip_comments = false); /// void lex(std::string const & s); /// - bool good() const; + bool good(); /// std::string verbatim_item(); /// @@ -219,9 +260,13 @@ private: /// unsigned pos_; /// - std::istringstream * iss_; + std::vector positions_; + /// + idocstringstream * iss_; /// - std::istream & is_; + idocstream & is_; + /// latex name of the current encoding + std::string encoding_latex_; };