X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2FParser.h;h=c0c5685bb83fc449cf2567c39f00206b38fb3629;hb=2872f35a51a66895e65f38ccf945c89aa7540b02;hp=c79296175e8144ee2982f3763601943d9da5faed;hpb=3e3179b4d4ee5002e41ebd27ab3504200a8d4260;p=lyx.git diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h index c79296175e..c0c5685bb8 100644 --- a/src/tex2lyx/Parser.h +++ b/src/tex2lyx/Parser.h @@ -77,16 +77,26 @@ public: /// Token(docstring const & cs, CatCode cat) : cs_(to_utf8(cs)), cat_(cat) {} - /// + /// Returns the token as string std::string const & cs() const { return cs_; } /// Returns the catcode of the token CatCode cat() const { return cat_; } - /// + /** Get the character of tokens that were constructed from a single + * character input or a two character input and cat_ == catEscape. + * FIXME: The intended usage is not clear. The Token class in + * ../mathed/MathParser.cpp (which is the anchestor of this + * class) uses a separate char member for this method. I + * believe that the intended usage is to not cover tokens with + * catEscape, e.g. \code + * return (cs_.empty() || cat_ == catEscape) ? 0 : cs_[0]; + * \endcode + * All usages of this method should be checked. gb 2011-01-05 + */ char character() const { return cs_.empty() ? 0 : cs_[0]; } - /// Returns the token as string - std::string asString() const; /// Returns the token verbatim std::string asInput() const; + /// Is the token an alphanumerical character? + bool isAlnumASCII() const; private: /// @@ -97,6 +107,10 @@ private: std::ostream & operator<<(std::ostream & os, Token const & t); +#ifdef FILEDEBUG +extern void debugToken(std::ostream & os, Token const & t, unsigned int flags); +#endif + /*! * Actual parser class @@ -110,7 +124,9 @@ std::ostream & operator<<(std::ostream & os, Token const & t); */ class Parser { - + /// noncopyable + Parser(Parser const & p); + Parser & operator=(Parser const & p); public: /// Parser(idocstream & is); @@ -128,9 +144,15 @@ public: int lineno() const { return lineno_; } /// void putback(); + /// store current position + void pushPosition(); + /// restore previous position + void popPosition(); /// dump contents to screen void dump() const; + /// Does an optional argument follow after the current token? + bool hasOpt(); /// typedef std::pair Arg; /*! @@ -148,15 +170,21 @@ public: */ std::string getArg(char left, char right); /*! - * \returns getFullArg('[', ']') including the brackets or the - * empty string if there is no such argument. + * Like getOpt(), but distinguishes between a missing argument "" + * and an empty argument "[]". */ - std::string getFullOpt(); + std::string getFullOpt(bool keepws = false); /*! * \returns getArg('[', ']') including the brackets or the * empty string if there is no such argument. + * No whitespace is eaten if \p keepws is true and no optional + * argument exists. This is important if an optional argument is + * parsed that would go after a command in ERT: In this case the + * whitespace is needed to separate the ERT from the subsequent + * word. Without it, the ERT and the next word would be concatenated + * during .tex export, thus creating an invalid command. */ - std::string getOpt(); + std::string getOpt(bool keepws = false); /*! * \returns getFullArg('(', ')') including the parentheses or the * empty string if there is no such argument. @@ -168,6 +196,19 @@ public: * is parsed but not returned. */ std::string const verbatimEnvironment(std::string const & name); + /* + * The same as verbatimEnvironment(std::string const & name) but + * \begin and \end commands inside the name environment are not parsed. + * This function is designed to parse verbatim environments. + */ + std::string const plainEnvironment(std::string const & name); + /* + * Basically the same as plainEnvironment(std::string const & name) but + * instead of \begin and \end commands the parsing is started/stopped + * at given characters. + * This function is designed to parse verbatim commands. + */ + std::string const plainCommand(char left, char right, std::string const & name); /*! * Returns the character of the current token and increments * the token position. @@ -185,12 +226,15 @@ public: Token const curr_token() const; /// The next token. Token const next_token(); + /// The next but one token. + Token const next_next_token(); /// Make the next token current and return that. Token const get_token(); /// \return whether the current token starts a new paragraph bool isParagraph(); /// skips spaces (and comments if \p skip_comments is true) - void skip_spaces(bool skip_comments = false); + /// \return whether whitespace was skipped (not comments) + bool skip_spaces(bool skip_comments = false); /// puts back spaces (and comments if \p skip_comments is true) void unskip_spaces(bool skip_comments = false); /// @@ -216,6 +260,8 @@ private: /// unsigned pos_; /// + std::vector positions_; + /// idocstringstream * iss_; /// idocstream & is_;