X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2FParser.h;h=c0c5685bb83fc449cf2567c39f00206b38fb3629;hb=2872f35a51a66895e65f38ccf945c89aa7540b02;hp=713cbfcfa1eea1182f01d93f0028ecb9f31c7af2;hpb=a77cd8c0c6bce535f3bb5fc873e1f9bbabce0a5a;p=lyx.git diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h index 713cbfcfa1..c0c5685bb8 100644 --- a/src/tex2lyx/Parser.h +++ b/src/tex2lyx/Parser.h @@ -4,7 +4,7 @@ * This file is part of LyX, the document processor. * Licence details can be found in the file COPYING. * - * \author André Pönitz + * \author André Pönitz * * Full author contact details are available in file CREDITS. */ @@ -12,10 +12,11 @@ #ifndef PARSER_H #define PARSER_H -#include #include #include +#include +#include "support/docstream.h" namespace lyx { @@ -46,9 +47,6 @@ enum CatCode { }; -CatCode catcode(unsigned char c); - - enum { FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process @@ -75,34 +73,44 @@ enum { class Token { public: /// - Token() : cs_(), char_(0), cat_(catIgnore) {} - /// - Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {} + Token() : cs_(), cat_(catIgnore) {} /// - Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {} + Token(docstring const & cs, CatCode cat) : cs_(to_utf8(cs)), cat_(cat) {} - /// + /// Returns the token as string std::string const & cs() const { return cs_; } /// Returns the catcode of the token CatCode cat() const { return cat_; } - /// - char character() const { return char_; } - /// Returns the token as string - std::string asString() const; + /** Get the character of tokens that were constructed from a single + * character input or a two character input and cat_ == catEscape. + * FIXME: The intended usage is not clear. The Token class in + * ../mathed/MathParser.cpp (which is the anchestor of this + * class) uses a separate char member for this method. I + * believe that the intended usage is to not cover tokens with + * catEscape, e.g. \code + * return (cs_.empty() || cat_ == catEscape) ? 0 : cs_[0]; + * \endcode + * All usages of this method should be checked. gb 2011-01-05 + */ + char character() const { return cs_.empty() ? 0 : cs_[0]; } /// Returns the token verbatim std::string asInput() const; + /// Is the token an alphanumerical character? + bool isAlnumASCII() const; private: /// std::string cs_; /// - char char_; - /// CatCode cat_; }; std::ostream & operator<<(std::ostream & os, Token const & t); +#ifdef FILEDEBUG +extern void debugToken(std::ostream & os, Token const & t, unsigned int flags); +#endif + /*! * Actual parser class @@ -116,20 +124,35 @@ std::ostream & operator<<(std::ostream & os, Token const & t); */ class Parser { - + /// noncopyable + Parser(Parser const & p); + Parser & operator=(Parser const & p); public: /// - Parser(std::istream & is); + Parser(idocstream & is); /// Parser(std::string const & s); + /// + ~Parser(); + + /// change the latex encoding of the input stream + void setEncoding(std::string const & encoding); + /// get the current latex encoding of the input stream + std::string getEncoding() const { return encoding_latex_; } /// int lineno() const { return lineno_; } /// void putback(); + /// store current position + void pushPosition(); + /// restore previous position + void popPosition(); /// dump contents to screen void dump() const; + /// Does an optional argument follow after the current token? + bool hasOpt(); /// typedef std::pair Arg; /*! @@ -147,15 +170,21 @@ public: */ std::string getArg(char left, char right); /*! - * \returns getFullArg('[', ']') including the brackets or the - * empty string if there is no such argument. + * Like getOpt(), but distinguishes between a missing argument "" + * and an empty argument "[]". */ - std::string getFullOpt(); + std::string getFullOpt(bool keepws = false); /*! * \returns getArg('[', ']') including the brackets or the * empty string if there is no such argument. + * No whitespace is eaten if \p keepws is true and no optional + * argument exists. This is important if an optional argument is + * parsed that would go after a command in ERT: In this case the + * whitespace is needed to separate the ERT from the subsequent + * word. Without it, the ERT and the next word would be concatenated + * during .tex export, thus creating an invalid command. */ - std::string getOpt(); + std::string getOpt(bool keepws = false); /*! * \returns getFullArg('(', ')') including the parentheses or the * empty string if there is no such argument. @@ -167,6 +196,19 @@ public: * is parsed but not returned. */ std::string const verbatimEnvironment(std::string const & name); + /* + * The same as verbatimEnvironment(std::string const & name) but + * \begin and \end commands inside the name environment are not parsed. + * This function is designed to parse verbatim environments. + */ + std::string const plainEnvironment(std::string const & name); + /* + * Basically the same as plainEnvironment(std::string const & name) but + * instead of \begin and \end commands the parsing is started/stopped + * at given characters. + * This function is designed to parse verbatim commands. + */ + std::string const plainCommand(char left, char right, std::string const & name); /*! * Returns the character of the current token and increments * the token position. @@ -174,30 +216,31 @@ public: char getChar(); /// void error(std::string const & msg); - /// Parses \p is into tokens - void tokenize(std::istream & is); + /// Parses one token from \p is + void tokenize_one(); /// void push_back(Token const & t); - /// - void pop_back(); /// The previous token. - Token const & prev_token() const; + Token const prev_token() const; /// The current token. - Token const & curr_token() const; + Token const curr_token() const; /// The next token. - Token const & next_token() const; + Token const next_token(); + /// The next but one token. + Token const next_next_token(); /// Make the next token current and return that. - Token const & get_token(); + Token const get_token(); /// \return whether the current token starts a new paragraph - bool isParagraph() const; + bool isParagraph(); /// skips spaces (and comments if \p skip_comments is true) - void skip_spaces(bool skip_comments = false); + /// \return whether whitespace was skipped (not comments) + bool skip_spaces(bool skip_comments = false); /// puts back spaces (and comments if \p skip_comments is true) void unskip_spaces(bool skip_comments = false); /// void lex(std::string const & s); /// - bool good() const; + bool good(); /// std::string verbatim_item(); /// @@ -216,6 +259,14 @@ private: std::vector tokens_; /// unsigned pos_; + /// + std::vector positions_; + /// + idocstringstream * iss_; + /// + idocstream & is_; + /// latex name of the current encoding + std::string encoding_latex_; };