X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2FParser.h;h=c0c5685bb83fc449cf2567c39f00206b38fb3629;hb=2872f35a51a66895e65f38ccf945c89aa7540b02;hp=3ea9aa50bf7a90053ae6a28a0eed890797d5eba6;hpb=e4c9689906e05c94116feea53ff29ad00a0e5a0b;p=lyx.git

diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h
index 3ea9aa50bf..c0c5685bb8 100644
--- a/src/tex2lyx/Parser.h
+++ b/src/tex2lyx/Parser.h
@@ -12,10 +12,11 @@
 #ifndef PARSER_H
 #define PARSER_H
 
-#include <vector>
 #include <string>
 #include <utility>
+#include <vector>
 
+#include "support/docstream.h"
 
 namespace lyx {
 
@@ -46,9 +47,6 @@ enum CatCode {
 };
 
 
-CatCode catcode(unsigned char c);
-
-
 enum {
 	FLAG_BRACE_LAST = 1 << 1,  //  last closing brace ends the parsing
 	FLAG_RIGHT      = 1 << 2,  //  next \\right ends the parsing process
@@ -75,34 +73,44 @@ enum {
 class Token {
 public:
 	///
-	Token() : cs_(), char_(0), cat_(catIgnore) {}
-	///
-	Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
+	Token() : cs_(), cat_(catIgnore) {}
 	///
-	Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {}
+	Token(docstring const & cs, CatCode cat) : cs_(to_utf8(cs)), cat_(cat) {}
 
-	///
+	/// Returns the token as string
 	std::string const & cs() const { return cs_; }
 	/// Returns the catcode of the token
 	CatCode cat() const { return cat_; }
-	///
-	char character() const { return char_; }
-	/// Returns the token as string
-	std::string asString() const;
+	/** Get the character of tokens that were constructed from a single
+	 * character input or a two character input and cat_ == catEscape.
+	 * FIXME: The intended usage is not clear. The Token class in
+	 *        ../mathed/MathParser.cpp (which is the ancestor of this
+	 *        class) uses a separate char member for this method. I
+	 *        believe that the intended usage is to not cover tokens with
+	 *        catEscape, e.g. \code
+	 *        return (cs_.empty() || cat_ == catEscape) ? 0 : cs_[0];
+	 *        \endcode
+	 *        All usages of this method should be checked. gb 2011-01-05
+	 */
+	char character() const { return cs_.empty() ? 0 : cs_[0]; }
 	/// Returns the token verbatim
 	std::string asInput() const;
+	/// Is the token an alphanumerical character?
+	bool isAlnumASCII() const;
 
 private:
 	///
 	std::string cs_;
 	///
-	char char_;
-	///
 	CatCode cat_;
 };
 
 std::ostream & operator<<(std::ostream & os, Token const & t);
 
+#ifdef FILEDEBUG
+extern void debugToken(std::ostream & os, Token const & t, unsigned int flags);
+#endif
+
 
 /*!
  * Actual parser class
@@ -116,22 +124,35 @@ std::ostream & operator<<(std::ostream & os, Token const & t);
  */
 
 class Parser {
-
+	/// noncopyable
+	Parser(Parser const & p);
+	Parser & operator=(Parser const & p);
 public:
 	///
-	Parser(std::istream & is);
+	Parser(idocstream & is);
 	///
 	Parser(std::string const & s);
 	///
 	~Parser();
 
+	/// change the latex encoding of the input stream
+	void setEncoding(std::string const & encoding);
+	/// get the current latex encoding of the input stream
+	std::string getEncoding() const { return encoding_latex_; }
+
 	///
 	int lineno() const { return lineno_; }
 	///
 	void putback();
+	/// store current position
+	void pushPosition();
+	/// restore previous position
+	void popPosition();
 	/// dump contents to screen
 	void dump() const;
 
+	/// Does an optional argument follow after the current token?
+	bool hasOpt();
 	///
 	typedef std::pair<bool, std::string> Arg;
 	/*!
@@ -149,15 +170,21 @@ public:
 	 */
 	std::string getArg(char left, char right);
 	/*!
-	 * \returns getFullArg('[', ']') including the brackets or the
-	 * empty string if there is no such argument.
+	 * Like getOpt(), but distinguishes between a missing argument ""
+	 * and an empty argument "[]".
 	 */
-	std::string getFullOpt();
+	std::string getFullOpt(bool keepws = false);
 	/*!
 	 * \returns getArg('[', ']') including the brackets or the
 	 * empty string if there is no such argument.
+	 * No whitespace is eaten if \p keepws is true and no optional
+	 * argument exists. This is important if an optional argument is
+	 * parsed that would go after a command in ERT: In this case the
+	 * whitespace is needed to separate the ERT from the subsequent
+	 * word. Without it, the ERT and the next word would be concatenated
+	 * during .tex export, thus creating an invalid command.
 	 */
-	std::string getOpt();
+	std::string getOpt(bool keepws = false);
 	/*!
 	 * \returns getFullArg('(', ')') including the parentheses or the
 	 * empty string if there is no such argument.
@@ -169,6 +196,19 @@ public:
 	 * is parsed but not returned.
 	 */
 	std::string const verbatimEnvironment(std::string const & name);
+	/*
+	 * The same as verbatimEnvironment(std::string const & name) but
+	 * \begin and \end commands inside the environment \p name are not parsed.
+	 * This function is designed to parse verbatim environments.
+	 */
+	std::string const plainEnvironment(std::string const & name);
+	/*
+	 * Basically the same as plainEnvironment(std::string const & name) but
+	 * instead of \begin and \end commands the parsing is started/stopped
+	 * at given characters.
+	 * This function is designed to parse verbatim commands.
+	 */
+	std::string const plainCommand(char left, char right, std::string const & name);
 	/*!
 	 * Returns the character of the current token and increments
 	 * the token position.
@@ -178,28 +218,29 @@ public:
 	void error(std::string const & msg);
 	/// Parses one token from \p is 
 	void tokenize_one();
-	/// Parses \p is into tokens
-	void tokenize();
 	///
 	void push_back(Token const & t);
 	/// The previous token.
-	Token const & prev_token() const;
+	Token const prev_token() const;
 	/// The current token.
-	Token const & curr_token() const;
+	Token const curr_token() const;
 	/// The next token.
-	Token const & next_token() const;
+	Token const next_token();
+	/// The next but one token.
+	Token const next_next_token();
 	/// Make the next token current and return that.
-	Token const & get_token();
+	Token const get_token();
 	/// \return whether the current token starts a new paragraph
-	bool isParagraph() const;
+	bool isParagraph();
 	/// skips spaces (and comments if \p skip_comments is true)
-	void skip_spaces(bool skip_comments = false);
+	/// \return whether whitespace was skipped (not comments)
+	bool skip_spaces(bool skip_comments = false);
 	/// puts back spaces (and comments if \p skip_comments is true)
 	void unskip_spaces(bool skip_comments = false);
 	///
 	void lex(std::string const & s);
 	///
-	bool good() const;
+	bool good();
 	///
 	std::string verbatim_item();
 	///
@@ -219,9 +260,13 @@ private:
 	///
 	unsigned pos_;
 	///
-	std::istringstream * iss_;
+	std::vector<unsigned> positions_;
+	///
+	idocstringstream * iss_;
 	///
-	std::istream & is_;
+	idocstream & is_;
+	/// latex name of the current encoding
+	std::string encoding_latex_;
 };