From 25fe87e55c2449e4305e1b200469a48c75b0ea6e Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Sun, 17 Feb 2013 14:53:56 +0100 Subject: [PATCH] Make tex2lyx encoding changes more robust This is achieved by not calling Parser::tokenize_one() anymore in Parser::good(): The status of the input can be tested without performing the actual tokenizing. Now there are only two methods that may prevent an encoding change: next_token() and next_next_token(). --- src/tex2lyx/Parser.cpp | 57 ++++++++++++++++++++++++++++++------------ src/tex2lyx/Parser.h | 38 ++++++++++++++++++++-------- 2 files changed, 68 insertions(+), 27 deletions(-) diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp index cba63099c9..11ecfe12ec 100644 --- a/src/tex2lyx/Parser.cpp +++ b/src/tex2lyx/Parser.cpp @@ -118,6 +118,17 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags) // Wrapper // +bool iparserdocstream::setEncoding(std::string const & e) +{ + is_ << lyx::setEncoding(e); + if (s_.empty()) + return true; + cerr << "Setting encoding " << e << " too late. The encoding of `" + << to_utf8(s_) << "´ is wrong." << std::endl; + return false; +} + + void iparserdocstream::putback(char_type c) { s_ += c; @@ -182,7 +193,7 @@ void Parser::deparse() } -void Parser::setEncoding(std::string const & e, int const & p) +bool Parser::setEncoding(std::string const & e, int const & p) { // We may (and need to) use unsafe encodings here: Since the text is // converted to unicode while reading from is_, we never see text in @@ -191,9 +202,9 @@ void Parser::setEncoding(std::string const & e, int const & p) Encoding const * const enc = encodings.fromLaTeXName(e, p, true); if (!enc) { cerr << "Unknown encoding " << e << ". Ignoring." 
<< std::endl; - return; + return false; } - setEncoding(enc->iconvName()); + return setEncoding(enc->iconvName()); } @@ -250,11 +261,11 @@ void Parser::setCatcodes(cat_type t) } -void Parser::setEncoding(std::string const & e) +bool Parser::setEncoding(std::string const & e) { //cerr << "setting encoding to " << e << std::endl; - is_.docstream() << lyx::setEncoding(e); encoding_iconv_ = e; + return is_.setEncoding(e); } @@ -284,7 +295,11 @@ Token const Parser::curr_token() const Token const Parser::next_token() { static const Token dummy; - return good() ? tokens_[pos_] : dummy; + if (!good()) + return dummy; + if (pos_ >= tokens_.size()) + tokenize_one(); + return pos_ < tokens_.size() ? tokens_[pos_] : dummy; } @@ -292,11 +307,14 @@ Token const Parser::next_token() Token const Parser::next_next_token() { static const Token dummy; - // If good() has not been called after the last get_token() we need - // to tokenize two more tokens. - if (pos_ + 1 >= tokens_.size()) { - tokenize_one(); + if (!good()) + return dummy; + // If tokenize_one() has not been called after the last get_token() we + // need to tokenize two more tokens. + if (pos_ >= tokens_.size()) { tokenize_one(); + if (pos_ + 1 >= tokens_.size()) + tokenize_one(); } return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy; } @@ -306,10 +324,16 @@ Token const Parser::next_next_token() Token const Parser::get_token() { static const Token dummy; - // if (good()) - // cerr << "looking at token " << tokens_[pos_] - // << " pos: " << pos_ << '\n'; - return good() ? 
tokens_[pos_++] : dummy; + if (!good()) + return dummy; + if (pos_ >= tokens_.size()) { + tokenize_one(); + if (pos_ >= tokens_.size()) + return dummy; + } + // cerr << "looking at token " << tokens_[pos_] + // << " pos: " << pos_ << '\n'; + return tokens_[pos_++]; } @@ -408,8 +432,9 @@ bool Parser::good() { if (pos_ < tokens_.size()) return true; - tokenize_one(); - return pos_ < tokens_.size(); + if (!is_.good()) + return false; + return is_.peek() != idocstream::traits_type::eof(); } diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h index 3c55a7ebb0..3d2bf567ef 100644 --- a/src/tex2lyx/Parser.h +++ b/src/tex2lyx/Parser.h @@ -117,15 +117,19 @@ std::ostream & operator<<(std::ostream & os, Token const & t); extern void debugToken(std::ostream & os, Token const & t, unsigned int flags); #endif -// A docstream version that supports putback even when not buffered +/// A docstream version that supports putback even when not buffered class iparserdocstream { public: + typedef idocstream::int_type int_type; + iparserdocstream(idocstream & is) : is_(is) {}; - operator bool() const { return is_; }; + /// Like std::istream::operator bool() + operator bool() const { return s_.empty() ? is_ : true; } - idocstream & docstream() { return is_; }; + /// change the encoding of the input stream to \p e (iconv name) + bool setEncoding(std::string const & e); // add to the list of characters to read before actually reading // the stream @@ -135,7 +139,14 @@ public: // the stream void put_almost_back(docstring s); + /// Like std::istream::get() iparserdocstream & get(char_type &c); + + /// Like std::istream::good() + bool good() const { return s_.empty() ? is_.good() : true; } + + /// Like std::istream::peek() + int_type peek() const { return s_.empty() ? is_.peek() : s_[0]; } private: /// idocstream & is_; @@ -172,11 +183,11 @@ public: * re-reading. Useful when changing catcodes. 
*/ void deparse(); - /// change the iconv encoding of the input stream - /// according to the latex encoding and package - void setEncoding(std::string const & encoding, int const & package); - /// change the iconv encoding of the input stream - void setEncoding(std::string const & encoding); + /// change the encoding of the input stream according to \p encoding + /// (latex name) and package \p package + bool setEncoding(std::string const & encoding, int const & package); + /// change the encoding of the input stream to \p encoding (iconv name) + bool setEncoding(std::string const & encoding); /// get the current iconv encoding of the input stream std::string getEncoding() const { return encoding_iconv_; } @@ -288,9 +299,12 @@ public: Token const prev_token() const; /// The current token. Token const curr_token() const; - /// The next token. + /// The next token. Caution: If this is called, an encoding change is + /// only possible again after get_token() has been called. Token const next_token(); - /// The next but one token. + /// The next but one token. Caution: If this is called, an encoding + /// change is only possible again after get_token() has been called + /// twice. Token const next_next_token(); /// Make the next token current and return that. Token const get_token(); @@ -301,7 +315,9 @@ public: bool skip_spaces(bool skip_comments = false); /// puts back spaces (and comments if \p skip_comments is true) void unskip_spaces(bool skip_comments = false); - /// + /// Is any further input pending()? This is not like + /// std::istream::good(), which returns true if all available input + /// was read, and the next attempt to read would return EOF. bool good(); /// resets the parser to initial state void reset(); -- 2.39.2