Make tex2lyx encoding changes more robust

author Georg Baum <baum@lyx.org>

Sun, 17 Feb 2013 13:53:56 +0000 (14:53 +0100)

committer Georg Baum <baum@lyx.org>

Sun, 17 Feb 2013 14:04:14 +0000 (15:04 +0100)
author Georg Baum <baum@lyx.org>
Sun, 17 Feb 2013 13:53:56 +0000 (14:53 +0100)
committer Georg Baum <baum@lyx.org>
Sun, 17 Feb 2013 14:04:14 +0000 (15:04 +0100)
diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp

index cba63099c9afd395b979bc16a01cdb302dc2a8b5..11ecfe12ec410698ec1caa56d4f14ba80ff137a1 100644 (file)
--- a/src/tex2lyx/Parser.cpp
+++ b/src/tex2lyx/Parser.cpp
@@ -118,6 +118,17 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags)
  // Wrapper
  //
  
+bool iparserdocstream::setEncoding(std::string const & e)
+{
+       is_ << lyx::setEncoding(e);
+       if (s_.empty())
+               return true;
+       cerr << "Setting encoding " << e << " too late. The encoding of `"
+            << to_utf8(s_) << "´ is wrong." << std::endl;
+       return false;
+}
+
+
  void iparserdocstream::putback(char_type c)
  {
         s_ += c;
@@ -182,7 +193,7 @@ void Parser::deparse()
  }
  
  
-void Parser::setEncoding(std::string const & e, int const & p)
+bool Parser::setEncoding(std::string const & e, int const & p)
  {
         // We may (and need to) use unsafe encodings here: Since the text is
         // converted to unicode while reading from is_, we never see text in
@@ -191,9 +202,9 @@ void Parser::setEncoding(std::string const & e, int const & p)
         Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
         if (!enc) {
                 cerr << "Unknown encoding " << e << ". Ignoring." << std::endl;
-               return;
+               return false;
         }
-       setEncoding(enc->iconvName());
+       return setEncoding(enc->iconvName());
  }
  
  
@@ -250,11 +261,11 @@ void Parser::setCatcodes(cat_type t)
  }
  
  
-void Parser::setEncoding(std::string const & e)
+bool Parser::setEncoding(std::string const & e)
  {
         //cerr << "setting encoding to " << e << std::endl;
-       is_.docstream() << lyx::setEncoding(e);
         encoding_iconv_ = e;
+       return is_.setEncoding(e);
  }
  
  
@@ -284,7 +295,11 @@ Token const Parser::curr_token() const
  Token const Parser::next_token()
  {
         static const Token dummy;
-       return good() ? tokens_[pos_] : dummy;
+       if (!good())
+               return dummy;
+       if (pos_ >= tokens_.size())
+               tokenize_one();
+       return pos_ < tokens_.size() ? tokens_[pos_] : dummy;
  }
  
  
@@ -292,11 +307,14 @@ Token const Parser::next_token()
  Token const Parser::next_next_token()
  {
         static const Token dummy;
-       // If good() has not been called after the last get_token() we need
-       // to tokenize two more tokens.
-       if (pos_ + 1 >= tokens_.size()) {
-               tokenize_one();
+       if (!good())
+               return dummy;
+       // If tokenize_one() has not been called after the last get_token() we
+       // need to tokenize two more tokens.
+       if (pos_ >= tokens_.size()) {
                 tokenize_one();
+               if (pos_ + 1 >= tokens_.size())
+                       tokenize_one();
         }
         return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy;
  }
@@ -306,10 +324,16 @@ Token const Parser::next_next_token()
  Token const Parser::get_token()
  {
         static const Token dummy;
-       // if (good()) 
-       //      cerr << "looking at token " << tokens_[pos_] 
-       //           << " pos: " << pos_ << '\n';
-       return good() ? tokens_[pos_++] : dummy;
+       if (!good())
+               return dummy;
+       if (pos_ >= tokens_.size()) {
+               tokenize_one();
+               if (pos_ >= tokens_.size())
+                       return dummy;
+       }
+       // cerr << "looking at token " << tokens_[pos_] 
+       //      << " pos: " << pos_ << '\n';
+       return tokens_[pos_++];
  }
  
  
@@ -408,8 +432,9 @@ bool Parser::good()
  {
         if (pos_ < tokens_.size())
                 return true;
-       tokenize_one();
-       return pos_ < tokens_.size();
+       if (!is_.good())
+               return false;
+       return is_.peek() != idocstream::traits_type::eof();
  }
  
  
diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h

index 3c55a7ebb093de8da33fe98983c6046f287334c6..3d2bf567ef4c29c94133d1c346e622ea1cf0e9fc 100644 (file)
--- a/src/tex2lyx/Parser.h
+++ b/src/tex2lyx/Parser.h
@@ -117,15 +117,19 @@ std::ostream & operator<<(std::ostream & os, Token const & t);
  extern void debugToken(std::ostream & os, Token const & t, unsigned int flags);
  #endif
  
-// A docstream version that supports putback even when not buffered
+/// A docstream version that supports putback even when not buffered
  class iparserdocstream
  {
  public:
+       typedef idocstream::int_type int_type;
+
         iparserdocstream(idocstream & is) : is_(is) {};
  
-       operator bool() const { return is_; };
+       /// Like std::istream::operator bool()
+       operator bool() const { return s_.empty() ? is_ : true; }
  
-       idocstream & docstream() { return is_; };
+       /// change the encoding of the input stream to \p e (iconv name)
+       bool setEncoding(std::string const & e);
  
         // add to the list of characters to read before actually reading
         // the stream
@@ -135,7 +139,14 @@ public:
         // the stream
         void put_almost_back(docstring s);
  
+       /// Like std::istream::get()
         iparserdocstream & get(char_type &c);
+
+       /// Like std::istream::good()
+       bool good() const { return s_.empty() ? is_.good() : true; }
+
+       /// Like std::istream::peek()
+       int_type peek() const { return s_.empty() ? is_.peek() : s_[0]; }
  private:
         ///
         idocstream & is_;
@@ -172,11 +183,11 @@ public:
          * re-reading. Useful when changing catcodes. */
         void deparse();
  
-       /// change the iconv encoding of the input stream
-       /// according to the latex encoding and package
-       void setEncoding(std::string const & encoding, int const & package);
-       /// change the iconv encoding of the input stream
-       void setEncoding(std::string const & encoding);
+       /// change the encoding of the input stream according to \p encoding
+       /// (latex name) and package \p package
+       bool setEncoding(std::string const & encoding, int const & package);
+       /// change the encoding of the input stream to \p encoding (iconv name)
+       bool setEncoding(std::string const & encoding);
         /// get the current iconv encoding of the input stream
         std::string getEncoding() const { return encoding_iconv_; }
  
@@ -288,9 +299,12 @@ public:
         Token const prev_token() const;
         /// The current token.
         Token const curr_token() const;
-       /// The next token.
+       /// The next token. Caution: If this is called, an encoding change is
+       /// only possible again after get_token() has been called.
         Token const next_token();
-       /// The next but one token.
+       /// The next but one token. Caution: If this is called, an encoding
+       /// change is only possible again after get_token() has been called
+       /// twice.
         Token const next_next_token();
         /// Make the next token current and return that.
         Token const get_token();
@@ -301,7 +315,9 @@ public:
         bool skip_spaces(bool skip_comments = false);
         /// puts back spaces (and comments if \p skip_comments is true)
         void unskip_spaces(bool skip_comments = false);
-       ///
+       /// Is any further input pending? Unlike std::istream::good(), which
+       /// still returns true when all available input was read, this returns
+       /// false if the next attempt to read would return EOF.
         bool good();
         /// resets the parser to initial state
         void reset();
author	Georg Baum <baum@lyx.org>
	Sun, 17 Feb 2013 13:53:56 +0000 (14:53 +0100)
committer	Georg Baum <baum@lyx.org>
	Sun, 17 Feb 2013 14:04:14 +0000 (15:04 +0100)
src/tex2lyx/Parser.cpp		patch \| blob \| history
src/tex2lyx/Parser.h		patch \| blob \| history