X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2FParser.cpp;h=69e3460f459de17c1ccc6f15b564eeac8589af69;hb=3f72a026cc26e8e8dfdd306a62fd174b51dd59c4;hp=547f424b9dab64f0e9ea78543c629178f81fab3f;hpb=5f3cd55f1dcd85626277968891e5e12bc07287fc;p=lyx.git

diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp
index 547f424b9d..69e3460f45 100644
--- a/src/tex2lyx/Parser.cpp
+++ b/src/tex2lyx/Parser.cpp
@@ -118,13 +118,19 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags)
 // Wrapper
 //
 
+void iparserdocstream::setEncoding(std::string const & e)
+{
+	is_ << lyx::setEncoding(e);
+}
+
+
 void iparserdocstream::putback(char_type c)
 {
-	s_ += c;
+	s_ = c + s_;
 }
 
 
-void iparserdocstream::put_almost_back(docstring s)
+void iparserdocstream::putback(docstring s)
 {
 	s_ = s + s_;
 }
@@ -135,6 +141,7 @@ iparserdocstream & iparserdocstream::get(char_type &c)
 	if (s_.empty())
 		is_.get(c);
 	else {
+		//cerr << "unparsed: " << to_utf8(s_) <iconvName());
+	return setEncoding(enc->iconvName());
 }
 
 
@@ -250,11 +263,17 @@ void Parser::setCatcodes(cat_type t)
 }
 
 
-void Parser::setEncoding(std::string const & e)
+bool Parser::setEncoding(std::string const & e)
 {
 	//cerr << "setting encoding to " << e << std::endl;
-	is_.docstream() << lyx::setEncoding(e);
 	encoding_iconv_ = e;
+	// If the encoding is fixed, we must not change the stream encoding
+	// (because the whole input uses that encoding, e.g. if it comes from
+	// the clipboard). We still need to track the original encoding in
+	// encoding_iconv_, so that the generated output is correct.
+	if (!fixed_enc_)
+		is_.setEncoding(e);
+	return true;
 }
 
 
@@ -284,7 +303,11 @@ Token const Parser::curr_token() const
 Token const Parser::next_token()
 {
 	static const Token dummy;
-	return good() ? tokens_[pos_] : dummy;
+	if (!good())
+		return dummy;
+	if (pos_ >= tokens_.size())
+		tokenize_one();
+	return pos_ < tokens_.size() ? tokens_[pos_] : dummy;
 }
 
 
@@ -292,12 +315,14 @@ Token const Parser::next_token()
 Token const Parser::next_next_token()
 {
 	static const Token dummy;
-	// If good() has not been called after the last get_token() we need
-	// to tokenize two more tokens.
-	if (pos_ + 1 >= tokens_.size()) {
+	if (!good())
+		return dummy;
+	// If tokenize_one() has not been called after the last get_token() we
+	// need to tokenize two more tokens.
+	if (pos_ >= tokens_.size())
 		tokenize_one();
+	if (pos_ + 1 >= tokens_.size())
 		tokenize_one();
-	}
 	return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy;
 }
 
@@ -306,10 +331,16 @@ Token const Parser::next_next_token()
 Token const Parser::get_token()
 {
 	static const Token dummy;
-	// if (good())
-	// 	cerr << "looking at token " << tokens_[pos_]
-	// 	     << " pos: " << pos_ << '\n';
-	return good() ? tokens_[pos_++] : dummy;
+	if (!good())
+		return dummy;
+	if (pos_ >= tokens_.size()) {
+		tokenize_one();
+		if (pos_ >= tokens_.size())
+			return dummy;
+	}
+	// cerr << "looking at token " << tokens_[pos_]
+	// 	<< " pos: " << pos_ << '\n';
+	return tokens_[pos_++];
 }
 
 
@@ -401,6 +432,13 @@ void Parser::popPosition()
 {
 	pos_ = positions_.back();
 	positions_.pop_back();
+	deparse();
+}
+
+
+void Parser::dropPosition()
+{
+	positions_.pop_back();
 }
 
 
@@ -408,8 +446,9 @@ bool Parser::good()
 {
 	if (pos_ < tokens_.size())
 		return true;
-	tokenize_one();
-	return pos_ < tokens_.size();
+	if (!is_.good())
+		return false;
+	return is_.peek() != idocstream::traits_type::eof();
 }
 
 
@@ -462,7 +501,8 @@ Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
 		putback();
 		return make_pair(false, string());
 	} else {
-		for (t = get_token(); good(); t = get_token()) {
+		while (good()) {
+			t = get_token();
 			// Ignore comments
 			if (t.cat() == catComment) {
 				if (!t.cs().empty())
@@ -597,11 +637,12 @@ string const Parser::plainCommand(char left, char right, string const & name)
 }
 
 
-string const Parser::verbatimStuff(string const & end_string)
+Parser::Arg Parser::verbatimStuff(string const & end_string, bool const allow_linebreak)
 {
 	if (!good())
-		return string();
+		return Arg(false, string());
 
+	pushPosition();
 	ostringstream oss;
 	size_t match_index = 0;
 	setCatcodes(VERBATIM_CATCODES);
@@ -612,22 +653,38 @@ string const Parser::verbatimStuff(string const & end_string)
 			match_index += t.asInput().length();
 			if (match_index >= end_string.length())
 				break;
-		} else if (match_index) {
-			oss << end_string.substr(0, match_index) << t.asInput();
-			match_index = 0;
-		} else
-			oss << t.asInput();
+		} else {
+			if (!allow_linebreak && t.asInput() == "\n") {
+				cerr << "unexpected end of input" << endl;
+				popPosition();
+				setCatcodes(NORMAL_CATCODES);
+				return Arg(false, string());
+			}
+			if (match_index) {
+				oss << end_string.substr(0, match_index)
+				    << t.asInput();
+				match_index = 0;
+			} else
+				oss << t.asInput();
+		}
 	}
-	setCatcodes(NORMAL_CATCODES);
-	if (!good())
+
+	if (!good()) {
 		cerr << "unexpected end of input" << endl;
-	return oss.str();
+		popPosition();
+		setCatcodes(NORMAL_CATCODES);
+		return Arg(false, string());
+	}
+	setCatcodes(NORMAL_CATCODES);
+	dropPosition();
+	return Arg(true, oss.str());
 }
 
 
 string const Parser::verbatimEnvironment(string const & name)
 {
-	string s = verbatimStuff("\\end{" + name + "}");
+	//FIXME: do something if endstring is not found
+	string s = verbatimStuff("\\end{" + name + "}").second;
 	// ignore one newline at beginning or end of string
 	if (prefixIs(s, "\n"))
 		s.erase(0,1);
@@ -647,7 +704,7 @@ string Parser::verbatimOption()
 				putback();
 				res += '{' + verbatim_item() + '}';
 			} else
-				res += t.cs();
+				res += t.asInput();
 		}
 	}
 	return res;
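
Illustrative note (not part of the diff above): with this change `verbatimStuff()` returns a `Parser::Arg`, i.e. a `(found, contents)` pair as already used elsewhere in this file (`make_pair(false, string())` in `getFullArg()`), and it restores the parser position via `popPosition()` when the end string is not found. Below is a minimal caller sketch under those assumptions; the function name `read_listing_body`, the `lstlisting` end string, and the `lyx` namespace/include path are illustrative only.

```cpp
// Hypothetical caller sketch -- not part of the commit above.
#include <iostream>
#include <string>

#include "Parser.h"  // tex2lyx Parser; assumed include path

// Read the body of a verbatim-like environment whose \begin line has
// already been consumed by the parser `p`.
std::string read_listing_body(lyx::Parser & p)
{
	// Line breaks are allowed inside an environment body, so pass
	// allow_linebreak = true explicitly (the default is declared in
	// Parser.h, which is not shown in this diff).
	lyx::Parser::Arg const body =
		p.verbatimStuff("\\end{lstlisting}", true);
	if (!body.first) {
		// End string not found: verbatimStuff() has already called
		// popPosition(), so the parser is back where it started.
		std::cerr << "missing \\end{lstlisting}" << std::endl;
		return std::string();
	}
	// body.second holds everything up to, but not including, \end{...}.
	return body.second;
}
```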