X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2FParser.cpp;h=41ab92063c031a50b2a496d67dd75de963f039a6;hb=298730215c21735f16e7278a5d5a4469fb0b9859;hp=e2af5f21bbb9139a8fe58a00b69e1a821d01eadb;hpb=be42f1398db05353bdab6fa328a4e86d11ce6b97;p=lyx.git

diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp
index e2af5f21bb..41ab92063c 100644
--- a/src/tex2lyx/Parser.cpp
+++ b/src/tex2lyx/Parser.cpp
@@ -12,11 +12,13 @@
 
 #include "Encoding.h"
 #include "Parser.h"
+#include "support/lstrings.h"
 #include "support/textutils.h"
 
 #include <iostream>
 
 using namespace std;
+using namespace lyx::support;
 
 namespace lyx {
 
@@ -27,7 +29,7 @@ namespace {
  * \p c must have catcode catNewline, and it must be the last character read
  * from \p is.
  */
-char_type getNewline(idocstream & is, char_type c)
+char_type getNewline(iparserdocstream & is, char_type c)
 {
 	// we have to handle 3 different line endings:
 	// - UNIX (\n)
@@ -111,15 +113,55 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags)
 #endif
 
 
+//
+// iparserdocstream: input stream wrapper with a putback buffer
+//
+
+void iparserdocstream::setEncoding(std::string const & e)
+{
+	is_ << lyx::setEncoding(e);
+}
+
+
+void iparserdocstream::putback(char_type c)
+{
+	s_ = c + s_;
+}
+
+
+void iparserdocstream::putback(docstring s)
+{
+	s_ = s + s_;
+}
+
+
+iparserdocstream & iparserdocstream::get(char_type &c)
+{
+	if (s_.empty())
+		is_.get(c);
+	else {
+		//cerr << "unparsed: " << to_utf8(s_) <<endl;
+		c = s_[0];
+		s_.erase(0,1);
+	}
+	return *this;
+}
+
+
 //
 // Parser
 //
 
 
-Parser::Parser(idocstream & is)
-	: lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8"),
-	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
+Parser::Parser(idocstream & is, std::string const & fixedenc)
+	: lineno_(0), pos_(0), iss_(0), is_(is),
+	  encoding_iconv_(fixedenc.empty() ? "UTF-8" : fixedenc),
+	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
+	  fixed_enc_(!fixedenc.empty())
 {
+	if (fixed_enc_)
+		is_.setEncoding(fixedenc);
+	catInit();
 }
 
 
@@ -127,8 +169,11 @@ Parser::Parser(string const & s)
 	: lineno_(0), pos_(0),
 	  iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
 	  encoding_iconv_("UTF-8"),
-	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
+	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
+	  // An idocstringstream cannot change the encoding
+	  fixed_enc_(true)
 {
+	catInit();
 }
 
 
@@ -138,7 +183,20 @@ Parser::~Parser()
 }
 
 
-void Parser::setEncoding(std::string const & e, int const & p)
+void Parser::deparse()
+{
+	string s;
+	for(size_type i = pos_ ; i < tokens_.size() ; ++i) {
+		s += tokens_[i].asInput();
+	}
+	is_.putback(from_utf8(s));
+	tokens_.erase(tokens_.begin() + pos_, tokens_.end());
+	// make sure that the next token is read
+	tokenize_one();
+}
+
+
+bool Parser::setEncoding(std::string const & e, int const & p)
 {
 	// We may (and need to) use unsafe encodings here: Since the text is
 	// converted to unicode while reading from is_, we never see text in
@@ -147,9 +205,9 @@ void Parser::setEncoding(std::string const & e, int const & p)
 	Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
 	if (!enc) {
 		cerr << "Unknown encoding " << e << ". Ignoring." << std::endl;
-		return;
+		return false;
 	}
-	setEncoding(enc->iconvName());
+	return setEncoding(enc->iconvName());
 }
 
 
@@ -195,20 +253,28 @@ CatCode Parser::catcode(char_type c) const
 void Parser::setCatcode(char c, CatCode cat)
 {
 	theCatcode_[(unsigned char)c] = cat;
+	deparse();
 }
 
 
 void Parser::setCatcodes(cat_type t)
 {
 	theCatcodesType_ = t;
+	deparse();
 }
 
 
-void Parser::setEncoding(std::string const & e)
+bool Parser::setEncoding(std::string const & e)
 {
 	//cerr << "setting encoding to " << e << std::endl;
-	is_ << lyx::setEncoding(e);
 	encoding_iconv_ = e;
+	// If the encoding is fixed, we must not change the stream encoding
+	// (because the whole input uses that encoding, e.g. if it comes from
+	// the clipboard). We still need to track the original encoding in
+	// encoding_iconv_, so that the generated output is correct.
+	if (!fixed_enc_)
+		is_.setEncoding(e);
+	return true;
 }
 
 
@@ -238,7 +304,11 @@ Token const Parser::curr_token() const
 Token const Parser::next_token()
 {
 	static const Token dummy;
-	return good() ? tokens_[pos_] : dummy;
+	if (!good())
+		return dummy;
+	if (pos_ >= tokens_.size())
+		tokenize_one();
+	return pos_ < tokens_.size() ? tokens_[pos_] : dummy;
 }
 
 
@@ -246,12 +316,14 @@ Token const Parser::next_token()
 Token const Parser::next_next_token()
 {
 	static const Token dummy;
-	// If good() has not been called after the last get_token() we need
-	// to tokenize two more tokens.
-	if (pos_ + 1 >= tokens_.size()) {
+	if (!good())
+		return dummy;
+	// If tokenize_one() has not been called after the last get_token() we
+	// need to tokenize two more tokens.
+	if (pos_ >= tokens_.size())
 		tokenize_one();
+	if (pos_ + 1 >= tokens_.size())
 		tokenize_one();
-	}
 	return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy;
 }
 
@@ -260,8 +332,16 @@ Token const Parser::next_next_token()
 Token const Parser::get_token()
 {
 	static const Token dummy;
-	//cerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
-	return good() ? tokens_[pos_++] : dummy;
+	if (!good())
+		return dummy;
+	if (pos_ >= tokens_.size()) {
+		tokenize_one();
+		if (pos_ >= tokens_.size())
+			return dummy;
+	}
+	// cerr << "looking at token " << tokens_[pos_] 
+	//      << " pos: " << pos_ << '\n';
+	return tokens_[pos_++];
 }
 
 
@@ -353,23 +433,23 @@ void Parser::popPosition()
 {
 	pos_ = positions_.back();
 	positions_.pop_back();
+	deparse();
 }
 
 
-bool Parser::good()
+void Parser::dropPosition()
 {
-	if (pos_ < tokens_.size())
-		return true;
-	tokenize_one();
-	return pos_ < tokens_.size();
+	positions_.pop_back();
 }
 
 
-char Parser::getChar()
+bool Parser::good()
 {
-	if (!good())
-		error("The input stream is not well...");
-	return get_token().character();
+	if (pos_ < tokens_.size())
+		return true;
+	if (!is_.good())
+		return false;
+	return is_.peek() != idocstream::traits_type::eof();
 }
 
 
@@ -422,7 +502,8 @@ Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
 		putback();
 		return make_pair(false, string());
 	} else {
-		for (t = get_token(); good(); t = get_token()) {
+		while (good()) {
+			t = get_token();
 			// Ignore comments
 			if (t.cat() == catComment) {
 				if (!t.cs().empty())
@@ -557,11 +638,12 @@ string const Parser::plainCommand(char left, char right, string const & name)
 }
 
 
-string const Parser::verbatimStuff(string const & end_string)
+Parser::Arg Parser::verbatimStuff(string const & end_string, bool const allow_linebreak)
 {
 	if (!good())
-		return string();
+		return Arg(false, string());
 
+	pushPosition();
 	ostringstream oss;
 	size_t match_index = 0;
 	setCatcodes(VERBATIM_CATCODES);
@@ -572,16 +654,83 @@ string const Parser::verbatimStuff(string const & end_string)
 			match_index += t.asInput().length();
 			if (match_index >= end_string.length())
 				break;
-		} else if (match_index) {
-			oss << end_string.substr(0, match_index) << t.asInput();
-			match_index = 0;
-		} else
-			oss << t.asInput();
+		} else {
+			if (!allow_linebreak && t.asInput() == "\n") {
+				cerr << "unexpected end of input" << endl;
+				popPosition();
+				setCatcodes(NORMAL_CATCODES);
+				return Arg(false, string());
+			}
+			if (match_index) {
+				oss << end_string.substr(0, match_index) 
+				    << t.asInput();
+				match_index = 0;
+			} else
+				oss << t.asInput();
+		}
+	}
+
+	if (!good()) {
+		cerr << "unexpected end of input" << endl;
+		popPosition();
+		setCatcodes(NORMAL_CATCODES);
+		return Arg(false, string());
 	}
 	setCatcodes(NORMAL_CATCODES);
+	dropPosition();
+	return Arg(true, oss.str());
+}
+
+
+string const Parser::verbatimEnvironment(string const & name)
+{
+	// FIXME: handle the case where the end string is not found (verbatimStuff() then returns first == false)
+	string s = verbatimStuff("\\end{" + name + "}").second;
+	// strip at most one newline from each end of the string
+	if (prefixIs(s, "\n"))
+		s.erase(0,1);
+	if (suffixIs(s, "\n"))
+		s.erase(s.length() - 1,1);
+	return s;
+}
+
+
+string Parser::verbatimOption()
+{
+	string res;
+	if (next_token().character() == '[') {
+		Token t = get_token();
+		for (t = get_token(); t.character() != ']' && good(); t = get_token()) {
+			if (t.cat() == catBegin) {
+				putback();
+				res += '{' + verbatim_item() + '}';
+			} else
+				res += t.asInput();
+		}
+	}
+	return res;
+}
+
+
+string Parser::verbatim_item()
+{
 	if (!good())
-		cerr << "unexpected end of input" << endl;
-	return oss.str();
+		error("stream bad");
+	skip_spaces();
+	if (next_token().cat() == catBegin) {
+		Token t = get_token(); // skip brace
+		string res;
+		for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
+			if (t.cat() == catBegin) {
+				putback();
+				res += '{' + verbatim_item() + '}';
+			}
+			else
+				res += t.asInput();
+		}
+		return res;
+	}
+	return get_token().asInput();
 }
 
 
@@ -682,45 +831,6 @@ void Parser::error(string const & msg)
 }
 
 
-string Parser::verbatimOption()
-{
-	string res;
-	if (next_token().character() == '[') {
-		Token t = get_token();
-		for (t = get_token(); t.character() != ']' && good(); t = get_token()) {
-			if (t.cat() == catBegin) {
-				putback();
-				res += '{' + verbatim_item() + '}';
-			} else
-				res += t.cs();
-		}
-	}
-	return res;
-}
-
-
-string Parser::verbatim_item()
-{
-	if (!good())
-		error("stream bad");
-	skip_spaces();
-	if (next_token().cat() == catBegin) {
-		Token t = get_token(); // skip brace
-		string res;
-		for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
-			if (t.cat() == catBegin) {
-				putback();
-				res += '{' + verbatim_item() + '}';
-			}
-			else
-				res += t.asInput();
-		}
-		return res;
-	}
-	return get_token().asInput();
-}
-
-
 void Parser::reset()
 {
 	pos_ = 0;