X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2FParser.cpp;h=a538c760efe2ac8f83002cd2a581544bf5c8cfe4;hb=a2b21e3cd4bbfd42e59161143eba6e7681aaa93f;hp=cba63099c9afd395b979bc16a01cdb302dc2a8b5;hpb=fb64fe613f502905cf24ff4064643d2a48679f35;p=lyx.git

diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp
index cba63099c9..a538c760ef 100644
--- a/src/tex2lyx/Parser.cpp
+++ b/src/tex2lyx/Parser.cpp
@@ -12,7 +12,6 @@
 
 #include "Encoding.h"
 #include "Parser.h"
-#include "support/foreach.h"
 #include "support/lstrings.h"
 #include "support/textutils.h"
 
@@ -49,7 +48,7 @@ char_type getNewline(iparserdocstream & is, char_type c)
 	return c;
 }
 
-}
+} // namespace
 
 //
 // Token
@@ -118,13 +117,19 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags)
 // Wrapper
 //
 
+void iparserdocstream::setEncoding(std::string const & e)
+{
+	is_ << lyx::setEncoding(e);
+}
+
+
 void iparserdocstream::putback(char_type c)
 {
-	s_ += c;
+	s_ = c + s_;
 }
 
 
-void iparserdocstream::put_almost_back(docstring s)
+void iparserdocstream::putback(docstring s)
 {
 	s_ = s + s_;
 }
@@ -135,6 +140,7 @@ iparserdocstream & iparserdocstream::get(char_type &c)
 	if (s_.empty())
 		is_.get(c);
 	else {
+		//cerr << "unparsed: " << to_utf8(s_) <<endl;
 		c = s_[0];
 		s_.erase(0,1);
 	}
@@ -147,10 +153,15 @@ iparserdocstream & iparserdocstream::get(char_type &c)
 //
 
 
-Parser::Parser(idocstream & is)
-	: lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8"),
-	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
+Parser::Parser(idocstream & is, std::string const & fixedenc)
+	: lineno_(0), pos_(0), iss_(0), is_(is),
+	  encoding_iconv_(fixedenc.empty() ? "UTF-8" : fixedenc),
+	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
+	  fixed_enc_(!fixedenc.empty())
 {
+	if (fixed_enc_)
+		is_.setEncoding(fixedenc);
+	catInit();
 }
 
 
@@ -158,8 +169,11 @@ Parser::Parser(string const & s)
 	: lineno_(0), pos_(0),
 	  iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
 	  encoding_iconv_("UTF-8"),
-	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
+	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
+	  // An idocstringstream cannot change the encoding
+	  fixed_enc_(true)
 {
+	catInit();
 }
 
 
@@ -175,14 +189,14 @@ void Parser::deparse()
 	for(size_type i = pos_ ; i < tokens_.size() ; ++i) {
 		s += tokens_[i].asInput();
 	}
-	is_.put_almost_back(from_utf8(s));
+	is_.putback(from_utf8(s));
 	tokens_.erase(tokens_.begin() + pos_, tokens_.end());
 	// make sure that next token is read
 	tokenize_one();
 }
 
 
-void Parser::setEncoding(std::string const & e, int const & p)
+bool Parser::setEncoding(std::string const & e, int const & p)
 {
 	// We may (and need to) use unsafe encodings here: Since the text is
 	// converted to unicode while reading from is_, we never see text in
@@ -191,9 +205,9 @@ void Parser::setEncoding(std::string const & e, int const & p)
 	Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
 	if (!enc) {
 		cerr << "Unknown encoding " << e << ". Ignoring." << std::endl;
-		return;
+		return false;
 	}
-	setEncoding(enc->iconvName());
+	return setEncoding(enc->iconvName());
 }
 
 
@@ -250,11 +264,17 @@ void Parser::setCatcodes(cat_type t)
 }
 
 
-void Parser::setEncoding(std::string const & e)
+bool Parser::setEncoding(std::string const & e)
 {
 	//cerr << "setting encoding to " << e << std::endl;
-	is_.docstream() << lyx::setEncoding(e);
 	encoding_iconv_ = e;
+	// If the encoding is fixed, we must not change the stream encoding
+	// (because the whole input uses that encoding, e.g. if it comes from
+	// the clipboard). We still need to track the original encoding in
+	// encoding_iconv_, so that the generated output is correct.
+	if (!fixed_enc_)
+		is_.setEncoding(e);
+	return true;
 }
 
 
@@ -284,7 +304,11 @@ Token const Parser::curr_token() const
 Token const Parser::next_token()
 {
 	static const Token dummy;
-	return good() ? tokens_[pos_] : dummy;
+	if (!good())
+		return dummy;
+	if (pos_ >= tokens_.size())
+		tokenize_one();
+	return pos_ < tokens_.size() ? tokens_[pos_] : dummy;
 }
 
 
@@ -292,12 +316,14 @@ Token const Parser::next_token()
 Token const Parser::next_next_token()
 {
 	static const Token dummy;
-	// If good() has not been called after the last get_token() we need
-	// to tokenize two more tokens.
-	if (pos_ + 1 >= tokens_.size()) {
+	if (!good())
+		return dummy;
+	// If tokenize_one() has not been called after the last get_token() we
+	// need to tokenize two more tokens.
+	if (pos_ >= tokens_.size())
 		tokenize_one();
+	if (pos_ + 1 >= tokens_.size())
 		tokenize_one();
-	}
 	return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy;
 }
 
@@ -306,10 +332,16 @@ Token const Parser::next_next_token()
 Token const Parser::get_token()
 {
 	static const Token dummy;
-	// if (good()) 
-	// 	cerr << "looking at token " << tokens_[pos_] 
-	// 	     << " pos: " << pos_ << '\n';
-	return good() ? tokens_[pos_++] : dummy;
+	if (!good())
+		return dummy;
+	if (pos_ >= tokens_.size()) {
+		tokenize_one();
+		if (pos_ >= tokens_.size())
+			return dummy;
+	}
+	// cerr << "looking at token " << tokens_[pos_]
+	//      << " pos: " << pos_ << '\n';
+	return tokens_[pos_++];
 }
 
 
@@ -401,6 +433,13 @@ void Parser::popPosition()
 {
 	pos_ = positions_.back();
 	positions_.pop_back();
+	deparse();
+}
+
+
+void Parser::dropPosition()
+{
+	positions_.pop_back();
 }
 
 
@@ -408,12 +447,13 @@ bool Parser::good()
 {
 	if (pos_ < tokens_.size())
 		return true;
-	tokenize_one();
-	return pos_ < tokens_.size();
+	if (!is_.good())
+		return false;
+	return is_.peek() != idocstream::traits_type::eof();
 }
 
 
-bool Parser::hasOpt()
+bool Parser::hasOpt(string const l)
 {
 	// An optional argument can occur in any of the following forms:
 	// - \foo[bar]
@@ -439,7 +479,7 @@ bool Parser::hasOpt()
 		putback();
 		break;
 	}
-	bool const retval = (next_token().asInput() == "[");
+	bool const retval = (next_token().asInput() == l);
 	pos_ = oldpos;
 	return retval;
 }
@@ -454,6 +494,7 @@ Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
 	if (! good())
 		return make_pair(false, string());
 
+	int group_level = 0;
 	string result;
 	Token t = get_token();
 
@@ -464,6 +505,15 @@ Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
 	} else {
 		while (good()) {
 			t = get_token();
+			// honor grouping
+			if (left != '{' && t.cat() == catBegin) {
+				++group_level;
+				continue;
+			}
+			if (left != '{' && t.cat() == catEnd) {
+				--group_level;
+				continue;
+			}
 			// Ignore comments
 			if (t.cat() == catComment) {
 				if (!t.cs().empty())
@@ -471,13 +521,15 @@ Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
 				continue;
 			}
 			if (allow_escaping) {
-				if (t.cat() != catEscape && t.character() == right)
+				if (t.cat() != catEscape && t.character() == right
+				    && group_level == 0)
 					break;
 			} else {
 				if (t.character() == right) {
 					if (t.cat() == catEscape)
 						result += '\\';
-					break;
+					if (group_level == 0)
+						break;
 				}
 			}
 			result += t.asInput();
@@ -493,11 +545,11 @@ string Parser::getArg(char left, char right, bool allow_escaping)
 }
 
 
-string Parser::getFullOpt(bool keepws)
+string Parser::getFullOpt(bool keepws, char left, char right)
 {
-	Arg arg = getFullArg('[', ']');
+	Arg arg = getFullArg(left, right);
 	if (arg.first)
-		return '[' + arg.second + ']';
+		return left + arg.second + right;
 	if (keepws)
 		unskip_spaces(true);
 	return string();
@@ -525,6 +577,26 @@ string Parser::getFullParentheseArg()
 }
 
 
+bool Parser::hasListPreamble(string const itemcmd)
+{
+	// remember current position
+	unsigned int oldpos = pos_;
+	// jump over arguments
+	if (hasOpt())
+		getOpt();
+	if (hasOpt("{"))
+		getArg('{', '}');
+	// and swallow spaces and comments
+	skip_spaces(true);
+	// we have a list preamble if the next thing
+	// that follows is not the \item command
+	bool res =  next_token().cs() != itemcmd;
+	// back to orig position
+	pos_ = oldpos;
+	return res;
+}
+
+
 string const Parser::ertEnvironment(string const & name)
 {
 	if (!good())
@@ -598,11 +670,33 @@ string const Parser::plainCommand(char left, char right, string const & name)
 }
 
 
-string const Parser::verbatimStuff(string const & end_string)
+string const Parser::getCommandLatexParam()
 {
 	if (!good())
 		return string();
+	string res;
+	size_t offset = 0;
+	while (true) {
+		if (pos_ + offset >= tokens_.size())
+			tokenize_one();
+		if (pos_ + offset >= tokens_.size())
+			break;
+		Token t = tokens_[pos_ + offset];
+		if (t.cat() == catBegin)
+			break;
+		res += t.asInput();
+		++offset;
+	}
+	return res;
+}
+
 
+Parser::Arg Parser::verbatimStuff(string const & end_string, bool const allow_linebreak)
+{
+	if (!good())
+		return Arg(false, string());
+
+	pushPosition();
 	ostringstream oss;
 	size_t match_index = 0;
 	setCatcodes(VERBATIM_CATCODES);
@@ -613,22 +707,38 @@ string const Parser::verbatimStuff(string const & end_string)
 			match_index += t.asInput().length();
 			if (match_index >= end_string.length())
 				break;
-		} else if (match_index) {
-			oss << end_string.substr(0, match_index) << t.asInput();
-			match_index = 0;
-		} else
-			oss << t.asInput();
+		} else {
+			if (!allow_linebreak && t.asInput() == "\n") {
+				cerr << "unexpected end of input" << endl;
+				popPosition();
+				setCatcodes(NORMAL_CATCODES);
+				return Arg(false, string());
+			}
+			if (match_index) {
+				oss << end_string.substr(0, match_index)
+				    << t.asInput();
+				match_index = 0;
+			} else
+				oss << t.asInput();
+		}
 	}
-	setCatcodes(NORMAL_CATCODES);
-	if (!good())
+
+	if (!good()) {
 		cerr << "unexpected end of input" << endl;
-	return oss.str();
+		popPosition();
+		setCatcodes(NORMAL_CATCODES);
+		return Arg(false, string());
+	}
+	setCatcodes(NORMAL_CATCODES);
+	dropPosition();
+	return Arg(true, oss.str());
 }
 
 
 string const Parser::verbatimEnvironment(string const & name)
 {
-	string s = verbatimStuff("\\end{" + name + "}");
+	// FIXME: check .first; verbatimStuff() returns (false, "") if the end string is not found
+	string s = verbatimStuff("\\end{" + name + "}").second;
 	// ignore one newline at beginning or end of string
 	if (prefixIs(s, "\n"))
 		s.erase(0,1);
@@ -648,7 +758,7 @@ string Parser::verbatimOption()
 				putback();
 				res += '{' + verbatim_item() + '}';
 			} else
-				res += t.cs();
+				res += t.asInput();
 		}
 	}
 	return res;
@@ -663,7 +773,7 @@ string Parser::verbatim_item()
 	if (next_token().cat() == catBegin) {
 		Token t = get_token(); // skip brace
 		string res;
-		for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
+		for (t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
 			if (t.cat() == catBegin) {
 				putback();
 				res += '{' + verbatim_item() + '}';