X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2FParser.cpp;h=a538c760efe2ac8f83002cd2a581544bf5c8cfe4;hb=a2b21e3cd4bbfd42e59161143eba6e7681aaa93f;hp=d0954cf5c1adb611cbfd9230d76b8cdf7f59736f;hpb=e5a91999278253d597ad1e18f322884df4a4c94e;p=lyx.git

diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp
index d0954cf5c1..a538c760ef 100644
--- a/src/tex2lyx/Parser.cpp
+++ b/src/tex2lyx/Parser.cpp
@@ -12,7 +12,6 @@
 
 #include "Encoding.h"
 #include "Parser.h"
-#include "support/foreach.h"
 #include "support/lstrings.h"
 #include "support/textutils.h"
 
@@ -49,7 +48,7 @@ char_type getNewline(iparserdocstream & is, char_type c)
 	return c;
 }
 
-}
+} // namespace
 
 //
 // Token
@@ -118,14 +117,9 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags)
 // Wrapper
 //
 
-bool iparserdocstream::setEncoding(std::string const & e)
+void iparserdocstream::setEncoding(std::string const & e) // applies encoding e to the wrapped stream is_
 {
 	is_ << lyx::setEncoding(e);
-	if (s_.empty())
-		return true;
-	cerr << "Setting encoding " << e << " too late. The encoding of `"
-	     << to_utf8(s_) << "Â´ is wrong." << std::endl;
-	return false;
 }
 
 
@@ -159,10 +153,15 @@ iparserdocstream & iparserdocstream::get(char_type &c)
 //
 
 
-Parser::Parser(idocstream & is)
-	: lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8"),
-	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
+Parser::Parser(idocstream & is, std::string const & fixedenc)
+	: lineno_(0), pos_(0), iss_(0), is_(is),
+	  encoding_iconv_(fixedenc.empty() ? "UTF-8" : fixedenc), // UTF-8 unless a fixed encoding is given
+	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
+	  fixed_enc_(!fixedenc.empty()) // a non-empty fixedenc marks the encoding as fixed
 {
+	if (fixed_enc_) // apply the fixed encoding to the stream right away
+		is_.setEncoding(fixedenc);
+	catInit();
 }
 
 
@@ -170,8 +169,11 @@ Parser::Parser(string const & s)
 	: lineno_(0), pos_(0),
 	  iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
 	  encoding_iconv_("UTF-8"),
-	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES)
+	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
+	  // An idocstringstream can not change the encoding
+	  fixed_enc_(true)
 {
+	catInit();
 }
 
 
@@ -266,7 +268,13 @@ bool Parser::setEncoding(std::string const & e)
 {
 	//cerr << "setting encoding to " << e << std::endl;
 	encoding_iconv_ = e;
-	return is_.setEncoding(e);
+	// If the encoding is fixed, we must not change the stream encoding
+	// (because the whole input uses that encoding, e.g. if it comes from
+	// the clipboard). We still need to track the original encoding in
+	// encoding_iconv_, so that the generated output is correct.
+	if (!fixed_enc_)
+		is_.setEncoding(e);
+	return true;
 }
 
 
@@ -312,11 +320,10 @@ Token const Parser::next_next_token()
 		return dummy;
 	// If tokenize_one() has not been called after the last get_token() we
 	// need to tokenize two more tokens.
-	if (pos_ >= tokens_.size()) {
+	if (pos_ >= tokens_.size())
+		tokenize_one();
+	if (pos_ + 1 >= tokens_.size())
 		tokenize_one();
-		if (pos_ + 1 >= tokens_.size())
-			tokenize_one();
-	}
 	return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy;
 }
 
@@ -332,7 +339,7 @@ Token const Parser::get_token()
 		if (pos_ >= tokens_.size())
 			return dummy;
 	}
-	// cerr << "looking at token " << tokens_[pos_] 
+	// cerr << "looking at token " << tokens_[pos_]
 	//      << " pos: " << pos_ << '\n';
 	return tokens_[pos_++];
 }
@@ -446,7 +453,7 @@ bool Parser::good()
 }
 
 
-bool Parser::hasOpt()
+bool Parser::hasOpt(string const l)
 {
 	// An optional argument can occur in any of the following forms:
 	// - \foo[bar]
@@ -472,7 +479,7 @@ bool Parser::hasOpt()
 		putback();
 		break;
 	}
-	bool const retval = (next_token().asInput() == "[");
+	bool const retval = (next_token().asInput() == l);
 	pos_ = oldpos;
 	return retval;
 }
@@ -487,6 +494,7 @@ Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
 	if (! good())
 		return make_pair(false, string());
 
+	int group_level = 0;
 	string result;
 	Token t = get_token();
 
@@ -497,6 +505,15 @@ Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
 	} else {
 		while (good()) {
 			t = get_token();
+			// honor grouping
+			if (left != '{' && t.cat() == catBegin) {
+				++group_level;
+				continue;
+			}
+			if (left != '{' && t.cat() == catEnd) {
+				--group_level;
+				continue;
+			}
 			// Ignore comments
 			if (t.cat() == catComment) {
 				if (!t.cs().empty())
@@ -504,13 +521,15 @@ Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
 				continue;
 			}
 			if (allow_escaping) {
-				if (t.cat() != catEscape && t.character() == right)
+				if (t.cat() != catEscape && t.character() == right
+				    && group_level == 0)
 					break;
 			} else {
 				if (t.character() == right) {
 					if (t.cat() == catEscape)
 						result += '\\';
-					break;
+					if (group_level == 0)
+						break;
 				}
 			}
 			result += t.asInput();
@@ -526,11 +545,11 @@ string Parser::getArg(char left, char right, bool allow_escaping)
 }
 
 
-string Parser::getFullOpt(bool keepws)
+string Parser::getFullOpt(bool keepws, char left, char right)
 {
-	Arg arg = getFullArg('[', ']');
+	Arg arg = getFullArg(left, right);
 	if (arg.first)
-		return '[' + arg.second + ']';
+		return left + arg.second + right;
 	if (keepws)
 		unskip_spaces(true);
 	return string();
@@ -558,6 +577,26 @@ string Parser::getFullParentheseArg()
 }
 
 
+bool Parser::hasListPreamble(string const itemcmd)
+{
+	// Pure look-ahead: save the token position so it can be restored below.
+	unsigned int oldpos = pos_;
+	// Skip an optional argument and then a brace-delimited argument, if present.
+	if (hasOpt())
+		getOpt();
+	if (hasOpt("{"))
+		getArg('{', '}');
+	// Then swallow any spaces and comments that follow the arguments.
+	skip_spaces(true);
+	// A list preamble is present when the control sequence of the next
+	// token differs from itemcmd (the \item command of the list).
+	bool res =  next_token().cs() != itemcmd;
+	// Restore the saved position so the caller sees the stream unchanged.
+	pos_ = oldpos;
+	return res;
+}
+
+
 string const Parser::ertEnvironment(string const & name)
 {
 	if (!good())
@@ -631,6 +670,27 @@ string const Parser::plainCommand(char left, char right, string const & name)
 }
 
 
+string const Parser::getCommandLatexParam()
+{
+	if (!good()) // parser is not good(): report an empty string
+		return string();
+	string res;
+	size_t offset = 0; // look-ahead distance from pos_; pos_ itself is never advanced here
+	while (true) {
+		if (pos_ + offset >= tokens_.size()) // look-ahead ran past the buffered tokens
+			tokenize_one();
+		if (pos_ + offset >= tokens_.size()) // still no token at that index: stop
+			break;
+		Token t = tokens_[pos_ + offset];
+		if (t.cat() == catBegin) // stop in front of the first catBegin token
+			break;
+		res += t.asInput(); // collect everything seen before that token
+		++offset;
+	}
+	return res;
+}
+
+
 Parser::Arg Parser::verbatimStuff(string const & end_string, bool const allow_linebreak)
 {
 	if (!good())
@@ -655,7 +715,7 @@ Parser::Arg Parser::verbatimStuff(string const & end_string, bool const allow_li
 				return Arg(false, string());
 			}
 			if (match_index) {
-				oss << end_string.substr(0, match_index) 
+				oss << end_string.substr(0, match_index)
 				    << t.asInput();
 				match_index = 0;
 			} else
@@ -698,7 +758,7 @@ string Parser::verbatimOption()
 				putback();
 				res += '{' + verbatim_item() + '}';
 			} else
-				res += t.cs();
+				res += t.asInput();
 		}
 	}
 	return res;
@@ -713,7 +773,7 @@ string Parser::verbatim_item()
 	if (next_token().cat() == catBegin) {
 		Token t = get_token(); // skip brace
 		string res;
-		for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
+		for (t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
 			if (t.cat() == catBegin) {
 				putback();
 				res += '{' + verbatim_item() + '}';