X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2FParser.cpp;h=a538c760efe2ac8f83002cd2a581544bf5c8cfe4;hb=a2b21e3cd4bbfd42e59161143eba6e7681aaa93f;hp=d0954cf5c1adb611cbfd9230d76b8cdf7f59736f;hpb=e5a91999278253d597ad1e18f322884df4a4c94e;p=lyx.git diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp index d0954cf5c1..a538c760ef 100644 --- a/src/tex2lyx/Parser.cpp +++ b/src/tex2lyx/Parser.cpp @@ -12,7 +12,6 @@ #include "Encoding.h" #include "Parser.h" -#include "support/foreach.h" #include "support/lstrings.h" #include "support/textutils.h" @@ -49,7 +48,7 @@ char_type getNewline(iparserdocstream & is, char_type c) return c; } -} +} // namespace // // Token @@ -118,14 +117,9 @@ void debugToken(std::ostream & os, Token const & t, unsigned int flags) // Wrapper // -bool iparserdocstream::setEncoding(std::string const & e) +void iparserdocstream::setEncoding(std::string const & e) { is_ << lyx::setEncoding(e); - if (s_.empty()) - return true; - cerr << "Setting encoding " << e << " too late. The encoding of `" - << to_utf8(s_) << "´ is wrong." << std::endl; - return false; } @@ -159,10 +153,15 @@ iparserdocstream & iparserdocstream::get(char_type &c) // -Parser::Parser(idocstream & is) - : lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8"), - theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES) +Parser::Parser(idocstream & is, std::string const & fixedenc) + : lineno_(0), pos_(0), iss_(0), is_(is), + encoding_iconv_(fixedenc.empty() ? "UTF-8" : fixedenc), + theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES), + fixed_enc_(!fixedenc.empty()) { + if (fixed_enc_) + is_.setEncoding(fixedenc); + catInit(); } @@ -170,8 +169,11 @@ Parser::Parser(string const & s) : lineno_(0), pos_(0), iss_(new idocstringstream(from_utf8(s))), is_(*iss_), encoding_iconv_("UTF-8"), - theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES) + theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES), + // An idocstringstream can not change the encoding + fixed_enc_(true) { + catInit(); } @@ -266,7 +268,13 @@ bool Parser::setEncoding(std::string const & e) { //cerr << "setting encoding to " << e << std::endl; encoding_iconv_ = e; - return is_.setEncoding(e); + // If the encoding is fixed, we must not change the stream encoding + // (because the whole input uses that encoding, e.g. if it comes from + // the clipboard). We still need to track the original encoding in + // encoding_iconv_, so that the generated output is correct. + if (!fixed_enc_) + is_.setEncoding(e); + return true; } @@ -312,11 +320,10 @@ Token const Parser::next_next_token() return dummy; // If tokenize_one() has not been called after the last get_token() we // need to tokenize two more tokens. - if (pos_ >= tokens_.size()) { + if (pos_ >= tokens_.size()) + tokenize_one(); + if (pos_ + 1 >= tokens_.size()) tokenize_one(); - if (pos_ + 1 >= tokens_.size()) - tokenize_one(); - } return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy; } @@ -332,7 +339,7 @@ Token const Parser::get_token() if (pos_ >= tokens_.size()) return dummy; } - // cerr << "looking at token " << tokens_[pos_] + // cerr << "looking at token " << tokens_[pos_] // << " pos: " << pos_ << '\n'; return tokens_[pos_++]; } @@ -446,7 +453,7 @@ bool Parser::good() } -bool Parser::hasOpt() +bool Parser::hasOpt(string const l) { // An optional argument can occur in any of the following forms: // - \foo[bar] @@ -472,7 +479,7 @@ bool Parser::hasOpt() putback(); break; } - bool const retval = (next_token().asInput() == "["); + bool const retval = (next_token().asInput() == l); pos_ = oldpos; return retval; } @@ -487,6 +494,7 @@ Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping) if (! good()) return make_pair(false, string()); + int group_level = 0; string result; Token t = get_token(); @@ -497,6 +505,15 @@ Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping) } else { while (good()) { t = get_token(); + // honor grouping + if (left != '{' && t.cat() == catBegin) { + ++group_level; + continue; + } + if (left != '{' && t.cat() == catEnd) { + --group_level; + continue; + } // Ignore comments if (t.cat() == catComment) { if (!t.cs().empty()) @@ -504,13 +521,15 @@ Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping) continue; } if (allow_escaping) { - if (t.cat() != catEscape && t.character() == right) + if (t.cat() != catEscape && t.character() == right + && group_level == 0) break; } else { if (t.character() == right) { if (t.cat() == catEscape) result += '\\'; - break; + if (group_level == 0) + break; } } result += t.asInput(); @@ -526,11 +545,11 @@ string Parser::getArg(char left, char right, bool allow_escaping) } -string Parser::getFullOpt(bool keepws) +string Parser::getFullOpt(bool keepws, char left, char right) { - Arg arg = getFullArg('[', ']'); + Arg arg = getFullArg(left, right); if (arg.first) - return '[' + arg.second + ']'; + return left + arg.second + right; if (keepws) unskip_spaces(true); return string(); @@ -558,6 +577,26 @@ string Parser::getFullParentheseArg() } +bool Parser::hasListPreamble(string const itemcmd) +{ + // remember current position + unsigned int oldpos = pos_; + // jump over arguments + if (hasOpt()) + getOpt(); + if (hasOpt("{")) + getArg('{', '}'); + // and swallow spaces and comments + skip_spaces(true); + // we have a list preamble if the next thing + // that follows is not the \item command + bool res = next_token().cs() != itemcmd; + // back to orig position + pos_ = oldpos; + return res; +} + + string const Parser::ertEnvironment(string const & name) { if (!good()) @@ -631,6 +670,27 @@ string const Parser::plainCommand(char left, char right, string const & name) } +string const Parser::getCommandLatexParam() +{ + if (!good()) + return string(); + string res; + size_t offset = 0; + while (true) { + if (pos_ + offset >= tokens_.size()) + tokenize_one(); + if (pos_ + offset >= tokens_.size()) + break; + Token t = tokens_[pos_ + offset]; + if (t.cat() == catBegin) + break; + res += t.asInput(); + ++offset; + } + return res; +} + + Parser::Arg Parser::verbatimStuff(string const & end_string, bool const allow_linebreak) { if (!good()) @@ -655,7 +715,7 @@ Parser::Arg Parser::verbatimStuff(string const & end_string, bool const allow_li return Arg(false, string()); } if (match_index) { - oss << end_string.substr(0, match_index) + oss << end_string.substr(0, match_index) << t.asInput(); match_index = 0; } else @@ -698,7 +758,7 @@ string Parser::verbatimOption() putback(); res += '{' + verbatim_item() + '}'; } else - res += t.cs(); + res += t.asInput(); } } return res; @@ -713,7 +773,7 @@ string Parser::verbatim_item() if (next_token().cat() == catBegin) { Token t = get_token(); // skip brace string res; - for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) { + for (t = get_token(); t.cat() != catEnd && good(); t = get_token()) { if (t.cat() == catBegin) { putback(); res += '{' + verbatim_item() + '}';