X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Ftex2lyx%2FParser.cpp;h=20f5058901e163d9ab7f54eece6a73b0b31e1cd3;hb=aa9fed932225bb9344a056df3610258f19fa94dd;hp=83f4c479805e7aac7f16ae1f538a87221ed1d945;hpb=337c6d157754e2f613ddbc3bca072ca658282edf;p=lyx.git diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp index 83f4c47980..20f5058901 100644 --- a/src/tex2lyx/Parser.cpp +++ b/src/tex2lyx/Parser.cpp @@ -10,7 +10,9 @@ #include +#include "Encoding.h" #include "Parser.h" +#include "support/textutils.h" #include @@ -58,7 +60,7 @@ void catInit() * \p c must have catcode catNewline, and it must be the last character read * from \p is. */ -char getNewline(idocstream & is, char c) +char_type getNewline(idocstream & is, char_type c) { // we have to handle 3 different line endings: // - UNIX (\n) @@ -109,12 +111,6 @@ ostream & operator<<(ostream & os, Token const & t) } -string Token::asString() const -{ - return cs_; -} - - string Token::asInput() const { if (cat_ == catComment) @@ -125,20 +121,52 @@ string Token::asInput() const } +bool Token::isAlnumASCII() const +{ + return cat_ == catLetter || + (cat_ == catOther && cs_.length() == 1 && isDigitASCII(cs_[0])); +} + + +#ifdef FILEDEBUG +void debugToken(std::ostream & os, Token const & t, unsigned int flags) +{ + char sep = ' '; + os << "t: " << t << " flags: " << flags; + if (flags & FLAG_BRACE_LAST) { os << sep << "BRACE_LAST"; sep = '|'; } + if (flags & FLAG_RIGHT ) { os << sep << "RIGHT" ; sep = '|'; } + if (flags & FLAG_END ) { os << sep << "END" ; sep = '|'; } + if (flags & FLAG_BRACK_LAST) { os << sep << "BRACK_LAST"; sep = '|'; } + if (flags & FLAG_TEXTMODE ) { os << sep << "TEXTMODE" ; sep = '|'; } + if (flags & FLAG_ITEM ) { os << sep << "ITEM" ; sep = '|'; } + if (flags & FLAG_LEAVE ) { os << sep << "LEAVE" ; sep = '|'; } + if (flags & FLAG_SIMPLE ) { os << sep << "SIMPLE" ; sep = '|'; } + if (flags & FLAG_EQUATION ) { os << sep << "EQUATION" ; sep = '|'; } + if (flags & FLAG_SIMPLE2 ) { os << sep << "SIMPLE2" ; sep = '|'; } + if (flags & FLAG_OPTION ) { os << sep << "OPTION" ; sep = '|'; } + if (flags & FLAG_BRACED ) { os << sep << "BRACED" ; sep = '|'; } + if (flags & FLAG_CELL ) { os << sep << "CELL" ; sep = '|'; } + if (flags & FLAG_TABBING ) { os << sep << "TABBING" ; sep = '|'; } + os << "\n"; +} +#endif + + // // Parser // Parser::Parser(idocstream & is) - : lineno_(0), pos_(0), iss_(0), is_(is) + : lineno_(0), pos_(0), iss_(0), is_(is), encoding_latex_("utf8") { } Parser::Parser(string const & s) : lineno_(0), pos_(0), - iss_(new idocstringstream(from_utf8(s))), is_(*iss_) + iss_(new idocstringstream(from_utf8(s))), is_(*iss_), + encoding_latex_("utf8") { } @@ -149,34 +177,65 @@ Parser::~Parser() } +void Parser::setEncoding(std::string const & e) +{ + Encoding const * enc = encodings.fromLaTeXName(e); + if (!enc) { + cerr << "Unknown encoding " << e << ". Ignoring." << std::endl; + return; + } + //cerr << "setting encoding to " << enc->iconvName() << std::endl; + is_ << lyx::setEncoding(enc->iconvName()); + encoding_latex_ = e; +} + + void Parser::push_back(Token const & t) { tokens_.push_back(t); } -Token const & Parser::prev_token() const +// We return a copy here because the tokens_ vector may get reallocated +Token const Parser::prev_token() const { static const Token dummy; return pos_ > 1 ? tokens_[pos_ - 2] : dummy; } -Token const & Parser::curr_token() const +// We return a copy here because the tokens_ vector may get reallocated +Token const Parser::curr_token() const { static const Token dummy; return pos_ > 0 ? tokens_[pos_ - 1] : dummy; } -Token const & Parser::next_token() +// We return a copy here because the tokens_ vector may get reallocated +Token const Parser::next_token() { static const Token dummy; return good() ? tokens_[pos_] : dummy; } -Token const & Parser::get_token() +// We return a copy here because the tokens_ vector may get reallocated +Token const Parser::next_next_token() +{ + static const Token dummy; + // If good() has not been called after the last get_token() we need + // to tokenize two more tokens. + if (pos_ + 1 >= tokens_.size()) { + tokenize_one(); + tokenize_one(); + } + return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy; +} + + +// We return a copy here because the tokens_ vector may get reallocated +Token const Parser::get_token() { static const Token dummy; //cerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n'; @@ -193,8 +252,7 @@ bool Parser::isParagraph() if (curr_token().cat() == catNewline && (curr_token().cs().size() > 1 || (next_token().cat() == catSpace && - pos_ < tokens_.size() - 1 && - tokens_[pos_ + 1].cat() == catNewline))) + next_next_token().cat() == catNewline))) return true; if (curr_token().cat() == catEscape && curr_token().cs() == "par") return true; @@ -202,28 +260,37 @@ bool Parser::isParagraph() } -void Parser::skip_spaces(bool skip_comments) +bool Parser::skip_spaces(bool skip_comments) { // We just silently return if we have no more tokens. // skip_spaces() should be callable at any time, // the caller must check p::good() anyway. + bool skipped = false; while (good()) { get_token(); if (isParagraph()) { putback(); break; } - if ( curr_token().cat() == catSpace || - curr_token().cat() == catNewline || - (curr_token().cat() == catComment && curr_token().cs().empty())) + if (curr_token().cat() == catSpace || + curr_token().cat() == catNewline) { + skipped = true; + continue; + } + if ((curr_token().cat() == catComment && curr_token().cs().empty())) continue; - if (skip_comments && curr_token().cat() == catComment) - cerr << " Ignoring comment: " << curr_token().asInput(); - else { + if (skip_comments && curr_token().cat() == catComment) { + // If positions_ is not empty we are doing some kind + // of look ahead + if (!positions_.empty()) + cerr << " Ignoring comment: " + << curr_token().asInput(); + } else { putback(); break; } } + return skipped; } @@ -235,7 +302,11 @@ void Parser::unskip_spaces(bool skip_comments) putback(); else if (skip_comments && curr_token().cat() == catComment) { // TODO: Get rid of this - cerr << "Unignoring comment: " << curr_token().asInput(); + // If positions_ is not empty we are doing some kind + // of look ahead + if (!positions_.empty()) + cerr << "Unignoring comment: " + << curr_token().asInput(); putback(); } else @@ -250,6 +321,19 @@ void Parser::putback() } +void Parser::pushPosition() +{ + positions_.push_back(pos_); +} + + +void Parser::popPosition() +{ + pos_ = positions_.back(); + positions_.pop_back(); +} + + bool Parser::good() { if (pos_ < tokens_.size()) @@ -267,6 +351,38 @@ char Parser::getChar() } +bool Parser::hasOpt() +{ + // An optional argument can occur in any of the following forms: + // - \foo[bar] + // - \foo [bar] + // - \foo + // [bar] + // - \foo %comment + // [bar] + + // remember current position + unsigned int oldpos = pos_; + // skip spaces and comments + while (good()) { + get_token(); + if (isParagraph()) { + putback(); + break; + } + if (curr_token().cat() == catSpace || + curr_token().cat() == catNewline || + curr_token().cat() == catComment) + continue; + putback(); + break; + } + bool const retval = (next_token().asInput() == "["); + pos_ = oldpos; + return retval; +} + + Parser::Arg Parser::getFullArg(char left, char right) { skip_spaces(true); @@ -303,19 +419,26 @@ string Parser::getArg(char left, char right) } -string Parser::getFullOpt() +string Parser::getFullOpt(bool keepws) { Arg arg = getFullArg('[', ']'); if (arg.first) return '[' + arg.second + ']'; + if (keepws) + unskip_spaces(true); return string(); } -string Parser::getOpt() +string Parser::getOpt(bool keepws) { string const res = getArg('[', ']'); - return res.empty() ? string() : '[' + res + ']'; + if (res.empty()) { + if (keepws) + unskip_spaces(true); + return string(); + } + return '[' + res + ']'; } @@ -358,6 +481,49 @@ string const Parser::verbatimEnvironment(string const & name) } +string const Parser::plainEnvironment(string const & name) +{ + if (!good()) + return string(); + + ostringstream os; + for (Token t = get_token(); good(); t = get_token()) { + if (t.asInput() == "\\end") { + string const end = getArg('{', '}'); + if (end == name) + return os.str(); + else + os << "\\end{" << end << '}'; + } else + os << t.asInput(); + } + cerr << "unexpected end of input" << endl; + return os.str(); +} + + +string const Parser::plainCommand(char left, char right, string const & name) +{ + if (!good()) + return string(); + // check if first token is really the start character + Token tok = get_token(); + if (tok.character() != left) { + cerr << "first character does not match start character of command \\" << name << endl; + return string(); + } + ostringstream os; + for (Token t = get_token(); good(); t = get_token()) { + if (t.character() == right) { + return os.str(); + } else + os << t.asInput(); + } + cerr << "unexpected end of input" << endl; + return os.str(); +} + + void Parser::tokenize_one() { catInit(); @@ -465,7 +631,7 @@ string Parser::verbatimOption() putback(); res += '{' + verbatim_item() + '}'; } else - res += t.asString(); + res += t.cs(); } } return res;