+/**
+ * \file texparser.C
+ * This file is part of LyX, the document processor.
+ * Licence details can be found in the file COPYING.
+ *
+ * \author André Pönitz
+ *
+ * Full author contact details are available in file CREDITS.
+ */
#include <config.h>
#include "texparser.h"
-#include "Lsstream.h"
#include <iostream>
+#include <sstream>
+
+
+namespace lyx {
using std::cerr;
using std::endl;
using std::fill;
-using std::ios;
using std::istream;
using std::istringstream;
+using std::ostringstream;
using std::ostream;
-using std::vector;
+using std::string;
namespace {
CatCode theCatcode[256];
-void skipSpaceTokens(istream & is, char c)
-{
- // skip trailing spaces
- while (catcode(c) == catSpace || catcode(c) == catNewline)
- if (!is.get(c))
- break;
- //cerr << "putting back: " << c << "\n";
- is.putback(c);
-}
-
-
+/*!
+ * Initialise the category code table theCatcode.
+ * Every entry starts out as catOther, the ASCII letters become catLetter,
+ * and the characters listed below get their individual categories.
+ */
void catInit()
{
fill(theCatcode, theCatcode + 256, catOther);
fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
- theCatcode['\\'] = catEscape;
- theCatcode['{'] = catBegin;
- theCatcode['}'] = catEnd;
- theCatcode['$'] = catMath;
- theCatcode['&'] = catAlign;
- theCatcode[10] = catNewline;
- theCatcode['#'] = catParameter;
- theCatcode['^'] = catSuper;
- theCatcode['_'] = catSub;
- theCatcode['\7f'] = catIgnore;
- theCatcode[' '] = catSpace;
- theCatcode['\t'] = catSpace;
- theCatcode[13] = catIgnore;
- theCatcode['~'] = catActive;
- theCatcode['%'] = catComment;
+ theCatcode[int('\\')] = catEscape;
+ theCatcode[int('{')] = catBegin;
+ theCatcode[int('}')] = catEnd;
+ theCatcode[int('$')] = catMath;
+ theCatcode[int('&')] = catAlign;
+ theCatcode[int('\n')] = catNewline;
+ theCatcode[int('#')] = catParameter;
+ theCatcode[int('^')] = catSuper;
+ theCatcode[int('_')] = catSub;
+ theCatcode[0x7f] = catIgnore;
+ theCatcode[int(' ')] = catSpace;
+ theCatcode[int('\t')] = catSpace;
+ // '\r' is a line ending of its own (it used to be ignored), so that
+ // MAC and DOS line endings can be recognised by the tokenizer.
+ theCatcode[int('\r')] = catNewline;
+ theCatcode[int('~')] = catActive;
+ theCatcode[int('%')] = catComment;
// This is wrong!
- theCatcode['@'] = catLetter;
+ theCatcode[int('@')] = catLetter;
+}
+
+
+/*!
+ * Translate a line ending to '\n'.
+ * \p c must have catcode catNewline, and it must be the last character read
+ * from \p is.
+ * If \p c is '\r', one more character is read from \p is: a following '\n'
+ * (DOS line ending) is consumed, any other character is put back.
+ * \return '\n' if \p c is '\r', otherwise \p c unchanged.
+ */
+char getNewline(istream & is, char c)
+{
+ // we have to handle 3 different line endings:
+ // - UNIX (\n)
+ // - MAC (\r)
+ // - DOS (\r\n)
+ if (c == '\r') {
+ // MAC or DOS
+ if (is.get(c) && c != '\n') {
+ // MAC
+ is.putback(c);
+ }
+ return '\n';
+ }
+ // UNIX
+ return c;
}
}
-//
+//
// catcodes
//
-mode_type asMode(mode_type oldmode, string const & str)
-{
- if (str == "mathmode")
- return MATH_MODE;
- if (str == "textmode" || str == "forcetext")
- return TEXT_MODE;
- return oldmode;
-}
-
-
CatCode catcode(unsigned char c)
{
return theCatcode[c];
ostream & operator<<(ostream & os, Token const & t)
{
- if (t.cs().size())
+ if (t.cat() == catComment)
+ os << '%' << t.cs() << '\n';
+ else if (t.cat() == catSpace)
+ os << t.cs();
+ else if (t.cat() == catEscape)
os << '\\' << t.cs() << ' ';
else if (t.cat() == catLetter)
os << t.character();
else if (t.cat() == catNewline)
- os << "[\\n," << t.cat() << "]\n";
+ os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
else
os << '[' << t.character() << ',' << t.cat() << ']';
return os;
+/*!
+ * Reconstruct input text for this token.
+ * - Comment tokens get the leading '%' and the trailing '\n' back.
+ * - Space and newline tokens return their stored string cs_.
+ * - Any other token is either its single character char_ or, if char_ is
+ *   zero, a backslash followed by cs_ (no trailing space is appended).
+ */
string Token::asInput() const
{
- return char_ ? string(1, char_) : '\\' + cs_ + ' ';
+ if (cat_ == catComment)
+ return '%' + cs_ + '\n';
+ if (cat_ == catSpace || cat_ == catNewline)
+ return cs_;
+ return char_ ? string(1, char_) : '\\' + cs_;
}
+/*!
+ * Construct a parser from the string \p s.
+ * The string is wrapped in an istringstream and tokenized immediately;
+ * lineno_ and pos_ both start at 0.
+ */
Parser::Parser(string const & s)
: lineno_(0), pos_(0)
{
- istringstream is(STRCONV(s));
+ istringstream is(s);
tokenize(is);
}
+/*!
+ * \return tokens_[pos_ - 2], i.e. the token before the one returned by
+ * curr_token(), or a default constructed Token if pos_ is not greater
+ * than 1.
+ */
Token const & Parser::prev_token() const
+{
+ static const Token dummy;
+ return pos_ > 1 ? tokens_[pos_ - 2] : dummy;
+}
+
+
+/*!
+ * \return tokens_[pos_ - 1], or a default constructed Token if pos_ is
+ * not greater than 0.
+ */
+Token const & Parser::curr_token() const
{
static const Token dummy;
return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
}
-void Parser::skip_spaces()
+/*!
+ * \return true if the current token starts a new paragraph.
+ */
+bool Parser::isParagraph() const
+{
+ // A new paragraph in TeX is started
+ // - either by a newline, following any amount of whitespace
+ // characters (including zero), and another newline
+ // - or the token \par
+ // A newline token whose string is longer than one character holds
+ // several consecutive line endings, so it is a paragraph break on
+ // its own. Otherwise we look for newline - space - newline;
+ // tokens_[pos_ + 1] is only accessed after the bounds test.
+ if (curr_token().cat() == catNewline &&
+ (curr_token().cs().size() > 1 ||
+ (next_token().cat() == catSpace &&
+ pos_ < tokens_.size() - 1 &&
+ tokens_[pos_ + 1].cat() == catNewline)))
+ return true;
+ if (curr_token().cat() == catEscape && curr_token().cs() == "par")
+ return true;
+ return false;
+}
+
+
+/*!
+ * Skip space tokens, newline tokens and empty comments.
+ * If \p skip_comments is true, non-empty comments are skipped as well and
+ * reported on cerr. The function stops in front of a paragraph break
+ * (see isParagraph()) or in front of the first token that is not skipped;
+ * that token is put back.
+ */
+void Parser::skip_spaces(bool skip_comments)
{
- while (1) {
- if (next_token().cat() == catSpace || next_token().cat() == catNewline)
- get_token();
- else if (next_token().cat() == catComment)
- while (next_token().cat() != catNewline)
- get_token();
+ // We just silently return if we have no more tokens.
+ // skip_spaces() should be callable at any time,
+ // the caller must check p::good() anyway.
+ while (good()) {
+ get_token();
+ if (isParagraph()) {
+ putback();
+ break;
+ }
+ if ( curr_token().cat() == catSpace ||
+ curr_token().cat() == catNewline ||
+ (curr_token().cat() == catComment && curr_token().cs().empty()))
+ continue;
+ if (skip_comments && curr_token().cat() == catComment)
+ cerr << " Ignoring comment: " << curr_token().asInput();
+ else {
+ putback();
+ break;
+ }
+ }
+}
+
+
+/*!
+ * Step backwards over the current token as long as it is a space token,
+ * a newline token holding exactly one line ending, or (only if
+ * \p skip_comments is true) a comment token.
+ * NOTE(review): skip_spaces() skips empty comments even when
+ * \p skip_comments is false, but they are not put back here in that
+ * case - confirm that this asymmetry is intended.
+ */
+void Parser::unskip_spaces(bool skip_comments)
+{
+ while (pos_ > 0) {
+ if ( curr_token().cat() == catSpace ||
+ (curr_token().cat() == catNewline && curr_token().cs().size() == 1))
+ putback();
+ else if (skip_comments && curr_token().cat() == catComment) {
+ // TODO: Get rid of this
+ cerr << "Unignoring comment: " << curr_token().asInput();
+ putback();
+ }
else
break;
}
}
-string Parser::getArg(char left, char right)
+/*!
+ * Read an argument delimited by \p left and \p right.
+ * Leading whitespace and comments are skipped first.
+ * \return a pair whose first member tells whether the delimiter \p left
+ * was found and whose second member is the argument text without the
+ * delimiters. Comments inside the argument are dropped.
+ * The first \p right ends the argument, nested delimiters are not
+ * tracked. If the input ends before \p right is seen, the text collected
+ * so far is returned with first == true.
+ */
+Parser::Arg Parser::getFullArg(char left, char right)
{
- skip_spaces();
+ skip_spaces(true);
+
+ // This is needed if a partial file ends with a command without arguments,
+ // e. g. \medskip
+ if (! good())
+ return std::make_pair(false, string());
string result;
char c = getChar();
- if (c != left)
+ if (c != left) {
putback();
- else
- while ((c = getChar()) != right && good())
- result += c;
+ return std::make_pair(false, string());
+ } else
+ while ((c = getChar()) != right && good()) {
+ // Ignore comments
+ if (curr_token().cat() == catComment) {
+ if (!curr_token().cs().empty())
+ cerr << "Ignoring comment: " << curr_token().asInput();
+ }
+ else
+ result += curr_token().asInput();
+ }
- return result;
+ return std::make_pair(true, result);
+}
+
+
+/*!
+ * Like getFullArg(), but only the argument text is returned. A missing
+ * argument and an empty argument both yield an empty string.
+ */
+string Parser::getArg(char left, char right)
+{
+ return getFullArg(left, right).second;
+}
+
+
+/*!
+ * Read an optional argument delimited by '[' and ']'.
+ * \return the argument including the brackets if an opening bracket was
+ * found (so an empty optional argument yields "[]"), otherwise an empty
+ * string.
+ */
+string Parser::getFullOpt()
+{
+ Arg arg = getFullArg('[', ']');
+ if (arg.first)
+ return '[' + arg.second + ']';
+ return arg.second;
}
+/*!
+ * Read an optional argument delimited by '[' and ']'.
+ * \return the argument including the brackets, or an empty string if the
+ * argument text is empty (no argument or nothing between the brackets).
+ */
string Parser::getOpt()
{
- string res = getArg('[', ']');
- return res.size() ? '[' + res + ']' : string();
+ string const res = getArg('[', ']');
+ return res.empty() ? string() : '[' + res + ']';
+}
+
+
+/*!
+ * Collect the input text of all tokens up to the \\end that closes the
+ * environment \p name.
+ * Brace groups are copied via verbatim_item(), nested environments are
+ * handled by a recursive call and written back with their \\begin and
+ * \\end. The closing \\end{name} itself is consumed but not part of the
+ * result. A message is written to cerr if the name given to \\end does
+ * not match \p name, or if the input ends before an \\end is found.
+ * \return the collected text; empty if no tokens are left on entry.
+ */
+string const Parser::verbatimEnvironment(string const & name)
+{
+ if (!good())
+ return string();
+
+ ostringstream os;
+ for (Token t = get_token(); good(); t = get_token()) {
+ if (t.cat() == catBegin) {
+ putback();
+ os << '{' << verbatim_item() << '}';
+ } else if (t.asInput() == "\\begin") {
+ string const env = getArg('{', '}');
+ os << "\\begin{" << env << '}'
+ << verbatimEnvironment(env)
+ << "\\end{" << env << '}';
+ } else if (t.asInput() == "\\end") {
+ string const end = getArg('{', '}');
+ if (end != name)
+ cerr << "\\end{" << end
+ << "} does not match \\begin{" << name
+ << "}." << endl;
+ return os.str();
+ } else
+ os << t.asInput();
+ }
+ cerr << "unexpected end of input" << endl;
+ return os.str();
}
//cerr << "reading c: " << c << "\n";
switch (catcode(c)) {
+ case catSpace: {
+ string s(1, c);
+ while (is.get(c) && catcode(c) == catSpace)
+ s += c;
+ if (catcode(c) != catSpace)
+ is.putback(c);
+ push_back(Token(s, catSpace));
+ break;
+ }
+
case catNewline: {
++lineno_;
- is.get(c);
- if (catcode(c) == catNewline) {
- //do {
- is.get(c);
- //} while (catcode(c) == catNewline);
- push_back(Token("par"));
- } else {
- push_back(Token('\n', catNewline));
+ string s(1, getNewline(is, c));
+ while (is.get(c) && catcode(c) == catNewline) {
+ ++lineno_;
+ s += getNewline(is, c);
}
- is.putback(c);
+ if (catcode(c) != catNewline)
+ is.putback(c);
+ push_back(Token(s, catNewline));
break;
}
-/*
case catComment: {
+ // We don't treat "%\n" combinations here specially because
+ // we want to preserve them in the preamble
+ string s;
while (is.get(c) && catcode(c) != catNewline)
- ;
+ s += c;
+ // handle possible DOS line ending
+ if (catcode(c) == catNewline)
+ c = getNewline(is, c);
+ // Note: The '%' at the beginning and the '\n' at the end
+ // of the comment are not stored.
++lineno_;
+ push_back(Token(s, catComment));
break;
}
-*/
case catEscape: {
is.get(c);
// collect letters
while (is.get(c) && catcode(c) == catLetter)
s += c;
- skipSpaceTokens(is, c);
+ if (catcode(c) != catLetter)
+ is.putback(c);
}
- push_back(Token(s));
+ push_back(Token(s, catEscape));
}
break;
}
- case catSuper:
- case catSub: {
- push_back(Token(c, catcode(c)));
- is.get(c);
- skipSpaceTokens(is, c);
- break;
- }
-
case catIgnore: {
- if (c != 13)
- cerr << "ignoring a char: " << int(c) << "\n";
+ cerr << "ignoring a char: " << int(c) << "\n";
break;
}
}
+/*!
+ * Set the read position pos_ back to 0. No other member is changed.
+ */
+void Parser::reset()
+{
+ pos_ = 0;
+}
+
+
+/*!
+ * Set the category code of \p c to \p cat.
+ * theCatcode is a file-level table, so the change is not limited to
+ * this Parser object.
+ */
void Parser::setCatCode(char c, CatCode cat)
{
- theCatcode[c] = cat;
+ // convert to unsigned char first, a negative char must not be used
+ // as array index
+ theCatcode[(unsigned char)c] = cat;
}
+/*!
+ * \return the category code stored in theCatcode for \p c.
+ */
CatCode Parser::getCatCode(char c) const
{
- return theCatcode[c];
+ // convert to unsigned char first, a negative char must not be used
+ // as array index
+ return theCatcode[(unsigned char)c];
}
+
+
+} // namespace lyx