split reading .tex from writing .lyx

author André Pönitz <poenitz@gmx.net>

Wed, 12 Feb 2003 07:53:03 +0000 (07:53 +0000)

committer André Pönitz <poenitz@gmx.net>

Wed, 12 Feb 2003 07:53:03 +0000 (07:53 +0000)
author André Pönitz <poenitz@gmx.net>
Wed, 12 Feb 2003 07:53:03 +0000 (07:53 +0000)
committer André Pönitz <poenitz@gmx.net>
Wed, 12 Feb 2003 07:53:03 +0000 (07:53 +0000)
diff --git a/src/tex2lyx/texparser.C b/src/tex2lyx/texparser.C

new file mode 100644 (file)

index 0000000..a638b93
--- /dev/null
+++ b/src/tex2lyx/texparser.C
@@ -0,0 +1,327 @@
+#include "parser.h"
+
+using std::cerr;
+using std::endl;
+using std::fill;
+using std::ios;
+using std::istream;
+using std::istringstream;
+using std::ostream;
+using std::string;
+
+
+// 
+// catcodes
+//
+
+/// Map a mode keyword onto a mode_type.
+///
+/// "mathmode" selects MATH_MODE, "textmode" and "forcetext" select
+/// TEXT_MODE. Any other string leaves the mode untouched, i.e. \p oldmode
+/// is handed back.
+mode_type asMode(mode_type oldmode, string const & str)
+{
+       mode_type result = oldmode;
+       if (str == "mathmode")
+               result = MATH_MODE;
+       else if (str == "textmode" || str == "forcetext")
+               result = TEXT_MODE;
+       return result;
+}
+
+
+CatCode theCatcode[256]; // category code for every unsigned char value; filled by catInit()
+
+
+CatCode catcode(unsigned char c) // look up the category code of c in theCatcode
+{
+       return theCatcode[c];
+}
+
+
+void catInit()
+{
+       fill(theCatcode, theCatcode + 256, catOther);
+       fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
+       fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
+
+       theCatcode['\\'] = catEscape;
+       theCatcode['{']  = catBegin;
+       theCatcode['}']  = catEnd;
+       theCatcode['$']  = cat;
+       theCatcode['&']  = catAlign;
+       theCatcode['\n'] = catNewline;
+       theCatcode['#']  = catParameter;
+       theCatcode['^']  = catSuper;
+       theCatcode['_']  = catSub;
+       theCatcode['\7f'] = catIgnore;
+       theCatcode[' ']  = catSpace;
+       theCatcode['\t'] = catSpace;
+       theCatcode['\r'] = catNewline;
+       theCatcode['~']  = catActive;
+       theCatcode['%']  = catComment;
+}
+
+
+//
+// Token
+//
+
+/// Write a token in debug form: a control sequence as \name, anything
+/// else as [character,catcode].
+ostream & operator<<(ostream & os, Token const & t)
+{
+       if (t.cs().empty()) {
+               os << '[' << t.character() << ',' << t.cat() << ']';
+               return os;
+       }
+       os << '\\' << t.cs();
+       return os;
+}
+
+
+//
+// Parser
+//
+
+
+Parser::Parser(istream & is) // starts at line 0 / token 0 and tokenizes is right away
+       : lineno_(0), pos_(0)
+{
+       tokenize(is);
+}
+
+
+void Parser::push_back(Token const & t) // append t to the end of the token list
+{
+       tokens_.push_back(t);
+}
+
+
+void Parser::pop_back() // remove the last token of the list; pos_ is not adjusted
+{
+       tokens_.pop_back();
+}
+
+
+/// Return the token just before the current position.
+///
+/// A default-constructed dummy token is returned when there is no such
+/// token, i.e. at the very start or when pos_ does not refer to a valid
+/// neighbour of an existing token.
+Token const & Parser::prevToken() const
+{
+       static const Token dummy;
+       // pos_ can end up past the end of tokens_ (getChar() advances it
+       // even when the stream is exhausted), so check the upper bound too.
+       if (pos_ == 0 || pos_ > tokens_.size())
+               return dummy;
+       return tokens_[pos_ - 1];
+}
+
+
+/// Peek at the token at the current position without consuming it.
+/// Returns a default-constructed dummy token once the stream is exhausted.
+Token const & Parser::nextToken() const
+{
+       static const Token dummy;
+       if (!good())
+               return dummy;
+       return tokens_[pos_];
+}
+
+
+/// Return the token at the current position and advance past it.
+/// Once the stream is exhausted a default-constructed dummy token is
+/// returned and the position stays where it is.
+Token const & Parser::getToken()
+{
+       static const Token dummy;
+       //cerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
+       if (!good())
+               return dummy;
+       Token const & current = tokens_[pos_];
+       ++pos_;
+       return current;
+}
+
+
+/// Consume tokens for as long as the next one is a space or a newline.
+void Parser::skipSpaces()
+{
+       for (;;) {
+               CatCode const next = nextToken().cat();
+               if (next != catSpace && next != catNewline)
+                       break;
+               getToken();
+       }
+}
+
+
+/// Step back by one token so that the token read last is delivered again.
+///
+/// Does nothing when already at the first token: pos_ is unsigned, so
+/// decrementing it at zero would wrap around to a huge value.
+void Parser::putback()
+{
+       if (pos_ > 0)
+               --pos_;
+}
+
+
+bool Parser::good() const // true while pos_ still refers to an existing token
+{
+       return pos_ < tokens_.size();
+}
+
+
+/// Return the character of the current token and advance past it.
+///
+/// If the token stream is exhausted, an error is reported and '\0' is
+/// returned instead of reading beyond the end of tokens_. The position is
+/// still advanced in that case, so that a putback() issued by the caller
+/// (as getArg() does) restores the position it had before the call.
+char Parser::getChar()
+{
+       if (!good()) {
+               error("The input stream is not well...");
+               ++pos_;
+               return '\0';
+       }
+       return tokens_[pos_++].character();
+}
+
+
+string Parser::getArg(char left, char right)
+{
+       skipSpaces();
+
+       string result;
+       char c = getChar();
+
+       if (c != left)
+               putback();
+       else
+               while ((c = getChar()) != right && good())
+                       result += c;
+
+       return result;
+}
+
+
+/// Starting with the already extracted character \p c, read on from
+/// \p is while the characters are spaces or newlines. The character
+/// held when the loop ends is handed back to the stream.
+void Parser::skipSpaceTokens(istream & is, char c)
+{
+       for (;;) {
+               CatCode const cat = catcode(c);
+               if (cat != catSpace && cat != catNewline)
+                       break;
+               if (!is.get(c))
+                       break;
+       }
+       //cerr << "putting back: " << c << "\n";
+       is.putback(c);
+}
+
+
+void Parser::tokenize(istream & is)
+{
+       // eat everything up to the next \end_inset or end of stream
+       // and store it in s for further tokenization
+       string s;
+       char c;
+       while (is.get(c)) {
+               s += c;
+               if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
+                       s = s.substr(0, s.size() - 10);
+                       break;
+               }
+       }
+       // Remove the space after \end_inset
+       if (is.get(c) && c != ' ')
+               is.unget();
+
+       // tokenize buffer
+       tokenize(s);
+}
+
+
+void Parser::tokenize(string const & buffer) // lex buffer and append the resulting tokens to tokens_
+{
+       static bool init_done = false; // the catcode table is filled once, on the first call
+
+       if (!init_done) {
+               catInit();
+               init_done = true;
+       }
+
+       istringstream is(buffer.c_str(), ios::in | ios::binary); // NOTE(review): c_str() ends at the first embedded NUL, if any
+
+       char c;
+       while (is.get(c)) {
+               //cerr << "reading c: " << c << "\n";
+
+               switch (catcode(c)) {
+                       case catNewline: { // two newline characters in a row give a "par" token, a single one a newline token
+                               ++lineno_;
+                               is.get(c); // NOTE(review): result is not checked; at end of input c presumably still holds the newline, which would give "par" - confirm
+                               if (catcode(c) == catNewline)
+                                       push_back(Token("par"));
+                               else {
+                                       push_back(Token('\n', catNewline));
+                                       is.putback(c); // the character read ahead belongs to the next token
+                               }
+                               break;
+                       }
+
+/*
+                       case catComment: {
+                               while (is.get(c) && catcode(c) != catNewline)
+                                       ;
+                               ++lineno_;
+                               break;
+                       }
+*/
+
+                       case catEscape: { // control sequence: a run of letters, or exactly one non-letter character
+                               is.get(c);
+                               if (!is) {
+                                       error("unexpected end of input");
+                               } else {
+                                       string s(1, c);
+                                       if (catcode(c) == catLetter) {
+                                               // collect letters
+                                               while (is.get(c) && catcode(c) == catLetter)
+                                                       s += c;
+                                               skipSpaceTokens(is, c); // blanks after the name are dropped, the first other character is put back
+                                       }
+                                       push_back(Token(s)); // stored without the leading backslash
+                               }
+                               break;
+                       }
+
+                       case catSuper:
+                       case catSub: { // '^' and '_': emit the token, then drop blanks that follow it
+                               push_back(Token(c, catcode(c)));
+                               is.get(c);
+                               skipSpaceTokens(is, c);
+                               break;
+                       }
+
+                       case catIgnore: { // produces no token, the character is only reported on cerr
+                               cerr << "ignoring a char: " << int(c) << "\n";
+                               break;
+                       }
+
+                       default: // every other category becomes a one-character token
+                               push_back(Token(c, catcode(c)));
+               }
+       }
+
+#ifdef FILEDEBUG
+       dump();
+#endif
+}
+
+
+void Parser::dump() const
+{
+       cerr << "\nTokens: ";
+       for (unsigned i = 0; i < tokens_.size(); ++i) {
+               if (i == pos_)
+                       cerr << " <#> ";
+               cerr << tokens_[i];
+       }
+       cerr << " pos: " << pos_ << "\n";
+}
+
+
+void Parser::error(string const & msg) // report msg with the current line counter on cerr, then dump the tokens
+{
+       cerr << "Line ~" << lineno_ << ":  parse error: " << msg << endl;
+       dump();
+       //exit(1);
+}
+
+
+string Parser::verbatimOption()
+{
+       string res;
+       if (nextToken().character() == '[') {
+               Token t = getToken();
+               for (Token t = getToken(); t.character() != ']' && good(); t = getToken()) {
+                       if (t.cat() == catBegin) {
+                               putback();
+                               res += '{' + verbatimItem() + '}';
+                       } else
+                               res += t.asString();
+               }
+       }
+       return res;
+}
+
+
+string Parser::verbatimItem()
+{
+       string res;
+       if (nextToken().cat() == catBegin) {
+               Token t = getToken();
+               for (Token t = getToken(); t.cat() != catEnd && good(); t = getToken()) {
+                       if (t.cat() == catBegin) {
+                               putback();
+                               res += '{' + verbatimItem() + '}';
+                       }
+                       else
+                               res += t.asString();
+               }
+       }
+       return res;
+}
diff --git a/src/tex2lyx/texparser.h b/src/tex2lyx/texparser.h

new file mode 100644 (file)

index 0000000..99ccd5d
--- /dev/null
+++ b/src/tex2lyx/texparser.h
@@ -0,0 +1,146 @@
+
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "LString.h"
+
+enum mode_type {UNDECIDED_MODE, TEXT_MODE, MATH_MODE}; // asMode() maps "mathmode" to MATH_MODE and "textmode"/"forcetext" to TEXT_MODE
+
+mode_type asMode(mode_type oldmode, string const & str);
+
+
+// These are TeX's catcodes; the enumerator values equal the numbers in the comments (catEscape == 0 ... catInvalid == 15)
+enum CatCode {
+       catEscape,     // 0    backslash
+       catBegin,      // 1    {
+       catEnd,        // 2    }
+       catMath,       // 3    $
+       catAlign,      // 4    &
+       catNewline,    // 5    ^^M
+       catParameter,  // 6    #
+       catSuper,      // 7    ^
+       catSub,        // 8    _
+       catIgnore,     // 9    also the category of a default-constructed Token
+       catSpace,      // 10   space
+       catLetter,     // 11   a-zA-Z
+       catOther,      // 12   none of the above
+       catActive,     // 13   ~
+       catComment,    // 14   %
+       catInvalid     // 15   <delete>
+};
+
+
+CatCode catcode(unsigned char c);
+
+
+enum { // parsing flags; each value is a distinct power of two so they can be OR-ed together (bit 0 is unused)
+       FLAG_BRACE_LAST = 1 << 1,  //  last closing brace ends the parsing
+       FLAG_RIGHT      = 1 << 2,  //  next \\right ends the parsing process
+       FLAG_END        = 1 << 3,  //  next \\end ends the parsing process
+       FLAG_BRACK_LAST = 1 << 4,  //  next closing bracket ends the parsing
+       FLAG_TEXTMODE   = 1 << 5,  //  we are in a box
+       FLAG_ITEM       = 1 << 6,  //  read a (possibly braced token)
+       FLAG_LEAVE      = 1 << 7,  //  leave the loop at the end
+       FLAG_SIMPLE     = 1 << 8,  //  next $ leaves the loop
+       FLAG_EQUATION   = 1 << 9,  //  next \] leaves the loop
+       FLAG_SIMPLE2    = 1 << 10, //  next \) leaves the loop
+       FLAG_OPTION     = 1 << 11, //  read [...] style option
+       FLAG_BRACED     = 1 << 12  //  read {...} style argument
+};
+
+
+
+//
+// Helper class for parsing
+//
+
+class Token {
+public:
+       ///
+       Token() : cs_(), char_(0), cat_(catIgnore) {}
+       ///
+       Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
+       ///
+       Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
+
+       ///
+       string const & cs() const { return cs_; }
+       ///
+       CatCode cat() const { return cat_; }
+       ///
+       char character() const { return char_; }
+       ///
+       string asString() const { return cs_.size() ? cs_ : string(1, char_); }
+
+private:
+       ///
+       string cs_;
+       ///
+       char char_;
+       ///
+       CatCode cat_;
+};
+
+ostream & operator<<(ostream & os, Token const & t);
+
+
+//
+// Actual parser class
+//
+
+class Parser {
+
+public:
+       /// tokenizes the contents of \p is immediately
+       Parser(istream & is);
+
+       /// current value of the line counter kept while tokenizing
+       int lineno() const { return lineno_; }
+       /// steps back by one token (decrements pos_)
+       void putback();
+       /// dump all tokens and the current position to cerr
+       void dump() const;
+
+       /// skips blanks; if the next character is \p left, returns the characters up to \p right, otherwise an empty string
+       string getArg(char left, char right);
+       /// returns the character of the current token and advances past it
+       char getChar();
+       /// prints \p msg together with the line counter to cerr and dumps the tokens; does not abort
+       void error(string const & msg);
+       /// reads \p is up to "\end_inset" or the end of the stream and tokenizes that text
+       void tokenize(istream & is);
+       /// splits \p s into tokens and appends them to tokens_
+       void tokenize(string const & s);
+       /// starting with \p c, consumes spaces and newlines from \p is; the first other character is put back
+       void skipSpaceTokens(istream & is, char c);
+       /// appends \p t to tokens_
+       void push_back(Token const & t);
+       /// removes the last token from tokens_
+       void pop_back();
+       /// token before the current position, or an empty dummy token at the start
+       Token const & prevToken() const;
+       /// token at the current position without consuming it, or an empty dummy token at the end
+       Token const & nextToken() const;
+       /// token at the current position, advancing past it; an empty dummy token at the end
+       Token const & getToken();
+       /// skips space and newline tokens if any
+       void skipSpaces();
+       /// NOTE(review): no definition of lex() is visible in texparser.C - confirm it is still needed
+       void lex(string const & s);
+       /// true while pos_ refers to an existing token
+       bool good() const;
+       /// if the next token opens a brace group, returns its contents without the outer braces, else an empty string
+       string verbatimItem();
+       /// if the next token is '[', returns the text up to the matching ']' without the brackets, else an empty string
+       string verbatimOption();
+
+//private:
+       /// line counter, incremented by tokenize() when it handles a newline
+       int lineno_;
+       /// all tokens produced by tokenize()
+       vector<Token> tokens_;
+       /// index into tokens_ of the next token to be read
+       unsigned pos_;
+};
+
+#endif
author	André Pönitz <poenitz@gmx.net>
	Wed, 12 Feb 2003 07:53:03 +0000 (07:53 +0000)
committer	André Pönitz <poenitz@gmx.net>
	Wed, 12 Feb 2003 07:53:03 +0000 (07:53 +0000)
src/tex2lyx/texparser.C	[new file with mode: 0644]	patch \| blob
src/tex2lyx/texparser.h	[new file with mode: 0644]	patch \| blob