src/tex2lyx/tex2lyx.C

   1 /** The .tex to .lyx converter
   2     \author André Pönitz (2003)
   3  */
   4
   5 #include <config.h>
   6
   7 #include <algorithm>
   8 #include <cctype>
   9 #include <fstream>
  10 #include <iostream>
  11 #include <sstream>
  12 #include <stack>
  13 #include <string>
  14 #include <vector>
  15
  16 using std::atoi;
  17 using std::cout;
  18 using std::cerr;
  19 using std::endl;
  20 using std::fill;
  21 using std::getline;
  22 using std::ios;
  23 using std::ifstream;
  24 using std::istream;
  25 using std::istringstream;
  26 using std::ostream;
  27 using std::ostringstream;
  28 using std::stack;
  29 using std::string;
  30 using std::vector;
  31
  32
  33 namespace {
  34
  // Delimiters that the wrap() helpers put around the markup they build.
  const char OPEN  = '<';
  const char CLOSE = '>';
  37
  // Names that are accepted as the language among the \documentclass
  // options. The list is searched front to back and ends with a null entry.
  char const * known_languages[] = {
          "austrian",
          "babel", "bahasa", "basque", "breton", "bulgarian",
          "catalan", "croatian", "czech",
          "danish", "dutch",
          "english", "esperanto", "estonian",
          "finnish", "francais", "frenchb",
          "galician", "germanb", "greek",
          "hebcal", "hebfont", "hebrew", "hebrew_newcode", "hebrew_oldcode",
          "hebrew_p", "hyphen",
          "icelandic", "irish", "italian",
          "latin", "lgrcmr", "lgrcmro", "lgrcmss", "lgrcmtt", "lgrenc",
          "lgrlcmss", "lgrlcmtt", "lheclas", "lhecmr", "lhecmss", "lhecmtt",
          "lhecrml", "lheenc", "lhefr", "lheredis", "lheshold", "lheshscr",
          "lheshstk", "lsorbian",
          "magyar",
          "naustrian", "ngermanb", "ngerman", "norsk",
          "polish", "portuges",
          "rlbabel", "romanian", "russianb",
          "samin", "scottish", "serbian", "slovak", "slovene", "spanish",
          "swedish",
          "turkish",
          "ukraineb", "usorbian",
          "welsh",
          0
  };

  // Font size options recognised among the \documentclass options; the
  // list ends with a null entry.
  char const * known_fontsizes[] = {
          "10pt",
          "11pt",
          "12pt",
          0
  };
  52
  53
  // Global document header state, updated while the LaTeX preamble is read.
  // "FIXME" appears to be a placeholder for settings that have no sensible
  // default yet.

  // preamble code that is passed through unchanged
  string h_preamble;

  // document class, its options, language, encoding and fonts
  string h_textclass("FIXME");
  string h_options("FIXME");
  string h_language("FIXME");
  string h_inputencoding("FIXME");
  string h_fontscheme("FIXME");
  string h_graphics("default");
  string h_paperfontsize("FIXME");
  string h_spacing("single");

  // paper
  string h_papersize("FIXME");
  string h_paperpackage("FIXME");

  // switches, stored as the strings "0" and "1"
  string h_use_geometry("0");
  string h_use_amsmath("0");
  string h_use_natbib("0");
  string h_use_numerical_citations("0");

  // layout
  string h_paperorientation("portrait");
  string h_secnumdepth("3");
  string h_tocdepth("3");
  string h_paragraph_separation("indent");
  string h_defskip("medskip");
  string h_quotes_language("2");
  string h_quotes_times("1");
  string h_papercolumns("1");
  string h_papersides("1");
  string h_paperpagestyle("default");
  string h_tracking_changes("0");

  // true while the preamble is being processed
  bool in_preamble(true);

  // names of the environments that are currently open, innermost on top
  stack<string> active_environments;
  87
  88
  89
  /// Returns \p a with every leading and trailing character that occurs in
  /// \p p removed. \p a is returned unchanged if it is empty or if \p p is
  /// the empty string.
  string const trim(string const & a, char const * p = " ")
  {
          // nothing to strip from, or nothing to strip
          if (a.empty() || *p == '\0')
                  return a;

          string::size_type const first = a.find_first_not_of(p);
          if (first == string::npos) {
                  // a consists solely of characters from p
                  return string();
          }

          string::size_type const last = a.find_last_not_of(p);
          return a.substr(first, last - first + 1);
  }
 106
 107
 /// Appends the pieces of \p s that are separated by \p delim to \p result.
 /// Empty pieces are kept, except that no empty piece is produced after a
 /// trailing delimiter; an empty \p s adds nothing.
 void split(string const & s, vector<string> & result, char delim)
 {
         string::size_type start = 0;
         while (start < s.size()) {
                 string::size_type const stop = s.find(delim, start);
                 if (stop == string::npos) {
                         // last piece, not followed by a delimiter
                         result.push_back(s.substr(start));
                         break;
                 }
                 result.push_back(s.substr(start, stop - start));
                 start = stop + 1;
         }
 }
 115
 116
 /// Concatenates the elements of \p input, putting \p delim between each
 /// pair of neighbours. An empty vector gives an empty string.
 string join(vector<string> const & input, char delim)
 {
         string joined;
         vector<string>::const_iterator it = input.begin();
         vector<string>::const_iterator const end = input.end();
         if (it != end) {
                 joined = *it;
                 for (++it; it != end; ++it) {
                         joined += delim;
                         joined += *it;
                 }
         }
         return joined;
 }
 127
 128
 /**
  * Looks for the first entry of the null-terminated list \p what that also
  * occurs in \p opts. On a hit the entry is stored in \p target and removed
  * from \p opts. If nothing matches, \p opts and \p target stay untouched.
  */
 void handle_opt(vector<string> & opts, char const ** what, string & target)
 {
         if (opts.empty() || !what)
                 return;

         // The list ends with a null entry, so the entry itself has to be
         // tested: the walking pointer never becomes null, and running past
         // the terminator would compare the options against a null pointer.
         for ( ; *what; ++what) {
                 vector<string>::iterator it =
                         std::find(opts.begin(), opts.end(), *what);
                 if (it != opts.end()) {
                         target = *what;
                         opts.erase(it);
                         return;
                 }
         }
 }
 144
 145
 /// Writes \p s to \p os, enclosed in a collapsed ERT inset that contains
 /// a single Standard layout paragraph.
 void handle_ert(ostream & os, string const & s)
 {
         os << "\n\\begin_inset ERT\nstatus Collapsed\n\n\\layout Standard\n\n"
            << s
            << "\n\\end_inset\n";
 }
 152
 153
 154 void handle_package(string const & name, string const & options)
 155 {
 156         if (name == "a4wide") {
 157                 h_papersize = "a4";
 158                 h_paperpackage = "widemarginsa4";
 159         } else if (name == "ae")
 160                 h_fontscheme = "ae";
 161         else if (name == "aecompl")
 162                 h_fontscheme = "ae";
 163         else if (name == "amsmath")
 164                 h_use_amsmath = "1";
 165         else if (name == "amssymb")
 166                 h_use_amsmath = "1";
 167         else if (name == "babel")
 168                 ; // ignore this
 169         else if (name == "fontenc")
 170                 ; // ignore this
 171         else if (name == "inputenc")
 172                 h_inputencoding = options;
 173         else if (name == "makeidx")
 174                 ; // ignore this
 175         else if (name == "verbatim")
 176                 ; // ignore this
 177         else {
 178                 if (options.size())
 179                         h_preamble += "\\usepackage[" + options + "]{" + name + "}\n";
 180                 else
 181                         h_preamble += "\\usepackage{" + name + "}\n";
 182         }
 183 }
 184
 185
 186 string wrap(string const & cmd, string const & str)
 187 {
 188         return OPEN + cmd + ' ' + str + CLOSE;
 189 }
 190
 191
 192 string wrap(string const & cmd, string const & str, string const & str2)
 193 {
 194         return OPEN + cmd + ' ' + str + ' ' + str2 + CLOSE;
 195 }
 196
 197
 // The mode the parser works in.
 enum mode_type {
         UNDECIDED_MODE,
         TEXT_MODE,
         MATH_MODE
 };

 /// Maps the mode name \p str to a mode: "mathmode" gives MATH_MODE,
 /// "textmode" and "forcetext" give TEXT_MODE. For any other name
 /// \p oldmode is returned.
 mode_type asMode(mode_type oldmode, string const & str)
 {
         mode_type mode = oldmode;
         if (str == "mathmode")
                 mode = MATH_MODE;
         else if (str == "textmode" || str == "forcetext")
                 mode = TEXT_MODE;
         return mode;
 }
 208
 209
 // TeX's category codes, listed in TeX's numeric order: the number in
 // each comment is the value TeX uses for that category.
 enum CatCode {
         catEscape,     // 0    backslash
         catBegin,      // 1    {
         catEnd,        // 2    }
         cat,           // 3    $
         catAlign,      // 4    &
         catNewline,    // 5    ^^M
         catParameter,  // 6    #
         catSuper,      // 7    ^
         catSub,        // 8    _
         catIgnore,     // 9
         catSpace,      // 10   space
         catLetter,     // 11   a-zA-Z
         catOther,      // 12   none of the above
         catActive,     // 13   ~
         catComment,    // 14   %
         catInvalid     // 15   <delete>
 };

 // Category code of every possible byte value; filled in by catInit().
 CatCode theCatcode[256];


 /// Returns the category code stored for the character \p c.
 inline CatCode catcode(unsigned char c)
 {
         return theCatcode[c];
 }


 // Bits of the flags argument of Parser::parse().
 enum {
         FLAG_BRACE_LAST = 1 << 1,  //  last closing brace ends the parsing
         FLAG_RIGHT      = 1 << 2,  //  next \\right ends the parsing process
         FLAG_END        = 1 << 3,  //  next \\end ends the parsing process
         FLAG_BRACK_LAST = 1 << 4,  //  next closing bracket ends the parsing
         FLAG_TEXTMODE   = 1 << 5,  //  we are in a box
         FLAG_ITEM       = 1 << 6,  //  read a (possibly braced token)
         FLAG_LEAVE      = 1 << 7,  //  leave the loop at the end
         FLAG_SIMPLE     = 1 << 8,  //  next $ leaves the loop
         FLAG_EQUATION   = 1 << 9,  //  next \] leaves the loop
         FLAG_SIMPLE2    = 1 << 10, //  next \) leaves the loop
         FLAG_OPTION     = 1 << 11, //  read [...] style option
         FLAG_BRACED     = 1 << 12  //  read {...} style argument
 };


 /// Fills theCatcode: every byte starts out as catOther, the ASCII letters
 /// become catLetter, and the special characters get their own category.
 void catInit()
 {
         fill(theCatcode, theCatcode + 256, catOther);
         fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
         fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);

         theCatcode['\\'] = catEscape;
         theCatcode['{']  = catBegin;
         theCatcode['}']  = catEnd;
         theCatcode['$']  = cat;
         theCatcode['&']  = catAlign;
         theCatcode['\n'] = catNewline;
         theCatcode['#']  = catParameter;
         theCatcode['^']  = catSuper;
         theCatcode['_']  = catSub;
         // DEL has to be written as a hex escape. '\7f' is the octal escape
         // \7 followed by 'f', i.e. a multi-character constant whose value
         // lies far outside this table.
         theCatcode['\x7f'] = catIgnore;
         theCatcode[' ']  = catSpace;
         theCatcode['\t'] = catSpace;
         theCatcode['\r'] = catNewline;
         theCatcode['~']  = catActive;
         theCatcode['%']  = catComment;
 }
 277
 278
 279
 280 //
 281 // Helper class for parsing
 282 //
 283
 284 class Token {
 285 public:
 286         ///
 287         Token() : cs_(), char_(0), cat_(catIgnore) {}
 288         ///
 289         Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
 290         ///
 291         Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
 292
 293         ///
 294         string const & cs() const { return cs_; }
 295         ///
 296         CatCode cat() const { return cat_; }
 297         ///
 298         char character() const { return char_; }
 299         ///
 300         string asString() const { return cs_.size() ? cs_ : string(1, char_); }
 301
 302 private:
 303         ///
 304         string cs_;
 305         ///
 306         char char_;
 307         ///
 308         CatCode cat_;
 309 };
 310
 311 ostream & operator<<(ostream & os, Token const & t)
 312 {
 313         if (t.cs().size())
 314                 os << '\\' << t.cs();
 315         else
 316                 os << '[' << t.character() << ',' << t.cat() << ']';
 317         return os;
 318 }
 319
 320
 321 class Parser {
 322
 323 public:
 324         ///
 325         Parser(istream & is);
 326
 327         ///
 328         string parse();
 329         ///
 330         string parse(unsigned flags, mode_type mode);
 331         ///
 332         int lineno() const { return lineno_; }
 333         ///
 334         void putback();
 335         /// dump contents to screen
 336         void dump() const;
 337
 338 private:
 339         ///
 340         string getArg(char left, char right);
 341         ///
 342         char getChar();
 343         ///
 344         void error(string const & msg);
 345         ///
 346         void tokenize(istream & is);
 347         ///
 348         void tokenize(string const & s);
 349         ///
 350         void skipSpaceTokens(istream & is, char c);
 351         ///
 352         void push_back(Token const & t);
 353         ///
 354         void pop_back();
 355         ///
 356         Token const & prevToken() const;
 357         ///
 358         Token const & nextToken() const;
 359         ///
 360         Token const & getToken();
 361         /// skips spaces if any
 362         void skipSpaces();
 363         ///
 364         void lex(string const & s);
 365         ///
 366         bool good() const;
 367         ///
 368         string parse_verbatim_item();
 369         ///
 370         string parse_verbatim_option();
 371
 372         ///
 373         int lineno_;
 374         ///
 375         vector<Token> tokens_;
 376         ///
 377         unsigned pos_;
 378 };
 379
 380
 381 Parser::Parser(istream & is)
 382         : lineno_(0), pos_(0)
 383 {
 384         tokenize(is);
 385 }
 386
 387
 388 void Parser::push_back(Token const & t)
 389 {
 390         tokens_.push_back(t);
 391 }
 392
 393
 394 void Parser::pop_back()
 395 {
 396         tokens_.pop_back();
 397 }
 398
 399
 400 Token const & Parser::prevToken() const
 401 {
 402         static const Token dummy;
 403         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
 404 }
 405
 406
 407 Token const & Parser::nextToken() const
 408 {
 409         static const Token dummy;
 410         return good() ? tokens_[pos_] : dummy;
 411 }
 412
 413
 414 Token const & Parser::getToken()
 415 {
 416         static const Token dummy;
 417         //cerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
 418         return good() ? tokens_[pos_++] : dummy;
 419 }
 420
 421
 422 void Parser::skipSpaces()
 423 {
 424         while (nextToken().cat() == catSpace || nextToken().cat() == catNewline)
 425                 getToken();
 426 }
 427
 428
 429 void Parser::putback()
 430 {
 431         --pos_;
 432 }
 433
 434
 435 bool Parser::good() const
 436 {
 437         return pos_ < tokens_.size();
 438 }
 439
 440
 441 char Parser::getChar()
 442 {
 443         if (!good())
 444                 error("The input stream is not well...");
 445         return tokens_[pos_++].character();
 446 }
 447
 448
 449 string Parser::getArg(char left, char right)
 450 {
 451         skipSpaces();
 452
 453         string result;
 454         char c = getChar();
 455
 456         if (c != left)
 457                 putback();
 458         else
 459                 while ((c = getChar()) != right && good())
 460                         result += c;
 461
 462         return result;
 463 }
 464
 465
 466 void Parser::skipSpaceTokens(istream & is, char c)
 467 {
 468         // skip trailing spaces
 469         while (catcode(c) == catSpace || catcode(c) == catNewline)
 470                 if (!is.get(c))
 471                         break;
 472         //cerr << "putting back: " << c << "\n";
 473         is.putback(c);
 474 }
 475
 476
 477 void Parser::tokenize(istream & is)
 478 {
 479         // eat everything up to the next \end_inset or end of stream
 480         // and store it in s for further tokenization
 481         string s;
 482         char c;
 483         while (is.get(c)) {
 484                 s += c;
 485                 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
 486                         s = s.substr(0, s.size() - 10);
 487                         break;
 488                 }
 489         }
 490         // Remove the space after \end_inset
 491         if (is.get(c) && c != ' ')
 492                 is.unget();
 493
 494         // tokenize buffer
 495         tokenize(s);
 496 }
 497
 498
 499 void Parser::tokenize(string const & buffer)
 500 {
 501         static bool init_done = false;
 502
 503         if (!init_done) {
 504                 catInit();
 505                 init_done = true;
 506         }
 507
 508         istringstream is(buffer.c_str(), ios::in | ios::binary);
 509
 510         char c;
 511         while (is.get(c)) {
 512                 //cerr << "reading c: " << c << "\n";
 513
 514                 switch (catcode(c)) {
 515                         case catNewline: {
 516                                 ++lineno_;
 517                                 is.get(c);
 518                                 if (catcode(c) == catNewline)
 519                                         push_back(Token("par"));
 520                                 else {
 521                                         push_back(Token('\n', catNewline));
 522                                         is.putback(c);
 523                                 }
 524                                 break;
 525                         }
 526
 527 /*
 528                         case catComment: {
 529                                 while (is.get(c) && catcode(c) != catNewline)
 530                                         ;
 531                                 ++lineno_;
 532                                 break;
 533                         }
 534 */
 535
 536                         case catEscape: {
 537                                 is.get(c);
 538                                 if (!is) {
 539                                         error("unexpected end of input");
 540                                 } else {
 541                                         string s(1, c);
 542                                         if (catcode(c) == catLetter) {
 543                                                 // collect letters
 544                                                 while (is.get(c) && catcode(c) == catLetter)
 545                                                         s += c;
 546                                                 skipSpaceTokens(is, c);
 547                                         }
 548                                         push_back(Token(s));
 549                                 }
 550                                 break;
 551                         }
 552
 553                         case catSuper:
 554                         case catSub: {
 555                                 push_back(Token(c, catcode(c)));
 556                                 is.get(c);
 557                                 skipSpaceTokens(is, c);
 558                                 break;
 559                         }
 560
 561                         case catIgnore: {
 562                                 cerr << "ignoring a char: " << int(c) << "\n";
 563                                 break;
 564                         }
 565
 566                         default:
 567                                 push_back(Token(c, catcode(c)));
 568                 }
 569         }
 570
 571 #ifdef FILEDEBUG
 572         dump();
 573 #endif
 574 }
 575
 576
 577 void Parser::dump() const
 578 {
 579         cerr << "\nTokens: ";
 580         for (unsigned i = 0; i < tokens_.size(); ++i) {
 581                 if (i == pos_)
 582                         cerr << " <#> ";
 583                 cerr << tokens_[i];
 584         }
 585         cerr << " pos: " << pos_ << "\n";
 586 }
 587
 588
 589 void Parser::error(string const & msg)
 590 {
 591         cerr << "Line ~" << lineno_ << ":  parse error: " << msg << endl;
 592         dump();
 593         //exit(1);
 594 }
 595
 596
 597 string Parser::parse()
 598 {
 599         skipSpaces();
 600         return parse(0, UNDECIDED_MODE);
 601 }
 602
 603
 604 string Parser::parse_verbatim_option()
 605 {
 606         string res;
 607         if (nextToken().character() == '[') {
 608                 Token t = getToken();
 609                 for (Token t = getToken(); t.character() != ']' && good(); t = getToken()) {
 610                         if (t.cat() == catBegin) {
 611                                 putback();
 612                                 res += '{' + parse_verbatim_item() + '}';
 613                         } else
 614                                 res += t.asString();
 615                 }
 616         }
 617         return res;
 618 }
 619
 620
 621 string Parser::parse_verbatim_item()
 622 {
 623         string res;
 624         if (nextToken().cat() == catBegin) {
 625                 Token t = getToken();
 626                 for (Token t = getToken(); t.cat() != catEnd && good(); t = getToken()) {
 627                         if (t.cat() == catBegin) {
 628                                 putback();
 629                                 res += '{' + parse_verbatim_item() + '}';
 630                         }
 631                         else
 632                                 res += t.asString();
 633                 }
 634         }
 635         return res;
 636 }
 637
 638
 639 string Parser::parse(unsigned flags, mode_type mode)
 640 {
 641         //int limits = 0;
 642
 643         ostringstream result;
 644         while (good()) {
 645                 Token const & t = getToken();
 646
 647 #ifdef FILEDEBUG
 648                 cerr << "t: " << t << " flags: " << flags << "\n";
 649                 cell->dump();
 650                 cerr << "\n";
 651 #endif
 652
 653                 if (flags & FLAG_ITEM) {
 654                         if (t.cat() == catSpace)
 655                                 continue;
 656
 657                         flags &= ~FLAG_ITEM;
 658                         if (t.cat() == catBegin) {
 659                                 // skip the brace and collect everything to the next matching
 660                                 // closing brace
 661                                 flags |= FLAG_BRACE_LAST;
 662                                 continue;
 663                         }
 664
 665                         // handle only this single token, leave the loop if done
 666                         flags |= FLAG_LEAVE;
 667                 }
 668
 669
 670                 if (flags & FLAG_BRACED) {
 671                         if (t.cat() == catSpace)
 672                                 continue;
 673
 674                         if (t.cat() != catBegin) {
 675                                 error("opening brace expected");
 676                                 return result.str();
 677                         }
 678
 679                         // skip the brace and collect everything to the next matching
 680                         // closing brace
 681                         flags = FLAG_BRACE_LAST;
 682                 }
 683
 684
 685                 if (flags & FLAG_OPTION) {
 686                         if (t.cat() == catOther && t.character() == '[') {
 687                                 result << parse(FLAG_BRACK_LAST, mode);
 688                         } else {
 689                                 // no option found, put back token and we are done
 690                                 putback();
 691                         }
 692                         return result.str();
 693                 }
 694
 695                 //
 696                 // cat codes
 697                 //
 698                 if (t.cat() == cat) {
 699                         if (mode != MATH_MODE) {
 700                                 // we are inside some text mode thingy, so opening new math is allowed
 701                                 Token const & n = getToken();
 702                                 if (n.cat() == cat) {
 703                                         // TeX's $$...$$ syntax for displayed math
 704                                         result << wrap("equation", parse(FLAG_SIMPLE, MATH_MODE));
 705                                         getToken(); // skip the second '$' token
 706                                 } else {
 707                                         // simple $...$  stuff
 708                                         putback();
 709                                         result << wrap("simple", parse(FLAG_SIMPLE, MATH_MODE));
 710                                 }
 711                         }
 712
 713                         else if (flags & FLAG_SIMPLE) {
 714                                 // this is the end of the formula
 715                                 return result.str();
 716                         }
 717
 718                         else {
 719                                 error("something strange in the parser\n");
 720                                 break;
 721                         }
 722                 }
 723
 724                 else if (t.cat() == catLetter)
 725                         result << t.character();
 726
 727                 else if (t.cat() == catSpace && mode != MATH_MODE) {
 728                         //if (result.empty() || result[result.size() - 1] != ' ')
 729                                 result << t.character();
 730                 }
 731
 732                 else if (t.cat() == catNewline && mode != MATH_MODE)
 733                         result << t.character();
 734
 735                 else if (t.cat() == catParameter) {
 736                         Token const & n = getToken();
 737                         result << wrap("macroarg", string(1, n.character()));
 738                 }
 739
 740                 else if (t.cat() == catActive)
 741                         result << wrap("active", string(1, t.character()));
 742
 743                 else if (t.cat() == catBegin)
 744                         result << wrap("braced", parse(FLAG_BRACE_LAST, mode));
 745
 746                 else if (t.cat() == catEnd) {
 747                         if (flags & FLAG_BRACE_LAST)
 748                                 return result.str();
 749                         error("found '}' unexpectedly");
 750                         //lyx::Assert(0);
 751                         //add(cell, '}', LM_TC_TEX);
 752                 }
 753
 754 /*
 755                 else if (t.cat() == catAlign) {
 756                         ++cellcol;
 757                         //cerr << " column now " << cellcol << " max: " << grid.ncols() << "\n";
 758                         if (cellcol == grid.ncols()) {
 759                                 //cerr << "adding column " << cellcol << "\n";
 760                                 grid.addCol(cellcol - 1);
 761                         }
 762                         cell = &grid.cell(grid.index(cellrow, cellcol));
 763                 }
 764 */
 765
 766                 else if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) {
 767                         //cerr << "finished reading option\n";
 768                         return result.str();
 769                 }
 770
 771                 else if (t.cat() == catOther)
 772                         result << string(1, t.character());
 773
 774                 else if (t.cat() == catComment) {
 775                         string s;
 776                         while (good()) {
 777                                 Token const & t = getToken();
 778                                 if (t.cat() == catNewline)
 779                                         break;
 780                                 s += t.asString();
 781                         }
 782                         //result << wrap("comment", s);
 783                         skipSpaces();
 784                 }
 785
 786                 //
 787                 // control sequences
 788                 //
 789
 790                 else if (t.cs() == "lyxlock") {
 791                         // ignored;
 792                 }
 793
 794                 else if (t.cs() == "newcommand" || t.cs() == "providecommand") {
 795                         string const name = parse_verbatim_item();
 796                         string const opts = getArg('[', ']');
 797                         string const body = parse_verbatim_item();
 798                         // only non-lyxspecific stuff
 799                         if (name != "noun" && name != "tabularnewline") {
 800                                 h_preamble += "\\" + t.cs() + "{" + name + "}";
 801                                 if (opts.size())
 802                                         h_preamble += "[" + opts + "]";
 803                                 h_preamble += "{" + body + "}\n";
 804                         }
 805                 }
 806
 807                 else if (t.cs() == "(")
 808                         result << wrap("simple", parse(FLAG_SIMPLE2, MATH_MODE));
 809
 810                 else if (t.cs() == "[")
 811                         result << wrap("equation", parse(FLAG_EQUATION, MATH_MODE));
 812
 813                 else if (t.cs() == "protect")
 814                         // ignore \\protect, will hopefully be re-added during output
 815                         ;
 816
 817                 else if (t.cs() == "end") {
 818                         if (flags & FLAG_END) {
 819                                 // eat environment name
 820                                 string const name = getArg('{', '}');
 821                                 if (name != active_environments.top())
 822                                         error("\\end{" + name + "} does not match \\begin{"
 823                                                 + active_environments.top() + "}");
 824                                 active_environments.pop();
 825                                 return result.str();
 826                         }
 827                         error("found 'end' unexpectedly");
 828                 }
 829
 830                 else if (t.cs() == ")") {
 831                         if (flags & FLAG_SIMPLE2)
 832                                 return result.str();
 833                         error("found '\\)' unexpectedly");
 834                 }
 835
 836                 else if (t.cs() == "]") {
 837                         if (flags & FLAG_EQUATION)
 838                                 return result.str();
 839                         error("found '\\]' unexpectedly");
 840                 }
 841
 842 /*
 843                 else if (t.cs() == "\\") {
 844                         grid.vcrskip(LyXLength(getArg('[', ']')), cellrow);
 845                         ++cellrow;
 846                         cellcol = 0;
 847                         if (cellrow == grid.nrows())
 848                                 grid.addRow(cellrow - 1);
 849                         if (grid.asHullstring())
 850                                 grid.asHullstring()->numbered(cellrow, numbered);
 851                         cell = &grid.cell(grid.index(cellrow, cellcol));
 852                 }
 853 */
 854                 else if (t.cs() == "documentclass") {
 855                         vector<string> opts;
 856                         split(getArg('[', ']'), opts, ',');
 857                         handle_opt(opts, known_languages, h_language);
 858                         handle_opt(opts, known_fontsizes, h_paperfontsize);
 859                         h_options = join(opts, ',');
 860                         h_textclass = getArg('{', '}');
 861                 }
 862
 863                 else if (t.cs() == "usepackage") {
 864                         string const options = getArg('[', ']');
 865                         string const name = getArg('{', '}');
 866                         if (options.empty() && name.find(',')) {
 867                                 vector<string> vecnames;
 868                                 split(name, vecnames, ',');
 869                                 vector<string>::const_iterator it  = vecnames.begin();
 870                                 vector<string>::const_iterator end = vecnames.end();
 871                                 for (; it != end; ++it) {
 872                                         handle_package(trim(*it), string());
 873                                 }
 874                         } else {
 875                                 handle_package(name, options);
 876                         }
 877                 }
 878
 879                 else if (t.cs() == "newenvironment") {
 880                         string const name = getArg('{', '}');
 881                         skipSpaces();
 882                         string const begin = parse_verbatim_item();
 883                         skipSpaces();
 884                         string const end = parse_verbatim_item();
 885                         // ignore out mess
 886                         if (name != "lyxcode")
 887                                 result << wrap("newenvironment", begin + end);
 888                 }
 889
 890                 else if (t.cs() == "def") {
 891                         string const name = getToken().cs();
 892                         string res;
 893                         while (nextToken().cat() != catBegin)
 894                                 res += getToken().asString();
 895                         handle_ert(result, "\\def" + res + '{' + parse_verbatim_item() + '}');
 896                 }
 897
 898                 else if (t.cs() == "setcounter") {
 899                         string const name = getArg('{', '}');
 900                         string const content = getArg('{', '}');
 901                         if (name == "secnumdepth")
 902                                 h_secnumdepth = content;
 903                         else if (name == "tocdepth")
 904                                 h_tocdepth = content;
 905                         else
 906                                 h_preamble += "\\setcounter{" + name + "}{" + content + "}\n";
 907                 }
 908
 909                 else if (t.cs() == "setlength") {
 910                         string const name = getToken().cs();
 911                         string const content = getArg('{', '}');
 912                         if (name == "parskip")
 913                                 h_paragraph_separation = "skip";
 914                         else if (name == "parindent")
 915                                 h_paragraph_separation = "skip";
 916                         else
 917                                 h_preamble += "\\setcounter{" + name + "}{" + content + "}\n";
 918                 }
 919
 920                 else if (t.cs() == "par") {
 921                         if (!active_environments.empty())
 922                                 result << "\n\\layout " << active_environments.top() << "\n\n";
 923                 }
 924
 925                 else if (t.cs() == "title")
 926                         result << "\\layout Title\n\n" + parse_verbatim_item();
 927
 928                 else if (t.cs() == "author")
 929                         result << "\\layout Author\n\n" + parse_verbatim_item();
 930
 931                 else if (t.cs() == "abstract")
 932                         result << "\\layout Abstract\n\n" + parse_verbatim_item();
 933
 934                 else if (t.cs() == "begin") {
 935                         string const name = getArg('{', '}');
 936                         active_environments.push(name);
 937                         result << parse(FLAG_END, mode);
 938                 }
 939
 940                 if (flags & FLAG_LEAVE) {
 941                         flags &= ~FLAG_LEAVE;
 942                         break;
 943                 }
 944         }
 945
 946         return result.str();
 947 }
 948
 949
 950 } // anonymous namespace
 951
 952
 953 int main(int argc, char * argv[])
 954 {
 955         if (argc <= 1) {
 956                 cerr << "Usage: " << argv[0] << " <infile.tex>" << endl;
 957                 return 2;
 958         }
 959
 960         string t;
 961         ifstream is(argv[1]);
 962         Parser p(is);
 963         //p.dump();
 964         string s = p.parse();
 965         cout << "# tex2lyx 0.0.2 created this file\n"
 966              << "\\lyxformat 222\n"
 967              << "\\textclass " << h_textclass << "\n"
 968              << "\\begin_preamble\n" << h_preamble << "\\end_preamble\n"
 969              << "\\options " << h_options << "\n"
 970              << "\\language " << h_language << "\n"
 971              << "\\inputencoding " << h_inputencoding << "\n"
 972              << "\\fontscheme " << h_fontscheme << "\n"
 973              << "\\graphics " << h_graphics << "\n"
 974              << "\\paperfontsize " << h_paperfontsize << "\n"
 975              << "\\spacing " << h_spacing << "\n"
 976              << "\\papersize " << h_papersize << "\n"
 977              << "\\paperpackage " << h_paperpackage << "\n"
 978              << "\\use_geometry " << h_use_geometry << "\n"
 979              << "\\use_amsmath " << h_use_amsmath << "\n"
 980              << "\\use_natbib " << h_use_natbib << "\n"
 981              << "\\use_numerical_citations " << h_use_numerical_citations << "\n"
 982              << "\\paperorientation " << h_paperorientation << "\n"
 983              << "\\secnumdepth " << h_secnumdepth << "\n"
 984              << "\\tocdepth " << h_tocdepth << "\n"
 985              << "\\paragraph_separation " << h_paragraph_separation << "\n"
 986              << "\\defskip " << h_defskip << "\n"
 987              << "\\quotes_language " << h_quotes_language << "\n"
 988              << "\\quotes_times " << h_quotes_times << "\n"
 989              << "\\papercolumns " << h_papercolumns << "\n"
 990              << "\\papersides " << h_papersides << "\n"
 991              << "\\paperpagestyle " << h_paperpagestyle << "\n"
 992              << "\\tracking_changes " << h_tracking_changes << "\n"
 993              << s << "\n"
 994              << "\\the_end";
 995
 996         return 0;
 997 }