src/mathed/math_parser.C

   1 /*
   2  *  File:        math_parser.C
   3  *  Purpose:     Parser for mathed
   4  *  Author:      Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
   5  *  Created:     January 1996
   6  *  Description: Parse LaTeX2e math mode code.
   7  *
   8  *  Dependencies: Xlib, XForms
   9  *
  10  *  Copyright: 1996, Alejandro Aguilar Sierra
  11  *
  12  *   Version: 0.8beta.
  13  *
  14  *   You are free to use and modify this code under the terms of
  15  *   the GNU General Public Licence version 2 or later.
  16  */
  17
  18 /*
  19
  20 If someone desperately needs partial "structures" (such as a few cells of
  21 an array inset or similar) (s)he could use the following hack as a starting
  22 point to write some macros:
  23
  24   \newif\ifcomment
  25   \commentfalse
  26   \ifcomment
  27           \def\makeamptab{\catcode`\&=4\relax}
  28           \def\makeampletter{\catcode`\&=11\relax}
  29     \def\b{\makeampletter\expandafter\makeamptab\bi}
  30     \long\def\bi#1\e{}
  31   \else
  32     \def\b{}\def\e{}
  33   \fi
  34
  35   ...
  36
  37   \[\begin{array}{ccc}
  38    1 & 2\b & 3^2\\
  39    4 & 5\e & 6\\
  40    7 & 8 & 9
  41   \end{array}\]
  42
  43 */
  44
  45
  46 #include <config.h>
  47
  48 #ifdef __GNUG__
  49 #pragma implementation
  50 #endif
  51
  52 #include "math_parser.h"
  53 #include "math_inset.h"
  54 #include "math_arrayinset.h"
  55 #include "math_braceinset.h"
  56 #include "math_casesinset.h"
  57 #include "math_charinset.h"
  58 #include "math_deliminset.h"
  59 #include "math_factory.h"
  60 #include "math_funcinset.h"
  61 #include "math_kerninset.h"
  62 #include "math_macro.h"
  63 #include "math_macrotable.h"
  64 #include "math_macrotemplate.h"
  65 #include "math_hullinset.h"
  66 #include "math_rootinset.h"
  67 #include "math_sizeinset.h"
  68 #include "math_sqrtinset.h"
  69 #include "math_scriptinset.h"
  70 #include "math_specialcharinset.h"
  71 #include "math_splitinset.h"
  72 #include "math_sqrtinset.h"
  73 #include "math_support.h"
  74 #include "math_xyarrowinset.h"
  75
  76 #include "lyxlex.h"
  77 #include "debug.h"
  78
  79 #include "support/lstrings.h"
  80
  81 #include <cctype>
  82 #include <stack>
  83 #include <algorithm>
  84
  85 using std::istream;
  86 using std::ostream;
  87 using std::ios;
  88 using std::endl;
  89 using std::stack;
  90 using std::fill;
  91
  92
  93 namespace {
  94
  95 bool stared(string const & s)
  96 {
  97         string::size_type const n = s.size();
  98         return n && s[n - 1] == '*';
  99 }
 100
 101
 102 void add(MathArray & ar, char c, MathTextCodes code)
 103 {
 104         ar.push_back(MathAtom(new MathCharInset(c, code)));
 105 }
 106
 107
 108 // These are TeX's catcodes
 109 enum CatCode {
 110         catEscape,     // 0    backslash
 111         catBegin,      // 1    {
 112         catEnd,        // 2    }
 113         catMath,       // 3    $
 114         catAlign,      // 4    &
 115         catNewline,    // 5    ^^M
 116         catParameter,  // 6    #
 117         catSuper,      // 7    ^
 118         catSub,        // 8    _
 119         catIgnore,     // 9
 120         catSpace,      // 10   space
 121         catLetter,     // 11   a-zA-Z
 122         catOther,      // 12   none of the above
 123         catActive,     // 13   ~
 124         catComment,    // 14   %
 125         catInvalid     // 15   <delete>
 126 };
 127
 128 CatCode theCatcode[256];
 129
 130
 131 inline CatCode catcode(unsigned char c)
 132 {
 133         return theCatcode[c];
 134 }
 135
 136
 137 enum {
 138         FLAG_BRACE_LAST = 1 << 1,  //  last closing brace ends the parsing process
 139         FLAG_RIGHT      = 1 << 2,  //  next \\right ends the parsing process
 140         FLAG_END        = 1 << 3,  //  next \\end ends the parsing process
 141         FLAG_BRACK_END  = 1 << 4,  //  next closing bracket ends the parsing process
 142         FLAG_BOX        = 1 << 5,  //  we are in a box
 143         FLAG_ITEM       = 1 << 6,  //  read a (possibly braced token)
 144         FLAG_BLOCK      = 1 << 7,  //  next block ends the parsing process
 145         FLAG_BLOCK2     = 1 << 8,  //  next block2 ends the parsing process
 146         FLAG_LEAVE      = 1 << 9   //  leave the loop at the end
 147 };
 148
 149
 150 void catInit()
 151 {
 152         fill(theCatcode, theCatcode + 256, catOther);
 153         fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
 154         fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
 155
 156         theCatcode['\\'] = catEscape;
 157         theCatcode['{']  = catBegin;
 158         theCatcode['}']  = catEnd;
 159         theCatcode['$']  = catMath;
 160         theCatcode['&']  = catAlign;
 161         theCatcode['\n'] = catNewline;
 162         theCatcode['#']  = catParameter;
 163         theCatcode['^']  = catSuper;
 164         theCatcode['_']  = catSub;
 165         theCatcode['\7f'] = catIgnore;
 166         theCatcode[' ']  = catSpace;
 167         theCatcode['\t'] = catSpace;
 168         theCatcode['\r'] = catSpace;
 169         theCatcode['~']  = catActive;
 170         theCatcode['%']  = catComment;
 171 }
 172
 173
 174
 175 //
 176 // Helper class for parsing
 177 //
 178
 179 class Token {
 180 public:
 181         ///
 182         Token() : cs_(), char_(0), cat_(catIgnore) {}
 183         ///
 184         Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
 185         ///
 186         Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
 187
 188         ///
 189         string const & cs() const { return cs_; }
 190         ///
 191         CatCode cat() const { return cat_; }
 192         ///
 193         char character() const { return char_; }
 194         ///
 195         string asString() const;
 196         ///
 197         bool isCR() const;
 198
 199 private:
 200         ///
 201         string cs_;
 202         ///
 203         char char_;
 204         ///
 205         CatCode cat_;
 206 };
 207
 208 bool Token::isCR() const
 209 {
 210         return cs_ == "\\" || cs_ == "cr" || cs_ == "crcr";
 211 }
 212
 213 string Token::asString() const
 214 {
 215         return cs_.size() ? cs_ : string(1, char_);
 216 }
 217
 218 // Angus' compiler says these are not needed
 219 //bool operator==(Token const & s, Token const & t)
 220 //{
 221 //      return s.character() == t.character()
 222 //              && s.cat() == t.cat() && s.cs() == t.cs();
 223 //}
 224 //
 225 //bool operator!=(Token const & s, Token const & t)
 226 //{
 227 //      return !(s == t);
 228 //}
 229
 230 ostream & operator<<(ostream & os, Token const & t)
 231 {
 232         if (t.cs().size())
 233                 os << "\\" << t.cs();
 234         else
 235                 os << "[" << t.character() << "," << t.cat() << "]";
 236         return os;
 237 }
 238
 239
 240 class Parser {
 241
 242 public:
 243         ///
 244         Parser(LyXLex & lex);
 245         ///
 246         Parser(istream & is);
 247
 248         ///
 249         bool parse_macro(string & name);
 250         ///
 251         bool parse_normal(MathAtom &);
 252         ///
 253         void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
 254         ///
 255         int lineno() const { return lineno_; }
 256         ///
 257         void putback();
 258
 259 private:
 260         ///
 261         void parse_into1(MathArray & array, unsigned flags, MathTextCodes);
 262         ///
 263         string getArg(char lf, char rf);
 264         ///
 265         char getChar();
 266         ///
 267         void error(string const & msg);
 268         ///
 269         bool parse_lines(MathAtom & t, bool numbered, bool outmost);
 270         /// parses {... & ... \\ ... & ... }
 271         bool parse_lines2(MathAtom & t);
 272         /// dump contents to screen
 273         void dump() const;
 274
 275 private:
 276         ///
 277         void tokenize(istream & is);
 278         ///
 279         void tokenize(string const & s);
 280         ///
 281         void skipSpaceTokens(istream & is, char c);
 282         ///
 283         void push_back(Token const & t);
 284         ///
 285         void pop_back();
 286         ///
 287         Token const & prevToken() const;
 288         ///
 289         Token const & nextToken() const;
 290         ///
 291         Token const & getToken();
 292         /// skips spaces if any
 293         void skipSpaces();
 294         /// skips opening brace
 295         void skipBegin();
 296         /// skips closing brace
 297         void skipEnd();
 298         /// counts a sequence of hlines
 299         int readHLines();
 300         ///
 301         void lex(string const & s);
 302         ///
 303         bool good() const;
 304
 305         ///
 306         int lineno_;
 307         ///
 308         std::vector<Token> tokens_;
 309         ///
 310         unsigned pos_;
 311         ///
 312         bool   curr_num_;
 313         ///
 314         string curr_label_;
 315         ///
 316         string curr_skip_;
 317 };
 318
 319
 320 Parser::Parser(LyXLex & lexer)
 321         : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
 322 {
 323         tokenize(lexer.getStream());
 324         lexer.eatLine();
 325 }
 326
 327
 328 Parser::Parser(istream & is)
 329         : lineno_(0), pos_(0), curr_num_(false)
 330 {
 331         tokenize(is);
 332 }
 333
 334
 335 void Parser::push_back(Token const & t)
 336 {
 337         tokens_.push_back(t);
 338 }
 339
 340
 341 void Parser::pop_back()
 342 {
 343         tokens_.pop_back();
 344 }
 345
 346
 347 Token const & Parser::prevToken() const
 348 {
 349         static const Token dummy;
 350         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
 351 }
 352
 353
 354 Token const & Parser::nextToken() const
 355 {
 356         static const Token dummy;
 357         return good() ? tokens_[pos_] : dummy;
 358 }
 359
 360
 361 Token const & Parser::getToken()
 362 {
 363         static const Token dummy;
 364         //lyxerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
 365         return good() ? tokens_[pos_++] : dummy;
 366 }
 367
 368
 369 void Parser::skipSpaces()
 370 {
 371         while (nextToken().cat() == catSpace)
 372                 getToken();
 373 }
 374
 375
 376 void Parser::skipBegin()
 377 {
 378         if (nextToken().cat() == catBegin)
 379                 getToken();
 380         else
 381                 lyxerr << "'{' expected\n";
 382 }
 383
 384
 385 void Parser::skipEnd()
 386 {
 387         if (nextToken().cat() == catEnd)
 388                 getToken();
 389         else
 390                 lyxerr << "'}' expected\n";
 391 }
 392
 393
 394 int Parser::readHLines()
 395 {
 396         int num = 0;
 397         skipSpaces();
 398         while (nextToken().cs() == "hline") {
 399                 getToken();
 400                 ++num;
 401                 skipSpaces();
 402         }
 403         return num;
 404 }
 405
 406
 407 void Parser::putback()
 408 {
 409         --pos_;
 410 }
 411
 412
 413 bool Parser::good() const
 414 {
 415         return pos_ < tokens_.size();
 416 }
 417
 418
 419 char Parser::getChar()
 420 {
 421         if (!good())
 422                 lyxerr << "The input stream is not well..." << endl;
 423         return tokens_[pos_++].character();
 424 }
 425
 426
 427 string Parser::getArg(char lf, char rg)
 428 {
 429         skipSpaces();
 430
 431         string result;
 432         char c = getChar();
 433
 434         if (c != lf)
 435                 putback();
 436         else
 437                 while ((c = getChar()) != rg && good())
 438                         result += c;
 439
 440         return result;
 441 }
 442
 443
 444 void Parser::tokenize(istream & is)
 445 {
 446         // eat everything up to the next \end_inset or end of stream
 447         // and store it in s for further tokenization
 448         string s;
 449         char c;
 450         while (is.get(c)) {
 451                 s += c;
 452                 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
 453                         s = s.substr(0, s.size() - 10);
 454                         break;
 455                 }
 456         }
 457
 458         // tokenize buffer
 459         tokenize(s);
 460 }
 461
 462
 463 void Parser::skipSpaceTokens(istream & is, char c)
 464 {
 465         // skip trailing spaces
 466         while (catcode(c) == catSpace || catcode(c) == catNewline)
 467                 if (!is.get(c))
 468                         break;
 469         //lyxerr << "putting back: " << c << "\n";
 470         is.putback(c);
 471 }
 472
 473
 474 void Parser::tokenize(string const & buffer)
 475 {
 476         static bool init_done = false;
 477
 478         if (!init_done) {
 479                 catInit();
 480                 init_done = true;
 481         }
 482
 483         istringstream is(buffer.c_str(), ios::in | ios::binary);
 484
 485         char c;
 486         while (is.get(c)) {
 487                 //lyxerr << "reading c: " << c << "\n";
 488
 489                 switch (catcode(c)) {
 490                         case catNewline: {
 491                                 ++lineno_;
 492                                 is.get(c);
 493                                 if (catcode(c) == catNewline)
 494                                         ; //push_back(Token("par"));
 495                                 else {
 496                                         push_back(Token(' ', catSpace));
 497                                         is.putback(c);
 498                                 }
 499                                 break;
 500                         }
 501
 502                         case catComment: {
 503                                 while (is.get(c) && catcode(c) != catNewline)
 504                                         ;
 505                                 ++lineno_;
 506                                 break;
 507                         }
 508
 509                         case catEscape: {
 510                                 is.get(c);
 511                                 if (!is) {
 512                                         error("unexpected end of input");
 513                                 } else {
 514                                         string s(1, c);
 515                                         if (catcode(c) == catLetter) {
 516                                                 // collect letters
 517                                                 while (is.get(c) && catcode(c) == catLetter)
 518                                                         s += c;
 519                                                 skipSpaceTokens(is, c);
 520                                         }
 521                                         push_back(Token(s));
 522                                 }
 523                                 break;
 524                         }
 525
 526                         case catSuper:
 527                         case catSub: {
 528                                 push_back(Token(c, catcode(c)));
 529                                 is.get(c);
 530                                 skipSpaceTokens(is, c);
 531                                 break;
 532                         }
 533
 534                         case catIgnore: {
 535                                 lyxerr << "ignoring a char: " << int(c) << "\n";
 536                                 break;
 537                         }
 538
 539                         default:
 540                                 push_back(Token(c, catcode(c)));
 541                 }
 542         }
 543
 544         //dump();
 545 }
 546
 547
 548 void Parser::dump() const
 549 {
 550         lyxerr << "\nTokens: ";
 551         for (unsigned i = 0; i < tokens_.size(); ++i)
 552                 lyxerr << tokens_[i];
 553         lyxerr << "\n";
 554 }
 555
 556
 557 void Parser::error(string const & msg)
 558 {
 559         lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
 560         dump();
 561         //exit(1);
 562 }
 563
 564
 565
 566 bool Parser::parse_lines(MathAtom & t, bool numbered, bool outmost)
 567 {
 568         MathGridInset * p = t->asGridInset();
 569         if (!p) {
 570                 dump();
 571                 lyxerr << "error in Parser::parse_lines() 1\n";
 572                 return false;
 573         }
 574
 575         // save global variables
 576         bool   const saved_num   = curr_num_;
 577         string const saved_label = curr_label_;
 578
 579         // read initial hlines
 580         p->rowinfo(0).lines_ = readHLines();
 581
 582         for (int row = 0; true; ++row) {
 583                 // reset global variables
 584                 curr_num_   = numbered;
 585                 curr_label_.erase();
 586
 587                 // reading a row
 588                 for (MathInset::col_type col = 0; col < p->ncols(); ++col) {
 589                         //lyxerr << "reading cell " << row << " " << col << "\n";
 590                         //lyxerr << "ncols: " << p->ncols() << "\n";
 591
 592                         MathArray & ar = p->cell(col + row * p->ncols());
 593                         parse_into(ar, FLAG_BLOCK);
 594                         // remove 'unnecessary' braces:
 595                         if (ar.size() == 1 && ar.back()->asBraceInset())
 596                                 ar = ar.back()->asBraceInset()->cell(0);
 597                         //lyxerr << "ar: " << ar << "\n";
 598
 599                         // break if cell is not followed by an ampersand
 600                         if (nextToken().cat() != catAlign) {
 601                                 //lyxerr << "less cells read than normal in row/col: "
 602                                 //      << row << " " << col << "\n";
 603                                 break;
 604                         }
 605
 606                         // skip the ampersand
 607                         getToken();
 608                 }
 609
 610                 if (outmost) {
 611                         MathHullInset * m = t->asHullInset();
 612                         if (!m) {
 613                                 lyxerr << "error in Parser::parse_lines() 2\n";
 614                                 return false;
 615                         }
 616                         m->numbered(row, curr_num_);
 617                         m->label(row, curr_label_);
 618                         if (curr_skip_.size()) {
 619                                 m->vcrskip(LyXLength(curr_skip_), row);
 620                                 curr_skip_.erase();
 621                         }
 622                 }
 623
 624                 // is a \\ coming?
 625                 if (nextToken().isCR()) {
 626                         // skip the cr-token
 627                         getToken();
 628
 629                         // try to read a length
 630                         //get
 631
 632                         // read hlines for next row
 633                         p->rowinfo(row + 1).lines_ = readHLines();
 634                 }
 635
 636                 // we are finished if the next token is an 'end'
 637                 if (nextToken().cs() == "end") {
 638                         // skip the end-token
 639                         getToken();
 640                         getArg('{','}');
 641
 642                         // leave the 'read a line'-loop
 643                         break;
 644                 }
 645
 646                 // otherwise, we have to start a new row
 647                 p->appendRow();
 648         }
 649
 650         // restore "global" variables
 651         curr_num_   = saved_num;
 652         curr_label_ = saved_label;
 653
 654         return true;
 655 }
 656
 657
 658 bool Parser::parse_lines2(MathAtom & t)
 659 {
 660         MathGridInset * p = t->asGridInset();
 661         if (!p) {
 662                 lyxerr << "error in Parser::parse_lines() 1\n";
 663                 return false;
 664         }
 665
 666         skipBegin();
 667
 668         for (int row = 0; true; ++row) {
 669                 // reading a row
 670                 for (MathInset::col_type col = 0; true; ++col) {
 671                         //lyxerr << "reading cell " << row << " " << col << " " << p->ncols() << "\n";
 672
 673                         if (col >= p->ncols()) {
 674                                 //lyxerr << "adding col " << col << "\n";
 675                                 p->addCol(p->ncols());
 676                         }
 677
 678                         parse_into(p->cell(col + row * p->ncols()), FLAG_BLOCK2);
 679                         //lyxerr << "read cell: " << p->cell(col + row * p->ncols()) << "\n";
 680
 681                         // break if cell is not followed by an ampersand
 682                         if (nextToken().cat() != catAlign) {
 683                                 //lyxerr << "less cells read than normal in row/col: " << row << " " << col << "\n";
 684                                 break;
 685                         }
 686
 687                         // skip the ampersand
 688                         getToken();
 689                 }
 690
 691                 // is a \\ coming?
 692                 if (nextToken().isCR()) {
 693                         // skip the cr-token
 694                         getToken();
 695                 }
 696
 697                 // we are finished if the next token is an '}'
 698                 if (nextToken().cat() == catEnd) {
 699                         // skip the end-token
 700                         getToken();
 701                         // leave the 'read a line'-loop
 702                         break;
 703                 }
 704
 705                 // otherwise, we have to start a new row
 706                 p->appendRow();
 707         }
 708
 709         return true;
 710 }
 711
 712
 713
 714 bool Parser::parse_macro(string & name)
 715 {
 716         name = "{error}";
 717         skipSpaces();
 718
 719         if (getToken().cs() != "newcommand") {
 720                 lyxerr << "\\newcommand expected\n";
 721                 return false;
 722         }
 723
 724         if (getToken().cat() != catBegin) {
 725                 lyxerr << "'{' in \\newcommand expected (1)\n";
 726                 return false;
 727         }
 728
 729         name = getToken().cs();
 730
 731         if (getToken().cat() != catEnd) {
 732                 lyxerr << "'}' expected\n";
 733                 return false;
 734         }
 735
 736         string    arg  = getArg('[', ']');
 737         int       narg = arg.empty() ? 0 : atoi(arg.c_str());
 738
 739         if (getToken().cat() != catBegin) {
 740                 lyxerr << "'{' in \\newcommand expected (2)\n";
 741                 return false;
 742         }
 743
 744         MathArray ar;
 745         parse_into(ar, FLAG_BRACE_LAST);
 746
 747         // we cannot handle recursive stuff at all
 748         MathArray test;
 749         test.push_back(createMathInset(name));
 750         if (ar.contains(test)) {
 751                 lyxerr << "we cannot handle recursive macros at all.\n";
 752                 return false;
 753         }
 754
 755         MathMacroTable::create(name, narg, ar);
 756         return true;
 757 }
 758
 759
 760 bool Parser::parse_normal(MathAtom & matrix)
 761 {
 762         skipSpaces();
 763         Token const & t = getToken();
 764
 765         if (t.cs() == "(") {
 766                 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
 767                 parse_into(matrix->cell(0), 0);
 768                 return true;
 769         }
 770
 771         if (t.cat() == catMath) {
 772                 Token const & n = getToken();
 773                 if (n.cat() == catMath) {
 774                         // TeX's $$...$$ syntax for displayed math
 775                         matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
 776                         MathHullInset * p = matrix->asHullInset();
 777                         parse_into(p->cell(0), 0);
 778                         p->numbered(0, curr_num_);
 779                         p->label(0, curr_label_);
 780                 } else {
 781                         // simple $...$  stuff
 782                         putback();
 783                         matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
 784                         parse_into(matrix->cell(0), 0);
 785                 }
 786                 return true;
 787         }
 788
 789         if (!t.cs().size()) {
 790                 lyxerr << "start of math expected, got '" << t << "'\n";
 791                 return false;
 792         }
 793
 794         string const & cs = t.cs();
 795
 796         if (cs == "[") {
 797                 curr_num_ = 0;
 798                 curr_label_.erase();
 799                 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
 800                 MathHullInset * p = matrix->asHullInset();
 801                 parse_into(p->cell(0), 0);
 802                 p->numbered(0, curr_num_);
 803                 p->label(0, curr_label_);
 804                 return true;
 805         }
 806
 807         if (cs != "begin") {
 808                 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
 809                 return false;
 810         }
 811
 812         string const name = getArg('{', '}');
 813
 814         if (name == "math") {
 815                 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
 816                 parse_into(matrix->cell(0), 0);
 817                 return true;
 818         }
 819
 820         if (name == "equation" || name == "equation*" || name == "displaymath") {
 821                 curr_num_ = (name == "equation");
 822                 curr_label_.erase();
 823                 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
 824                 MathHullInset * p = matrix->asHullInset();
 825                 parse_into(p->cell(0), FLAG_END);
 826                 p->numbered(0, curr_num_);
 827                 p->label(0, curr_label_);
 828                 return true;
 829         }
 830
 831         if (name == "eqnarray" || name == "eqnarray*") {
 832                 matrix = MathAtom(new MathHullInset(LM_OT_EQNARRAY));
 833                 return parse_lines(matrix, !stared(name), true);
 834         }
 835
 836         if (name == "align" || name == "align*") {
 837                 matrix = MathAtom(new MathHullInset(LM_OT_ALIGN));
 838                 return parse_lines(matrix, !stared(name), true);
 839         }
 840
 841         if (name == "alignat" || name == "alignat*") {
 842                 int nc = 2 * atoi(getArg('{', '}').c_str());
 843                 matrix = MathAtom(new MathHullInset(LM_OT_ALIGNAT, nc));
 844                 return parse_lines(matrix, !stared(name), true);
 845         }
 846
 847         if (name == "xalignat" || name == "xalignat*") {
 848                 int nc = 2 * atoi(getArg('{', '}').c_str());
 849                 matrix = MathAtom(new MathHullInset(LM_OT_XALIGNAT, nc));
 850                 return parse_lines(matrix, !stared(name), true);
 851         }
 852
 853         if (name == "xxalignat") {
 854                 int nc = 2 * atoi(getArg('{', '}').c_str());
 855                 matrix = MathAtom(new MathHullInset(LM_OT_XXALIGNAT, nc));
 856                 return parse_lines(matrix, !stared(name), true);
 857         }
 858
 859         if (name == "multline" || name == "multline*") {
 860                 matrix = MathAtom(new MathHullInset(LM_OT_MULTLINE));
 861                 return parse_lines(matrix, !stared(name), true);
 862         }
 863
 864         if (name == "gather" || name == "gather*") {
 865                 matrix = MathAtom(new MathHullInset(LM_OT_GATHER));
 866                 return parse_lines(matrix, !stared(name), true);
 867         }
 868
 869         lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
 870         lyxerr << "1: unknown math environment: " << name << "\n";
 871         return false;
 872 }
 873
 874
 875 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
 876 {
 877         parse_into1(array, flags, code);
 878         // remove 'unnecessary' braces:
 879         if (array.size() == 1 && array.back()->asBraceInset()) {
 880                 lyxerr << "extra braces removed\n";
 881                 array = array.back()->asBraceInset()->cell(0);
 882         }
 883 }
 884
 885
 886 void Parser::parse_into1(MathArray & array, unsigned flags, MathTextCodes code)
 887 {
 888         bool panic  = false;
 889         int  limits = 0;
 890
 891         while (good()) {
 892                 Token const & t = getToken();
 893
 894                 //lyxerr << "t: " << t << " flags: " << flags << "\n";
 895                 //array.dump(lyxerr);
 896                 //lyxerr << "\n";
 897
 898                 if (flags & FLAG_ITEM) {
 899                         flags &= ~FLAG_ITEM;
 900                         if (t.cat() == catBegin) {
 901                                 // skip the brace and collect everything to the next matching
 902                                 // closing brace
 903                                 flags |= FLAG_BRACE_LAST;
 904                                 continue;
 905                         } else {
 906                                 // handle only this single token, leave the loop if done
 907                                 flags |= FLAG_LEAVE;
 908                         }
 909                 }
 910
 911                 if (flags & FLAG_BLOCK) {
 912                         if (t.cat() == catAlign || t.isCR() || t.cs() == "end") {
 913                                 putback();
 914                                 return;
 915                         }
 916                 }
 917
 918                 if (flags & FLAG_BLOCK2) {
 919                         if (t.cat() == catAlign || t.isCR() || t.cs() == "end"
 920                                         || t.cat() == catEnd) {
 921                                 putback();
 922                                 return;
 923                         }
 924                 }
 925
 926                 //
 927                 // cat codes
 928                 //
 929                 if (t.cat() == catMath) {
 930                         if (flags & FLAG_BOX) {
 931                                 // we are inside an mbox, so opening new math is allowed
 932                                 array.push_back(MathAtom(new MathHullInset(LM_OT_SIMPLE)));
 933                                 parse_into(array.back()->cell(0), 0);
 934                         } else {
 935                                 // otherwise this is the end of the formula
 936                                 break;
 937                         }
 938                 }
 939
 940                 else if (t.cat() == catLetter)
 941                         add(array, t.character(), code);
 942
 943                 else if (t.cat() == catSpace && code == LM_TC_TEXTRM)
 944                         add(array, t.character(), code);
 945
 946                 else if (t.cat() == catParameter) {
 947                         Token const & n = getToken();
 948                         array.push_back(MathAtom(new MathMacroArgument(n.character()-'0', code)));
 949                 }
 950
 951                 else if (t.cat() == catBegin) {
 952                         MathArray ar;
 953                         parse_into(ar, FLAG_BRACE_LAST);
 954 #ifndef WITH_WARNINGS
 955 #warning this might be wrong in general!
 956 #endif
 957                         // ignore braces around simple items
 958                         if ((ar.size() == 1 && !ar.front()->needsBraces()
 959        || (ar.size() == 2 && !ar.front()->needsBraces()
 960                                             && ar.back()->asScriptInset()))
 961        || (ar.size() == 0 && array.size() == 0))
 962                         {
 963                                 array.push_back(ar);
 964                         } else {
 965                                 array.push_back(MathAtom(new MathBraceInset));
 966                                 array.back()->cell(0).swap(ar);
 967                         }
 968                 }
 969
 970                 else if (t.cat() == catEnd) {
 971                         if (flags & FLAG_BRACE_LAST)
 972                                 return;
 973                         lyxerr << "found '}' unexpectedly, array: '" << array << "'\n";
 974                         //lyxerr << "found '}' unexpectedly\n";
 975                         add(array, '}', LM_TC_TEX);
 976                 }
 977
 978                 else if (t.cat() == catAlign) {
 979                         lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
 980                         //lyxerr << "found tab unexpectedly\n";
 981                         add(array, '&', LM_TC_TEX);
 982                 }
 983
 984                 else if (t.cat() == catSuper || t.cat() == catSub) {
 985                         bool up = (t.cat() == catSuper);
 986                         MathScriptInset * p = 0;
 987                         if (array.size())
 988                                 p = array.back()->asScriptInset();
 989                         if (!p || p->has(up)) {
 990                                 array.push_back(MathAtom(new MathScriptInset(up)));
 991                                 p = array.back()->asScriptInset();
 992                         }
 993                         p->ensure(up);
 994                         parse_into(p->cell(up), FLAG_ITEM);
 995                         p->limits(limits);
 996                         limits = 0;
 997                 }
 998
 999                 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
1000                         return;
1001
1002                 else if (t.cat() == catOther)
1003                         add(array, t.character(), code);
1004
1005                 //
1006                 // control sequences
1007                 //
1008                 else if (t.cs() == "protect")
1009                         // ignore \\protect, will be re-added during output
1010                         ;
1011
1012                 else if (t.cs() == "end")
1013                         break;
1014
1015                 else if (t.cs() == ")")
1016                         break;
1017
1018                 else if (t.cs() == "]")
1019                         break;
1020
1021                 else if (t.cs() == "\\") {
1022                         curr_skip_ = getArg('[', ']');
1023                         //lyxerr << "found newline unexpectedly, array: '" << array << "'\n";
1024                         lyxerr << "found newline unexpectedly\n";
1025                         array.push_back(createMathInset("\\"));
1026                 }
1027
1028                 else if (t.cs() == "limits")
1029                         limits = 1;
1030
1031                 else if (t.cs() == "nolimits")
1032                         limits = -1;
1033
1034                 else if (t.cs() == "nonumber")
1035                         curr_num_ = false;
1036
1037                 else if (t.cs() == "number")
1038                         curr_num_ = true;
1039
1040                 else if (t.cs() == "sqrt") {
1041                         char c = getChar();
1042                         if (c == '[') {
1043                                 array.push_back(MathAtom(new MathRootInset));
1044                                 parse_into(array.back()->cell(0), FLAG_BRACK_END);
1045                                 parse_into(array.back()->cell(1), FLAG_ITEM);
1046                         } else {
1047                                 putback();
1048                                 array.push_back(MathAtom(new MathSqrtInset));
1049                                 parse_into(array.back()->cell(0), FLAG_ITEM);
1050                         }
1051                 }
1052
1053                 else if (t.cs() == "left") {
1054                         string l = getToken().asString();
1055                         MathArray ar;
1056                         parse_into(ar, FLAG_RIGHT);
1057                         string r = getToken().asString();
1058                         MathAtom dl(new MathDelimInset(l, r));
1059                         dl->cell(0) = ar;
1060                         array.push_back(dl);
1061                 }
1062
1063                 else if (t.cs() == "right") {
1064                         if (!(flags & FLAG_RIGHT)) {
1065                                 //lyxerr << "got so far: '" << array << "'\n";
1066                                 error("Unmatched right delimiter");
1067                         }
1068                         return;
1069                 }
1070
1071                 else if (t.cs() == "begin") {
1072                         string const name = getArg('{', '}');
1073                         if (name == "array") {
1074                                 string const valign = getArg('[', ']') + 'c';
1075                                 string const halign = getArg('{', '}');
1076                                 array.push_back(MathAtom(new MathArrayInset(valign[0], halign)));
1077                                 parse_lines(array.back(), false, false);
1078                         } else if (name == "split") {
1079                                 array.push_back(MathAtom(new MathSplitInset(1)));
1080                                 parse_lines(array.back(), false, false);
1081                         } else if (name == "cases") {
1082                                 array.push_back(MathAtom(new MathCasesInset));
1083                                 parse_lines(array.back(), false, false);
1084                         } else
1085                                 lyxerr << "unknow math inset begin '" << name << "'\n";
1086                 }
1087
1088                 else if (t.cs() == "kern") {
1089 #ifdef WITH_WARNINGS
1090 #warning A hack...
1091 #endif
1092                         string s;
1093                         while (1) {
1094                                 Token const & t = getToken();
1095                                 if (!good()) {
1096                                         putback();
1097                                         break;
1098                                 }
1099                                 s += t.character();
1100                                 if (isValidLength(s))
1101                                         break;
1102                         }
1103                         array.push_back(MathAtom(new MathKernInset(s)));
1104                 }
1105
1106                 else if (t.cs() == "label") {
1107                         curr_label_ = getArg('{', '}');
1108                 }
1109
1110                 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
1111                         MathAtom p = createMathInset(t.cs());
1112                         array.swap(p->cell(0));
1113                         parse_into(p->cell(1), flags, code);
1114                         array.push_back(p);
1115                         return;
1116                 }
1117
1118                 else if (t.cs() == "xymatrix") {
1119                         array.push_back(createMathInset(t.cs()));
1120                         parse_lines2(array.back());
1121                 }
1122
1123 #if 0
1124                 // Disabled
1125                 else if (1 && t.cs() == "ar") {
1126                         MathXYArrowInset * p = new MathXYArrowInset;
1127
1128                         // try to read target
1129                         char c = getChar();
1130                         if (c == '[') {
1131                                 parse_into(p->cell(0), FLAG_BRACK_END);
1132                                 //lyxerr << "read target: " << p->cell(0) << "\n";
1133                         } else {
1134                                 putback();
1135                         }
1136
1137                         // try to read label
1138                         if (nextToken().cat() == catSuper || nextToken().cat() == catSub) {
1139                                 p->up_ = nextToken().cat() == catSuper;
1140                                 getToken();
1141                                 parse_into(p->cell(1), FLAG_ITEM);
1142                                 //lyxerr << "read label: " << p->cell(1) << "\n";
1143                         }
1144
1145                         array.push_back(MathAtom(p));
1146                         //lyxerr << "read array: " << array << "\n";
1147                 }
1148
1149                 else if (t.cs() == "mbox") {
1150                         array.push_back(createMathInset(t.cs()));
1151                         // slurp in the argument of mbox
1152
1153                         MathBoxInset * p = array.back()->asBoxInset();
1154                         //lyx::assert(p);
1155                 }
1156 #endif
1157
1158
1159                 else if (t.cs().size()) {
1160                         latexkeys const * l = in_word_set(t.cs());
1161                         if (l) {
1162                                 if (l->token == LM_TK_FONT) {
1163                                         //lyxerr << "starting font\n";
1164                                         //CatCode catSpaceSave = theCatcode[' '];
1165                                         //if (l->id == LM_TC_TEXTRM) {
1166                                         //      // temporarily change catcode
1167                                         //      theCatcode[' '] = catLetter;
1168                                         //}
1169
1170                                         MathArray ar;
1171                                         parse_into(ar, FLAG_ITEM, static_cast<MathTextCodes>(l->id));
1172                                         array.push_back(ar);
1173
1174                                         // undo catcode changes
1175                                         ////theCatcode[' '] = catSpaceSave;
1176                                         //lyxerr << "ending font\n";
1177                                 }
1178
1179                                 else if (l->token == LM_TK_OLDFONT) {
1180                                         code = static_cast<MathTextCodes>(l->id);
1181                                 }
1182
1183                                 else if (l->token == LM_TK_BOX) {
1184                                         MathAtom p = createMathInset(t.cs());
1185                                         parse_into(p->cell(0), FLAG_ITEM | FLAG_BOX, LM_TC_BOX);
1186                                         array.push_back(p);
1187                                 }
1188
1189                                 else if (l->token == LM_TK_STY) {
1190                                         MathAtom p = createMathInset(t.cs());
1191                                         parse_into(p->cell(0), flags, code);
1192                                         array.push_back(p);
1193                                         return;
1194                                 }
1195
1196                                 else {
1197                                         MathAtom p = createMathInset(t.cs());
1198                                         for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1199                                                 parse_into(p->cell(i), FLAG_ITEM);
1200                                         array.push_back(p);
1201                                 }
1202                         }
1203
1204                         else {
1205                                 MathAtom p = createMathInset(t.cs());
1206                                 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1207                                         parse_into(p->cell(i), FLAG_ITEM);
1208                                 array.push_back(p);
1209                         }
1210                 }
1211
1212
1213                 if (flags & FLAG_LEAVE) {
1214                         flags &= ~FLAG_LEAVE;
1215                         break;
1216                 }
1217         }
1218
1219         if (panic) {
1220                 lyxerr << " Math Panic, expect problems!\n";
1221                 //   Search for the end command.
1222                 Token t;
1223                 do {
1224                         t = getToken();
1225                 } while (good() && t.cs() != "end");
1226         }
1227 }
1228
1229
1230
1231 } // anonymous namespace
1232
1233
1234 void mathed_parse_cell(MathArray & ar, string const & str)
1235 {
1236         istringstream is(str.c_str());
1237         mathed_parse_cell(ar, is);
1238 }
1239
1240
1241 void mathed_parse_cell(MathArray & ar, istream & is)
1242 {
1243         Parser(is).parse_into(ar, 0);
1244 }
1245
1246
1247
1248 bool mathed_parse_macro(string & name, string const & str)
1249 {
1250         istringstream is(str.c_str());
1251         Parser parser(is);
1252         return parser.parse_macro(name);
1253 }
1254
1255 bool mathed_parse_macro(string & name, istream & is)
1256 {
1257         Parser parser(is);
1258         return parser.parse_macro(name);
1259 }
1260
1261 bool mathed_parse_macro(string & name, LyXLex & lex)
1262 {
1263         Parser parser(lex);
1264         return parser.parse_macro(name);
1265 }
1266
1267
1268
1269 bool mathed_parse_normal(MathAtom & t, string const & str)
1270 {
1271         istringstream is(str.c_str());
1272         Parser parser(is);
1273         return parser.parse_normal(t);
1274 }
1275
1276 bool mathed_parse_normal(MathAtom & t, istream & is)
1277 {
1278         Parser parser(is);
1279         return parser.parse_normal(t);
1280 }
1281
1282 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1283 {
1284         Parser parser(lex);
1285         return parser.parse_normal(t);
1286 }