]> git.lyx.org Git - lyx.git/blob - src/mathed/math_parser.C
b59643b9817e77d4d4c1238a021ec87dd141fbd9
[lyx.git] / src / mathed / math_parser.C
1 /*
2  *  File:        math_parser.C
3  *  Purpose:     Parser for mathed
4  *  Author:      Alejandro Aguilar Sierra <asierra@servidor.unam.mx> 
5  *  Created:     January 1996
6  *  Description: Parse LaTeX2e math mode code.
7  *
8  *  Dependencies: Xlib, XForms
9  *
10  *  Copyright: 1996, Alejandro Aguilar Sierra
11  *
12  *   Version: 0.8beta.
13  *
14  *   You are free to use and modify this code under the terms of
15  *   the GNU General Public Licence version 2 or later.
16  */
17
18 /* 
19
20 If someone desperately needs partial "structures" (such as a few cells of
21 an array inset or similar), (s)he could use the following hack as a starting
22 point to write some macros:
23
24   \newif\ifcomment
25   \commentfalse
26   \ifcomment
27           \def\makeamptab{\catcode`\&=4\relax}
28           \def\makeampletter{\catcode`\&=11\relax}
29     \def\b{\makeampletter\expandafter\makeamptab\bi}
30     \long\def\bi#1\e{}
31   \else
32     \def\b{}\def\e{}
33   \fi
34
35   ...
36
37   \[\begin{array}{ccc}
38    1 & 2\b & 3^2\\
39    4 & 5\e & 6\\
40    7 & 8 & 9
41   \end{array}\]
42
43 */
44
45
46 #include <config.h>
47
48 #ifdef __GNUG__
49 #pragma implementation
50 #endif
51
52 #include "math_parser.h"
53 #include "math_inset.h"
54 #include "math_arrayinset.h"
55 #include "math_braceinset.h"
56 #include "math_boxinset.h"
57 #include "math_charinset.h"
58 #include "math_deliminset.h"
59 #include "math_factory.h"
60 #include "math_funcinset.h"
61 #include "math_kerninset.h"
62 #include "math_macro.h"
63 #include "math_macrotable.h"
64 #include "math_macrotemplate.h"
65 #include "math_hullinset.h"
66 #include "math_rootinset.h"
67 #include "math_sizeinset.h"
68 #include "math_sqrtinset.h"
69 #include "math_scriptinset.h"
70 #include "math_specialcharinset.h"
71 #include "math_sqrtinset.h"
72 #include "math_support.h"
73 #include "math_xyarrowinset.h"
74
75 #include "lyxlex.h"
76 #include "debug.h"
77 #include "support/LAssert.h"
78 #include "support/lstrings.h"
79
80 #include <cctype>
81 #include <stack>
82 #include <algorithm>
83
84 using std::istream;
85 using std::ostream;
86 using std::ios;
87 using std::endl;
88 using std::stack;
89 using std::fill;
90 using std::vector;
91
92 //#define FILEDEBUG
93
94
95 namespace {
96
/// Tells whether \p s is a "starred" name, i.e. is non-empty and has '*'
/// as its last character.
bool stared(string const & s)
{
        if (s.empty())
                return false;
        return s[s.size() - 1] == '*';
}
102
103
104 void add(MathArray & ar, char c, MathTextCodes code)
105 {
106         ar.push_back(MathAtom(new MathCharInset(c, code)));
107 }
108
109
// TeX's category codes ("catcodes").  The numeric value of every
// enumerator is spelled out; it is the same value the enumerator gets
// from its position in the list.
enum CatCode {
        catEscape    =  0,  // backslash
        catBegin     =  1,  // {
        catEnd       =  2,  // }
        catMath      =  3,  // $
        catAlign     =  4,  // &
        catNewline   =  5,  // ^^M
        catParameter =  6,  // #
        catSuper     =  7,  // ^
        catSub       =  8,  // _
        catIgnore    =  9,  // ignored character
        catSpace     = 10,  // space
        catLetter    = 11,  // a-zA-Z
        catOther     = 12,  // none of the above
        catActive    = 13,  // ~
        catComment   = 14,  // %
        catInvalid   = 15   // <delete>
};
129
130 CatCode theCatcode[256];  
131
132
133 inline CatCode catcode(unsigned char c)
134 {
135         return theCatcode[c];
136 }
137
138
// Bit flags handed to Parser::parse_into(); each flag occupies one bit so
// that several of them can be or-ed together.
enum {
        FLAG_BRACE_LAST = 0x002,  //  last closing brace ends the parsing process
        FLAG_RIGHT      = 0x004,  //  next \right ends the parsing process
        FLAG_END        = 0x008,  //  next \end ends the parsing process
        FLAG_BRACK_END  = 0x010,  //  next closing bracket ends the parsing process
        FLAG_BOX        = 0x020,  //  we are in a box
        FLAG_ITEM       = 0x040,  //  read a (possibly braced) token
        FLAG_BLOCK      = 0x080,  //  next block ends the parsing process
        FLAG_BLOCK2     = 0x100,  //  next block2 ends the parsing process
        FLAG_LEAVE      = 0x200   //  leave the loop at the end
};
150
151
152 void catInit()
153 {
154         fill(theCatcode, theCatcode + 256, catOther);
155         fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
156         fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
157
158         theCatcode['\\'] = catEscape;   
159         theCatcode['{']  = catBegin;    
160         theCatcode['}']  = catEnd;      
161         theCatcode['$']  = catMath;     
162         theCatcode['&']  = catAlign;    
163         theCatcode['\n'] = catNewline;  
164         theCatcode['#']  = catParameter;        
165         theCatcode['^']  = catSuper;    
166         theCatcode['_']  = catSub;      
167         theCatcode['\7f'] = catIgnore;    
168         theCatcode[' ']  = catSpace;    
169         theCatcode['\t'] = catSpace;    
170         theCatcode['\r'] = catSpace;    
171         theCatcode['~']  = catActive;   
172         theCatcode['%']  = catComment;  
173 }
174
175
176
177 //
178 // Helper class for parsing
179 //
180
181 class Token {
182 public:
183         ///
184         Token() : cs_(), char_(0), cat_(catIgnore) {}
185         ///
186         Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
187         ///
188         Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
189
190         ///
191         string const & cs() const { return cs_; }
192         ///
193         CatCode cat() const { return cat_; }
194         ///
195         char character() const { return char_; }
196         ///
197         string asString() const;
198         ///
199         bool isCR() const;
200
201 private:        
202         ///
203         string cs_;
204         ///
205         char char_;
206         ///
207         CatCode cat_;
208 };
209
210 bool Token::isCR() const
211 {
212         return cs_ == "\\" || cs_ == "cr" || cs_ == "crcr";
213 }
214
215 string Token::asString() const
216 {
217         return cs_.size() ? cs_ : string(1, char_);
218 }
219
220 // Angus' compiler says these are not needed
221 //bool operator==(Token const & s, Token const & t)
222 //{
223 //      return s.character() == t.character()
224 //              && s.cat() == t.cat() && s.cs() == t.cs(); 
225 //}
226 //
227 //bool operator!=(Token const & s, Token const & t)
228 //{
229 //      return !(s == t);
230 //}
231
232 ostream & operator<<(ostream & os, Token const & t)
233 {
234         if (t.cs().size())
235                 os << "\\" << t.cs();
236         else
237                 os << "[" << t.character() << "," << t.cat() << "]";
238         return os;
239 }
240
241
242 class Parser {
243
244 public:
245         ///
246         Parser(LyXLex & lex);
247         ///
248         Parser(istream & is);
249
250         ///
251         bool parse_macro(string & name);
252         ///
253         bool parse_normal(MathAtom &);
254         ///
255         void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
256         ///
257         int lineno() const { return lineno_; }
258         ///
259         void putback();
260
261 private:
262         ///
263         void parse_into1(MathArray & array, unsigned flags, MathTextCodes);
264         ///
265         string getArg(char lf, char rf);
266         ///
267         char getChar();
268         ///
269         void error(string const & msg);
270         ///
271         bool parse_lines(MathAtom & t, bool numbered, bool outmost);
272         /// parses {... & ... \\ ... & ... }
273         bool parse_lines2(MathAtom & t, bool braced);
274         /// dump contents to screen
275         void dump() const;
276
277 private:
278         ///
279         void tokenize(istream & is);
280         ///
281         void tokenize(string const & s);
282         ///
283         void skipSpaceTokens(istream & is, char c);
284         ///
285         void push_back(Token const & t);
286         ///
287         void pop_back();
288         ///
289         Token const & prevToken() const;
290         ///
291         Token const & nextToken() const;
292         ///
293         Token const & getToken();
294         /// skips spaces if any
295         void skipSpaces();
296         /// skips opening brace
297         void skipBegin();
298         /// skips closing brace
299         void skipEnd();
300         /// counts a sequence of hlines
301         int readHLines();
302         ///
303         void lex(string const & s);
304         ///
305         bool good() const;
306
307         ///
308         int lineno_;
309         ///
310         vector<Token> tokens_;
311         ///
312         unsigned pos_;
313         ///
314         bool   curr_num_;
315         ///
316         string curr_label_;
317         ///
318         string curr_skip_;
319 };
320
321
322 Parser::Parser(LyXLex & lexer)
323         : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
324 {
325         tokenize(lexer.getStream());
326         lexer.eatLine();
327 }
328
329
330 Parser::Parser(istream & is)
331         : lineno_(0), pos_(0), curr_num_(false)
332 {
333         tokenize(is);
334 }
335
336
337 void Parser::push_back(Token const & t)
338 {
339         tokens_.push_back(t);
340 }
341
342
343 void Parser::pop_back()
344 {
345         tokens_.pop_back();
346 }
347
348
349 Token const & Parser::prevToken() const
350 {
351         static const Token dummy;
352         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
353 }
354
355
356 Token const & Parser::nextToken() const
357 {
358         static const Token dummy;
359         return good() ? tokens_[pos_] : dummy;
360 }
361
362
363 Token const & Parser::getToken()
364 {
365         static const Token dummy;
366         //lyxerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
367         return good() ? tokens_[pos_++] : dummy;
368 }
369
370
371 void Parser::skipSpaces()
372 {
373         while (nextToken().cat() == catSpace)
374                 getToken();
375 }
376
377
378 void Parser::skipBegin()
379 {
380         if (nextToken().cat() == catBegin)
381                 getToken();
382         else
383                 lyxerr << "'{' expected\n";
384 }
385
386
387 void Parser::skipEnd()
388 {
389         if (nextToken().cat() == catEnd)
390                 getToken();
391         else
392                 lyxerr << "'}' expected\n";
393 }
394
395
396 int Parser::readHLines()
397 {
398         int num = 0;
399         skipSpaces();
400         while (nextToken().cs() == "hline") {
401                 getToken();
402                 ++num;
403                 skipSpaces();
404         }
405         return num;
406 }
407
408
409 void Parser::putback()
410 {
411         --pos_;
412 }
413
414
415 bool Parser::good() const
416 {
417         return pos_ < tokens_.size();
418 }
419
420
421 char Parser::getChar()
422 {
423         if (!good())
424                 lyxerr << "The input stream is not well..." << endl;
425         return tokens_[pos_++].character();
426 }
427
428
429 string Parser::getArg(char lf, char rg)
430 {
431         skipSpaces();
432
433         string result;
434         char c = getChar();
435
436         if (c != lf)  
437                 putback();
438         else 
439                 while ((c = getChar()) != rg && good())
440                         result += c;
441
442         return result;
443 }
444
445
446 void Parser::tokenize(istream & is)
447 {
448         // eat everything up to the next \end_inset or end of stream
449         // and store it in s for further tokenization
450         string s;
451         char c;
452         while (is.get(c)) {
453                 s += c;
454                 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
455                         s = s.substr(0, s.size() - 10);
456                         break;
457                 }
458         }
459
460         // tokenize buffer
461         tokenize(s);
462 }
463
464
465 void Parser::skipSpaceTokens(istream & is, char c)
466 {
467         // skip trailing spaces
468         while (catcode(c) == catSpace || catcode(c) == catNewline)
469                 if (!is.get(c))
470                         break;
471         //lyxerr << "putting back: " << c << "\n";
472         is.putback(c);
473 }
474
475
476 void Parser::tokenize(string const & buffer)
477 {
478         static bool init_done = false;
479         
480         if (!init_done) {
481                 catInit();
482                 init_done = true;
483         }
484
485         istringstream is(buffer.c_str(), ios::in | ios::binary);
486
487         char c;
488         while (is.get(c)) {
489                 //lyxerr << "reading c: " << c << "\n";
490
491                 switch (catcode(c)) {
492                         case catNewline: {
493                                 ++lineno_; 
494                                 is.get(c);
495                                 if (catcode(c) == catNewline)
496                                         ; //push_back(Token("par"));
497                                 else {
498                                         push_back(Token(' ', catSpace));
499                                         is.putback(c);  
500                                 }
501                                 break;
502                         }
503
504                         case catComment: {
505                                 while (is.get(c) && catcode(c) != catNewline)
506                                         ;
507                                 ++lineno_; 
508                                 break;
509                         }
510
511                         case catEscape: {
512                                 is.get(c);
513                                 if (!is) {
514                                         error("unexpected end of input");
515                                 } else {
516                                         string s(1, c);
517                                         if (catcode(c) == catLetter) {
518                                                 // collect letters
519                                                 while (is.get(c) && catcode(c) == catLetter)
520                                                         s += c;
521                                                 skipSpaceTokens(is, c);
522                                         }       
523                                         push_back(Token(s));
524                                 }
525                                 break;
526                         }
527
528                         case catSuper:
529                         case catSub: {
530                                 push_back(Token(c, catcode(c)));
531                                 is.get(c);
532                                 skipSpaceTokens(is, c);
533                                 break;
534                         }
535
536                         case catIgnore: {
537                                 lyxerr << "ignoring a char: " << int(c) << "\n";
538                                 break;
539                         }
540
541                         default:
542                                 push_back(Token(c, catcode(c)));
543                 }
544         }
545
546 #ifdef FILEDEBUG
547         dump();
548 #endif
549 }
550
551
552 void Parser::dump() const
553 {
554         lyxerr << "\nTokens: ";
555         for (unsigned i = 0; i < tokens_.size(); ++i)
556                 lyxerr << tokens_[i];
557         lyxerr << "\n";
558 }
559
560
561 void Parser::error(string const & msg) 
562 {
563         lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
564         dump();
565         //exit(1);
566 }
567
568
569
570 bool Parser::parse_lines(MathAtom & t, bool numbered, bool outmost)
571 {       
572         MathGridInset * p = t->asGridInset();
573         if (!p) {
574                 dump();
575                 lyxerr << "error in Parser::parse_lines() 1\n";
576                 return false;
577         }
578
579         // save global variables
580         bool   const saved_num   = curr_num_;
581         string const saved_label = curr_label_;
582
583         // read initial hlines
584         p->rowinfo(0).lines_ = readHLines();
585
586         for (int row = 0; true; ++row) {
587                 // reset global variables
588                 curr_num_   = numbered;
589                 curr_label_.erase();
590
591                 // reading a row
592                 for (MathInset::col_type col = 0; true; ++col) {
593                         //lyxerr << "reading cell " << row << " " << col << " "
594                         // << p->ncols() << "\n";
595                         //lyxerr << "ncols: " << p->ncols() << "\n";
596                 
597                         if (col >= p->ncols()) {
598                                 //lyxerr << "adding col " << col << "\n";
599                                 p->addCol(p->ncols());
600                         }
601
602                         MathArray & ar = p->cell(col + row * p->ncols());
603                         parse_into(ar, FLAG_BLOCK);
604                         // remove 'unnecessary' braces:
605                         if (ar.size() == 1 && ar.back()->asBraceInset())
606                                 ar = ar.back()->asBraceInset()->cell(0);
607                         //lyxerr << "ar: " << ar << "\n";
608
609                         // break if cell is not followed by an ampersand
610                         if (nextToken().cat() != catAlign) {
611                                 //lyxerr << "less cells read than normal in row/col: "
612                                 //      << row << " " << col << "\n";
613                                 break;
614                         }
615                         
616                         // skip the ampersand
617                         getToken();
618                 }
619
620                 if (outmost) {
621                         MathHullInset * m = t->asHullInset();
622                         if (!m) {
623                                 lyxerr << "error in Parser::parse_lines() 2\n";
624                                 return false;
625                         }
626                         m->numbered(row, curr_num_);
627                         m->label(row, curr_label_);
628                         if (curr_skip_.size()) {
629                                 m->vcrskip(LyXLength(curr_skip_), row);
630                                 curr_skip_.erase();
631                         }
632                 }
633
634                 // is a \\ coming?
635                 if (nextToken().isCR()) {
636                         // skip the cr-token
637                         getToken();
638
639                         // try to read a length
640                         //get
641
642                         // read hlines for next row
643                         p->rowinfo(row + 1).lines_ = readHLines();
644                 }
645
646                 // we are finished if the next token is an 'end'
647                 if (nextToken().cs() == "end") {
648                         // skip the end-token
649                         getToken();
650                         getArg('{','}');
651
652                         // leave the 'read a line'-loop
653                         break;
654                 }
655
656                 // otherwise, we have to start a new row
657                 p->appendRow();
658         }
659
660         // restore "global" variables
661         curr_num_   = saved_num;
662         curr_label_ = saved_label;
663
664         return true;
665 }
666
667
668 bool Parser::parse_lines2(MathAtom & t, bool braced)
669 {       
670         MathGridInset * p = t->asGridInset();
671         if (!p) {
672                 lyxerr << "error in Parser::parse_lines() 1\n";
673                 return false;
674         }
675
676         for (int row = 0; true; ++row) {
677                 // reading a row
678                 for (MathInset::col_type col = 0; true; ++col) {
679                         //lyxerr << "reading cell " << row << " " << col << " " << p->ncols() << "\n";
680                 
681                         if (col >= p->ncols()) {
682                                 //lyxerr << "adding col " << col << "\n";
683                                 p->addCol(p->ncols());
684                         }
685
686                         parse_into(p->cell(col + row * p->ncols()), FLAG_BLOCK2);
687                         //lyxerr << "read cell: " << p->cell(col + row * p->ncols()) << "\n";
688
689                         // break if cell is not followed by an ampersand
690                         if (nextToken().cat() != catAlign) {
691                                 //lyxerr << "less cells read than normal in row/col: " << row << " " << col << "\n";
692                                 break;
693                         }
694                         
695                         // skip the ampersand
696                         getToken();
697                 }
698
699                 // is a \\ coming?
700                 if (nextToken().isCR()) {
701                         // skip the cr-token
702                         getToken();
703                 }
704
705                 // we are finished if the next token is the one we expected
706                 // skip the end-token
707                 // leave the 'read a line'-loop
708                 if (braced) {
709                         if (nextToken().cat() == catEnd) {
710                                 getToken();
711                                 break;
712                         }
713                 } else {
714                         if (nextToken().cs() == "end") {
715                                 getToken();
716                                 getArg('{','}');
717                                 break;
718                         }
719                 }
720
721                 // otherwise, we have to start a new row
722                 p->appendRow();
723         }
724
725         return true;
726 }
727
728
729
730 bool Parser::parse_macro(string & name)
731 {
732         name = "{error}";
733         skipSpaces();
734
735         if (getToken().cs() != "newcommand") {
736                 lyxerr << "\\newcommand expected\n";
737                 return false;
738         }
739
740         if (getToken().cat() != catBegin) {
741                 lyxerr << "'{' in \\newcommand expected (1)\n";
742                 return false;
743         }
744
745         name = getToken().cs();
746
747         if (getToken().cat() != catEnd) {
748                 lyxerr << "'}' expected\n";
749                 return false;
750         }
751
752         string    arg  = getArg('[', ']');
753         int       narg = arg.empty() ? 0 : atoi(arg.c_str()); 
754
755         if (getToken().cat() != catBegin) {
756                 lyxerr << "'{' in \\newcommand expected (2)\n";
757                 return false;
758         }
759
760         MathArray ar;
761         parse_into(ar, FLAG_BRACE_LAST);
762
763         // we cannot handle recursive stuff at all
764         MathArray test;
765         test.push_back(createMathInset(name));
766         if (ar.contains(test)) {
767                 lyxerr << "we cannot handle recursive macros at all.\n";
768                 return false;
769         }
770
771         MathMacroTable::create(name, narg, ar);
772         return true;
773 }
774
775
776 bool Parser::parse_normal(MathAtom & matrix)
777 {
778         skipSpaces();
779         Token const & t = getToken();
780
781         if (t.cs() == "(") {
782                 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
783                 parse_into(matrix->cell(0), 0);
784                 return true;
785         }
786
787         if (t.cat() == catMath) {
788                 Token const & n = getToken();
789                 if (n.cat() == catMath) {
790                         // TeX's $$...$$ syntax for displayed math
791                         matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
792                         MathHullInset * p = matrix->asHullInset();
793                         parse_into(p->cell(0), 0);
794                         p->numbered(0, curr_num_);
795                         p->label(0, curr_label_);
796                 } else {
797                         // simple $...$  stuff
798                         putback();
799                         matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
800                         parse_into(matrix->cell(0), 0);
801                 }
802                 return true;
803         }
804
805         if (!t.cs().size()) {
806                 lyxerr << "start of math expected, got '" << t << "'\n";
807                 return false;
808         }
809
810         string const & cs = t.cs();
811
812         if (cs == "[") {
813                 curr_num_ = 0;
814                 curr_label_.erase();
815                 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
816                 MathHullInset * p = matrix->asHullInset();
817                 parse_into(p->cell(0), 0);
818                 p->numbered(0, curr_num_);
819                 p->label(0, curr_label_);
820                 return true;
821         }
822
823         if (cs != "begin") {
824                 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
825                 return false;
826         }
827
828         string const name = getArg('{', '}');
829
830         if (name == "math") {
831                 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
832                 parse_into(matrix->cell(0), 0);
833                 return true;
834         }
835
836         if (name == "equation" || name == "equation*" || name == "displaymath") {
837                 curr_num_ = (name == "equation");
838                 curr_label_.erase();
839                 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
840                 MathHullInset * p = matrix->asHullInset();
841                 parse_into(p->cell(0), FLAG_END);
842                 p->numbered(0, curr_num_);
843                 p->label(0, curr_label_);
844                 return true;
845         }
846
847         if (name == "eqnarray" || name == "eqnarray*") {
848                 matrix = MathAtom(new MathHullInset(LM_OT_EQNARRAY));
849                 return parse_lines(matrix, !stared(name), true);
850         }
851
852         if (name == "align" || name == "align*") {
853                 matrix = MathAtom(new MathHullInset(LM_OT_ALIGN));
854                 return parse_lines(matrix, !stared(name), true);
855         }
856
857         if (name == "alignat" || name == "alignat*") {
858                 int nc = 2 * atoi(getArg('{', '}').c_str());
859                 matrix = MathAtom(new MathHullInset(LM_OT_ALIGNAT, nc));
860                 return parse_lines(matrix, !stared(name), true);
861         }
862
863         if (name == "xalignat" || name == "xalignat*") {
864                 int nc = 2 * atoi(getArg('{', '}').c_str());
865                 matrix = MathAtom(new MathHullInset(LM_OT_XALIGNAT, nc));
866                 return parse_lines(matrix, !stared(name), true);
867         }
868
869         if (name == "xxalignat") {
870                 int nc = 2 * atoi(getArg('{', '}').c_str());
871                 matrix = MathAtom(new MathHullInset(LM_OT_XXALIGNAT, nc));
872                 return parse_lines(matrix, !stared(name), true);
873         }
874
875         if (name == "multline" || name == "multline*") {
876                 matrix = MathAtom(new MathHullInset(LM_OT_MULTLINE));
877                 return parse_lines(matrix, !stared(name), true);
878         }
879
880         if (name == "gather" || name == "gather*") {
881                 matrix = MathAtom(new MathHullInset(LM_OT_GATHER));
882                 return parse_lines(matrix, !stared(name), true);
883         }
884
885         lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
886         lyxerr << "1: unknown math environment: " << name << "\n";
887         return false;
888 }
889
890
891 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
892 {
893         parse_into1(array, flags, code);
894         // remove 'unnecessary' braces:
895         if (array.size() == 1 && array.back()->asBraceInset()) {
896                 lyxerr << "extra braces removed\n";
897                 array = array.back()->asBraceInset()->cell(0);
898         }
899 }
900
901
902 void Parser::parse_into1(MathArray & array, unsigned flags, MathTextCodes code)
903 {
904         bool panic  = false;
905         int  limits = 0;
906
907         while (good()) {
908                 Token const & t = getToken();
909         
910 #ifdef FILEDEBUG
911                 lyxerr << "t: " << t << " flags: " << flags << "\n";
912                 //array.dump();
913                 lyxerr << "\n";
914 #endif
915
916                 if (flags & FLAG_ITEM) {
917                         flags &= ~FLAG_ITEM;
918                         if (t.cat() == catBegin) { 
919                                 // skip the brace and collect everything to the next matching
920                                 // closing brace
921                                 flags |= FLAG_BRACE_LAST;
922                                 continue;
923                         } else {
924                                 // handle only this single token, leave the loop if done
925                                 flags |= FLAG_LEAVE;
926                         }
927                 }
928
929                 if (flags & FLAG_BLOCK) {
930                         if (t.cat() == catAlign || t.isCR() || t.cs() == "end") {
931                                 putback();
932                                 return;
933                         }
934                 }
935
936                 if (flags & FLAG_BLOCK2) {
937                         if (t.cat() == catAlign || t.isCR() || t.cs() == "end"
938                                         || t.cat() == catEnd) {
939                                 putback();
940                                 return;
941                         }
942                 }
943
944                 //
945                 // cat codes
946                 //
947                 if (t.cat() == catMath) {
948                         if (flags & FLAG_BOX) {
949                                 // we are inside an mbox, so opening new math is allowed
950                                 array.push_back(MathAtom(new MathHullInset(LM_OT_SIMPLE)));
951                                 parse_into(array.back()->cell(0), 0);
952                         } else {
953                                 // otherwise this is the end of the formula
954                                 break;
955                         }
956                 }
957
958                 else if (t.cat() == catLetter)
959                         add(array, t.character(), code);
960
961                 else if (t.cat() == catSpace && code == LM_TC_TEXTRM)
962                         add(array, t.character(), code);
963
964                 else if (t.cat() == catParameter) {
965                         Token const & n = getToken();
966                         array.push_back(MathAtom(new MathMacroArgument(n.character()-'0', code)));
967                 }
968
969                 else if (t.cat() == catBegin) {
970                         MathArray ar;
971                         parse_into(ar, FLAG_BRACE_LAST);
972 #ifndef WITH_WARNINGS
973 #warning this might be wrong in general!
974 #endif
975                         // ignore braces around simple items
976                         if ((ar.size() == 1 && !ar.front()->needsBraces()
977        || (ar.size() == 2 && !ar.front()->needsBraces()
978                                             && ar.back()->asScriptInset()))
979        || (ar.size() == 0 && array.size() == 0))
980                         {
981                                 array.push_back(ar);
982                         } else {
983                                 array.push_back(MathAtom(new MathBraceInset));
984                                 array.back()->cell(0).swap(ar);
985                         }
986                 }
987
988                 else if (t.cat() == catEnd) {
989                         if (flags & FLAG_BRACE_LAST)
990                                 return;
991                         lyxerr << "found '}' unexpectedly, array: '" << array << "'\n";
992                         //lyxerr << "found '}' unexpectedly\n";
993                         lyx::Assert(0);
994                         add(array, '}', LM_TC_TEX);
995                 }
996                 
997                 else if (t.cat() == catAlign) {
998                         lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
999                         //lyxerr << "found tab unexpectedly\n";
1000                         add(array, '&', LM_TC_TEX);
1001                 }
1002                 
1003                 else if (t.cat() == catSuper || t.cat() == catSub) {
1004                         bool up = (t.cat() == catSuper);
1005                         MathScriptInset * p = 0; 
1006                         if (array.size()) 
1007                                 p = array.back()->asScriptInset();
1008                         if (!p || p->has(up)) {
1009                                 array.push_back(MathAtom(new MathScriptInset(up)));
1010                                 p = array.back()->asScriptInset();
1011                         }
1012                         p->ensure(up);
1013                         parse_into(p->cell(up), FLAG_ITEM);
1014                         p->limits(limits);
1015                         limits = 0;
1016                 }
1017
1018                 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
1019                         return;
1020
1021                 else if (t.cat() == catOther)
1022                         add(array, t.character(), code);
1023                 
1024                 //
1025                 // control sequences
1026                 //      
1027                 else if (t.cs() == "protect")
1028                         // ignore \\protect, will be re-added during output 
1029                         ;
1030
1031                 else if (t.cs() == "end")
1032                         break;
1033
1034                 else if (t.cs() == ")")
1035                         break;
1036
1037                 else if (t.cs() == "]")
1038                         break;
1039
1040                 else if (t.cs() == "\\") {
1041                         curr_skip_ = getArg('[', ']');
1042                         //lyxerr << "found newline unexpectedly, array: '" << array << "'\n";
1043                         lyxerr << "found newline unexpectedly\n";
1044                         array.push_back(createMathInset("\\"));
1045                 }
1046         
1047                 else if (t.cs() == "limits")
1048                         limits = 1;
1049                 
1050                 else if (t.cs() == "nolimits")
1051                         limits = -1;
1052                 
1053                 else if (t.cs() == "nonumber")
1054                         curr_num_ = false;
1055
1056                 else if (t.cs() == "number")
1057                         curr_num_ = true;
1058
1059                 else if (t.cs() == "sqrt") {
1060                         char c = getChar();
1061                         if (c == '[') {
1062                                 array.push_back(MathAtom(new MathRootInset));
1063                                 parse_into(array.back()->cell(0), FLAG_BRACK_END);
1064                                 parse_into(array.back()->cell(1), FLAG_ITEM);
1065                         } else {
1066                                 putback();
1067                                 array.push_back(MathAtom(new MathSqrtInset));
1068                                 parse_into(array.back()->cell(0), FLAG_ITEM);
1069                         }
1070                 }
1071                 
1072                 else if (t.cs() == "left") {
1073                         string l = getToken().asString();
1074                         MathArray ar;
1075                         parse_into(ar, FLAG_RIGHT);
1076                         string r = getToken().asString();
1077                         MathAtom dl(new MathDelimInset(l, r));
1078                         dl->cell(0) = ar;
1079                         array.push_back(dl);
1080                 }
1081                 
1082                 else if (t.cs() == "right") {
1083                         if (!(flags & FLAG_RIGHT)) {
1084                                 //lyxerr << "got so far: '" << array << "'\n";
1085                                 error("Unmatched right delimiter");
1086                         }
1087                         return;
1088                 }
1089
1090                 else if (t.cs() == "begin") {
1091                         string const name = getArg('{', '}');   
1092                         if (name == "array" || name == "subarray") {
1093                                 string const valign = getArg('[', ']') + 'c';
1094                                 string const halign = getArg('{', '}');
1095                                 array.push_back(MathAtom(new MathArrayInset(name, valign[0], halign)));
1096                                 parse_lines(array.back(), false, false);
1097                         } else if (name == "split" || name == "cases" ||
1098                                          name == "gathered" || name == "aligned") {
1099                                 array.push_back(createMathInset(name));
1100                                 parse_lines(array.back(), false, false);
1101                         } else if (name == "matrix"  || name == "pmatrix" || name == "bmatrix" ||
1102                                          name == "vmatrix" || name == "Vmatrix") {
1103                                 array.push_back(createMathInset(name));
1104                                 parse_lines2(array.back(), false);
1105                         } else 
1106                                 lyxerr << "unknow math inset begin '" << name << "'\n"; 
1107                 }
1108         
1109                 else if (t.cs() == "kern") {
1110 #ifdef WITH_WARNINGS
1111 #warning A hack...
1112 #endif
1113                         string s;
1114                         while (1) {
1115                                 Token const & t = getToken();
1116                                 if (!good()) {
1117                                         putback();      
1118                                         break;
1119                                 }
1120                                 s += t.character();
1121                                 if (isValidLength(s))
1122                                         break;
1123                         }
1124                         array.push_back(MathAtom(new MathKernInset(s)));
1125                 }
1126
1127                 else if (t.cs() == "label") {
1128                         curr_label_ = getArg('{', '}');
1129                 }
1130
1131                 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
1132                         MathAtom p = createMathInset(t.cs());
1133                         array.swap(p->cell(0));
1134                         parse_into(p->cell(1), flags, code);
1135                         array.push_back(p);
1136                         return;
1137                 }
1138
1139                 else if (t.cs() == "substack") {
1140                         array.push_back(createMathInset(t.cs()));
1141                         skipBegin();
1142                         parse_lines2(array.back(), true);
1143                 }
1144
1145                 else if (t.cs() == "xymatrix") {
1146                         array.push_back(createMathInset(t.cs()));
1147                         skipBegin();
1148                         parse_lines2(array.back(), true);
1149                 }
1150
1151 #if 0
1152                 // Disabled
1153                 else if (1 && t.cs() == "ar") {
1154                         MathXYArrowInset * p = new MathXYArrowInset;
1155
1156                         // try to read target
1157                         char c = getChar();
1158                         if (c == '[') {
1159                                 parse_into(p->cell(0), FLAG_BRACK_END);
1160                                 //lyxerr << "read target: " << p->cell(0) << "\n";
1161                         } else {
1162                                 putback();
1163                         }
1164
1165                         // try to read label
1166                         if (nextToken().cat() == catSuper || nextToken().cat() == catSub) {
1167                                 p->up_ = nextToken().cat() == catSuper;
1168                                 getToken();
1169                                 parse_into(p->cell(1), FLAG_ITEM);
1170                                 //lyxerr << "read label: " << p->cell(1) << "\n";
1171                         }
1172
1173                         array.push_back(MathAtom(p));
1174                         //lyxerr << "read array: " << array << "\n";
1175                 }
1176 #endif
1177
1178 #if 0
1179                 else if (t.cs() == "mbox" || t.cs() == "text") {
1180                         //array.push_back(createMathInset(t.cs()));
1181                         array.push_back(MathAtom(new MathBoxInset(t.cs())));
1182                         // slurp in the argument of mbox
1183         
1184                         MathBoxInset * p = array.back()->asBoxInset();
1185                         //lyx::assert(p);
1186                 }
1187 #endif
1188
1189         
1190                 else if (t.cs().size()) {
1191                         latexkeys const * l = in_word_set(t.cs());
1192                         if (l) {
1193                                 if (l->token == LM_TK_FONT) {
1194                                         //lyxerr << "starting font\n";
1195                                         //CatCode catSpaceSave = theCatcode[' '];
1196                                         //if (l->id == LM_TC_TEXTRM) {
1197                                         //      // temporarily change catcode   
1198                                         //      theCatcode[' '] = catLetter;    
1199                                         //}
1200
1201                                         MathArray ar;
1202                                         parse_into(ar, FLAG_ITEM, static_cast<MathTextCodes>(l->id));
1203                                         array.push_back(ar);
1204
1205                                         // undo catcode changes
1206                                         ////theCatcode[' '] = catSpaceSave;
1207                                         //lyxerr << "ending font\n";
1208                                 }
1209
1210                                 else if (l->token == LM_TK_OLDFONT) {
1211                                         code = static_cast<MathTextCodes>(l->id);
1212                                 }
1213
1214                                 else if (l->token == LM_TK_BOX) {
1215                                         MathAtom p = createMathInset(t.cs());
1216                                         parse_into(p->cell(0), FLAG_ITEM | FLAG_BOX, LM_TC_BOX);
1217                                         array.push_back(p);
1218                                 }
1219
1220                                 else if (l->token == LM_TK_STY) {
1221                                         MathAtom p = createMathInset(t.cs());
1222                                         parse_into(p->cell(0), flags, code);
1223                                         array.push_back(p);
1224                                         return;
1225                                 }
1226
1227                                 else {
1228                                         MathAtom p = createMathInset(t.cs());
1229                                         for (MathInset::idx_type i = 0; i < p->nargs(); ++i) 
1230                                                 parse_into(p->cell(i), FLAG_ITEM);
1231                                         array.push_back(p);
1232                                 }
1233                         }
1234
1235                         else {
1236                                 MathAtom p = createMathInset(t.cs());
1237                                 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1238                                         parse_into(p->cell(i), FLAG_ITEM);
1239                                 array.push_back(p);
1240                         }
1241                 }
1242
1243
1244                 if (flags & FLAG_LEAVE) {
1245                         flags &= ~FLAG_LEAVE;
1246                         break;
1247                 }
1248         }
1249
1250         if (panic) {
1251                 lyxerr << " Math Panic, expect problems!\n";
1252                 //   Search for the end command. 
1253                 Token t;
1254                 do {
1255                         t = getToken();
1256                 } while (good() && t.cs() != "end");
1257         }
1258 }
1259
1260
1261
1262 } // anonymous namespace
1263
1264
1265 void mathed_parse_cell(MathArray & ar, string const & str)
1266 {
1267         istringstream is(str.c_str());
1268         mathed_parse_cell(ar, is);
1269 }
1270
1271
1272 void mathed_parse_cell(MathArray & ar, istream & is)
1273 {
1274         Parser(is).parse_into(ar, 0);
1275 }
1276
1277
1278
1279 bool mathed_parse_macro(string & name, string const & str)
1280 {
1281         istringstream is(str.c_str());
1282         Parser parser(is);
1283         return parser.parse_macro(name);
1284 }
1285
1286 bool mathed_parse_macro(string & name, istream & is)
1287 {
1288         Parser parser(is);
1289         return parser.parse_macro(name);
1290 }
1291
1292 bool mathed_parse_macro(string & name, LyXLex & lex)
1293 {
1294         Parser parser(lex);
1295         return parser.parse_macro(name);
1296 }
1297
1298
1299
1300 bool mathed_parse_normal(MathAtom & t, string const & str)
1301 {
1302         istringstream is(str.c_str());
1303         Parser parser(is);
1304         return parser.parse_normal(t);
1305 }
1306
1307 bool mathed_parse_normal(MathAtom & t, istream & is)
1308 {
1309         Parser parser(is);
1310         return parser.parse_normal(t);
1311 }
1312
1313 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1314 {
1315         Parser parser(lex);
1316         return parser.parse_normal(t);
1317 }