]> git.lyx.org Git - lyx.git/blob - src/mathed/math_parser.C
inactive new stuff to re-sync my tree before going on holyday
[lyx.git] / src / mathed / math_parser.C
1 /*
2  *  File:        math_parser.C
3  *  Purpose:     Parser for mathed
4  *  Author:      Alejandro Aguilar Sierra <asierra@servidor.unam.mx> 
5  *  Created:     January 1996
6  *  Description: Parse LaTeX2e math mode code.
7  *
8  *  Dependencies: Xlib, XForms
9  *
10  *  Copyright: 1996, Alejandro Aguilar Sierra
11  *
12  *   Version: 0.8beta.
13  *
14  *   You are free to use and modify this code under the terms of
15  *   the GNU General Public Licence version 2 or later.
16  */
17
18 /* 
19
20 If someone desperately needs partial "structures" (such as a few cells of
21 an array inset or similar), (s)he could use the following hack as a starting
22 point to write some macros:
23
24   \newif\ifcomment
25   \commentfalse
26   \ifcomment
27           \def\makeamptab{\catcode`\&=4\relax}
28           \def\makeampletter{\catcode`\&=11\relax}
29     \def\b{\makeampletter\expandafter\makeamptab\bi}
30     \long\def\bi#1\e{}
31   \else
32     \def\b{}\def\e{}
33   \fi
34
35   ...
36
37   \[\begin{array}{ccc}
38    1 & 2\b & 3^2\\
39    4 & 5\e & 6\\
40    7 & 8 & 9
41   \end{array}\]
42
43 */
44
45
46 #include <config.h>
47
48 #ifdef __GNUG__
49 #pragma implementation
50 #endif
51
52 #include "math_parser.h"
53 #include "math_inset.h"
54 #include "math_arrayinset.h"
55 #include "math_amsarrayinset.h"
56 #include "math_braceinset.h"
57 #include "math_casesinset.h"
58 #include "math_charinset.h"
59 #include "math_deliminset.h"
60 #include "math_factory.h"
61 #include "math_funcinset.h"
62 #include "math_kerninset.h"
63 #include "math_macro.h"
64 #include "math_macrotable.h"
65 #include "math_macrotemplate.h"
66 #include "math_hullinset.h"
67 #include "math_rootinset.h"
68 #include "math_sizeinset.h"
69 #include "math_sqrtinset.h"
70 #include "math_scriptinset.h"
71 #include "math_specialcharinset.h"
72 #include "math_splitinset.h"
73 #include "math_sqrtinset.h"
74 #include "math_support.h"
75 #include "math_xyarrowinset.h"
76
77 #include "lyxlex.h"
78 #include "debug.h"
79
80 #include "support/lstrings.h"
81
82 #include <cctype>
83 #include <stack>
84 #include <algorithm>
85
86 using std::istream;
87 using std::ostream;
88 using std::ios;
89 using std::endl;
90 using std::stack;
91 using std::fill;
92
93
94 namespace {
95
/// Tells whether \p s ends in an asterisk, as the names of the starred
/// (unnumbered) environment variants do.
bool stared(string const & s)
{
	if (s.empty())
		return false;
	return s[s.size() - 1] == '*';
}
101
102
103 void add(MathArray & ar, char c, MathTextCodes code)
104 {
105         ar.push_back(MathAtom(new MathCharInset(c, code)));
106 }
107
108
/// Character categories, numbered like TeX's catcodes.
enum CatCode {
	catEscape    = 0,   // backslash
	catBegin     = 1,   // {
	catEnd       = 2,   // }
	catMath      = 3,   // $
	catAlign     = 4,   // &
	catNewline   = 5,   // ^^M
	catParameter = 6,   // #
	catSuper     = 7,   // ^
	catSub       = 8,   // _
	catIgnore    = 9,   // ignored character
	catSpace     = 10,  // space
	catLetter    = 11,  // a-zA-Z
	catOther     = 12,  // none of the above
	catActive    = 13,  // ~
	catComment   = 14,  // %
	catInvalid   = 15   // <delete>
};
128
129 CatCode theCatcode[256];  
130
131
132 inline CatCode catcode(unsigned char c)
133 {
134         return theCatcode[c];
135 }
136
137
/// Bit flags passed to Parser::parse_into(); they can be or'ed together.
/// Note that the lowest bit (0x001) is not used.
enum {
	/// last closing brace ends the parsing process
	FLAG_BRACE_LAST = 0x002,
	/// next \right ends the parsing process
	FLAG_RIGHT      = 0x004,
	/// next \end ends the parsing process
	FLAG_END        = 0x008,
	/// next closing bracket ends the parsing process
	FLAG_BRACK_END  = 0x010,
	/// we are in a box
	FLAG_BOX        = 0x020,
	/// read a (possibly braced) token
	FLAG_ITEM       = 0x040,
	/// next block ends the parsing process
	FLAG_BLOCK      = 0x080,
	/// next block2 ends the parsing process
	FLAG_BLOCK2     = 0x100,
	/// leave the loop at the end
	FLAG_LEAVE      = 0x200
};
149
150
151 void catInit()
152 {
153         fill(theCatcode, theCatcode + 256, catOther);
154         fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
155         fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
156
157         theCatcode['\\'] = catEscape;   
158         theCatcode['{']  = catBegin;    
159         theCatcode['}']  = catEnd;      
160         theCatcode['$']  = catMath;     
161         theCatcode['&']  = catAlign;    
162         theCatcode['\n'] = catNewline;  
163         theCatcode['#']  = catParameter;        
164         theCatcode['^']  = catSuper;    
165         theCatcode['_']  = catSub;      
166         theCatcode['\7f'] = catIgnore;    
167         theCatcode[' ']  = catSpace;    
168         theCatcode['\t'] = catSpace;    
169         theCatcode['\r'] = catSpace;    
170         theCatcode['~']  = catActive;   
171         theCatcode['%']  = catComment;  
172 }
173
174
175
176 //
177 // Helper class for parsing
178 //
179
180 class Token {
181 public:
182         ///
183         Token() : cs_(), char_(0), cat_(catIgnore) {}
184         ///
185         Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
186         ///
187         Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
188
189         ///
190         string const & cs() const { return cs_; }
191         ///
192         CatCode cat() const { return cat_; }
193         ///
194         char character() const { return char_; }
195         ///
196         string asString() const;
197         ///
198         bool isCR() const;
199
200 private:        
201         ///
202         string cs_;
203         ///
204         char char_;
205         ///
206         CatCode cat_;
207 };
208
209 bool Token::isCR() const
210 {
211         return cs_ == "\\" || cs_ == "cr" || cs_ == "crcr";
212 }
213
214 string Token::asString() const
215 {
216         return cs_.size() ? cs_ : string(1, char_);
217 }
218
219 // Angus' compiler says these are not needed
220 //bool operator==(Token const & s, Token const & t)
221 //{
222 //      return s.character() == t.character()
223 //              && s.cat() == t.cat() && s.cs() == t.cs(); 
224 //}
225 //
226 //bool operator!=(Token const & s, Token const & t)
227 //{
228 //      return !(s == t);
229 //}
230
231 ostream & operator<<(ostream & os, Token const & t)
232 {
233         if (t.cs().size())
234                 os << "\\" << t.cs();
235         else
236                 os << "[" << t.character() << "," << t.cat() << "]";
237         return os;
238 }
239
240
241 class Parser {
242
243 public:
244         ///
245         Parser(LyXLex & lex);
246         ///
247         Parser(istream & is);
248
249         ///
250         bool parse_macro(string & name);
251         ///
252         bool parse_normal(MathAtom &);
253         ///
254         void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
255         ///
256         int lineno() const { return lineno_; }
257         ///
258         void putback();
259
260 private:
261         ///
262         void parse_into1(MathArray & array, unsigned flags, MathTextCodes);
263         ///
264         string getArg(char lf, char rf);
265         ///
266         char getChar();
267         ///
268         void error(string const & msg);
269         ///
270         bool parse_lines(MathAtom & t, bool numbered, bool outmost);
271         /// parses {... & ... \\ ... & ... }
272         bool parse_lines2(MathAtom & t, bool braced);
273         /// dump contents to screen
274         void dump() const;
275
276 private:
277         ///
278         void tokenize(istream & is);
279         ///
280         void tokenize(string const & s);
281         ///
282         void skipSpaceTokens(istream & is, char c);
283         ///
284         void push_back(Token const & t);
285         ///
286         void pop_back();
287         ///
288         Token const & prevToken() const;
289         ///
290         Token const & nextToken() const;
291         ///
292         Token const & getToken();
293         /// skips spaces if any
294         void skipSpaces();
295         /// skips opening brace
296         void skipBegin();
297         /// skips closing brace
298         void skipEnd();
299         /// counts a sequence of hlines
300         int readHLines();
301         ///
302         void lex(string const & s);
303         ///
304         bool good() const;
305
306         ///
307         int lineno_;
308         ///
309         std::vector<Token> tokens_;
310         ///
311         unsigned pos_;
312         ///
313         bool   curr_num_;
314         ///
315         string curr_label_;
316         ///
317         string curr_skip_;
318 };
319
320
321 Parser::Parser(LyXLex & lexer)
322         : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
323 {
324         tokenize(lexer.getStream());
325         lexer.eatLine();
326 }
327
328
329 Parser::Parser(istream & is)
330         : lineno_(0), pos_(0), curr_num_(false)
331 {
332         tokenize(is);
333 }
334
335
336 void Parser::push_back(Token const & t)
337 {
338         tokens_.push_back(t);
339 }
340
341
342 void Parser::pop_back()
343 {
344         tokens_.pop_back();
345 }
346
347
348 Token const & Parser::prevToken() const
349 {
350         static const Token dummy;
351         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
352 }
353
354
355 Token const & Parser::nextToken() const
356 {
357         static const Token dummy;
358         return good() ? tokens_[pos_] : dummy;
359 }
360
361
362 Token const & Parser::getToken()
363 {
364         static const Token dummy;
365         //lyxerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
366         return good() ? tokens_[pos_++] : dummy;
367 }
368
369
370 void Parser::skipSpaces()
371 {
372         while (nextToken().cat() == catSpace)
373                 getToken();
374 }
375
376
377 void Parser::skipBegin()
378 {
379         if (nextToken().cat() == catBegin)
380                 getToken();
381         else
382                 lyxerr << "'{' expected\n";
383 }
384
385
386 void Parser::skipEnd()
387 {
388         if (nextToken().cat() == catEnd)
389                 getToken();
390         else
391                 lyxerr << "'}' expected\n";
392 }
393
394
395 int Parser::readHLines()
396 {
397         int num = 0;
398         skipSpaces();
399         while (nextToken().cs() == "hline") {
400                 getToken();
401                 ++num;
402                 skipSpaces();
403         }
404         return num;
405 }
406
407
408 void Parser::putback()
409 {
410         --pos_;
411 }
412
413
414 bool Parser::good() const
415 {
416         return pos_ < tokens_.size();
417 }
418
419
420 char Parser::getChar()
421 {
422         if (!good())
423                 lyxerr << "The input stream is not well..." << endl;
424         return tokens_[pos_++].character();
425 }
426
427
428 string Parser::getArg(char lf, char rg)
429 {
430         skipSpaces();
431
432         string result;
433         char c = getChar();
434
435         if (c != lf)  
436                 putback();
437         else 
438                 while ((c = getChar()) != rg && good())
439                         result += c;
440
441         return result;
442 }
443
444
445 void Parser::tokenize(istream & is)
446 {
447         // eat everything up to the next \end_inset or end of stream
448         // and store it in s for further tokenization
449         string s;
450         char c;
451         while (is.get(c)) {
452                 s += c;
453                 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
454                         s = s.substr(0, s.size() - 10);
455                         break;
456                 }
457         }
458
459         // tokenize buffer
460         tokenize(s);
461 }
462
463
464 void Parser::skipSpaceTokens(istream & is, char c)
465 {
466         // skip trailing spaces
467         while (catcode(c) == catSpace || catcode(c) == catNewline)
468                 if (!is.get(c))
469                         break;
470         //lyxerr << "putting back: " << c << "\n";
471         is.putback(c);
472 }
473
474
475 void Parser::tokenize(string const & buffer)
476 {
477         static bool init_done = false;
478         
479         if (!init_done) {
480                 catInit();
481                 init_done = true;
482         }
483
484         istringstream is(buffer.c_str(), ios::in | ios::binary);
485
486         char c;
487         while (is.get(c)) {
488                 //lyxerr << "reading c: " << c << "\n";
489
490                 switch (catcode(c)) {
491                         case catNewline: {
492                                 ++lineno_; 
493                                 is.get(c);
494                                 if (catcode(c) == catNewline)
495                                         ; //push_back(Token("par"));
496                                 else {
497                                         push_back(Token(' ', catSpace));
498                                         is.putback(c);  
499                                 }
500                                 break;
501                         }
502
503                         case catComment: {
504                                 while (is.get(c) && catcode(c) != catNewline)
505                                         ;
506                                 ++lineno_; 
507                                 break;
508                         }
509
510                         case catEscape: {
511                                 is.get(c);
512                                 if (!is) {
513                                         error("unexpected end of input");
514                                 } else {
515                                         string s(1, c);
516                                         if (catcode(c) == catLetter) {
517                                                 // collect letters
518                                                 while (is.get(c) && catcode(c) == catLetter)
519                                                         s += c;
520                                                 skipSpaceTokens(is, c);
521                                         }       
522                                         push_back(Token(s));
523                                 }
524                                 break;
525                         }
526
527                         case catSuper:
528                         case catSub: {
529                                 push_back(Token(c, catcode(c)));
530                                 is.get(c);
531                                 skipSpaceTokens(is, c);
532                                 break;
533                         }
534
535                         case catIgnore: {
536                                 lyxerr << "ignoring a char: " << int(c) << "\n";
537                                 break;
538                         }
539
540                         default:
541                                 push_back(Token(c, catcode(c)));
542                 }
543         }
544
545         //dump();
546 }
547
548
549 void Parser::dump() const
550 {
551         lyxerr << "\nTokens: ";
552         for (unsigned i = 0; i < tokens_.size(); ++i)
553                 lyxerr << tokens_[i];
554         lyxerr << "\n";
555 }
556
557
558 void Parser::error(string const & msg) 
559 {
560         lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
561         dump();
562         //exit(1);
563 }
564
565
566
567 bool Parser::parse_lines(MathAtom & t, bool numbered, bool outmost)
568 {       
569         MathGridInset * p = t->asGridInset();
570         if (!p) {
571                 dump();
572                 lyxerr << "error in Parser::parse_lines() 1\n";
573                 return false;
574         }
575
576         // save global variables
577         bool   const saved_num   = curr_num_;
578         string const saved_label = curr_label_;
579
580         // read initial hlines
581         p->rowinfo(0).lines_ = readHLines();
582
583         for (int row = 0; true; ++row) {
584                 // reset global variables
585                 curr_num_   = numbered;
586                 curr_label_.erase();
587
588                 // reading a row
589                 for (MathInset::col_type col = 0; col < p->ncols(); ++col) {
590                         //lyxerr << "reading cell " << row << " " << col << "\n";
591                         //lyxerr << "ncols: " << p->ncols() << "\n";
592                 
593                         MathArray & ar = p->cell(col + row * p->ncols());
594                         parse_into(ar, FLAG_BLOCK);
595                         // remove 'unnecessary' braces:
596                         if (ar.size() == 1 && ar.back()->asBraceInset())
597                                 ar = ar.back()->asBraceInset()->cell(0);
598                         //lyxerr << "ar: " << ar << "\n";
599
600                         // break if cell is not followed by an ampersand
601                         if (nextToken().cat() != catAlign) {
602                                 //lyxerr << "less cells read than normal in row/col: "
603                                 //      << row << " " << col << "\n";
604                                 break;
605                         }
606                         
607                         // skip the ampersand
608                         getToken();
609                 }
610
611                 if (outmost) {
612                         MathHullInset * m = t->asHullInset();
613                         if (!m) {
614                                 lyxerr << "error in Parser::parse_lines() 2\n";
615                                 return false;
616                         }
617                         m->numbered(row, curr_num_);
618                         m->label(row, curr_label_);
619                         if (curr_skip_.size()) {
620                                 m->vcrskip(LyXLength(curr_skip_), row);
621                                 curr_skip_.erase();
622                         }
623                 }
624
625                 // is a \\ coming?
626                 if (nextToken().isCR()) {
627                         // skip the cr-token
628                         getToken();
629
630                         // try to read a length
631                         //get
632
633                         // read hlines for next row
634                         p->rowinfo(row + 1).lines_ = readHLines();
635                 }
636
637                 // we are finished if the next token is an 'end'
638                 if (nextToken().cs() == "end") {
639                         // skip the end-token
640                         getToken();
641                         getArg('{','}');
642
643                         // leave the 'read a line'-loop
644                         break;
645                 }
646
647                 // otherwise, we have to start a new row
648                 p->appendRow();
649         }
650
651         // restore "global" variables
652         curr_num_   = saved_num;
653         curr_label_ = saved_label;
654
655         return true;
656 }
657
658
659 bool Parser::parse_lines2(MathAtom & t, bool braced)
660 {       
661         MathGridInset * p = t->asGridInset();
662         if (!p) {
663                 lyxerr << "error in Parser::parse_lines() 1\n";
664                 return false;
665         }
666
667         for (int row = 0; true; ++row) {
668                 // reading a row
669                 for (MathInset::col_type col = 0; true; ++col) {
670                         //lyxerr << "reading cell " << row << " " << col << " " << p->ncols() << "\n";
671                 
672                         if (col >= p->ncols()) {
673                                 //lyxerr << "adding col " << col << "\n";
674                                 p->addCol(p->ncols());
675                         }
676
677                         parse_into(p->cell(col + row * p->ncols()), FLAG_BLOCK2);
678                         //lyxerr << "read cell: " << p->cell(col + row * p->ncols()) << "\n";
679
680                         // break if cell is not followed by an ampersand
681                         if (nextToken().cat() != catAlign) {
682                                 //lyxerr << "less cells read than normal in row/col: " << row << " " << col << "\n";
683                                 break;
684                         }
685                         
686                         // skip the ampersand
687                         getToken();
688                 }
689
690                 // is a \\ coming?
691                 if (nextToken().isCR()) {
692                         // skip the cr-token
693                         getToken();
694                 }
695
696                 // we are finished if the next token is the one we expected
697                 // skip the end-token
698                 // leave the 'read a line'-loop
699                 if (braced) {
700                         if (nextToken().cat() == catEnd) {
701                                 getToken();
702                                 break;
703                         }
704                 } else {
705                         if (nextToken().cs() == "end") {
706                                 getToken();
707                                 getArg('{','}');
708                                 break;
709                         }
710                 }
711
712                 // otherwise, we have to start a new row
713                 p->appendRow();
714         }
715
716         return true;
717 }
718
719
720
721 bool Parser::parse_macro(string & name)
722 {
723         name = "{error}";
724         skipSpaces();
725
726         if (getToken().cs() != "newcommand") {
727                 lyxerr << "\\newcommand expected\n";
728                 return false;
729         }
730
731         if (getToken().cat() != catBegin) {
732                 lyxerr << "'{' in \\newcommand expected (1)\n";
733                 return false;
734         }
735
736         name = getToken().cs();
737
738         if (getToken().cat() != catEnd) {
739                 lyxerr << "'}' expected\n";
740                 return false;
741         }
742
743         string    arg  = getArg('[', ']');
744         int       narg = arg.empty() ? 0 : atoi(arg.c_str()); 
745
746         if (getToken().cat() != catBegin) {
747                 lyxerr << "'{' in \\newcommand expected (2)\n";
748                 return false;
749         }
750
751         MathArray ar;
752         parse_into(ar, FLAG_BRACE_LAST);
753
754         // we cannot handle recursive stuff at all
755         MathArray test;
756         test.push_back(createMathInset(name));
757         if (ar.contains(test)) {
758                 lyxerr << "we cannot handle recursive macros at all.\n";
759                 return false;
760         }
761
762         MathMacroTable::create(name, narg, ar);
763         return true;
764 }
765
766
767 bool Parser::parse_normal(MathAtom & matrix)
768 {
769         skipSpaces();
770         Token const & t = getToken();
771
772         if (t.cs() == "(") {
773                 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
774                 parse_into(matrix->cell(0), 0);
775                 return true;
776         }
777
778         if (t.cat() == catMath) {
779                 Token const & n = getToken();
780                 if (n.cat() == catMath) {
781                         // TeX's $$...$$ syntax for displayed math
782                         matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
783                         MathHullInset * p = matrix->asHullInset();
784                         parse_into(p->cell(0), 0);
785                         p->numbered(0, curr_num_);
786                         p->label(0, curr_label_);
787                 } else {
788                         // simple $...$  stuff
789                         putback();
790                         matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
791                         parse_into(matrix->cell(0), 0);
792                 }
793                 return true;
794         }
795
796         if (!t.cs().size()) {
797                 lyxerr << "start of math expected, got '" << t << "'\n";
798                 return false;
799         }
800
801         string const & cs = t.cs();
802
803         if (cs == "[") {
804                 curr_num_ = 0;
805                 curr_label_.erase();
806                 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
807                 MathHullInset * p = matrix->asHullInset();
808                 parse_into(p->cell(0), 0);
809                 p->numbered(0, curr_num_);
810                 p->label(0, curr_label_);
811                 return true;
812         }
813
814         if (cs != "begin") {
815                 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
816                 return false;
817         }
818
819         string const name = getArg('{', '}');
820
821         if (name == "math") {
822                 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
823                 parse_into(matrix->cell(0), 0);
824                 return true;
825         }
826
827         if (name == "equation" || name == "equation*" || name == "displaymath") {
828                 curr_num_ = (name == "equation");
829                 curr_label_.erase();
830                 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
831                 MathHullInset * p = matrix->asHullInset();
832                 parse_into(p->cell(0), FLAG_END);
833                 p->numbered(0, curr_num_);
834                 p->label(0, curr_label_);
835                 return true;
836         }
837
838         if (name == "eqnarray" || name == "eqnarray*") {
839                 matrix = MathAtom(new MathHullInset(LM_OT_EQNARRAY));
840                 return parse_lines(matrix, !stared(name), true);
841         }
842
843         if (name == "align" || name == "align*") {
844                 matrix = MathAtom(new MathHullInset(LM_OT_ALIGN));
845                 return parse_lines(matrix, !stared(name), true);
846         }
847
848         if (name == "alignat" || name == "alignat*") {
849                 int nc = 2 * atoi(getArg('{', '}').c_str());
850                 matrix = MathAtom(new MathHullInset(LM_OT_ALIGNAT, nc));
851                 return parse_lines(matrix, !stared(name), true);
852         }
853
854         if (name == "xalignat" || name == "xalignat*") {
855                 int nc = 2 * atoi(getArg('{', '}').c_str());
856                 matrix = MathAtom(new MathHullInset(LM_OT_XALIGNAT, nc));
857                 return parse_lines(matrix, !stared(name), true);
858         }
859
860         if (name == "xxalignat") {
861                 int nc = 2 * atoi(getArg('{', '}').c_str());
862                 matrix = MathAtom(new MathHullInset(LM_OT_XXALIGNAT, nc));
863                 return parse_lines(matrix, !stared(name), true);
864         }
865
866         if (name == "multline" || name == "multline*") {
867                 matrix = MathAtom(new MathHullInset(LM_OT_MULTLINE));
868                 return parse_lines(matrix, !stared(name), true);
869         }
870
871         if (name == "gather" || name == "gather*") {
872                 matrix = MathAtom(new MathHullInset(LM_OT_GATHER));
873                 return parse_lines(matrix, !stared(name), true);
874         }
875
876         lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
877         lyxerr << "1: unknown math environment: " << name << "\n";
878         return false;
879 }
880
881
882 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
883 {
884         parse_into1(array, flags, code);
885         // remove 'unnecessary' braces:
886         if (array.size() == 1 && array.back()->asBraceInset()) {
887                 lyxerr << "extra braces removed\n";
888                 array = array.back()->asBraceInset()->cell(0);
889         }
890 }
891
892
893 void Parser::parse_into1(MathArray & array, unsigned flags, MathTextCodes code)
894 {
895         bool panic  = false;
896         int  limits = 0;
897
898         while (good()) {
899                 Token const & t = getToken();
900         
901                 //lyxerr << "t: " << t << " flags: " << flags << "\n";
902                 //array.dump(lyxerr);
903                 //lyxerr << "\n";
904
905                 if (flags & FLAG_ITEM) {
906                         flags &= ~FLAG_ITEM;
907                         if (t.cat() == catBegin) { 
908                                 // skip the brace and collect everything to the next matching
909                                 // closing brace
910                                 flags |= FLAG_BRACE_LAST;
911                                 continue;
912                         } else {
913                                 // handle only this single token, leave the loop if done
914                                 flags |= FLAG_LEAVE;
915                         }
916                 }
917
918                 if (flags & FLAG_BLOCK) {
919                         if (t.cat() == catAlign || t.isCR() || t.cs() == "end") {
920                                 putback();
921                                 return;
922                         }
923                 }
924
925                 if (flags & FLAG_BLOCK2) {
926                         if (t.cat() == catAlign || t.isCR() || t.cs() == "end"
927                                         || t.cat() == catEnd) {
928                                 putback();
929                                 return;
930                         }
931                 }
932
933                 //
934                 // cat codes
935                 //
936                 if (t.cat() == catMath) {
937                         if (flags & FLAG_BOX) {
938                                 // we are inside an mbox, so opening new math is allowed
939                                 array.push_back(MathAtom(new MathHullInset(LM_OT_SIMPLE)));
940                                 parse_into(array.back()->cell(0), 0);
941                         } else {
942                                 // otherwise this is the end of the formula
943                                 break;
944                         }
945                 }
946
947                 else if (t.cat() == catLetter)
948                         add(array, t.character(), code);
949
950                 else if (t.cat() == catSpace && code == LM_TC_TEXTRM)
951                         add(array, t.character(), code);
952
953                 else if (t.cat() == catParameter) {
954                         Token const & n = getToken();
955                         array.push_back(MathAtom(new MathMacroArgument(n.character()-'0', code)));
956                 }
957
958                 else if (t.cat() == catBegin) {
959                         MathArray ar;
960                         parse_into(ar, FLAG_BRACE_LAST);
961 #ifndef WITH_WARNINGS
962 #warning this might be wrong in general!
963 #endif
964                         // ignore braces around simple items
965                         if ((ar.size() == 1 && !ar.front()->needsBraces()
966        || (ar.size() == 2 && !ar.front()->needsBraces()
967                                             && ar.back()->asScriptInset()))
968        || (ar.size() == 0 && array.size() == 0))
969                         {
970                                 array.push_back(ar);
971                         } else {
972                                 array.push_back(MathAtom(new MathBraceInset));
973                                 array.back()->cell(0).swap(ar);
974                         }
975                 }
976
977                 else if (t.cat() == catEnd) {
978                         if (flags & FLAG_BRACE_LAST)
979                                 return;
980                         lyxerr << "found '}' unexpectedly, array: '" << array << "'\n";
981                         //lyxerr << "found '}' unexpectedly\n";
982                         add(array, '}', LM_TC_TEX);
983                 }
984                 
985                 else if (t.cat() == catAlign) {
986                         lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
987                         //lyxerr << "found tab unexpectedly\n";
988                         add(array, '&', LM_TC_TEX);
989                 }
990                 
991                 else if (t.cat() == catSuper || t.cat() == catSub) {
992                         bool up = (t.cat() == catSuper);
993                         MathScriptInset * p = 0; 
994                         if (array.size()) 
995                                 p = array.back()->asScriptInset();
996                         if (!p || p->has(up)) {
997                                 array.push_back(MathAtom(new MathScriptInset(up)));
998                                 p = array.back()->asScriptInset();
999                         }
1000                         p->ensure(up);
1001                         parse_into(p->cell(up), FLAG_ITEM);
1002                         p->limits(limits);
1003                         limits = 0;
1004                 }
1005
1006                 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
1007                         return;
1008
1009                 else if (t.cat() == catOther)
1010                         add(array, t.character(), code);
1011                 
1012                 //
1013                 // control sequences
1014                 //      
1015                 else if (t.cs() == "protect")
1016                         // ignore \\protect, will be re-added during output 
1017                         ;
1018
1019                 else if (t.cs() == "end")
1020                         break;
1021
1022                 else if (t.cs() == ")")
1023                         break;
1024
1025                 else if (t.cs() == "]")
1026                         break;
1027
1028                 else if (t.cs() == "\\") {
1029                         curr_skip_ = getArg('[', ']');
1030                         //lyxerr << "found newline unexpectedly, array: '" << array << "'\n";
1031                         lyxerr << "found newline unexpectedly\n";
1032                         array.push_back(createMathInset("\\"));
1033                 }
1034         
1035                 else if (t.cs() == "limits")
1036                         limits = 1;
1037                 
1038                 else if (t.cs() == "nolimits")
1039                         limits = -1;
1040                 
1041                 else if (t.cs() == "nonumber")
1042                         curr_num_ = false;
1043
1044                 else if (t.cs() == "number")
1045                         curr_num_ = true;
1046
1047                 else if (t.cs() == "sqrt") {
1048                         char c = getChar();
1049                         if (c == '[') {
1050                                 array.push_back(MathAtom(new MathRootInset));
1051                                 parse_into(array.back()->cell(0), FLAG_BRACK_END);
1052                                 parse_into(array.back()->cell(1), FLAG_ITEM);
1053                         } else {
1054                                 putback();
1055                                 array.push_back(MathAtom(new MathSqrtInset));
1056                                 parse_into(array.back()->cell(0), FLAG_ITEM);
1057                         }
1058                 }
1059                 
1060                 else if (t.cs() == "left") {
1061                         string l = getToken().asString();
1062                         MathArray ar;
1063                         parse_into(ar, FLAG_RIGHT);
1064                         string r = getToken().asString();
1065                         MathAtom dl(new MathDelimInset(l, r));
1066                         dl->cell(0) = ar;
1067                         array.push_back(dl);
1068                 }
1069                 
1070                 else if (t.cs() == "right") {
1071                         if (!(flags & FLAG_RIGHT)) {
1072                                 //lyxerr << "got so far: '" << array << "'\n";
1073                                 error("Unmatched right delimiter");
1074                         }
1075                         return;
1076                 }
1077
1078                 else if (t.cs() == "begin") {
1079                         string const name = getArg('{', '}');   
1080                         if (name == "array") {
1081                                 string const valign = getArg('[', ']') + 'c';
1082                                 string const halign = getArg('{', '}');
1083                                 array.push_back(MathAtom(new MathArrayInset(valign[0], halign)));
1084                                 parse_lines(array.back(), false, false);
1085                         } else if (name == "split") {
1086                                 array.push_back(MathAtom(new MathSplitInset(1)));
1087                                 parse_lines(array.back(), false, false);
1088                         } else if (name == "cases") {
1089                                 array.push_back(MathAtom(new MathCasesInset));
1090                                 parse_lines(array.back(), false, false);
1091                         } else if (name == "pmatrix" || name == "bmatrix") {
1092                                 array.push_back(MathAtom(new MathAMSArrayInset(name)));
1093                                 parse_lines2(array.back(), false);
1094                         } else 
1095                                 lyxerr << "unknow math inset begin '" << name << "'\n"; 
1096                 }
1097         
1098                 else if (t.cs() == "kern") {
1099 #ifdef WITH_WARNINGS
1100 #warning A hack...
1101 #endif
1102                         string s;
1103                         while (1) {
1104                                 Token const & t = getToken();
1105                                 if (!good()) {
1106                                         putback();      
1107                                         break;
1108                                 }
1109                                 s += t.character();
1110                                 if (isValidLength(s))
1111                                         break;
1112                         }
1113                         array.push_back(MathAtom(new MathKernInset(s)));
1114                 }
1115
1116                 else if (t.cs() == "label") {
1117                         curr_label_ = getArg('{', '}');
1118                 }
1119
1120                 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
1121                         MathAtom p = createMathInset(t.cs());
1122                         array.swap(p->cell(0));
1123                         parse_into(p->cell(1), flags, code);
1124                         array.push_back(p);
1125                         return;
1126                 }
1127
1128                 else if (t.cs() == "xymatrix") {
1129                         array.push_back(createMathInset(t.cs()));
1130                         skipBegin();
1131                         parse_lines2(array.back(), true);
1132                 }
1133
1134 #if 0
1135                 // Disabled
1136                 else if (1 && t.cs() == "ar") {
1137                         MathXYArrowInset * p = new MathXYArrowInset;
1138
1139                         // try to read target
1140                         char c = getChar();
1141                         if (c == '[') {
1142                                 parse_into(p->cell(0), FLAG_BRACK_END);
1143                                 //lyxerr << "read target: " << p->cell(0) << "\n";
1144                         } else {
1145                                 putback();
1146                         }
1147
1148                         // try to read label
1149                         if (nextToken().cat() == catSuper || nextToken().cat() == catSub) {
1150                                 p->up_ = nextToken().cat() == catSuper;
1151                                 getToken();
1152                                 parse_into(p->cell(1), FLAG_ITEM);
1153                                 //lyxerr << "read label: " << p->cell(1) << "\n";
1154                         }
1155
1156                         array.push_back(MathAtom(p));
1157                         //lyxerr << "read array: " << array << "\n";
1158                 }
1159
1160                 else if (t.cs() == "mbox") {
1161                         array.push_back(createMathInset(t.cs()));
1162                         // slurp in the argument of mbox
1163         
1164                         MathBoxInset * p = array.back()->asBoxInset();
1165                         //lyx::assert(p);
1166                 }
1167 #endif
1168
1169         
1170                 else if (t.cs().size()) {
1171                         latexkeys const * l = in_word_set(t.cs());
1172                         if (l) {
1173                                 if (l->token == LM_TK_FONT) {
1174                                         //lyxerr << "starting font\n";
1175                                         //CatCode catSpaceSave = theCatcode[' '];
1176                                         //if (l->id == LM_TC_TEXTRM) {
1177                                         //      // temporarily change catcode   
1178                                         //      theCatcode[' '] = catLetter;    
1179                                         //}
1180
1181                                         MathArray ar;
1182                                         parse_into(ar, FLAG_ITEM, static_cast<MathTextCodes>(l->id));
1183                                         array.push_back(ar);
1184
1185                                         // undo catcode changes
1186                                         ////theCatcode[' '] = catSpaceSave;
1187                                         //lyxerr << "ending font\n";
1188                                 }
1189
1190                                 else if (l->token == LM_TK_OLDFONT) {
1191                                         code = static_cast<MathTextCodes>(l->id);
1192                                 }
1193
1194                                 else if (l->token == LM_TK_BOX) {
1195                                         MathAtom p = createMathInset(t.cs());
1196                                         parse_into(p->cell(0), FLAG_ITEM | FLAG_BOX, LM_TC_BOX);
1197                                         array.push_back(p);
1198                                 }
1199
1200                                 else if (l->token == LM_TK_STY) {
1201                                         MathAtom p = createMathInset(t.cs());
1202                                         parse_into(p->cell(0), flags, code);
1203                                         array.push_back(p);
1204                                         return;
1205                                 }
1206
1207                                 else {
1208                                         MathAtom p = createMathInset(t.cs());
1209                                         for (MathInset::idx_type i = 0; i < p->nargs(); ++i) 
1210                                                 parse_into(p->cell(i), FLAG_ITEM);
1211                                         array.push_back(p);
1212                                 }
1213                         }
1214
1215                         else {
1216                                 MathAtom p = createMathInset(t.cs());
1217                                 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1218                                         parse_into(p->cell(i), FLAG_ITEM);
1219                                 array.push_back(p);
1220                         }
1221                 }
1222
1223
1224                 if (flags & FLAG_LEAVE) {
1225                         flags &= ~FLAG_LEAVE;
1226                         break;
1227                 }
1228         }
1229
1230         if (panic) {
1231                 lyxerr << " Math Panic, expect problems!\n";
1232                 //   Search for the end command. 
1233                 Token t;
1234                 do {
1235                         t = getToken();
1236                 } while (good() && t.cs() != "end");
1237         }
1238 }
1239
1240
1241
1242 } // anonymous namespace
1243
1244
1245 void mathed_parse_cell(MathArray & ar, string const & str)
1246 {
1247         istringstream is(str.c_str());
1248         mathed_parse_cell(ar, is);
1249 }
1250
1251
1252 void mathed_parse_cell(MathArray & ar, istream & is)
1253 {
1254         Parser(is).parse_into(ar, 0);
1255 }
1256
1257
1258
1259 bool mathed_parse_macro(string & name, string const & str)
1260 {
1261         istringstream is(str.c_str());
1262         Parser parser(is);
1263         return parser.parse_macro(name);
1264 }
1265
1266 bool mathed_parse_macro(string & name, istream & is)
1267 {
1268         Parser parser(is);
1269         return parser.parse_macro(name);
1270 }
1271
1272 bool mathed_parse_macro(string & name, LyXLex & lex)
1273 {
1274         Parser parser(lex);
1275         return parser.parse_macro(name);
1276 }
1277
1278
1279
1280 bool mathed_parse_normal(MathAtom & t, string const & str)
1281 {
1282         istringstream is(str.c_str());
1283         Parser parser(is);
1284         return parser.parse_normal(t);
1285 }
1286
1287 bool mathed_parse_normal(MathAtom & t, istream & is)
1288 {
1289         Parser parser(is);
1290         return parser.parse_normal(t);
1291 }
1292
1293 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1294 {
1295         Parser parser(lex);
1296         return parser.parse_normal(t);
1297 }