]> git.lyx.org Git - lyx.git/blob - src/mathed/MathParser.C
Fix bug 2789 (as discussed)
[lyx.git] / src / mathed / MathParser.C
1 /**
2  * \file MathParser.C
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author André Pönitz
7  *
8  * Full author contact details are available in file CREDITS.
9  */
10
11 /*
12
13 If someone desperately needs partial "structures" (such as a few
14 cells of an array inset or similar), (s)he could use the
15 following hack as a starting point to write some macros:
16
17   \newif\ifcomment
18   \commentfalse
19   \ifcomment
20           \def\makeamptab{\catcode`\&=4\relax}
21           \def\makeampletter{\catcode`\&=11\relax}
22     \def\b{\makeampletter\expandafter\makeamptab\bi}
23     \long\def\bi#1\e{}
24   \else
25     \def\b{}\def\e{}
26   \fi
27
28   ...
29
30   \[\begin{array}{ccc}
31 1
32 &
33
34   \end{array}\]
35
36 */
37
38
39 #include <config.h>
40
41 #include "MathParser.h"
42
43 #include "InsetMathArray.h"
44 #include "InsetMathBig.h"
45 #include "InsetMathBrace.h"
46 #include "InsetMathChar.h"
47 #include "InsetMathColor.h"
48 #include "InsetMathComment.h"
49 #include "InsetMathDelim.h"
50 #include "InsetMathEnv.h"
51 #include "InsetMathKern.h"
52 #include "InsetMathMacro.h"
53 #include "InsetMathPar.h"
54 #include "InsetMathRef.h"
55 #include "InsetMathRoot.h"
56 #include "InsetMathScript.h"
57 #include "InsetMathSqrt.h"
58 #include "InsetMathTabular.h"
59 #include "MathMacroTemplate.h"
60 #include "MathFactory.h"
61 #include "MathMacroArgument.h"
62 #include "MathSupport.h"
63
64 #include "lyxlex.h"
65 #include "debug.h"
66
67 #include "support/convert.h"
68
69 #include <sstream>
70
71 using std::endl;
72 using std::fill;
73
74 using std::string;
75 using std::ios;
76 using std::istream;
77 using std::istringstream;
78 using std::ostream;
79 using std::vector;
80
81
82 //#define FILEDEBUG
83
84
85 namespace {
86
87 InsetMath::mode_type asMode(InsetMath::mode_type oldmode, string const & str)
88 {
89         //lyxerr << "handling mode: '" << str << "'" << endl;
90         if (str == "mathmode")
91                 return InsetMath::MATH_MODE;
92         if (str == "textmode" || str == "forcetext")
93                 return InsetMath::TEXT_MODE;
94         return oldmode;
95 }
96
97
/// \returns whether \p s is non-empty and its last character is '*'.
bool stared(string const & s)
{
        if (s.empty())
                return false;
        return *s.rbegin() == '*';
}
103
104
105 /*!
106  * Add the row \p cellrow to \p grid.
107  * \returns wether the row could be added. Adding a row can fail for
108  * environments like "equation" that have a fixed number of rows.
109  */
110 bool addRow(InsetMathGrid & grid, InsetMathGrid::row_type & cellrow,
111             string const & vskip)
112 {
113         ++cellrow;
114         if (cellrow == grid.nrows()) {
115                 //lyxerr << "adding row " << cellrow << endl;
116                 grid.addRow(cellrow - 1);
117                 if (cellrow == grid.nrows()) {
118                         // We can't add a row to this grid, so let's
119                         // append the content of this cell to the previous
120                         // one.
121                         // This does not happen in well formed .lyx files,
122                         // but LyX versions 1.3.x and older could create
123                         // such files and tex2lyx can still do that.
124                         --cellrow;
125                         lyxerr << "ignoring extra row";
126                         if (!vskip.empty())
127                                 lyxerr << " with extra space " << vskip;
128                         lyxerr << '.' << endl;
129                         return false;
130                 }
131         }
132         grid.vcrskip(LyXLength(vskip), cellrow - 1);
133         return true;
134 }
135
136
137 /*!
138  * Add the column \p cellcol to \p grid.
139  * \returns wether the column could be added. Adding a column can fail for
140  * environments like "eqnarray" that have a fixed number of columns.
141  */
142 bool addCol(InsetMathGrid & grid, InsetMathGrid::col_type & cellcol)
143 {
144         ++cellcol;
145         if (cellcol == grid.ncols()) {
146                 //lyxerr << "adding column " << cellcol << endl;
147                 grid.addCol(cellcol - 1);
148                 if (cellcol == grid.ncols()) {
149                         // We can't add a column to this grid, so let's
150                         // append the content of this cell to the previous
151                         // one.
152                         // This does not happen in well formed .lyx files,
153                         // but LyX versions 1.3.x and older could create
154                         // such files and tex2lyx can still do that.
155                         --cellcol;
156                         lyxerr << "ignoring extra column." << endl;
157                         return false;
158                 }
159         }
160         return true;
161 }
162
163
164 /*!
165  * Check wether the last row is empty and remove it if yes.
166  * Otherwise the following code
167  * \verbatim
168 \begin{array}{|c|c|}
169 \hline
170 1 & 2 \\ \hline
171 3 & 4 \\ \hline
172 \end{array}
173  * \endverbatim
174  * will result in a grid with 3 rows (+ the dummy row that is always present),
175  * because the last '\\' opens a new row.
176  */
177 void delEmptyLastRow(InsetMathGrid & grid)
178 {
179         InsetMathGrid::row_type const row = grid.nrows() - 1;
180         for (InsetMathGrid::col_type col = 0; col < grid.ncols(); ++col) {
181                 if (!grid.cell(grid.index(row, col)).empty())
182                         return;
183         }
184         // Copy the row information of the empty row (which would contain the
185         // last hline in the example above) to the dummy row and delete the
186         // empty row.
187         grid.rowinfo(row + 1) = grid.rowinfo(row);
188         grid.delRow(row);
189 }
190
191
// TeX's category codes. The numeric values follow TeX's own numbering and
// are what operator<< prints for a token's category.
enum CatCode {
        catEscape    = 0,   // backslash
        catBegin     = 1,   // {
        catEnd       = 2,   // }
        catMath      = 3,   // $
        catAlign     = 4,   // &
        catNewline   = 5,   // ^^M
        catParameter = 6,   // #
        catSuper     = 7,   // ^
        catSub       = 8,   // _
        catIgnore    = 9,   // ignored characters
        catSpace     = 10,  // space
        catLetter    = 11,  // a-zA-Z
        catOther     = 12,  // none of the above
        catActive    = 13,  // ~
        catComment   = 14,  // %
        catInvalid   = 15   // <delete>
};
211
212 CatCode theCatcode[256];
213
214
215 inline CatCode catcode(unsigned char c)
216 {
217         return theCatcode[c];
218 }
219
220
/// Bit flags steering Parser::parse1(); they may be or-ed together.
enum {
        FLAG_ALIGN      = 0x0001,  //  next & or \\ ends the parsing process
        FLAG_BRACE_LAST = 0x0002,  //  next closing brace ends the parsing
        FLAG_RIGHT      = 0x0004,  //  next \right ends the parsing process
        FLAG_END        = 0x0008,  //  next \end ends the parsing process
        FLAG_BRACK_LAST = 0x0010,  //  next closing bracket ends the parsing
        FLAG_TEXTMODE   = 0x0020,  //  we are in a box
        FLAG_ITEM       = 0x0040,  //  read a (possibly braced) token
        FLAG_LEAVE      = 0x0080,  //  leave the loop at the end
        FLAG_SIMPLE     = 0x0100,  //  next $ leaves the loop
        FLAG_EQUATION   = 0x0200,  //  next \] leaves the loop
        FLAG_SIMPLE2    = 0x0400,  //  next \) leaves the loop
        FLAG_OPTION     = 0x0800,  //  read [...] style option
        FLAG_BRACED     = 0x1000   //  read {...} style argument
};
236
237
238 //
239 // Helper class for parsing
240 //
241
242 class Token {
243 public:
244         ///
245         Token() : cs_(), char_(0), cat_(catIgnore) {}
246         ///
247         Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
248         ///
249         Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
250
251         ///
252         string const & cs() const { return cs_; }
253         ///
254         CatCode cat() const { return cat_; }
255         ///
256         char character() const { return char_; }
257         ///
258         string asString() const { return cs_.size() ? cs_ : string(1, char_); }
259         ///
260         string asInput() const { return cs_.size() ? '\\' + cs_ : string(1, char_); }
261
262 private:
263         ///
264         string cs_;
265         ///
266         char char_;
267         ///
268         CatCode cat_;
269 };
270
271 ostream & operator<<(ostream & os, Token const & t)
272 {
273         if (t.cs().size())
274                 os << '\\' << t.cs();
275         else if (t.cat() == catLetter)
276                 os << t.character();
277         else
278                 os << '[' << t.character() << ',' << t.cat() << ']';
279         return os;
280 }
281
282
/// Tokenizes math input once at construction time and then turns the
/// token list into math insets by walking a cursor (pos_) over it.
class Parser {
public:
        /// Shorthand for the inset mode type.
        typedef  InsetMath::mode_type mode_type;

        /// Tokenize the stream behind \p lex and consume the rest of its line.
        Parser(LyXLex & lex);
        /// Tokenize \p is.
        Parser(istream & is);

        /// Parse the token list into a single atom \p at. Always returns true.
        bool parse(MathAtom & at);
        /// Parse into \p array using a temporary 1x1 grid.
        void parse(MathArray & array, unsigned flags, mode_type mode);
        /// Parse tokens into the cells of \p grid, starting at its first cell;
        /// \p flags is a combination of the FLAG_* bits.
        void parse1(InsetMathGrid & grid, unsigned flags, mode_type mode,
                bool numbered);
        /// Convenience overload returning the parsed array by value.
        MathArray parse(unsigned flags, mode_type mode);
        /// Approximate current line number (as counted by the tokenizer).
        int lineno() const { return lineno_; }
        /// Step the token cursor back by one token.
        void putback();

private:
        /// Parse into the grid inset held by \p at.
        void parse2(MathAtom & at, unsigned flags, mode_type mode, bool numbered);
        /// get arg delimited by 'left' and 'right'
        string getArg(char left, char right);
        /// Read the character of the next token and advance the cursor.
        char getChar();
        /// Report \p msg together with the line number and a token dump on lyxerr.
        void error(string const & msg);
        /// dump contents to lyxerr
        void dump() const;
        /// Read \p is up to "\end_inset" (or end of stream) and tokenize that text.
        void tokenize(istream & is);
        /// Split \p s into tokens and append them to tokens_.
        void tokenize(string const & s);
        /// Skip space and newline characters in \p is, starting with the
        /// already read character \p c.
        void skipSpaceTokens(istream & is, char c);
        /// Append \p t to the token list.
        void push_back(Token const & t);
        /// Remove the last token from the token list.
        void pop_back();
        /// Token just before the cursor (an empty token at the very start).
        Token const & prevToken() const;
        /// Token at the cursor, without consuming it (empty token at the end).
        Token const & nextToken() const;
        /// Token at the cursor, consuming it (empty token at the end).
        Token const & getToken();
        /// skips spaces if any
        void skipSpaces();
        /// NOTE(review): no definition is visible in this part of the file.
        void lex(string const & s);
        /// \returns whether the cursor still points at a token.
        bool good() const;
        /// Read a braced {...} group and return its contents as plain text.
        string parse_verbatim_item();
        /// Read a bracketed [...] option and return its contents as plain text.
        string parse_verbatim_option();

        /// Approximate line number, advanced while tokenizing.
        int lineno_;
        /// All tokens of the input.
        vector<Token> tokens_;
        /// Cursor: index of the next token to be read.
        unsigned pos_;
        /// Stack of active environments
        vector<string> environments_;
};
354
355
/// Start line counting at the lexer's current line, tokenize everything
/// the lexer's stream offers up to the end of the inset, then let the
/// lexer consume the remainder of its current line.
Parser::Parser(LyXLex & lexer)
        : lineno_(lexer.getLineNo()), pos_(0)
{
        tokenize(lexer.getStream());
        lexer.eatLine();
}
362
363
/// Tokenize \p is; line counting starts at 0.
Parser::Parser(istream & is)
        : lineno_(0), pos_(0)
{
        tokenize(is);
}
369
370
/// Append \p t to the end of the token list.
void Parser::push_back(Token const & t)
{
        tokens_.push_back(t);
}
375
376
/// Drop the last token of the token list. The list must not be empty;
/// no check is made here.
void Parser::pop_back()
{
        tokens_.pop_back();
}
381
382
383 Token const & Parser::prevToken() const
384 {
385         static const Token dummy;
386         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
387 }
388
389
390 Token const & Parser::nextToken() const
391 {
392         static const Token dummy;
393         return good() ? tokens_[pos_] : dummy;
394 }
395
396
397 Token const & Parser::getToken()
398 {
399         static const Token dummy;
400         //lyxerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << endl;
401         return good() ? tokens_[pos_++] : dummy;
402 }
403
404
405 void Parser::skipSpaces()
406 {
407         while (nextToken().cat() == catSpace || nextToken().cat() == catNewline)
408                 getToken();
409 }
410
411
412 void Parser::putback()
413 {
414         --pos_;
415 }
416
417
/// \returns whether the cursor still points at a token, i.e. whether there
/// is input left to read.
bool Parser::good() const
{
        return pos_ < tokens_.size();
}
422
423
424 char Parser::getChar()
425 {
426         if (!good())
427                 error("The input stream is not well...");
428         return tokens_[pos_++].character();
429 }
430
431
432 string Parser::getArg(char left, char right)
433 {
434         skipSpaces();
435
436         string result;
437         char c = getChar();
438
439         if (c != left)
440                 putback();
441         else
442                 while ((c = getChar()) != right && good())
443                         result += c;
444
445         return result;
446 }
447
448
449 void Parser::skipSpaceTokens(istream & is, char c)
450 {
451         // skip trailing spaces
452         while (catcode(c) == catSpace || catcode(c) == catNewline)
453                 if (!is.get(c))
454                         break;
455         //lyxerr << "putting back: " << c << endl;
456         is.putback(c);
457 }
458
459
460 void Parser::tokenize(istream & is)
461 {
462         // eat everything up to the next \end_inset or end of stream
463         // and store it in s for further tokenization
464         string s;
465         char c;
466         while (is.get(c)) {
467                 s += c;
468                 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
469                         s = s.substr(0, s.size() - 10);
470                         break;
471                 }
472         }
473         // Remove the space after \end_inset
474         if (is.get(c) && c != ' ')
475                 is.unget();
476
477         // tokenize buffer
478         tokenize(s);
479 }
480
481
482 void Parser::tokenize(string const & buffer)
483 {
484         istringstream is(buffer, ios::in | ios::binary);
485
486         char c;
487         while (is.get(c)) {
488                 //lyxerr << "reading c: " << c << endl;
489
490                 switch (catcode(c)) {
491                         case catNewline: {
492                                 ++lineno_;
493                                 is.get(c);
494                                 if (catcode(c) == catNewline)
495                                         ; //push_back(Token("par"));
496                                 else {
497                                         push_back(Token('\n', catNewline));
498                                         is.putback(c);
499                                 }
500                                 break;
501                         }
502
503 /*
504                         case catComment: {
505                                 while (is.get(c) && catcode(c) != catNewline)
506                                         ;
507                                 ++lineno_;
508                                 break;
509                         }
510 */
511
512                         case catEscape: {
513                                 is.get(c);
514                                 if (!is) {
515                                         error("unexpected end of input");
516                                 } else {
517                                         string s(1, c);
518                                         if (catcode(c) == catLetter) {
519                                                 // collect letters
520                                                 while (is.get(c) && catcode(c) == catLetter)
521                                                         s += c;
522                                                 skipSpaceTokens(is, c);
523                                         }
524                                         push_back(Token(s));
525                                 }
526                                 break;
527                         }
528
529                         case catSuper:
530                         case catSub: {
531                                 push_back(Token(c, catcode(c)));
532                                 is.get(c);
533                                 skipSpaceTokens(is, c);
534                                 break;
535                         }
536
537                         case catIgnore: {
538                                 lyxerr << "ignoring a char: " << int(c) << endl;
539                                 break;
540                         }
541
542                         default:
543                                 push_back(Token(c, catcode(c)));
544                 }
545         }
546
547 #ifdef FILEDEBUG
548         dump();
549 #endif
550 }
551
552
553 void Parser::dump() const
554 {
555         lyxerr << "\nTokens: ";
556         for (unsigned i = 0; i < tokens_.size(); ++i) {
557                 if (i == pos_)
558                         lyxerr << " <#> ";
559                 lyxerr << tokens_[i];
560         }
561         lyxerr << " pos: " << pos_ << endl;
562 }
563
564
/// Report a parse error: writes the approximate line number and \p msg to
/// lyxerr, followed by a dump of the token list. Parsing is not aborted
/// here; callers decide how to continue.
void Parser::error(string const & msg)
{
        lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
        dump();
        //exit(1);
}
571
572
573 bool Parser::parse(MathAtom & at)
574 {
575         skipSpaces();
576         MathArray ar;
577         parse(ar, false, InsetMath::UNDECIDED_MODE);
578         if (ar.size() != 1 || ar.front()->getType() == hullNone) {
579                 lyxerr << "unusual contents found: " << ar << endl;
580                 at = MathAtom(new InsetMathPar(ar));
581                 //if (at->nargs() > 0)
582                 //      at.nucleus()->cell(0) = ar;
583                 //else
584                 //      lyxerr << "unusual contents found: " << ar << endl;
585                 return true;
586         }
587         at = ar[0];
588         return true;
589 }
590
591
592 string Parser::parse_verbatim_option()
593 {
594         skipSpaces();
595         string res;
596         if (nextToken().character() == '[') {
597                 Token t = getToken();
598                 for (Token t = getToken(); t.character() != ']' && good(); t = getToken()) {
599                         if (t.cat() == catBegin) {
600                                 putback();
601                                 res += '{' + parse_verbatim_item() + '}';
602                         } else
603                                 res += t.asString();
604                 }
605         }
606         return res;
607 }
608
609
610 string Parser::parse_verbatim_item()
611 {
612         skipSpaces();
613         string res;
614         if (nextToken().cat() == catBegin) {
615                 Token t = getToken();
616                 for (Token t = getToken(); t.cat() != catEnd && good(); t = getToken()) {
617                         if (t.cat() == catBegin) {
618                                 putback();
619                                 res += '{' + parse_verbatim_item() + '}';
620                         }
621                         else
622                                 res += t.asString();
623                 }
624         }
625         return res;
626 }
627
628
/// Convenience overload: parse with \p flags and \p mode into a fresh
/// array and return it by value.
MathArray Parser::parse(unsigned flags, mode_type mode)
{
        MathArray ar;
        parse(ar, flags, mode);
        return ar;
}
635
636
/// Parse into \p array. The work is done by parse1() on a temporary 1x1
/// grid; afterwards the grid's first cell is copied into \p array
/// (replacing its previous contents).
void Parser::parse(MathArray & array, unsigned flags, mode_type mode)
{
        InsetMathGrid grid(1, 1);
        parse1(grid, flags, mode, false);
        array = grid.cell(0);
}
643
644
/// Parse into the grid inset held by \p at by forwarding to parse1().
/// The result of asGridInset() is dereferenced without a check, so \p at
/// must hold an inset for which asGridInset() returns a valid pointer.
void Parser::parse2(MathAtom & at, const unsigned flags, const mode_type mode,
        const bool numbered)
{
        parse1(*(at.nucleus()->asGridInset()), flags, mode, numbered);
}
650
651
652 void Parser::parse1(InsetMathGrid & grid, unsigned flags,
653         const mode_type mode, const bool numbered)
654 {
655         int limits = 0;
656         InsetMathGrid::row_type cellrow = 0;
657         InsetMathGrid::col_type cellcol = 0;
658         MathArray * cell = &grid.cell(grid.index(cellrow, cellcol));
659
660         if (grid.asHullInset())
661                 grid.asHullInset()->numbered(cellrow, numbered);
662
663         //dump();
664         //lyxerr << " flags: " << flags << endl;
665         //lyxerr << " mode: " << mode  << endl;
666         //lyxerr << "grid: " << grid << endl;
667
668         while (good()) {
669                 Token const & t = getToken();
670
671 #ifdef FILEDEBUG
672                 lyxerr << "t: " << t << " flags: " << flags << endl;
673                 lyxerr << "mode: " << mode  << endl;
674                 cell->dump();
675                 lyxerr << endl;
676 #endif
677
678                 if (flags & FLAG_ITEM) {
679
680                         if (t.cat() == catBegin) {
681                                 // skip the brace and collect everything to the next matching
682                                 // closing brace
683                                 parse1(grid, FLAG_BRACE_LAST, mode, numbered);
684                                 return;
685                         }
686
687                         // handle only this single token, leave the loop if done
688                         flags = FLAG_LEAVE;
689                 }
690
691
692                 if (flags & FLAG_BRACED) {
693                         if (t.cat() == catSpace)
694                                 continue;
695
696                         if (t.cat() != catBegin) {
697                                 error("opening brace expected");
698                                 return;
699                         }
700
701                         // skip the brace and collect everything to the next matching
702                         // closing brace
703                         flags = FLAG_BRACE_LAST;
704                 }
705
706
707                 if (flags & FLAG_OPTION) {
708                         if (t.cat() == catOther && t.character() == '[') {
709                                 MathArray ar;
710                                 parse(ar, FLAG_BRACK_LAST, mode);
711                                 cell->append(ar);
712                         } else {
713                                 // no option found, put back token and we are done
714                                 putback();
715                         }
716                         return;
717                 }
718
719                 //
720                 // cat codes
721                 //
722                 if (t.cat() == catMath) {
723                         if (mode != InsetMath::MATH_MODE) {
724                                 // we are inside some text mode thingy, so opening new math is allowed
725                                 Token const & n = getToken();
726                                 if (n.cat() == catMath) {
727                                         // TeX's $$...$$ syntax for displayed math
728                                         cell->push_back(MathAtom(new InsetMathHull(hullEquation)));
729                                         parse2(cell->back(), FLAG_SIMPLE, InsetMath::MATH_MODE, false);
730                                         getToken(); // skip the second '$' token
731                                 } else {
732                                         // simple $...$  stuff
733                                         putback();
734                                         cell->push_back(MathAtom(new InsetMathHull(hullSimple)));
735                                         parse2(cell->back(), FLAG_SIMPLE, InsetMath::MATH_MODE, false);
736                                 }
737                         }
738
739                         else if (flags & FLAG_SIMPLE) {
740                                 // this is the end of the formula
741                                 return;
742                         }
743
744                         else {
745                                 error("something strange in the parser");
746                                 break;
747                         }
748                 }
749
750                 else if (t.cat() == catLetter)
751                         cell->push_back(MathAtom(new InsetMathChar(t.character())));
752
753                 else if (t.cat() == catSpace && mode != InsetMath::MATH_MODE) {
754                         if (cell->empty() || cell->back()->getChar() != ' ')
755                                 cell->push_back(MathAtom(new InsetMathChar(t.character())));
756                 }
757
758                 else if (t.cat() == catNewline && mode != InsetMath::MATH_MODE) {
759                         if (cell->empty() || cell->back()->getChar() != ' ')
760                                 cell->push_back(MathAtom(new InsetMathChar(' ')));
761                 }
762
763                 else if (t.cat() == catParameter) {
764                         Token const & n = getToken();
765                         cell->push_back(MathAtom(new MathMacroArgument(n.character()-'0')));
766                 }
767
768                 else if (t.cat() == catActive)
769                         cell->push_back(MathAtom(new InsetMathChar(t.character())));
770
771                 else if (t.cat() == catBegin) {
772                         MathArray ar;
773                         parse(ar, FLAG_BRACE_LAST, mode);
774                         // do not create a BraceInset if they were written by LyX
775                         // this helps to keep the annoyance of  "a choose b"  to a minimum
776                         if (ar.size() == 1 && ar[0]->extraBraces())
777                                 cell->append(ar);
778                         else
779                                 cell->push_back(MathAtom(new InsetMathBrace(ar)));
780                 }
781
782                 else if (t.cat() == catEnd) {
783                         if (flags & FLAG_BRACE_LAST)
784                                 return;
785                         error("found '}' unexpectedly");
786                         //BOOST_ASSERT(false);
787                         //add(cell, '}', LM_TC_TEX);
788                 }
789
790                 else if (t.cat() == catAlign) {
791                         //lyxerr << " column now " << (cellcol + 1)
792                         //       << " max: " << grid.ncols() << endl;
793                         if (flags & FLAG_ALIGN)
794                                 return;
795                         if (addCol(grid, cellcol))
796                                 cell = &grid.cell(grid.index(cellrow, cellcol));
797                 }
798
799                 else if (t.cat() == catSuper || t.cat() == catSub) {
800                         bool up = (t.cat() == catSuper);
801                         // we need no new script inset if the last thing was a scriptinset,
802                         // which has that script already not the same script already
803                         if (!cell->size())
804                                 cell->push_back(MathAtom(new InsetMathScript(up)));
805                         else if (cell->back()->asScriptInset() &&
806                                         !cell->back()->asScriptInset()->has(up))
807                                 cell->back().nucleus()->asScriptInset()->ensure(up);
808                         else if (cell->back()->asScriptInset())
809                                 cell->push_back(MathAtom(new InsetMathScript(up)));
810                         else
811                                 cell->back() = MathAtom(new InsetMathScript(cell->back(), up));
812                         InsetMathScript * p = cell->back().nucleus()->asScriptInset();
813                         // special handling of {}-bases
814                         // is this always correct?
815                         if (p->nuc().size() == 1 
816                             && p->nuc().back()->asBraceInset())
817                                 p->nuc() = p->nuc().back()->asNestInset()->cell(0);
818                         parse(p->cell(p->idxOfScript(up)), FLAG_ITEM, mode);
819                         if (limits) {
820                                 p->limits(limits);
821                                 limits = 0;
822                         }
823                 }
824
825                 else if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) {
826                         //lyxerr << "finished reading option" << endl;
827                         return;
828                 }
829
830                 else if (t.cat() == catOther)
831                         cell->push_back(MathAtom(new InsetMathChar(t.character())));
832
833                 else if (t.cat() == catComment) {
834                         string s;
835                         while (good()) {
836                                 Token const & t = getToken();
837                                 if (t.cat() == catNewline)
838                                         break;
839                                 s += t.asString();
840                         }
841                         cell->push_back(MathAtom(new InsetMathComment(s)));
842                         skipSpaces();
843                 }
844
845                 //
846                 // control sequences
847                 //
848
849                 else if (t.cs() == "lyxlock") {
850                         if (cell->size())
851                                 cell->back().nucleus()->lock(true);
852                 }
853
854                 else if (t.cs() == "def" ||
855                         t.cs() == "newcommand" ||
856                         t.cs() == "renewcommand")
857                 {
858                         string const type = t.cs();
859                         string name;
860                         int nargs = 0;
861                         if (t.cs() == "def") {
862                                 // get name
863                                 name = getToken().cs();
864
865                                 // read parameter
866                                 string pars;
867                                 while (good() && nextToken().cat() != catBegin) {
868                                         pars += getToken().cs();
869                                         ++nargs;
870                                 }
871                                 nargs /= 2;
872                                 //lyxerr << "read \\def parameter list '" << pars << "'" << endl;
873
874                         } else { // t.cs() == "newcommand" || t.cs() == "renewcommand"
875
876                                 if (getToken().cat() != catBegin) {
877                                         error("'{' in \\newcommand expected (1) ");
878                                         return;
879                                 }
880
881                                 name = getToken().cs();
882
883                                 if (getToken().cat() != catEnd) {
884                                         error("'}' in \\newcommand expected");
885                                         return;
886                                 }
887
888                                 string const arg  = getArg('[', ']');
889                                 if (!arg.empty())
890                                         nargs = convert<int>(arg);
891
892                         }
893
894                         MathArray ar1;
895                         parse(ar1, FLAG_ITEM, InsetMath::UNDECIDED_MODE);
896
897                         // we cannot handle recursive stuff at all
898                         //MathArray test;
899                         //test.push_back(createInsetMath(name));
900                         //if (ar1.contains(test)) {
901                         //      error("we cannot handle recursive macros at all.");
902                         //      return;
903                         //}
904
905                         // is a version for display attached?
906                         skipSpaces();
907                         MathArray ar2;
908                         if (nextToken().cat() == catBegin)
909                                 parse(ar2, FLAG_ITEM, InsetMath::MATH_MODE);
910
911                         cell->push_back(MathAtom(new MathMacroTemplate(name, nargs, type,
912                                 ar1, ar2)));
913                 }
914
915                 else if (t.cs() == "(") {
916                         cell->push_back(MathAtom(new InsetMathHull(hullSimple)));
917                         parse2(cell->back(), FLAG_SIMPLE2, InsetMath::MATH_MODE, false);
918                 }
919
920                 else if (t.cs() == "[") {
921                         cell->push_back(MathAtom(new InsetMathHull(hullEquation)));
922                         parse2(cell->back(), FLAG_EQUATION, InsetMath::MATH_MODE, false);
923                 }
924
925                 else if (t.cs() == "protect")
926                         // ignore \\protect, will hopefully be re-added during output
927                         ;
928
929                 else if (t.cs() == "end") {
930                         if (flags & FLAG_END) {
931                                 // eat environment name
932                                 string const name = getArg('{', '}');
933                                 if (environments_.empty())
934                                         error("'found \\end{" + name +
935                                               "}' without matching '\\begin{" +
936                                               name + "}'");
937                                 else if (name != environments_.back())
938                                         error("'\\end{" + name +
939                                               "}' does not match '\\begin{" +
940                                               environments_.back() + "}'");
941                                 else {
942                                         environments_.pop_back();
943                                         // Delete empty last row in matrix
944                                         // like insets.
945                                         // If you abuse InsetMathGrid for
946                                         // non-matrix like structures you
947                                         // probably need to refine this test.
948                                         // Right now we only have to test for
949                                         // single line hull insets.
950                                         if (grid.nrows() > 1)
951                                                 delEmptyLastRow(grid);
952                                         return;
953                                 }
954                         } else
955                                 error("found 'end' unexpectedly");
956                 }
957
958                 else if (t.cs() == ")") {
959                         if (flags & FLAG_SIMPLE2)
960                                 return;
961                         error("found '\\)' unexpectedly");
962                 }
963
964                 else if (t.cs() == "]") {
965                         if (flags & FLAG_EQUATION)
966                                 return;
967                         error("found '\\]' unexpectedly");
968                 }
969
970                 else if (t.cs() == "\\") {
971                         if (flags & FLAG_ALIGN)
972                                 return;
973                         if (addRow(grid, cellrow, getArg('[', ']'))) {
974                                 cellcol = 0;
975                                 if (grid.asHullInset())
976                                         grid.asHullInset()->numbered(
977                                                         cellrow, numbered);
978                                 cell = &grid.cell(grid.index(cellrow,
979                                                              cellcol));
980                         }
981                 }
982
983 #if 0
984                 else if (t.cs() == "multicolumn") {
985                         // extract column count and insert dummy cells
986                         MathArray count;
987                         parse(count, FLAG_ITEM, mode);
988                         int cols = 1;
989                         if (!extractNumber(count, cols)) {
990                                 lyxerr << " can't extract number of cells from " << count << endl;
991                         }
992                         // resize the table if necessary
993                         for (int i = 0; i < cols; ++i) {
994                                 if (addCol(grid, cellcol)) {
995                                         cell = &grid.cell(grid.index(
996                                                         cellrow, cellcol));
997                                         // mark this as dummy
998                                         grid.cellinfo(grid.index(
999                                                 cellrow, cellcol)).dummy_ = true;
1000                                 }
1001                         }
1002                         // the last cell is the real thing, not a dummy
1003                         grid.cellinfo(grid.index(cellrow, cellcol)).dummy_ = false;
1004
1005                         // read special alignment
1006                         MathArray align;
1007                         parse(align, FLAG_ITEM, mode);
1008                         //grid.cellinfo(grid.index(cellrow, cellcol)).align_ = extractString(align);
1009
1010                         // parse the remaining contents into the "real" cell
1011                         parse(*cell, FLAG_ITEM, mode);
1012                 }
1013 #endif
1014
1015                 else if (t.cs() == "limits")
1016                         limits = 1;
1017
1018                 else if (t.cs() == "nolimits")
1019                         limits = -1;
1020
1021                 else if (t.cs() == "nonumber") {
1022                         if (grid.asHullInset())
1023                                 grid.asHullInset()->numbered(cellrow, false);
1024                 }
1025
1026                 else if (t.cs() == "number") {
1027                         if (grid.asHullInset())
1028                                 grid.asHullInset()->numbered(cellrow, true);
1029                 }
1030
1031                 else if (t.cs() == "hline") {
1032                         grid.rowinfo(cellrow).lines_ ++;
1033                 }
1034
1035                 else if (t.cs() == "sqrt") {
1036                         MathArray ar;
1037                         parse(ar, FLAG_OPTION, mode);
1038                         if (ar.size()) {
1039                                 cell->push_back(MathAtom(new InsetMathRoot));
1040                                 cell->back().nucleus()->cell(0) = ar;
1041                                 parse(cell->back().nucleus()->cell(1), FLAG_ITEM, mode);
1042                         } else {
1043                                 cell->push_back(MathAtom(new InsetMathSqrt));
1044                                 parse(cell->back().nucleus()->cell(0), FLAG_ITEM, mode);
1045                         }
1046                 }
1047
1048                 else if (t.cs() == "xrightarrow" || t.cs() == "xleftarrow") {
1049                         cell->push_back(createInsetMath(t.cs()));
1050                         parse(cell->back().nucleus()->cell(1), FLAG_OPTION, mode);
1051                         parse(cell->back().nucleus()->cell(0), FLAG_ITEM, mode);
1052                 }
1053
1054                 else if (t.cs() == "ref" || t.cs() == "prettyref" ||
1055                                 t.cs() == "pageref" || t.cs() == "vpageref" || t.cs() == "vref") {
1056                         cell->push_back(MathAtom(new RefInset(t.cs())));
1057                         parse(cell->back().nucleus()->cell(1), FLAG_OPTION, mode);
1058                         parse(cell->back().nucleus()->cell(0), FLAG_ITEM, mode);
1059                 }
1060
1061                 else if (t.cs() == "left") {
1062                         skipSpaces();
1063                         Token const & tl = getToken();
1064                         // \| and \Vert are equivalent, and InsetMathDelim
1065                         // can't handle \|
1066                         // FIXME: fix this in InsetMathDelim itself!
1067                         string const l = tl.cs() == "|" ? "Vert" : tl.asString();
1068                         MathArray ar;
1069                         parse(ar, FLAG_RIGHT, mode);
1070                         skipSpaces();
1071                         Token const & tr = getToken();
1072                         string const r = tr.cs() == "|" ? "Vert" : tr.asString();
1073                         cell->push_back(MathAtom(new InsetMathDelim(l, r, ar)));
1074                 }
1075
1076                 else if (t.cs() == "right") {
1077                         if (flags & FLAG_RIGHT)
1078                                 return;
1079                         //lyxerr << "got so far: '" << cell << "'" << endl;
1080                         error("Unmatched right delimiter");
1081                         return;
1082                 }
1083
1084                 else if (t.cs() == "begin") {
1085                         string const name = getArg('{', '}');
1086                         environments_.push_back(name);
1087
1088                         if (name == "array" || name == "subarray") {
1089                                 string const valign = parse_verbatim_option() + 'c';
1090                                 string const halign = parse_verbatim_item();
1091                                 cell->push_back(MathAtom(new InsetMathArray(name, valign[0], halign)));
1092                                 parse2(cell->back(), FLAG_END, mode, false);
1093                         }
1094
1095                         else if (name == "tabular") {
1096                                 string const valign = parse_verbatim_option() + 'c';
1097                                 string const halign = parse_verbatim_item();
1098                                 cell->push_back(MathAtom(new InsetMathTabular(name, valign[0], halign)));
1099                                 parse2(cell->back(), FLAG_END, InsetMath::TEXT_MODE, false);
1100                         }
1101
1102                         else if (name == "split" || name == "cases" ||
1103                                  name == "gathered" || name == "aligned") {
1104                                 cell->push_back(createInsetMath(name));
1105                                 parse2(cell->back(), FLAG_END, mode, false);
1106                         }
1107
1108                         else if (name == "alignedat") {
1109                                 // ignore this for a while
1110                                 getArg('{', '}');
1111                                 cell->push_back(createInsetMath(name));
1112                                 parse2(cell->back(), FLAG_END, mode, false);
1113                         }
1114
1115                         else if (name == "math") {
1116                                 cell->push_back(MathAtom(new InsetMathHull(hullSimple)));
1117                                 parse2(cell->back(), FLAG_END, InsetMath::MATH_MODE, true);
1118                         }
1119
1120                         else if (name == "equation" || name == "equation*"
1121                                         || name == "displaymath") {
1122                                 cell->push_back(MathAtom(new InsetMathHull(hullEquation)));
1123                                 parse2(cell->back(), FLAG_END, InsetMath::MATH_MODE, (name == "equation"));
1124                         }
1125
1126                         else if (name == "eqnarray" || name == "eqnarray*") {
1127                                 cell->push_back(MathAtom(new InsetMathHull(hullEqnArray)));
1128                                 parse2(cell->back(), FLAG_END, InsetMath::MATH_MODE, !stared(name));
1129                         }
1130
1131                         else if (name == "align" || name == "align*") {
1132                                 cell->push_back(MathAtom(new InsetMathHull(hullAlign)));
1133                                 parse2(cell->back(), FLAG_END, InsetMath::MATH_MODE, !stared(name));
1134                         }
1135
1136                         else if (name == "flalign" || name == "flalign*") {
1137                                 cell->push_back(MathAtom(new InsetMathHull(hullFlAlign)));
1138                                 parse2(cell->back(), FLAG_END, InsetMath::MATH_MODE, !stared(name));
1139                         }
1140
1141                         else if (name == "alignat" || name == "alignat*") {
1142                                 // ignore this for a while
1143                                 getArg('{', '}');
1144                                 cell->push_back(MathAtom(new InsetMathHull(hullAlignAt)));
1145                                 parse2(cell->back(), FLAG_END, InsetMath::MATH_MODE, !stared(name));
1146                         }
1147
1148                         else if (name == "xalignat" || name == "xalignat*") {
1149                                 // ignore this for a while
1150                                 getArg('{', '}');
1151                                 cell->push_back(MathAtom(new InsetMathHull(hullXAlignAt)));
1152                                 parse2(cell->back(), FLAG_END, InsetMath::MATH_MODE, !stared(name));
1153                         }
1154
1155                         else if (name == "xxalignat") {
1156                                 // ignore this for a while
1157                                 getArg('{', '}');
1158                                 cell->push_back(MathAtom(new InsetMathHull(hullXXAlignAt)));
1159                                 parse2(cell->back(), FLAG_END, InsetMath::MATH_MODE, !stared(name));
1160                         }
1161
1162                         else if (name == "multline" || name == "multline*") {
1163                                 cell->push_back(MathAtom(new InsetMathHull(hullMultline)));
1164                                 parse2(cell->back(), FLAG_END, InsetMath::MATH_MODE, !stared(name));
1165                         }
1166
1167                         else if (name == "gather" || name == "gather*") {
1168                                 cell->push_back(MathAtom(new InsetMathHull(hullGather)));
1169                                 parse2(cell->back(), FLAG_END, InsetMath::MATH_MODE, !stared(name));
1170                         }
1171
1172                         else if (latexkeys const * l = in_word_set(name)) {
1173                                 if (l->inset == "matrix") {
1174                                         cell->push_back(createInsetMath(name));
1175                                         parse2(cell->back(), FLAG_END, mode, false);
1176                                 }
1177                         }
1178
1179                         else {
1180                                 dump();
1181                                 lyxerr << "found unknown math environment '" << name << "'" << endl;
1182                                 // create generic environment inset
1183                                 cell->push_back(MathAtom(new InsetMathEnv(name)));
1184                                 parse(cell->back().nucleus()->cell(0), FLAG_ITEM, mode);
1185                         }
1186                 }
1187
1188                 else if (t.cs() == "kern") {
1189 #ifdef WITH_WARNINGS
1190 #warning A hack...
1191 #endif
1192                         string s;
1193                         while (true) {
1194                                 Token const & t = getToken();
1195                                 if (!good()) {
1196                                         putback();
1197                                         break;
1198                                 }
1199                                 s += t.character();
1200                                 if (isValidLength(s))
1201                                         break;
1202                         }
1203                         cell->push_back(MathAtom(new InsetMathKern(s)));
1204                 }
1205
1206                 else if (t.cs() == "label") {
1207                         // FIXME: This is swallowed in inline formulas
1208                         string label = parse_verbatim_item();
1209                         MathArray ar;
1210                         asArray(label, ar);
1211                         if (grid.asHullInset()) {
1212                                 grid.asHullInset()->label(cellrow, label);
1213                         } else {
1214                                 cell->push_back(createInsetMath(t.cs()));
1215                                 cell->push_back(MathAtom(new InsetMathBrace(ar)));
1216                         }
1217                 }
1218
1219                 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
1220                         MathAtom at = createInsetMath(t.cs());
1221                         at.nucleus()->cell(0) = *cell;
1222                         cell->clear();
1223                         parse(at.nucleus()->cell(1), flags, mode);
1224                         cell->push_back(at);
1225                         return;
1226                 }
1227
1228                 else if (t.cs() == "color") {
1229                         string const color = parse_verbatim_item();
1230                         cell->push_back(MathAtom(new InsetMathColor(true, color)));
1231                         parse(cell->back().nucleus()->cell(0), flags, mode);
1232                         return;
1233                 }
1234
1235                 else if (t.cs() == "textcolor") {
1236                         string const color = parse_verbatim_item();
1237                         cell->push_back(MathAtom(new InsetMathColor(false, color)));
1238                         parse(cell->back().nucleus()->cell(0), FLAG_ITEM, InsetMath::TEXT_MODE);
1239                 }
1240
1241                 else if (t.cs() == "normalcolor") {
1242                         cell->push_back(createInsetMath(t.cs()));
1243                         parse(cell->back().nucleus()->cell(0), flags, mode);
1244                         return;
1245                 }
1246
1247                 else if (t.cs() == "substack") {
1248                         cell->push_back(createInsetMath(t.cs()));
1249                         parse2(cell->back(), FLAG_ITEM, mode, false);
1250                 }
1251
1252                 else if (t.cs() == "xymatrix") {
1253                         cell->push_back(createInsetMath(t.cs()));
1254                         parse2(cell->back(), FLAG_ITEM, mode, false);
1255                 }
1256
1257                 else if (t.cs() == "framebox" || t.cs() == "makebox") {
1258                         cell->push_back(createInsetMath(t.cs()));
1259                         parse(cell->back().nucleus()->cell(0), FLAG_OPTION, InsetMath::TEXT_MODE);
1260                         parse(cell->back().nucleus()->cell(1), FLAG_OPTION, InsetMath::TEXT_MODE);
1261                         parse(cell->back().nucleus()->cell(2), FLAG_ITEM, InsetMath::TEXT_MODE);
1262                 }
1263
1264                 else if (t.cs() == "tag") {
1265                         if (nextToken().character() == '*') {
1266                                 getToken();
1267                                 cell->push_back(createInsetMath(t.cs() + '*'));
1268                         } else
1269                                 cell->push_back(createInsetMath(t.cs()));
1270                         parse(cell->back().nucleus()->cell(0), FLAG_ITEM, InsetMath::TEXT_MODE);
1271                 }
1272
1273 #if 0
1274                 else if (t.cs() == "infer") {
1275                         MathArray ar;
1276                         parse(ar, FLAG_OPTION, mode);
1277                         cell->push_back(createInsetMath(t.cs()));
1278                         parse2(cell->back(), FLAG_ITEM, mode, false);
1279                 }
1280
1281                 // Disabled
1282                 else if (1 && t.cs() == "ar") {
1283                         auto_ptr<InsetMathXYArrow> p(new InsetMathXYArrow);
1284                         // try to read target
1285                         parse(p->cell(0), FLAG_OTPTION, mode);
1286                         // try to read label
1287                         if (nextToken().cat() == catSuper || nextToken().cat() == catSub) {
1288                                 p->up_ = nextToken().cat() == catSuper;
1289                                 getToken();
1290                                 parse(p->cell(1), FLAG_ITEM, mode);
1291                                 //lyxerr << "read label: " << p->cell(1) << endl;
1292                         }
1293
1294                         cell->push_back(MathAtom(p.release()));
1295                         //lyxerr << "read cell: " << cell << endl;
1296                 }
1297 #endif
1298
1299                 else if (t.cs().size()) {
1300                         latexkeys const * l = in_word_set(t.cs());
1301                         if (l) {
1302                                 if (l->inset == "big") {
1303                                         skipSpaces();
1304                                         string const delim = getToken().asInput();
1305                                         if (InsetMathBig::isBigInsetDelim(delim))
1306                                                 cell->push_back(MathAtom(
1307                                                         new InsetMathBig(t.cs(), delim)));
1308                                         else {
1309                                                 cell->push_back(createInsetMath(t.cs()));
1310                                                 cell->push_back(createInsetMath(
1311                                                                 delim.substr(1)));
1312                                         }
1313                                 }
1314
1315                                 else if (l->inset == "font") {
1316                                         cell->push_back(createInsetMath(t.cs()));
1317                                         parse(cell->back().nucleus()->cell(0),
1318                                                 FLAG_ITEM, asMode(mode, l->extra));
1319                                 }
1320
1321                                 else if (l->inset == "oldfont") {
1322                                         cell->push_back(createInsetMath(t.cs()));
1323                                         parse(cell->back().nucleus()->cell(0),
1324                                                 flags | FLAG_ALIGN, asMode(mode, l->extra));
1325                                         if (prevToken().cat() != catAlign &&
1326                                             prevToken().cs() != "\\")
1327                                                 return;
1328                                         putback();
1329                                 }
1330
1331                                 else if (l->inset == "style") {
1332                                         cell->push_back(createInsetMath(t.cs()));
1333                                         parse(cell->back().nucleus()->cell(0),
1334                                                 flags | FLAG_ALIGN, mode);
1335                                         if (prevToken().cat() != catAlign &&
1336                                             prevToken().cs() != "\\")
1337                                                 return;
1338                                         putback();
1339                                 }
1340
1341                                 else {
1342                                         MathAtom at = createInsetMath(t.cs());
1343                                         for (InsetMath::idx_type i = 0; i < at->nargs(); ++i)
1344                                                 parse(at.nucleus()->cell(i),
1345                                                         FLAG_ITEM, asMode(mode, l->extra));
1346                                         cell->push_back(at);
1347                                 }
1348                         }
1349
1350                         else {
1351                                 MathAtom at = createInsetMath(t.cs());
1352                                 InsetMath::mode_type m = mode;
1353                                 //if (m == InsetMath::UNDECIDED_MODE)
1354                                 //lyxerr << "default creation: m1: " << m << endl;
1355                                 if (at->currentMode() != InsetMath::UNDECIDED_MODE)
1356                                         m = at->currentMode();
1357                                 //lyxerr << "default creation: m2: " << m << endl;
1358                                 InsetMath::idx_type start = 0;
1359                                 // this fails on \bigg[...\bigg]
1360                                 //MathArray opt;
1361                                 //parse(opt, FLAG_OPTION, InsetMath::VERBATIM_MODE);
1362                                 //if (opt.size()) {
1363                                 //      start = 1;
1364                                 //      at.nucleus()->cell(0) = opt;
1365                                 //}
1366                                 for (InsetMath::idx_type i = start; i < at->nargs(); ++i) {
1367                                         parse(at.nucleus()->cell(i), FLAG_ITEM, m);
1368                                         skipSpaces();
1369                                 }
1370                                 cell->push_back(at);
1371                         }
1372                 }
1373
1374
1375                 if (flags & FLAG_LEAVE) {
1376                         flags &= ~FLAG_LEAVE;
1377                         break;
1378                 }
1379         }
1380 }
1381
1382
1383
1384 } // anonymous namespace
1385
1386
1387 void mathed_parse_cell(MathArray & ar, string const & str)
1388 {
1389         istringstream is(str);
1390         mathed_parse_cell(ar, is);
1391 }
1392
1393
1394 void mathed_parse_cell(MathArray & ar, istream & is)
1395 {
1396         Parser(is).parse(ar, 0, InsetMath::MATH_MODE);
1397 }
1398
1399
1400 bool mathed_parse_normal(MathAtom & t, string const & str)
1401 {
1402         istringstream is(str);
1403         return Parser(is).parse(t);
1404 }
1405
1406
1407 bool mathed_parse_normal(MathAtom & t, istream & is)
1408 {
1409         return Parser(is).parse(t);
1410 }
1411
1412
1413 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1414 {
1415         return Parser(lex).parse(t);
1416 }
1417
1418
1419 void mathed_parse_normal(InsetMathGrid & grid, string const & str)
1420 {
1421         istringstream is(str);
1422         Parser(is).parse1(grid, 0, InsetMath::MATH_MODE, false);
1423 }
1424
1425
1426 void initParser()
1427 {
1428         fill(theCatcode, theCatcode + 256, catOther);
1429         fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
1430         fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
1431
1432         theCatcode[int('\\')] = catEscape;
1433         theCatcode[int('{')]  = catBegin;
1434         theCatcode[int('}')]  = catEnd;
1435         theCatcode[int('$')]  = catMath;
1436         theCatcode[int('&')]  = catAlign;
1437         theCatcode[int('\n')] = catNewline;
1438         theCatcode[int('#')]  = catParameter;
1439         theCatcode[int('^')]  = catSuper;
1440         theCatcode[int('_')]  = catSub;
1441         theCatcode[int(0x7f)] = catIgnore;
1442         theCatcode[int(' ')]  = catSpace;
1443         theCatcode[int('\t')] = catSpace;
1444         theCatcode[int('\r')] = catNewline;
1445         theCatcode[int('~')]  = catActive;
1446         theCatcode[int('%')]  = catComment;
1447 }