]> git.lyx.org Git - lyx.git/blob - src/mathed/math_parser.C
* math_nestinset.C (lfunMousePress): on button 2 press, paste
[lyx.git] / src / mathed / math_parser.C
1 /**
2  * \file math_parser.C
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author André Pönitz
7  *
8  * Full author contact details are available in file CREDITS.
9  */
10
11 /*
12
13 If someone desperately needs partial "structures" (such as a few
14 cells of an array inset or similar) (s)he could use the
15 following hack as a starting point to write some macros:
16
17   \newif\ifcomment
18   \commentfalse
19   \ifcomment
20           \def\makeamptab{\catcode`\&=4\relax}
21           \def\makeampletter{\catcode`\&=11\relax}
22     \def\b{\makeampletter\expandafter\makeamptab\bi}
23     \long\def\bi#1\e{}
24   \else
25     \def\b{}\def\e{}
26   \fi
27
28   ...
29
30   \[\begin{array}{ccc}
31 1
32 &
33
34   \end{array}\]
35
36 */
37
38
39 #include <config.h>
40
41 #include "math_parser.h"
42 #include "math_arrayinset.h"
43 #include "math_biginset.h"
44 #include "math_braceinset.h"
45 #include "math_charinset.h"
46 #include "math_colorinset.h"
47 #include "math_commentinset.h"
48 #include "math_deliminset.h"
49 #include "math_envinset.h"
50 #include "math_factory.h"
51 #include "math_kerninset.h"
52 #include "math_macro.h"
53 #include "math_macroarg.h"
54 #include "math_macrotemplate.h"
55 #include "math_parinset.h"
56 #include "math_rootinset.h"
57 #include "math_scriptinset.h"
58 #include "math_sqrtinset.h"
59 #include "math_support.h"
60 #include "math_tabularinset.h"
61
62 //#include "insets/insetref.h"
63 #include "ref_inset.h"
64
65 #include "lyxlex.h"
66 #include "debug.h"
67
68 #include "support/convert.h"
69
70 #include <sstream>
71
72 using std::endl;
73 using std::fill;
74
75 using std::string;
76 using std::ios;
77 using std::istream;
78 using std::istringstream;
79 using std::ostream;
80 using std::vector;
81
82
83 //#define FILEDEBUG
84
85
86 namespace {
87
88 MathInset::mode_type asMode(MathInset::mode_type oldmode, string const & str)
89 {
90         //lyxerr << "handling mode: '" << str << "'" << endl;
91         if (str == "mathmode")
92                 return MathInset::MATH_MODE;
93         if (str == "textmode" || str == "forcetext")
94                 return MathInset::TEXT_MODE;
95         return oldmode;
96 }
97
98
/// \returns whether \p s is non-empty and ends with an asterisk.
bool stared(string const & s)
{
        if (s.empty())
                return false;
        return s[s.size() - 1] == '*';
}
104
105
106 /*!
107  * Add the row \p cellrow to \p grid.
108  * \returns wether the row could be added. Adding a row can fail for
109  * environments like "equation" that have a fixed number of rows.
110  */
111 bool addRow(MathGridInset & grid, MathGridInset::row_type & cellrow,
112             string const & vskip)
113 {
114         ++cellrow;
115         if (cellrow == grid.nrows()) {
116                 //lyxerr << "adding row " << cellrow << endl;
117                 grid.addRow(cellrow - 1);
118                 if (cellrow == grid.nrows()) {
119                         // We can't add a row to this grid, so let's
120                         // append the content of this cell to the previous
121                         // one.
122                         // This does not happen in well formed .lyx files,
123                         // but LyX versions 1.3.x and older could create
124                         // such files and tex2lyx can still do that.
125                         --cellrow;
126                         lyxerr << "ignoring extra row";
127                         if (!vskip.empty())
128                                 lyxerr << " with extra space " << vskip;
129                         lyxerr << '.' << endl;
130                         return false;
131                 }
132         }
133         grid.vcrskip(LyXLength(vskip), cellrow - 1);
134         return true;
135 }
136
137
138 /*!
139  * Add the column \p cellcol to \p grid.
140  * \returns wether the column could be added. Adding a column can fail for
141  * environments like "eqnarray" that have a fixed number of columns.
142  */
143 bool addCol(MathGridInset & grid, MathGridInset::col_type & cellcol)
144 {
145         ++cellcol;
146         if (cellcol == grid.ncols()) {
147                 //lyxerr << "adding column " << cellcol << endl;
148                 grid.addCol(cellcol - 1);
149                 if (cellcol == grid.ncols()) {
150                         // We can't add a column to this grid, so let's
151                         // append the content of this cell to the previous
152                         // one.
153                         // This does not happen in well formed .lyx files,
154                         // but LyX versions 1.3.x and older could create
155                         // such files and tex2lyx can still do that.
156                         --cellcol;
157                         lyxerr << "ignoring extra column." << endl;
158                         return false;
159                 }
160         }
161         return true;
162 }
163
164
165 /*!
166  * Check wether the last row is empty and remove it if yes.
167  * Otherwise the following code
168  * \verbatim
169 \begin{array}{|c|c|}
170 \hline
171 1 & 2 \\ \hline
172 3 & 4 \\ \hline
173 \end{array}
174  * \endverbatim
175  * will result in a grid with 3 rows (+ the dummy row that is always present),
176  * because the last '\\' opens a new row.
177  */
178 void delEmptyLastRow(MathGridInset & grid)
179 {
180         MathGridInset::row_type const row = grid.nrows() - 1;
181         for (MathGridInset::col_type col = 0; col < grid.ncols(); ++col) {
182                 if (!grid.cell(grid.index(row, col)).empty())
183                         return;
184         }
185         // Copy the row information of the empty row (which would contain the
186         // last hline in the example above) to the dummy row and delete the
187         // empty row.
188         grid.rowinfo(row + 1) = grid.rowinfo(row);
189         grid.delRow(row);
190 }
191
192
/// TeX's category codes. The numeric value of each enumerator is the
/// catcode number TeX uses; the trailing comment names a typical member.
enum CatCode {
        catEscape    = 0,   // backslash
        catBegin     = 1,   // {
        catEnd       = 2,   // }
        catMath      = 3,   // $
        catAlign     = 4,   // &
        catNewline   = 5,   // ^^M
        catParameter = 6,   // #
        catSuper     = 7,   // ^
        catSub       = 8,   // _
        catIgnore    = 9,
        catSpace     = 10,  // space
        catLetter    = 11,  // a-zA-Z
        catOther     = 12,  // none of the above
        catActive    = 13,  // ~
        catComment   = 14,  // %
        catInvalid   = 15   // <delete>
};
212
213 CatCode theCatcode[256];
214
215
216 inline CatCode catcode(unsigned char c)
217 {
218         return theCatcode[c];
219 }
220
221
/// Bit flags steering Parser::parse1(); they can be or'ed together.
enum {
        FLAG_ALIGN      = 0x0001,  //  next & or \\ ends the parsing process
        FLAG_BRACE_LAST = 0x0002,  //  next closing brace ends the parsing
        FLAG_RIGHT      = 0x0004,  //  next \right ends the parsing process
        FLAG_END        = 0x0008,  //  next \end ends the parsing process
        FLAG_BRACK_LAST = 0x0010,  //  next closing bracket ends the parsing
        FLAG_TEXTMODE   = 0x0020,  //  we are in a box
        FLAG_ITEM       = 0x0040,  //  read a (possibly braced) token
        FLAG_LEAVE      = 0x0080,  //  leave the loop at the end
        FLAG_SIMPLE     = 0x0100,  //  next $ leaves the loop
        FLAG_EQUATION   = 0x0200,  //  next \] leaves the loop
        FLAG_SIMPLE2    = 0x0400,  //  next \) leaves the loop
        FLAG_OPTION     = 0x0800,  //  read [...] style option
        FLAG_BRACED     = 0x1000   //  read {...} style argument
};
237
238
239 //
240 // Helper class for parsing
241 //
242
243 class Token {
244 public:
245         ///
246         Token() : cs_(), char_(0), cat_(catIgnore) {}
247         ///
248         Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
249         ///
250         Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
251
252         ///
253         string const & cs() const { return cs_; }
254         ///
255         CatCode cat() const { return cat_; }
256         ///
257         char character() const { return char_; }
258         ///
259         string asString() const { return cs_.size() ? cs_ : string(1, char_); }
260         ///
261         string asInput() const { return cs_.size() ? '\\' + cs_ : string(1, char_); }
262
263 private:
264         ///
265         string cs_;
266         ///
267         char char_;
268         ///
269         CatCode cat_;
270 };
271
272 ostream & operator<<(ostream & os, Token const & t)
273 {
274         if (t.cs().size())
275                 os << '\\' << t.cs();
276         else if (t.cat() == catLetter)
277                 os << t.character();
278         else
279                 os << '[' << t.character() << ',' << t.cat() << ']';
280         return os;
281 }
282
283
284 class Parser {
285 public:
286         ///
287         typedef  MathInset::mode_type mode_type;
288
289         ///
290         Parser(LyXLex & lex);
291         ///
292         Parser(istream & is);
293
294         ///
295         bool parse(MathAtom & at);
296         ///
297         void parse(MathArray & array, unsigned flags, mode_type mode);
298         ///
299         void parse1(MathGridInset & grid, unsigned flags, mode_type mode,
300                 bool numbered);
301         ///
302         MathArray parse(unsigned flags, mode_type mode);
303         ///
304         int lineno() const { return lineno_; }
305         ///
306         void putback();
307
308 private:
309         ///
310         void parse2(MathAtom & at, unsigned flags, mode_type mode, bool numbered);
311         /// get arg delimited by 'left' and 'right'
312         string getArg(char left, char right);
313         ///
314         char getChar();
315         ///
316         void error(string const & msg);
317         /// dump contents to screen
318         void dump() const;
319         ///
320         void tokenize(istream & is);
321         ///
322         void tokenize(string const & s);
323         ///
324         void skipSpaceTokens(istream & is, char c);
325         ///
326         void push_back(Token const & t);
327         ///
328         void pop_back();
329         ///
330         Token const & prevToken() const;
331         ///
332         Token const & nextToken() const;
333         ///
334         Token const & getToken();
335         /// skips spaces if any
336         void skipSpaces();
337         ///
338         void lex(string const & s);
339         ///
340         bool good() const;
341         ///
342         string parse_verbatim_item();
343         ///
344         string parse_verbatim_option();
345
346         ///
347         int lineno_;
348         ///
349         vector<Token> tokens_;
350         ///
351         unsigned pos_;
352         /// Stack of active environments
353         vector<string> environments_;
354 };
355
356
357 Parser::Parser(LyXLex & lexer)
358         : lineno_(lexer.getLineNo()), pos_(0)
359 {
360         tokenize(lexer.getStream());
361         lexer.eatLine();
362 }
363
364
365 Parser::Parser(istream & is)
366         : lineno_(0), pos_(0)
367 {
368         tokenize(is);
369 }
370
371
372 void Parser::push_back(Token const & t)
373 {
374         tokens_.push_back(t);
375 }
376
377
378 void Parser::pop_back()
379 {
380         tokens_.pop_back();
381 }
382
383
384 Token const & Parser::prevToken() const
385 {
386         static const Token dummy;
387         return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
388 }
389
390
391 Token const & Parser::nextToken() const
392 {
393         static const Token dummy;
394         return good() ? tokens_[pos_] : dummy;
395 }
396
397
398 Token const & Parser::getToken()
399 {
400         static const Token dummy;
401         //lyxerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << endl;
402         return good() ? tokens_[pos_++] : dummy;
403 }
404
405
406 void Parser::skipSpaces()
407 {
408         while (nextToken().cat() == catSpace || nextToken().cat() == catNewline)
409                 getToken();
410 }
411
412
413 void Parser::putback()
414 {
415         --pos_;
416 }
417
418
419 bool Parser::good() const
420 {
421         return pos_ < tokens_.size();
422 }
423
424
425 char Parser::getChar()
426 {
427         if (!good())
428                 error("The input stream is not well...");
429         return tokens_[pos_++].character();
430 }
431
432
433 string Parser::getArg(char left, char right)
434 {
435         skipSpaces();
436
437         string result;
438         char c = getChar();
439
440         if (c != left)
441                 putback();
442         else
443                 while ((c = getChar()) != right && good())
444                         result += c;
445
446         return result;
447 }
448
449
450 void Parser::skipSpaceTokens(istream & is, char c)
451 {
452         // skip trailing spaces
453         while (catcode(c) == catSpace || catcode(c) == catNewline)
454                 if (!is.get(c))
455                         break;
456         //lyxerr << "putting back: " << c << endl;
457         is.putback(c);
458 }
459
460
461 void Parser::tokenize(istream & is)
462 {
463         // eat everything up to the next \end_inset or end of stream
464         // and store it in s for further tokenization
465         string s;
466         char c;
467         while (is.get(c)) {
468                 s += c;
469                 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
470                         s = s.substr(0, s.size() - 10);
471                         break;
472                 }
473         }
474         // Remove the space after \end_inset
475         if (is.get(c) && c != ' ')
476                 is.unget();
477
478         // tokenize buffer
479         tokenize(s);
480 }
481
482
483 void Parser::tokenize(string const & buffer)
484 {
485         istringstream is(buffer, ios::in | ios::binary);
486
487         char c;
488         while (is.get(c)) {
489                 //lyxerr << "reading c: " << c << endl;
490
491                 switch (catcode(c)) {
492                         case catNewline: {
493                                 ++lineno_;
494                                 is.get(c);
495                                 if (catcode(c) == catNewline)
496                                         ; //push_back(Token("par"));
497                                 else {
498                                         push_back(Token('\n', catNewline));
499                                         is.putback(c);
500                                 }
501                                 break;
502                         }
503
504 /*
505                         case catComment: {
506                                 while (is.get(c) && catcode(c) != catNewline)
507                                         ;
508                                 ++lineno_;
509                                 break;
510                         }
511 */
512
513                         case catEscape: {
514                                 is.get(c);
515                                 if (!is) {
516                                         error("unexpected end of input");
517                                 } else {
518                                         string s(1, c);
519                                         if (catcode(c) == catLetter) {
520                                                 // collect letters
521                                                 while (is.get(c) && catcode(c) == catLetter)
522                                                         s += c;
523                                                 skipSpaceTokens(is, c);
524                                         }
525                                         push_back(Token(s));
526                                 }
527                                 break;
528                         }
529
530                         case catSuper:
531                         case catSub: {
532                                 push_back(Token(c, catcode(c)));
533                                 is.get(c);
534                                 skipSpaceTokens(is, c);
535                                 break;
536                         }
537
538                         case catIgnore: {
539                                 lyxerr << "ignoring a char: " << int(c) << endl;
540                                 break;
541                         }
542
543                         default:
544                                 push_back(Token(c, catcode(c)));
545                 }
546         }
547
548 #ifdef FILEDEBUG
549         dump();
550 #endif
551 }
552
553
554 void Parser::dump() const
555 {
556         lyxerr << "\nTokens: ";
557         for (unsigned i = 0; i < tokens_.size(); ++i) {
558                 if (i == pos_)
559                         lyxerr << " <#> ";
560                 lyxerr << tokens_[i];
561         }
562         lyxerr << " pos: " << pos_ << endl;
563 }
564
565
566 void Parser::error(string const & msg)
567 {
568         lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
569         dump();
570         //exit(1);
571 }
572
573
574 bool Parser::parse(MathAtom & at)
575 {
576         skipSpaces();
577         MathArray ar;
578         parse(ar, false, MathInset::UNDECIDED_MODE);
579         if (ar.size() != 1 || ar.front()->getType() == "none") {
580                 lyxerr << "unusual contents found: " << ar << endl;
581                 at = MathAtom(new MathParInset(ar));
582                 //if (at->nargs() > 0)
583                 //      at.nucleus()->cell(0) = ar;
584                 //else
585                 //      lyxerr << "unusual contents found: " << ar << endl;
586                 return true;
587         }
588         at = ar[0];
589         return true;
590 }
591
592
593 string Parser::parse_verbatim_option()
594 {
595         skipSpaces();
596         string res;
597         if (nextToken().character() == '[') {
598                 Token t = getToken();
599                 for (Token t = getToken(); t.character() != ']' && good(); t = getToken()) {
600                         if (t.cat() == catBegin) {
601                                 putback();
602                                 res += '{' + parse_verbatim_item() + '}';
603                         } else
604                                 res += t.asString();
605                 }
606         }
607         return res;
608 }
609
610
611 string Parser::parse_verbatim_item()
612 {
613         skipSpaces();
614         string res;
615         if (nextToken().cat() == catBegin) {
616                 Token t = getToken();
617                 for (Token t = getToken(); t.cat() != catEnd && good(); t = getToken()) {
618                         if (t.cat() == catBegin) {
619                                 putback();
620                                 res += '{' + parse_verbatim_item() + '}';
621                         }
622                         else
623                                 res += t.asString();
624                 }
625         }
626         return res;
627 }
628
629
630 MathArray Parser::parse(unsigned flags, mode_type mode)
631 {
632         MathArray ar;
633         parse(ar, flags, mode);
634         return ar;
635 }
636
637
638 void Parser::parse(MathArray & array, unsigned flags, mode_type mode)
639 {
640         MathGridInset grid(1, 1);
641         parse1(grid, flags, mode, false);
642         array = grid.cell(0);
643 }
644
645
646 void Parser::parse2(MathAtom & at, const unsigned flags, const mode_type mode,
647         const bool numbered)
648 {
649         parse1(*(at.nucleus()->asGridInset()), flags, mode, numbered);
650 }
651
652
653 void Parser::parse1(MathGridInset & grid, unsigned flags,
654         const mode_type mode, const bool numbered)
655 {
656         int limits = 0;
657         MathGridInset::row_type cellrow = 0;
658         MathGridInset::col_type cellcol = 0;
659         MathArray * cell = &grid.cell(grid.index(cellrow, cellcol));
660
661         if (grid.asHullInset())
662                 grid.asHullInset()->numbered(cellrow, numbered);
663
664         //dump();
665         //lyxerr << " flags: " << flags << endl;
666         //lyxerr << " mode: " << mode  << endl;
667         //lyxerr << "grid: " << grid << endl;
668
669         while (good()) {
670                 Token const & t = getToken();
671
672 #ifdef FILEDEBUG
673                 lyxerr << "t: " << t << " flags: " << flags << endl;
674                 lyxerr << "mode: " << mode  << endl;
675                 cell->dump();
676                 lyxerr << endl;
677 #endif
678
679                 if (flags & FLAG_ITEM) {
680
681                         if (t.cat() == catBegin) {
682                                 // skip the brace and collect everything to the next matching
683                                 // closing brace
684                                 parse1(grid, FLAG_BRACE_LAST, mode, numbered);
685                                 return;
686                         }
687
688                         // handle only this single token, leave the loop if done
689                         flags = FLAG_LEAVE;
690                 }
691
692
693                 if (flags & FLAG_BRACED) {
694                         if (t.cat() == catSpace)
695                                 continue;
696
697                         if (t.cat() != catBegin) {
698                                 error("opening brace expected");
699                                 return;
700                         }
701
702                         // skip the brace and collect everything to the next matching
703                         // closing brace
704                         flags = FLAG_BRACE_LAST;
705                 }
706
707
708                 if (flags & FLAG_OPTION) {
709                         if (t.cat() == catOther && t.character() == '[') {
710                                 MathArray ar;
711                                 parse(ar, FLAG_BRACK_LAST, mode);
712                                 cell->append(ar);
713                         } else {
714                                 // no option found, put back token and we are done
715                                 putback();
716                         }
717                         return;
718                 }
719
720                 //
721                 // cat codes
722                 //
723                 if (t.cat() == catMath) {
724                         if (mode != MathInset::MATH_MODE) {
725                                 // we are inside some text mode thingy, so opening new math is allowed
726                                 Token const & n = getToken();
727                                 if (n.cat() == catMath) {
728                                         // TeX's $$...$$ syntax for displayed math
729                                         cell->push_back(MathAtom(new MathHullInset("equation")));
730                                         parse2(cell->back(), FLAG_SIMPLE, MathInset::MATH_MODE, false);
731                                         getToken(); // skip the second '$' token
732                                 } else {
733                                         // simple $...$  stuff
734                                         putback();
735                                         cell->push_back(MathAtom(new MathHullInset("simple")));
736                                         parse2(cell->back(), FLAG_SIMPLE, MathInset::MATH_MODE, false);
737                                 }
738                         }
739
740                         else if (flags & FLAG_SIMPLE) {
741                                 // this is the end of the formula
742                                 return;
743                         }
744
745                         else {
746                                 error("something strange in the parser");
747                                 break;
748                         }
749                 }
750
751                 else if (t.cat() == catLetter)
752                         cell->push_back(MathAtom(new MathCharInset(t.character())));
753
754                 else if (t.cat() == catSpace && mode != MathInset::MATH_MODE) {
755                         if (cell->empty() || cell->back()->getChar() != ' ')
756                                 cell->push_back(MathAtom(new MathCharInset(t.character())));
757                 }
758
759                 else if (t.cat() == catNewline && mode != MathInset::MATH_MODE) {
760                         if (cell->empty() || cell->back()->getChar() != ' ')
761                                 cell->push_back(MathAtom(new MathCharInset(' ')));
762                 }
763
764                 else if (t.cat() == catParameter) {
765                         Token const & n = getToken();
766                         cell->push_back(MathAtom(new MathMacroArgument(n.character()-'0')));
767                 }
768
769                 else if (t.cat() == catActive)
770                         cell->push_back(MathAtom(new MathCharInset(t.character())));
771
772                 else if (t.cat() == catBegin) {
773                         MathArray ar;
774                         parse(ar, FLAG_BRACE_LAST, mode);
775                         // do not create a BraceInset if they were written by LyX
776                         // this helps to keep the annoyance of  "a choose b"  to a minimum
777                         if (ar.size() == 1 && ar[0]->extraBraces())
778                                 cell->append(ar);
779                         else
780                                 cell->push_back(MathAtom(new MathBraceInset(ar)));
781                 }
782
783                 else if (t.cat() == catEnd) {
784                         if (flags & FLAG_BRACE_LAST)
785                                 return;
786                         error("found '}' unexpectedly");
787                         //BOOST_ASSERT(false);
788                         //add(cell, '}', LM_TC_TEX);
789                 }
790
791                 else if (t.cat() == catAlign) {
792                         //lyxerr << " column now " << (cellcol + 1)
793                         //       << " max: " << grid.ncols() << endl;
794                         if (flags & FLAG_ALIGN)
795                                 return;
796                         if (addCol(grid, cellcol))
797                                 cell = &grid.cell(grid.index(cellrow, cellcol));
798                 }
799
800                 else if (t.cat() == catSuper || t.cat() == catSub) {
801                         bool up = (t.cat() == catSuper);
802                         // we need no new script inset if the last thing was a scriptinset,
803                         // which has that script already not the same script already
804                         if (!cell->size())
805                                 cell->push_back(MathAtom(new MathScriptInset(up)));
806                         else if (cell->back()->asScriptInset() &&
807                                         !cell->back()->asScriptInset()->has(up))
808                                 cell->back().nucleus()->asScriptInset()->ensure(up);
809                         else if (cell->back()->asScriptInset())
810                                 cell->push_back(MathAtom(new MathScriptInset(up)));
811                         else
812                                 cell->back() = MathAtom(new MathScriptInset(cell->back(), up));
813                         MathScriptInset * p = cell->back().nucleus()->asScriptInset();
814                         // special handling of {}-bases
815                         // is this always correct?
816                         // It appears that this is wrong (Dekel)
817                         //if (p->nuc().size() == 1 && p->nuc().back()->asNestInset() &&
818                         //    p->nuc().back()->extraBraces())
819                         //      p->nuc() = p->nuc().back()->asNestInset()->cell(0);
820                         parse(p->cell(p->idxOfScript(up)), FLAG_ITEM, mode);
821                         if (limits) {
822                                 p->limits(limits);
823                                 limits = 0;
824                         }
825                 }
826
827                 else if (t.character() == ']' && (flags & FLAG_BRACK_LAST)) {
828                         //lyxerr << "finished reading option" << endl;
829                         return;
830                 }
831
832                 else if (t.cat() == catOther)
833                         cell->push_back(MathAtom(new MathCharInset(t.character())));
834
835                 else if (t.cat() == catComment) {
836                         string s;
837                         while (good()) {
838                                 Token const & t = getToken();
839                                 if (t.cat() == catNewline)
840                                         break;
841                                 s += t.asString();
842                         }
843                         cell->push_back(MathAtom(new MathCommentInset(s)));
844                         skipSpaces();
845                 }
846
847                 //
848                 // control sequences
849                 //
850
851                 else if (t.cs() == "lyxlock") {
852                         if (cell->size())
853                                 cell->back().nucleus()->lock(true);
854                 }
855
856                 else if (t.cs() == "def" ||
857                         t.cs() == "newcommand" ||
858                         t.cs() == "renewcommand")
859                 {
860                         string const type = t.cs();
861                         string name;
862                         int nargs = 0;
863                         if (t.cs() == "def") {
864                                 // get name
865                                 name = getToken().cs();
866
867                                 // read parameter
868                                 string pars;
869                                 while (good() && nextToken().cat() != catBegin) {
870                                         pars += getToken().cs();
871                                         ++nargs;
872                                 }
873                                 nargs /= 2;
874                                 //lyxerr << "read \\def parameter list '" << pars << "'" << endl;
875
876                         } else { // t.cs() == "newcommand" || t.cs() == "renewcommand"
877
878                                 if (getToken().cat() != catBegin) {
879                                         error("'{' in \\newcommand expected (1) ");
880                                         return;
881                                 }
882
883                                 name = getToken().cs();
884
885                                 if (getToken().cat() != catEnd) {
886                                         error("'}' in \\newcommand expected");
887                                         return;
888                                 }
889
890                                 string const arg  = getArg('[', ']');
891                                 if (!arg.empty())
892                                         nargs = convert<int>(arg);
893
894                         }
895
896                         MathArray ar1;
897                         parse(ar1, FLAG_ITEM, MathInset::UNDECIDED_MODE);
898
899                         // we cannot handle recursive stuff at all
900                         //MathArray test;
901                         //test.push_back(createMathInset(name));
902                         //if (ar1.contains(test)) {
903                         //      error("we cannot handle recursive macros at all.");
904                         //      return;
905                         //}
906
907                         // is a version for display attached?
908                         skipSpaces();
909                         MathArray ar2;
910                         if (nextToken().cat() == catBegin)
911                                 parse(ar2, FLAG_ITEM, MathInset::MATH_MODE);
912
913                         cell->push_back(MathAtom(new MathMacroTemplate(name, nargs, type,
914                                 ar1, ar2)));
915                 }
916
917                 else if (t.cs() == "(") {
918                         cell->push_back(MathAtom(new MathHullInset("simple")));
919                         parse2(cell->back(), FLAG_SIMPLE2, MathInset::MATH_MODE, false);
920                 }
921
922                 else if (t.cs() == "[") {
923                         cell->push_back(MathAtom(new MathHullInset("equation")));
924                         parse2(cell->back(), FLAG_EQUATION, MathInset::MATH_MODE, false);
925                 }
926
927                 else if (t.cs() == "protect")
928                         // ignore \\protect, will hopefully be re-added during output
929                         ;
930
931                 else if (t.cs() == "end") {
932                         if (flags & FLAG_END) {
933                                 // eat environment name
934                                 string const name = getArg('{', '}');
935                                 if (environments_.empty())
936                                         error("'found \\end{" + name +
937                                               "}' without matching '\\begin{" +
938                                               name + "}'");
939                                 else if (name != environments_.back())
940                                         error("'\\end{" + name +
941                                               "}' does not match '\\begin{" +
942                                               environments_.back() + "}'");
943                                 else {
944                                         environments_.pop_back();
945                                         // Delete empty last row in matrix
946                                         // like insets.
947                                         // If you abuse MathGridInset for
948                                         // non-matrix like structures you
949                                         // probably need to refine this test.
950                                         // Right now we only have to test for
951                                         // single line hull insets.
952                                         if (grid.nrows() > 1)
953                                                 delEmptyLastRow(grid);
954                                         return;
955                                 }
956                         } else
957                                 error("found 'end' unexpectedly");
958                 }
959
960                 else if (t.cs() == ")") {
961                         if (flags & FLAG_SIMPLE2)
962                                 return;
963                         error("found '\\)' unexpectedly");
964                 }
965
966                 else if (t.cs() == "]") {
967                         if (flags & FLAG_EQUATION)
968                                 return;
969                         error("found '\\]' unexpectedly");
970                 }
971
972                 else if (t.cs() == "\\") {
973                         if (flags & FLAG_ALIGN)
974                                 return;
975                         if (addRow(grid, cellrow, getArg('[', ']'))) {
976                                 cellcol = 0;
977                                 if (grid.asHullInset())
978                                         grid.asHullInset()->numbered(
979                                                         cellrow, numbered);
980                                 cell = &grid.cell(grid.index(cellrow,
981                                                              cellcol));
982                         }
983                 }
984
985 #if 0
986                 else if (t.cs() == "multicolumn") {
987                         // extract column count and insert dummy cells
988                         MathArray count;
989                         parse(count, FLAG_ITEM, mode);
990                         int cols = 1;
991                         if (!extractNumber(count, cols)) {
992                                 lyxerr << " can't extract number of cells from " << count << endl;
993                         }
994                         // resize the table if necessary
995                         for (int i = 0; i < cols; ++i) {
996                                 if (addCol(grid, cellcol)) {
997                                         cell = &grid.cell(grid.index(
998                                                         cellrow, cellcol));
999                                         // mark this as dummy
1000                                         grid.cellinfo(grid.index(
1001                                                 cellrow, cellcol)).dummy_ = true;
1002                                 }
1003                         }
1004                         // the last cell is the real thing, not a dummy
1005                         grid.cellinfo(grid.index(cellrow, cellcol)).dummy_ = false;
1006
1007                         // read special alignment
1008                         MathArray align;
1009                         parse(align, FLAG_ITEM, mode);
1010                         //grid.cellinfo(grid.index(cellrow, cellcol)).align_ = extractString(align);
1011
1012                         // parse the remaining contents into the "real" cell
1013                         parse(*cell, FLAG_ITEM, mode);
1014                 }
1015 #endif
1016
1017                 else if (t.cs() == "limits")
1018                         limits = 1;
1019
1020                 else if (t.cs() == "nolimits")
1021                         limits = -1;
1022
1023                 else if (t.cs() == "nonumber") {
1024                         if (grid.asHullInset())
1025                                 grid.asHullInset()->numbered(cellrow, false);
1026                 }
1027
1028                 else if (t.cs() == "number") {
1029                         if (grid.asHullInset())
1030                                 grid.asHullInset()->numbered(cellrow, true);
1031                 }
1032
1033                 else if (t.cs() == "hline") {
1034                         grid.rowinfo(cellrow).lines_ ++;
1035                 }
1036
1037                 else if (t.cs() == "sqrt") {
1038                         MathArray ar;
1039                         parse(ar, FLAG_OPTION, mode);
1040                         if (ar.size()) {
1041                                 cell->push_back(MathAtom(new MathRootInset));
1042                                 cell->back().nucleus()->cell(0) = ar;
1043                                 parse(cell->back().nucleus()->cell(1), FLAG_ITEM, mode);
1044                         } else {
1045                                 cell->push_back(MathAtom(new MathSqrtInset));
1046                                 parse(cell->back().nucleus()->cell(0), FLAG_ITEM, mode);
1047                         }
1048                 }
1049
1050                 else if (t.cs() == "xrightarrow" || t.cs() == "xleftarrow") {
1051                         cell->push_back(createMathInset(t.cs()));
1052                         parse(cell->back().nucleus()->cell(1), FLAG_OPTION, mode);
1053                         parse(cell->back().nucleus()->cell(0), FLAG_ITEM, mode);
1054                 }
1055
1056                 else if (t.cs() == "ref" || t.cs() == "prettyref" ||
1057                                 t.cs() == "pageref" || t.cs() == "vpageref" || t.cs() == "vref") {
1058                         cell->push_back(MathAtom(new RefInset(t.cs())));
1059                         parse(cell->back().nucleus()->cell(1), FLAG_OPTION, mode);
1060                         parse(cell->back().nucleus()->cell(0), FLAG_ITEM, mode);
1061                 }
1062
1063                 else if (t.cs() == "left") {
1064                         skipSpaces();
1065                         Token const & tl = getToken();
1066                         // \| and \Vert are equivalent, and MathDelimInset
1067                         // can't handle \|
1068                         // FIXME: fix this in MathDelimInset itself!
1069                         string const l = tl.cs() == "|" ? "Vert" : tl.asString();
1070                         MathArray ar;
1071                         parse(ar, FLAG_RIGHT, mode);
1072                         skipSpaces();
1073                         Token const & tr = getToken();
1074                         string const r = tr.cs() == "|" ? "Vert" : tr.asString();
1075                         cell->push_back(MathAtom(new MathDelimInset(l, r, ar)));
1076                 }
1077
1078                 else if (t.cs() == "right") {
1079                         if (flags & FLAG_RIGHT)
1080                                 return;
1081                         //lyxerr << "got so far: '" << cell << "'" << endl;
1082                         error("Unmatched right delimiter");
1083                         return;
1084                 }
1085
1086                 else if (t.cs() == "begin") {
1087                         string const name = getArg('{', '}');
1088                         environments_.push_back(name);
1089
1090                         if (name == "array" || name == "subarray") {
1091                                 string const valign = parse_verbatim_option() + 'c';
1092                                 string const halign = parse_verbatim_item();
1093                                 cell->push_back(MathAtom(new MathArrayInset(name, valign[0], halign)));
1094                                 parse2(cell->back(), FLAG_END, mode, false);
1095                         }
1096
1097                         else if (name == "tabular") {
1098                                 string const valign = parse_verbatim_option() + 'c';
1099                                 string const halign = parse_verbatim_item();
1100                                 cell->push_back(MathAtom(new MathTabularInset(name, valign[0], halign)));
1101                                 parse2(cell->back(), FLAG_END, MathInset::TEXT_MODE, false);
1102                         }
1103
1104                         else if (name == "split" || name == "cases" ||
1105                                  name == "gathered" || name == "aligned") {
1106                                 cell->push_back(createMathInset(name));
1107                                 parse2(cell->back(), FLAG_END, mode, false);
1108                         }
1109
1110                         else if (name == "alignedat") {
1111                                 // ignore this for a while
1112                                 getArg('{', '}');
1113                                 cell->push_back(createMathInset(name));
1114                                 parse2(cell->back(), FLAG_END, mode, false);
1115                         }
1116
1117                         else if (name == "math") {
1118                                 cell->push_back(MathAtom(new MathHullInset("simple")));
1119                                 parse2(cell->back(), FLAG_END, MathInset::MATH_MODE, true);
1120                         }
1121
1122                         else if (name == "equation" || name == "equation*"
1123                                         || name == "displaymath") {
1124                                 cell->push_back(MathAtom(new MathHullInset("equation")));
1125                                 parse2(cell->back(), FLAG_END, MathInset::MATH_MODE, (name == "equation"));
1126                         }
1127
1128                         else if (name == "eqnarray" || name == "eqnarray*") {
1129                                 cell->push_back(MathAtom(new MathHullInset("eqnarray")));
1130                                 parse2(cell->back(), FLAG_END, MathInset::MATH_MODE, !stared(name));
1131                         }
1132
1133                         else if (name == "align" || name == "align*") {
1134                                 cell->push_back(MathAtom(new MathHullInset("align")));
1135                                 parse2(cell->back(), FLAG_END, MathInset::MATH_MODE, !stared(name));
1136                         }
1137
1138                         else if (name == "flalign" || name == "flalign*") {
1139                                 cell->push_back(MathAtom(new MathHullInset("flalign")));
1140                                 parse2(cell->back(), FLAG_END, MathInset::MATH_MODE, !stared(name));
1141                         }
1142
1143                         else if (name == "alignat" || name == "alignat*") {
1144                                 // ignore this for a while
1145                                 getArg('{', '}');
1146                                 cell->push_back(MathAtom(new MathHullInset("alignat")));
1147                                 parse2(cell->back(), FLAG_END, MathInset::MATH_MODE, !stared(name));
1148                         }
1149
1150                         else if (name == "xalignat" || name == "xalignat*") {
1151                                 // ignore this for a while
1152                                 getArg('{', '}');
1153                                 cell->push_back(MathAtom(new MathHullInset("xalignat")));
1154                                 parse2(cell->back(), FLAG_END, MathInset::MATH_MODE, !stared(name));
1155                         }
1156
1157                         else if (name == "xxalignat") {
1158                                 // ignore this for a while
1159                                 getArg('{', '}');
1160                                 cell->push_back(MathAtom(new MathHullInset("xxalignat")));
1161                                 parse2(cell->back(), FLAG_END, MathInset::MATH_MODE, !stared(name));
1162                         }
1163
1164                         else if (name == "multline" || name == "multline*") {
1165                                 cell->push_back(MathAtom(new MathHullInset("multline")));
1166                                 parse2(cell->back(), FLAG_END, MathInset::MATH_MODE, !stared(name));
1167                         }
1168
1169                         else if (name == "gather" || name == "gather*") {
1170                                 cell->push_back(MathAtom(new MathHullInset("gather")));
1171                                 parse2(cell->back(), FLAG_END, MathInset::MATH_MODE, !stared(name));
1172                         }
1173
1174                         else if (latexkeys const * l = in_word_set(name)) {
1175                                 if (l->inset == "matrix") {
1176                                         cell->push_back(createMathInset(name));
1177                                         parse2(cell->back(), FLAG_END, mode, false);
1178                                 }
1179                         }
1180
1181                         else {
1182                                 dump();
1183                                 lyxerr << "found unknown math environment '" << name << "'" << endl;
1184                                 // create generic environment inset
1185                                 cell->push_back(MathAtom(new MathEnvInset(name)));
1186                                 parse(cell->back().nucleus()->cell(0), FLAG_ITEM, mode);
1187                         }
1188                 }
1189
1190                 else if (t.cs() == "kern") {
1191 #ifdef WITH_WARNINGS
1192 #warning A hack...
1193 #endif
1194                         string s;
1195                         while (true) {
1196                                 Token const & t = getToken();
1197                                 if (!good()) {
1198                                         putback();
1199                                         break;
1200                                 }
1201                                 s += t.character();
1202                                 if (isValidLength(s))
1203                                         break;
1204                         }
1205                         cell->push_back(MathAtom(new MathKernInset(s)));
1206                 }
1207
1208                 else if (t.cs() == "label") {
1209                         // FIXME: This is swallowed in inline formulas
1210                         string label = parse_verbatim_item();
1211                         MathArray ar;
1212                         asArray(label, ar);
1213                         if (grid.asHullInset()) {
1214                                 grid.asHullInset()->label(cellrow, label);
1215                         } else {
1216                                 cell->push_back(createMathInset(t.cs()));
1217                                 cell->push_back(MathAtom(new MathBraceInset(ar)));
1218                         }
1219                 }
1220
1221                 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
1222                         MathAtom at = createMathInset(t.cs());
1223                         at.nucleus()->cell(0) = *cell;
1224                         cell->clear();
1225                         parse(at.nucleus()->cell(1), flags, mode);
1226                         cell->push_back(at);
1227                         return;
1228                 }
1229
1230                 else if (t.cs() == "color") {
1231                         string const color = parse_verbatim_item();
1232                         cell->push_back(MathAtom(new MathColorInset(true, color)));
1233                         parse(cell->back().nucleus()->cell(0), flags, mode);
1234                         return;
1235                 }
1236
1237                 else if (t.cs() == "textcolor") {
1238                         string const color = parse_verbatim_item();
1239                         cell->push_back(MathAtom(new MathColorInset(false, color)));
1240                         parse(cell->back().nucleus()->cell(0), FLAG_ITEM, MathInset::TEXT_MODE);
1241                 }
1242
1243                 else if (t.cs() == "normalcolor") {
1244                         cell->push_back(createMathInset(t.cs()));
1245                         parse(cell->back().nucleus()->cell(0), flags, mode);
1246                         return;
1247                 }
1248
1249                 else if (t.cs() == "substack") {
1250                         cell->push_back(createMathInset(t.cs()));
1251                         parse2(cell->back(), FLAG_ITEM, mode, false);
1252                 }
1253
1254                 else if (t.cs() == "xymatrix") {
1255                         cell->push_back(createMathInset(t.cs()));
1256                         parse2(cell->back(), FLAG_ITEM, mode, false);
1257                 }
1258
1259                 else if (t.cs() == "framebox" || t.cs() == "makebox") {
1260                         cell->push_back(createMathInset(t.cs()));
1261                         parse(cell->back().nucleus()->cell(0), FLAG_OPTION, MathInset::TEXT_MODE);
1262                         parse(cell->back().nucleus()->cell(1), FLAG_OPTION, MathInset::TEXT_MODE);
1263                         parse(cell->back().nucleus()->cell(2), FLAG_ITEM, MathInset::TEXT_MODE);
1264                 }
1265
1266                 else if (t.cs() == "tag") {
1267                         if (nextToken().character() == '*') {
1268                                 getToken();
1269                                 cell->push_back(createMathInset(t.cs() + '*'));
1270                         } else
1271                                 cell->push_back(createMathInset(t.cs()));
1272                         parse(cell->back().nucleus()->cell(0), FLAG_ITEM, MathInset::TEXT_MODE);
1273                 }
1274
1275 #if 0
1276                 else if (t.cs() == "infer") {
1277                         MathArray ar;
1278                         parse(ar, FLAG_OPTION, mode);
1279                         cell->push_back(createMathInset(t.cs()));
1280                         parse2(cell->back(), FLAG_ITEM, mode, false);
1281                 }
1282
1283                 // Disabled
1284                 else if (1 && t.cs() == "ar") {
1285                         auto_ptr<MathXYArrowInset> p(new MathXYArrowInset);
1286                         // try to read target
1287                         parse(p->cell(0), FLAG_OTPTION, mode);
1288                         // try to read label
1289                         if (nextToken().cat() == catSuper || nextToken().cat() == catSub) {
1290                                 p->up_ = nextToken().cat() == catSuper;
1291                                 getToken();
1292                                 parse(p->cell(1), FLAG_ITEM, mode);
1293                                 //lyxerr << "read label: " << p->cell(1) << endl;
1294                         }
1295
1296                         cell->push_back(MathAtom(p.release()));
1297                         //lyxerr << "read cell: " << cell << endl;
1298                 }
1299 #endif
1300
1301                 else if (t.cs().size()) {
1302                         latexkeys const * l = in_word_set(t.cs());
1303                         if (l) {
1304                                 if (l->inset == "big") {
1305                                         skipSpaces();
1306                                         string const delim = getToken().asInput();
1307                                         if (MathBigInset::isBigInsetDelim(delim))
1308                                                 cell->push_back(MathAtom(
1309                                                         new MathBigInset(t.cs(), delim)));
1310                                         else {
1311                                                 cell->push_back(createMathInset(t.cs()));
1312                                                 cell->push_back(createMathInset(
1313                                                                 delim.substr(1)));
1314                                         }
1315                                 }
1316
1317                                 else if (l->inset == "font") {
1318                                         cell->push_back(createMathInset(t.cs()));
1319                                         parse(cell->back().nucleus()->cell(0),
1320                                                 FLAG_ITEM, asMode(mode, l->extra));
1321                                 }
1322
1323                                 else if (l->inset == "oldfont") {
1324                                         cell->push_back(createMathInset(t.cs()));
1325                                         parse(cell->back().nucleus()->cell(0),
1326                                                 flags | FLAG_ALIGN, asMode(mode, l->extra));
1327                                         if (prevToken().cat() != catAlign &&
1328                                             prevToken().cs() != "\\")
1329                                                 return;
1330                                         putback();
1331                                 }
1332
1333                                 else if (l->inset == "style") {
1334                                         cell->push_back(createMathInset(t.cs()));
1335                                         parse(cell->back().nucleus()->cell(0),
1336                                                 flags | FLAG_ALIGN, mode);
1337                                         if (prevToken().cat() != catAlign &&
1338                                             prevToken().cs() != "\\")
1339                                                 return;
1340                                         putback();
1341                                 }
1342
1343                                 else {
1344                                         MathAtom at = createMathInset(t.cs());
1345                                         for (MathInset::idx_type i = 0; i < at->nargs(); ++i)
1346                                                 parse(at.nucleus()->cell(i),
1347                                                         FLAG_ITEM, asMode(mode, l->extra));
1348                                         cell->push_back(at);
1349                                 }
1350                         }
1351
1352                         else {
1353                                 MathAtom at = createMathInset(t.cs());
1354                                 MathInset::mode_type m = mode;
1355                                 //if (m == MathInset::UNDECIDED_MODE)
1356                                 //lyxerr << "default creation: m1: " << m << endl;
1357                                 if (at->currentMode() != MathInset::UNDECIDED_MODE)
1358                                         m = at->currentMode();
1359                                 //lyxerr << "default creation: m2: " << m << endl;
1360                                 MathInset::idx_type start = 0;
1361                                 // this fails on \bigg[...\bigg]
1362                                 //MathArray opt;
1363                                 //parse(opt, FLAG_OPTION, MathInset::VERBATIM_MODE);
1364                                 //if (opt.size()) {
1365                                 //      start = 1;
1366                                 //      at.nucleus()->cell(0) = opt;
1367                                 //}
1368                                 for (MathInset::idx_type i = start; i < at->nargs(); ++i) {
1369                                         parse(at.nucleus()->cell(i), FLAG_ITEM, m);
1370                                         skipSpaces();
1371                                 }
1372                                 cell->push_back(at);
1373                         }
1374                 }
1375
1376
1377                 if (flags & FLAG_LEAVE) {
1378                         flags &= ~FLAG_LEAVE;
1379                         break;
1380                 }
1381         }
1382 }
1383
1384
1385
1386 } // anonymous namespace
1387
1388
1389 void mathed_parse_cell(MathArray & ar, string const & str)
1390 {
1391         istringstream is(str);
1392         mathed_parse_cell(ar, is);
1393 }
1394
1395
1396 void mathed_parse_cell(MathArray & ar, istream & is)
1397 {
1398         Parser(is).parse(ar, 0, MathInset::MATH_MODE);
1399 }
1400
1401
1402 bool mathed_parse_normal(MathAtom & t, string const & str)
1403 {
1404         istringstream is(str);
1405         return Parser(is).parse(t);
1406 }
1407
1408
1409 bool mathed_parse_normal(MathAtom & t, istream & is)
1410 {
1411         return Parser(is).parse(t);
1412 }
1413
1414
1415 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1416 {
1417         return Parser(lex).parse(t);
1418 }
1419
1420
1421 void mathed_parse_normal(MathGridInset & grid, string const & str)
1422 {
1423         istringstream is(str);
1424         Parser(is).parse1(grid, 0, MathInset::MATH_MODE, false);
1425 }
1426
1427
1428 void initParser()
1429 {
1430         fill(theCatcode, theCatcode + 256, catOther);
1431         fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
1432         fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
1433
1434         theCatcode[int('\\')] = catEscape;
1435         theCatcode[int('{')]  = catBegin;
1436         theCatcode[int('}')]  = catEnd;
1437         theCatcode[int('$')]  = catMath;
1438         theCatcode[int('&')]  = catAlign;
1439         theCatcode[int('\n')] = catNewline;
1440         theCatcode[int('#')]  = catParameter;
1441         theCatcode[int('^')]  = catSuper;
1442         theCatcode[int('_')]  = catSub;
1443         theCatcode[int(0x7f)] = catIgnore;
1444         theCatcode[int(' ')]  = catSpace;
1445         theCatcode[int('\t')] = catSpace;
1446         theCatcode[int('\r')] = catNewline;
1447         theCatcode[int('~')]  = catActive;
1448         theCatcode[int('%')]  = catComment;
1449 }