2 \author André Pönitz (2001)
7 If someone desperately needs partial "structures" (such as a few cells of
8 an array inset or similar), (s)he could use the following hack as a starting
9 point to write some macros:
14 \def\makeamptab{\catcode`\&=4\relax}
15 \def\makeampletter{\catcode`\&=11\relax}
16 \def\b{\makeampletter\expandafter\makeamptab\bi}
36 #pragma implementation
39 #include "math_parser.h"
40 #include "math_inset.h"
41 #include "math_arrayinset.h"
42 #include "math_braceinset.h"
43 #include "math_boxinset.h"
44 #include "math_charinset.h"
45 #include "math_deliminset.h"
46 #include "math_factory.h"
47 #include "math_kerninset.h"
48 #include "math_macro.h"
49 #include "math_macrotable.h"
50 #include "math_macrotemplate.h"
51 #include "math_hullinset.h"
52 #include "math_rootinset.h"
53 #include "math_sizeinset.h"
54 #include "math_sqrtinset.h"
55 #include "math_scriptinset.h"
56 #include "math_specialcharinset.h"
57 #include "math_sqrtinset.h"
58 #include "math_support.h"
59 #include "math_xyarrowinset.h"
63 #include "support/LAssert.h"
64 #include "support/lstrings.h"
/// Tell whether \p s names the starred variant of an environment, i.e.
/// whether it is non-empty and its last character is '*' (as in "align*").
/// Returns false for the empty string.
bool stared(string const & s)
{
	// Check the length first so that s[n - 1] is never evaluated for an
	// empty string.
	string::size_type const n = s.size();
	return n > 0 && s[n - 1] == '*';
}
// Appends the character c to ar: a MathCharInset holding c is allocated,
// wrapped in a MathAtom, and pushed onto the end of the array.
91 void add(MathArray & ar, char c)
93 ar.push_back(MathAtom(new MathCharInset(c)));
97 // These are TeX's catcodes
// The trailing comment on each enumerator gives its TeX category-code
// number followed by the characters that carry it. Only some of the
// enumerators are visible in this excerpt.
99 catEscape, // 0 backslash
109 catSpace, // 10 space
110 catLetter, // 11 a-zA-Z
111 catOther, // 12 none of the above
114 catInvalid // 15 <delete>
// Table with one category code per possible unsigned char value (256
// entries). It is read through catcode() and filled by the fill()/assignment
// statements further down in this file.
117 CatCode theCatcode[256];
// Returns the category code stored in theCatcode for character c.
// The parameter is an unsigned char, so the value used as array index is
// never negative even when the caller passes a plain (possibly signed) char.
120 inline CatCode catcode(unsigned char c)
122 return theCatcode[c];
// Bit flags for the `flags` argument of the parse_into* functions. Each bit
// enables one stop condition or parsing mode; callers combine them with '|'
// (for example FLAG_ITEM | FLAG_TEXTMODE). Bit 0 is not among the
// enumerators visible in this excerpt.
127 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing process
128 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
129 FLAG_END = 1 << 3, // next \\end ends the parsing process
130 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing process
131 FLAG_TEXTMODE = 1 << 5, // we are in a box
132 FLAG_ITEM = 1 << 6, // read a (possibly braced) token
133 FLAG_BLOCK = 1 << 7, // next block ends the parsing process
134 FLAG_BLOCK2 = 1 << 8, // next block2 ends the parsing process
135 FLAG_LEAVE = 1 << 9, // leave the loop at the end
136 FLAG_SIMPLE = 1 << 10, // next $ leaves the loop
137 FLAG_EQUATION = 1 << 11, // next \] leaves the loop
138 FLAG_SIMPLE2 = 1 << 12 // next \) leaves the loop
144 fill(theCatcode, theCatcode + 256, catOther);
145 fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
146 fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
148 theCatcode['\\'] = catEscape;
149 theCatcode['{'] = catBegin;
150 theCatcode['}'] = catEnd;
151 theCatcode['$'] = catMath;
152 theCatcode['&'] = catAlign;
153 theCatcode['\n'] = catNewline;
154 theCatcode['#'] = catParameter;
155 theCatcode['^'] = catSuper;
156 theCatcode['_'] = catSub;
157 theCatcode['
\7f'] = catIgnore;
158 theCatcode[' '] = catSpace;
159 theCatcode['\t'] = catSpace;
160 theCatcode['\r'] = catSpace;
161 theCatcode['~'] = catActive;
162 theCatcode['%'] = catComment;
168 // Helper class for parsing
// A token is either a control sequence (its name is kept in cs_) or a
// single character together with its category code (char_ and cat_).
// Only part of the class is visible in this excerpt.
// Default token: empty name, character 0, category catIgnore.
174 Token() : cs_(), char_(0), cat_(catIgnore) {}
// Character token: stores c and its category code; the name stays empty.
176 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
// Control-sequence token: stores the name cs; character 0, category catIgnore.
178 Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
// Name of the control sequence (empty for tokens built from a character).
181 string const & cs() const { return cs_; }
// Category code of the token.
183 CatCode cat() const { return cat_; }
// Character of the token (0 for tokens built from a name).
185 char character() const { return char_; }
// Textual form of the token; defined out of line.
187 string asString() const;
// Returns true when the token's control-sequence name is a single backslash
// (the string literal "\\"), "cr" or "crcr".
200 bool Token::isCR() const
202 return cs_ == "\\" || cs_ == "cr" || cs_ == "crcr";
// Returns the control-sequence name when it is non-empty; otherwise a
// one-character string made of the token's character.
205 string Token::asString() const
207 return cs_.size() ? cs_ : string(1, char_);
// Debug output for a token. Two output forms are visible: a backslash
// followed by the control-sequence name, and "[character,category]".
// The condition that selects between them and the return statement are not
// visible in this excerpt.
210 ostream & operator<<(ostream & os, Token const & t)
213 os << "\\" << t.cs();
215 os << "[" << t.character() << "," << t.cat() << "]";
// Member declarations of the Parser class; the class header, access
// specifiers and several members are not visible in this excerpt.
// Construct from a LyXLex (the definition takes the lexer's line number
// and tokenizes the lexer's stream).
224 Parser(LyXLex & lex);
// Construct from a plain input stream.
226 Parser(istream & is);
// Parse a macro definition (\def or \newcommand); the macro's name is
// written to `name`.
229 bool parse_macro(string & name);
// Parse one formula and store the resulting hull inset in the given atom.
231 bool parse_normal(MathAtom &);
// Parse tokens into `array`; `flags` is a mask of FLAG_* bits.
233 void parse_into(MathArray & array, unsigned flags);
// Line number recorded for this parser (used in error messages).
235 int lineno() const { return lineno_; }
// Parse tokens into the cells of `grid`; `flags` is a mask of FLAG_* bits,
// `numbered` is passed on to the hull inset's numbered() for each row.
241 void parse_into1(MathGridInset & grid, unsigned flags, bool numbered);
// Same as parse_into1, applied to the grid inset held by `at`.
243 void parse_into2(MathAtom & at, unsigned flags, bool numbered);
244 /// get arg delimited by 'left' and 'right'
245 string getArg(char left, char right);
// Report a parse error through lyxerr, prefixed with the line number.
249 void error(string const & msg);
250 /// dump contents to screen
// NOTE(review): the declaration the comment above belongs to is not visible
// in this excerpt; it does not describe tokenize().
// Read text from `is` and tokenize it.
255 void tokenize(istream & is);
// Split the string `s` into tokens.
257 void tokenize(string const & s);
// Consume following space and newline characters from `is`, starting with c.
259 void skipSpaceTokens(istream & is, char c);
// Append `t` to the token list.
261 void push_back(Token const & t);
// Token before the current position (a default Token at the very start).
265 Token const & prevToken() const;
// Token at the current position, without consuming it.
267 Token const & nextToken() const;
// Token at the current position; advances the position.
269 Token const & getToken();
270 /// skips spaces if any
// NOTE(review): the declaration the comment above belongs to is not visible
// in this excerpt; no definition of lex() is visible either.
273 void lex(string const & s);
// All tokens produced by tokenize(), in input order.
280 vector<Token> tokens_;
// Builds a parser on top of a LyXLex: the line counter starts at the
// lexer's current line number, the read position at 0, and the lexer's
// underlying stream is tokenized right away.
286 Parser::Parser(LyXLex & lexer)
287 : lineno_(lexer.getLineNo()), pos_(0)
289 tokenize(lexer.getStream());
// Builds a parser on a plain input stream; line counter and read position
// both start at 0. The constructor body is not visible in this excerpt
// (presumably it tokenizes `is` -- TODO confirm).
294 Parser::Parser(istream & is)
295 : lineno_(0), pos_(0)
// Appends the token t to the end of the token list.
301 void Parser::push_back(Token const & t)
303 tokens_.push_back(t);
// NOTE(review): the body is not visible in this excerpt; presumably it
// removes the last token from tokens_ -- TODO confirm.
307 void Parser::pop_back()
// Returns the token just before the current read position, or a
// default-constructed Token when the position is still 0.
313 Token const & Parser::prevToken() const
// Function-local static, so the returned reference stays valid.
315 static const Token dummy;
316 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
// Returns the token at the current read position without advancing, or a
// default-constructed Token when all tokens have been consumed.
320 Token const & Parser::nextToken() const
// Function-local static, so the returned reference stays valid.
322 static const Token dummy;
323 return good() ? tokens_[pos_] : dummy;
// Returns the token at the current read position and advances the position
// by one. When all tokens have been consumed, a default-constructed Token
// is returned and the position is left unchanged.
327 Token const & Parser::getToken()
// Function-local static, so the returned reference stays valid.
329 static const Token dummy;
330 //lyxerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
331 return good() ? tokens_[pos_++] : dummy;
// Loops for as long as the next token has category catSpace. The loop body
// is not visible in this excerpt (presumably it consumes the token --
// TODO confirm).
335 void Parser::skipSpaces()
337 while (nextToken().cat() == catSpace)
// NOTE(review): the body is not visible in this excerpt; presumably it
// moves the read position back by one token -- TODO confirm.
342 void Parser::putback()
// Returns true while the read position is still inside the token list,
// i.e. while at least one token is left to read.
348 bool Parser::good() const
350 return pos_ < tokens_.size();
// Returns the character of the token at the current read position and
// advances the position by one. A diagnostic is written to lyxerr; the
// condition guarding it is not visible in this excerpt.
// NOTE(review): no early return is visible before the final statement, so
// tokens_ may be indexed past its end when no token is left -- confirm.
354 char Parser::getChar()
357 lyxerr << "The input stream is not well..." << endl;
360 return tokens_[pos_++].character();
// Reads an argument delimited by the characters `left` and `right`.
// The visible loop pulls characters with getChar() until one equals `right`
// or no token is left. The handling of `left`, the loop body and the return
// statement are not visible in this excerpt.
364 string Parser::getArg(char left, char right)
374 while ((c = getChar()) != right && good())
// Collects text from `is` into a string s for tokenization; the reading
// code itself is not visible in this excerpt.
381 void Parser::tokenize(istream & is)
383 // eat everything up to the next \end_inset or end of stream
384 // and store it in s for further tokenization
// "\end_inset" is 10 characters long: when s ends with it, those last 10
// characters are cut off.
389 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
390 s = s.substr(0, s.size() - 10);
// Loops while c is a space or a newline according to catcode(). The loop
// body is not visible in this excerpt; judging from the commented-out
// trace, a character is put back afterwards -- TODO confirm.
400 void Parser::skipSpaceTokens(istream & is, char c)
402 // skip trailing spaces
403 while (catcode(c) == catSpace || catcode(c) == catNewline)
406 //lyxerr << "putting back: " << c << "\n";
// Splits `buffer` into tokens and appends them with push_back(). Characters
// are read from a string stream and dispatched on their category code.
// The reading loop, the case labels and the break statements are not
// visible in this excerpt, so the notes below cover only visible statements.
411 void Parser::tokenize(string const & buffer)
// One-time initialisation guard; the code it protects is not visible here.
413 static bool init_done = false;
// NOTE(review): the stream is built from c_str(), so text after an embedded
// NUL character in `buffer` would not be read.
420 istringstream is(buffer.c_str(), ios::in | ios::binary);
424 //lyxerr << "reading c: " << c << "\n";
426 switch (catcode(c)) {
// A newline produces no token here (the "par" token is commented out);
// a single space token is pushed.
430 if (catcode(c) == catNewline)
431 ; //push_back(Token("par"));
433 push_back(Token(' ', catSpace));
// Read on until a newline character or the end of the stream.
440 while (is.get(c) && catcode(c) != catNewline)
449 error("unexpected end of input");
// A run of letters is read while the characters keep category catLetter;
// spaces after it are skipped.
452 if (catcode(c) == catLetter) {
454 while (is.get(c) && catcode(c) == catLetter)
456 skipSpaceTokens(is, c);
465 push_back(Token(c, catcode(c)));
467 skipSpaceTokens(is, c);
472 lyxerr << "ignoring a char: " << int(c) << "\n";
// A character is pushed as a token together with its category code.
477 push_back(Token(c, catcode(c)));
// Writes the whole token list to lyxerr after a "Tokens: " heading, for
// debugging. Some statements inside the loop are not visible in this excerpt.
487 void Parser::dump() const
489 lyxerr << "\nTokens: ";
490 for (unsigned i = 0; i < tokens_.size(); ++i) {
493 lyxerr << tokens_[i];
// Reports a math parse error on lyxerr: the message is prefixed with the
// recorded line number (marked with '~') and terminated with endl.
// Any further statements of the body are not visible in this excerpt.
499 void Parser::error(string const & msg)
501 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parses a macro definition written as \def or \newcommand, stores the
// macro's name in `name` and registers the macro with MathMacroTable.
// The return statements and several other lines are not visible in this
// excerpt, so the meaning of the bool result cannot be stated from here.
507 bool Parser::parse_macro(string & name)
// --- \def form: name, then a parameter list up to the opening brace ---
513 if (nextToken().cs() == "def") {
516 name = getToken().cs();
// Collect the control-sequence names of all tokens before the '{'.
519 while (good() && nextToken().cat() != catBegin)
520 pars += getToken().cs();
523 lyxerr << "bad stream in parse_macro\n";
528 //lyxerr << "read \\def parameter list '" << pars << "'\n";
530 lyxerr << "can't handle non-empty parameter lists\n";
// --- \newcommand form: '{' name '}' followed by a [..] argument ---
535 } else if (nextToken().cs() == "newcommand") {
539 if (getToken().cat() != catBegin) {
540 lyxerr << "'{' in \\newcommand expected (1) \n";
545 name = getToken().cs();
547 if (getToken().cat() != catEnd) {
548 lyxerr << "'}' expected\n";
// The bracketed argument gives the number of macro arguments.
552 string arg = getArg('[', ']');
554 nargs = atoi(arg.c_str());
557 lyxerr << "\\newcommand or \\def expected\n";
// --- macro body: a brace group parsed up to its closing brace ---
562 if (getToken().cat() != catBegin) {
563 lyxerr << "'{' in macro definition expected (2)\n";
568 parse_into(ar1, FLAG_BRACE_LAST);
570 // we cannot handle recursive stuff at all
// The body is searched for an inset created from the macro's own name.
572 test.push_back(createMathInset(name));
573 if (ar1.contains(test)) {
574 lyxerr << "we cannot handle recursive macros at all.\n";
578 // is a version for display attached?
580 parse_into(ar2, FLAG_ITEM);
582 MathMacroTable::create(name, nargs, ar1, ar2);
// Parses one formula. Depending on how it starts, a MathHullInset of the
// matching type is created in `matrix` and filled through parse_into2().
// Return statements and some conditions are not visible in this excerpt.
587 bool Parser::parse_normal(MathAtom & matrix)
590 Token const & t = getToken();
// Inline formula ending at "\)" (FLAG_SIMPLE2), numbered flag set; the
// condition selecting this branch is not visible in this excerpt.
593 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
594 parse_into2(matrix, FLAG_SIMPLE2, true);
// Formula opened by a math-shift character ('$').
598 if (t.cat() == catMath) {
599 Token const & n = getToken();
600 if (n.cat() == catMath) {
601 // TeX's $$...$$ syntax for displayed math
602 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
603 parse_into2(matrix, FLAG_SIMPLE, false);
604 getToken(); // skip the second '$' token
606 // simple $...$ stuff
608 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
609 parse_into2(matrix, FLAG_SIMPLE, false);
// Anything else has to be a control sequence.
614 if (!t.cs().size()) {
615 lyxerr << "start of math expected, got '" << t << "'\n";
619 string const & cs = t.cs();
// Displayed formula ending at "\]" (FLAG_EQUATION); the condition
// selecting this branch is not visible in this excerpt.
622 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
623 parse_into2(matrix, FLAG_EQUATION, true);
628 lyxerr[Debug::MATHED]
629 << "'begin' of un-simple math expected, got '" << cs << "'\n";
// Environment form: read the environment name and dispatch on it. Every
// environment below is parsed up to the matching \end (FLAG_END).
633 string const name = getArg('{', '}');
635 if (name == "math") {
636 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
637 parse_into2(matrix, FLAG_SIMPLE, true);
// Only the unstarred "equation" is numbered.
641 if (name == "equation" || name == "equation*" || name == "displaymath") {
642 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
643 parse_into2(matrix, FLAG_END, (name == "equation"));
// For the remaining environments the starred variant is unnumbered.
647 if (name == "eqnarray" || name == "eqnarray*") {
648 matrix = MathAtom(new MathHullInset(LM_OT_EQNARRAY));
649 parse_into2(matrix, FLAG_END, !stared(name));
653 if (name == "align" || name == "align*") {
654 matrix = MathAtom(new MathHullInset(LM_OT_ALIGN));
655 parse_into2(matrix, FLAG_END, !stared(name));
659 if (name == "alignat" || name == "alignat*") {
660 // ignore this for a while
662 matrix = MathAtom(new MathHullInset(LM_OT_ALIGNAT));
663 parse_into2(matrix, FLAG_END, !stared(name));
667 if (name == "xalignat" || name == "xalignat*") {
668 // ignore this for a while
670 matrix = MathAtom(new MathHullInset(LM_OT_XALIGNAT));
671 parse_into2(matrix, FLAG_END, !stared(name));
// Only the unstarred name is accepted here, so !stared(name) is always true.
675 if (name == "xxalignat") {
676 // ignore this for a while
678 matrix = MathAtom(new MathHullInset(LM_OT_XXALIGNAT));
679 parse_into2(matrix, FLAG_END, !stared(name));
683 if (name == "multline" || name == "multline*") {
684 matrix = MathAtom(new MathHullInset(LM_OT_MULTLINE));
685 parse_into2(matrix, FLAG_END, !stared(name));
689 if (name == "gather" || name == "gather*") {
690 matrix = MathAtom(new MathHullInset(LM_OT_GATHER));
691 parse_into2(matrix, FLAG_END, !stared(name));
// Unknown environment: the same message goes to the MATHED debug channel
// and to the plain lyxerr stream.
695 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
696 lyxerr << "1: unknown math environment: " << name << "\n";
// Parses tokens into `array` under the given FLAG_* mask. The work is done
// by parse_into1() on a temporary 1x1 grid (unnumbered); the content of its
// first cell is then copied to `array`.
701 void Parser::parse_into(MathArray & array, unsigned flags)
703 MathGridInset grid(1, 1);
704 parse_into1(grid, flags, false);
705 array = grid.cell(0);
706 // remove 'unnecessary' braces:
// If the result is exactly one brace inset, it is replaced by that inset's
// content; a message is logged each time this happens.
707 if (array.size() == 1 && array.back()->asBraceInset()) {
708 lyxerr << "extra braces removed\n";
709 array = array.back()->asBraceInset()->cell(0);
// Runs parse_into1() on the grid inset held by `at`, passing `flags` and
// `numbered` through unchanged.
// NOTE(review): the result of asGridInset() is dereferenced without a
// check, so callers are assumed to pass only atoms that are grid insets --
// confirm.
714 void Parser::parse_into2(MathAtom & at, unsigned flags, bool numbered)
716 parse_into1(*(at->asGridInset()), flags, numbered);
// Core of the parser: consumes tokens with getToken() and appends the
// resulting insets to the current cell of `grid`, starting at row 0,
// column 0. `flags` is a mask of FLAG_* bits. `numbered` is passed to
// numbered() on the hull inset (when `grid` is one) for the first row and
// for every row started by "\\".
// The loop header, the break/return/continue statements, many closing
// braces and some conditions are not visible in this excerpt; the comments
// below describe only the visible statements.
720 void Parser::parse_into1(MathGridInset & grid, unsigned flags, bool numbered)
724 MathGridInset::row_type cellrow = 0;
725 MathGridInset::col_type cellcol = 0;
// `cell` always points at the grid cell currently being filled.
726 MathArray * cell = &grid.cell(grid.index(cellrow, cellcol));
728 if (grid.asHullInset())
729 grid.asHullInset()->numbered(cellrow, numbered);
732 //lyxerr << "grid: " << grid << endl;
735 Token const & t = getToken();
738 lyxerr << "t: " << t << " flags: " << flags << "\n";
// FLAG_ITEM: read a single item, which may be a braced group.
743 if (flags & FLAG_ITEM) {
744 if (t.cat() == catSpace)
748 if (t.cat() == catBegin) {
749 // skip the brace and collect everything to the next matching
751 flags |= FLAG_BRACE_LAST;
755 // handle only this single token, leave the loop if done
// FLAG_BLOCK: a '&', a row break or \end is tested for here.
759 if (flags & FLAG_BLOCK) {
760 if (t.cat() == catAlign || t.isCR() || t.cs() == "end") {
// FLAG_BLOCK2: like FLAG_BLOCK, and a closing brace is tested for as well.
766 if (flags & FLAG_BLOCK2) {
767 if (t.cat() == catAlign || t.isCR() || t.cs() == "end"
768 || t.cat() == catEnd) {
// Math-shift character ('$').
777 if (t.cat() == catMath) {
778 if (flags & FLAG_TEXTMODE) {
779 // we are inside some text mode thingy, so opening new math is allowed
780 MathAtom at(new MathHullInset(LM_OT_SIMPLE));
781 parse_into2(at, FLAG_SIMPLE, false);
785 lyxerr << "something strange in the parser\n";
789 if (flags & FLAG_SIMPLE) {
790 // this is the end of the formula
// Letters and spaces are added to the cell as character insets.
795 else if (t.cat() == catLetter)
796 add(*cell, t.character());
798 else if (t.cat() == catSpace) //&& code == LM_TC_TEXTRM
799 add(*cell, t.character());
// Parameter character ('#'): the next token's character minus '0' is used
// as the macro argument number.
// NOTE(review): no check that the next token is a digit is visible here.
801 else if (t.cat() == catParameter) {
802 Token const & n = getToken();
803 cell->push_back(MathAtom(new MathMacroArgument(n.character()-'0')));
// Opening brace: the group is parsed recursively up to its closing brace.
806 else if (t.cat() == catBegin) {
808 parse_into(ar, FLAG_BRACE_LAST);
809 #ifndef WITH_WARNINGS
810 #warning this might be wrong in general!
812 // ignore braces around simple items
// '&&' binds tighter than '||': the test reads "(one item not needing
// braces) or (two items: the first not needing braces, the second a
// script inset) or (empty group in an empty cell)".
813 if ((ar.size() == 1 && !ar.front()->needsBraces()
814 || (ar.size() == 2 && !ar.front()->needsBraces()
815 && ar.back()->asScriptInset()))
816 || (ar.size() == 0 && cell->size() == 0))
// A brace inset is appended and receives the parsed content via swap().
820 cell->push_back(MathAtom(new MathBraceInset));
821 cell->back()->cell(0).swap(ar);
// Closing brace.
825 else if (t.cat() == catEnd) {
826 if (flags & FLAG_BRACE_LAST)
// NOTE(review): `cell` is a pointer (MathArray *), so the line below
// streams the pointer itself rather than `*cell` -- confirm this is intended.
828 lyxerr << "found '}' unexpectedly, cell: '" << cell << "'\n";
830 //lyxerr << "found '}' unexpectedly\n";
832 //add(cell, '}', LM_TC_TEX);
// Alignment character ('&'): continue in another column, adding a column
// to the grid when cellcol has reached the current column count. The
// statement that changes cellcol is not visible in this excerpt.
835 else if (t.cat() == catAlign) {
837 //lyxerr << " column now " << cellcol << " max: " << grid.ncols() << "\n";
838 if (cellcol == grid.ncols()) {
839 lyxerr << "adding column " << cellcol << "\n";
840 grid.addCol(cellcol - 1);
842 cell = &grid.cell(grid.index(cellrow, cellcol));
// Superscript / subscript: reuse the script inset at the end of the cell
// unless it already has a script on that side; otherwise append a new one.
// The script itself is read as a single item.
845 else if (t.cat() == catSuper || t.cat() == catSub) {
846 bool up = (t.cat() == catSuper);
847 MathScriptInset * p = 0;
849 p = cell->back()->asScriptInset();
850 if (!p || p->has(up)) {
851 cell->push_back(MathAtom(new MathScriptInset(up)));
852 p = cell->back()->asScriptInset();
855 parse_into(p->cell(up), FLAG_ITEM);
// ')' and ']' are tested against FLAG_SIMPLE2 and FLAG_BRACK_END here.
860 else if (t.character() == ')' && (flags & FLAG_SIMPLE2))
863 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
866 else if (t.cat() == catOther)
867 add(*cell, t.character());
// From here on the token is matched by its control-sequence name.
872 else if (t.cs() == "protect")
873 // ignore \\protect, will be re-added during output
876 else if (t.cs() == "end")
879 else if (t.cs() == ")")
882 else if (t.cs() == "]")
// Row break: a [..] argument is read and used as the vertical skip of the
// current row; a row is added when cellrow has reached the row count. The
// statements that change cellrow and cellcol are not visible in this excerpt.
885 else if (t.cs() == "\\") {
886 grid.vcrskip(LyXLength(getArg('[', ']')), cellrow);
889 if (cellrow == grid.nrows())
890 grid.addRow(cellrow - 1);
891 if (grid.asHullInset())
892 grid.asHullInset()->numbered(cellrow, numbered);
893 cell = &grid.cell(grid.index(cellrow, cellcol));
896 else if (t.cs() == "limits")
899 else if (t.cs() == "nolimits")
// \nonumber and \number switch numbering of the current row off and on.
902 else if (t.cs() == "nonumber") {
903 if (grid.asHullInset())
904 grid.asHullInset()->numbered(cellrow, false);
907 else if (t.cs() == "number") {
908 if (grid.asHullInset())
909 grid.asHullInset()->numbered(cellrow, true);
912 else if (t.cs() == "hline") {
913 if (grid.asHullInset())
914 grid.asHullInset()->rowinfo(cellrow + 1);
// \sqrt: two forms are visible. One builds a MathRootInset whose cell 0 is
// read up to ']' and whose cell 1 is a single item; the other builds a
// MathSqrtInset with a single item. The condition choosing between them is
// not visible in this excerpt.
917 else if (t.cs() == "sqrt") {
920 cell->push_back(MathAtom(new MathRootInset));
921 parse_into(cell->back()->cell(0), FLAG_BRACK_END);
922 parse_into(cell->back()->cell(1), FLAG_ITEM);
925 cell->push_back(MathAtom(new MathSqrtInset));
926 parse_into(cell->back()->cell(0), FLAG_ITEM);
// \left: the next token is the left delimiter, the content is parsed up to
// \right (FLAG_RIGHT), and the token after that is the right delimiter.
930 else if (t.cs() == "left") {
931 string l = getToken().asString();
933 parse_into(ar, FLAG_RIGHT);
934 string r = getToken().asString();
935 MathAtom dl(new MathDelimInset(l, r));
940 else if (t.cs() == "right") {
941 if (!(flags & FLAG_RIGHT)) {
942 //lyxerr << "got so far: '" << cell << "'\n";
943 error("Unmatched right delimiter");
// \begin{name}: nested environments, each parsed up to its \end.
948 else if (t.cs() == "begin") {
949 string const name = getArg('{', '}');
950 if (name == "array" || name == "subarray") {
// Appending 'c' makes valign[0] default to 'c' when [..] is empty.
951 string const valign = getArg('[', ']') + 'c';
952 string const halign = getArg('{', '}');
953 cell->push_back(MathAtom(new MathArrayInset(name, valign[0], halign)));
954 parse_into2(cell->back(), FLAG_END, false);
955 } else if (name == "split" || name == "cases" ||
956 name == "gathered" || name == "aligned") {
957 cell->push_back(createMathInset(name));
958 parse_into2(cell->back(), FLAG_END, false);
959 } else if (name == "matrix" || name == "pmatrix" || name == "bmatrix" ||
960 name == "vmatrix" || name == "Vmatrix") {
961 cell->push_back(createMathInset(name));
962 parse_into2(cell->back(), FLAG_END, false);
964 lyxerr << "unknow math inset begin '" << name << "'\n";
// \kern: a string s is tested with isValidLength() and used to build a
// MathKernInset; the code that builds s is not visible in this excerpt.
967 else if (t.cs() == "kern") {
973 Token const & t = getToken();
979 if (isValidLength(s))
982 cell->push_back(MathAtom(new MathKernInset(s)));
// \label{...}: the label text is stored for the current row.
// NOTE(review): getArg() is only called when grid is a hull inset;
// otherwise the braced argument is not consumed by the visible lines.
985 else if (t.cs() == "label") {
986 if (grid.asHullInset())
987 grid.asHullInset()->label(cellrow, getArg('{', '}'));
// Infix commands: what has been collected in the cell so far is swapped
// into cell 0 of the new inset, and the rest is parsed into cell 1 with
// the same flags.
990 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
991 MathAtom p = createMathInset(t.cs());
992 cell->swap(p->cell(0));
993 parse_into(p->cell(1), flags);
998 else if (t.cs() == "substack") {
999 cell->push_back(createMathInset(t.cs()));
1000 parse_into2(cell->back(), FLAG_ITEM, false);
1003 else if (t.cs() == "xymatrix") {
1004 cell->push_back(createMathInset(t.cs()));
1005 parse_into2(cell->back(), FLAG_ITEM, false);
// \ar: a target read up to ']' and a label introduced by '^' or '_'.
1010 else if (1 && t.cs() == "ar") {
1011 MathXYArrowInset * p = new MathXYArrowInset;
1013 // try to read target
1016 parse_into(p->cell(0), FLAG_BRACK_END);
1017 //lyxerr << "read target: " << p->cell(0) << "\n";
1022 // try to read label
1023 if (nextToken().cat() == catSuper || nextToken().cat() == catSub) {
1024 p->up_ = nextToken().cat() == catSuper;
1026 parse_into(p->cell(1), FLAG_ITEM);
1027 //lyxerr << "read label: " << p->cell(1) << "\n";
1030 cell->push_back(MathAtom(p));
1031 //lyxerr << "read cell: " << cell << "\n";
// Any other control sequence: looked up with in_word_set() and handled
// according to the `inset` field of the entry found.
1035 else if (t.cs().size()) {
1036 latexkeys const * l = in_word_set(t.cs());
1038 if (l->inset == "font") {
1039 lyxerr << "starting font " << t.cs() << "\n";
1040 MathAtom p = createMathInset(t.cs());
// Names starting with 't' are parsed with FLAG_TEXTMODE set.
1041 bool textmode = (t.cs()[0] == 't');
1042 parse_into(p->cell(0), FLAG_ITEM | (textmode ? FLAG_TEXTMODE : 0));
1044 //lyxerr << "ending font\n";
// "oldfont" and "style" take the rest of the input under the current flags
// rather than a single item.
1047 else if (l->inset == "oldfont") {
1048 MathAtom p = createMathInset(t.cs());
1049 parse_into(p->cell(0), flags);
1054 else if (l->inset == "box") {
1055 MathAtom p = createMathInset(t.cs());
1056 parse_into(p->cell(0), FLAG_ITEM | FLAG_TEXTMODE);
1060 else if (l->inset == "style") {
1061 MathAtom p = createMathInset(t.cs());
1062 parse_into(p->cell(0), flags);
// Generic case, visible twice: one item is read for each of the inset's
// nargs() cells.
1068 MathAtom p = createMathInset(t.cs());
1069 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1070 parse_into(p->cell(i), FLAG_ITEM);
1076 MathAtom p = createMathInset(t.cs());
1077 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1078 parse_into(p->cell(i), FLAG_ITEM);
// FLAG_LEAVE is cleared once it has been seen.
1084 if (flags & FLAG_LEAVE) {
1085 flags &= ~FLAG_LEAVE;
// Error recovery: tokens are skipped until \end or the end of the input.
1091 lyxerr << " Math Panic, expect problems!\n";
1092 // Search for the end command.
1096 } while (good() && t.cs() != "end");
1102 } // anonymous namespace
// Parses the formula text in `str` into `ar` by wrapping the text in an
// istringstream and calling the istream overload.
1105 void mathed_parse_cell(MathArray & ar, string const & str)
1107 istringstream is(str.c_str());
1108 mathed_parse_cell(ar, is);
// Parses the contents of `is` into `ar`: a temporary Parser is built on the
// stream and parse_into() is run with no flags set.
1112 void mathed_parse_cell(MathArray & ar, istream & is)
1114 Parser(is).parse_into(ar, 0);
// Parses a macro definition from the text in `str`; the macro name is
// written to `name` and the result of Parser::parse_macro() is returned.
// The line that constructs `parser` is not visible in this excerpt
// (presumably it is built on `is` -- TODO confirm).
1119 bool mathed_parse_macro(string & name, string const & str)
1121 istringstream is(str.c_str());
1123 return parser.parse_macro(name);
// Parses a macro definition from an input stream; the macro name is
// written to `name` and the result of Parser::parse_macro() is returned.
// The line that constructs `parser` is not visible in this excerpt.
1126 bool mathed_parse_macro(string & name, istream & is)
1129 return parser.parse_macro(name);
// Parses a macro definition from a LyXLex; the macro name is written to
// `name` and the result of Parser::parse_macro() is returned.
// The line that constructs `parser` is not visible in this excerpt.
1132 bool mathed_parse_macro(string & name, LyXLex & lex)
1135 return parser.parse_macro(name);
// Parses a formula from the text in `str` into the atom `t` and returns
// the result of Parser::parse_normal().
// The line that constructs `parser` is not visible in this excerpt
// (presumably it is built on `is` -- TODO confirm).
1140 bool mathed_parse_normal(MathAtom & t, string const & str)
1142 istringstream is(str.c_str());
1144 return parser.parse_normal(t);
// Parses a formula from an input stream into the atom `t` and returns the
// result of Parser::parse_normal().
// The line that constructs `parser` is not visible in this excerpt.
1147 bool mathed_parse_normal(MathAtom & t, istream & is)
1150 return parser.parse_normal(t);
// Parses a formula from a LyXLex into the atom `t` and returns the result
// of Parser::parse_normal().
// The line that constructs `parser` is not visible in this excerpt.
1153 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1156 return parser.parse_normal(t);