2 \author André Pönitz (2001)
7 If someone desperately needs partial "structures" (such as a few
8 cells of an array inset or similar) (s)he could use the
9 following hack as starting point to write some macros:
14 \def\makeamptab{\catcode`\&=4\relax}
15 \def\makeampletter{\catcode`\&=11\relax}
16 \def\b{\makeampletter\expandafter\makeamptab\bi}
36 #pragma implementation
39 #include "math_parser.h"
40 #include "math_inset.h"
41 #include "math_arrayinset.h"
42 #include "math_braceinset.h"
43 #include "math_boxinset.h"
44 #include "math_charinset.h"
45 #include "math_deliminset.h"
46 #include "math_extern.h"
47 #include "math_factory.h"
48 #include "math_kerninset.h"
49 #include "math_macro.h"
50 #include "math_macrotable.h"
51 #include "math_macrotemplate.h"
52 #include "math_hullinset.h"
53 #include "math_rootinset.h"
54 #include "math_sizeinset.h"
55 #include "math_sqrtinset.h"
56 #include "math_scriptinset.h"
57 #include "math_sqrtinset.h"
58 #include "math_support.h"
59 #include "math_xyarrowinset.h"
61 #include "ref_inset.h"
65 #include "support/LAssert.h"
66 #include "support/lstrings.h"
/// Tells whether \p s ends in an asterisk, as in the starred environment
/// names ("align*", "eqnarray*", ...). An empty string is not starred.
bool stared(string const & s)
{
	if (s.empty())
		return false;
	return s[s.size() - 1] == '*';
}
94 // These are TeX's catcodes
// Only part of the enumerator list is visible in this excerpt. The number in
// each trailing comment is the TeX category code that the entry stands for.
96 catEscape, // 0 backslash
106 catSpace, // 10 space
107 catLetter, // 11 a-zA-Z
108 catOther, // 12 none of the above
111 catInvalid // 15 <delete>
114 CatCode theCatcode[256];
117 inline CatCode catcode(unsigned char c)
119 return theCatcode[c];
// Bit flags for the 'flags' argument of the Parser::parse_into* functions.
// Each one is a distinct bit, so several can be combined with '|' and tested
// with '&'. (The enum header is not part of this excerpt.)
124 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
125 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
126 FLAG_END = 1 << 3, // next \\end ends the parsing process
127 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing
128 FLAG_TEXTMODE = 1 << 5, // we are in a box
129 FLAG_ITEM = 1 << 6, // read a (possibly braced token)
130 FLAG_LEAVE = 1 << 7, // leave the loop at the end
131 FLAG_SIMPLE = 1 << 8, // next $ leaves the loop
132 FLAG_EQUATION = 1 << 9, // next \] leaves the loop
133 FLAG_SIMPLE2 = 1 << 10 // next \) leaves the loop
139 fill(theCatcode, theCatcode + 256, catOther);
140 fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
141 fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
143 theCatcode['\\'] = catEscape;
144 theCatcode['{'] = catBegin;
145 theCatcode['}'] = catEnd;
146 theCatcode['$'] = catMath;
147 theCatcode['&'] = catAlign;
148 theCatcode['\n'] = catNewline;
149 theCatcode['#'] = catParameter;
150 theCatcode['^'] = catSuper;
151 theCatcode['_'] = catSub;
152 theCatcode['
\7f'] = catIgnore;
153 theCatcode[' '] = catSpace;
154 theCatcode['\t'] = catSpace;
155 theCatcode['\r'] = catSpace;
156 theCatcode['~'] = catActive;
157 theCatcode['%'] = catComment;
163 // Helper class for parsing
// A token holds either a control sequence name (cs_) or a single character
// (char_) together with its category code (cat_). Only some members are
// visible in this excerpt.
// Default token: no name, character 0, category catIgnore.
169 Token() : cs_(), char_(0), cat_(catIgnore) {}
// Character token with an explicit category code.
171 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
// Control sequence token; character and category keep their default values.
// NOTE(review): not declared explicit, so a string converts to a Token implicitly.
173 Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
// Name of the control sequence (empty for the other two constructors).
176 string const & cs() const { return cs_; }
// Category code stored in the token.
178 CatCode cat() const { return cat_; }
// Character stored in the token.
180 char character() const { return char_; }
// Text form of the token: its name if it has one, else its character.
182 string asString() const;
195 bool Token::isCR() const
197 return cs_ == "\\" || cs_ == "cr" || cs_ == "crcr";
200 string Token::asString() const
202 return cs_.size() ? cs_ : string(1, char_);
// Writes a Token to a stream; used by the lyxerr debug traces in this file.
// Two output forms are visible. The condition that selects between them and
// the return statement are not part of this excerpt (presumably the choice
// depends on whether t.cs() is empty -- TODO confirm).
205 ostream & operator<<(ostream & os, Token const & t)
// control sequence form: a backslash followed by the name
208 os << "\\" << t.cs();
// character form: "[<character>,<category code>]"
210 os << "[" << t.character() << "," << t.cat() << "]";
// Member declarations of class Parser. The class header, the access
// specifiers and several members are not part of this excerpt.
// Builds a parser from a LyXLex (its stream is tokenized on construction).
219 Parser(LyXLex & lex);
// Builds a parser from a plain input stream.
221 Parser(istream & is);
// Parses a \def / \newcommand macro definition; 'name' receives the macro name.
224 bool parse_macro(string & name);
// Parses a formula into 'at'.
226 bool parse_normal(MathAtom & at);
// Parses tokens into 'array'; 'flags' is a combination of the FLAG_* bits.
228 void parse_into(MathArray & array, unsigned flags, bool mathmode);
// Line number recorded at construction (printed by error()).
230 int lineno() const { return lineno_; }
// Main parse loop, filling the cells of 'grid'.
236 void parse_into1(MathGridInset & grid, unsigned flags, bool mathmode, bool numbered);
// Same as parse_into1, but takes the grid inset through a MathAtom.
238 void parse_into2(MathAtom & at, unsigned flags, bool mathmode, bool numbered);
239 /// get arg delimited by 'left' and 'right'
240 string getArg(char left, char right);
// Reports a parse error on lyxerr.
244 void error(string const & msg);
// NOTE(review): the comment below presumably belongs to the dump()
// declaration, which is not part of this excerpt.
245 /// dump contents to screen
// Reads a stream and splits its contents into tokens.
250 void tokenize(istream & is);
// Splits the given text into tokens.
252 void tokenize(string const & s);
// Skips white space in 'is', starting with the already-read character 'c'.
254 void skipSpaceTokens(istream & is, char c);
// Appends a token to the token list.
256 void push_back(Token const & t);
// Token before the read position.
260 Token const & prevToken() const;
// Token at the read position, without consuming it.
262 Token const & nextToken() const;
// Token at the read position; consumes it.
264 Token const & getToken();
// NOTE(review): the comment below presumably belongs to the skipSpaces()
// declaration, which is not part of this excerpt.
265 /// skips spaces if any
// No definition of lex() is visible in this excerpt.
268 void lex(string const & s);
// The tokens produced by tokenize(); pos_ indexes into this list.
275 vector<Token> tokens_;
281 Parser::Parser(LyXLex & lexer)
282 : lineno_(lexer.getLineNo()), pos_(0)
284 tokenize(lexer.getStream());
// Builds a parser over a plain input stream. The recorded line number and the
// read position both start at 0. The constructor body is not part of this
// excerpt (presumably it tokenizes 'is' -- TODO confirm).
289 Parser::Parser(istream & is)
290 : lineno_(0), pos_(0)
296 void Parser::push_back(Token const & t)
298 tokens_.push_back(t);
// The body is not part of this excerpt; presumably it removes the last token
// from tokens_ -- TODO confirm.
302 void Parser::pop_back()
308 Token const & Parser::prevToken() const
310 static const Token dummy;
311 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
315 Token const & Parser::nextToken() const
317 static const Token dummy;
318 return good() ? tokens_[pos_] : dummy;
322 Token const & Parser::getToken()
324 static const Token dummy;
325 //lyxerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
326 return good() ? tokens_[pos_++] : dummy;
// Moves the read position past any space tokens: the loop runs while the
// next token has category catSpace. The loop body is not part of this
// excerpt (presumably it consumes one token per round -- TODO confirm).
330 void Parser::skipSpaces()
332 while (nextToken().cat() == catSpace)
// The body is not part of this excerpt; presumably it moves the read position
// back by one token -- TODO confirm.
337 void Parser::putback()
343 bool Parser::good() const
345 return pos_ < tokens_.size();
// Returns the character of the token at the read position and advances past it.
// NOTE(review): the condition guarding the error() call is not part of this
// excerpt. If that branch only reports the error and does not return, the
// indexing on the last line would still run with pos_ past the end of
// tokens_ -- confirm against the full source.
349 char Parser::getChar()
352 error("The input stream is not well...");
353 return tokens_[pos_++].character();
// Returns the argument delimited by 'left' and 'right'. Only the collecting
// loop is visible here: it keeps taking characters with getChar() until one
// equals 'right' or no tokens remain. How 'left' is matched and how the
// result is assembled is not part of this excerpt.
357 string Parser::getArg(char left, char right)
367 while ((c = getChar()) != right && good())
// Stream front end of the tokenizer. The reading loop that fills 's' is not
// part of this excerpt.
374 void Parser::tokenize(istream & is)
376 // eat everything up to the next \end_inset or end of stream
377 // and store it in s for further tokenization
// 10 is the length of the literal "\end_inset": when 's' ends with that
// marker, the marker itself is cut off.
382 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
383 s = s.substr(0, s.size() - 10);
// Skips white space in 'is', starting from the already-read character 'c'.
// Both space and newline characters count as white space here. The loop body
// and the code after the loop are not part of this excerpt; judging by the
// debug line, the first non-space character is presumably put back into the
// stream -- TODO confirm.
393 void Parser::skipSpaceTokens(istream & is, char c)
395 // skip trailing spaces
396 while (catcode(c) == catSpace || catcode(c) == catNewline)
399 //lyxerr << "putting back: " << c << "\n";
// Splits 'buffer' into tokens, dispatching on the category code of each
// character read. Most of the switch (case labels, breaks, the reading loop)
// is not part of this excerpt, so the notes below describe only the visible
// statements.
404 void Parser::tokenize(string const & buffer)
// One-time initialisation guard; the code it protects is not visible here
// (presumably the category-code table set-up -- TODO confirm).
406 static bool init_done = false;
// The stream is built from buffer.c_str(), so the text ends at the first NUL
// character in 'buffer', if there is one.
413 istringstream is(buffer.c_str(), ios::in | ios::binary);
417 //lyxerr << "reading c: " << c << "\n";
419 switch (catcode(c)) {
// A newline here deliberately produces no token (the "par" token is
// commented out); otherwise a single space token is pushed.
423 if (catcode(c) == catNewline)
424 ; //push_back(Token("par"));
426 push_back(Token(' ', catSpace));
// Reads on to the end of the current line.
433 while (is.get(c) && catcode(c) != catNewline)
442 error("unexpected end of input");
// A run of letters is read in one go, then following white space is skipped.
445 if (catcode(c) == catLetter) {
447 while (is.get(c) && catcode(c) == catLetter)
449 skipSpaceTokens(is, c);
458 push_back(Token(c, catcode(c)));
460 skipSpaceTokens(is, c);
465 lyxerr << "ignoring a char: " << int(c) << "\n";
// Character token carrying its own category code.
470 push_back(Token(c, catcode(c)));
// Debugging aid: writes all tokens, followed by the current read position,
// to lyxerr. The lines between the loop header and the token output are not
// part of this excerpt.
480 void Parser::dump() const
482 lyxerr << "\nTokens: ";
483 for (unsigned i = 0; i < tokens_.size(); ++i) {
486 lyxerr << tokens_[i];
488 lyxerr << " pos: " << pos_ << "\n";
// Reports a parse error on lyxerr, prefixed with the line number recorded at
// construction. The "~" marks that number as approximate. Any further action
// after the message is not part of this excerpt.
492 void Parser::error(string const & msg)
494 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parses one macro definition, written either as \def or as \newcommand, and
// registers it with MathMacroTable::create(). 'name' receives the macro name.
// The return statements and several declarations are not part of this
// excerpt (presumably false is returned after each error -- TODO confirm).
500 bool Parser::parse_macro(string & name)
// --- \def form: name, then a parameter list running up to the opening brace
506 if (nextToken().cs() == "def") {
509 name = getToken().cs();
512 while (good() && nextToken().cat() != catBegin)
513 pars += getToken().cs();
516 error("bad stream in parse_macro\n");
520 //lyxerr << "read \\def parameter list '" << pars << "'\n";
522 error("can't handle non-empty parameter lists\n");
// --- \newcommand form: {name} followed by an argument count in brackets
526 } else if (nextToken().cs() == "newcommand") {
530 if (getToken().cat() != catBegin) {
531 error("'{' in \\newcommand expected (1) \n");
535 name = getToken().cs();
537 if (getToken().cat() != catEnd) {
538 error("'}' expected\n");
542 string arg = getArg('[', ']');
// atoi() yields 0 when 'arg' does not start with a number.
544 nargs = atoi(arg.c_str());
547 lyxerr << "\\newcommand or \\def expected\n";
// --- common part: the braced macro body
552 if (getToken().cat() != catBegin) {
553 error("'{' in macro definition expected (2)\n");
558 parse_into(ar1, FLAG_BRACE_LAST, true);
560 // we cannot handle recursive stuff at all
// A body that contains an inset for the macro's own name is rejected.
562 test.push_back(createMathInset(name));
563 if (ar1.contains(test)) {
564 error("we cannot handle recursive macros at all.\n");
568 // is a version for display attached?
570 parse_into(ar2, FLAG_ITEM, true);
572 MathMacroTable::create(name, nargs, ar1, ar2);
// Parses the token list into a single atom stored in 'at'. Anything other
// than exactly one parsed element is reported as unusual. The branch
// structure and the return statements are not part of this excerpt.
577 bool Parser::parse_normal(MathAtom & at)
// NOTE(review): the second argument is the 'unsigned flags' parameter, so the
// 'false' passed here converts to 0, i.e. no flags.
581 parse_into(ar, false, false);
582 if (ar.size() != 1) {
583 lyxerr << "Unusual contents found: " << ar << endl;
// 'at' is given a fresh MathParInset in this case.
584 at.reset(new MathParInset);
588 lyxerr << "Unusual contents found: " << ar << endl;
596 void Parser::parse_into(MathArray & array, unsigned flags, bool mathmode)
598 MathGridInset grid(1, 1);
599 parse_into1(grid, flags, mathmode, false);
600 array = grid.cell(0);
604 void Parser::parse_into2(MathAtom & at, unsigned flags,
605 bool mathmode, bool numbered)
607 parse_into1(*(at->asGridInset()), flags, mathmode, numbered);
// Main parse loop. Tokens are taken one at a time with getToken() and turned
// into insets that are appended to the current cell of 'grid'. 'flags' (a
// combination of FLAG_* bits) selects which token ends the loop, 'mathmode'
// tells whether math mode is active, and 'numbered' is the numbering state
// given to rows of a hull inset.
// Many lines of this function (the loop header, returns, braces and several
// statements) are not part of this excerpt; the notes below describe only
// what the visible lines do.
611 void Parser::parse_into1(MathGridInset & grid, unsigned flags,
612 bool mathmode, bool numbered)
// Parsing starts in the top-left cell; 'cell' always points at the cell that
// currently receives new insets.
615 MathGridInset::row_type cellrow = 0;
616 MathGridInset::col_type cellcol = 0;
617 MathArray * cell = &grid.cell(grid.index(cellrow, cellcol));
619 if (grid.asHullInset())
620 grid.asHullInset()->numbered(cellrow, numbered);
623 //lyxerr << "grid: " << grid << endl;
626 Token const & t = getToken();
// Debug trace; the condition guarding it is not visible here.
629 lyxerr << "t: " << t << " flags: " << flags << "\n";
// --- FLAG_ITEM: read a single token or one braced group
634 if (flags & FLAG_ITEM) {
635 if (t.cat() == catSpace)
639 if (t.cat() == catBegin) {
640 // skip the brace and collect everything to the next matching
642 flags |= FLAG_BRACE_LAST;
646 // handle only this single token, leave the loop if done
// --- '$': opens nested math when in text mode, or ends a "simple" formula
653 if (t.cat() == catMath) {
655 // we are inside some text mode thingy, so opening new math is allowed
656 Token const & n = getToken();
657 if (n.cat() == catMath) {
658 // TeX's $$...$$ syntax for displayed math
659 cell->push_back(MathAtom(new MathHullInset("equation")));
660 parse_into2(cell->back(), FLAG_SIMPLE, true, false);
661 getToken(); // skip the second '$' token
663 // simple $...$ stuff
665 cell->push_back(MathAtom(new MathHullInset("simple")));
666 parse_into2(cell->back(), FLAG_SIMPLE, true, false);
670 else if (flags & FLAG_SIMPLE) {
671 // this is the end of the formula
676 error("something strange in the parser\n");
// --- plain characters
681 else if (t.cat() == catLetter)
682 cell->push_back(MathAtom(new MathCharInset(t.character())));
// Spaces are kept only outside math mode.
684 else if (t.cat() == catSpace && !mathmode)
685 cell->push_back(MathAtom(new MathCharInset(t.character())));
// --- '#n': macro argument
// NOTE(review): the following token's character is converted with - '0'
// without a visible check that it is a digit.
687 else if (t.cat() == catParameter) {
688 Token const & n = getToken();
689 cell->push_back(MathAtom(new MathMacroArgument(n.character()-'0')));
// --- '{': parse the group and wrap it in a brace inset
692 else if (t.cat() == catBegin) {
694 parse_into(ar, FLAG_BRACE_LAST, mathmode);
695 // reduce multiple nesting levels to a single one
696 // this helps to keep the annoyance of "a choose b" to a minimum
697 if (ar.size() && ar.front()->asBraceInset())
698 ar = ar.front()->asBraceInset()->cell(0);
699 cell->push_back(MathAtom(new MathBraceInset));
700 cell->back()->cell(0).swap(ar);
// --- '}': ends the loop under FLAG_BRACE_LAST, otherwise it is an error
703 else if (t.cat() == catEnd) {
704 if (flags & FLAG_BRACE_LAST)
706 error("found '}' unexpectedly");
708 //add(cell, '}', LM_TC_TEX);
// --- '&': move on to the next column, adding one when the grid is too narrow
// (the statement that advances cellcol is not visible here)
711 else if (t.cat() == catAlign) {
713 //lyxerr << " column now " << cellcol << " max: " << grid.ncols() << "\n";
714 if (cellcol == grid.ncols()) {
715 lyxerr << "adding column " << cellcol << "\n";
716 grid.addCol(cellcol - 1);
718 cell = &grid.cell(grid.index(cellrow, cellcol));
// --- '^' and '_': attach a script to the last inset. A new script inset is
// created when the last inset is not a script inset or already has a script
// on that side.
721 else if (t.cat() == catSuper || t.cat() == catSub) {
722 bool up = (t.cat() == catSuper);
723 MathScriptInset * p = 0;
725 p = cell->back()->asScriptInset();
726 if (!p || p->has(up)) {
727 cell->push_back(MathAtom(new MathScriptInset(up)));
728 p = cell->back()->asScriptInset();
731 parse_into(p->cell(up), FLAG_ITEM, mathmode);
// --- ']' ends the loop under FLAG_BRACK_END
736 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
739 else if (t.cat() == catOther)
740 cell->push_back(MathAtom(new MathCharInset(t.character())));
// --- control sequences start here
// \( opens an inline formula, \[ a displayed one.
745 else if (t.cs() == "(") {
746 cell->push_back(MathAtom(new MathHullInset("simple")));
747 parse_into2(cell->back(), FLAG_SIMPLE2, true, false);
750 else if (t.cs() == "[") {
751 cell->push_back(MathAtom(new MathHullInset("equation")));
752 parse_into2(cell->back(), FLAG_EQUATION, true, false);
755 else if (t.cs() == "protect")
756 // ignore \\protect, will hopefully be re-added during output
759 else if (t.cs() == "end") {
760 if (flags & FLAG_END) {
761 // eat environment name
762 //string const name =
764 // FIXME: check that we ended the correct environment
767 error("found 'end' unexpectedly");
770 else if (t.cs() == ")") {
771 if (flags & FLAG_SIMPLE2)
773 error("found '\\)' unexpectedly");
776 else if (t.cs() == "]") {
777 if (flags & FLAG_EQUATION)
779 error("found '\\]' unexpectedly");
// --- "\\": optional [length] for the row skip, then move to the next row,
// adding one when the grid is too short (the statements that advance cellrow
// and reset cellcol are not visible here)
782 else if (t.cs() == "\\") {
783 grid.vcrskip(LyXLength(getArg('[', ']')), cellrow);
786 if (cellrow == grid.nrows())
787 grid.addRow(cellrow - 1);
788 if (grid.asHullInset())
789 grid.asHullInset()->numbered(cellrow, numbered);
790 cell = &grid.cell(grid.index(cellrow, cellcol));
// --- \multicolumn{count}{align}{contents}
794 else if (t.cs() == "multicolumn") {
795 // extract column count and insert dummy cells
797 parse_into(count, FLAG_ITEM, mathmode);
799 if (!extractNumber(count, cols)) {
800 lyxerr << " can't extract number of cells from " << count << "\n";
802 // resize the table if necessary
803 for (int i = 0; i < cols; ++i) {
805 if (cellcol == grid.ncols()) {
806 lyxerr << "adding column " << cellcol << "\n";
807 grid.addCol(cellcol - 1);
809 cell = &grid.cell(grid.index(cellrow, cellcol));
810 // mark this as dummy
811 grid.cellinfo(grid.index(cellrow, cellcol)).dummy_ = true;
813 // the last cell is the real thing, not a dummy
814 grid.cellinfo(grid.index(cellrow, cellcol)).dummy_ = false;
816 // read special alignment
// The alignment argument is parsed but not stored (the assignment below is
// commented out).
818 parse_into(align, FLAG_ITEM, mathmode);
819 //grid.cellinfo(grid.index(cellrow, cellcol)).align_ = extractString(align);
821 // parse the remaining contents into the "real" cell
822 parse_into(*cell, FLAG_ITEM, mathmode);
// The actions for \limits and \nolimits are not part of this excerpt.
826 else if (t.cs() == "limits")
829 else if (t.cs() == "nolimits")
// \nonumber and \number switch the numbering of the current row.
832 else if (t.cs() == "nonumber") {
833 if (grid.asHullInset())
834 grid.asHullInset()->numbered(cellrow, false);
837 else if (t.cs() == "number") {
838 if (grid.asHullInset())
839 grid.asHullInset()->numbered(cellrow, true);
// NOTE(review): the visible line only calls rowinfo(cellrow + 1) and does
// nothing with the result; unless rowinfo() has side effects this looks like
// a no-op -- confirm.
842 else if (t.cs() == "hline") {
843 if (grid.asHullInset())
844 grid.asHullInset()->rowinfo(cellrow + 1);
// --- \sqrt: with an optional [..] argument a root inset, else a plain sqrt
// (the test for the optional argument is not visible here)
847 else if (t.cs() == "sqrt") {
850 cell->push_back(MathAtom(new MathRootInset));
851 parse_into(cell->back()->cell(0), FLAG_BRACK_END, mathmode);
852 parse_into(cell->back()->cell(1), FLAG_ITEM, mathmode);
855 cell->push_back(MathAtom(new MathSqrtInset));
856 parse_into(cell->back()->cell(0), FLAG_ITEM, mathmode);
// --- \ref: a bracketed argument goes to cell 1, the main argument to cell 0
860 else if (t.cs() == "ref") {
861 cell->push_back(MathAtom(new RefInset));
864 parse_into(cell->back()->cell(1), FLAG_BRACK_END, mathmode);
867 parse_into(cell->back()->cell(0), FLAG_ITEM, mathmode);
// --- \left <delim> ... \right <delim>
870 else if (t.cs() == "left") {
871 string l = getToken().asString();
873 parse_into(ar, FLAG_RIGHT, mathmode);
874 string r = getToken().asString();
875 cell->push_back(MathAtom(new MathDelimInset(l, r, ar)));
878 else if (t.cs() == "right") {
879 if (flags & FLAG_RIGHT)
881 //lyxerr << "got so far: '" << cell << "'\n";
882 error("Unmatched right delimiter");
// --- \begin{name}: create the inset for the environment and parse its
// contents up to the matching \end (FLAG_END)
886 else if (t.cs() == "begin") {
887 string const name = getArg('{', '}');
888 if (name == "array" || name == "subarray") {
// Appending 'c' supplies a default: valign[0] is 'c' when no [..] was given.
889 string const valign = getArg('[', ']') + 'c';
890 string const halign = getArg('{', '}');
891 cell->push_back(MathAtom(new MathArrayInset(name, valign[0], halign)));
892 parse_into2(cell->back(), FLAG_END, mathmode, false);
895 else if (name == "split" || name == "cases" ||
896 name == "gathered" || name == "aligned") {
897 cell->push_back(createMathInset(name));
898 parse_into2(cell->back(), FLAG_END, mathmode, false);
// NOTE(review): unlike the other environments, this one is parsed with
// FLAG_SIMPLE (ends at '$') rather than FLAG_END -- confirm this is intended.
901 else if (name == "math") {
902 cell->push_back(MathAtom(new MathHullInset("simple")));
903 parse_into2(cell->back(), FLAG_SIMPLE, true, true);
// Only the unstarred "equation" is numbered.
906 else if (name == "equation" || name == "equation*"
907 || name == "displaymath") {
908 cell->push_back(MathAtom(new MathHullInset("equation")));
909 parse_into2(cell->back(), FLAG_END, true, (name == "equation"));
// For the environments below, the starred variant is the unnumbered one.
912 else if (name == "eqnarray" || name == "eqnarray*") {
913 cell->push_back(MathAtom(new MathHullInset("eqnarray")));
914 parse_into2(cell->back(), FLAG_END, true, !stared(name));
917 else if (name == "align" || name == "align*") {
918 cell->push_back(MathAtom(new MathHullInset("align")));
919 parse_into2(cell->back(), FLAG_END, true, !stared(name));
// The statement that the "ignore this for a while" comments refer to is not
// part of this excerpt (presumably an extra argument is read and dropped --
// TODO confirm).
922 else if (name == "alignat" || name == "alignat*") {
923 // ignore this for a while
925 cell->push_back(MathAtom(new MathHullInset("alignat")));
926 parse_into2(cell->back(), FLAG_END, true, !stared(name));
929 else if (name == "xalignat" || name == "xalignat*") {
930 // ignore this for a while
932 cell->push_back(MathAtom(new MathHullInset("xalignat")));
933 parse_into2(cell->back(), FLAG_END, true, !stared(name));
// NOTE(review): 'name' can only be "xxalignat" here, so !stared(name) is
// always true.
936 else if (name == "xxalignat") {
937 // ignore this for a while
939 cell->push_back(MathAtom(new MathHullInset("xxalignat")));
940 parse_into2(cell->back(), FLAG_END, true, !stared(name));
943 else if (name == "multline" || name == "multline*") {
944 cell->push_back(MathAtom(new MathHullInset("multline")));
945 parse_into2(cell->back(), FLAG_END, true, !stared(name));
948 else if (name == "gather" || name == "gather*") {
949 cell->push_back(MathAtom(new MathHullInset("gather")));
950 parse_into2(cell->back(), FLAG_END, true, !stared(name));
// Any other environment name is looked up in the keyword table.
// NOTE(review): no null check on 'l' is visible before l->inset is read; it
// may be on a line that is not part of this excerpt -- confirm.
954 latexkeys const * l = in_word_set(name);
956 if (l->inset == "matrix") {
957 cell->push_back(createMathInset(name));
958 parse_into2(cell->back(), FLAG_END, mathmode, false);
961 lyxerr << "unknow math inset begin '" << name << "'\n";
// --- \kern: collect a length and wrap it in a kern inset
// NOTE(review): this 't' has the same name as the token declared at the top
// of the function; it presumably lives in an inner scope and shadows it.
966 else if (t.cs() == "kern") {
972 Token const & t = getToken();
978 if (isValidLength(s))
981 cell->push_back(MathAtom(new MathKernInset(s)));
// --- \label: stored on the current row; only done for hull insets
984 else if (t.cs() == "label") {
985 if (grid.asHullInset())
986 grid.asHullInset()->label(cellrow, getArg('{', '}'));
// --- infix \choose, \over, \atop: everything parsed into the cell so far is
// swapped into the new inset's first cell, and the rest of the input (same
// end condition, same flags) is parsed into its second cell
989 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
990 MathAtom p = createMathInset(t.cs());
991 cell->swap(p->cell(0));
992 parse_into(p->cell(1), flags, mathmode);
997 else if (t.cs() == "substack") {
998 cell->push_back(createMathInset(t.cs()));
999 parse_into2(cell->back(), FLAG_ITEM, mathmode, false);
1002 else if (t.cs() == "xymatrix") {
1003 cell->push_back(createMathInset(t.cs()));
1004 parse_into2(cell->back(), FLAG_ITEM, mathmode, false);
// --- \ar (xy arrow): optional target in brackets, optional ^ or _ label.
// The leading "1 &&" is always true, so it has no effect on the condition.
1009 else if (1 && t.cs() == "ar") {
1010 MathXYArrowInset * p = new MathXYArrowInset;
1012 // try to read target
1015 parse_into(p->cell(0), FLAG_BRACK_END, mathmode);
1016 //lyxerr << "read target: " << p->cell(0) << "\n";
1021 // try to read label
1022 if (nextToken().cat() == catSuper || nextToken().cat() == catSub) {
1023 p->up_ = nextToken().cat() == catSuper;
1025 parse_into(p->cell(1), FLAG_ITEM, mathmode);
1026 //lyxerr << "read label: " << p->cell(1) << "\n";
// The MathAtom takes the raw pointer created above.
1029 cell->push_back(MathAtom(p));
1030 //lyxerr << "read cell: " << cell << "\n";
// --- any other control sequence: dispatch on its keyword table entry
// NOTE(review): as above, no null check on 'l' is visible in this excerpt.
1034 else if (t.cs().size()) {
1035 latexkeys const * l = in_word_set(t.cs());
1037 if (l->inset == "font") {
1038 lyxerr << "starting font " << t.cs() << "\n";
1039 MathAtom p = createMathInset(t.cs());
// Font commands whose name starts with 't' are parsed with math mode off.
1040 bool textmode = (t.cs()[0] == 't');
1041 parse_into(p->cell(0), FLAG_ITEM, !textmode);
1043 //lyxerr << "ending font\n";
// Old-style font switches take the rest of the current group, hence the
// unchanged 'flags'.
1046 else if (l->inset == "oldfont") {
1047 cell->push_back(createMathInset(t.cs()));
1048 parse_into(cell->back()->cell(0), flags, l->extra == "mathmode");
1052 else if (l->inset == "box") {
1053 // switch to text mode
1054 cell->push_back(createMathInset(t.cs()));
1055 parse_into(cell->back()->cell(0), FLAG_ITEM, mathmode);
1058 else if (l->inset == "style") {
1059 cell->push_back(createMathInset(t.cs()));
1060 parse_into(cell->back()->cell(0), flags, mathmode);
// Other known commands: one parsed item per argument of the created inset,
// with the mode taken from the keyword's 'extra' field.
1065 MathAtom p = createMathInset(t.cs());
1066 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1067 parse_into(p->cell(i), FLAG_ITEM, l->extra == "mathmode");
// Same, but keeping the current mode (the condition that separates this
// branch from the previous one is not visible here).
1073 MathAtom p = createMathInset(t.cs());
1074 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1075 parse_into(p->cell(i), FLAG_ITEM, mathmode);
// FLAG_LEAVE asks for the loop to end after the current token; the bit is
// cleared here (the statement that leaves the loop is not visible).
1081 if (flags & FLAG_LEAVE) {
1082 flags &= ~FLAG_LEAVE;
1090 } // anonymous namespace
1093 void mathed_parse_cell(MathArray & ar, string const & str)
1095 istringstream is(str.c_str());
1096 mathed_parse_cell(ar, is);
1100 void mathed_parse_cell(MathArray & ar, istream & is)
1102 Parser(is).parse_into(ar, 0, true);
// Parses a macro definition from the text in 'str'; 'name' receives the macro
// name. The declaration of 'parser' is not part of this excerpt (presumably a
// Parser built from 'is' -- TODO confirm).
1107 bool mathed_parse_macro(string & name, string const & str)
1109 istringstream is(str.c_str());
1111 return parser.parse_macro(name);
// Parses a macro definition read from 'is'; 'name' receives the macro name.
// The declaration of 'parser' is not part of this excerpt.
1114 bool mathed_parse_macro(string & name, istream & is)
1117 return parser.parse_macro(name);
// Parses a macro definition taken from a LyXLex; 'name' receives the macro
// name. The declaration of 'parser' is not part of this excerpt.
1120 bool mathed_parse_macro(string & name, LyXLex & lex)
1123 return parser.parse_macro(name);
// Parses the formula text in 'str' into the atom 't'. The declaration of
// 'parser' is not part of this excerpt (presumably a Parser built from 'is'
// -- TODO confirm).
1128 bool mathed_parse_normal(MathAtom & t, string const & str)
1130 istringstream is(str.c_str());
1132 return parser.parse_normal(t);
// Parses a formula read from 'is' into the atom 't'. The declaration of
// 'parser' is not part of this excerpt.
1135 bool mathed_parse_normal(MathAtom & t, istream & is)
1138 return parser.parse_normal(t);
// Parses a formula taken from a LyXLex into the atom 't'. The declaration of
// 'parser' is not part of this excerpt.
1141 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1144 return parser.parse_normal(t);