2 \author André Pönitz (2001)
7 If someone desperately needs partial "structures" (such as a few
8 cells of an array inset or similar), (s)he could use the
9 following hack as a starting point to write some macros:
14 \def\makeamptab{\catcode`\&=4\relax}
15 \def\makeampletter{\catcode`\&=11\relax}
16 \def\b{\makeampletter\expandafter\makeamptab\bi}
36 #pragma implementation
39 #include "math_parser.h"
40 #include "math_inset.h"
41 #include "math_arrayinset.h"
42 #include "math_braceinset.h"
43 #include "math_boxinset.h"
44 #include "math_charinset.h"
45 #include "math_deliminset.h"
46 #include "math_extern.h"
47 #include "math_factory.h"
48 #include "math_kerninset.h"
49 #include "math_macro.h"
50 #include "math_macrotable.h"
51 #include "math_macrotemplate.h"
52 #include "math_hullinset.h"
53 #include "math_rootinset.h"
54 #include "math_sizeinset.h"
55 #include "math_sqrtinset.h"
56 #include "math_scriptinset.h"
57 #include "math_sqrtinset.h"
58 #include "math_support.h"
59 #include "math_xyarrowinset.h"
61 #include "ref_inset.h"
65 #include "support/LAssert.h"
66 #include "support/lstrings.h"
/// Returns true when s is non-empty and its last character is '*'.
/// The environment handling further down passes !stared(name) as the
/// "numbered" argument, so "align*" and friends come out unnumbered.
87 bool stared(string const & s)
89 string::size_type const n = s.size();
// n doubles as the emptiness check: s[n - 1] is not evaluated when n == 0.
90 return n && s[n - 1] == '*';
94 // These are TeX's catcodes
96 catEscape, // 0 backslash
106 catSpace, // 10 space
107 catLetter, // 11 a-zA-Z
108 catOther, // 12 none of the above
111 catInvalid // 15 <delete>
114 CatCode theCatcode[256];
/// Looks up the category code stored for byte c in theCatcode.
/// The parameter is an unsigned char, so the index always falls inside the
/// 256-entry table even when the caller holds a plain (possibly signed) char.
117 inline CatCode catcode(unsigned char c)
119 return theCatcode[c];
124 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
125 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
126 FLAG_END = 1 << 3, // next \\end ends the parsing process
127 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing
128 FLAG_TEXTMODE = 1 << 5, // we are in a box
129 FLAG_ITEM = 1 << 6, // read a (possibly braced token)
130 FLAG_LEAVE = 1 << 7, // leave the loop at the end
131 FLAG_SIMPLE = 1 << 8, // next $ leaves the loop
132 FLAG_EQUATION = 1 << 9, // next \] leaves the loop
133 FLAG_SIMPLE2 = 1 << 10 // next \) leaves the loop
139 fill(theCatcode, theCatcode + 256, catOther);
140 fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
141 fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
143 theCatcode['\\'] = catEscape;
144 theCatcode['{'] = catBegin;
145 theCatcode['}'] = catEnd;
146 theCatcode['$'] = catMath;
147 theCatcode['&'] = catAlign;
148 theCatcode['\n'] = catNewline;
149 theCatcode['#'] = catParameter;
150 theCatcode['^'] = catSuper;
151 theCatcode['_'] = catSub;
152 theCatcode['\x7f'] = catIgnore;
153 theCatcode[' '] = catSpace;
154 theCatcode['\t'] = catSpace;
155 theCatcode['\r'] = catSpace;
156 theCatcode['~'] = catActive;
157 theCatcode['%'] = catComment;
163 // Helper class for parsing
169 Token() : cs_(), char_(0), cat_(catIgnore) {}
171 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
173 Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
176 string const & cs() const { return cs_; }
178 CatCode cat() const { return cat_; }
180 char character() const { return char_; }
182 string asString() const;
/// True when this token's control-sequence name is one of the three names
/// compared below: a single backslash (written "\\" in the literal),
/// "cr" or "crcr".
195 bool Token::isCR() const
197 return cs_ == "\\" || cs_ == "cr" || cs_ == "crcr";
/// Textual form of the token: the control-sequence name when it is
/// non-empty, otherwise a one-character string built from char_.
200 string Token::asString() const
202 return cs_.size() ? cs_ : string(1, char_);
205 ostream & operator<<(ostream & os, Token const & t)
208 os << "\\" << t.cs();
210 os << "[" << t.character() << "," << t.cat() << "]";
219 Parser(LyXLex & lex);
221 Parser(istream & is);
224 bool parse_macro(string & name);
226 bool parse_normal(MathAtom & at);
228 void parse_into(MathArray & array, unsigned flags, bool mathmode);
230 int lineno() const { return lineno_; }
236 void parse_into1(MathGridInset & grid, unsigned flags, bool mathmode, bool numbered);
238 void parse_into2(MathAtom & at, unsigned flags, bool mathmode, bool numbered);
239 /// get arg delimited by 'left' and 'right'
240 string getArg(char left, char right);
244 void error(string const & msg);
245 /// dump contents to screen
250 void tokenize(istream & is);
252 void tokenize(string const & s);
254 void skipSpaceTokens(istream & is, char c);
256 void push_back(Token const & t);
260 Token const & prevToken() const;
262 Token const & nextToken() const;
264 Token const & getToken();
265 /// skips spaces if any
268 void lex(string const & s);
275 vector<Token> tokens_;
281 Parser::Parser(LyXLex & lexer)
282 : lineno_(lexer.getLineNo()), pos_(0)
284 tokenize(lexer.getStream());
289 Parser::Parser(istream & is)
290 : lineno_(0), pos_(0)
/// Appends t to the end of the token list tokens_.
296 void Parser::push_back(Token const & t)
298 tokens_.push_back(t);
302 void Parser::pop_back()
/// Returns the token immediately before the read position pos_ without
/// moving it. When pos_ is 0 a reference to a function-local
/// default-constructed Token is returned instead.
308 Token const & Parser::prevToken() const
// static so that the returned reference stays valid after the call
310 static const Token dummy;
311 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
/// Peeks at the token at the read position pos_ without consuming it.
/// When good() is false a reference to a function-local
/// default-constructed Token is returned instead.
315 Token const & Parser::nextToken() const
// static so that the returned reference stays valid after the call
317 static const Token dummy;
318 return good() ? tokens_[pos_] : dummy;
/// Returns the token at the read position and advances pos_ by one.
/// When good() is false, pos_ is left unchanged and a reference to a
/// function-local default-constructed Token is returned instead.
322 Token const & Parser::getToken()
// static so that the returned reference stays valid after the call
324 static const Token dummy;
325 //lyxerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
326 return good() ? tokens_[pos_++] : dummy;
330 void Parser::skipSpaces()
332 while (nextToken().cat() == catSpace)
337 void Parser::putback()
/// True while the read position pos_ still refers to an existing token,
/// i.e. there is at least one token left to read.
343 bool Parser::good() const
345 return pos_ < tokens_.size();
/// Returns the character of the token at the read position and advances
/// pos_ by one.
/// NOTE(review): the condition guarding the error() call is not visible in
/// this excerpt. The return statement indexes tokens_[pos_] unconditionally,
/// so if error() returns normally when the input is exhausted this reads
/// past the end of tokens_ -- confirm and bail out early if so.
349 char Parser::getChar()
352 error("The input stream is not well...");
353 return tokens_[pos_++].character();
357 string Parser::getArg(char left, char right)
367 while ((c = getChar()) != right && good())
374 void Parser::tokenize(istream & is)
376 // eat everything up to the next \end_inset or end of stream
377 // and store it in s for further tokenization
382 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
383 s = s.substr(0, s.size() - 10);
393 void Parser::skipSpaceTokens(istream & is, char c)
395 // skip trailing spaces
396 while (catcode(c) == catSpace || catcode(c) == catNewline)
399 //lyxerr << "putting back: " << c << "\n";
404 void Parser::tokenize(string const & buffer)
406 static bool init_done = false;
413 istringstream is(buffer.c_str(), ios::in | ios::binary);
417 //lyxerr << "reading c: " << c << "\n";
419 switch (catcode(c)) {
423 if (catcode(c) == catNewline)
424 ; //push_back(Token("par"));
426 push_back(Token(' ', catSpace));
433 while (is.get(c) && catcode(c) != catNewline)
442 error("unexpected end of input");
445 if (catcode(c) == catLetter) {
447 while (is.get(c) && catcode(c) == catLetter)
449 skipSpaceTokens(is, c);
458 push_back(Token(c, catcode(c)));
460 skipSpaceTokens(is, c);
465 lyxerr << "ignoring a char: " << int(c) << "\n";
470 push_back(Token(c, catcode(c)));
480 void Parser::dump() const
482 lyxerr << "\nTokens: ";
483 for (unsigned i = 0; i < tokens_.size(); ++i) {
486 lyxerr << tokens_[i];
488 lyxerr << " pos: " << pos_ << "\n";
492 void Parser::error(string const & msg)
494 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
500 bool Parser::parse_macro(string & name)
506 if (nextToken().cs() == "def") {
509 name = getToken().cs();
512 while (good() && nextToken().cat() != catBegin)
513 pars += getToken().cs();
516 error("bad stream in parse_macro\n");
520 //lyxerr << "read \\def parameter list '" << pars << "'\n";
522 error("can't handle non-empty parameter lists\n");
526 } else if (nextToken().cs() == "newcommand") {
530 if (getToken().cat() != catBegin) {
531 error("'{' in \\newcommand expected (1) \n");
535 name = getToken().cs();
537 if (getToken().cat() != catEnd) {
538 error("'}' expected\n");
542 string arg = getArg('[', ']');
544 nargs = atoi(arg.c_str());
547 lyxerr << "\\newcommand or \\def expected\n";
552 if (getToken().cat() != catBegin) {
553 error("'{' in macro definition expected (2)\n");
558 parse_into(ar1, FLAG_BRACE_LAST, true);
560 // we cannot handle recursive stuff at all
562 test.push_back(createMathInset(name));
563 if (ar1.contains(test)) {
564 error("we cannot handle recursive macros at all.\n");
568 // is a version for display attached?
570 parse_into(ar2, FLAG_ITEM, true);
572 MathMacroTable::create(name, nargs, ar1, ar2);
577 bool Parser::parse_normal(MathAtom & at)
581 parse_into(ar, false, false);
582 if (ar.size() != 1) {
583 lyxerr << "Unusual contents found: " << ar << endl;
584 at.reset(new MathParInset);
/// Parses tokens into a plain MathArray.
/// The work is delegated to parse_into1 on a temporary 1x1 grid with
/// numbering switched off; the grid's cell 0 is then assigned to array,
/// which replaces whatever array held before the call.
/// flags and mathmode are forwarded to parse_into1 unchanged.
593 void Parser::parse_into(MathArray & array, unsigned flags, bool mathmode)
// scratch grid: only its single cell is of interest to the caller
595 MathGridInset grid(1, 1);
596 parse_into1(grid, flags, mathmode, false);
597 array = grid.cell(0);
// The brace-stripping step below is disabled (commented out).
598 // remove 'unnecessary' braces:
599 //if (array.size() == 1 && array.back()->asBraceInset()) {
600 // lyxerr << "extra braces removed\n";
601 // array = array.back()->asBraceInset()->cell(0);
/// Parses tokens directly into the grid inset held by at, forwarding
/// flags, mathmode and numbered to parse_into1.
/// NOTE(review): the result of at->asGridInset() is dereferenced without a
/// null check; the visible callers pass freshly created hull/array-style
/// insets -- confirm that every one of them really is a grid inset.
606 void Parser::parse_into2(MathAtom & at, unsigned flags,
607 bool mathmode, bool numbered)
609 parse_into1(*(at->asGridInset()), flags, mathmode, numbered);
613 void Parser::parse_into1(MathGridInset & grid, unsigned flags,
614 bool mathmode, bool numbered)
617 MathGridInset::row_type cellrow = 0;
618 MathGridInset::col_type cellcol = 0;
619 MathArray * cell = &grid.cell(grid.index(cellrow, cellcol));
621 if (grid.asHullInset())
622 grid.asHullInset()->numbered(cellrow, numbered);
625 //lyxerr << "grid: " << grid << endl;
628 Token const & t = getToken();
631 lyxerr << "t: " << t << " flags: " << flags << "\n";
636 if (flags & FLAG_ITEM) {
637 if (t.cat() == catSpace)
641 if (t.cat() == catBegin) {
642 // skip the brace and collect everything to the next matching
644 flags |= FLAG_BRACE_LAST;
648 // handle only this single token, leave the loop if done
655 if (t.cat() == catMath) {
657 // we are inside some text mode thingy, so opening new math is allowed
658 Token const & n = getToken();
659 if (n.cat() == catMath) {
660 // TeX's $$...$$ syntax for displayed math
661 cell->push_back(MathAtom(new MathHullInset(LM_OT_EQUATION)));
662 parse_into2(cell->back(), FLAG_SIMPLE, true, false);
663 getToken(); // skip the second '$' token
665 // simple $...$ stuff
667 cell->push_back(MathAtom(new MathHullInset(LM_OT_SIMPLE)));
668 parse_into2(cell->back(), FLAG_SIMPLE, true, false);
672 else if (flags & FLAG_SIMPLE) {
673 // this is the end of the formula
678 error("something strange in the parser\n");
683 else if (t.cat() == catLetter)
684 cell->push_back(MathAtom(new MathCharInset(t.character())));
686 else if (t.cat() == catSpace && !mathmode)
687 cell->push_back(MathAtom(new MathCharInset(t.character())));
689 else if (t.cat() == catParameter) {
690 Token const & n = getToken();
691 cell->push_back(MathAtom(new MathMacroArgument(n.character()-'0')));
694 else if (t.cat() == catBegin) {
696 parse_into(ar, FLAG_BRACE_LAST, mathmode);
697 cell->push_back(MathAtom(new MathBraceInset));
698 cell->back()->cell(0).swap(ar);
701 else if (t.cat() == catEnd) {
702 if (flags & FLAG_BRACE_LAST)
704 error("found '}' unexpectedly");
706 //add(cell, '}', LM_TC_TEX);
709 else if (t.cat() == catAlign) {
711 //lyxerr << " column now " << cellcol << " max: " << grid.ncols() << "\n";
712 if (cellcol == grid.ncols()) {
713 lyxerr << "adding column " << cellcol << "\n";
714 grid.addCol(cellcol - 1);
716 cell = &grid.cell(grid.index(cellrow, cellcol));
719 else if (t.cat() == catSuper || t.cat() == catSub) {
720 bool up = (t.cat() == catSuper);
721 MathScriptInset * p = 0;
723 p = cell->back()->asScriptInset();
724 if (!p || p->has(up)) {
725 cell->push_back(MathAtom(new MathScriptInset(up)));
726 p = cell->back()->asScriptInset();
729 parse_into(p->cell(up), FLAG_ITEM, mathmode);
734 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
737 else if (t.cat() == catOther)
738 cell->push_back(MathAtom(new MathCharInset(t.character())));
743 else if (t.cs() == "(") {
744 cell->push_back(MathAtom(new MathHullInset(LM_OT_SIMPLE)));
745 parse_into2(cell->back(), FLAG_SIMPLE2, true, false);
748 else if (t.cs() == "[") {
749 cell->push_back(MathAtom(new MathHullInset(LM_OT_EQUATION)));
750 parse_into2(cell->back(), FLAG_EQUATION, true, false);
753 else if (t.cs() == "protect")
754 // ignore \\protect, will hopefully be re-added during output
757 else if (t.cs() == "end") {
758 if (flags & FLAG_END) {
759 // eat environment name
760 //string const name =
762 // FIXME: check that we ended the correct environment
765 error("found 'end' unexpectedly");
768 else if (t.cs() == ")") {
769 if (flags & FLAG_SIMPLE2)
771 error("found '\\)' unexpectedly");
774 else if (t.cs() == "]") {
775 if (flags & FLAG_EQUATION)
777 error("found '\\]' unexpectedly");
780 else if (t.cs() == "\\") {
781 grid.vcrskip(LyXLength(getArg('[', ']')), cellrow);
784 if (cellrow == grid.nrows())
785 grid.addRow(cellrow - 1);
786 if (grid.asHullInset())
787 grid.asHullInset()->numbered(cellrow, numbered);
788 cell = &grid.cell(grid.index(cellrow, cellcol));
792 else if (t.cs() == "multicolumn") {
793 // extract column count and insert dummy cells
795 parse_into(count, FLAG_ITEM, mathmode);
797 if (!extractNumber(count, cols)) {
798 lyxerr << " can't extract number of cells from " << count << "\n";
800 // resize the table if necessary
801 for (int i = 0; i < cols; ++i) {
803 if (cellcol == grid.ncols()) {
804 lyxerr << "adding column " << cellcol << "\n";
805 grid.addCol(cellcol - 1);
807 cell = &grid.cell(grid.index(cellrow, cellcol));
808 // mark this as dummy
809 grid.cellinfo(grid.index(cellrow, cellcol)).dummy_ = true;
811 // the last cell is the real thing, not a dummy
812 grid.cellinfo(grid.index(cellrow, cellcol)).dummy_ = false;
814 // read special alignment
816 parse_into(align, FLAG_ITEM, mathmode);
817 //grid.cellinfo(grid.index(cellrow, cellcol)).align_ = extractString(align);
819 // parse the remaining contents into the "real" cell
820 parse_into(*cell, FLAG_ITEM, mathmode);
824 else if (t.cs() == "limits")
827 else if (t.cs() == "nolimits")
830 else if (t.cs() == "nonumber") {
831 if (grid.asHullInset())
832 grid.asHullInset()->numbered(cellrow, false);
835 else if (t.cs() == "number") {
836 if (grid.asHullInset())
837 grid.asHullInset()->numbered(cellrow, true);
840 else if (t.cs() == "hline") {
841 if (grid.asHullInset())
842 grid.asHullInset()->rowinfo(cellrow + 1);
845 else if (t.cs() == "sqrt") {
848 cell->push_back(MathAtom(new MathRootInset));
849 parse_into(cell->back()->cell(0), FLAG_BRACK_END, mathmode);
850 parse_into(cell->back()->cell(1), FLAG_ITEM, mathmode);
853 cell->push_back(MathAtom(new MathSqrtInset));
854 parse_into(cell->back()->cell(0), FLAG_ITEM, mathmode);
858 else if (t.cs() == "ref") {
859 cell->push_back(MathAtom(new RefInset));
862 parse_into(cell->back()->cell(1), FLAG_BRACK_END, mathmode);
865 parse_into(cell->back()->cell(0), FLAG_ITEM, mathmode);
868 else if (t.cs() == "left") {
869 string l = getToken().asString();
871 parse_into(ar, FLAG_RIGHT, mathmode);
872 string r = getToken().asString();
873 cell->push_back(MathAtom(new MathDelimInset(l, r, ar)));
876 else if (t.cs() == "right") {
877 if (flags & FLAG_RIGHT)
879 //lyxerr << "got so far: '" << cell << "'\n";
880 error("Unmatched right delimiter");
884 else if (t.cs() == "begin") {
885 string const name = getArg('{', '}');
886 if (name == "array" || name == "subarray") {
887 string const valign = getArg('[', ']') + 'c';
888 string const halign = getArg('{', '}');
889 cell->push_back(MathAtom(new MathArrayInset(name, valign[0], halign)));
890 parse_into2(cell->back(), FLAG_END, mathmode, false);
893 else if (name == "split" || name == "cases" ||
894 name == "gathered" || name == "aligned") {
895 cell->push_back(createMathInset(name));
896 parse_into2(cell->back(), FLAG_END, mathmode, false);
899 else if (name == "math") {
900 cell->push_back(MathAtom(new MathHullInset(LM_OT_SIMPLE)));
901 parse_into2(cell->back(), FLAG_SIMPLE, true, true);
904 else if (name == "equation" || name == "equation*"
905 || name == "displaymath") {
906 cell->push_back(MathAtom(new MathHullInset(LM_OT_EQUATION)));
907 parse_into2(cell->back(), FLAG_END, true, (name == "equation"));
910 else if (name == "eqnarray" || name == "eqnarray*") {
911 cell->push_back(MathAtom(new MathHullInset(LM_OT_EQNARRAY)));
912 parse_into2(cell->back(), FLAG_END, true, !stared(name));
915 else if (name == "align" || name == "align*") {
916 cell->push_back(MathAtom(new MathHullInset(LM_OT_ALIGN)));
917 parse_into2(cell->back(), FLAG_END, true, !stared(name));
920 else if (name == "alignat" || name == "alignat*") {
921 // ignore this for a while
923 cell->push_back(MathAtom(new MathHullInset(LM_OT_ALIGNAT)));
924 parse_into2(cell->back(), FLAG_END, true, !stared(name));
927 else if (name == "xalignat" || name == "xalignat*") {
928 // ignore this for a while
930 cell->push_back(MathAtom(new MathHullInset(LM_OT_XALIGNAT)));
931 parse_into2(cell->back(), FLAG_END, true, !stared(name));
934 else if (name == "xxalignat") {
935 // ignore this for a while
937 cell->push_back(MathAtom(new MathHullInset(LM_OT_XXALIGNAT)));
938 parse_into2(cell->back(), FLAG_END, true, !stared(name));
941 else if (name == "multline" || name == "multline*") {
942 cell->push_back(MathAtom(new MathHullInset(LM_OT_MULTLINE)));
943 parse_into2(cell->back(), FLAG_END, true, !stared(name));
946 else if (name == "gather" || name == "gather*") {
947 cell->push_back(MathAtom(new MathHullInset(LM_OT_GATHER)));
948 parse_into2(cell->back(), FLAG_END, true, !stared(name));
952 latexkeys const * l = in_word_set(name);
954 if (l->inset == "matrix") {
955 cell->push_back(createMathInset(name));
956 parse_into2(cell->back(), FLAG_END, mathmode, false);
959 lyxerr << "unknow math inset begin '" << name << "'\n";
964 else if (t.cs() == "kern") {
970 Token const & t = getToken();
976 if (isValidLength(s))
979 cell->push_back(MathAtom(new MathKernInset(s)));
982 else if (t.cs() == "label") {
983 if (grid.asHullInset())
984 grid.asHullInset()->label(cellrow, getArg('{', '}'));
987 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
988 MathAtom p = createMathInset(t.cs());
989 cell->swap(p->cell(0));
990 parse_into(p->cell(1), flags, mathmode);
995 else if (t.cs() == "substack") {
996 cell->push_back(createMathInset(t.cs()));
997 parse_into2(cell->back(), FLAG_ITEM, mathmode, false);
1000 else if (t.cs() == "xymatrix") {
1001 cell->push_back(createMathInset(t.cs()));
1002 parse_into2(cell->back(), FLAG_ITEM, mathmode, false);
1007 else if (1 && t.cs() == "ar") {
1008 MathXYArrowInset * p = new MathXYArrowInset;
1010 // try to read target
1013 parse_into(p->cell(0), FLAG_BRACK_END, mathmode);
1014 //lyxerr << "read target: " << p->cell(0) << "\n";
1019 // try to read label
1020 if (nextToken().cat() == catSuper || nextToken().cat() == catSub) {
1021 p->up_ = nextToken().cat() == catSuper;
1023 parse_into(p->cell(1), FLAG_ITEM, mathmode);
1024 //lyxerr << "read label: " << p->cell(1) << "\n";
1027 cell->push_back(MathAtom(p));
1028 //lyxerr << "read cell: " << cell << "\n";
1032 else if (t.cs().size()) {
1033 latexkeys const * l = in_word_set(t.cs());
1035 if (l->inset == "font") {
1036 lyxerr << "starting font " << t.cs() << "\n";
1037 MathAtom p = createMathInset(t.cs());
1038 bool textmode = (t.cs()[0] == 't');
1039 parse_into(p->cell(0), FLAG_ITEM, !textmode);
1041 //lyxerr << "ending font\n";
1044 else if (l->inset == "oldfont") {
1045 cell->push_back(createMathInset(t.cs()));
1046 parse_into(cell->back()->cell(0), flags, l->extra == "mathmode");
1050 else if (l->inset == "box") {
1051 // switch to text mode
1052 cell->push_back(createMathInset(t.cs()));
1053 parse_into(cell->back()->cell(0), FLAG_ITEM, mathmode);
1056 else if (l->inset == "style") {
1057 cell->push_back(createMathInset(t.cs()));
1058 parse_into(cell->back()->cell(0), flags, mathmode);
1063 MathAtom p = createMathInset(t.cs());
1064 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1065 parse_into(p->cell(i), FLAG_ITEM, l->extra == "mathmode");
1071 MathAtom p = createMathInset(t.cs());
1072 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1073 parse_into(p->cell(i), FLAG_ITEM, mathmode);
1079 if (flags & FLAG_LEAVE) {
1080 flags &= ~FLAG_LEAVE;
1088 } // anonymous namespace
/// Convenience overload: wraps str in an input string stream and forwards
/// to the istream overload of mathed_parse_cell.
1091 void mathed_parse_cell(MathArray & ar, string const & str)
// built from c_str(), so the stream content stops at the first NUL byte
1093 istringstream is(str.c_str());
1094 mathed_parse_cell(ar, is);
/// Parses the content of is into ar using a temporary Parser, with no
/// flags set (0) and mathmode = true.
1098 void mathed_parse_cell(MathArray & ar, istream & is)
1100 Parser(is).parse_into(ar, 0, true);
1105 bool mathed_parse_macro(string & name, string const & str)
1107 istringstream is(str.c_str());
1109 return parser.parse_macro(name);
1112 bool mathed_parse_macro(string & name, istream & is)
1115 return parser.parse_macro(name);
1118 bool mathed_parse_macro(string & name, LyXLex & lex)
1121 return parser.parse_macro(name);
1126 bool mathed_parse_normal(MathAtom & t, string const & str)
1128 istringstream is(str.c_str());
1130 return parser.parse_normal(t);
1133 bool mathed_parse_normal(MathAtom & t, istream & is)
1136 return parser.parse_normal(t);
1139 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1142 return parser.parse_normal(t);