2 \author André Pönitz (2001)
7 If someone desperately needs partial "structures" (such as a few
8 cells of an array inset or similar) (s)he could use the
9 following hack as starting point to write some macros:
14 \def\makeamptab{\catcode`\&=4\relax}
15 \def\makeampletter{\catcode`\&=11\relax}
16 \def\b{\makeampletter\expandafter\makeamptab\bi}
36 #pragma implementation
39 #include "math_parser.h"
40 #include "math_inset.h"
41 #include "math_arrayinset.h"
42 #include "math_braceinset.h"
43 #include "math_boxinset.h"
44 #include "math_charinset.h"
45 #include "math_deliminset.h"
46 #include "math_envinset.h"
47 #include "math_extern.h"
48 #include "math_factory.h"
49 #include "math_kerninset.h"
50 #include "math_macro.h"
51 #include "math_macrotemplate.h"
52 #include "math_hullinset.h"
53 #include "math_parboxinset.h"
54 #include "math_parinset.h"
55 #include "math_rootinset.h"
56 #include "math_sizeinset.h"
57 #include "math_sqrtinset.h"
58 #include "math_scriptinset.h"
59 #include "math_sqrtinset.h"
60 #include "math_support.h"
61 #include "math_xyarrowinset.h"
63 //#include "insets/insetref.h"
64 #include "ref_inset.h"
68 #include "support/LAssert.h"
69 #include "support/lstrings.h"
// Returns true when `s` is non-empty and its last character is '*'.
// Used further down to tell starred environment names (e.g. "align*")
// from their unstarred forms.
88 bool stared(string const & s)
90 string::size_type const n = s.size();
// `n &&` guards the s[n - 1] access against the empty string.
91 return n && s[n - 1] == '*';
95 // These are TeX's catcodes
// Enumerators of the category-code type stored in theCatcode and in Token.
// Only part of the list is visible in this extract; the number in each
// trailing comment is the TeX category code the enumerator corresponds to.
97 catEscape, // 0 backslash
107 catSpace, // 10 space
108 catLetter, // 11 a-zA-Z
109 catOther, // 12 none of the above
112 catInvalid // 15 <delete>
// Table holding one category code for each of the 256 possible byte values.
115 CatCode theCatcode[256];
// Returns the category code stored in theCatcode for `c`. The parameter is
// an unsigned char, so the index always lies within the table.
118 inline CatCode catcode(unsigned char c)
120 return theCatcode[c];
// Bit flags for the `flags` argument of the parse functions. Each value is
// a distinct bit, so the parser can combine them with `|=` and test them
// with `&`. Any enumerator before FLAG_BRACE_LAST is not visible in this
// extract.
125 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
126 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
127 FLAG_END = 1 << 3, // next \\end ends the parsing process
128 FLAG_BRACK_LAST = 1 << 4, // next closing bracket ends the parsing
129 FLAG_TEXTMODE = 1 << 5, // we are in a box
130 FLAG_ITEM = 1 << 6, // read a (possibly braced token)
131 FLAG_LEAVE = 1 << 7, // leave the loop at the end
132 FLAG_SIMPLE = 1 << 8, // next $ leaves the loop
133 FLAG_EQUATION = 1 << 9, // next \] leaves the loop
134 FLAG_SIMPLE2 = 1 << 10, // next \) leaves the loop
135 FLAG_OPTION = 1 << 11 // read [...] style option
// Fills theCatcode: every byte starts out as catOther, ASCII letters become
// catLetter, and the individual assignments below give TeX's special
// characters their category codes.
141 fill(theCatcode, theCatcode + 256, catOther);
142 fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
143 fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
145 theCatcode['\\'] = catEscape;
146 theCatcode['{'] = catBegin;
147 theCatcode['}'] = catEnd;
148 theCatcode['$'] = catMath;
149 theCatcode['&'] = catAlign;
150 theCatcode['\n'] = catNewline;
151 theCatcode['#'] = catParameter;
152 theCatcode['^'] = catSuper;
153 theCatcode['_'] = catSub;
// DEL (0x7f). The literal used to be split across two lines; read as '\7f'
// it is a two-character literal (octal \7 followed by 'f') whose value lies
// far outside the 256-entry table. '\x7f' names the intended single byte.
// NOTE(review): the enumerator comments list <delete> under catInvalid,
// while this entry is set to catIgnore; the assignment is kept as it was.
154 theCatcode['\x7f'] = catIgnore;
155 theCatcode[' '] = catSpace;
156 theCatcode['\t'] = catSpace;
// Carriage return is treated like a newline.
157 theCatcode['\r'] = catNewline;
158 theCatcode['~'] = catActive;
159 theCatcode['%'] = catComment;
165 // Helper class for parsing
// A Token holds either a control sequence name (cs_) or a single character
// (char_) together with its category code (cat_).
// Default token: empty name, character 0, category catIgnore.
171 Token() : cs_(), char_(0), cat_(catIgnore) {}
// Character token carrying the given category code; the name stays empty.
173 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
// Control-sequence token: character is 0 and the category is catIgnore.
175 Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
// Control sequence name (empty for tokens built from a character).
178 string const & cs() const { return cs_; }
// Category code of the token.
180 CatCode cat() const { return cat_; }
// Character of the token (0 unless built from a character).
182 char character() const { return char_; }
// Textual form of the token; defined out of line.
184 string asString() const;
// Returns the control sequence name when it is non-empty, otherwise a
// one-character string made from the token's character.
195 string Token::asString() const
197 return cs_.size() ? cs_ : string(1, char_);
// Writes a Token to `os`. The two visible statements print either a
// backslash followed by the control sequence name, or "[character,category]";
// the condition that selects between them is not visible in this extract.
200 ostream & operator<<(ostream & os, Token const & t)
203 os << "\\" << t.cs();
205 os << "[" << t.character() << "," << t.cat() << "]";
// Member declarations of class Parser (the class head and several members
// are not visible in this extract).
// Builds a parser from a LyXLex; the definition takes the line number and
// the stream from the lexer.
214 Parser(LyXLex & lex);
// Builds a parser from a plain input stream.
216 Parser(istream & is);
// Parses the token list into a single atom.
219 bool parse(MathAtom & at);
// Parses the token list into `array`, controlled by FLAG_* bits in `flags`.
221 void parse(MathArray & array, unsigned flags, bool mathmode);
// Line number recorded at construction; used in error messages.
223 int lineno() const { return lineno_; }
// Main token loop: fills the cells of `grid`.
229 void parse1(MathGridInset & grid, unsigned flags, bool mathmode, bool numbered);
// Runs parse1 on the grid inset held by `at`.
231 void parse2(MathAtom & at, unsigned flags, bool mathmode, bool numbered);
232 /// get arg delimited by 'left' and 'right'
233 string getArg(char left, char right);
// Reports a parse error on lyxerr.
237 void error(string const & msg);
238 /// dump contents to screen
// Tokenizes the contents of a stream.
241 void tokenize(istream & is);
// Tokenizes a string buffer and appends the tokens to tokens_.
243 void tokenize(string const & s);
// Skips space and newline characters on `is`, starting with `c`.
245 void skipSpaceTokens(istream & is, char c);
// Appends a token to tokens_.
247 void push_back(Token const & t);
// Token just before the current position.
251 Token const & prevToken() const;
// Token at the current position, without consuming it.
253 Token const & nextToken() const;
// Token at the current position; advances the position.
255 Token const & getToken();
256 /// skips spaces if any
// No definition of lex() is visible in this extract.
259 void lex(string const & s);
// Token list produced by tokenize() and read through pos_.
266 vector<Token> tokens_;
// Builds a parser from a lexer: records the lexer's current line number,
// starts at token position 0 and tokenizes the lexer's stream.
272 Parser::Parser(LyXLex & lexer)
273 : lineno_(lexer.getLineNo()), pos_(0)
275 tokenize(lexer.getStream());
// Builds a parser from a stream, with line number 0 and token position 0.
// The constructor body is not visible in this extract.
280 Parser::Parser(istream & is)
281 : lineno_(0), pos_(0)
// Appends `t` to the end of the token list.
287 void Parser::push_back(Token const & t)
289 tokens_.push_back(t);
293 void Parser::pop_back()
// Returns the token just before the current position, or a
// default-constructed dummy token when the position is 0.
299 Token const & Parser::prevToken() const
// Static so the returned reference stays valid after the call.
301 static const Token dummy;
302 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
// Returns the token at the current position without consuming it, or a
// default-constructed dummy token when no tokens remain.
306 Token const & Parser::nextToken() const
// Static so the returned reference stays valid after the call.
308 static const Token dummy;
309 return good() ? tokens_[pos_] : dummy;
// Returns the token at the current position and advances past it. When no
// tokens remain it returns a default-constructed dummy token and leaves the
// position unchanged.
313 Token const & Parser::getToken()
// Static so the returned reference stays valid after the call.
315 static const Token dummy;
316 //lyxerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
317 return good() ? tokens_[pos_++] : dummy;
// Loops while the next token has category catSpace. The loop body is not
// visible in this extract (presumably it consumes the token; confirm).
321 void Parser::skipSpaces()
323 while (nextToken().cat() == catSpace)
328 void Parser::putback()
// True while the current position still refers to a token in tokens_.
334 bool Parser::good() const
336 return pos_ < tokens_.size();
// Returns the character of the token at the current position and advances
// past it.
// NOTE(review): the statement guarding the error() call is not visible in
// this extract. If it only logs when no tokens remain, the tokens_[pos_++]
// below would still index past the end of tokens_; confirm.
340 char Parser::getChar()
343 error("The input stream is not well...");
344 return tokens_[pos_++].character();
// Reads an argument delimited by `left` and `right`. Only the collecting
// loop is visible in this extract: it keeps calling getChar() until that
// returns `right` or no tokens remain.
348 string Parser::getArg(char left, char right)
358 while ((c = getChar()) != right && good())
// Skips characters whose category is catSpace or catNewline, starting with
// `c`. The loop body and whatever follows it are not visible in this
// extract; the commented-out log line suggests the first other character
// is put back on the stream (confirm).
365 void Parser::skipSpaceTokens(istream & is, char c)
367 // skip trailing spaces
368 while (catcode(c) == catSpace || catcode(c) == catNewline)
371 //lyxerr << "putting back: " << c << "\n";
// Stream overload of tokenize. The reading loop and the final hand-off of
// the collected text are not visible in this extract.
376 void Parser::tokenize(istream & is)
378 // eat everything up to the next \end_inset or end of stream
379 // and store it in s for further tokenization
// "\end_inset" is 10 characters long: once `s` ends with it, strip it.
384 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
385 s = s.substr(0, s.size() - 10);
// Splits `buffer` into Tokens appended to tokens_, dispatching on the
// category code of each character read. The case labels and several
// statements are not visible in this extract, so the notes below cover only
// the visible lines.
395 void Parser::tokenize(string const & buffer)
// Flag for one-time setup; the code that uses it is not visible here.
397 static bool init_done = false;
// c_str() stops at the first NUL byte, so anything in `buffer` after an
// embedded NUL would not be read.
404 istringstream is(buffer.c_str(), ios::in | ios::binary);
408 //lyxerr << "reading c: " << c << "\n";
410 switch (catcode(c)) {
// A second newline in a row is currently ignored (the "par" token is
// commented out).
414 if (catcode(c) == catNewline)
415 ; //push_back(Token("par"));
417 push_back(Token('\n', catNewline));
// Read on until a newline character or the end of the stream.
424 while (is.get(c) && catcode(c) != catNewline)
433 error("unexpected end of input");
// A letter starts a run of letters, after which following whitespace is
// skipped.
436 if (catcode(c) == catLetter) {
438 while (is.get(c) && catcode(c) == catLetter)
440 skipSpaceTokens(is, c);
449 push_back(Token(c, catcode(c)));
451 skipSpaceTokens(is, c);
456 lyxerr << "ignoring a char: " << int(c) << "\n";
// The character becomes a token carrying its own category code.
461 push_back(Token(c, catcode(c)));
// Debug helper: writes all tokens, then the current position, to lyxerr.
// Any other per-token output inside the loop is not visible in this extract.
471 void Parser::dump() const
473 lyxerr << "\nTokens: ";
474 for (unsigned i = 0; i < tokens_.size(); ++i) {
477 lyxerr << tokens_[i];
479 lyxerr << " pos: " << pos_ << "\n";
// Logs `msg` to lyxerr, prefixed with the line number recorded at
// construction. Nothing but the log statement is visible in this extract.
483 void Parser::error(string const & msg)
485 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parses the token list into a single atom stored in `at`. The return
// statements are not visible in this extract.
491 bool Parser::parse(MathAtom & at)
// Both `false` arguments: no flags (converts to 0) and mathmode off.
495 parse(ar, false, false);
// Anything other than exactly one atom, or one whose type is "none", is
// logged and `at` is reset to a new MathParInset.
496 if (ar.size() != 1 || ar.front()->getType() == "none") {
497 lyxerr << "unusual contents found: " << ar << endl;
498 at.reset(new MathParInset);
// The condition guarding this second log line is not visible here.
502 lyxerr << "unusual contents found: " << ar << endl;
// Parses into a flat array: runs parse1 on a temporary 1x1 grid (with
// numbering off) and copies the grid's cell 0 into `array`.
510 void Parser::parse(MathArray & array, unsigned flags, bool mathmode)
512 MathGridInset grid(1, 1);
513 parse1(grid, flags, mathmode, false);
514 array = grid.cell(0);
// Runs parse1 on the grid inset held by `at`, forwarding all other
// arguments. The result of asGridInset() is dereferenced without a check,
// so `at` must hold a grid inset.
518 void Parser::parse2(MathAtom & at, unsigned flags,
519 bool mathmode, bool numbered)
521 parse1(*(at->asGridInset()), flags, mathmode, numbered);
// Main token loop of the parser: takes tokens with getToken() and appends
// the resulting atoms to the current cell of `grid`.
//   grid     - grid inset being filled; parsing starts in cell (0, 0). The
//              '&' and "\\" branches add a column or row when the cell index
//              reaches grid.ncols() or grid.nrows().
//   flags    - OR-ed FLAG_* bits selecting what ends this parse (closing
//              brace or bracket, \right, \end, $, \), \], single item, ...).
//   mathmode - passed on to nested parses; space and newline tokens are
//              stored as characters only when it is false.
//   numbered - forwarded to numbered() for the rows of a hull inset.
// Many lines of this function (loop head, braces, `else` keywords, returns)
// are not visible in this extract; the comments describe the visible
// statements only.
525 void Parser::parse1(MathGridInset & grid, unsigned flags,
526 bool mathmode, bool numbered)
529 MathGridInset::row_type cellrow = 0;
530 MathGridInset::col_type cellcol = 0;
531 MathArray * cell = &grid.cell(grid.index(cellrow, cellcol));
533 if (grid.asHullInset())
534 grid.asHullInset()->numbered(cellrow, numbered);
537 //lyxerr << "grid: " << grid << endl;
540 Token const & t = getToken();
543 lyxerr << "t: " << t << " flags: " << flags << "\n";
// FLAG_ITEM: read exactly one item, either a braced group or a single token.
548 if (flags & FLAG_ITEM) {
549 if (t.cat() == catSpace)
553 if (t.cat() == catBegin) {
554 // skip the brace and collect everything to the next matching
556 flags |= FLAG_BRACE_LAST;
560 // handle only this single token, leave the loop if done
// FLAG_OPTION: read a [...] option if one follows.
565 if (flags & FLAG_OPTION) {
566 if (t.cat() == catOther && t.character() == '[') {
567 // skip the bracket and collect everything to the closing bracket
568 flags |= FLAG_BRACK_LAST;
572 // no option found, put back token and we are done
// '$' handling.
580 if (t.cat() == catMath) {
582 // we are inside some text mode thingy, so opening new math is allowed
583 Token const & n = getToken();
584 if (n.cat() == catMath) {
585 // TeX's $$...$$ syntax for displayed math
586 cell->push_back(MathAtom(new MathHullInset("equation")));
587 parse2(cell->back(), FLAG_SIMPLE, true, false);
588 getToken(); // skip the second '$' token
590 // simple $...$ stuff
592 cell->push_back(MathAtom(new MathHullInset("simple")));
593 parse2(cell->back(), FLAG_SIMPLE, true, false);
597 else if (flags & FLAG_SIMPLE) {
598 // this is the end of the formula
603 error("something strange in the parser\n");
// Letters always become character insets; spaces and newlines only outside
// math mode.
608 else if (t.cat() == catLetter)
609 cell->push_back(MathAtom(new MathCharInset(t.character())));
611 else if (t.cat() == catSpace && !mathmode)
612 cell->push_back(MathAtom(new MathCharInset(t.character())));
614 else if (t.cat() == catNewline && !mathmode)
615 cell->push_back(MathAtom(new MathCharInset(t.character())));
// '#': the character of the following token, taken as a digit, gives the
// macro argument number.
617 else if (t.cat() == catParameter) {
618 Token const & n = getToken();
619 cell->push_back(MathAtom(new MathMacroArgument(n.character()-'0')));
// '{': parse up to the matching '}' into a separate array.
622 else if (t.cat() == catBegin) {
624 parse(ar, FLAG_BRACE_LAST, mathmode);
625 // do not create a BraceInset if they were written by LyX
626 // this helps to keep the annoyance of "a choose b" to a minimum
627 if (ar.size() == 1 && ar[0]->extraBraces())
630 cell->push_back(MathAtom(new MathBraceInset(ar)));
633 else if (t.cat() == catEnd) {
634 if (flags & FLAG_BRACE_LAST)
636 error("found '}' unexpectedly");
638 //add(cell, '}', LM_TC_TEX);
// '&': continue in the next column, adding one to the grid if needed.
641 else if (t.cat() == catAlign) {
643 //lyxerr << " column now " << cellcol << " max: " << grid.ncols() << "\n";
644 if (cellcol == grid.ncols()) {
645 lyxerr << "adding column " << cellcol << "\n";
646 grid.addCol(cellcol - 1);
648 cell = &grid.cell(grid.index(cellrow, cellcol));
// '^' / '_': attach a super- or subscript.
651 else if (t.cat() == catSuper || t.cat() == catSub) {
652 bool up = (t.cat() == catSuper);
653 // we need no new script inset if the last thing was a script inset
654 // that does not have this script yet
655 if (cell->size() && cell->back()->asScriptInset() &&
656 !cell->back()->asScriptInset()->has(up))
657 cell->back()->asScriptInset()->ensure(up);
// An empty cell has no back() to inspect; it gets a fresh script inset just
// like a preceding script inset that already carries this script. The
// emptiness test must come before any cell->back() call.
658 else if (!cell->size() || cell->back()->asScriptInset())
659 cell->push_back(MathAtom(new MathScriptInset(up)));
// Wrap the last atom as the nucleus of a new script inset.
661 cell->back() = MathAtom(new MathScriptInset(cell->back(), up));
662 MathScriptInset * p = cell->back()->asScriptInset();
663 parse(p->cell(up), FLAG_ITEM, mathmode);
668 else if (t.character() == ']' && (flags & FLAG_BRACK_LAST))
671 else if (t.cat() == catOther)
672 cell->push_back(MathAtom(new MathCharInset(t.character())));
// \def and \newcommand: read name, argument count and body, then store a
// macro template.
678 else if (t.cs() == "def" || t.cs() == "newcommand") {
681 if (t.cs() == "def") {
683 name = getToken().cs();
// Collect the parameter text up to the opening brace of the body.
687 while (good() && nextToken().cat() != catBegin) {
688 pars += getToken().cs();
692 //lyxerr << "read \\def parameter list '" << pars << "'\n";
694 } else { // t.cs() == "newcommand"
696 if (getToken().cat() != catBegin) {
697 error("'{' in \\newcommand expected (1) \n");
701 name = getToken().cs();
703 if (getToken().cat() != catEnd) {
704 error("'}' in \\newcommand expected\n");
708 string arg = getArg('[', ']');
710 nargs = atoi(arg.c_str());
715 parse(ar1, FLAG_ITEM, true);
717 // we cannot handle recursive stuff at all
719 //test.push_back(createMathInset(name));
720 //if (ar1.contains(test)) {
721 // error("we cannot handle recursive macros at all.\n");
725 // is a version for display attached?
728 if (nextToken().cat() == catBegin) {
729 parse(ar2, FLAG_ITEM, true);
732 cell->push_back(MathAtom(new MathMacroTemplate(name, nargs, ar1, ar2)));
// \( ... \) and \[ ... \] open inline and displayed formulas.
735 else if (t.cs() == "(") {
736 cell->push_back(MathAtom(new MathHullInset("simple")));
737 parse2(cell->back(), FLAG_SIMPLE2, true, false);
740 else if (t.cs() == "[") {
741 cell->push_back(MathAtom(new MathHullInset("equation")));
742 parse2(cell->back(), FLAG_EQUATION, true, false);
745 else if (t.cs() == "protect")
746 // ignore \\protect, will hopefully be re-added during output
749 else if (t.cs() == "end") {
750 if (flags & FLAG_END) {
751 // eat environment name
752 //string const name =
754 // FIXME: check that we ended the correct environment
757 error("found 'end' unexpectedly");
760 else if (t.cs() == ")") {
761 if (flags & FLAG_SIMPLE2)
763 error("found '\\)' unexpectedly");
766 else if (t.cs() == "]") {
767 if (flags & FLAG_EQUATION)
769 error("found '\\]' unexpectedly");
// "\\": an optional [length] sets the vertical skip, then parsing continues
// in the next row, which is added to the grid if needed.
772 else if (t.cs() == "\\") {
773 grid.vcrskip(LyXLength(getArg('[', ']')), cellrow);
776 if (cellrow == grid.nrows())
777 grid.addRow(cellrow - 1);
778 if (grid.asHullInset())
779 grid.asHullInset()->numbered(cellrow, numbered);
780 cell = &grid.cell(grid.index(cellrow, cellcol));
784 else if (t.cs() == "multicolumn") {
785 // extract column count and insert dummy cells
787 parse(count, FLAG_ITEM, mathmode);
789 if (!extractNumber(count, cols)) {
790 lyxerr << " can't extract number of cells from " << count << "\n";
792 // resize the table if necessary
793 for (int i = 0; i < cols; ++i) {
795 if (cellcol == grid.ncols()) {
796 lyxerr << "adding column " << cellcol << "\n";
797 grid.addCol(cellcol - 1);
799 cell = &grid.cell(grid.index(cellrow, cellcol));
800 // mark this as dummy
801 grid.cellinfo(grid.index(cellrow, cellcol)).dummy_ = true;
803 // the last cell is the real thing, not a dummy
804 grid.cellinfo(grid.index(cellrow, cellcol)).dummy_ = false;
806 // read special alignment
808 parse(align, FLAG_ITEM, mathmode);
809 //grid.cellinfo(grid.index(cellrow, cellcol)).align_ = extractString(align);
811 // parse the remaining contents into the "real" cell
812 parse(*cell, FLAG_ITEM, mathmode);
816 else if (t.cs() == "limits")
819 else if (t.cs() == "nolimits")
// \nonumber and \number switch numbering of the current row off and on.
822 else if (t.cs() == "nonumber") {
823 if (grid.asHullInset())
824 grid.asHullInset()->numbered(cellrow, false);
827 else if (t.cs() == "number") {
828 if (grid.asHullInset())
829 grid.asHullInset()->numbered(cellrow, true);
832 else if (t.cs() == "hline") {
833 if (grid.asHullInset())
834 grid.asHullInset()->rowinfo(cellrow + 1);
// \sqrt: the visible lines build either a MathRootInset (option stored in
// cell 0, radicand parsed into cell 1) or a plain MathSqrtInset; the
// condition choosing between them is not visible here.
837 else if (t.cs() == "sqrt") {
839 parse(ar, FLAG_OPTION, mathmode);
841 cell->push_back(MathAtom(new MathRootInset));
842 cell->back()->cell(0) = ar;
843 parse(cell->back()->cell(1), FLAG_ITEM, mathmode);
845 cell->push_back(MathAtom(new MathSqrtInset));
846 parse(cell->back()->cell(0), FLAG_ITEM, mathmode);
850 else if (t.cs() == "ref") {
851 cell->push_back(MathAtom(new RefInset));
852 parse(cell->back()->cell(1), FLAG_OPTION, mathmode);
853 parse(cell->back()->cell(0), FLAG_ITEM, mathmode);
// \left<l> ... \right<r>: the tokens following \left and \right are the
// delimiters.
856 else if (t.cs() == "left") {
857 string l = getToken().asString();
859 parse(ar, FLAG_RIGHT, mathmode);
860 string r = getToken().asString();
861 cell->push_back(MathAtom(new MathDelimInset(l, r, ar)));
864 else if (t.cs() == "right") {
865 if (flags & FLAG_RIGHT)
867 //lyxerr << "got so far: '" << cell << "'\n";
868 error("Unmatched right delimiter");
// \begin{name}: choose the inset by environment name. Hull environments are
// numbered unless the name is starred.
872 else if (t.cs() == "begin") {
873 string const name = getArg('{', '}');
874 if (name == "array" || name == "subarray") {
// Appending 'c' guarantees valign[0] exists and makes 'c' the value used
// when getArg returns an empty string.
875 string const valign = getArg('[', ']') + 'c';
876 string const halign = getArg('{', '}');
877 cell->push_back(MathAtom(new MathArrayInset(name, valign[0], halign)));
878 parse2(cell->back(), FLAG_END, mathmode, false);
881 else if (name == "split" || name == "cases" ||
882 name == "gathered" || name == "aligned") {
883 cell->push_back(createMathInset(name));
884 parse2(cell->back(), FLAG_END, mathmode, false);
887 else if (name == "math") {
888 cell->push_back(MathAtom(new MathHullInset("simple")));
889 parse2(cell->back(), FLAG_END, true, true);
892 else if (name == "equation" || name == "equation*"
893 || name == "displaymath") {
894 cell->push_back(MathAtom(new MathHullInset("equation")));
895 parse2(cell->back(), FLAG_END, true, (name == "equation"));
898 else if (name == "eqnarray" || name == "eqnarray*") {
899 cell->push_back(MathAtom(new MathHullInset("eqnarray")));
900 parse2(cell->back(), FLAG_END, true, !stared(name));
903 else if (name == "align" || name == "align*") {
904 cell->push_back(MathAtom(new MathHullInset("align")));
905 parse2(cell->back(), FLAG_END, true, !stared(name));
908 else if (name == "alignat" || name == "alignat*") {
909 // ignore this for a while
911 cell->push_back(MathAtom(new MathHullInset("alignat")));
912 parse2(cell->back(), FLAG_END, true, !stared(name));
915 else if (name == "xalignat" || name == "xalignat*") {
916 // ignore this for a while
918 cell->push_back(MathAtom(new MathHullInset("xalignat")));
919 parse2(cell->back(), FLAG_END, true, !stared(name));
922 else if (name == "xxalignat") {
923 // ignore this for a while
925 cell->push_back(MathAtom(new MathHullInset("xxalignat")));
926 parse2(cell->back(), FLAG_END, true, !stared(name));
929 else if (name == "multline" || name == "multline*") {
930 cell->push_back(MathAtom(new MathHullInset("multline")));
931 parse2(cell->back(), FLAG_END, true, !stared(name));
934 else if (name == "gather" || name == "gather*") {
935 cell->push_back(MathAtom(new MathHullInset("gather")));
936 parse2(cell->back(), FLAG_END, true, !stared(name));
939 else if (latexkeys const * l = in_word_set(name)) {
940 if (l->inset == "matrix") {
941 cell->push_back(createMathInset(name));
942 parse2(cell->back(), FLAG_END, mathmode, false);
947 // lyxerr << "unknow math inset begin '" << name << "'\n";
948 // create generic environment inset
949 cell->push_back(MathAtom(new MathEnvInset(name)));
950 parse(cell->back()->cell(0), FLAG_END, mathmode);
954 else if (t.cs() == "kern") {
// This `t` shadows the outer token for the rest of the branch.
960 Token const & t = getToken();
966 if (isValidLength(s))
969 cell->push_back(MathAtom(new MathKernInset(s)));
// \label: inside a hull inset the label is attached to the current row;
// the other visible lines emit the command followed by its braced argument.
972 else if (t.cs() == "label") {
974 parse(ar, FLAG_ITEM, false);
975 if (grid.asHullInset()) {
976 grid.asHullInset()->label(cellrow, asString(ar));
978 cell->push_back(createMathInset(t.cs()));
979 cell->push_back(MathAtom(new MathBraceInset(ar)));
// Infix commands: what follows is parsed into cell 1 with the current flags.
983 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
984 MathAtom p = createMathInset(t.cs());
987 parse(p->cell(1), flags, mathmode);
992 else if (t.cs() == "substack") {
993 cell->push_back(createMathInset(t.cs()));
994 parse2(cell->back(), FLAG_ITEM, mathmode, false);
997 else if (t.cs() == "xymatrix") {
998 cell->push_back(createMathInset(t.cs()));
999 parse2(cell->back(), FLAG_ITEM, mathmode, false);
// \ar (xy arrow): optional [target], then an optional ^ or _ label.
1004 else if (1 && t.cs() == "ar") {
1005 MathXYArrowInset * p = new MathXYArrowInset;
1006 // try to read target
// FLAG_OPTION is the declared enumerator for reading a [...] option, as in
// the \sqrt, \ref and parbox branches; "FLAG_OTPTION" is not declared
// among the FLAG_* values.
1007 parse(p->cell(0), FLAG_OPTION, mathmode);
1008 // try to read label
1009 if (nextToken().cat() == catSuper || nextToken().cat() == catSub) {
1010 p->up_ = nextToken().cat() == catSuper;
1012 parse(p->cell(1), FLAG_ITEM, mathmode);
1013 //lyxerr << "read label: " << p->cell(1) << "\n";
1016 cell->push_back(MathAtom(p));
1017 //lyxerr << "read cell: " << cell << "\n";
// Any other control sequence: look it up in the keyword table and pick the
// argument-parsing strategy from its `inset` and `extra` fields.
1021 else if (t.cs().size()) {
1022 latexkeys const * l = in_word_set(t.cs());
1024 if (l->inset == "font") {
1025 cell->push_back(createMathInset(t.cs()));
1026 parse(cell->back()->cell(0), FLAG_ITEM, l->extra == "mathmode");
1029 else if (l->inset == "oldfont") {
1030 cell->push_back(createMathInset(t.cs()));
1031 parse(cell->back()->cell(0), flags, l->extra == "mathmode");
1035 else if (l->inset == "style") {
1036 cell->push_back(createMathInset(t.cs()));
1037 parse(cell->back()->cell(0), flags, mathmode);
1041 else if (l->inset == "parbox") {
1042 // read optional positioning and width
1043 MathArray pos, width;
1044 parse(pos, FLAG_OPTION, false);
1045 parse(width, FLAG_ITEM, false);
1046 cell->push_back(createMathInset(t.cs()));
1047 parse(cell->back()->cell(0), FLAG_ITEM, false);
1048 cell->back()->asParboxInset()->setPosition(asString(pos));
1049 cell->back()->asParboxInset()->setWidth(asString(width));
// Known keyword of another kind: one item per argument; math mode unless
// the keyword is marked "forcetext".
1053 MathAtom p = createMathInset(t.cs());
1054 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1055 parse(p->cell(i), FLAG_ITEM, l->extra != "forcetext");
// The mode for the arguments starts as the current one; the two tests below
// compare it with the inset's own mode (the statements that adjust `mode`
// are not visible in this extract).
1061 MathAtom p = createMathInset(t.cs());
1062 bool mode = mathmode;
1063 if (mathmode && p->currentMode() == MathInset::TEXT_MODE)
1065 if (!mathmode && p->currentMode() == MathInset::MATH_MODE)
1067 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1068 parse(p->cell(i), FLAG_ITEM, mode);
// FLAG_LEAVE is cleared once it has been seen at the end of an iteration.
1074 if (flags & FLAG_LEAVE) {
1075 flags &= ~FLAG_LEAVE;
1083 } // anonymous namespace
// Parses the math source in `str` into `ar` by wrapping the string in a
// stream and calling the stream overload. c_str() stops at the first NUL
// byte.
1086 void mathed_parse_cell(MathArray & ar, string const & str)
1088 istringstream is(str.c_str());
1089 mathed_parse_cell(ar, is);
// Parses the contents of `is` into `ar` with no flags set and math mode on.
1093 void mathed_parse_cell(MathArray & ar, istream & is)
1095 Parser(is).parse(ar, 0, true);
// Parses the math source in `str` into the single atom `t` and returns the
// result of Parser::parse. c_str() stops at the first NUL byte.
1099 bool mathed_parse_normal(MathAtom & t, string const & str)
1101 istringstream is(str.c_str());
1102 return Parser(is).parse(t);
// Parses the contents of `is` into the single atom `t` and returns the
// result of Parser::parse.
1106 bool mathed_parse_normal(MathAtom & t, istream & is)
1108 return Parser(is).parse(t);
// Parses from a LyXLex (the Parser constructor takes the lexer's stream and
// line number) into the single atom `t` and returns the result of
// Parser::parse.
1112 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1114 return Parser(lex).parse(t);