2 \author André Pönitz (2001)
7 If someone desperately needs partial "structures" (such as a few
8 cells of an array inset or similar) (s)he could use the
9 following hack as starting point to write some macros:
14 \def\makeamptab{\catcode`\&=4\relax}
15 \def\makeampletter{\catcode`\&=11\relax}
16 \def\b{\makeampletter\expandafter\makeamptab\bi}
36 #pragma implementation
39 #include "math_parser.h"
40 #include "math_inset.h"
41 #include "math_arrayinset.h"
42 #include "math_braceinset.h"
43 #include "math_boxinset.h"
44 #include "math_charinset.h"
45 #include "math_deliminset.h"
46 #include "math_envinset.h"
47 #include "math_extern.h"
48 #include "math_factory.h"
49 #include "math_kerninset.h"
50 #include "math_macro.h"
51 #include "math_macrotemplate.h"
52 #include "math_hullinset.h"
53 #include "math_parboxinset.h"
54 #include "math_parinset.h"
55 #include "math_rootinset.h"
56 #include "math_sizeinset.h"
57 #include "math_sqrtinset.h"
58 #include "math_scriptinset.h"
59 #include "math_sqrtinset.h"
60 #include "math_support.h"
61 #include "math_xyarrowinset.h"
63 //#include "insets/insetref.h"
64 #include "ref_inset.h"
68 #include "support/LAssert.h"
69 #include "support/lstrings.h"
/// Tells whether an environment name is "starred": returns true exactly when
/// \p s is non-empty and its last character is '*' (e.g. "align*").
bool stared(string const & s)
{
	string::size_type const n = s.size();
	// the n > 0 test guards the s[n - 1] access for the empty string
	return n > 0 && s[n - 1] == '*';
}
// Character category codes used by the tokenizer. The number in each trailing
// comment is the code's value in TeX's own numbering.
97 // These are TeX's catcodes
99 catEscape, // 0 backslash
109 catSpace, // 10 space
110 catLetter, // 11 a-zA-Z
111 catOther, // 12 none of the above
114 catInvalid // 15 <delete>
// Lookup table giving the category code of each of the 256 possible byte values.
117 CatCode theCatcode[256];
// Returns the category code stored in theCatcode for character c. The parameter
// is an unsigned char, so the index always lies inside the 256-entry table.
120 inline CatCode catcode(unsigned char c)
122 return theCatcode[c];
// Bit flags for the 'flags' argument of the Parser::parse* functions. Each
// value is a distinct bit, so flags can be OR-ed together and tested with '&'.
127 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
128 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
129 FLAG_END = 1 << 3, // next \\end ends the parsing process
130 FLAG_BRACK_LAST = 1 << 4, // next closing bracket ends the parsing
131 FLAG_TEXTMODE = 1 << 5, // we are in a box
132 FLAG_ITEM = 1 << 6, // read a (possibly braced token)
133 FLAG_LEAVE = 1 << 7, // leave the loop at the end
134 FLAG_SIMPLE = 1 << 8, // next $ leaves the loop
135 FLAG_EQUATION = 1 << 9, // next \] leaves the loop
136 FLAG_SIMPLE2 = 1 << 10, // next \) leaves the loop
137 FLAG_OPTION = 1 << 11 // read [...] style option
// Fill the catcode table: every byte defaults to catOther, the ASCII letters
// a-z and A-Z become catLetter, and the special characters below then get
// their individual category codes.
143 fill(theCatcode, theCatcode + 256, catOther);
144 fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
145 fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
147 theCatcode['\\'] = catEscape;
148 theCatcode['{'] = catBegin;
149 theCatcode['}'] = catEnd;
150 theCatcode['$'] = catMath;
151 theCatcode['&'] = catAlign;
152 theCatcode['\n'] = catNewline;
153 theCatcode['#'] = catParameter;
154 theCatcode['^'] = catSuper;
155 theCatcode['_'] = catSub;
// DEL (0x7f) is written as an escape sequence so that the character literal
// contains no raw control byte and stays on a single line.
156 theCatcode['\x7f'] = catIgnore;
157 theCatcode[' '] = catSpace;
158 theCatcode['\t'] = catSpace;
159 theCatcode['\r'] = catSpace;
160 theCatcode['~'] = catActive;
161 theCatcode['%'] = catComment;
// A single lexical token: either a control sequence (name held in cs_) or a
// single character together with its category code.
167 // Helper class for parsing
// Default token: empty control-sequence name, character 0, category catIgnore.
173 Token() : cs_(), char_(0), cat_(catIgnore) {}
// Character token carrying character c and category cat.
175 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
// Control-sequence token named cs; character is 0 and category is catIgnore.
177 Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
// Control-sequence name (empty for tokens built from a character).
180 string const & cs() const { return cs_; }
// Category code of the token.
182 CatCode cat() const { return cat_; }
// Character of the token (0 for tokens built from a control-sequence name).
184 char character() const { return char_; }
// Textual form of the token; defined out of line.
186 string asString() const;
// True when the token's control-sequence name is a single backslash (i.e. the
// input was "\\"), "cr" or "crcr".
199 bool Token::isCR() const
201 return cs_ == "\\" || cs_ == "cr" || cs_ == "crcr";
// Returns the control-sequence name when it is non-empty, otherwise a
// one-character string holding the token's character.
204 string Token::asString() const
206 return cs_.size() ? cs_ : string(1, char_);
// Stream output for a Token. Two forms are written: a backslash followed by
// the control-sequence name, or "[character,catcode]".
// NOTE(review): the condition selecting between the two forms is not visible here.
209 ostream & operator<<(ostream & os, Token const & t)
212 os << "\\" << t.cs();
214 os << "[" << t.character() << "," << t.cat() << "]";
// Parser interface: the input is tokenized into tokens_ and the parse*
// functions then walk that token list.
// Construct from a LyXLex.
223 Parser(LyXLex & lex);
// Construct from an input stream.
225 Parser(istream & is);
// Parse into a single atom.
228 bool parse(MathAtom & at);
// Parse into a MathArray; 'flags' is a combination of FLAG_* bits.
230 void parse(MathArray & array, unsigned flags, bool mathmode);
// Line number stored in lineno_.
232 int lineno() const { return lineno_; }
// Parse into the cells of a grid inset.
238 void parse1(MathGridInset & grid, unsigned flags, bool mathmode, bool numbered);
// Parse into the grid inset held by 'at'.
240 void parse2(MathAtom & at, unsigned flags, bool mathmode, bool numbered);
241 /// get arg delimited by 'left' and 'right'
242 string getArg(char left, char right);
// Report a parse error message.
246 void error(string const & msg);
247 /// dump contents to screen
250 void tokenize(istream & is);
// Split the string s into tokens appended to tokens_.
252 void tokenize(string const & s);
254 void skipSpaceTokens(istream & is, char c);
// Append a token to tokens_.
256 void push_back(Token const & t);
// Token before the current position.
260 Token const & prevToken() const;
// Token at the current position, not consumed.
262 Token const & nextToken() const;
// Token at the current position, consumed.
264 Token const & getToken();
265 /// skips spaces if any
268 void lex(string const & s);
// The token list produced by tokenize().
275 vector<Token> tokens_;
// Builds a parser from a LyXLex: stores the lexer's current line number in
// lineno_ (printed by error()), starts at token position 0 and tokenizes the
// lexer's stream.
281 Parser::Parser(LyXLex & lexer)
282 : lineno_(lexer.getLineNo()), pos_(0)
284 tokenize(lexer.getStream());
// Builds a parser from a plain input stream; line number and token position
// both start at 0.
// NOTE(review): the constructor body is not visible here.
289 Parser::Parser(istream & is)
290 : lineno_(0), pos_(0)
// Appends token t to the end of the token list.
296 void Parser::push_back(Token const & t)
298 tokens_.push_back(t);
302 void Parser::pop_back()
// Returns the token just before the current position, or a default-constructed
// dummy token when the position is 0.
308 Token const & Parser::prevToken() const
310 static const Token dummy;
311 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
// Peeks at the token at the current position without advancing; returns a
// default-constructed dummy token when good() is false.
315 Token const & Parser::nextToken() const
317 static const Token dummy;
318 return good() ? tokens_[pos_] : dummy;
// Returns the token at the current position and advances past it; when good()
// is false the position is left unchanged and a default-constructed dummy
// token is returned.
322 Token const & Parser::getToken()
324 static const Token dummy;
325 //lyxerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
326 return good() ? tokens_[pos_++] : dummy;
// Loops for as long as the next token has category catSpace.
// NOTE(review): the loop body is not visible here; presumably it consumes the
// space token - confirm.
330 void Parser::skipSpaces()
332 while (nextToken().cat() == catSpace)
337 void Parser::putback()
// True while the current position is a valid index into the token list, i.e.
// there is at least one token left to read.
343 bool Parser::good() const
345 return pos_ < tokens_.size();
// Returns the character of the token at the current position and advances.
// NOTE(review): the guard in front of the error() call is not visible here. As
// far as can be seen, tokens_[pos_++] is still read after the error has been
// reported, which would index past the end of tokens_ on exhausted input -
// confirm against the full function.
349 char Parser::getChar()
352 error("The input stream is not well...");
353 return tokens_[pos_++].character();
// Reads an argument delimited by 'left' and 'right'. The visible loop pulls
// characters with getChar() until it reads 'right' or no tokens remain.
// NOTE(review): the handling of 'left' and the accumulation of the result are
// not visible here.
357 string Parser::getArg(char left, char right)
367 while ((c = getChar()) != right && good())
// Stream variant of tokenize.
// NOTE(review): the code that fills 's' from the stream is not visible here.
374 void Parser::tokenize(istream & is)
376 // eat everything up to the next \end_inset or end of stream
377 // and store it in s for further tokenization
// 10 is the length of the literal "\end_inset": a trailing marker is cut off s
382 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
383 s = s.substr(0, s.size() - 10);
// Skips whitespace on the input stream, starting from the already-read
// character c: the loop runs while c is a space or a newline by catcode.
// NOTE(review): the loop body and the put-back step hinted at by the
// commented-out trace are not visible here.
393 void Parser::skipSpaceTokens(istream & is, char c)
395 // skip trailing spaces
396 while (catcode(c) == catSpace || catcode(c) == catNewline)
399 //lyxerr << "putting back: " << c << "\n";
// Splits 'buffer' into tokens appended via push_back(). Characters are read
// one at a time from a binary-mode string stream and dispatched on their catcode.
// NOTE(review): the case labels of the switch are not visible here, so the
// branch annotations below are inferred from the statements themselves.
404 void Parser::tokenize(string const & buffer)
// one-time initialisation guard (the guarded code is not visible here)
406 static bool init_done = false;
413 istringstream is(buffer.c_str(), ios::in | ios::binary);
417 //lyxerr << "reading c: " << c << "\n";
419 switch (catcode(c)) {
// a newline yields no token here (the "par" token is commented out) ...
423 if (catcode(c) == catNewline)
424 ; //push_back(Token("par"));
// ... while this statement emits a single space token
426 push_back(Token(' ', catSpace));
// consume input up to and including the next newline character
433 while (is.get(c) && catcode(c) != catNewline)
442 error("unexpected end of input");
// a run of letters is read as one unit, then following whitespace is skipped
445 if (catcode(c) == catLetter) {
447 while (is.get(c) && catcode(c) == catLetter)
449 skipSpaceTokens(is, c);
458 push_back(Token(c, catcode(c)));
460 skipSpaceTokens(is, c);
465 lyxerr << "ignoring a char: " << int(c) << "\n";
// a plain character token carrying its own catcode
470 push_back(Token(c, catcode(c)));
// Debug helper: writes every token in tokens_ followed by the current
// position to lyxerr.
480 void Parser::dump() const
482 lyxerr << "\nTokens: ";
483 for (unsigned i = 0; i < tokens_.size(); ++i) {
486 lyxerr << tokens_[i];
488 lyxerr << " pos: " << pos_ << "\n";
// Reports a parse error on lyxerr, prefixed with the stored line number. The
// message is only logged; nothing is thrown or returned.
492 void Parser::error(string const & msg)
494 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parses the token list into a temporary array and hands the result back via 'at'.
// NOTE(review): the declaration of 'ar', the return statements and the branch
// containing the second log line are not visible here.
501 bool Parser::parse(MathAtom & at)
// flags is passed as 'false', i.e. 0 (no FLAG_* bit set); mathmode is false
505 parse(ar, false, false);
// anything other than exactly one atom, or an atom of type "none", is logged
// as unusual and 'at' is reset to a fresh MathParInset
506 if (ar.size() != 1 || ar.front()->getType() == "none") {
507 lyxerr << "unusual contents found: " << ar << endl;
508 at.reset(new MathParInset);
512 lyxerr << "unusual contents found: " << ar << endl;
// Parses into a flat array: runs parse1() on a temporary 1x1 grid (numbering
// off) and copies that grid's cell 0 into 'array'.
520 void Parser::parse(MathArray & array, unsigned flags, bool mathmode)
522 MathGridInset grid(1, 1);
523 parse1(grid, flags, mathmode, false);
524 array = grid.cell(0);
// Parses into the grid inset held by 'at' by forwarding to parse1().
// The result of at->asGridInset() is dereferenced without a null check, so
// 'at' has to hold a grid inset.
528 void Parser::parse2(MathAtom & at, unsigned flags,
529 bool mathmode, bool numbered)
531 parse1(*(at->asGridInset()), flags, mathmode, numbered);
// Core of the parser: takes tokens one at a time and appends the resulting
// insets to the current cell of 'grid'.
//   grid     - grid being filled; parsing starts in the cell at row 0, column 0
//   flags    - OR-ed FLAG_* bits that select what ends this parse
//   mathmode - when false, space tokens are stored as characters
//   numbered - numbering state applied to a row when 'grid' is a hull inset
535 void Parser::parse1(MathGridInset & grid, unsigned flags,
536 bool mathmode, bool numbered)
539 MathGridInset::row_type cellrow = 0;
540 MathGridInset::col_type cellcol = 0;
541 MathArray * cell = &grid.cell(grid.index(cellrow, cellcol));
543 if (grid.asHullInset())
544 grid.asHullInset()->numbered(cellrow, numbered);
547 //lyxerr << "grid: " << grid << endl;
550 Token const & t = getToken();
553 lyxerr << "t: " << t << " flags: " << flags << "\n";
558 if (flags & FLAG_ITEM) {
559 if (t.cat() == catSpace)
563 if (t.cat() == catBegin) {
564 // skip the brace and collect everything to the next matching
566 flags |= FLAG_BRACE_LAST;
570 // handle only this single token, leave the loop if done
575 if (flags & FLAG_OPTION) {
576 if (t.cat() == catOther && t.character() == '[') {
577 // skip the bracket and collect everything to the closing bracket
578 flags |= FLAG_BRACK_LAST;
582 // no option found, put back token and we are done
590 if (t.cat() == catMath) {
592 // we are inside some text mode thingy, so opening new math is allowed
593 Token const & n = getToken();
594 if (n.cat() == catMath) {
595 // TeX's $$...$$ syntax for displayed math
596 cell->push_back(MathAtom(new MathHullInset("equation")));
597 parse2(cell->back(), FLAG_SIMPLE, true, false);
598 getToken(); // skip the second '$' token
600 // simple $...$ stuff
602 cell->push_back(MathAtom(new MathHullInset("simple")));
603 parse2(cell->back(), FLAG_SIMPLE, true, false);
607 else if (flags & FLAG_SIMPLE) {
608 // this is the end of the formula
613 error("something strange in the parser\n");
618 else if (t.cat() == catLetter)
619 cell->push_back(MathAtom(new MathCharInset(t.character())));
621 else if (t.cat() == catSpace && !mathmode)
622 cell->push_back(MathAtom(new MathCharInset(t.character())));
624 else if (t.cat() == catParameter) {
625 Token const & n = getToken();
// the character after '#' is taken as a digit and converted to its number
626 cell->push_back(MathAtom(new MathMacroArgument(n.character()-'0')));
629 else if (t.cat() == catBegin) {
631 parse(ar, FLAG_BRACE_LAST, mathmode);
632 // do not create a BraceInset if they were written by LyX
633 // this helps to keep the annoyance of "a choose b" to a minimum
634 if (ar.size() == 1 && ar[0]->extraBraces())
637 cell->push_back(MathAtom(new MathBraceInset(ar)));
640 else if (t.cat() == catEnd) {
641 if (flags & FLAG_BRACE_LAST)
643 error("found '}' unexpectedly");
645 //add(cell, '}', LM_TC_TEX);
648 else if (t.cat() == catAlign) {
650 //lyxerr << " column now " << cellcol << " max: " << grid.ncols() << "\n";
651 if (cellcol == grid.ncols()) {
652 lyxerr << "adding column " << cellcol << "\n";
653 grid.addCol(cellcol - 1);
655 cell = &grid.cell(grid.index(cellrow, cellcol));
658 else if (t.cat() == catSuper || t.cat() == catSub) {
659 bool up = (t.cat() == catSuper);
660 MathScriptInset * p = 0;
662 p = cell->back()->asScriptInset();
// start a new script inset when the last atom is not one or already has
// a script on this side
663 if (!p || p->has(up)) {
664 cell->push_back(MathAtom(new MathScriptInset(up)));
665 p = cell->back()->asScriptInset();
668 parse(p->cell(up), FLAG_ITEM, mathmode);
673 else if (t.character() == ']' && (flags & FLAG_BRACK_LAST))
676 else if (t.cat() == catOther)
677 cell->push_back(MathAtom(new MathCharInset(t.character())));
683 else if (t.cs() == "def" || t.cs() == "newcommand") {
686 if (t.cs() == "def") {
688 name = getToken().cs();
692 while (good() && nextToken().cat() != catBegin) {
693 pars += getToken().cs();
697 //lyxerr << "read \\def parameter list '" << pars << "'\n";
699 } else { // t.cs() == "newcommand"
701 if (getToken().cat() != catBegin) {
702 error("'{' in \\newcommand expected (1) \n");
706 name = getToken().cs();
708 if (getToken().cat() != catEnd) {
709 error("'}' in \\newcommand expected\n");
713 string arg = getArg('[', ']');
715 nargs = atoi(arg.c_str());
720 parse(ar1, FLAG_ITEM, true);
722 // we cannot handle recursive stuff at all
724 //test.push_back(createMathInset(name));
725 //if (ar1.contains(test)) {
726 // error("we cannot handle recursive macros at all.\n");
730 // is a version for display attached?
733 if (nextToken().cat() == catBegin) {
734 parse(ar2, FLAG_ITEM, true);
737 cell->push_back(MathAtom(new MathMacroTemplate(name, nargs, ar1, ar2)));
740 else if (t.cs() == "(") {
741 cell->push_back(MathAtom(new MathHullInset("simple")));
742 parse2(cell->back(), FLAG_SIMPLE2, true, false);
745 else if (t.cs() == "[") {
746 cell->push_back(MathAtom(new MathHullInset("equation")));
747 parse2(cell->back(), FLAG_EQUATION, true, false);
750 else if (t.cs() == "protect")
751 // ignore \\protect, will hopefully be re-added during output
754 else if (t.cs() == "end") {
755 if (flags & FLAG_END) {
756 // eat environment name
757 //string const name =
759 // FIXME: check that we ended the correct environment
762 error("found 'end' unexpectedly");
765 else if (t.cs() == ")") {
766 if (flags & FLAG_SIMPLE2)
768 error("found '\\)' unexpectedly");
771 else if (t.cs() == "]") {
772 if (flags & FLAG_EQUATION)
774 error("found '\\]' unexpectedly");
777 else if (t.cs() == "\\") {
778 grid.vcrskip(LyXLength(getArg('[', ']')), cellrow);
781 if (cellrow == grid.nrows())
782 grid.addRow(cellrow - 1);
783 if (grid.asHullInset())
784 grid.asHullInset()->numbered(cellrow, numbered);
785 cell = &grid.cell(grid.index(cellrow, cellcol));
789 else if (t.cs() == "multicolumn") {
790 // extract column count and insert dummy cells
792 parse(count, FLAG_ITEM, mathmode);
794 if (!extractNumber(count, cols)) {
795 lyxerr << " can't extract number of cells from " << count << "\n";
797 // resize the table if necessary
798 for (int i = 0; i < cols; ++i) {
800 if (cellcol == grid.ncols()) {
801 lyxerr << "adding column " << cellcol << "\n";
802 grid.addCol(cellcol - 1);
804 cell = &grid.cell(grid.index(cellrow, cellcol));
805 // mark this as dummy
806 grid.cellinfo(grid.index(cellrow, cellcol)).dummy_ = true;
808 // the last cell is the real thing, not a dummy
809 grid.cellinfo(grid.index(cellrow, cellcol)).dummy_ = false;
811 // read special alignment
813 parse(align, FLAG_ITEM, mathmode);
814 //grid.cellinfo(grid.index(cellrow, cellcol)).align_ = extractString(align);
816 // parse the remaining contents into the "real" cell
817 parse(*cell, FLAG_ITEM, mathmode);
821 else if (t.cs() == "limits")
824 else if (t.cs() == "nolimits")
827 else if (t.cs() == "nonumber") {
828 if (grid.asHullInset())
829 grid.asHullInset()->numbered(cellrow, false);
832 else if (t.cs() == "number") {
833 if (grid.asHullInset())
834 grid.asHullInset()->numbered(cellrow, true);
837 else if (t.cs() == "hline") {
838 if (grid.asHullInset())
// NOTE(review): the value returned by rowinfo() is not used here - confirm
// whether an update of the row info was intended
839 grid.asHullInset()->rowinfo(cellrow + 1);
842 else if (t.cs() == "sqrt") {
844 parse(ar, FLAG_OPTION, mathmode);
846 cell->push_back(MathAtom(new MathRootInset));
847 cell->back()->cell(0) = ar;
848 parse(cell->back()->cell(1), FLAG_ITEM, mathmode);
850 cell->push_back(MathAtom(new MathSqrtInset));
851 parse(cell->back()->cell(0), FLAG_ITEM, mathmode);
855 else if (t.cs() == "ref") {
856 cell->push_back(MathAtom(new RefInset));
857 parse(cell->back()->cell(1), FLAG_OPTION, mathmode);
858 parse(cell->back()->cell(0), FLAG_ITEM, mathmode);
861 else if (t.cs() == "left") {
862 string l = getToken().asString();
864 parse(ar, FLAG_RIGHT, mathmode);
865 string r = getToken().asString();
866 cell->push_back(MathAtom(new MathDelimInset(l, r, ar)));
869 else if (t.cs() == "right") {
870 if (flags & FLAG_RIGHT)
872 //lyxerr << "got so far: '" << cell << "'\n";
873 error("Unmatched right delimiter");
877 else if (t.cs() == "begin") {
878 string const name = getArg('{', '}');
879 if (name == "array" || name == "subarray") {
// appending 'c' guarantees valign[0] exists when no [..] option was given
880 string const valign = getArg('[', ']') + 'c';
881 string const halign = getArg('{', '}');
882 cell->push_back(MathAtom(new MathArrayInset(name, valign[0], halign)));
883 parse2(cell->back(), FLAG_END, mathmode, false);
886 else if (name == "split" || name == "cases" ||
887 name == "gathered" || name == "aligned") {
888 cell->push_back(createMathInset(name));
889 parse2(cell->back(), FLAG_END, mathmode, false);
892 else if (name == "math") {
893 cell->push_back(MathAtom(new MathHullInset("simple")));
894 parse2(cell->back(), FLAG_END, true, true);
897 else if (name == "equation" || name == "equation*"
898 || name == "displaymath") {
899 cell->push_back(MathAtom(new MathHullInset("equation")));
900 parse2(cell->back(), FLAG_END, true, (name == "equation"));
903 else if (name == "eqnarray" || name == "eqnarray*") {
904 cell->push_back(MathAtom(new MathHullInset("eqnarray")));
905 parse2(cell->back(), FLAG_END, true, !stared(name));
908 else if (name == "align" || name == "align*") {
909 cell->push_back(MathAtom(new MathHullInset("align")));
910 parse2(cell->back(), FLAG_END, true, !stared(name));
913 else if (name == "alignat" || name == "alignat*") {
914 // ignore this for a while
916 cell->push_back(MathAtom(new MathHullInset("alignat")));
917 parse2(cell->back(), FLAG_END, true, !stared(name));
920 else if (name == "xalignat" || name == "xalignat*") {
921 // ignore this for a while
923 cell->push_back(MathAtom(new MathHullInset("xalignat")));
924 parse2(cell->back(), FLAG_END, true, !stared(name));
927 else if (name == "xxalignat") {
928 // ignore this for a while
930 cell->push_back(MathAtom(new MathHullInset("xxalignat")));
931 parse2(cell->back(), FLAG_END, true, !stared(name));
934 else if (name == "multline" || name == "multline*") {
935 cell->push_back(MathAtom(new MathHullInset("multline")));
936 parse2(cell->back(), FLAG_END, true, !stared(name));
939 else if (name == "gather" || name == "gather*") {
940 cell->push_back(MathAtom(new MathHullInset("gather")));
941 parse2(cell->back(), FLAG_END, true, !stared(name));
944 else if (latexkeys const * l = in_word_set(name)) {
945 if (l->inset == "matrix") {
946 cell->push_back(createMathInset(name));
947 parse2(cell->back(), FLAG_END, mathmode, false);
952 // lyxerr << "unknow math inset begin '" << name << "'\n";
953 // create generic environment inset
954 cell->push_back(MathAtom(new MathEnvInset(name)));
955 parse(cell->back()->cell(0), FLAG_END, mathmode);
959 else if (t.cs() == "kern") {
965 Token const & t = getToken();
971 if (isValidLength(s))
974 cell->push_back(MathAtom(new MathKernInset(s)));
977 else if (t.cs() == "label") {
979 parse(ar, FLAG_ITEM, false);
980 if (grid.asHullInset()) {
981 grid.asHullInset()->label(cellrow, asString(ar));
983 cell->push_back(createMathInset(t.cs()));
984 cell->push_back(MathAtom(new MathBraceInset(ar)));
988 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
989 MathAtom p = createMathInset(t.cs());
// what has been collected so far in the cell becomes the first argument
990 cell->swap(p->cell(0));
991 parse(p->cell(1), flags, mathmode);
996 else if (t.cs() == "substack") {
997 cell->push_back(createMathInset(t.cs()));
998 parse2(cell->back(), FLAG_ITEM, mathmode, false);
1001 else if (t.cs() == "xymatrix") {
1002 cell->push_back(createMathInset(t.cs()));
1003 parse2(cell->back(), FLAG_ITEM, mathmode, false);
1008 else if (1 && t.cs() == "ar") {
1009 MathXYArrowInset * p = new MathXYArrowInset;
1010 // try to read target
1011 parse(p->cell(0), FLAG_OPTION, mathmode);
1012 // try to read label
1013 if (nextToken().cat() == catSuper || nextToken().cat() == catSub) {
1014 p->up_ = nextToken().cat() == catSuper;
1016 parse(p->cell(1), FLAG_ITEM, mathmode);
1017 //lyxerr << "read label: " << p->cell(1) << "\n";
1020 cell->push_back(MathAtom(p));
1021 //lyxerr << "read cell: " << cell << "\n";
1025 else if (t.cs().size()) {
1026 latexkeys const * l = in_word_set(t.cs());
1028 if (l->inset == "font") {
1029 lyxerr << "starting font " << t.cs() << "\n";
1030 MathAtom p = createMathInset(t.cs());
// font commands whose name starts with 't' have their argument parsed
// with mathmode off
1031 bool textmode = (t.cs()[0] == 't');
1032 parse(p->cell(0), FLAG_ITEM, !textmode);
1034 //lyxerr << "ending font\n";
1037 else if (l->inset == "oldfont") {
1038 cell->push_back(createMathInset(t.cs()));
1039 parse(cell->back()->cell(0), flags, l->extra == "mathmode");
1043 else if (l->inset == "style") {
1044 cell->push_back(createMathInset(t.cs()));
1045 parse(cell->back()->cell(0), flags, mathmode);
1049 else if (l->inset == "parbox") {
1050 // read optional positioning and width
1051 MathArray pos, width;
1052 parse(pos, FLAG_OPTION, false);
1053 parse(width, FLAG_ITEM, false);
1054 cell->push_back(createMathInset(t.cs()));
1055 parse(cell->back()->cell(0), FLAG_ITEM, false);
1056 cell->back()->asParboxInset()->setPosition(asString(pos));
1057 cell->back()->asParboxInset()->setWidth(asString(width));
1061 MathAtom p = createMathInset(t.cs());
1062 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1063 parse(p->cell(i), FLAG_ITEM, l->extra != "forcetext");
1069 MathAtom p = createMathInset(t.cs());
1070 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1071 parse(p->cell(i), FLAG_ITEM, mathmode);
1077 if (flags & FLAG_LEAVE) {
1078 flags &= ~FLAG_LEAVE;
1086 } // anonymous namespace
// Parses the LaTeX text in 'str' into the array 'ar' by wrapping the string
// in a stream and delegating to the istream overload.
1089 void mathed_parse_cell(MathArray & ar, string const & str)
1091 istringstream is(str.c_str());
1092 mathed_parse_cell(ar, is);
// Parses the contents of stream 'is' into the array 'ar' with no FLAG_* bits
// set (flags == 0) and mathmode on.
1096 void mathed_parse_cell(MathArray & ar, istream & is)
1098 Parser(is).parse(ar, 0, true);
// Parses the LaTeX text in 'str' into the atom 't'; returns the result of
// Parser::parse(MathAtom &).
1102 bool mathed_parse_normal(MathAtom & t, string const & str)
1104 istringstream is(str.c_str());
1105 return Parser(is).parse(t);
// Parses the contents of stream 'is' into the atom 't'; returns the result of
// Parser::parse(MathAtom &).
1109 bool mathed_parse_normal(MathAtom & t, istream & is)
1111 return Parser(is).parse(t);
// Parses from a LyXLex into the atom 't'. The Parser built from the lexer
// takes the lexer's current line number for its error messages. Returns the
// result of Parser::parse(MathAtom &).
1115 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1117 return Parser(lex).parse(t);