3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
20 If someone desperately needs partial "structures" (such as a few cells of
21 an array inset or similar) (s)he could use the following hack as a starting
22 point to write some macros:
27 \def\makeamptab{\catcode`\&=4\relax}
28 \def\makeampletter{\catcode`\&=11\relax}
29 \def\b{\makeampletter\expandafter\makeamptab\bi}
49 #pragma implementation
52 #include "math_parser.h"
53 #include "math_inset.h"
54 #include "math_arrayinset.h"
55 #include "math_braceinset.h"
56 #include "math_casesinset.h"
57 #include "math_charinset.h"
58 #include "math_deliminset.h"
59 #include "math_factory.h"
60 #include "math_funcinset.h"
61 #include "math_kerninset.h"
62 #include "math_macro.h"
63 #include "math_macrotable.h"
64 #include "math_macrotemplate.h"
65 #include "math_hullinset.h"
66 #include "math_rootinset.h"
67 #include "math_sizeinset.h"
68 #include "math_sqrtinset.h"
69 #include "math_scriptinset.h"
70 #include "math_specialcharinset.h"
71 #include "math_splitinset.h"
72 #include "math_sqrtinset.h"
73 #include "math_support.h"
78 #include "support/lstrings.h"
/// Returns true when \p s is non-empty and its last character is '*'
/// (as in environment names such as "align*"); false otherwise.
bool stared(string const & s)
{
	if (s.empty())
		return false;
	return s[s.size() - 1] == '*';
}
101 void add(MathArray & ar, char c, MathTextCodes code)
103 ar.push_back(MathAtom(new MathCharInset(c, code)));
107 // These are TeX's catcodes
109 catEscape, // 0 backslash
119 catSpace, // 10 space
120 catLetter, // 11 a-zA-Z
121 catOther, // 12 none of the above
124 catInvalid // 15 <delete>
127 CatCode theCatcode[256];
130 inline CatCode catcode(unsigned char c)
132 return theCatcode[c];
137 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing process
138 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
139 FLAG_END = 1 << 3, // next \\end ends the parsing process
140 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing process
141 FLAG_BOX = 1 << 5, // we are in a box
142 FLAG_ITEM = 1 << 6, // read a (possibly braced token)
143 FLAG_BLOCK = 1 << 7, // next block ends the parsing process
144 FLAG_BLOCK2 = 1 << 8, // next block2 ends the parsing process
145 FLAG_LEAVE = 1 << 9 // leave the loop at the end
151 fill(theCatcode, theCatcode + 256, catOther);
152 fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
153 fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
155 theCatcode['\\'] = catEscape;
156 theCatcode['{'] = catBegin;
157 theCatcode['}'] = catEnd;
158 theCatcode['$'] = catMath;
159 theCatcode['&'] = catAlign;
160 theCatcode['\n'] = catNewline;
161 theCatcode['#'] = catParameter;
162 theCatcode['^'] = catSuper;
163 theCatcode['_'] = catSub;
164 theCatcode['
\7f'] = catIgnore;
165 theCatcode[' '] = catSpace;
166 theCatcode['\t'] = catSpace;
167 theCatcode['\r'] = catSpace;
168 theCatcode['~'] = catActive;
169 theCatcode['%'] = catComment;
175 // Helper class for parsing
181 Token() : cs_(), char_(0), cat_(catIgnore) {}
183 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
185 Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
188 string const & cs() const { return cs_; }
190 CatCode cat() const { return cat_; }
192 char character() const { return char_; }
194 string asString() const;
207 bool Token::isCR() const
209 return cs_ == "\\" || cs_ == "cr" || cs_ == "crcr";
212 string Token::asString() const
214 return cs_.size() ? cs_ : string(1, char_);
217 // Angus' compiler says these are not needed
218 //bool operator==(Token const & s, Token const & t)
220 // return s.character() == t.character()
221 // && s.cat() == t.cat() && s.cs() == t.cs();
224 //bool operator!=(Token const & s, Token const & t)
229 ostream & operator<<(ostream & os, Token const & t)
232 os << "\\" << t.cs();
234 os << "[" << t.character() << "," << t.cat() << "]";
243 Parser(LyXLex & lex);
245 Parser(istream & is);
248 bool parse_macro(string & name);
250 bool parse_normal(MathAtom &);
252 void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
254 int lineno() const { return lineno_; }
260 string getArg(char lf, char rf);
264 void error(string const & msg);
266 bool parse_lines(MathAtom & t, bool numbered, bool outmost);
267 /// parses {... & ... \\ ... & ... }
268 bool parse_lines2(MathAtom & t);
272 void tokenize(istream & is);
274 void tokenize(string const & s);
276 void push_back(Token const & t);
280 Token const & prevToken() const;
282 Token const & nextToken() const;
284 Token const & getToken();
285 /// skips spaces if any
287 /// skips opening brace
289 /// skips closing brace
291 /// counts a sequence of hlines
294 void lex(string const & s);
301 std::vector<Token> tokens_;
313 Parser::Parser(LyXLex & lexer)
314 : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
316 tokenize(lexer.getStream());
321 Parser::Parser(istream & is)
322 : lineno_(0), pos_(0), curr_num_(false)
328 void Parser::push_back(Token const & t)
330 tokens_.push_back(t);
334 void Parser::pop_back()
340 Token const & Parser::prevToken() const
342 static const Token dummy;
343 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
347 Token const & Parser::nextToken() const
349 static const Token dummy;
350 return good() ? tokens_[pos_] : dummy;
354 Token const & Parser::getToken()
356 static const Token dummy;
357 //lyxerr << "looking at token " << tokens_[pos_] << '\n';
358 return good() ? tokens_[pos_++] : dummy;
362 void Parser::skipSpaces()
364 while (nextToken().cat() == catSpace)
369 void Parser::skipBegin()
371 if (nextToken().cat() == catBegin)
374 lyxerr << "'{' expected\n";
378 void Parser::skipEnd()
380 if (nextToken().cat() == catEnd)
383 lyxerr << "'}' expected\n";
387 int Parser::readHLines()
391 while (nextToken().cs() == "hline") {
400 void Parser::putback()
406 bool Parser::good() const
408 return pos_ < tokens_.size();
412 char Parser::getChar()
415 lyxerr << "The input stream is not well..." << endl;
416 return tokens_[pos_++].character();
420 string Parser::getArg(char lf, char rg)
428 while ((c = getChar()) != rg && good())
435 void Parser::tokenize(istream & is)
437 // eat everything up to the next \end_inset or end of stream
438 // and store it in s for further tokenization
443 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
444 s = s.substr(0, s.size() - 10);
454 void Parser::tokenize(string const & buffer)
456 static bool init_done = false;
463 istringstream is(buffer.c_str(), ios::in | ios::binary);
468 switch (catcode(c)) {
472 if (catcode(c) == catNewline)
473 ; //push_back(Token("par"));
475 push_back(Token(' ', catSpace));
482 while (is.get(c) && catcode(c) != catNewline)
491 if (catcode(c) == catLetter) {
492 while (is.get(c) && catcode(c) == catLetter)
494 if (catcode(c) == catSpace)
495 while (is.get(c) && catcode(c) == catSpace)
504 push_back(Token(c, catcode(c)));
509 lyxerr << "\nTokens: ";
510 for (unsigned i = 0; i < tokens_.size(); ++i)
511 lyxerr << tokens_[i];
517 void Parser::error(string const & msg)
519 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
524 bool Parser::parse_lines(MathAtom & t, bool numbered, bool outmost)
526 MathGridInset * p = t->asGridInset();
528 lyxerr << "error in Parser::parse_lines() 1\n";
532 // save global variables
533 bool const saved_num = curr_num_;
534 string const saved_label = curr_label_;
536 // read initial hlines
537 p->rowinfo(0).lines_ = readHLines();
539 for (int row = 0; true; ++row) {
540 // reset global variables
541 curr_num_ = numbered;
545 for (MathInset::col_type col = 0; col < p->ncols(); ++col) {
546 //lyxerr << "reading cell " << row << " " << col << "\n";
548 parse_into(p->cell(col + row * p->ncols()), FLAG_BLOCK);
550 // break if cell is not followed by an ampersand
551 if (nextToken().cat() != catAlign) {
552 //lyxerr << "less cells read than normal in row/col: "
553 // << row << " " << col << "\n";
557 // skip the ampersand
562 MathHullInset * m = t->asHullInset();
564 lyxerr << "error in Parser::parse_lines() 2\n";
567 m->numbered(row, curr_num_);
568 m->label(row, curr_label_);
569 if (curr_skip_.size()) {
570 m->vcrskip(LyXLength(curr_skip_), row);
576 if (nextToken().isCR()) {
580 // try to read a length
583 // read hlines for next row
584 p->rowinfo(row + 1).lines_ = readHLines();
587 // we are finished if the next token is an 'end'
588 if (nextToken().cs() == "end") {
589 // skip the end-token
593 // leave the 'read a line'-loop
597 // otherwise, we have to start a new row
601 // restore "global" variables
602 curr_num_ = saved_num;
603 curr_label_ = saved_label;
609 bool Parser::parse_lines2(MathAtom & t)
611 MathGridInset * p = t->asGridInset();
613 lyxerr << "error in Parser::parse_lines() 1\n";
619 for (int row = 0; true; ++row) {
621 for (MathInset::col_type col = 0; true; ++col) {
622 //lyxerr << "reading cell " << row << " " << col << " " << p->ncols() << "\n";
624 if (col >= p->ncols()) {
625 //lyxerr << "adding col " << col << "\n";
626 p->addCol(p->ncols());
629 parse_into(p->cell(col + row * p->ncols()), FLAG_BLOCK2);
630 //lyxerr << "read cell: " << p->cell(col + row * p->ncols()) << "\n";
632 // break if cell is not followed by an ampersand
633 if (nextToken().cat() != catAlign) {
634 //lyxerr << "less cells read than normal in row/col: " << row << " " << col << "\n";
638 // skip the ampersand
643 if (nextToken().isCR()) {
648 // we are finished if the next token is an '}'
649 if (nextToken().cat() == catEnd) {
650 // skip the end-token
652 // leave the 'read a line'-loop
656 // otherwise, we have to start a new row
665 bool Parser::parse_macro(string & name)
670 if (getToken().cs() != "newcommand") {
671 lyxerr << "\\newcommand expected\n";
675 if (getToken().cat() != catBegin) {
676 lyxerr << "'{' in \\newcommand expected (1)\n";
680 name = getToken().cs();
682 if (getToken().cat() != catEnd) {
683 lyxerr << "'}' expected\n";
687 string arg = getArg('[', ']');
688 int narg = arg.empty() ? 0 : atoi(arg.c_str());
690 if (getToken().cat() != catBegin) {
691 lyxerr << "'{' in \\newcommand expected (2)\n";
696 parse_into(ar, FLAG_BRACE_LAST);
698 // we cannot handle recursive stuff at all
700 test.push_back(createMathInset(name));
701 if (ar.contains(test)) {
702 lyxerr << "we cannot handle recursive macros at all.\n";
706 MathMacroTable::create(name, narg, ar);
711 bool Parser::parse_normal(MathAtom & matrix)
714 Token const & t = getToken();
717 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
718 parse_into(matrix->cell(0), 0);
722 if (t.cat() == catMath) {
723 Token const & n = getToken();
724 if (n.cat() == catMath) {
725 // TeX's $$...$$ syntax for displayed math
726 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
727 MathHullInset * p = matrix->asHullInset();
728 parse_into(p->cell(0), 0);
729 p->numbered(0, curr_num_);
730 p->label(0, curr_label_);
732 // simple $...$ stuff
734 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
735 parse_into(matrix->cell(0), 0);
740 if (!t.cs().size()) {
741 lyxerr << "start of math expected, got '" << t << "'\n";
745 string const & cs = t.cs();
750 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
751 MathHullInset * p = matrix->asHullInset();
752 parse_into(p->cell(0), 0);
753 p->numbered(0, curr_num_);
754 p->label(0, curr_label_);
759 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
763 string const name = getArg('{', '}');
765 if (name == "math") {
766 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
767 parse_into(matrix->cell(0), 0);
771 if (name == "equation" || name == "equation*" || name == "displaymath") {
772 curr_num_ = (name == "equation");
774 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
775 MathHullInset * p = matrix->asHullInset();
776 parse_into(p->cell(0), FLAG_END);
777 p->numbered(0, curr_num_);
778 p->label(0, curr_label_);
782 if (name == "eqnarray" || name == "eqnarray*") {
783 matrix = MathAtom(new MathHullInset(LM_OT_EQNARRAY));
784 return parse_lines(matrix, !stared(name), true);
787 if (name == "align" || name == "align*") {
788 matrix = MathAtom(new MathHullInset(LM_OT_ALIGN));
789 return parse_lines(matrix, !stared(name), true);
792 if (name == "alignat" || name == "alignat*") {
793 int nc = 2 * atoi(getArg('{', '}').c_str());
794 matrix = MathAtom(new MathHullInset(LM_OT_ALIGNAT, nc));
795 return parse_lines(matrix, !stared(name), true);
798 if (name == "xalignat" || name == "xalignat*") {
799 int nc = 2 * atoi(getArg('{', '}').c_str());
800 matrix = MathAtom(new MathHullInset(LM_OT_XALIGNAT, nc));
801 return parse_lines(matrix, !stared(name), true);
804 if (name == "xxalignat") {
805 int nc = 2 * atoi(getArg('{', '}').c_str());
806 matrix = MathAtom(new MathHullInset(LM_OT_XXALIGNAT, nc));
807 return parse_lines(matrix, !stared(name), true);
810 if (name == "multline" || name == "multline*") {
811 matrix = MathAtom(new MathHullInset(LM_OT_MULTLINE));
812 return parse_lines(matrix, !stared(name), true);
815 if (name == "gather" || name == "gather*") {
816 matrix = MathAtom(new MathHullInset(LM_OT_GATHER));
817 return parse_lines(matrix, !stared(name), true);
820 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
821 lyxerr << "1: unknown math environment: " << name << "\n";
826 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
832 Token const & t = getToken();
834 //lyxerr << "t: " << t << " flags: " << flags << "\n";
835 //array.dump(lyxerr);
838 if (flags & FLAG_ITEM) {
840 if (t.cat() == catBegin) {
841 // skip the brace and collect everything to the next matching
843 flags |= FLAG_BRACE_LAST;
846 // handle only this single token, leave the loop if done
851 if (flags & FLAG_BLOCK) {
852 if (t.cat() == catAlign || t.isCR() || t.cs() == "end") {
858 if (flags & FLAG_BLOCK2) {
859 if (t.cat() == catAlign || t.isCR() || t.cs() == "end"
860 || t.cat() == catEnd) {
869 if (t.cat() == catMath) {
870 if (flags & FLAG_BOX) {
871 // we are inside an mbox, so opening new math is allowed
872 array.push_back(MathAtom(new MathHullInset(LM_OT_SIMPLE)));
873 parse_into(array.back()->cell(0), 0);
875 // otherwise this is the end of the formula
880 else if (t.cat() == catLetter)
881 add(array, t.character(), code);
883 else if (t.cat() == catSpace && code == LM_TC_TEXTRM)
884 add(array, t.character(), code);
886 else if (t.cat() == catParameter) {
887 Token const & n = getToken();
888 array.push_back(MathAtom(new MathMacroArgument(n.character()-'0', code)));
891 else if (t.cat() == catBegin) {
893 parse_into(ar, FLAG_BRACE_LAST);
894 #ifndef WITH_WARNINGS
895 #warning this might be wrong in general!
897 // ignore braces around simple items
898 if ((ar.size() == 1 && !ar.front()->needsBraces()
899 || (ar.size() == 2 && !ar.front()->needsBraces()
900 && ar.back()->asScriptInset()))
901 || (ar.size() == 0 && array.size() == 0))
905 array.push_back(MathAtom(new MathBraceInset));
906 array.back()->cell(0).swap(ar);
910 else if (t.cat() == catEnd) {
911 if (flags & FLAG_BRACE_LAST)
913 lyxerr << "found '}' unexpectedly, array: '" << array << "'\n";
914 //lyxerr << "found '}' unexpectedly\n";
915 add(array, '}', LM_TC_TEX);
918 else if (t.cat() == catAlign) {
919 lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
920 //lyxerr << "found tab unexpectedly\n";
921 add(array, '&', LM_TC_TEX);
924 else if (t.cat() == catSuper || t.cat() == catSub) {
925 bool up = (t.cat() == catSuper);
926 MathScriptInset * p = 0;
928 p = array.back()->asScriptInset();
929 if (!p || p->has(up)) {
930 array.push_back(MathAtom(new MathScriptInset(up)));
931 p = array.back()->asScriptInset();
934 parse_into(p->cell(up), FLAG_ITEM);
939 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
942 else if (t.cat() == catOther)
943 add(array, t.character(), code);
948 else if (t.cs() == "protect")
949 // ignore \\protect, will be re-added during output
952 else if (t.cs() == "end")
955 else if (t.cs() == ")")
958 else if (t.cs() == "]")
961 else if (t.cs() == "\\") {
962 curr_skip_ = getArg('[', ']');
963 //lyxerr << "found newline unexpectedly, array: '" << array << "'\n";
964 lyxerr << "found newline unexpectedly\n";
965 array.push_back(createMathInset("\\"));
968 else if (t.cs() == "limits")
971 else if (t.cs() == "nolimits")
974 else if (t.cs() == "nonumber")
977 else if (t.cs() == "number")
980 else if (t.cs() == "sqrt") {
983 array.push_back(MathAtom(new MathRootInset));
984 parse_into(array.back()->cell(0), FLAG_BRACK_END);
985 parse_into(array.back()->cell(1), FLAG_ITEM);
988 array.push_back(MathAtom(new MathSqrtInset));
989 parse_into(array.back()->cell(0), FLAG_ITEM);
993 else if (t.cs() == "left") {
994 string l = getToken().asString();
996 parse_into(ar, FLAG_RIGHT);
997 string r = getToken().asString();
998 MathAtom dl(new MathDelimInset(l, r));
1000 array.push_back(dl);
1003 else if (t.cs() == "right") {
1004 if (!(flags & FLAG_RIGHT)) {
1005 //lyxerr << "got so far: '" << array << "'\n";
1006 error("Unmatched right delimiter");
1011 else if (t.cs() == "begin") {
1012 string const name = getArg('{', '}');
1013 if (name == "array") {
1014 string const valign = getArg('[', ']') + 'c';
1015 string const halign = getArg('{', '}');
1016 array.push_back(MathAtom(new MathArrayInset(valign[0], halign)));
1017 parse_lines(array.back(), false, false);
1018 } else if (name == "split") {
1019 array.push_back(MathAtom(new MathSplitInset(1)));
1020 parse_lines(array.back(), false, false);
1021 } else if (name == "cases") {
1022 array.push_back(MathAtom(new MathCasesInset));
1023 parse_lines(array.back(), false, false);
1025 lyxerr << "unknow math inset begin '" << name << "'\n";
1028 else if (t.cs() == "kern") {
1029 #ifdef WITH_WARNINGS
1034 Token const & t = getToken();
1040 if (isValidLength(s))
1043 array.push_back(MathAtom(new MathKernInset(s)));
1046 else if (t.cs() == "label") {
1047 curr_label_ = getArg('{', '}');
1050 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
1051 MathAtom p = createMathInset(t.cs());
1052 array.swap(p->cell(0));
1053 parse_into(p->cell(1), flags, code);
1058 else if (t.cs() == "xymatrix") {
1059 array.push_back(createMathInset(t.cs()));
1060 parse_lines2(array.back());
1061 // skip closing brace
1066 else if (t.cs() == "mbox") {
1067 array.push_back(createMathInset(t.cs()));
1068 // slurp in the argument of mbox
1070 MathBoxInset * p = array.back()->asBoxInset();
1076 else if (t.cs().size()) {
1077 latexkeys const * l = in_word_set(t.cs());
1079 if (l->token == LM_TK_FONT) {
1080 //lyxerr << "starting font\n";
1081 //CatCode catSpaceSave = theCatcode[' '];
1082 //if (l->id == LM_TC_TEXTRM) {
1083 // // temporarily change catcode
1084 // theCatcode[' '] = catLetter;
1088 parse_into(ar, FLAG_ITEM, static_cast<MathTextCodes>(l->id));
1089 array.push_back(ar);
1091 // undo catcode changes
1092 ////theCatcode[' '] = catSpaceSave;
1093 //lyxerr << "ending font\n";
1096 else if (l->token == LM_TK_OLDFONT) {
1097 code = static_cast<MathTextCodes>(l->id);
1100 else if (l->token == LM_TK_BOX) {
1101 MathAtom p = createMathInset(t.cs());
1102 parse_into(p->cell(0), FLAG_ITEM | FLAG_BOX, LM_TC_BOX);
1106 else if (l->token == LM_TK_STY) {
1107 MathAtom p = createMathInset(t.cs());
1108 parse_into(p->cell(0), flags, code);
1114 MathAtom p = createMathInset(t.cs());
1115 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1116 parse_into(p->cell(i), FLAG_ITEM);
1122 MathAtom p = createMathInset(t.cs());
1123 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1124 parse_into(p->cell(i), FLAG_ITEM);
1130 if (flags & FLAG_LEAVE) {
1131 flags &= ~FLAG_LEAVE;
1137 lyxerr << " Math Panic, expect problems!\n";
1138 // Search for the end command.
1142 } while (good() && t.cs() != "end");
1148 } // anonymous namespace
1151 void mathed_parse_cell(MathArray & ar, string const & str)
1153 istringstream is(str.c_str());
1154 mathed_parse_cell(ar, is);
1158 void mathed_parse_cell(MathArray & ar, istream & is)
1160 Parser(is).parse_into(ar, 0);
1165 bool mathed_parse_macro(string & name, string const & str)
1167 istringstream is(str.c_str());
1169 return parser.parse_macro(name);
1172 bool mathed_parse_macro(string & name, istream & is)
1175 return parser.parse_macro(name);
1178 bool mathed_parse_macro(string & name, LyXLex & lex)
1181 return parser.parse_macro(name);
1186 bool mathed_parse_normal(MathAtom & t, string const & str)
1188 istringstream is(str.c_str());
1190 return parser.parse_normal(t);
1193 bool mathed_parse_normal(MathAtom & t, istream & is)
1196 return parser.parse_normal(t);
1199 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1202 return parser.parse_normal(t);