3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
20 If someone desperately needs partial "structures" (such as a few cells of
21 an array inset or similar), (s)he could use the following hack as a starting
22 point to write some macros:
27 \def\makeamptab{\catcode`\&=4\relax}
28 \def\makeampletter{\catcode`\&=11\relax}
29 \def\b{\makeampletter\expandafter\makeamptab\bi}
49 #pragma implementation
52 #include "math_parser.h"
53 #include "math_inset.h"
54 #include "math_arrayinset.h"
55 #include "math_braceinset.h"
56 #include "math_casesinset.h"
57 #include "math_charinset.h"
58 #include "math_deliminset.h"
59 #include "math_factory.h"
60 #include "math_funcinset.h"
61 #include "math_kerninset.h"
62 #include "math_macro.h"
63 #include "math_macrotable.h"
64 #include "math_macrotemplate.h"
65 #include "math_hullinset.h"
66 #include "math_rootinset.h"
67 #include "math_sizeinset.h"
68 #include "math_sqrtinset.h"
69 #include "math_scriptinset.h"
70 #include "math_specialcharinset.h"
71 #include "math_splitinset.h"
72 #include "math_sqrtinset.h"
73 #include "math_support.h"
78 #include "support/lstrings.h"
/// Returns true if \p s ends with an asterisk, i.e. names a "starred"
/// LaTeX environment such as "align*". An empty string is not starred.
bool stared(string const & s)
{
	// Keep the length in the string's own size type; storing it in a plain
	// 'unsigned' silently narrows it where size_type is wider.
	string::size_type const n = s.size();
	return n > 0 && s[n - 1] == '*';
}
101 void add(MathArray & ar, char c, MathTextCodes code)
103 ar.push_back(MathAtom(new MathCharInset(c, code)));
107 // These are TeX's catcodes
109 catEscape, // 0 backslash
119 catSpace, // 10 space
120 catLetter, // 11 a-zA-Z
121 catOther, // 12 none of the above
124 catInvalid // 15 <delete>
// Table holding one category code for each of the 256 possible byte values.
127 CatCode theCatcode[256];
// Returns the category code stored in theCatcode for byte c. The parameter is
// an unsigned char, so the index always lies within the 256-entry table.
130 inline CatCode catcode(unsigned char c)
132 return theCatcode[c];
// Bit flags combined into the 'flags' argument of Parser::parse_into().
// Each flag occupies its own bit so they can be OR-ed together; bits 0 and 6
// are not used by any flag listed here.
137 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing process
138 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
139 FLAG_END = 1 << 3, // next \\end ends the parsing process
140 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing process
141 FLAG_BOX = 1 << 5, // we are in a box
142 FLAG_ITEM = 1 << 7, // read a (possibly braced) token
143 FLAG_BLOCK = 1 << 8, // next block ends the parsing process
144 FLAG_LEAVE = 1 << 9 // leave the loop at the end
150 fill(theCatcode, theCatcode + 256, catOther);
151 fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
152 fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
154 theCatcode['\\'] = catEscape;
155 theCatcode['{'] = catBegin;
156 theCatcode['}'] = catEnd;
157 theCatcode['$'] = catMath;
158 theCatcode['&'] = catAlign;
159 theCatcode['\n'] = catNewline;
160 theCatcode['#'] = catParameter;
161 theCatcode['^'] = catSuper;
162 theCatcode['_'] = catSub;
163 theCatcode['
\7f'] = catIgnore;
164 theCatcode[' '] = catSpace;
165 theCatcode['\t'] = catSpace;
166 theCatcode['\r'] = catSpace;
167 theCatcode['~'] = catActive;
168 theCatcode['%'] = catComment;
174 // Helper class for parsing
// Default token: empty control sequence, character 0, category catIgnore.
180 Token() : cs_(), char_(0), cat_(catIgnore) {}
// Character token: stores the character c together with its category code.
182 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
// Control-sequence token: stores the name cs; the character is 0 and the
// category is catIgnore.
184 Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
// Name of the control sequence; empty unless built by the string constructor.
187 string const & cs() const { return cs_; }
// Category code given at construction (catIgnore for the other constructors).
189 CatCode cat() const { return cat_; }
// Stored character (0 unless built by the character constructor).
191 char character() const { return char_; }
// Text form of the token: the control-sequence name if there is one,
// otherwise the single stored character.
193 string asString() const;
206 bool Token::isCR() const
208 return cs_ == "\\" || cs_ == "cr" || cs_ == "crcr";
211 string Token::asString() const
213 return cs_.size() ? cs_ : string(1, char_);
216 bool operator==(Token const & s, Token const & t)
218 return s.character() == t.character()
219 && s.cat() == t.cat() && s.cs() == t.cs();
222 bool operator!=(Token const & s, Token const & t)
// Writes a token to os. Two output formats are used: a backslash followed by
// the control-sequence name, and "[character,category]".
// NOTE(review): presumably the first form is chosen when the token has a
// control-sequence name; confirm against the selecting condition.
227 ostream & operator<<(ostream & os, Token const & t)
230 os << "\\" << t.cs();
232 os << "[" << t.character() << "," << t.cat() << "]";
// Builds a parser from a LyXLex: takes its line number and tokenizes its stream.
241 Parser(LyXLex & lex);
// Builds a parser that reads from a plain input stream.
243 Parser(istream & is);
// Parses a \newcommand definition and registers it in the macro table;
// returns a macro name.
246 string parse_macro();
// Parses a complete formula ($...$, $$...$$ or a \begin{...} environment)
// into the given atom.
248 bool parse_normal(MathAtom &);
// Parses tokens into 'array'; 'flags' is a combination of the FLAG_* bits
// and the last argument is the text code applied to plain characters.
250 void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
// Line number used as a prefix in error messages.
252 int lineno() const { return lineno_; }
// Reads an argument delimited by the characters lf and rf.
258 string getArg(char lf, char rf);
// Logs a math parse error together with the current line number.
262 void error(string const & msg);
// Parses the rows and cells of the grid inset held by t.
264 bool parse_lines(MathAtom & t, bool numbered, bool outmost);
// Collects the text of one inset from the stream and tokenizes it.
268 void tokenize(istream & is);
// Splits the string s into tokens appended to tokens_.
270 void tokenize(string const & s);
// Appends one token to tokens_.
272 void push_back(Token const & t);
// Token just before the current position, or an empty token at the start.
276 Token const & prevToken() const;
// Token at the current position without consuming it.
278 Token const & nextToken() const;
// Token at the current position; advances the position.
280 Token const & getToken();
281 /// skips spaces if any
283 /// counts a sequence of hlines
286 void lex(string const & s);
// The token sequence produced by tokenize().
293 std::vector<Token> tokens_;
// Builds a parser from a LyXLex: remembers the lexer's current line number
// (printed by error()), starts at token position 0 with numbering switched
// off, and tokenizes the lexer's underlying stream.
305 Parser::Parser(LyXLex & lexer)
306 : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
308 tokenize(lexer.getStream());
// Builds a parser for a plain input stream. No line information is available
// here, so the line number starts at 0; the token position starts at 0 and
// numbering is switched off.
313 Parser::Parser(istream & is)
314 : lineno_(0), pos_(0), curr_num_(false)
// Appends the token t to the end of the token list.
320 void Parser::push_back(Token const & t)
322 tokens_.push_back(t);
326 void Parser::pop_back()
332 Token const & Parser::prevToken() const
334 static const Token dummy;
335 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
339 Token const & Parser::nextToken() const
341 static const Token dummy;
342 return good() ? tokens_[pos_] : dummy;
346 Token const & Parser::getToken()
348 static const Token dummy;
349 //lyxerr << "looking at token " << tokens_[pos_] << '\n';
350 return good() ? tokens_[pos_++] : dummy;
// Loops for as long as the upcoming token has category catSpace.
354 void Parser::skipSpaces()
356 while (nextToken().cat() == catSpace)
// Handles a run of consecutive "hline" control sequences at the current
// position; the declaration describes the result as a count of those hlines.
361 int Parser::readHLines()
365 while (nextToken().cs() == "hline") {
374 void Parser::putback()
// True while the current position still refers to an existing token.
380 bool Parser::good() const
382 return pos_ < tokens_.size();
386 char Parser::getChar()
389 lyxerr << "The input stream is not well..." << endl;
390 return tokens_[pos_++].character();
// Reads a delimited argument: lf is the opening and rg the closing character.
// The loop below consumes characters until the closing delimiter is read or
// the tokens run out.
394 string Parser::getArg(char lf, char rg)
402 while ((c = getChar()) != rg && good())
// Collects the raw text of one inset from the stream before tokenizing it.
409 void Parser::tokenize(istream & is)
411 // eat everything up to the next \end_inset or end of stream
412 // and store it in s for further tokenization
// "\end_inset" is 10 characters long; when the collected text ends with it,
// that trailing marker is cut off again.
417 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
418 s = s.substr(0, s.size() - 10);
// Splits the text in 'buffer' into tokens, classifying each character by its
// category code.
428 void Parser::tokenize(string const & buffer)
// One-time initialisation guard shared by all calls.
430 static bool init_done = false;
437 istringstream is(buffer.c_str(), ios::in | ios::binary);
442 switch (catcode(c)) {
// A newline followed by another newline would be a paragraph break; that
// token is deliberately not emitted (see the commented-out push_back).
446 if (catcode(c) == catNewline)
447 ; //push_back(Token("par"));
449 push_back(Token(' ', catSpace));
// Reads on until the next newline character.
456 while (is.get(c) && catcode(c) != catNewline)
// A control sequence made of letters extends over all following letters;
// spaces directly after it are read as well.
465 if (catcode(c) == catLetter) {
466 while (is.get(c) && catcode(c) == catLetter)
468 if (catcode(c) == catSpace)
469 while (is.get(c) && catcode(c) == catSpace)
// Plain character token carrying its own category code.
478 push_back(Token(c, catcode(c)));
// Debug dump of the complete token list.
483 lyxerr << "\nTokens: ";
484 for (unsigned i = 0; i < tokens_.size(); ++i)
485 lyxerr << tokens_[i];
491 void Parser::error(string const & msg)
493 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parses the body of a grid-like environment into the inset held by t.
// Each cell is parsed with FLAG_BLOCK; rows are separated by a CR token and
// the whole grid ends at an "end" control sequence.
//   t        - atom whose inset is accessed through asGridInset()
//   numbered - initial numbering state assigned to every row
//   outmost  - NOTE(review): presumably selects the per-row hull bookkeeping
//              below; the condition using it is not among these lines
498 bool Parser::parse_lines(MathAtom & t, bool numbered, bool outmost)
500 MathGridInset * p = t->asGridInset();
502 lyxerr << "error in Parser::parse_lines() 1\n";
506 int const cols = p->ncols();
508 // save global variables
509 bool const saved_num = curr_num_;
510 string const saved_label = curr_label_;
512 // read initial hlines
513 p->rowinfo(0).lines_ = readHLines();
515 for (int row = 0; true; ++row) {
516 // reset global variables
517 curr_num_ = numbered;
521 for (int col = 0; col < cols; ++col) {
522 //lyxerr << "reading cell " << row << " " << col << "\n";
// Cells are stored row by row, hence the index col + row * cols.
523 parse_into(p->cell(col + row * cols), FLAG_BLOCK);
525 // break if cell is not followed by an ampersand
526 if (nextToken().cat() != catAlign) {
527 //lyxerr << "less cells read than normal in row/col: "
528 // << row << " " << col << "\n";
532 // skip the ampersand
// Store numbering, label and extra vertical skip gathered while parsing this
// row in the hull inset.
537 MathHullInset * m = t->asHullInset();
539 lyxerr << "error in Parser::parse_lines() 2\n";
542 m->numbered(row, curr_num_);
543 m->label(row, curr_label_);
544 if (curr_skip_.size()) {
545 m->vcrskip(LyXLength(curr_skip_), row);
551 if (nextToken().isCR()) {
555 // try to read a length
558 // read hlines for next row
559 p->rowinfo(row + 1).lines_ = readHLines();
562 // we are finished if the next token is an 'end'
563 if (nextToken().cs() == "end") {
564 // skip the end-token
568 // leave the 'read a line'-loop
572 // otherwise, we have to start a new row
576 // restore "global" variables
577 curr_num_ = saved_num;
578 curr_label_ = saved_label;
// Parses a macro definition of the form \newcommand{\name}[nargs]{body} and
// registers it through MathMacroTable::create(). The bracketed argument count
// is optional and defaults to 0.
// NOTE(review): 'name' starts out as "{error}", presumably the value returned
// when one of the checks below fails; confirm against the return statements.
584 string Parser::parse_macro()
586 string name = "{error}";
589 if (getToken().cs() != "newcommand") {
590 lyxerr << "\\newcommand expected\n";
594 if (getToken().cat() != catBegin) {
595 lyxerr << "'{' in \\newcommand expected (1)\n";
// The macro name is the control sequence between the first pair of braces.
599 name = getToken().cs();
601 if (getToken().cat() != catEnd) {
602 lyxerr << "'}' expected\n";
// Optional "[n]" argument count; atoi() yields 0 for non-numeric text.
606 string arg = getArg('[', ']');
607 int narg = arg.empty() ? 0 : atoi(arg.c_str());
609 if (getToken().cat() != catBegin) {
610 lyxerr << "'{' in \\newcommand expected (2)\n";
// The body is parsed up to the closing brace that matches the one just read.
615 parse_into(ar, FLAG_BRACE_LAST);
616 MathMacroTable::create(name, narg, ar);
// Parses one complete formula into 'matrix'. The opening token selects the
// kind of hull inset: $...$ gives LM_OT_SIMPLE, $$...$$ gives LM_OT_EQUATION,
// and \begin{name} gives the hull type matching the environment name.
621 bool Parser::parse_normal(MathAtom & matrix)
624 Token const & t = getToken();
627 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
628 parse_into(matrix->cell(0), 0);
632 if (t.cat() == catMath) {
633 Token const & n = getToken();
634 if (n.cat() == catMath) {
635 // TeX's $$...$$ syntax for displayed math
636 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
637 MathHullInset * p = matrix->asHullInset();
638 parse_into(p->cell(0), 0);
// Numbering and label are taken over from the state left behind by parse_into().
639 p->numbered(0, curr_num_);
640 p->label(0, curr_label_);
642 // simple $...$ stuff
644 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
645 parse_into(matrix->cell(0), 0);
// From here on a control sequence is required.
650 if (!t.cs().size()) {
651 lyxerr << "start of math expected, got '" << t << "'\n";
655 string const & cs = t.cs();
660 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
661 MathHullInset * p = matrix->asHullInset();
662 parse_into(p->cell(0), 0);
663 p->numbered(0, curr_num_);
664 p->label(0, curr_label_);
669 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
// Environment name from \begin{name}.
673 string const name = getArg('{', '}');
// Single-row display environments; only the unstarred "equation" is numbered.
675 if (name == "equation" || name == "equation*" || name == "displaymath") {
676 curr_num_ = (name == "equation");
678 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
679 MathHullInset * p = matrix->asHullInset();
680 parse_into(p->cell(0), FLAG_END);
681 p->numbered(0, curr_num_);
682 p->label(0, curr_label_);
// Multi-row environments: rows are numbered unless the name ends in '*'.
686 if (name == "eqnarray" || name == "eqnarray*") {
687 matrix = MathAtom(new MathHullInset(LM_OT_EQNARRAY));
688 return parse_lines(matrix, !stared(name), true);
691 if (name == "align" || name == "align*") {
692 matrix = MathAtom(new MathHullInset(LM_OT_ALIGN));
693 return parse_lines(matrix, !stared(name), true);
// The alignat family takes a braced count; the hull gets twice that number
// passed to its constructor.
696 if (name == "alignat" || name == "alignat*") {
697 int nc = 2 * atoi(getArg('{', '}').c_str());
698 matrix = MathAtom(new MathHullInset(LM_OT_ALIGNAT, nc));
699 return parse_lines(matrix, !stared(name), true);
702 if (name == "xalignat" || name == "xalignat*") {
703 int nc = 2 * atoi(getArg('{', '}').c_str());
704 matrix = MathAtom(new MathHullInset(LM_OT_XALIGNAT, nc));
705 return parse_lines(matrix, !stared(name), true);
// Only the unstarred name is accepted here, so !stared(name) is always true.
708 if (name == "xxalignat") {
709 int nc = 2 * atoi(getArg('{', '}').c_str());
710 matrix = MathAtom(new MathHullInset(LM_OT_XXALIGNAT, nc));
711 return parse_lines(matrix, !stared(name), true);
714 if (name == "multline" || name == "multline*") {
715 matrix = MathAtom(new MathHullInset(LM_OT_MULTLINE));
716 return parse_lines(matrix, !stared(name), true);
719 if (name == "gather" || name == "gather*") {
720 matrix = MathAtom(new MathHullInset(LM_OT_GATHER));
721 return parse_lines(matrix, !stared(name), true);
// Unknown environment: reported both on the MATHED debug channel and
// unconditionally.
724 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
725 lyxerr << "1: unknown math environment: " << name << "\n";
// Core of the parser: consumes tokens and appends the matching insets to
// 'array'.
//   array - destination for the parsed atoms
//   flags - combination of FLAG_* bits that decide where parsing stops
//   code  - text code attached to plain characters that are added
// The function calls itself for nested constructs (braces, scripts, roots,
// delimiters, environments, font and style commands).
730 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
736 Token const & t = getToken();
738 //lyxerr << "t: " << t << " flags: " << flags << "'\n";
739 //array.dump(lyxerr);
// FLAG_ITEM: read exactly one item, which may be a braced group.
742 if (flags & FLAG_ITEM) {
744 if (t.cat() == catBegin) {
745 // skip the brace and collect everything to the next matching
747 flags |= FLAG_BRACE_LAST;
750 // handle only this single token, leave the loop if done
// FLAG_BLOCK: an ampersand, a CR token or \end marks the end of the block.
755 if (flags & FLAG_BLOCK) {
756 if (t.cat() == catAlign || t.isCR() || t.cs() == "end") {
765 if (t.cat() == catMath) {
766 if (flags & FLAG_BOX) {
767 // we are inside an mbox, so opening new math is allowed
768 array.push_back(MathAtom(new MathHullInset(LM_OT_SIMPLE)));
769 parse_into(array.back()->cell(0), 0);
771 // otherwise this is the end of the formula
776 else if (t.cat() == catLetter)
777 add(array, t.character(), code);
// Spaces are only kept when the text code is LM_TC_TEXTRM.
779 else if (t.cat() == catSpace && code == LM_TC_TEXTRM)
780 add(array, t.character(), code);
// Macro parameter "#n": the character after '#' is turned into a number.
// NOTE(review): that character is not checked to be a digit.
782 else if (t.cat() == catParameter) {
783 Token const & n = getToken();
784 array.push_back(MathAtom(new MathMacroArgument(n.character() - '0')));
787 else if (t.cat() == catBegin) {
789 parse_into(ar, FLAG_BRACE_LAST);
// NOTE(review): with #ifndef the warning fires when WITH_WARNINGS is NOT
// defined, which looks inverted; confirm the intended condition.
790 #ifndef WITH_WARNINGS
791 #warning this might be wrong in general!
793 // ignore braces around simple items
794 if (ar.size() == 1 || (ar.size() == 2 && ar.back()->asScriptInset())) {
797 array.push_back(MathAtom(new MathBraceInset));
798 array.back()->cell(0).swap(ar);
802 else if (t.cat() == catEnd) {
803 if (flags & FLAG_BRACE_LAST)
805 lyxerr << "found '}' unexpectedly, array: '" << array << "'\n";
806 //lyxerr << "found '}' unexpectedly\n";
807 add(array, '}', LM_TC_TEX);
810 else if (t.cat() == catAlign) {
811 lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
812 //lyxerr << "found tab unexpectedly\n";
813 add(array, '&', LM_TC_TEX);
// Super-/subscript: reuse the script inset at the end of the array if it
// does not already have a script on that side, otherwise append a new one.
816 else if (t.cat() == catSuper || t.cat() == catSub) {
817 bool up = (t.cat() == catSuper);
818 MathScriptInset * p = 0;
820 p = array.back()->asScriptInset();
821 if (!p || p->has(up)) {
822 array.push_back(MathAtom(new MathScriptInset(up)));
823 p = array.back()->asScriptInset();
826 parse_into(p->cell(up), FLAG_ITEM);
831 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
834 else if (t.cat() == catOther)
835 add(array, t.character(), code);
// From here on the token is matched by its control-sequence name.
840 else if (t.cs() == "protect")
841 // ignore \\protect, will be re-added during output
844 else if (t.cs() == "end")
847 else if (t.cs() == ")")
850 else if (t.cs() == "]")
// A row break outside of a grid: remember the optional [length] argument.
853 else if (t.cs() == "\\") {
854 curr_skip_ = getArg('[', ']');
855 //lyxerr << "found newline unexpectedly, array: '" << array << "'\n";
856 lyxerr << "found newline unexpectedly\n";
857 array.push_back(createMathInset("\\"));
860 else if (t.cs() == "limits")
863 else if (t.cs() == "nolimits")
866 else if (t.cs() == "nonumber")
869 else if (t.cs() == "number")
// \sqrt comes in two forms: a root inset whose cell 0 is read up to a closing
// bracket and whose cell 1 is a single item, or a plain sqrt inset with one
// item.
872 else if (t.cs() == "sqrt") {
875 array.push_back(MathAtom(new MathRootInset));
876 parse_into(array.back()->cell(0), FLAG_BRACK_END);
877 parse_into(array.back()->cell(1), FLAG_ITEM);
880 array.push_back(MathAtom(new MathSqrtInset));
881 parse_into(array.back()->cell(0), FLAG_ITEM);
// \left<l> ... \right<r>: the delimiters are the tokens following each command.
885 else if (t.cs() == "left") {
886 string l = getToken().asString();
888 parse_into(ar, FLAG_RIGHT);
889 string r = getToken().asString();
890 MathAtom dl(new MathDelimInset(l, r));
895 else if (t.cs() == "right") {
896 if (!(flags & FLAG_RIGHT)) {
897 //lyxerr << "got so far: '" << array << "'\n";
898 error("Unmatched right delimiter");
// Environments nested inside a formula.
903 else if (t.cs() == "begin") {
904 string const name = getArg('{', '}');
905 if (name == "array") {
// Appending 'c' guarantees that valign[0] exists when no [valign] is given.
906 string const valign = getArg('[', ']') + 'c';
907 string const halign = getArg('{', '}');
908 array.push_back(MathAtom(new MathArrayInset(valign[0], halign)));
909 parse_lines(array.back(), false, false);
910 } else if (name == "split") {
911 array.push_back(MathAtom(new MathSplitInset(1)));
912 parse_lines(array.back(), false, false);
913 } else if (name == "cases") {
914 array.push_back(MathAtom(new MathCasesInset));
915 parse_lines(array.back(), false, false);
917 lyxerr << "unknow math inset begin '" << name << "'\n";
920 else if (t.cs() == "kern") {
// NOTE(review): this local 't' shadows the function-level token 't'.
926 Token const & t = getToken();
932 if (isValidLength(s))
935 array.push_back(MathAtom(new MathKernInset(s)));
938 else if (t.cs() == "label") {
939 curr_label_ = getArg('{', '}');
// Infix commands: everything parsed so far is moved into cell 0 of the new
// inset and the remainder is parsed into cell 1.
942 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
943 MathAtom p = createMathInset(t.cs());
944 array.swap(p->cell(0));
945 parse_into(p->cell(1), flags, code);
952 else if (t.cs() == "mbox") {
953 array.push_back(createMathInset(t.cs()));
954 // slurp in the argument of mbox
956 MathBoxInset * p = array.back()->asBoxInset();
// Any other control sequence is looked up in the keyword table.
961 else if (t.cs().size()) {
962 latexkeys const * l = in_word_set(t.cs());
964 if (l->token == LM_TK_FONT) {
965 //lyxerr << "starting font\n";
966 //CatCode catSpaceSave = theCatcode[' '];
967 //if (l->id == LM_TC_TEXTRM) {
968 // // temporarily change catcode
969 // theCatcode[' '] = catLetter;
973 parse_into(ar, FLAG_ITEM, static_cast<MathTextCodes>(l->id));
976 // undo catcode changes
977 ////theCatcode[' '] = catSpaceSave;
978 //lyxerr << "ending font\n";
// Old-style font switch: changes the text code for the rest of this call.
981 else if (l->token == LM_TK_OLDFONT) {
982 code = static_cast<MathTextCodes>(l->id);
985 else if (l->token == LM_TK_BOX) {
986 MathAtom p = createMathInset(t.cs());
987 parse_into(p->cell(0), FLAG_ITEM | FLAG_BOX, LM_TC_BOX);
991 else if (l->token == LM_TK_STY) {
992 MathAtom p = createMathInset(t.cs());
993 parse_into(p->cell(0), flags, code);
// Generic command: one item is read for each argument cell of the inset.
999 MathAtom p = createMathInset(t.cs());
1000 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1001 parse_into(p->cell(i), FLAG_ITEM);
1007 MathAtom p = createMathInset(t.cs());
1008 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1009 parse_into(p->cell(i), FLAG_ITEM);
// FLAG_LEAVE is cleared once it has been seen.
1015 if (flags & FLAG_LEAVE) {
1016 flags &= ~FLAG_LEAVE;
1022 lyxerr << " Math Panic, expect problems!\n";
1023 // Search for the end command.
1027 } while (good() && t.cs() != "end");
1033 } // anonymous namespace
1036 void mathed_parse_cell(MathArray & ar, string const & str)
1038 istringstream is(str.c_str());
1039 mathed_parse_cell(ar, is);
1043 void mathed_parse_cell(MathArray & ar, istream & is)
1045 Parser(is).parse_into(ar, 0);
// Entry points for parsing a macro definition from a string, a stream or a
// LyXLex. Each one returns the result of Parser::parse_macro().
// String overload: the text is wrapped in an istringstream first.
1050 string mathed_parse_macro(string const & str)
1052 istringstream is(str.c_str());
1054 return parser.parse_macro();
// Stream overload.
1057 string mathed_parse_macro(istream & is)
1060 return parser.parse_macro();
// LyXLex overload.
1063 string mathed_parse_macro(LyXLex & lex)
1066 return parser.parse_macro();
// Entry points for parsing a complete formula into t from a string, a stream
// or a LyXLex. Each one returns the result of Parser::parse_normal().
// String overload: the text is wrapped in an istringstream first.
1071 bool mathed_parse_normal(MathAtom & t, string const & str)
1073 istringstream is(str.c_str());
1075 return parser.parse_normal(t);
// Stream overload.
1078 bool mathed_parse_normal(MathAtom & t, istream & is)
1081 return parser.parse_normal(t);
// LyXLex overload.
1084 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1087 return parser.parse_normal(t);