3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
20 If someone desperately needs partial "structures" (such as a few cells of
21 an array inset or similar), (s)he could use the following hack as a starting
22 point to write some macros:
27 \def\makeamptab{\catcode`\&=4\relax}
28 \def\makeampletter{\catcode`\&=11\relax}
29 \def\b{\makeampletter\expandafter\makeamptab\bi}
49 #pragma implementation
52 #include "math_parser.h"
53 #include "math_inset.h"
54 #include "math_arrayinset.h"
55 #include "math_braceinset.h"
56 #include "math_casesinset.h"
57 #include "math_charinset.h"
58 #include "math_deliminset.h"
59 #include "math_factory.h"
60 #include "math_funcinset.h"
61 #include "math_kerninset.h"
62 #include "math_macro.h"
63 #include "math_macrotable.h"
64 #include "math_macrotemplate.h"
65 #include "math_hullinset.h"
66 #include "math_rootinset.h"
67 #include "math_sqrtinset.h"
68 #include "math_scriptinset.h"
69 #include "math_specialcharinset.h"
70 #include "math_splitinset.h"
71 #include "math_sqrtinset.h"
72 #include "math_support.h"
77 #include "support/lstrings.h"
/// Tells whether an environment name is the starred variant.
///
/// \param s environment name, e.g. "align" or "align*"
/// \return true when s is non-empty and its last character is '*'
///
/// The emptiness test comes first so that size() - 1 is never evaluated
/// for an empty string; no narrowing of the size to unsigned takes place.
bool stared(std::string const & s)
{
	return !s.empty() && s[s.size() - 1] == '*';
}
// Appends one character to a math array: a new MathCharInset is built
// from the character c and the text code, wrapped in a MathAtom and
// pushed onto the end of ar.
100 void add(MathArray & ar, char c, MathTextCodes code)
102 ar.push_back(MathAtom(new MathCharInset(c, code)));
106 // These are TeX's catcodes
// The trailing comment on each enumerator gives the TeX category code
// number it stands for. Only some of the enumerators are visible in this
// listing.
108 catEscape, // 0 backslash
118 catSpace, // 10 space
119 catLetter, // 11 a-zA-Z
120 catOther, // 12 none of the above
123 catInvalid // 15 <delete>
// Table with one category code per possible unsigned char value.
126 CatCode theCatcode[256];
// Looks up the category code of c in theCatcode. The parameter is an
// unsigned char, so the index always lies inside the 256-entry table.
129 inline CatCode catcode(unsigned char c)
131 return theCatcode[c];
// Bit flags for the `flags` argument of Parser::parse_into(). Every value
// is a distinct single bit, so several flags can be combined with |.
// Bits 0 and 6 are not used by the enumerators visible here.
136 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing process
137 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
138 FLAG_END = 1 << 3, // next \\end ends the parsing process
139 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing process
140 FLAG_BOX = 1 << 5, // we are in a box
141 FLAG_ITEM = 1 << 7, // read a (possibly braced token)
142 FLAG_BLOCK = 1 << 8, // next block ends the parsing process
143 FLAG_LEAVE = 1 << 9 // leave the loop at the end
// Populates theCatcode. All 256 entries start as catOther, the ranges
// a-z and A-Z are then marked catLetter, and finally the individual
// characters below receive their specific category codes.
// Space, tab and carriage return all map to catSpace.
// NOTE(review): the index literal of the catIgnore assignment is split
// across two lines in this listing; presumably it was the DEL character
// (0x7f) - confirm against the upstream file.
149 fill(theCatcode, theCatcode + 256, catOther);
150 fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
151 fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
153 theCatcode['\\'] = catEscape;
154 theCatcode['{'] = catBegin;
155 theCatcode['}'] = catEnd;
156 theCatcode['$'] = catMath;
157 theCatcode['&'] = catAlign;
158 theCatcode['\n'] = catNewline;
159 theCatcode['#'] = catParameter;
160 theCatcode['^'] = catSuper;
161 theCatcode['_'] = catSub;
162 theCatcode['
\7f'] = catIgnore;
163 theCatcode[' '] = catSpace;
164 theCatcode['\t'] = catSpace;
165 theCatcode['\r'] = catSpace;
166 theCatcode['~'] = catActive;
167 theCatcode['%'] = catComment;
173 // Helper class for parsing
// A token stores three things: a control sequence name (cs_), a single
// character (char_) and a category code (cat_).
// Default token: empty control sequence, character 0, category catIgnore.
179 Token() : cs_(), char_(0), cat_(catIgnore) {}
// Character token: keeps c and its category; the control sequence is empty.
181 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
// Control sequence token: keeps the name; character 0, category catIgnore.
183 Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
// Read access to the stored control sequence name.
186 string const & cs() const { return cs_; }
// Read access to the stored category code.
188 CatCode cat() const { return cat_; }
// Read access to the stored character.
190 char character() const { return char_; }
// Text form of the token; defined out of line.
192 string asString() const;
// True when the control sequence name is a single backslash, "cr" or
// "crcr".
205 bool Token::isCR() const
207 return cs_ == "\\" || cs_ == "cr" || cs_ == "crcr";
// Returns the control sequence name when it is non-empty; otherwise a
// one-character string built from char_.
210 string Token::asString() const
212 return cs_.size() ? cs_ : string(1, char_);
// Two tokens are equal when character, category code and control
// sequence name all match.
215 bool operator==(Token const & s, Token const & t)
217 return s.character() == t.character()
218 && s.cat() == t.cat() && s.cs() == t.cs();
221 bool operator!=(Token const & s, Token const & t)
// Debug output of a token. Two output forms are visible: a backslash
// followed by the control sequence name, and "[character,category]".
// The condition that selects between them is not part of this listing.
226 ostream & operator<<(ostream & os, Token const & t)
229 os << "\\" << t.cs();
231 os << "[" << t.character() << "," << t.cat() << "]";
// Construct from a LyXLex; the definition tokenizes the lexer stream.
240 Parser(LyXLex & lex);
// Construct from a plain input stream.
242 Parser(istream & is);
// Parse a macro definition; returns a string (see the definition).
245 string parse_macro();
// Parse a complete formula into the given atom.
247 bool parse_normal(MathAtom &);
// Parse tokens into array; flags is a combination of FLAG_* bits and the
// text code defaults to LM_TC_MIN.
249 void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
// Current value of lineno_.
251 int lineno() const { return lineno_; }
// Read an argument delimited by the characters lf and rf.
257 string getArg(char lf, char rf);
// Report a parse error message.
261 void error(string const & msg);
// Parse the rows and cells of a grid-like inset held in t.
263 bool parse_lines(MathAtom & t, bool numbered, bool outmost);
// Tokenize the contents of a stream.
267 void tokenize(istream & is);
// Tokenize the contents of a string.
269 void tokenize(string const & s);
// Append a token to tokens_.
271 void push_back(Token const & t);
// Token before the current position.
275 Token const & prevToken() const;
// Token at the current position, without consuming it.
277 Token const & nextToken() const;
// Token at the current position; consumes it.
279 Token const & getToken();
280 /// skips spaces if any
282 /// counts a sequence of hlines
285 void lex(string const & s);
// The token sequence produced by tokenize().
292 std::vector<Token> tokens_;
// Builds a parser on top of a LyXLex: the starting line number is taken
// from the lexer, the read position starts at 0, curr_num_ starts false,
// and the lexer stream is tokenized immediately.
304 Parser::Parser(LyXLex & lexer)
305 : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
307 tokenize(lexer.getStream());
// Builds a parser for a plain input stream: line number 0, read position
// 0 and curr_num_ false. The constructor body is not visible in this
// listing.
312 Parser::Parser(istream & is)
313 : lineno_(0), pos_(0), curr_num_(false)
// Appends a copy of t to the end of the token list.
319 void Parser::push_back(Token const & t)
321 tokens_.push_back(t);
325 void Parser::pop_back()
// Returns the token just before the current position. At position 0
// there is no such token, so a default-constructed static dummy is
// returned instead.
331 Token const & Parser::prevToken() const
333 static const Token dummy;
334 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
// Peeks at the token at the current position without consuming it.
// Returns a default-constructed static dummy when good() is false.
338 Token const & Parser::nextToken() const
340 static const Token dummy;
341 return good() ? tokens_[pos_] : dummy;
// Returns the token at the current position and advances pos_ by one.
// When good() is false the position is left alone and a
// default-constructed static dummy is returned.
345 Token const & Parser::getToken()
347 static const Token dummy;
348 //lyxerr << "looking at token " << tokens_[pos_] << '\n';
349 return good() ? tokens_[pos_++] : dummy;
// Loops for as long as the upcoming token has category catSpace.
// The loop body is not visible in this listing.
353 void Parser::skipSpaces()
355 while (nextToken().cat() == catSpace)
// Loops for as long as the upcoming token is the control sequence
// "hline". The loop body and the returned value are not visible in this
// listing.
360 int Parser::readHLines()
364 while (nextToken().cs() == "hline") {
373 void Parser::putback()
// True while the read position still refers to an existing token.
379 bool Parser::good() const
381 return pos_ < tokens_.size();
// Returns the character of the token at the current position and
// advances pos_ by one.
// NOTE(review): the visible code only logs a message before indexing
// tokens_; confirm that the lines missing from this listing keep the
// index from running past the end of the vector.
385 char Parser::getChar()
388 lyxerr << "The input stream is not well..." << endl;
389 return tokens_[pos_++].character();
// Reads a delimited argument. The visible loop fetches characters with
// getChar() until the closing delimiter rg is read or no tokens remain.
// How lf is used and what is collected are not visible in this listing.
393 string Parser::getArg(char lf, char rg)
401 while ((c = getChar()) != rg && good())
// Stream front end of the tokenizer. Input is accumulated in the string
// s; when s ends with the 10-character marker "\\end_inset" the marker
// is cut off.
408 void Parser::tokenize(istream & is)
410 // eat everything up to the next \end_inset or end of stream
411 // and store it in s for further tokenization
// 10 is the length of the marker text being compared against
416 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
417 s = s.substr(0, s.size() - 10);
// Splits buffer into tokens, appending them with push_back(). Characters
// are read one at a time from a binary string stream and dispatched on
// their category code.
427 void Parser::tokenize(string const & buffer)
// one-time initialisation guard, shared by all calls
429 static bool init_done = false;
436 istringstream is(buffer.c_str(), ios::in | ios::binary);
441 switch (catcode(c)) {
445 if (catcode(c) == catNewline)
446 ; //push_back(Token("par"));
// a single space token is emitted here
448 push_back(Token(' ', catSpace));
// read on until a newline character is met or the stream ends
455 while (is.get(c) && catcode(c) != catNewline)
// control sequence: a run of letters ...
464 if (catcode(c) == catLetter) {
465 while (is.get(c) && catcode(c) == catLetter)
// ... followed by a run of space characters
467 if (catcode(c) == catSpace)
468 while (is.get(c) && catcode(c) == catSpace)
// any other character becomes a token carrying its own category
477 push_back(Token(c, catcode(c)));
// debug dump of the complete token list
482 lyxerr << "\nTokens: ";
483 for (unsigned i = 0; i < tokens_.size(); ++i)
484 lyxerr << tokens_[i];
// Writes msg to lyxerr, prefixed with the approximate line number held
// in lineno_.
490 void Parser::error(string const & msg)
492 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parses the rows of the grid inset held in t, one cell at a time.
// curr_num_ and curr_label_ are saved on entry and restored at the end;
// curr_num_ is reset to `numbered` at the start of every row.
// Cells are addressed row-major as col + row * cols.
497 bool Parser::parse_lines(MathAtom & t, bool numbered, bool outmost)
499 MathGridInset * p = t->asGridInset();
501 lyxerr << "error in Parser::parse_lines() 1\n";
505 int const cols = p->ncols();
507 // save global variables
508 bool const saved_num = curr_num_;
509 string const saved_label = curr_label_;
511 // read initial hlines
512 p->rowinfo(0).lines_ = readHLines();
// no loop condition: the row loop is left from inside
514 for (int row = 0; true; ++row) {
515 // reset global variables
516 curr_num_ = numbered;
520 for (int col = 0; col < cols; ++col) {
521 //lyxerr << "reading cell " << row << " " << col << "\n";
522 parse_into(p->cell(col + row * cols), FLAG_BLOCK);
524 // break if cell is not followed by an ampersand
525 if (nextToken().cat() != catAlign) {
526 //lyxerr << "less cells read than normal in row/col: "
527 // << row << " " << col << "\n";
531 // skip the ampersand
// hull insets also record numbering, label and vertical skip per row
536 MathHullInset * m = t->asHullInset();
538 lyxerr << "error in Parser::parse_lines() 2\n";
541 m->numbered(row, curr_num_);
542 m->label(row, curr_label_);
543 if (curr_skip_.size()) {
544 m->vcrskip(LyXLength(curr_skip_), row);
550 if (nextToken().isCR()) {
554 // try to read a length
557 // read hlines for next row
558 p->rowinfo(row + 1).lines_ = readHLines();
561 // we are finished if the next token is an 'end'
562 if (nextToken().cs() == "end") {
563 // skip the end-token
567 // leave the 'read a line'-loop
571 // otherwise, we have to start a new row
575 // restore "global" variables
576 curr_num_ = saved_num;
577 curr_label_ = saved_label;
// Parses a macro definition of the form
//   newcommand {name} [narg] {body}
// and registers it through MathMacroTable::create(). Each structural
// token is checked in turn and a message is logged on mismatch.
// `name` starts out as "{error}"; what is returned on the failure paths
// is not visible in this listing.
583 string Parser::parse_macro()
585 string name = "{error}";
588 if (getToken().cs() != "newcommand") {
589 lyxerr << "\\newcommand expected\n";
593 if (getToken().cat() != catBegin) {
594 lyxerr << "'{' in \\newcommand expected (1)\n";
598 name = getToken().cs();
600 if (getToken().cat() != catEnd) {
601 lyxerr << "'}' expected\n";
// optional argument count in brackets; an empty argument means 0
605 string arg = getArg('[', ']');
606 int narg = arg.empty() ? 0 : atoi(arg.c_str());
608 if (getToken().cat() != catBegin) {
609 lyxerr << "'{' in \\newcommand expected (2)\n";
// the macro body runs up to the matching closing brace
614 parse_into(ar, FLAG_BRACE_LAST);
615 MathMacroTable::create(name, narg, ar);
// Parses one complete formula and stores the resulting hull inset in
// `matrix`. The first token decides the form: a catMath token starts the
// simple or the doubled-dollar display form, otherwise a control sequence
// is expected, and for a begin/end environment the environment name
// selects the hull type.
620 bool Parser::parse_normal(MathAtom & matrix)
623 Token const & t = getToken();
626 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
627 parse_into(matrix->cell(0), 0);
631 if (t.cat() == catMath) {
// a second catMath token right away means display math
632 Token const & n = getToken();
633 if (n.cat() == catMath) {
634 // TeX's $$...$$ syntax for displayed math
635 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
636 MathHullInset * p = matrix->asHullInset();
637 parse_into(p->cell(0), 0);
638 p->numbered(0, curr_num_);
639 p->label(0, curr_label_);
641 // simple $...$ stuff
643 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
644 parse_into(matrix->cell(0), 0);
// from here on a control sequence is required
649 if (!t.cs().size()) {
650 lyxerr << "start of math expected, got '" << t << "'\n";
654 string const & cs = t.cs();
659 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
660 MathHullInset * p = matrix->asHullInset();
661 parse_into(p->cell(0), 0);
662 p->numbered(0, curr_num_);
663 p->label(0, curr_label_);
668 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
// environment name in braces
672 string const name = getArg('{', '}');
674 if (name == "equation" || name == "equation*" || name == "displaymath") {
// only the plain "equation" environment is numbered
675 curr_num_ = (name == "equation");
677 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
678 MathHullInset * p = matrix->asHullInset();
679 parse_into(p->cell(0), FLAG_END);
680 p->numbered(0, curr_num_);
681 p->label(0, curr_label_);
// multi-row environments: numbered unless the name ends in '*'
685 if (name == "eqnarray" || name == "eqnarray*") {
686 matrix = MathAtom(new MathHullInset(LM_OT_EQNARRAY));
687 return parse_lines(matrix, !stared(name), true);
690 if (name == "align" || name == "align*") {
691 matrix = MathAtom(new MathHullInset(LM_OT_ALIGN));
692 return parse_lines(matrix, !stared(name), true);
// the alignat family takes a braced count; the column count is twice it
695 if (name == "alignat" || name == "alignat*") {
696 int nc = 2 * atoi(getArg('{', '}').c_str());
697 matrix = MathAtom(new MathHullInset(LM_OT_ALIGNAT, nc));
698 return parse_lines(matrix, !stared(name), true);
701 if (name == "xalignat" || name == "xalignat*") {
702 int nc = 2 * atoi(getArg('{', '}').c_str());
703 matrix = MathAtom(new MathHullInset(LM_OT_XALIGNAT, nc));
704 return parse_lines(matrix, !stared(name), true);
// NOTE(review): name is exactly "xxalignat" here, so stared(name) is
// always false and this hull is always parsed as numbered - confirm
// that this is intended.
707 if (name == "xxalignat") {
708 int nc = 2 * atoi(getArg('{', '}').c_str());
709 matrix = MathAtom(new MathHullInset(LM_OT_XXALIGNAT, nc));
710 return parse_lines(matrix, !stared(name), true);
713 if (name == "multline" || name == "multline*") {
714 matrix = MathAtom(new MathHullInset(LM_OT_MULTLINE));
715 return parse_lines(matrix, !stared(name), true);
718 if (name == "gather" || name == "gather*") {
719 matrix = MathAtom(new MathHullInset(LM_OT_GATHER));
720 return parse_lines(matrix, !stared(name), true);
// NOTE(review): the same message goes to the MATHED debug channel and
// to the plain error stream - confirm both are wanted.
723 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
724 lyxerr << "1: unknown math environment: " << name << "\n";
// Core of the parser: consumes tokens and appends the atoms they produce
// to `array`.
//   array - destination for the parsed atoms
//   flags - combination of FLAG_* bits deciding where parsing stops
//   code  - text code applied to plain characters added to the array
// Tokens are dispatched first on their category code and then, for
// control sequences, on their name. Nested structures are handled by
// recursive calls with suitable flags.
729 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
735 Token const & t = getToken();
737 //lyxerr << "t: " << t << " flags: " << flags << "'\n";
738 //array.dump(lyxerr);
// FLAG_ITEM: read either one token or one braced group
741 if (flags & FLAG_ITEM) {
743 if (t.cat() == catBegin) {
744 // skip the brace and collect everything to the next matching
746 flags |= FLAG_BRACE_LAST;
749 // handle only this single token, leave the loop if done
// FLAG_BLOCK: a cell ends at an alignment tab, a row end or an "end"
754 if (flags & FLAG_BLOCK) {
755 if (t.cat() == catAlign || t.isCR() || t.cs() == "end") {
764 if (t.cat() == catMath) {
765 if (flags & FLAG_BOX) {
766 // we are inside an mbox, so opening new math is allowed
767 array.push_back(MathAtom(new MathHullInset(LM_OT_SIMPLE)));
768 parse_into(array.back()->cell(0), 0);
770 // otherwise this is the end of the formula
775 else if (t.cat() == catLetter)
776 add(array, t.character(), code);
// spaces are only kept when the text code is LM_TC_TEXTRM
778 else if (t.cat() == catSpace && code == LM_TC_TEXTRM)
779 add(array, t.character(), code);
// macro parameter: the character of the following token, minus '0',
// is passed to MathMacroArgument
781 else if (t.cat() == catParameter) {
782 Token const & n = getToken();
783 array.push_back(MathAtom(new MathMacroArgument(n.character() - '0')));
// braced group: parsed into a temporary array first
786 else if (t.cat() == catBegin) {
788 parse_into(ar, FLAG_BRACE_LAST);
789 #ifndef WITH_WARNINGS
790 #warning this might be wrong in general!
792 // ignore braces around simple items
793 if (ar.size() == 1 || (ar.size() == 2 && ar.back()->asScriptInset())) {
796 array.push_back(MathAtom(new MathBraceInset));
797 array.back()->cell(0).swap(ar);
801 else if (t.cat() == catEnd) {
802 if (flags & FLAG_BRACE_LAST)
804 lyxerr << "found '}' unexpectedly, array: '" << array << "'\n";
805 //lyxerr << "found '}' unexpectedly\n";
806 add(array, '}', LM_TC_TEX);
809 else if (t.cat() == catAlign) {
810 lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
811 //lyxerr << "found tab unexpectedly\n";
812 add(array, '&', LM_TC_TEX);
// super/subscript: reuse a trailing script inset unless it already has
// a script on this side, in which case a new one is appended
815 else if (t.cat() == catSuper || t.cat() == catSub) {
816 bool up = (t.cat() == catSuper);
817 MathScriptInset * p = 0;
819 p = array.back()->asScriptInset();
820 if (!p || p->has(up)) {
821 array.push_back(MathAtom(new MathScriptInset(up)));
822 p = array.back()->asScriptInset();
825 parse_into(p->cell(up), FLAG_ITEM);
830 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
833 else if (t.cat() == catOther)
834 add(array, t.character(), code);
// control sequences, matched by name
839 else if (t.cs() == "protect")
840 // ignore \\protect, will be re-added during output
843 else if (t.cs() == "end")
846 else if (t.cs() == ")")
849 else if (t.cs() == "]")
// row end met outside of parse_lines: remember the optional skip and
// insert a newline inset
852 else if (t.cs() == "\\") {
853 curr_skip_ = getArg('[', ']');
854 //lyxerr << "found newline unexpectedly, array: '" << array << "'\n";
855 lyxerr << "found newline unexpectedly\n";
856 array.push_back(createMathInset("\\"));
859 else if (t.cs() == "limits")
862 else if (t.cs() == "nolimits")
865 else if (t.cs() == "nonumber")
868 else if (t.cs() == "number")
// sqrt: the root form fills cell 0 up to a closing bracket and cell 1
// with an item; the plain form has a single item
871 else if (t.cs() == "sqrt") {
874 array.push_back(MathAtom(new MathRootInset));
875 parse_into(array.back()->cell(0), FLAG_BRACK_END);
876 parse_into(array.back()->cell(1), FLAG_ITEM);
879 array.push_back(MathAtom(new MathSqrtInset));
880 parse_into(array.back()->cell(0), FLAG_ITEM);
// left ... right: the token after each keyword names the delimiter
884 else if (t.cs() == "left") {
885 string l = getToken().asString();
887 parse_into(ar, FLAG_RIGHT);
888 string r = getToken().asString();
889 MathAtom dl(new MathDelimInset(l, r));
894 else if (t.cs() == "right") {
895 if (!(flags & FLAG_RIGHT)) {
896 //lyxerr << "got so far: '" << array << "'\n";
897 error("Unmatched right delimiter");
905 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
906 //MathArray tmp = array;
907 //MathSizeInset * p = new MathSizeInset(MathStyles(lval_->id));
908 //array.push_back(p);
909 //parse_into(p->cell(0), FLAG_BRACE_FONT);
// nested environments that are themselves grids
915 else if (t.cs() == "begin") {
916 string const name = getArg('{', '}');
917 if (name == "array") {
// appending 'c' guarantees valign[0] exists when no option was given
918 string const valign = getArg('[', ']') + 'c';
919 string const halign = getArg('{', '}');
920 array.push_back(MathAtom(new MathArrayInset(valign[0], halign)));
921 parse_lines(array.back(), false, false);
922 } else if (name == "split") {
923 array.push_back(MathAtom(new MathSplitInset(1)));
924 parse_lines(array.back(), false, false);
925 } else if (name == "cases") {
926 array.push_back(MathAtom(new MathCasesInset));
927 parse_lines(array.back(), false, false);
929 lyxerr << "unknow math inset begin '" << name << "'\n";
932 else if (t.cs() == "kern") {
938 Token const & t = getToken();
944 if (isValidLength(s))
947 array.push_back(MathAtom(new MathKernInset(s)));
950 else if (t.cs() == "label") {
951 curr_label_ = getArg('{', '}');
// infix operators: everything parsed so far moves into cell 0 and the
// remainder is parsed into cell 1
954 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
955 MathAtom p = createMathInset(t.cs());
956 array.swap(p->cell(0));
957 parse_into(p->cell(1), flags, code);
964 else if (t.cs() == "mbox") {
965 array.push_back(createMathInset(t.cs()));
966 // slurp in the argument of mbox
968 MathBoxInset * p = array.back()->asBoxInset();
// any other control sequence: looked up in the keyword table
973 else if (t.cs().size()) {
974 latexkeys const * l = in_word_set(t.cs());
976 if (l->token == LM_TK_FONT) {
977 //lyxerr << "starting font\n";
978 //CatCode catSpaceSave = theCatcode[' '];
979 //if (l->id == LM_TC_TEXTRM) {
980 // // temporarily change catcode
981 // theCatcode[' '] = catLetter;
985 parse_into(ar, FLAG_ITEM, static_cast<MathTextCodes>(l->id));
988 // undo catcode changes
989 ////theCatcode[' '] = catSpaceSave;
990 //lyxerr << "ending font\n";
// old-style font switch: changes the text code used from here on
993 else if (l->token == LM_TK_OLDFONT) {
994 code = static_cast<MathTextCodes>(l->id);
997 else if (l->token == LM_TK_BOX) {
998 MathAtom p = createMathInset(t.cs());
999 parse_into(p->cell(0), FLAG_ITEM | FLAG_BOX, LM_TC_BOX);
// generic inset: one item is read for each of its nargs() cells
1004 MathAtom p = createMathInset(t.cs());
1005 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1006 parse_into(p->cell(i), FLAG_ITEM);
1012 MathAtom p = createMathInset(t.cs());
1013 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
1014 parse_into(p->cell(i), FLAG_ITEM);
// FLAG_LEAVE is cleared once it has been acted on
1020 if (flags & FLAG_LEAVE) {
1021 flags &= ~FLAG_LEAVE;
// error recovery: skip forward to an "end" control sequence
1027 lyxerr << " Math Panic, expect problems!\n";
1028 // Search for the end command.
1032 } while (good() && t.cs() != "end");
1038 } // anonymous namespace
// Parses the math code in str into ar by wrapping the string in an
// input stream and forwarding to the stream overload.
1041 void mathed_parse_cell(MathArray & ar, string const & str)
1043 istringstream is(str.c_str());
1044 mathed_parse_cell(ar, is);
// Parses the math code read from is into ar, using a temporary Parser
// and no stop flags.
1048 void mathed_parse_cell(MathArray & ar, istream & is)
1050 Parser(is).parse_into(ar, 0);
// Parses a macro definition held in a string; returns the result of
// Parser::parse_macro(). The parser construction line is not visible in
// this listing.
1055 string mathed_parse_macro(string const & str)
1057 istringstream is(str.c_str());
1059 return parser.parse_macro();
// Stream variant: returns the result of Parser::parse_macro().
1062 string mathed_parse_macro(istream & is)
1065 return parser.parse_macro();
// LyXLex variant: returns the result of Parser::parse_macro().
1068 string mathed_parse_macro(LyXLex & lex)
1071 return parser.parse_macro();
// Parses a complete formula held in a string into t; returns the result
// of Parser::parse_normal(). The parser construction line is not visible
// in this listing.
1076 bool mathed_parse_normal(MathAtom & t, string const & str)
1078 istringstream is(str.c_str());
1080 return parser.parse_normal(t);
// Stream variant: returns the result of Parser::parse_normal().
1083 bool mathed_parse_normal(MathAtom & t, istream & is)
1086 return parser.parse_normal(t);
// LyXLex variant: returns the result of Parser::parse_normal().
1089 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1092 return parser.parse_normal(t);