3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
20 If someone desperately needs partial "structures" (such as a few cells of
21 an array inset or similar), (s)he could use the following hack as a starting
22 point to write some macros:
27 \def\makeamptab{\catcode`\&=4\relax}
28 \def\makeampletter{\catcode`\&=11\relax}
29 \def\b{\makeampletter\expandafter\makeamptab\bi}
52 #pragma implementation
55 #include "math_parser.h"
57 #include "math_inset.h"
58 #include "math_arrayinset.h"
59 #include "math_braceinset.h"
60 #include "math_charinset.h"
61 #include "math_deliminset.h"
62 #include "math_factory.h"
63 #include "math_funcinset.h"
64 #include "math_kerninset.h"
65 #include "math_macro.h"
66 #include "math_macrotable.h"
67 #include "math_macrotemplate.h"
68 #include "math_matrixinset.h"
69 #include "math_rootinset.h"
70 #include "math_sqrtinset.h"
71 #include "math_scriptinset.h"
72 #include "math_specialcharinset.h"
73 #include "math_splitinset.h"
74 #include "math_sqrtinset.h"
78 #include "support/lstrings.h"
// Returns true when s is non-empty and its last character is '*'
// (used below to tell starred environment names such as "align*" from
// their unstarred form).
bool stared(string const & s)
{
	// Keep the length in the string's own size type: storing size() in a
	// plain `unsigned` can truncate where size_type is wider than unsigned.
	string::size_type const n = s.size();
	return n != 0 && s[n - 1] == '*';
}
// Appends to ar a MathAtom wrapping a newly allocated MathCharInset that
// is constructed from character c and text code `code`.
96 void add(MathArray & ar, char c, MathTextCodes code)
98 ar.push_back(MathAtom(new MathCharInset(c, code)));
102 // These are TeX's catcodes
// The trailing comment on each enumerator gives the numeric TeX category
// code it stands for and the characters that category covers.
104 catEscape, // 0 backslash
114 catSpace, // 10 space
115 catLetter, // 11 a-zA-Z
116 catOther, // 12 none of the above
119 catInvalid // 15 <delete>
// Table holding one CatCode per possible byte value (256 entries).
122 CatCode theCatcode[256];
// Returns the category code stored in theCatcode for byte c. The parameter
// is unsigned char, so the index is always within 0..255.
125 inline CatCode catcode(unsigned char c)
127 return theCatcode[c];
// Bit flags passed as the `flags` argument of Parser::parse_into(), which
// tests them with `flags & FLAG_...` and clears some of them as it goes.
// Each flag is a distinct bit; bit 5 (1 << 5) is not used by the flags
// listed here.
132 FLAG_BRACE = 1 << 0, // an opening brace needed
133 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing process
134 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
135 FLAG_END = 1 << 3, // next \\end ends the parsing process
136 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing process
137 FLAG_NEWLINE = 1 << 6, // next \\\\ ends the parsing process
138 FLAG_ITEM = 1 << 7, // read a (possibly braced token)
139 FLAG_BLOCK = 1 << 8, // next block ends the parsing process
140 FLAG_LEAVE = 1 << 9 // leave the loop at the end
146 for (int i = 0; i <= 255; ++i)
147 theCatcode[i] = catOther;
148 for (int i = 'a'; i <= 'z'; ++i)
149 theCatcode[i] = catLetter;
150 for (int i = 'A'; i <= 'Z'; ++i)
151 theCatcode[i] = catLetter;
153 theCatcode['\\'] = catEscape;
154 theCatcode['{'] = catBegin;
155 theCatcode['}'] = catEnd;
156 theCatcode['$'] = catMath;
157 theCatcode['&'] = catAlign;
158 theCatcode['\n'] = catNewline;
159 theCatcode['#'] = catParameter;
160 theCatcode['^'] = catSuper;
161 theCatcode['_'] = catSub;
162 theCatcode['
\7f'] = catIgnore;
163 theCatcode[' '] = catSpace;
164 theCatcode['\t'] = catSpace;
165 theCatcode['\r'] = catSpace;
166 theCatcode['~'] = catActive;
167 theCatcode['%'] = catComment;
173 // Helper class for parsing
// Default token: empty control-sequence name, character 0, category catIgnore.
179 Token() : cs_(), char_(0), cat_(catIgnore) {}
// Character token: stores c together with its category code cat.
181 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
// Control-sequence token named cs; its character is 0 and its category
// is catIgnore.
183 Token(const string & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
// Control-sequence name (empty for tokens built from a character).
186 string const & cs() const { return cs_; }
// Category code of the token.
188 CatCode cat() const { return cat_; }
// Character of the token (0 for tokens built from a name).
190 char character() const { return char_; }
// Textual form of the token; defined out of line below.
192 string asString() const;
// Returns the control-sequence name when it is non-empty, otherwise a
// one-character string made from the token's character.
203 string Token::asString() const
205 return cs_.size() ? cs_ : string(1, char_);
// Two tokens compare equal when their character, category code and
// control-sequence name are all equal.
208 bool operator==(Token const & s, Token const & t)
210 return s.character() == t.character()
211 && s.cat() == t.cat() && s.cs() == t.cs();
// Inequality comparison for tokens.
// NOTE(review): presumably the negation of operator== above — confirm.
214 bool operator!=(Token const & s, Token const & t)
// Debug output of a token. Two output forms appear below: a backslash
// followed by the control-sequence name, and "[character,category]".
// NOTE(review): presumably the first form is used for control-sequence
// tokens and the second for character tokens — confirm.
219 ostream & operator<<(ostream & os, Token const & t)
222 os << "\\" << t.cs();
224 os << "[" << t.character() << "," << t.cat() << "]";
// Construct from a LyXLex; the definition below tokenizes lexer.getStream().
233 Parser(LyXLex & lex);
// Construct from an input stream.
235 Parser(istream & is);
// Parse a \newcommand macro definition; returns a string (see definition).
238 string parse_macro();
// Parse a top-level math construct into the given atom.
240 bool parse_normal(MathAtom &);
// Parse tokens into `array`; `flags` is a combination of FLAG_* bits and the
// text code defaults to LM_TC_MIN.
242 void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
// Returns the stored line number.
244 int lineno() const { return lineno_; }
// Read an argument delimited by the characters lf and rf.
250 string getArg(char lf, char rf);
// Report a parse error message on lyxerr together with the line number.
254 void error(string const & msg);
// Parse the rows and cells of a grid-like inset.
256 bool parse_lines(MathAtom & t, bool numbered, bool outmost);
// Turn the contents of a stream into tokens.
260 void tokenize(istream & is);
// Turn the contents of a string into tokens.
262 void tokenize(string const & s);
// Append a token to tokens_.
264 void push_back(Token const & t);
// Token before the current position (dummy token at the very start).
268 Token const & prevToken() const;
// Token at the current position, without consuming it.
270 Token const & nextToken() const;
// Token at the current position; advances the position.
272 Token const & getToken();
274 void lex(string const & s);
// All tokens produced by tokenize(); pos_ indexes into this vector.
281 std::vector<Token> tokens_;
// Builds a parser from a LyXLex: the starting line number is taken from the
// lexer, the token position starts at 0 and the numbering flag is off; the
// lexer's stream is then tokenized.
293 Parser::Parser(LyXLex & lexer)
294 : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
296 tokenize(lexer.getStream());
// Builds a parser from a plain input stream: line number 0, token position
// 0, numbering flag off.
301 Parser::Parser(istream & is)
302 : lineno_(0), pos_(0), curr_num_(false)
// Appends token t to the end of tokens_.
308 void Parser::push_back(Token const & t)
310 tokens_.push_back(t);
// NOTE(review): presumably removes the last token from tokens_ (the
// counterpart of push_back) — confirm against the body.
314 void Parser::pop_back()
// Returns the token immediately before the current position, or a
// default-constructed static dummy token when the position is 0.
320 Token const & Parser::prevToken() const
322 static const Token dummy;
323 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
// Returns the token at the current position without consuming it, or a
// default-constructed static dummy token when no tokens are left.
327 Token const & Parser::nextToken() const
329 static const Token dummy;
330 return good() ? tokens_[pos_] : dummy;
// Returns the token at the current position and advances past it. When no
// tokens are left, returns a default-constructed static dummy token and
// leaves the position unchanged.
334 Token const & Parser::getToken()
336 static const Token dummy;
337 return good() ? tokens_[pos_++] : dummy;
// NOTE(review): presumably steps the token position back by one so the last
// consumed token is read again — confirm against the body.
341 void Parser::putback()
// True while the current position still refers to a token in tokens_.
347 bool Parser::good() const
349 return pos_ < tokens_.size();
353 char Parser::getChar()
356 lyxerr << "The input stream is not well..." << endl;
357 return tokens_[pos_++].character();
// Reads an argument delimited by the characters lf (opening) and rg
// (closing). The loop shown keeps calling getChar() until it returns rg or
// the token stream is no longer good().
361 string Parser::getArg(char lf, char rg)
369 while ((c = getChar()) != rg && good())
// Collects text from the stream into a string s and strips a trailing
// "\end_inset" marker (10 characters) from it when present.
376 void Parser::tokenize(istream & is)
378 // eat everything up to the next \end_inset or end of stream
379 // and store it in s for further tokenization
// The size check comes first so substr() is never asked for a start
// position before the beginning of s.
384 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
385 s = s.substr(0, s.size() - 10);
// Splits buffer into Tokens, classifying each character with catcode(), and
// appends them to tokens_ via push_back(). The complete token list is
// dumped to lyxerr at the end.
395 void Parser::tokenize(string const & buffer)
// NOTE(review): one-time initialisation guard, presumably for the catcode
// table — confirm.
397 static bool init_done = false;
// The buffer is read back character by character through a binary-mode
// string stream.
404 istringstream is(buffer.c_str(), ios::in | ios::binary);
409 switch (catcode(c)) {
413 if (catcode(c) == catNewline)
414 ; //push_back(Token("par"));
416 push_back(Token(' ', catSpace));
// Reads characters up to and including the next newline-category one.
423 while (is.get(c) && catcode(c) != catNewline)
// A letter starts a run of letters; the loop reads while the category
// stays catLetter.
432 if (catcode(c) == catLetter) {
433 while (is.get(c) && catcode(c) == catLetter)
// Space-category characters following that run are read as well.
435 if (catcode(c) == catSpace)
436 while (is.get(c) && catcode(c) == catSpace)
// Any other character becomes a single-character token carrying its own
// category code.
445 push_back(Token(c, catcode(c)));
450 lyxerr << "\nTokens: ";
451 for (unsigned i = 0; i < tokens_.size(); ++i)
452 lyxerr << tokens_[i];
// Writes msg to lyxerr as a math parse error, prefixed with the stored
// (approximate, hence the "~") line number.
458 void Parser::error(string const & msg)
460 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Fills the grid inset held by t row by row, cell by cell, using
// parse_into(..., FLAG_BLOCK) for each cell. `numbered` is the initial
// numbering state for every row. curr_num_ and curr_label_ are saved on
// entry and restored at the end.
465 bool Parser::parse_lines(MathAtom & t, bool numbered, bool outmost)
467 MathGridInset * p = t->asGridInset();
469 lyxerr << "error in Parser::parse_lines() 1\n";
473 const int cols = p->ncols();
475 // save global variables
476 bool const saved_num = curr_num_;
477 string const saved_label = curr_label_;
// No loop condition: the row loop is left from inside its body.
479 for (int row = 0; true; ++row) {
480 // reset global variables
481 curr_num_ = numbered;
485 for (int col = 0; col < cols; ++col) {
486 //lyxerr << "reading cell " << row << " " << col << "\n";
// Cells are addressed by the flat index col + row * cols.
487 parse_into(p->cell(col + row * cols), FLAG_BLOCK);
// The cell was not terminated by an alignment token (catAlign, i.e. '&').
490 if (prevToken().cat() != catAlign) {
491 //lyxerr << "less cells read than normal in row/col: "
492 // << row << " " << col << "\n";
// Per-row numbering, label and vertical skip are stored through the
// MathMatrixInset interface.
498 MathMatrixInset * m = t->asMatrixInset();
500 lyxerr << "error in Parser::parse_lines() 2\n";
503 m->numbered(row, curr_num_);
504 m->label(row, curr_label_);
505 if (curr_skip_.size()) {
506 m->vskip(LyXLength(curr_skip_), row);
// The previous token is not the control sequence "\" (a "\\" line break).
512 if (prevToken() != Token("\\")) {
513 //lyxerr << "no newline here\n";
520 // restore "global" variables
521 curr_num_ = saved_num;
522 curr_label_ = saved_label;
// Parses a macro definition of the form
//   \newcommand{\name}[nargs]{body}
// and registers it through MathMacroTable::create(name, narg, body).
// NOTE(review): name starts out as "{error}"; presumably that value is what
// the failure paths return — confirm.
528 string Parser::parse_macro()
530 string name = "{error}";
// Skip leading space tokens.
532 while (nextToken().cat() == catSpace)
535 if (getToken().cs() != "newcommand") {
536 lyxerr << "\\newcommand expected\n";
540 if (getToken().cat() != catBegin) {
541 lyxerr << "'{' expected\n";
// The macro name is the control-sequence name of the token inside the braces.
545 name = getToken().cs();
547 if (getToken().cat() != catEnd) {
548 lyxerr << "'}' expected\n";
// Optional [n] argument count; an empty argument means 0 arguments.
552 string arg = getArg('[', ']');
553 int narg = arg.empty() ? 0 : atoi(arg.c_str());
// The body must start with '{' and ends at the matching closing brace.
555 parse_into(ar, FLAG_BRACE | FLAG_BRACE_LAST);
556 MathMacroTable::create(name, narg, ar);
// Parses one top-level math construct into `matrix`.
// After skipping leading space tokens it handles the forms shown below:
// $...$ (LM_OT_SIMPLE), $$...$$ (LM_OT_EQUATION), a control-sequence form
// that also creates an LM_OT_EQUATION, and \begin{name} for the environment
// names equation, equation*, displaymath, eqnarray, align, alignat, xalignat
// (each with a starred variant), xxalignat, multline and gather (with
// starred variants). Any other environment name is logged under
// Debug::MATHED.
562 bool Parser::parse_normal(MathAtom & matrix)
564 while (nextToken().cat() == catSpace)
567 Token const & t = getToken();
570 matrix = MathAtom(new MathMatrixInset(LM_OT_SIMPLE));
571 parse_into(matrix->cell(0), 0);
575 if (t.cat() == catMath) {
576 Token const & n = getToken();
577 if (n.cat() == catMath) {
578 // TeX's $$...$$ syntax for displayed math
579 matrix = MathAtom(new MathMatrixInset(LM_OT_EQUATION));
580 MathMatrixInset * p = matrix->asMatrixInset();
581 parse_into(p->cell(0), 0);
582 p->numbered(0, curr_num_);
583 p->label(0, curr_label_);
585 // simple $...$ stuff
587 matrix = MathAtom(new MathMatrixInset(LM_OT_SIMPLE));
588 parse_into(matrix->cell(0), 0);
// From here on only control sequences are acceptable as the start of math.
593 if (!t.cs().size()) {
594 lyxerr << "start of math expected, got '" << t << "'\n";
598 string const & cs = t.cs();
603 matrix = MathAtom(new MathMatrixInset(LM_OT_EQUATION));
604 MathMatrixInset * p = matrix->asMatrixInset();
605 parse_into(p->cell(0), 0);
606 p->numbered(0, curr_num_);
607 p->label(0, curr_label_);
612 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
// Environment name from \begin{name}.
616 string const name = getArg('{', '}');
618 if (name == "equation" || name == "equation*" || name == "displaymath") {
// Only the plain "equation" environment is numbered.
619 curr_num_ = (name == "equation");
621 matrix = MathAtom(new MathMatrixInset(LM_OT_EQUATION));
622 MathMatrixInset * p = matrix->asMatrixInset();
623 parse_into(p->cell(0), FLAG_END);
624 p->numbered(0, curr_num_);
625 p->label(0, curr_label_);
// The multi-line environments below are numbered unless the name is starred.
629 if (name == "eqnarray" || name == "eqnarray*") {
630 matrix = MathAtom(new MathMatrixInset(LM_OT_EQNARRAY));
631 return parse_lines(matrix, !stared(name), true);
634 if (name == "align" || name == "align*") {
635 matrix = MathAtom(new MathMatrixInset(LM_OT_ALIGN));
636 return parse_lines(matrix, !stared(name), true);
639 if (name == "alignat" || name == "alignat*") {
// The column count is twice the braced numeric argument.
640 int nc = 2 * atoi(getArg('{', '}').c_str());
641 matrix = MathAtom(new MathMatrixInset(LM_OT_ALIGNAT, nc));
642 return parse_lines(matrix, !stared(name), true);
645 if (name == "xalignat" || name == "xalignat*") {
646 int nc = 2 * atoi(getArg('{', '}').c_str());
647 matrix = MathAtom(new MathMatrixInset(LM_OT_XALIGNAT, nc));
648 return parse_lines(matrix, !stared(name), true);
// Only the unstarred name is matched here, so !stared(name) is always true.
651 if (name == "xxalignat") {
652 int nc = 2 * atoi(getArg('{', '}').c_str());
653 matrix = MathAtom(new MathMatrixInset(LM_OT_XXALIGNAT, nc));
654 return parse_lines(matrix, !stared(name), true);
657 if (name == "multline" || name == "multline*") {
658 matrix = MathAtom(new MathMatrixInset(LM_OT_MULTLINE));
659 return parse_lines(matrix, !stared(name), true);
662 if (name == "gather" || name == "gather*") {
663 matrix = MathAtom(new MathMatrixInset(LM_OT_GATHER));
664 return parse_lines(matrix, !stared(name), true);
667 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
// Main parsing loop: consumes tokens and appends the resulting atoms to
// `array`. `flags` is a combination of FLAG_* bits selecting what ends this
// invocation (it is modified locally while parsing); `code` is the text code
// given to plain characters added to the array. The function calls itself
// recursively for nested cells and arguments.
672 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
678 Token const & t = getToken();
680 //lyxerr << "t: " << t << " flags: " << flags << "'\n";
681 //array.dump(lyxerr);
// FLAG_ITEM: read either one token or a braced group.
684 if (flags & FLAG_ITEM) {
686 if (t.cat() == catBegin) {
687 // skip the brace and collect everything to the next matching
689 flags |= FLAG_BRACE_LAST;
692 // handle only this single token, leave the loop if done
// FLAG_BRACE: the current token has to be an opening brace; the flag is
// cleared below.
697 if (flags & FLAG_BRACE) {
698 if (t.cat() != catBegin) {
699 error("Expected {. Maybe you forgot to enclose an argument in {}");
703 flags &= ~FLAG_BRACE;
// FLAG_BLOCK: an alignment token, a "\\" control sequence or \end is
// checked for here.
708 if (flags & FLAG_BLOCK) {
709 if (t.cat() == catAlign || t.cs() == "\\")
711 if (t.cs() == "end") {
// Dispatch on the category of character tokens.
720 if (t.cat() == catMath)
723 else if (t.cat() == catLetter)
724 add(array, t.character(), code);
// Spaces are only kept when the current text code is LM_TC_TEXTRM.
726 else if (t.cat() == catSpace && code == LM_TC_TEXTRM)
727 add(array, t.character(), code);
729 else if (t.cat() == catParameter) {
730 Token const & n = getToken();
// NOTE(review): the digit following '#' is converted with - '0' and is not
// range-checked here.
731 array.push_back(MathAtom(new MathMacroArgument(n.character() - '0')));
// An opening brace starts a brace inset whose cell is parsed up to the
// matching closing brace.
734 else if (t.cat() == catBegin) {
735 array.push_back(MathAtom(new MathBraceInset));
736 parse_into(array.back()->cell(0), FLAG_BRACE_LAST, LM_TC_MIN);
739 else if (t.cat() == catEnd) {
740 if (flags & FLAG_BRACE_LAST)
742 lyxerr << "found '}' unexpectedly, array: '" << array << "'\n";
743 add(array, '}', LM_TC_TEX);
746 else if (t.cat() == catAlign) {
747 lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
748 add(array, '&', LM_TC_TEX);
// Super-/subscript: reuse the script inset at the back of the array when it
// does not have this script yet, otherwise append a new one.
751 else if (t.cat() == catSuper || t.cat() == catSub) {
752 bool up = (t.cat() == catSuper);
753 MathScriptInset * p = 0;
755 p = array.back()->asScriptInset();
756 if (!p || p->has(up)) {
757 array.push_back(MathAtom(new MathScriptInset(up)));
758 p = array.back()->asScriptInset();
761 parse_into(p->cell(up), FLAG_ITEM);
766 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
769 else if (t.cat() == catOther)
770 add(array, t.character(), code);
// Control sequences follow.
775 else if (t.cs() == "protect")
778 else if (t.cs() == "end")
781 else if (t.cs() == ")")
784 else if (t.cs() == "]")
// "\\" line break: an optional [skip] argument is stored in curr_skip_.
787 else if (t.cs() == "\\") {
788 curr_skip_ = getArg('[', ']');
789 if (flags & FLAG_NEWLINE)
791 lyxerr[Debug::MATHED]
792 << "found newline unexpectedly, array: '" << array << "'\n";
793 array.push_back(createMathInset("\\"));
796 else if (t.cs() == "limits")
799 else if (t.cs() == "nolimits")
802 else if (t.cs() == "nonumber")
805 else if (t.cs() == "number")
// \sqrt has two forms: a root inset with a bracketed first cell and an item
// as second cell, or a plain square-root inset with a single item.
808 else if (t.cs() == "sqrt") {
811 array.push_back(MathAtom(new MathRootInset));
812 parse_into(array.back()->cell(0), FLAG_BRACK_END);
813 parse_into(array.back()->cell(1), FLAG_ITEM);
816 array.push_back(MathAtom(new MathSqrtInset));
817 parse_into(array.back()->cell(0), FLAG_ITEM);
// \left<delim> ... \right<delim>: the delimiters are the tokens directly
// after \left and after the nested parse_into(..., FLAG_RIGHT) call.
821 else if (t.cs() == "left") {
822 string l = getToken().asString();
824 parse_into(ar, FLAG_RIGHT);
825 string r = getToken().asString();
826 MathAtom dl(new MathDelimInset(l, r));
831 else if (t.cs() == "right") {
832 if (!(flags & FLAG_RIGHT)) {
833 lyxerr << "got so far: '" << array << "'\n";
834 error("Unmatched right delimiter");
842 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
843 //MathArray tmp = array;
844 //MathSizeInset * p = new MathSizeInset(MathStyles(lval_->id));
845 //array.push_back(p);
846 //parse_into(p->cell(0), FLAG_BRACE_FONT);
// Nested environments: "array" and "split" are handled; other names are
// only logged.
852 else if (t.cs() == "begin") {
853 string const name = getArg('{', '}');
854 if (name == "array") {
// Appending 'c' guarantees valign[0] exists when no [..] argument was given.
855 string const valign = getArg('[', ']') + 'c';
856 string const halign = getArg('{', '}');
858 MathAtom(new MathArrayInset(halign.size(), 1, valign[0], halign)));
859 parse_lines(array.back(), false, false);
860 } else if (name == "split") {
861 array.push_back(MathAtom(new MathSplitInset(1)));
862 parse_lines(array.back(), false, false);
864 lyxerr[Debug::MATHED] << "unknow math inset begin '" << name << "'\n";
867 else if (t.cs() == "kern") {
// NOTE: this t shadows the loop's current token t inside this scope.
873 Token const & t = getToken();
879 if (isValidLength(s))
882 array.push_back(MathAtom(new MathKernInset(s)));
885 else if (t.cs() == "label") {
887 //parse_into(ar, FLAG_ITEM);
889 //ar.write(os, true);
890 //curr_label_ = os.str();
892 curr_label_ = getArg('{', '}');
// Infix fraction-like commands: what was parsed so far becomes cell 0, what
// follows is parsed into cell 1.
895 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
896 MathAtom p = createMathInset(t.cs());
897 // search backward for position of last '{' if any
// NOTE(review): the `pos >= 0` test can only end this loop if pos has a
// signed type; its declaration is not part of the lines shown — confirm.
899 for (pos = array.size() - 1; pos >= 0; --pos)
900 if (array.at(pos)->getChar() == '{')
903 // found it -> use the part after '{' as "numerator"
904 p->cell(0) = MathArray(array, pos + 1, array.size());
905 parse_into(p->cell(1), FLAG_BRACE_LAST);
906 // delete denominator and the '{'
907 array.erase(pos, array.size());
908 } else if (flags & FLAG_RIGHT) {
909 // we are inside a \left ... \right block
910 //lyxerr << "found '" << t.cs() << "' enclosed by \\left .. \\right\n";
911 p->cell(0).swap(array);
912 parse_into(p->cell(1), FLAG_RIGHT);
913 // handle the right delimiter properly
916 // not found -> use everything as "numerator"
917 p->cell(0).swap(array);
918 parse_into(p->cell(1), FLAG_BLOCK);
920 array.push_back(MathAtom(p));
925 else if (t.cs() == "mbox") {
926 array.push_back(createMathInset(t.cs()));
927 // slurp in the argument of mbox
929 MathBoxInset * p = array.back()->asBoxInset();
// Any other control sequence is looked up in the keyword table.
934 else if (t.cs().size()) {
935 latexkeys const * l = in_word_set(t.cs());
// Font command with an argument: the argument is parsed with the font's text
// code and each resulting atom gets handleFont() applied.
937 if (l->token == LM_TK_FONT) {
938 //lyxerr << "starting font\n";
939 //CatCode catSpaceSave = theCatcode[' '];
940 //if (l->id == LM_TC_TEXTRM) {
941 // // temporarily change catcode
942 // theCatcode[' '] = catLetter;
// NOTE: this t (a MathTextCodes) shadows the loop's current token t inside
// this scope.
945 MathTextCodes t = static_cast<MathTextCodes>(l->id);
947 parse_into(ar, FLAG_ITEM, t);
948 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it)
949 (*it)->handleFont(t);
952 // undo catcode changes
953 ////theCatcode[' '] = catSpaceSave;
954 //lyxerr << "ending font\n";
// Old-style font switch: changes the text code used for the following
// characters in this invocation.
957 else if (l->token == LM_TK_OLDFONT) {
958 code = static_cast<MathTextCodes>(l->id);
// Generic commands: create the inset by name and parse one item per
// argument cell.
962 MathAtom p = createMathInset(t.cs());
963 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
964 parse_into(p->cell(i), FLAG_ITEM);
970 MathAtom p = createMathInset(t.cs());
971 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
972 parse_into(p->cell(i), FLAG_ITEM);
// FLAG_LEAVE is cleared once it has been acted on.
978 if (flags & FLAG_LEAVE) {
979 flags &= ~FLAG_LEAVE;
985 lyxerr << " Math Panic, expect problems!\n";
986 // Search for the end command.
990 } while (good() && t.cs() != "end");
996 } // anonymous namespace
// Parses the math text in str into ar by wrapping it in an istringstream and
// delegating to the istream overload.
999 void mathed_parse_cell(MathArray & ar, string const & str)
1001 istringstream is(str.c_str());
1002 mathed_parse_cell(ar, is);
// Parses the contents of is into ar using a temporary Parser and no flags.
1006 void mathed_parse_cell(MathArray & ar, istream & is)
1008 Parser(is).parse_into(ar, 0);
// Parses a macro definition held in a string; returns the result of
// Parser::parse_macro().
1013 string mathed_parse_macro(string const & str)
1015 istringstream is(str.c_str());
1017 return parser.parse_macro();
// Parses a macro definition from an input stream; returns the result of
// Parser::parse_macro().
1020 string mathed_parse_macro(istream & is)
1023 return parser.parse_macro();
// Parses a macro definition from a LyXLex; returns the result of
// Parser::parse_macro().
1026 string mathed_parse_macro(LyXLex & lex)
1029 return parser.parse_macro();
// Parses a top-level math construct held in a string into t; returns the
// result of Parser::parse_normal().
1034 bool mathed_parse_normal(MathAtom & t, string const & str)
1036 istringstream is(str.c_str());
1038 return parser.parse_normal(t);
// Parses a top-level math construct from an input stream into t; returns the
// result of Parser::parse_normal().
1041 bool mathed_parse_normal(MathAtom & t, istream & is)
1044 return parser.parse_normal(t);
// Parses a top-level math construct from a LyXLex into t; returns the result
// of Parser::parse_normal().
1047 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1050 return parser.parse_normal(t);