3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
24 #pragma implementation
27 #include "math_parser.h"
29 #include "math_inset.h"
30 #include "math_arrayinset.h"
31 #include "math_charinset.h"
32 #include "math_deliminset.h"
33 #include "math_factory.h"
34 #include "math_funcinset.h"
35 #include "math_kerninset.h"
36 #include "math_macro.h"
37 #include "math_macrotable.h"
38 #include "math_macrotemplate.h"
39 #include "math_matrixinset.h"
40 #include "math_rootinset.h"
41 #include "math_sqrtinset.h"
42 #include "math_scriptinset.h"
43 #include "math_specialcharinset.h"
44 #include "math_splitinset.h"
45 #include "math_sqrtinset.h"
49 #include "support/lstrings.h"
// Tell whether an environment name is the starred variant, i.e. whether
// its last character is '*'. Callers use the result to decide if the
// environment is numbered (starred means not numbered).
// An empty name is never starred.
bool stared(string const & s)
{
	// Keep the length in the string's own size_type; squeezing it into
	// an 'unsigned' could truncate where size_type is wider.
	string::size_type const n = s.size();
	return n > 0 && s[n - 1] == '*';
}
67 void add(MathArray & ar, char c, MathTextCodes code)
69 ar.push_back(MathAtom(new MathCharInset(c, code)));
73 // These are TeX's catcodes
75 catEscape, // 0 backslash
86 catLetter, // 11 a-zA-Z
87 catOther, // 12 none of the above
90 catInvalid // 15 <delete>
93 CatCode theCatcode[256];
96 inline CatCode catcode(unsigned char c)
103 FLAG_BRACE = 1 << 0, // an opening brace needed
104 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing process
105 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
106 FLAG_END = 1 << 3, // next \\end ends the parsing process
107 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing process
108 FLAG_NEWLINE = 1 << 6, // next \\\\ ends the parsing process
109 FLAG_ITEM = 1 << 7, // read a (possibly braced token)
110 FLAG_BLOCK = 1 << 8, // next block ends the parsing process
111 FLAG_LEAVE = 1 << 9 // leave the loop at the end
117 for (int i = 0; i <= 255; ++i)
118 theCatcode[i] = catOther;
119 for (int i = 'a'; i <= 'z'; ++i)
120 theCatcode[i] = catLetter;
121 for (int i = 'A'; i <= 'Z'; ++i)
122 theCatcode[i] = catLetter;
124 theCatcode['\\'] = catEscape;
125 theCatcode['{'] = catBegin;
126 theCatcode['}'] = catEnd;
127 theCatcode['$'] = catMath;
128 theCatcode['&'] = catAlign;
129 theCatcode['\n'] = catNewline;
130 theCatcode['#'] = catParameter;
131 theCatcode['^'] = catSuper;
132 theCatcode['_'] = catSub;
133 theCatcode['
\7f'] = catIgnore;
134 theCatcode[' '] = catSpace;
135 theCatcode['\t'] = catSpace;
136 theCatcode['\r'] = catSpace;
137 theCatcode['~'] = catActive;
138 theCatcode['%'] = catComment;
144 // Helper class for parsing
150 Token() : cs_(), char_(0), cat_(catIgnore) {}
152 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
154 Token(const string & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
157 string const & cs() const { return cs_; }
159 CatCode cat() const { return cat_; }
161 char character() const { return char_; }
163 string asString() const;
174 string Token::asString() const
176 return cs_.size() ? cs_ : string(1, char_);
179 bool operator==(Token const & s, Token const & t)
181 return s.character() == t.character()
182 && s.cat() == t.cat() && s.cs() == t.cs();
185 bool operator!=(Token const & s, Token const & t)
190 ostream & operator<<(ostream & os, Token const & t)
193 os << "\\" << t.cs();
195 os << "[" << t.character() << "," << t.cat() << "]";
204 Parser(LyXLex & lex);
206 Parser(istream & is);
209 string parse_macro();
211 bool parse_normal(MathAtom &);
213 void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
215 int lineno() const { return lineno_; }
221 string getArg(char lf, char rf);
225 void error(string const & msg);
227 bool parse_lines(MathAtom & t, bool numbered, bool outmost);
231 void tokenize(istream & is);
233 void tokenize(string const & s);
235 void push_back(Token const & t);
239 Token const & prevToken() const;
241 Token const & nextToken() const;
243 Token const & getToken();
245 void lex(string const & s);
252 std::vector<Token> tokens_;
264 Parser::Parser(LyXLex & lexer)
265 : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
267 tokenize(lexer.getStream());
272 Parser::Parser(istream & is)
273 : lineno_(0), pos_(0), curr_num_(false)
279 void Parser::push_back(Token const & t)
281 tokens_.push_back(t);
285 void Parser::pop_back()
291 Token const & Parser::prevToken() const
293 static const Token dummy;
294 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
298 Token const & Parser::nextToken() const
300 static const Token dummy;
301 return good() ? tokens_[pos_] : dummy;
305 Token const & Parser::getToken()
307 static const Token dummy;
308 return good() ? tokens_[pos_++] : dummy;
312 void Parser::putback()
318 bool Parser::good() const
320 return pos_ < tokens_.size();
324 char Parser::getChar()
327 lyxerr << "The input stream is not well..." << endl;
328 return tokens_[pos_++].character();
332 string Parser::getArg(char lf, char rg)
340 while ((c = getChar()) != rg && good())
347 void Parser::tokenize(istream & is)
349 // eat everything up to the next \end_inset or end of stream
350 // and store it in s for further tokenization
355 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
356 s = s.substr(0, s.size() - 10);
366 void Parser::tokenize(string const & buffer)
368 static bool init_done = false;
375 istringstream is(buffer.c_str(), ios::in | ios::binary);
380 switch (catcode(c)) {
384 if (catcode(c) == catNewline)
385 ; //push_back(Token("par"));
387 push_back(Token(' ', catSpace));
394 while (is.get(c) && catcode(c) != catNewline)
403 if (catcode(c) == catLetter) {
404 while (is.get(c) && catcode(c) == catLetter)
406 if (catcode(c) == catSpace)
407 while (is.get(c) && catcode(c) == catSpace)
416 push_back(Token(c, catcode(c)));
421 lyxerr << "\nTokens: ";
422 for (unsigned i = 0; i < tokens_.size(); ++i)
423 lyxerr << tokens_[i];
429 void Parser::error(string const & msg)
431 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
436 bool Parser::parse_lines(MathAtom & t, bool numbered, bool outmost)
438 MathGridInset * p = t->asGridInset();
440 lyxerr << "error in Parser::parse_lines() 1\n";
444 const int cols = p->ncols();
446 // save global variables
447 bool const saved_num = curr_num_;
448 string const saved_label = curr_label_;
450 for (int row = 0; true; ++row) {
451 // reset global variables
452 curr_num_ = numbered;
456 for (int col = 0; col < cols; ++col) {
457 //lyxerr << "reading cell " << row << " " << col << "\n";
458 parse_into(p->cell(col + row * cols), FLAG_BLOCK);
461 if (prevToken().cat() != catAlign) {
462 //lyxerr << "less cells read than normal in row/col: "
463 // << row << " " << col << "\n";
469 MathMatrixInset * m = t->asMatrixInset();
471 lyxerr << "error in Parser::parse_lines() 2\n";
474 m->numbered(row, curr_num_);
475 m->label(row, curr_label_);
476 if (curr_skip_.size()) {
477 m->vskip(LyXLength(curr_skip_), row);
483 if (prevToken() != Token("\\")) {
484 //lyxerr << "no newline here\n";
491 // restore "global" variables
492 curr_num_ = saved_num;
493 curr_label_ = saved_label;
499 string Parser::parse_macro()
501 string name = "{error}";
503 while (nextToken().cat() == catSpace)
506 if (getToken().cs() != "newcommand") {
507 lyxerr << "\\newcommand expected\n";
511 if (getToken().cat() != catBegin) {
512 lyxerr << "'{' expected\n";
516 name = getToken().cs();
518 if (getToken().cat() != catEnd) {
519 lyxerr << "'}' expected\n";
523 string arg = getArg('[', ']');
524 int narg = arg.empty() ? 0 : atoi(arg.c_str());
526 parse_into(ar, FLAG_BRACE | FLAG_BRACE_LAST);
527 MathMacroTable::create(name, narg, ar);
533 bool Parser::parse_normal(MathAtom & matrix)
535 while (nextToken().cat() == catSpace)
538 Token const & t = getToken();
541 matrix = MathAtom(new MathMatrixInset(LM_OT_SIMPLE));
542 parse_into(matrix->cell(0), 0);
546 if (t.cat() == catMath) {
547 Token const & n = getToken();
548 if (n.cat() == catMath) {
549 // TeX's $$...$$ syntax for displayed math
550 matrix = MathAtom(new MathMatrixInset(LM_OT_EQUATION));
551 MathMatrixInset * p = matrix->asMatrixInset();
552 parse_into(p->cell(0), 0);
553 p->numbered(0, curr_num_);
554 p->label(0, curr_label_);
556 // simple $...$ stuff
558 matrix = MathAtom(new MathMatrixInset(LM_OT_SIMPLE));
559 parse_into(matrix->cell(0), 0);
564 if (!t.cs().size()) {
565 lyxerr << "start of math expected, got '" << t << "'\n";
569 string const & cs = t.cs();
574 matrix = MathAtom(new MathMatrixInset(LM_OT_EQUATION));
575 MathMatrixInset * p = matrix->asMatrixInset();
576 parse_into(p->cell(0), 0);
577 p->numbered(0, curr_num_);
578 p->label(0, curr_label_);
583 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
587 string const name = getArg('{', '}');
589 if (name == "equation" || name == "equation*") {
590 curr_num_ = !stared(name);
592 matrix = MathAtom(new MathMatrixInset(LM_OT_EQUATION));
593 MathMatrixInset * p = matrix->asMatrixInset();
594 parse_into(p->cell(0), FLAG_END);
595 p->numbered(0, curr_num_);
596 p->label(0, curr_label_);
600 if (name == "eqnarray" || name == "eqnarray*") {
601 matrix = MathAtom(new MathMatrixInset(LM_OT_EQNARRAY));
602 return parse_lines(matrix, !stared(name), true);
605 if (name == "align" || name == "align*") {
606 matrix = MathAtom(new MathMatrixInset(LM_OT_ALIGN));
607 return parse_lines(matrix, !stared(name), true);
610 if (name == "alignat" || name == "alignat*") {
611 int nc = 2 * atoi(getArg('{', '}').c_str());
612 matrix = MathAtom(new MathMatrixInset(LM_OT_ALIGNAT, nc));
613 return parse_lines(matrix, !stared(name), true);
616 if (name == "xalignat" || name == "xalignat*") {
617 int nc = 2 * atoi(getArg('{', '}').c_str());
618 matrix = MathAtom(new MathMatrixInset(LM_OT_XALIGNAT, nc));
619 return parse_lines(matrix, !stared(name), true);
622 if (name == "xxalignat") {
623 int nc = 2 * atoi(getArg('{', '}').c_str());
624 matrix = MathAtom(new MathMatrixInset(LM_OT_XXALIGNAT, nc));
625 return parse_lines(matrix, !stared(name), true);
628 if (name == "multline" || name == "multline*") {
629 matrix = MathAtom(new MathMatrixInset(LM_OT_MULTLINE));
630 return parse_lines(matrix, !stared(name), true);
633 if (name == "gather" || name == "gather*") {
634 matrix = MathAtom(new MathMatrixInset(LM_OT_GATHER));
635 return parse_lines(matrix, !stared(name), true);
638 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
643 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
645 stack<MathTextCodes> fontcodes;
646 fontcodes.push(LM_TC_MIN);
652 Token const & t = getToken();
654 //lyxerr << "t: " << t << " flags: " << flags << "'\n";
655 //array.dump(lyxerr);
658 if (flags & FLAG_ITEM) {
660 if (t.cat() == catBegin) {
661 // skip the brace and collect everything to the next matching
663 flags |= FLAG_BRACE_LAST;
666 // handle only this single token, leave the loop if done
671 if (flags & FLAG_BRACE) {
672 if (t.cat() != catBegin) {
673 error("Expected {. Maybe you forgot to enclose an argument in {}");
677 flags &= ~FLAG_BRACE;
682 if (flags & FLAG_BLOCK) {
683 if (t.cat() == catAlign || t.cs() == "\\")
685 if (t.cs() == "end") {
694 if (t.cat() == catMath)
697 else if (t.cat() == catLetter)
698 add(array, t.character(), fontcodes.top());
700 else if (t.cat() == catSpace &&
701 (fontcodes.top() == LM_TC_TEXTRM || code == LM_TC_TEXTRM))
702 add(array, ' ', fontcodes.top());
704 else if (t.cat() == catParameter) {
705 Token const & n = getToken();
706 array.push_back(MathAtom(new MathMacroArgument(n.character() - '0')));
709 else if (t.cat() == catBegin) {
710 add(array, '{', LM_TC_TEX);
711 fontcodes.push(LM_TC_MIN);
714 else if (t.cat() == catEnd) {
715 if (flags & FLAG_BRACE_LAST)
717 add(array, '}', LM_TC_TEX);
721 else if (t.cat() == catAlign) {
722 lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
723 add(array, '&', LM_TC_TEX);
726 else if (t.cat() == catSuper || t.cat() == catSub) {
727 bool up = (t.cat() == catSuper);
728 MathScriptInset * p = 0;
730 p = array.back()->asScriptInset();
731 if (!p || p->has(up)) {
732 array.push_back(MathAtom(new MathScriptInset(up)));
733 p = array.back()->asScriptInset();
736 parse_into(p->cell(up), FLAG_ITEM);
741 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
744 else if (t.cat() == catOther)
745 add(array, t.character(), fontcodes.top());
750 else if (t.cs() == "protect")
753 else if (t.cs() == "end")
756 else if (t.cs() == ")")
759 else if (t.cs() == "]")
762 else if (t.cs() == "\\") {
763 curr_skip_ = getArg('[', ']');
764 if (flags & FLAG_NEWLINE)
766 lyxerr[Debug::MATHED]
767 << "found newline unexpectedly, array: '" << array << "'\n";
768 array.push_back(createMathInset("\\"));
771 else if (t.cs() == "limits")
774 else if (t.cs() == "nolimits")
777 else if (t.cs() == "nonumber")
780 else if (t.cs() == "number")
783 else if (t.cs() == "sqrt") {
786 array.push_back(MathAtom(new MathRootInset));
787 parse_into(array.back()->cell(0), FLAG_BRACK_END);
788 parse_into(array.back()->cell(1), FLAG_ITEM);
791 array.push_back(MathAtom(new MathSqrtInset));
792 parse_into(array.back()->cell(0), FLAG_ITEM);
796 else if (t.cs() == "left") {
797 string l = getToken().asString();
799 parse_into(ar, FLAG_RIGHT);
800 string r = getToken().asString();
801 MathAtom dl(new MathDelimInset(l, r));
806 else if (t.cs() == "right") {
807 if (!(flags & FLAG_RIGHT)) {
808 lyxerr << "got so far: '" << array << "'\n";
809 error("Unmatched right delimiter");
817 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
818 //MathArray tmp = array;
819 //MathSizeInset * p = new MathSizeInset(MathStyles(lval_->id));
820 //array.push_back(p);
821 //parse_into(p->cell(0), FLAG_BRACE_FONT);
827 else if (t.cs() == "begin") {
828 string const name = getArg('{', '}');
829 if (name == "array") {
830 string const valign = getArg('[', ']') + 'c';
831 string const halign = getArg('{', '}');
833 MathAtom(new MathArrayInset(halign.size(), 1, valign[0], halign)));
834 parse_lines(array.back(), false, false);
835 } else if (name == "split") {
836 array.push_back(MathAtom(new MathSplitInset(1)));
837 parse_lines(array.back(), false, false);
839 lyxerr[Debug::MATHED] << "unknow math inset begin '" << name << "'\n";
842 else if (t.cs() == "kern") {
848 Token const & t = getToken();
854 if (isValidLength(s))
857 array.push_back(MathAtom(new MathKernInset(s)));
860 else if (t.cs() == "label") {
862 //parse_into(ar, FLAG_ITEM);
864 //ar.write(os, true);
865 //curr_label_ = os.str();
867 curr_label_ = getArg('{', '}');
870 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
871 MathAtom p = createMathInset(t.cs());
872 // search backward for position of last '{' if any
874 for (pos = array.size() - 1; pos >= 0; --pos)
875 if (array.at(pos)->getChar() == '{')
878 // found it -> use the part after '{' as "numerator"
879 p->cell(0) = MathArray(array, pos + 1, array.size());
880 parse_into(p->cell(1), FLAG_BRACE_LAST);
881 // delete denominator and the '{'
882 array.erase(pos, array.size());
883 } else if (flags & FLAG_RIGHT) {
884 // we are inside a \left ... \right block
885 //lyxerr << "found '" << t.cs() << "' enclosed by \\left .. \\right\n";
886 p->cell(0).swap(array);
887 parse_into(p->cell(1), FLAG_RIGHT);
888 // handle the right delimiter properly
891 // not found -> use everything as "numerator"
892 p->cell(0).swap(array);
893 parse_into(p->cell(1), FLAG_BLOCK);
895 array.push_back(MathAtom(p));
900 else if (t.cs() == "mbox") {
901 array.push_back(createMathInset(t.cs()));
902 // slurp in the argument of mbox
904 MathBoxInset * p = array.back()->asBoxInset();
909 else if (t.cs().size()) {
910 latexkeys const * l = in_word_set(t.cs());
912 if (l->token == LM_TK_FONT) {
913 //lyxerr << "starting font\n";
914 //CatCode catSpaceSave = theCatcode[' '];
915 //if (l->id == LM_TC_TEXTRM) {
916 // // temporarily change catcode
917 // theCatcode[' '] = catLetter;
920 MathTextCodes t = static_cast<MathTextCodes>(l->id);
922 parse_into(ar, FLAG_ITEM, t);
923 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it)
924 (*it)->handleFont(t);
927 // undo catcode changes
928 ////theCatcode[' '] = catSpaceSave;
929 //lyxerr << "ending font\n";
932 else if (l->token == LM_TK_OLDFONT) {
934 fontcodes.push(static_cast<MathTextCodes>(l->id));
938 MathAtom p = createMathInset(t.cs());
939 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
940 parse_into(p->cell(i), FLAG_ITEM);
946 MathAtom p = createMathInset(t.cs());
947 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
948 parse_into(p->cell(i), FLAG_ITEM);
954 if (flags & FLAG_LEAVE) {
955 flags &= ~FLAG_LEAVE;
961 lyxerr << " Math Panic, expect problems!\n";
962 // Search for the end command.
966 } while (good() && t.cs() != "end");
972 } // anonymous namespace
975 void mathed_parse_cell(MathArray & ar, string const & str)
977 istringstream is(str.c_str());
978 mathed_parse_cell(ar, is);
982 void mathed_parse_cell(MathArray & ar, istream & is)
984 Parser(is).parse_into(ar, 0);
989 string mathed_parse_macro(string const & str)
991 istringstream is(str.c_str());
993 return parser.parse_macro();
996 string mathed_parse_macro(istream & is)
999 return parser.parse_macro();
1002 string mathed_parse_macro(LyXLex & lex)
1005 return parser.parse_macro();
1010 bool mathed_parse_normal(MathAtom & t, string const & str)
1012 istringstream is(str.c_str());
1014 return parser.parse_normal(t);
1017 bool mathed_parse_normal(MathAtom & t, istream & is)
1020 return parser.parse_normal(t);
1023 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1026 return parser.parse_normal(t);