3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
23 #pragma implementation
26 #include "math_parser.h"
28 #include "math_inset.h"
29 #include "math_arrayinset.h"
30 #include "math_charinset.h"
31 #include "math_deliminset.h"
32 #include "math_factory.h"
33 #include "math_funcinset.h"
34 #include "math_kerninset.h"
35 #include "math_macro.h"
36 #include "math_macrotable.h"
37 #include "math_macrotemplate.h"
38 #include "math_matrixinset.h"
39 #include "math_rootinset.h"
40 #include "math_sqrtinset.h"
41 #include "math_scriptinset.h"
42 #include "math_specialcharinset.h"
43 #include "math_splitinset.h"
44 #include "math_sqrtinset.h"
48 #include "support/lstrings.h"
// Returns true when s ends with an asterisk, i.e. when it names the
// starred variant of an environment such as "align*". Callers use the
// negated result as the "numbered" flag of the environment.
bool stared(string const & s)
{
	// An empty name is never starred; testing this first also keeps the
	// index below in range.
	return !s.empty() && s[s.size() - 1] == '*';
}
// These are TeX's catcodes
//
// The enumerators rely on their implicit values (0..15), so their order
// must not be changed. The character noted next to each entry is the one
// the catcode table initialisation in this file assigns to it.
enum CatCode {
	catEscape,     // 0    backslash
	catBegin,      // 1    {
	catEnd,        // 2    }
	catMath,       // 3    $
	catAlign,      // 4    &
	catNewline,    // 5    \n
	catParameter,  // 6    #
	catSuper,      // 7    ^
	catSub,        // 8    _
	catIgnore,     // 9    also the category of a default-constructed Token
	catSpace,      // 10   space, tab, carriage return
	catLetter,     // 11   a-zA-Z
	catOther,      // 12   none of the above
	catActive,     // 13   ~
	catComment,    // 14   %
	catInvalid     // 15   <delete>
};
86 CatCode theCatcode[256];
89 inline CatCode catcode(unsigned char c)
// Flags steering Parser::parse_into(). Each flag is a single bit so that
// several of them can be or-ed together; bit 5 is not assigned.
enum {
	FLAG_BRACE      = 1 << 0,  //  an opening brace needed
	FLAG_BRACE_LAST = 1 << 1,  //  last closing brace ends the parsing process
	FLAG_RIGHT      = 1 << 2,  //  next \\right ends the parsing process
	FLAG_END        = 1 << 3,  //  next \\end ends the parsing process
	FLAG_BRACK_END  = 1 << 4,  //  next closing bracket ends the parsing process
	FLAG_NEWLINE    = 1 << 6,  //  next \\\\ ends the parsing process
	FLAG_ITEM       = 1 << 7,  //  read a (possibly braced) token
	FLAG_BLOCK      = 1 << 8,  //  next block ends the parsing process
	FLAG_LEAVE      = 1 << 9   //  leave the loop at the end
};
110 for (int i = 0; i <= 255; ++i)
111 theCatcode[i] = catOther;
112 for (int i = 'a'; i <= 'z'; ++i)
113 theCatcode[i] = catLetter;
114 for (int i = 'A'; i <= 'Z'; ++i)
115 theCatcode[i] = catLetter;
117 theCatcode['\\'] = catEscape;
118 theCatcode['{'] = catBegin;
119 theCatcode['}'] = catEnd;
120 theCatcode['$'] = catMath;
121 theCatcode['&'] = catAlign;
122 theCatcode['\n'] = catNewline;
123 theCatcode['#'] = catParameter;
124 theCatcode['^'] = catSuper;
125 theCatcode['_'] = catSub;
126 theCatcode['
\7f'] = catIgnore;
127 theCatcode[' '] = catSpace;
128 theCatcode['\t'] = catSpace;
129 theCatcode['\r'] = catSpace;
130 theCatcode['~'] = catActive;
131 theCatcode['%'] = catComment;
137 // Helper class for parsing
143 Token() : cs_(), char_(0), cat_(catIgnore) {}
145 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
147 Token(const string & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
150 string const & cs() const { return cs_; }
152 CatCode cat() const { return cat_; }
154 char character() const { return char_; }
156 string asString() const;
167 string Token::asString() const
169 return cs_.size() ? cs_ : string(1, char_);
172 bool operator==(Token const & s, Token const & t)
174 return s.character() == t.character()
175 && s.cat() == t.cat() && s.cs() == t.cs();
178 bool operator!=(Token const & s, Token const & t)
183 ostream & operator<<(ostream & os, Token const & t)
186 os << "\\" << t.cs();
188 os << "[" << t.character() << "," << t.cat() << "]";
// Declaration of the Parser class: it tokenizes its input on construction
// and then builds math insets from the token stream.
// NOTE(review): this excerpt is incomplete. The class header, the access
// specifiers and several members that are defined or used further down
// (pop_back(), putback(), good(), getChar(), lineno_, pos_, curr_num_,
// curr_label_, curr_skip_) are not visible here.
// Construct from a LyXLex; the definition reads the line number and the
// stream from it.
197 Parser(LyXLex & lex);
// Construct from a plain input stream.
199 Parser(istream & is);
// Parse a \newcommand definition into a macro template.
202 MathMacroTemplate * parse_macro();
// Parse a complete formula ($...$, \(...\), \begin{...} ...).
204 MathMatrixInset * parse_normal();
// Parse tokens into array; flags is a combination of the FLAG_* bits.
206 void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
// Line number recorded at construction time.
208 int lineno() const { return lineno_; }
// NOTE(review): the second parameter is called rg in the definition.
214 string getArg(char lf, char rf);
// Report a parse error on lyxerr, prefixed with the line number.
218 void error(string const & msg);
// Parse the rows and cells of a grid-like inset.
220 void parse_lines(MathGridInset * p, bool numbered, bool outmost);
// Read the next token and look it up as a delimiter.
222 latexkeys const * read_delim();
// Tokenize everything up to \end_inset or the end of the stream.
226 void tokenize(istream & is);
// Tokenize the contents of a string.
228 void tokenize(string const & s);
// Append a token to tokens_.
230 void push_back(Token const & t);
// The token before the read position, or an empty token if there is none.
234 Token const & prevToken() const;
// The token at the read position without consuming it.
236 Token const & nextToken() const;
// The token at the read position; advances the read position.
238 Token const & getToken();
// NOTE(review): no definition of lex() is visible in this excerpt.
240 void lex(string const & s);
// All tokens of the input, in reading order.
247 std::vector<Token> tokens_;
259 Parser::Parser(LyXLex & lexer)
260 : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
262 tokenize(lexer.getStream());
267 Parser::Parser(istream & is)
268 : lineno_(0), pos_(0), curr_num_(false)
274 void Parser::push_back(Token const & t)
276 tokens_.push_back(t);
280 void Parser::pop_back()
286 Token const & Parser::prevToken() const
288 static const Token dummy;
289 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
293 Token const & Parser::nextToken() const
295 static const Token dummy;
296 return good() ? tokens_[pos_] : dummy;
300 Token const & Parser::getToken()
302 static const Token dummy;
303 return good() ? tokens_[pos_++] : dummy;
307 void Parser::putback()
313 bool Parser::good() const
315 return pos_ < tokens_.size();
319 char Parser::getChar()
322 lyxerr << "The input stream is not well..." << endl;
323 return tokens_[pos_++].character();
// Reads an argument delimited by the characters lf and rg.
// NOTE(review): only the signature and the collecting loop survive in
// this excerpt; how the opening delimiter lf is checked and how the
// result is accumulated is not visible here.
327 string Parser::getArg(char lf, char rg)
// Consume characters until the closing delimiter has been read or the
// token list is exhausted.
335 while ((c = getChar()) != rg && good())
// Tokenizes the contents of a stream.
// NOTE(review): the reading loop and the hand-over to the string overload
// are missing from this excerpt.
342 void Parser::tokenize(istream & is)
344 // eat everything up to the next \end_inset or end of stream
345 // and store it in s for further tokenization
// 10 is the length of "\end_inset": once the collected text ends with
// that marker, the marker itself is cut off again.
350 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
351 s = s.substr(0, s.size() - 10);
// Splits buffer into tokens according to the catcode table and appends
// them to tokens_.
// NOTE(review): several lines (the case labels of the switch, the
// collection of control sequence names, closing braces) are missing from
// this excerpt; the comments below cover only what is visible.
361 void Parser::tokenize(string const & buffer)
// One-time flag; presumably guards the initialisation of the catcode
// table -- confirm.
363 static bool init_done = false;
// c_str() ends at the first NUL character, so an embedded NUL truncates
// the input.
370 istringstream is(buffer.c_str(), ios::in | ios::binary);
375 switch (catcode(c)) {
// A newline: pushing a "par" token is disabled, a space token is pushed
// in the line below.
// NOTE(review): the line between these two is missing, so whether the
// space is pushed only when the test fails cannot be told from here.
379 if (catcode(c) == catNewline)
380 ; //push_back(Token("par"));
382 push_back(Token(' ', catSpace));
// Skip the rest of the current line.
389 while (is.get(c) && catcode(c) != catNewline)
// A run of letters is read as one unit, then any spaces following it are
// skipped.
398 if (catcode(c) == catLetter) {
399 while (is.get(c) && catcode(c) == catLetter)
401 if (catcode(c) == catSpace)
402 while (is.get(c) && catcode(c) == catSpace)
// A character token carrying its own category code.
411 push_back(Token(c, catcode(c)));
// Dump of the complete token list to lyxerr.
416 lyxerr << "\nTokens: ";
417 for (unsigned i = 0; i < tokens_.size(); ++i)
418 lyxerr << tokens_[i];
424 void Parser::error(string const & msg)
426 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parses the cells of the grid inset p, row by row. numbered is the
// numbering state every row starts with.
// NOTE(review): the loop exits, the row bookkeeping and all closing
// braces are missing from this excerpt.
431 void Parser::parse_lines(MathGridInset * p, bool numbered, bool outmost)
433 const int cols = p->ncols();
435 // save global variables
436 bool const saved_num = curr_num_;
437 string const saved_label = curr_label_;
// No end condition in the loop header; the loop is left from inside.
439 for (int row = 0; true; ++row) {
440 // reset global variables
441 curr_num_ = numbered;
445 for (int col = 0; col < cols; ++col) {
446 //lyxerr << "reading cell " << row << " " << col << "\n";
// Cells are addressed row-major. FLAG_BLOCK makes parse_into() return at
// the next &, \\, \right or \end.
447 parse_into(p->cell(col + row * cols), FLAG_BLOCK);
// The cell did not end at an alignment tab.
450 if (prevToken().cat() != catAlign) {
451 //lyxerr << "less cells read than normal in row/col: "
452 // << row << " " << col << "\n";
// Store numbering, label and vertical skip of the finished row.
// NOTE(review): the cast assumes p really is a MathMatrixInset;
// presumably this is guarded by outmost -- confirm.
458 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
459 m->numbered(row, curr_num_);
460 m->label(row, curr_label_);
461 if (curr_skip_.size()) {
462 m->vskip(LyXLength(curr_skip_), row);
// The row did not end with a \\ token.
468 if (prevToken() != Token("\\")) {
469 //lyxerr << "no newline here\n";
476 // restore "global" variables
477 curr_num_ = saved_num;
478 curr_label_ = saved_label;
// Parses a macro definition of the form \newcommand{\name}[n]{body} and
// returns a newly allocated template for it.
// NOTE(review): the statements executed after each error message and the
// final return are missing from this excerpt.
482 MathMacroTemplate * Parser::parse_macro()
// Skip leading spaces.
484 while (nextToken().cat() == catSpace)
487 if (getToken().cs() != "newcommand") {
488 lyxerr << "\\newcommand expected\n";
492 if (getToken().cat() != catBegin) {
493 lyxerr << "'{' expected\n";
// The macro name is the control sequence inside the braces.
497 string name = getToken().cs();
499 if (getToken().cat() != catEnd) {
500 lyxerr << "'}' expected\n";
// Bracketed argument count; an empty string gives 0, and atoi() also
// yields 0 for text that is not a number.
504 string arg = getArg('[', ']');
505 int narg = arg.empty() ? 0 : atoi(arg.c_str());
506 //lyxerr << "creating macro " << name << " with " << narg << "args\n";
507 MathMacroTemplate * p = new MathMacroTemplate(name, narg);
// The body must be braced; its closing brace ends the parse.
508 parse_into(p->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
// Parses one complete formula and returns a newly allocated inset for it.
// The kind of inset depends on how the formula starts: $ or \( for inline
// math, or \begin{env} for the named environments handled below.
// NOTE(review): the return statements, the test guarding the second
// LM_OT_EQUATION branch and the closing braces are missing from this
// excerpt.
513 MathMatrixInset * Parser::parse_normal()
// Skip leading spaces.
515 while (nextToken().cat() == catSpace)
518 Token const & t = getToken();
// Inline math.
520 if (t.cat() == catMath || t.cs() == "(") {
521 MathMatrixInset * p = new MathMatrixInset(LM_OT_SIMPLE);
522 parse_into(p->cell(0), 0);
// Anything else has to start with a control sequence.
526 if (!t.cs().size()) {
527 lyxerr << "start of math expected, got '" << t << "'\n";
531 string const & cs = t.cs();
// An equation with a single cell; numbering and label come from what
// parse_into() recorded.
536 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQUATION);
537 parse_into(p->cell(0), 0);
538 p->numbered(0, curr_num_);
539 p->label(0, curr_label_);
544 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
// Name of the environment being opened.
548 string const name = getArg('{', '}');
550 if (name == "equation" || name == "equation*") {
// The starred form is unnumbered.
551 curr_num_ = !stared(name);
553 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQUATION);
554 parse_into(p->cell(0), FLAG_END);
555 p->numbered(0, curr_num_);
556 p->label(0, curr_label_);
// Multi-line environments: rows are numbered unless the name is starred.
560 if (name == "eqnarray" || name == "eqnarray*") {
561 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQNARRAY);
562 parse_lines(p, !stared(name), true);
566 if (name == "align" || name == "align*") {
567 MathMatrixInset * p = new MathMatrixInset(LM_OT_ALIGN);
568 parse_lines(p, !stared(name), true);
// The alignat family takes a braced number; twice that number is passed
// to the inset constructor.
572 if (name == "alignat" || name == "alignat*") {
573 MathMatrixInset * p =
574 new MathMatrixInset(LM_OT_ALIGNAT, 2 * atoi(getArg('{', '}').c_str()));
575 parse_lines(p, !stared(name), true);
579 if (name == "xalignat" || name == "xalignat*") {
580 MathMatrixInset * p =
581 new MathMatrixInset(LM_OT_XALIGNAT, 2 * atoi(getArg('{', '}').c_str()));
582 parse_lines(p, !stared(name), true);
// NOTE(review): unlike its siblings no starred name is accepted here, so
// !stared(name) is always true in this branch.
586 if (name == "xxalignat") {
587 MathMatrixInset * p =
588 new MathMatrixInset(LM_OT_XXALIGNAT, 2 * atoi(getArg('{', '}').c_str()));
589 parse_lines(p, !stared(name), true);
593 if (name == "multline" || name == "multline*") {
594 MathMatrixInset * p = new MathMatrixInset(LM_OT_MULTLINE);
595 parse_lines(p, !stared(name), true);
599 if (name == "gather" || name == "gather*") {
600 MathMatrixInset * p = new MathMatrixInset(LM_OT_GATHER);
601 parse_lines(p, !stared(name), true);
// None of the known environments matched.
605 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
610 latexkeys const * Parser::read_delim()
612 Token const & t = getToken();
613 latexkeys const * l = in_word_set(t.asString());
614 return l ? l : in_word_set(".");
// Core of the parser: reads tokens and appends the insets built from them
// to array. flags is a combination of the FLAG_* bits and decides where
// parsing stops; code is the text code in effect in the caller.
// NOTE(review): this excerpt is missing many lines (the loop header, most
// return/break statements, closing braces, several declarations). The
// comments below describe only what the visible lines do.
618 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
// Font code applied to the characters read here; it is changed by
// old-style font commands, see the LM_TK_OLDFONT branch below.
620 MathTextCodes yyvarcode = LM_TC_MIN;
625 Token const & t = getToken();
627 //lyxerr << "t: " << t << " flags: " << flags << "'\n";
628 //array.dump(lyxerr);
// FLAG_ITEM: read either one braced group or one single token.
631 if (flags & FLAG_ITEM) {
633 if (t.cat() == catBegin) {
634 // skip the brace and collect everything to the next matching
636 flags |= FLAG_BRACE_LAST;
639 // handle only this single token, leave the loop if done
// FLAG_BRACE: the first token has to be an opening brace.
644 if (flags & FLAG_BRACE) {
645 if (t.cat() != catBegin) {
646 error("Expected {. Maybe you forgot to enclose an argument in {}");
650 flags &= ~FLAG_BRACE;
// FLAG_BLOCK: an alignment tab, \\, \right or \end closes the block.
655 if (flags & FLAG_BLOCK) {
656 if (t.cat() == catAlign || t.cs() == "\\" || t.cs() == "right")
658 if (t.cs() == "end") {
// Dispatch on the category of character tokens.
667 if (t.cat() == catMath)
670 else if (t.cat() == catLetter)
671 array.push_back(new MathCharInset(t.character(), yyvarcode));
// Spaces are kept only inside text-roman material.
673 else if (t.cat() == catSpace &&
674 (yyvarcode == LM_TC_TEXTRM || code == LM_TC_TEXTRM))
675 array.push_back(new MathCharInset(' ', yyvarcode));
// A macro parameter: the character after # gives the argument number.
// NOTE(review): no visible check that this character is a digit.
677 else if (t.cat() == catParameter) {
678 Token const & n = getToken();
679 MathMacroArgument * p = new MathMacroArgument(n.character() - '0');
// Braces that are not consumed as grouping are stored as characters.
683 else if (t.cat() == catBegin) {
684 array.push_back(new MathCharInset('{', LM_TC_TEX));
687 else if (t.cat() == catEnd) {
688 if (flags & FLAG_BRACE_LAST)
690 array.push_back(new MathCharInset('}', LM_TC_TEX));
693 else if (t.cat() == catAlign) {
694 lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
695 array.push_back(new MathCharInset('&', LM_TC_TEX));
// Super- and subscripts are parsed into a script cell of the last
// element of array.
// NOTE(review): the condition under which the placeholder space is
// pushed first is missing from this excerpt.
698 else if (t.cat() == catSuper || t.cat() == catSub) {
699 bool up = (t.cat() == catSuper);
701 array.push_back(new MathCharInset(' '));
702 parse_into(array.back().ensure(up)->cell(0), FLAG_ITEM);
// A closing bracket is special only when FLAG_BRACK_END is set.
705 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
708 else if (t.cat() == catOther)
709 array.push_back(new MathCharInset(t.character(), yyvarcode));
// From here on: dispatch on control sequence names.
714 else if (t.cs() == "protect")
717 else if (t.cs() == "end")
720 else if (t.cs() == ")")
723 else if (t.cs() == "]")
// \\ with a bracketed argument, which is remembered in curr_skip_.
726 else if (t.cs() == "\\") {
727 curr_skip_ = getArg('[', ']');
728 if (flags & FLAG_NEWLINE)
730 lyxerr[Debug::MATHED]
731 << "found newline unexpectedly, array: '" << array << "'\n";
732 array.push_back(createMathInset("\\"));
// \limits and \nolimits modify the last element, if there is one.
735 else if (t.cs() == "limits" && array.size())
736 array.back().limits(1);
738 else if (t.cs() == "nolimits" && array.size())
739 array.back().limits(-1);
741 else if (t.cs() == "nonumber")
744 else if (t.cs() == "number")
// \sqrt: the first form reads a bracketed cell and then the radicand
// (root inset), the second reads only the radicand.
// NOTE(review): the test choosing between the two is missing here.
747 else if (t.cs() == "sqrt") {
750 array.push_back(new MathRootInset);
751 parse_into(array.back().nucleus()->cell(0), FLAG_BRACK_END);
752 parse_into(array.back().nucleus()->cell(1), FLAG_ITEM);
755 array.push_back(new MathSqrtInset);
756 parse_into(array.back().nucleus()->cell(0), FLAG_ITEM);
// \left<delim> ... \right<delim>
760 else if (t.cs() == "left") {
761 latexkeys const * l = read_delim();
763 parse_into(ar, FLAG_RIGHT);
764 latexkeys const * r = read_delim();
765 MathDelimInset * dl = new MathDelimInset(l, r);
// A \right that no enclosing \left is waiting for.
770 else if (t.cs() == "right") {
771 if (!(flags & FLAG_RIGHT)) {
772 lyxerr << "got so far: '" << array << "'\n";
773 error("Unmatched right delimiter");
781 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
782 //MathArray tmp = array;
783 //MathSizeInset * p = new MathSizeInset(MathStyles(lval_->id));
784 //array.push_back(p);
785 //parse_into(p->cell(0), FLAG_BRACE_FONT);
790 array.push_back(new MathCharInset(ival_, LM_TC_TEX));
// Nested environments: array and split.
794 else if (t.cs() == "begin") {
795 string const name = getArg('{', '}');
796 if (name == "array") {
// Appending 'c' guarantees valign[0] exists when no bracketed argument
// was given.
797 string const valign = getArg('[', ']') + 'c';
798 string const halign = getArg('{', '}');
// One column per character of the alignment string, one row to start.
799 MathArrayInset * m = new MathArrayInset(halign.size(), 1);
800 m->valign(valign[0]);
802 parse_lines(m, false, false);
804 } else if (name == "split") {
805 MathSplitInset * m = new MathSplitInset(1);
806 parse_lines(m, false, false);
809 lyxerr[Debug::MATHED] << "unknow math inset begin '" << name << "'\n";
812 else if (t.cs() == "kern") {
// NOTE(review): this t hides the function-level t declared above.
818 Token const & t = getToken();
824 if (isValidLength(s))
827 array.push_back(new MathKernInset(s));
// \label{...}: the braced text is remembered in curr_label_.
830 else if (t.cs() == "label") {
832 //parse_into(ar, FLAG_ITEM);
834 //ar.write(os, true);
835 //curr_label_ = os.str();
837 curr_label_ = getArg('{', '}');
// Infix fraction-like commands: what was read so far becomes cell 0,
// what follows becomes cell 1.
840 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
841 MathInset * p = createMathInset(t.cs());
842 // search backward for position of last '{' if any
// NOTE(review): the loop relies on pos becoming negative, so pos has to
// be a signed type; its declaration is not visible here.
844 for (pos = array.size() - 1; pos >= 0; --pos)
845 if (array.at(pos)->nucleus()->getChar() == '{')
848 // found it -> use the part after '{' as "numerator", erase the '{'
849 p->cell(0) = MathArray(array, pos + 1, array.size());
850 array.erase(pos, array.size());
852 // not found -> use everything as "numerator"
853 p->cell(0).swap(array);
855 parse_into(p->cell(1), FLAG_BLOCK);
// Any other control sequence is looked up in the keyword table.
859 else if (t.cs().size()) {
860 latexkeys const * l = in_word_set(t.cs());
862 if (l->token == LM_TK_FONT) {
863 //lyxerr << "starting font\n";
864 //CatCode catSpaceSave = theCatcode[' '];
865 //if (l->id == LM_TC_TEXTRM) {
866 // // temporarily change catcode
867 // theCatcode[' '] = catLetter;
// NOTE(review): this t (a text code) hides the Token t declared above.
870 MathTextCodes t = static_cast<MathTextCodes>(l->id);
// Parse the font argument with the new code, then apply the font to
// every element that was read.
872 parse_into(ar, FLAG_ITEM, t);
873 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it)
874 it->nucleus()->handleFont(t);
877 // undo catcode changes
878 ////theCatcode[' '] = catSpaceSave;
879 //lyxerr << "ending font\n";
// Old-style font switches change the code used for what follows.
882 else if (l->token == LM_TK_OLDFONT)
883 yyvarcode = static_cast<MathTextCodes>(l->id);
// Other commands: create the inset and read one item per argument cell.
886 MathInset * p = createMathInset(t.cs());
887 for (int i = 0; i < p->nargs(); ++i)
888 parse_into(p->cell(i), FLAG_ITEM);
894 MathInset * p = createMathInset(t.cs());
896 for (int i = 0; i < p->nargs(); ++i)
897 parse_into(p->cell(i), FLAG_ITEM);
900 error("Unrecognized token");
901 //lyxerr[Debug::MATHED] << "[" << t << "]\n";
// FLAG_LEAVE: the flag is cleared here.
908 if (flags & FLAG_LEAVE) {
909 flags &= ~FLAG_LEAVE;
// Error recovery: tokens are skipped until \end or the end of the input.
915 lyxerr << " Math Panic, expect problems!\n";
916 // Search for the end command.
920 } while (good() && t.cs() != "end");
924 } // anonymous namespace
928 MathArray mathed_parse_cell(string const & str)
930 istringstream is(str.c_str());
933 parser.parse_into(ar, 0);
939 MathMacroTemplate * mathed_parse_macro(string const & str)
941 istringstream is(str.c_str());
943 return parser.parse_macro();
946 MathMacroTemplate * mathed_parse_macro(istream & is)
949 return parser.parse_macro();
952 MathMacroTemplate * mathed_parse_macro(LyXLex & lex)
955 return parser.parse_macro();
960 MathMatrixInset * mathed_parse_normal(string const & str)
962 istringstream is(str.c_str());
964 return parser.parse_normal();
967 MathMatrixInset * mathed_parse_normal(istream & is)
970 return parser.parse_normal();
973 MathMatrixInset * mathed_parse_normal(LyXLex & lex)
976 return parser.parse_normal();