3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
23 #pragma implementation
26 #include "math_parser.h"
28 #include "math_inset.h"
29 #include "math_arrayinset.h"
30 #include "math_charinset.h"
31 #include "math_deliminset.h"
32 #include "math_factory.h"
33 #include "math_funcinset.h"
34 #include "math_kerninset.h"
35 #include "math_macro.h"
36 #include "math_macrotable.h"
37 #include "math_macrotemplate.h"
38 #include "math_matrixinset.h"
39 #include "math_rootinset.h"
40 #include "math_sqrtinset.h"
41 #include "math_scriptinset.h"
42 #include "math_specialcharinset.h"
43 #include "math_splitinset.h"
44 #include "math_sqrtinset.h"
48 #include "support/lstrings.h"
/// Tells whether @p s ends in an asterisk, i.e. names the "starred" variant
/// of an environment such as "align*". Callers in this file use the negated
/// result to decide whether an environment is numbered.
///
/// @param s  environment name to inspect; may be empty
/// @return   true if the last character of @p s is '*', false otherwise
///           (an empty string is never starred)
bool stared(string const & s)
{
	// Test for emptiness first so that the index below cannot underflow;
	// this also avoids narrowing size() into an unsigned int.
	return !s.empty() && s[s.size() - 1] == '*';
}
66 // These are TeX's catcodes
// Only part of the enumerator list is visible in this excerpt. The number in
// each trailing comment is the TeX category code the enumerator stands for,
// followed by the character(s) that carry it.
68 catEscape, // 0 backslash
79 catLetter, // 11 a-zA-Z
80 catOther, // 12 none of the above
83 catInvalid // 15 <delete>
// Lookup table holding one CatCode per possible unsigned char value (256
// slots); the tokenizer consults it through catcode().
86 CatCode theCatcode[256];
// Returns the category code for character c. The body is not visible in this
// excerpt -- presumably a plain lookup in theCatcode; confirm.
89 inline CatCode catcode(unsigned char c)
// Bit flags steering Parser::parse_into(). Callers combine them with '|'
// (e.g. FLAG_BRACE | FLAG_BRACE_LAST) and parse_into() tests them with '&'.
// Bit 5 (1 << 5) is not assigned among the flags listed here.
96 FLAG_BRACE = 1 << 0, // an opening brace needed
97 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing process
98 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
99 FLAG_END = 1 << 3, // next \\end ends the parsing process
100 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing process
101 FLAG_NEWLINE = 1 << 6, // next \\\\ ends the parsing process
102 FLAG_ITEM = 1 << 7, // read a (possibly braced token)
103 FLAG_BLOCK = 1 << 8, // next block ends the parsing process
104 FLAG_LEAVE = 1 << 9 // leave the loop at the end
110 for (int i = 0; i <= 255; ++i)
111 theCatcode[i] = catOther;
112 for (int i = 'a'; i <= 'z'; ++i)
113 theCatcode[i] = catLetter;
114 for (int i = 'A'; i <= 'Z'; ++i)
115 theCatcode[i] = catLetter;
117 theCatcode['\\'] = catEscape;
118 theCatcode['{'] = catBegin;
119 theCatcode['}'] = catEnd;
120 theCatcode['$'] = catMath;
121 theCatcode['&'] = catAlign;
122 theCatcode['\n'] = catNewline;
123 theCatcode['#'] = catParameter;
124 theCatcode['^'] = catSuper;
125 theCatcode['_'] = catSub;
126 theCatcode['
\7f'] = catIgnore;
127 theCatcode[' '] = catSpace;
128 theCatcode['\t'] = catSpace;
129 theCatcode['\r'] = catSpace;
130 theCatcode['~'] = catActive;
131 theCatcode['%'] = catComment;
137 // Helper class for parsing
// A token stores a control sequence name (cs_), a single character (char_)
// and a category code (cat_). The constructors below fill either the name or
// the character/category pair and leave the rest at its default.
// Default token: empty name, NUL character, catIgnore.
143 Token() : cs_(), char_(0), cat_(catIgnore) {}
// Character token with an explicit category code; the name stays empty.
145 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
// Control-sequence token; character and category keep the default values.
147 Token(const string & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
// Name of the control sequence (empty for tokens built from a character).
150 string const & cs() const { return cs_; }
// Category code stored in the token.
152 CatCode cat() const { return cat_; }
// Character stored in the token (0 for tokens built from a name).
154 char character() const { return char_; }
// Textual form of the token; defined out of line.
156 string asString() const;
// Returns the control sequence name when it is non-empty, otherwise a
// one-character string made from the stored character.
167 string Token::asString() const
169 return cs_.size() ? cs_ : string(1, char_);
// Two tokens compare equal when character, category code and control
// sequence name all match.
172 bool operator==(Token const & s, Token const & t)
174 return s.character() == t.character()
175 && s.cat() == t.cat() && s.cs() == t.cs();
// Inequality counterpart; its body is not visible in this excerpt.
178 bool operator!=(Token const & s, Token const & t)
// Writes a token to os, either as a backslash followed by the control
// sequence name or as "[character,category]". The test that picks one of the
// two forms is not visible in this excerpt.
183 ostream & operator<<(ostream & os, Token const & t)
186 os << "\\" << t.cs();
188 os << "[" << t.character() << "," << t.cat() << "]";
// Builds a parser from a LyXLex; the definition takes the line number from
// the lexer and tokenizes the lexer's stream.
197 Parser(LyXLex & lex);
// Builds a parser from a plain input stream.
199 Parser(istream & is);
// Parses a \newcommand definition and returns a newly allocated template.
202 MathMacroTemplate * parse_macro();
// Parses a top-level formula and returns a newly allocated matrix inset.
204 MathMatrixInset * parse_normal();
// Parses tokens into array; flags is a combination of the FLAG_* bits.
206 void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
// Line number stored at construction time; error() prints it.
208 int lineno() const { return lineno_; }
// Reads an argument delimited by lf ... rf (see the definition).
214 string getArg(char lf, char rf);
// Reports msg on lyxerr together with the stored line number.
218 void error(string const & msg);
// Fills the cells of grid p row by row.
220 void parse_lines(MathGridInset * p, bool numbered, bool outmost);
// Tokenizes the text read from a stream.
224 void tokenize(istream & is);
// Tokenizes the text held in s.
226 void tokenize(string const & s);
// Appends t to tokens_.
228 void push_back(Token const & t);
// Token just before the read position, or a default token at the start.
232 Token const & prevToken() const;
// Token at the read position, without consuming it.
234 Token const & nextToken() const;
// Token at the read position; advances the read position.
236 Token const & getToken();
// NOTE(review): no definition of lex() is visible in this excerpt.
238 void lex(string const & s);
// Token list filled by tokenize(); pos_ is used as the read index into it.
245 std::vector<Token> tokens_;
// Starts at the lexer's current line number with the read position at 0,
// then tokenizes everything available from the lexer's stream.
257 Parser::Parser(LyXLex & lexer)
258 : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
260 tokenize(lexer.getStream());
// Stream variant: lineno_ starts at 0. The constructor body is not visible
// in this excerpt (presumably it tokenizes is -- confirm).
265 Parser::Parser(istream & is)
266 : lineno_(0), pos_(0), curr_num_(false)
// Appends t to the end of the token list.
272 void Parser::push_back(Token const & t)
274 tokens_.push_back(t);
// Body not visible in this excerpt.
278 void Parser::pop_back()
// Returns the token just before the read position, or a default-constructed
// token when the read position is still 0.
284 Token const & Parser::prevToken() const
286 static const Token dummy;
287 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
// Returns the token at the read position without consuming it, or a
// default-constructed token when no tokens are left.
291 Token const & Parser::nextToken() const
293 static const Token dummy;
294 return good() ? tokens_[pos_] : dummy;
// Returns the token at the read position and advances past it; when no
// tokens are left the position is unchanged and a default token is returned.
298 Token const & Parser::getToken()
300 static const Token dummy;
301 return good() ? tokens_[pos_++] : dummy;
// Body not visible in this excerpt.
305 void Parser::putback()
// True while the read position is still inside the token list.
311 bool Parser::good() const
313 return pos_ < tokens_.size();
// Returns the character of the token at the read position and advances.
// NOTE(review): the guard around the log statement is not visible here. As
// far as this excerpt shows, tokens_[pos_++] is still read after the warning
// has been printed -- confirm this cannot index past the end of tokens_.
317 char Parser::getChar()
320 lyxerr << "The input stream is not well..." << endl;
321 return tokens_[pos_++].character();
// Reads an argument delimited by lf ... rg. Only the collecting loop is
// visible: it keeps fetching characters until rg shows up or the tokens run
// out.
325 string Parser::getArg(char lf, char rg)
333 while ((c = getChar()) != rg && good())
// Collects raw text from is into a string s for later tokenization. The
// reading loop itself is not visible in this excerpt.
340 void Parser::tokenize(istream & is)
342 // eat everything up to the next \end_inset or end of stream
343 // and store it in s for further tokenization
// "\end_inset" is 10 characters long; once s ends with it, that marker is
// cut off so that only the formula text remains.
348 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
349 s = s.substr(0, s.size() - 10);
// Splits buffer into tokens and appends them to tokens_. Characters are
// classified with catcode(); the case labels of the switch are not visible
// in this excerpt, so the notes below describe only the statements shown.
359 void Parser::tokenize(string const & buffer)
// presumably guards a one-time setup of the catcode table -- confirm
361 static bool init_done = false;
368 istringstream is(buffer.c_str(), ios::in | ios::binary);
373 switch (catcode(c)) {
// A newline currently produces nothing (the "par" token is commented out);
// the other path shown here emits a single space token.
377 if (catcode(c) == catNewline)
378 ; //push_back(Token("par"));
380 push_back(Token(' ', catSpace));
// Skips ahead to the end of the current line.
387 while (is.get(c) && catcode(c) != catNewline)
// A run of letters is read as one unit, and spaces following it are skipped.
396 if (catcode(c) == catLetter) {
397 while (is.get(c) && catcode(c) == catLetter)
399 if (catcode(c) == catSpace)
400 while (is.get(c) && catcode(c) == catSpace)
// A character is stored together with its own category code.
409 push_back(Token(c, catcode(c)));
// Dumps every token to lyxerr.
414 lyxerr << "\nTokens: ";
415 for (unsigned i = 0; i < tokens_.size(); ++i)
416 lyxerr << tokens_[i];
// Prints msg to lyxerr as a math parse error, prefixed with the stored line
// number.
422 void Parser::error(string const & msg)
424 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Reads the contents of grid p row by row and cell by cell.
//   p        - grid to fill; its column count fixes the cells read per row
//   numbered - value curr_num_ is reset to at the start of every row
//   outmost  - not referenced in the lines visible here; presumably selects
//              the per-row numbering/label handling below -- confirm
// curr_num_ and curr_label_ are saved on entry and restored at the end.
429 void Parser::parse_lines(MathGridInset * p, bool numbered, bool outmost)
431 const int cols = p->ncols();
433 // save global variables
434 bool const saved_num = curr_num_;
435 string const saved_label = curr_label_;
// The loop has no end condition of its own; the exits are on lines not
// visible in this excerpt.
437 for (int row = 0; true; ++row) {
438 // reset global variables
439 curr_num_ = numbered;
443 for (int col = 0; col < cols; ++col) {
444 //lyxerr << "reading cell " << row << " " << col << "\n";
// Cells are stored row-major; FLAG_BLOCK makes parse_into() stop at the end
// of the cell.
445 parse_into(p->cell(col + row * cols), FLAG_BLOCK);
// The cell was not terminated by an alignment character ('&').
448 if (prevToken().cat() != catAlign) {
449 //lyxerr << "less cells read than normal in row/col: "
450 // << row << " " << col << "\n";
// Stores numbering, label and (if given) vertical skip for this row. The
// static_cast assumes p really is a MathMatrixInset; the condition guarding
// it is not visible here.
456 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
457 m->numbered(row, curr_num_);
458 m->label(row, curr_label_);
459 if (curr_skip_.size()) {
460 m->vskip(LyXLength(curr_skip_), row);
// Token("\\") is the control sequence named "\", i.e. the LaTeX line break.
466 if (prevToken() != Token("\\")) {
467 //lyxerr << "no newline here\n";
474 // restore "global" variables
475 curr_num_ = saved_num;
476 curr_label_ = saved_label;
// Parses a macro definition of the form \newcommand{\name}[nargs]{body} and
// returns a newly allocated MathMacroTemplate. What happens after each of
// the diagnostics below (presumably an early return) is not visible in this
// excerpt.
480 MathMacroTemplate * Parser::parse_macro()
// Leading spaces are examined here; the loop body is not visible.
482 while (nextToken().cat() == catSpace)
485 if (getToken().cs() != "newcommand") {
486 lyxerr << "\\newcommand expected\n";
490 if (getToken().cat() != catBegin) {
491 lyxerr << "'{' expected\n";
// The macro name is the control sequence between the braces.
495 string name = getToken().cs();
497 if (getToken().cat() != catEnd) {
498 lyxerr << "'}' expected\n";
// Argument count from the [..] part; an empty argument means zero.
502 string arg = getArg('[', ']');
503 int narg = arg.empty() ? 0 : atoi(arg.c_str());
504 //lyxerr << "creating macro " << name << " with " << narg << "args\n";
505 MathMacroTemplate * p = new MathMacroTemplate(name, narg);
// The body must start with '{' and ends at its last closing brace.
506 parse_into(p->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
// Parses a top-level formula and returns a newly allocated MathMatrixInset
// of the matching type. The return statements and some of the conditions are
// not visible in this excerpt.
511 MathMatrixInset * Parser::parse_normal()
// Leading spaces are examined here; the loop body is not visible.
513 while (nextToken().cat() == catSpace)
516 Token const & t = getToken();
// Inline math, opened by a math-shift character or by \( .
518 if (t.cat() == catMath || t.cs() == "(") {
519 MathMatrixInset * p = new MathMatrixInset(LM_OT_SIMPLE);
520 parse_into(p->cell(0), 0);
// Anything else has to start with a control sequence.
524 if (!t.cs().size()) {
525 lyxerr << "start of math expected, got '" << t << "'\n";
529 string const & cs = t.cs();
// Single-cell equation; the condition selecting this branch is not visible.
534 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQUATION);
535 parse_into(p->cell(0), 0);
536 p->numbered(0, curr_num_);
537 p->label(0, curr_label_);
542 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
// Environment name taken from the {...} argument.
546 string const name = getArg('{', '}');
// For every environment below, the starred name selects the unnumbered form.
548 if (name == "equation" || name == "equation*") {
549 curr_num_ = !stared(name);
551 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQUATION);
552 parse_into(p->cell(0), FLAG_END);
553 p->numbered(0, curr_num_);
554 p->label(0, curr_label_);
558 if (name == "eqnarray" || name == "eqnarray*") {
559 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQNARRAY);
560 parse_lines(p, !stared(name), true);
564 if (name == "align" || name == "align*") {
565 MathMatrixInset * p = new MathMatrixInset(LM_OT_ALIGN);
566 parse_lines(p, !stared(name), true);
// The alignat family takes an extra {n} argument; twice that value is passed
// on to the inset constructor.
570 if (name == "alignat" || name == "alignat*") {
571 MathMatrixInset * p =
572 new MathMatrixInset(LM_OT_ALIGNAT, 2 * atoi(getArg('{', '}').c_str()));
573 parse_lines(p, !stared(name), true);
577 if (name == "xalignat" || name == "xalignat*") {
578 MathMatrixInset * p =
579 new MathMatrixInset(LM_OT_XALIGNAT, 2 * atoi(getArg('{', '}').c_str()));
580 parse_lines(p, !stared(name), true);
// Only the unstarred name is accepted here, so !stared(name) is always true.
584 if (name == "xxalignat") {
585 MathMatrixInset * p =
586 new MathMatrixInset(LM_OT_XXALIGNAT, 2 * atoi(getArg('{', '}').c_str()));
587 parse_lines(p, !stared(name), true);
591 if (name == "multline" || name == "multline*") {
592 MathMatrixInset * p = new MathMatrixInset(LM_OT_MULTLINE);
593 parse_lines(p, !stared(name), true);
597 if (name == "gather" || name == "gather*") {
598 MathMatrixInset * p = new MathMatrixInset(LM_OT_GATHER);
599 parse_lines(p, !stared(name), true);
// No known environment matched.
603 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
// Core of the parser: consumes tokens and appends the corresponding insets
// to array.
//   array - destination for the parsed insets
//   flags - combination of FLAG_* bits saying what is expected first and
//           what terminates the run
//   code  - text code of the enclosing font command (LM_TC_MIN by default,
//           per the declaration)
// The loop header, most 'break'/'return' statements and several conditions
// are not visible in this excerpt; the comments describe only what is shown.
608 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
// Text code given to newly created character insets; changed by old-style
// font commands further down.
610 MathTextCodes yyvarcode = LM_TC_MIN;
615 Token const & t = getToken();
617 //lyxerr << "t: " << t << " flags: " << flags << "'\n";
618 //array.dump(lyxerr);
// FLAG_ITEM: read a single token, or a whole group when it starts with '{'.
621 if (flags & FLAG_ITEM) {
623 if (t.cat() == catBegin) {
624 // skip the brace and collect everything to the next matching
626 flags |= FLAG_BRACE_LAST;
629 // handle only this single token, leave the loop if done
// FLAG_BRACE: the first token must be an opening brace; the flag is cleared
// once that has been dealt with.
634 if (flags & FLAG_BRACE) {
635 if (t.cat() != catBegin) {
636 error("Expected {. Maybe you forgot to enclose an argument in {}");
640 flags &= ~FLAG_BRACE;
// FLAG_BLOCK: '&', the line break "\\" and \end are treated specially.
645 if (flags & FLAG_BLOCK) {
646 if (t.cat() == catAlign || t.cs() == "\\")
648 if (t.cs() == "end") {
// ---- dispatch on the token's category code ----
657 if (t.cat() == catMath)
660 else if (t.cat() == catLetter)
661 array.push_back(new MathCharInset(t.character(), yyvarcode));
// Spaces are kept only when either text code is LM_TC_TEXTRM.
663 else if (t.cat() == catSpace &&
664 (yyvarcode == LM_TC_TEXTRM || code == LM_TC_TEXTRM))
665 array.push_back(new MathCharInset(' ', yyvarcode));
// Macro parameter: the character of the following token is turned into a
// number by subtracting '0'.
// NOTE(review): no check that the character is a digit is visible here.
667 else if (t.cat() == catParameter) {
668 Token const & n = getToken();
669 MathMacroArgument * p = new MathMacroArgument(n.character() - '0');
// Braces not consumed by the flag handling above are stored as literal
// characters with the LM_TC_TEX code.
673 else if (t.cat() == catBegin) {
674 array.push_back(new MathCharInset('{', LM_TC_TEX));
677 else if (t.cat() == catEnd) {
678 if (flags & FLAG_BRACE_LAST)
680 array.push_back(new MathCharInset('}', LM_TC_TEX));
683 else if (t.cat() == catAlign) {
684 lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
685 array.push_back(new MathCharInset('&', LM_TC_TEX));
// Super-/subscript: the script content is parsed as one item into the script
// cell of the last array element. The condition under which the blank
// placeholder is pushed first is not visible here.
688 else if (t.cat() == catSuper || t.cat() == catSub) {
689 bool up = (t.cat() == catSuper);
691 array.push_back(new MathCharInset(' '));
692 parse_into(array.back().ensure(up)->cell(0), FLAG_ITEM);
695 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
698 else if (t.cat() == catOther)
699 array.push_back(new MathCharInset(t.character(), yyvarcode));
// ---- control sequences ----
704 else if (t.cs() == "protect")
707 else if (t.cs() == "end")
710 else if (t.cs() == ")")
713 else if (t.cs() == "]")
// Line break; an optional [skip] argument is stored in curr_skip_.
716 else if (t.cs() == "\\") {
717 curr_skip_ = getArg('[', ']');
718 if (flags & FLAG_NEWLINE)
720 lyxerr[Debug::MATHED]
721 << "found newline unexpectedly, array: '" << array << "'\n";
722 array.push_back(createMathInset("\\"));
// \limits and \nolimits act on the last element and are skipped while the
// array is still empty.
725 else if (t.cs() == "limits" && array.size())
726 array.back().limits(1);
728 else if (t.cs() == "nolimits" && array.size())
729 array.back().limits(-1);
731 else if (t.cs() == "nonumber")
734 else if (t.cs() == "number")
// \sqrt comes in two forms: a root inset whose cell 0 is parsed up to ']'
// and whose cell 1 is one item, or a plain square root with a single item.
// The test choosing between them is not visible here.
737 else if (t.cs() == "sqrt") {
740 array.push_back(new MathRootInset);
741 parse_into(array.back().nucleus()->cell(0), FLAG_BRACK_END);
742 parse_into(array.back().nucleus()->cell(1), FLAG_ITEM);
745 array.push_back(new MathSqrtInset);
746 parse_into(array.back().nucleus()->cell(0), FLAG_ITEM);
// \left<l> ... \right<r>: the delimiters are single tokens and the enclosed
// content is parsed until the matching \right.
750 else if (t.cs() == "left") {
751 string l = getToken().asString();
753 parse_into(ar, FLAG_RIGHT);
754 string r = getToken().asString();
755 MathDelimInset * dl = new MathDelimInset(l, r);
760 else if (t.cs() == "right") {
761 if (!(flags & FLAG_RIGHT)) {
762 lyxerr << "got so far: '" << array << "'\n";
763 error("Unmatched right delimiter");
771 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
772 //MathArray tmp = array;
773 //MathSizeInset * p = new MathSizeInset(MathStyles(lval_->id));
774 //array.push_back(p);
775 //parse_into(p->cell(0), FLAG_BRACE_FONT);
// Nested environments: "array" and "split" are the names handled here.
781 else if (t.cs() == "begin") {
782 string const name = getArg('{', '}');
783 if (name == "array") {
// Appending 'c' guarantees valign[0] exists even when no [..] was given.
784 string const valign = getArg('[', ']') + 'c';
785 string const halign = getArg('{', '}');
// The length of the column specification is used as the column count.
786 MathArrayInset * m = new MathArrayInset(halign.size(), 1);
787 m->valign(valign[0]);
789 parse_lines(m, false, false);
791 } else if (name == "split") {
792 MathSplitInset * m = new MathSplitInset(1);
793 parse_lines(m, false, false);
796 lyxerr[Debug::MATHED] << "unknow math inset begin '" << name << "'\n";
// \kern: collects a length string s and builds a kern inset from it; the
// collecting loop is only partly visible.
799 else if (t.cs() == "kern") {
// NOTE(review): this declaration reuses the name t of the token fetched at
// the top -- confirm the shadowing is intended.
805 Token const & t = getToken();
811 if (isValidLength(s))
814 array.push_back(new MathKernInset(s));
// \label{...}: the argument is stored verbatim in curr_label_.
817 else if (t.cs() == "label") {
819 //parse_into(ar, FLAG_ITEM);
821 //ar.write(os, true);
822 //curr_label_ = os.str();
824 curr_label_ = getArg('{', '}');
// Infix commands: what has been read so far becomes cell 0 and what follows
// becomes cell 1 of the new inset.
827 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
828 MathInset * p = createMathInset(t.cs());
829 // search backward for position of last '{' if any
// pos is declared on a line not visible here; the loop relies on it being
// able to go negative (pos >= 0 as the end condition).
831 for (pos = array.size() - 1; pos >= 0; --pos)
832 if (array.at(pos)->nucleus()->getChar() == '{')
835 // found it -> use the part after '{' as "numerator"
836 p->cell(0) = MathArray(array, pos + 1, array.size());
837 parse_into(p->cell(1), FLAG_BRACE_LAST);
838 // delete denominator and the '{'
839 array.erase(pos, array.size());
840 } else if (flags & FLAG_RIGHT) {
841 // we are inside a \left ... \right block
842 //lyxerr << "found '" << t.cs() << "' enclosed by \\left .. \\right\n";
843 p->cell(0).swap(array);
844 parse_into(p->cell(1), FLAG_RIGHT);
845 // handle the right delimiter properly
848 // not found -> use everything as "numerator"
849 p->cell(0).swap(array);
850 parse_into(p->cell(1), FLAG_BLOCK);
// Any other control sequence is looked up in the keyword table.
855 else if (t.cs().size()) {
856 latexkeys const * l = in_word_set(t.cs());
// Font command with an argument: the argument is parsed as one item and the
// font is then applied to every element of it.
858 if (l->token == LM_TK_FONT) {
859 //lyxerr << "starting font\n";
860 //CatCode catSpaceSave = theCatcode[' '];
861 //if (l->id == LM_TC_TEXTRM) {
862 // // temporarily change catcode
863 // theCatcode[' '] = catLetter;
// NOTE(review): this local t reuses the name of the token t.
866 MathTextCodes t = static_cast<MathTextCodes>(l->id);
868 parse_into(ar, FLAG_ITEM, t);
869 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it)
870 it->nucleus()->handleFont(t);
873 // undo catcode changes
874 ////theCatcode[' '] = catSpaceSave;
875 //lyxerr << "ending font\n";
// Old-style font switch: changes the code used for later characters.
878 else if (l->token == LM_TK_OLDFONT)
879 yyvarcode = static_cast<MathTextCodes>(l->id);
// Generic inset: one item is parsed for each of its nargs() arguments.
882 MathInset * p = createMathInset(t.cs());
883 for (unsigned int i = 0; i < p->nargs(); ++i)
884 parse_into(p->cell(i), FLAG_ITEM);
890 MathInset * p = createMathInset(t.cs());
892 for (unsigned int i = 0; i < p->nargs(); ++i)
893 parse_into(p->cell(i), FLAG_ITEM);
896 error("Unrecognized token");
897 //lyxerr[Debug::MATHED] << "[" << t << "]\n";
// FLAG_LEAVE is cleared once it has been seen.
904 if (flags & FLAG_LEAVE) {
905 flags &= ~FLAG_LEAVE;
// Error recovery: the loop ends when the tokens run out or an \end is seen.
911 lyxerr << " Math Panic, expect problems!\n";
912 // Search for the end command.
916 } while (good() && t.cs() != "end");
920 } // anonymous namespace
// Parses str as the contents of a single cell and returns the result as a
// MathArray. The parser and array declarations as well as the return
// statement are not visible in this excerpt.
924 MathArray mathed_parse_cell(string const & str)
926 istringstream is(str.c_str());
// flags == 0: no special start or termination conditions are requested.
929 parser.parse_into(ar, 0);
// Entry points for parsing a macro definition; each one delegates to
// Parser::parse_macro() and returns its result. The lines that build the
// parser object are not visible in this excerpt.
// Variant reading the definition from a string.
935 MathMacroTemplate * mathed_parse_macro(string const & str)
937 istringstream is(str.c_str());
939 return parser.parse_macro();
// Variant reading from an input stream.
942 MathMacroTemplate * mathed_parse_macro(istream & is)
945 return parser.parse_macro();
// Variant reading from a LyXLex.
948 MathMacroTemplate * mathed_parse_macro(LyXLex & lex)
951 return parser.parse_macro();
// Entry points for parsing a formula; each one delegates to
// Parser::parse_normal() and returns its result. The lines that build the
// parser object are not visible in this excerpt.
// Variant reading the formula from a string.
956 MathMatrixInset * mathed_parse_normal(string const & str)
958 istringstream is(str.c_str());
960 return parser.parse_normal();
// Variant reading from an input stream.
963 MathMatrixInset * mathed_parse_normal(istream & is)
966 return parser.parse_normal();
// Variant reading from a LyXLex.
969 MathMatrixInset * mathed_parse_normal(LyXLex & lex)
972 return parser.parse_normal();