3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
23 #pragma implementation
26 #include "math_parser.h"
28 #include "math_inset.h"
29 #include "math_arrayinset.h"
30 #include "math_charinset.h"
31 #include "math_deliminset.h"
32 #include "math_factory.h"
33 #include "math_funcinset.h"
34 #include "math_macro.h"
35 #include "math_macrotable.h"
36 #include "math_macrotemplate.h"
37 #include "math_matrixinset.h"
38 #include "math_rootinset.h"
39 #include "math_scopeinset.h"
40 #include "math_sqrtinset.h"
41 #include "math_scriptinset.h"
42 #include "math_sqrtinset.h"
46 #include "support/lstrings.h"
// Tell whether the environment name `s` is a "starred" variant, i.e. whether
// its last character is '*'. An empty name is never starred.
bool stared(string const & s)
{
	// Keep the length in the string's own size type instead of narrowing
	// it to `unsigned`.
	string::size_type const len = s.size();
	return len != 0 && s[len - 1] == '*';
}
62 MathScriptInset * prevScriptInset(MathArray const & array)
64 MathInset * p = array.back();
65 return (p && p->isScriptInset()) ? static_cast<MathScriptInset *>(p) : 0;
// Produce the script inset that a following super-/subscript should go into.
// `up` selects superscript (true) or subscript (false). Several lines of this
// function are not visible in this listing, including every use of `limits`.
69 MathInset * lastScriptInset(MathArray & array, bool up, int limits)
// start from the script inset already at the end of the array, if any
71 MathScriptInset * p = prevScriptInset(array);
73 MathInset * b = array.back();
74 if (b && b->isScriptable()) {
// build a script inset around a clone of the scriptable last inset
75 p = new MathScriptInset(up, !up, b->clone());
// presumably the else branch: a script inset with no nucleus -- TODO confirm
78 p = new MathScriptInset(up, !up);
92 // These are TeX's catcodes
// Only some enumerators are visible in this listing; the trailing comment on
// each one gives the TeX category number and a typical character.
94 catEscape, // 0 backslash
104 catSpace, // 10 space
105 catLetter, // 11 a-zA-Z
106 catOther, // 12 none of the above
109 catInvalid // 15 <delete>
// Category code for each of the 256 possible byte values, read via catcode().
112 CatCode theCatcode[256];
// Look up the category code of the byte `c` in theCatcode. Taking an
// unsigned char keeps the index within 0..255.
115 inline CatCode catcode(unsigned char c)
117 return theCatcode[c];
// Bit flags combined into the `flags` argument of Parser::parse_into().
// Bit 5 (1 << 5) is not assigned among the values visible here.
122 FLAG_BRACE = 1 << 0, // an opening brace needed
123 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing process
124 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
125 FLAG_END = 1 << 3, // next \\end ends the parsing process
126 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing process
127 FLAG_NEWLINE = 1 << 6, // next \\\\ ends the parsing process
128 FLAG_ITEM = 1 << 7, // read a (possibly braced token)
129 FLAG_BLOCK = 1 << 8, // next block ends the parsing process
130 FLAG_LEAVE = 1 << 9 // leave the loop at the end
136 for (int i = 0; i <= 255; ++i)
137 theCatcode[i] = catOther;
138 for (int i = 'a'; i <= 'z'; ++i)
139 theCatcode[i] = catLetter;
140 for (int i = 'A'; i <= 'Z'; ++i)
141 theCatcode[i] = catLetter;
143 theCatcode['\\'] = catEscape;
144 theCatcode['{'] = catBegin;
145 theCatcode['}'] = catEnd;
146 theCatcode['$'] = catMath;
147 theCatcode['&'] = catAlign;
148 theCatcode['\n'] = catNewline;
149 theCatcode['#'] = catParameter;
150 theCatcode['^'] = catSuper;
151 theCatcode['_'] = catSub;
152 theCatcode['
\7f'] = catIgnore;
153 theCatcode[' '] = catSpace;
154 theCatcode['\t'] = catSpace;
155 theCatcode['\r'] = catSpace;
156 theCatcode['~'] = catActive;
157 theCatcode['%'] = catComment;
163 // Helper class for parsing
// A token is either a control sequence (non-empty cs_) or a single character
// with a category code. The class header and data members are not visible in
// this listing.
// default token: empty control sequence, character 0, category catIgnore
169 Token() : cs_(), char_(0), cat_(catIgnore) {}
// character token with an explicit category code
171 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
// control-sequence token; character is 0 and category is catIgnore
173 Token(const string & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
// control-sequence name (empty for tokens built from a character)
176 string const & cs() const { return cs_; }
// category code of the token
178 CatCode cat() const { return cat_; }
// character of the token (0 for tokens built from a control sequence)
180 char character() const { return char_; }
// control-sequence name if non-empty, otherwise the character as a string
182 string asString() const;
193 string Token::asString() const
195 return cs_.size() ? cs_ : string(1, char_);
198 bool operator==(Token const & s, Token const & t)
200 return s.character() == t.character()
201 && s.cat() == t.cat() && s.cs() == t.cs();
// Inequality comparison for tokens; the body is not visible in this listing
// (presumably the negation of operator== -- TODO confirm).
204 bool operator!=(Token const & s, Token const & t)
// Debug output of a token. Two forms are written below; the condition that
// selects between them is not visible in this listing.
209 ostream & operator<<(ostream & os, Token const & t)
// control-sequence form: a backslash followed by the name
212 os << "\\" << t.cs();
// character form: "[character,category]"
214 os << "[" << t.character() << "," << t.cat() << "]";
// Members of the Parser class; the class header, access specifiers and most
// data members are not visible in this listing.
// construct from a LyXLex: takes its line number and tokenizes its stream
223 Parser(LyXLex & lex);
// construct from a plain input stream
225 Parser(istream & is);
// parse a \newcommand definition into a macro template
228 MathMacroTemplate * parse_macro();
// parse a formula ($...$, \(...\) or a \begin{...} environment)
230 MathMatrixInset * parse_normal();
// parse tokens into `array`; `flags` is a combination of FLAG_* bits
232 void parse_into(MathArray & array, unsigned flags);
// line number recorded at construction
234 int lineno() const { return lineno_; }
// read an argument delimited by the characters `lf` and `rf`
240 string getArg(char lf, char rf);
// report a math parse error on lyxerr
244 void error(string const & msg);
// parse the rows and cells of a grid-like inset
246 void parse_lines(MathGridInset * p, bool numbered, bool outmost);
// read a delimiter token and look it up with in_word_set()
248 latexkeys const * read_delim();
// split the input into tokens appended to tokens_
252 void tokenize(istream & is);
254 void tokenize(string const & s);
// append one token to tokens_
256 void push_back(Token const & t);
// token just before the read position (default token at the start)
260 Token const & prevToken() const;
// token at the read position, without consuming it
262 Token const & nextToken() const;
// token at the read position, consuming it
264 Token const & getToken();
// no definition of lex() is visible in this listing
266 void lex(string const & s);
// token buffer; pos_ is the read position within it
273 std::vector<Token> tokens_;
// Construct a parser from a LyXLex: remember the lexer's current line number
// for error messages, start reading at token 0, and tokenize the lexer's
// underlying stream.
285 Parser::Parser(LyXLex & lexer)
286 : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
288 tokenize(lexer.getStream());
// Construct a parser from a plain stream; the line number starts at 0. The
// constructor body is not visible in this listing (presumably it tokenizes
// `is` -- TODO confirm).
293 Parser::Parser(istream & is)
294 : lineno_(0), pos_(0), curr_num_(false)
// Append the token `t` to the end of the token buffer.
300 void Parser::push_back(Token const & t)
302 tokens_.push_back(t);
// Body not visible in this listing (presumably removes the last token from
// tokens_ -- TODO confirm).
306 void Parser::pop_back()
312 Token const & Parser::prevToken() const
314 static const Token dummy;
315 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
319 Token const & Parser::nextToken() const
321 static const Token dummy;
322 return good() ? tokens_[pos_] : dummy;
326 Token const & Parser::getToken()
328 static const Token dummy;
329 return good() ? tokens_[pos_++] : dummy;
// Body not visible in this listing (presumably steps the read position back
// by one token -- TODO confirm).
333 void Parser::putback()
// True while the read position is still inside the token buffer, i.e. there
// is at least one token left to read.
339 bool Parser::good() const
341 return pos_ < tokens_.size();
// Return the character of the token at the read position and advance past it.
345 char Parser::getChar()
// NOTE(review): the condition guarding this message is not visible here. If
// it is only a !good() check, the access below still indexes past the end of
// tokens_ after logging -- confirm and consider returning early.
348 lyxerr << "The input stream is not well..." << endl;
349 return tokens_[pos_++].character();
// Read an argument delimited by the characters `lf` and `rg`. Most of the
// body, including how `lf` is matched and how the result is built, is not
// visible in this listing.
353 string Parser::getArg(char lf, char rg)
// consume characters until the closing delimiter `rg` is read or the token
// buffer runs out; note that getChar() is called before good() is tested
361 while ((c = getChar()) != rg && good())
// Stream overload of tokenize(); the reading loop and the hand-off of the
// collected text are not visible in this listing.
368 void Parser::tokenize(istream & is)
370 // eat everything up to the next \end_inset or end of stream
371 // and store it in s for further tokenization
// once s ends with "\end_inset" (10 characters), cut that marker off
376 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
377 s = s.substr(0, s.size() - 10);
// Split `buffer` into tokens according to the category code of each
// character and append them to tokens_. Many lines (case labels, token
// construction for control sequences) are not visible in this listing.
387 void Parser::tokenize(string const & buffer)
// presumably guards one-time initialisation of the catcode table -- TODO confirm
389 static bool init_done = false;
396 istringstream is(buffer, ios::in | ios::binary);
// dispatch on the category of the current character
401 switch (catcode(c)) {
// a second newline right after the first is currently ignored (the "par"
// token is commented out); the other case yields a single space token
405 if (catcode(c) == catNewline)
406 ; //push_back(Token("par"));
408 push_back(Token(' ', catSpace));
// skip characters up to the next newline (see the enclosing case label, not
// visible here -- presumably comment handling)
415 while (is.get(c) && catcode(c) != catNewline)
// after an escape character: a run of letters is read as one name ...
424 if (catcode(c) == catLetter) {
425 while (is.get(c) && catcode(c) == catLetter)
// ... and spaces following that name are skipped
427 if (catcode(c) == catSpace)
428 while (is.get(c) && catcode(c) == catSpace)
// any other character becomes a token carrying its own category code
437 push_back(Token(c, catcode(c)));
// debug dump of all tokens to lyxerr
442 lyxerr << "\nTokens: ";
443 for (unsigned i = 0; i < tokens_.size(); ++i)
444 lyxerr << tokens_[i];
// Report a math parse error on lyxerr, prefixed with the line number recorded
// at construction (marked "~" in the message).
450 void Parser::error(string const & msg)
452 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parse the rows of the grid inset `p`, one cell at a time. `numbered` is the
// initial numbering state of each row. Uses of `outmost` and the loop exits
// are not visible in this listing.
456 void Parser::parse_lines(MathGridInset * p, bool numbered, bool outmost)
458 const int cols = p->ncols();
460 // save global variables
461 bool const saved_num = curr_num_;
462 string const saved_label = curr_label_;
// one iteration per row; the loop has no condition of its own, so it must be
// left from inside the body
464 for (int row = 0; true; ++row) {
465 // reset global variables
466 curr_num_ = numbered;
470 for (int col = 0; col < cols; ++col) {
471 //lyxerr << "reading cell " << row << " " << col << "\n";
// cells are stored row-major; each is parsed with FLAG_BLOCK
472 parse_into(p->cell(col + row * cols), FLAG_BLOCK);
// the cell was not terminated by an alignment tab
475 if (prevToken().cat() != catAlign) {
476 //lyxerr << "less cells read than normal in row/col: "
477 // << row << " " << col << "\n";
// store the row's numbering state, label and optional vertical skip
// (the condition guarding this cast to MathMatrixInset is not visible)
483 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
484 m->numbered(row, curr_num_);
485 m->label(row, curr_label_);
486 if (curr_skip_.size()) {
487 m->vskip(LyXLength(curr_skip_), row);
// the row did not end with the control sequence named "\" (i.e. \\)
493 if (prevToken() != Token("\\")) {
494 //lyxerr << "no newline here\n";
501 // restore "global" variables
502 curr_num_ = saved_num;
503 curr_label_ = saved_label;
// Parse a macro definition of the form \newcommand{\name}[nargs]{body} into a
// new MathMacroTemplate. What happens after each error message and the final
// return are not visible in this listing.
507 MathMacroTemplate * Parser::parse_macro()
// skip leading space tokens
509 while (nextToken().cat() == catSpace)
512 if (getToken().cs() != "newcommand") {
513 lyxerr << "\\newcommand expected\n";
517 if (getToken().cat() != catBegin) {
518 lyxerr << "'{' expected\n";
// the macro name is the control sequence inside the braces
522 string name = getToken().cs();
524 if (getToken().cat() != catEnd) {
525 lyxerr << "'}' expected\n";
// optional [n] argument count; an empty argument means zero arguments
529 string arg = getArg('[', ']');
530 int narg = arg.empty() ? 0 : atoi(arg.c_str());
531 //lyxerr << "creating macro " << name << " with " << narg << "args\n";
532 MathMacroTemplate * p = new MathMacroTemplate(name, narg);
// the macro body is a braced group parsed into cell 0
533 parse_into(p->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
// Parse one formula and return it as a new MathMatrixInset whose type depends
// on how the formula starts. Return statements and some conditions are not
// visible in this listing.
538 MathMatrixInset * Parser::parse_normal()
540 Token const & t = getToken();
// inline math: a math-shift character or the control sequence "("
542 if (t.cat() == catMath || t.cs() == "(") {
543 MathMatrixInset * p = new MathMatrixInset(LM_OT_SIMPLE);
544 parse_into(p->cell(0), 0);
// anything else must start with a control sequence
548 if (!t.cs().size()) {
549 lyxerr << "start of math expected, got '" << t << "'\n";
553 string const & cs = t.cs();
// display equation (the condition selecting this branch is not visible)
558 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQUATION);
559 parse_into(p->cell(0), 0);
560 p->numbered(0, curr_num_);
561 p->label(0, curr_label_);
566 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
// environment name following \begin
570 string const name = getArg('{', '}');
572 if (name == "equation" || name == "equation*") {
// the starred form is unnumbered
573 curr_num_ = !stared(name);
575 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQUATION);
576 parse_into(p->cell(0), FLAG_END);
577 p->numbered(0, curr_num_);
578 p->label(0, curr_label_);
582 if (name == "eqnarray" || name == "eqnarray*") {
583 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQNARRAY);
584 parse_lines(p, !stared(name), true);
// align and alignat read a further {...} argument and pass it to halign()
588 if (name == "align" || name == "align*") {
589 MathMatrixInset * p = new MathMatrixInset(LM_OT_ALIGN);
590 p->halign(getArg('{', '}'));
591 parse_lines(p, !stared(name), true);
595 if (name == "alignat" || name == "alignat*") {
596 MathMatrixInset * p = new MathMatrixInset(LM_OT_ALIGNAT);
597 p->halign(getArg('{', '}'));
598 parse_lines(p, !stared(name), true);
// no known environment matched
602 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
607 latexkeys const * Parser::read_delim()
609 Token const & t = getToken();
610 latexkeys const * l = in_word_set(t.asString());
611 return l ? l : in_word_set(".");
// Parse tokens into `array`. `flags` is a combination of FLAG_* bits that
// controls what is expected first and where parsing stops. Many lines of the
// body (loop header, breaks, several conditions) are not visible in this
// listing; the comments below describe only the statements that are shown.
615 void Parser::parse_into(MathArray & array, unsigned flags)
// font code given to plain characters; changed by LM_TK_OLDFONT commands below
617 MathTextCodes yyvarcode = LM_TC_MIN;
623 Token const & t = getToken();
625 //lyxerr << "t: " << t << " flags: " << flags << "'\n";
626 //array.dump(lyxerr);
// FLAG_ITEM: read either one braced group or a single token
629 if (flags & FLAG_ITEM) {
631 if (t.cat() == catBegin) {
632 // skip the brace and collect everything to the next matching
634 flags |= FLAG_BRACE_LAST;
637 // handle only this single token, leave the loop if done
// FLAG_BRACE: the first token must be an opening brace; the flag is cleared
// once it has been checked
642 if (flags & FLAG_BRACE) {
643 if (t.cat() != catBegin) {
644 error("Expected {. Maybe you forgot to enclose an argument in {}");
648 flags &= ~FLAG_BRACE;
// FLAG_BLOCK: a closing brace, an alignment tab, \\ or \end is tested for
// here (the action taken is not visible)
653 if (flags & FLAG_BLOCK) {
654 if (t.cat() == catEnd || t.cat() == catAlign || t.cs() == "\\")
656 if (t.cs() == "end") {
// ---- dispatch on the category code of character tokens ----
665 if (t.cat() == catMath)
668 else if (t.cat() == catLetter)
669 array.push_back(new MathCharInset(t.character(), yyvarcode));
// spaces are kept only while the current font code is LM_TC_TEXTRM
671 else if (t.cat() == catSpace && yyvarcode == LM_TC_TEXTRM)
672 array.push_back(new MathCharInset(' ', yyvarcode));
// macro parameter: the following token's character is taken as a digit
674 else if (t.cat() == catParameter) {
675 Token const & n = getToken();
676 MathMacroArgument * p = new MathMacroArgument(n.character() - '0');
// an opening brace starts a nested scope parsed up to its closing brace
680 else if (t.cat() == catBegin) {
681 //lyxerr << " creating ScopeInset\n";
682 array.push_back(new MathScopeInset);
683 parse_into(array.back()->cell(0), FLAG_BRACE_LAST);
686 else if (t.cat() == catEnd) {
687 if (!(flags & FLAG_BRACE_LAST))
688 lyxerr << " ##### unexpected end of block\n";
692 else if (t.cat() == catAlign) {
693 lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
// super-/subscripts are parsed as one item into cell 0 / cell 1 of the script
// inset returned by lastScriptInset()
697 else if (t.cat() == catSuper)
698 parse_into(lastScriptInset(array, true, limits)->cell(0), FLAG_ITEM);
700 else if (t.cat() == catSub)
701 parse_into(lastScriptInset(array, false, limits)->cell(1), FLAG_ITEM);
// a closing bracket is special only when FLAG_BRACK_END is set
703 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
706 else if (t.cat() == catOther)
707 array.push_back(new MathCharInset(t.character(), yyvarcode));
// ---- dispatch on control-sequence names ----
712 else if (t.cs() == "protect")
715 else if (t.cs() == "end")
718 else if (t.cs() == ")")
721 else if (t.cs() == "]")
// \\ : remember the optional [skip] argument in curr_skip_
724 else if (t.cs() == "\\") {
725 curr_skip_ = getArg('[', ']');
726 if (!(flags & FLAG_NEWLINE))
727 lyxerr[Debug::MATHED]
728 << "found newline unexpectedly, array: '" << array << "'\n";
732 else if (t.cs() == "limits")
735 else if (t.cs() == "nolimits")
738 else if (t.cs() == "nonumber")
741 else if (t.cs() == "number")
// \sqrt: a root inset reads a bracketed cell 0 and an item for cell 1; a
// plain square root reads one item (the selecting condition is not visible)
744 else if (t.cs() == "sqrt") {
747 array.push_back(new MathRootInset);
748 parse_into(array.back()->cell(0), FLAG_BRACK_END);
749 parse_into(array.back()->cell(1), FLAG_ITEM);
752 array.push_back(new MathSqrtInset);
753 parse_into(array.back()->cell(0), FLAG_ITEM);
// \left: read the left delimiter, parse up to \right, read the right
// delimiter, and build a delimiter inset from the pair
757 else if (t.cs() == "left") {
758 latexkeys const * l = read_delim();
760 parse_into(ar, FLAG_RIGHT);
761 latexkeys const * r = read_delim();
762 MathDelimInset * dl = new MathDelimInset(l, r);
767 else if (t.cs() == "right") {
768 if (!(flags & FLAG_RIGHT))
769 error("Unmatched right delimiter");
776 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
777 //MathArray tmp = array;
778 //MathSizeInset * p = new MathSizeInset(MathStyles(lval_->id));
779 //array.push_back(p);
780 //parse_into(p->cell(0), FLAG_BRACE_FONT);
// a name with a macro template is cloned and each argument parsed as an item
785 if (MathMacroTable::hasTemplate(sval_)) {
786 MathMacro * m = MathMacroTable::cloneTemplate(sval_);
787 for (int i = 0; i < m->nargs(); ++i)
788 parse_into(m->cell(i), FLAG_ITEM);
790 m->metrics(LM_ST_TEXT);
792 array.push_back(new MathFuncInset(sval_));
796 array.push_back(new MathCharInset(ival_, LM_TC_SPECIAL));
// \begin inside a formula: only "array" is handled in the lines shown
800 else if (t.cs() == "begin") {
801 string const name = getArg('{', '}');
802 if (name == "array") {
// appending 'c' makes valign[0] default to 'c' when no [..] argument is given
803 string const valign = getArg('[', ']') + 'c';
804 string const halign = getArg('{', '}');
// the array gets as many columns as halign has characters, and one row
805 MathArrayInset * m = new MathArrayInset(halign.size(), 1);
806 m->valign(valign[0]);
808 parse_lines(m, false, false);
811 lyxerr[Debug::MATHED] << "unknow math inset begin '" << name << "'\n";
// \label: the argument is parsed as an item and its text, collected in `os`,
// becomes curr_label_
814 else if (t.cs() == "label") {
816 parse_into(ar, FLAG_ITEM);
819 curr_label_ = os.str();
821 //curr_label_ = getArg('{', '}');
// infix commands: everything parsed so far becomes cell 0 of the new inset
// and the remainder of the block becomes cell 1
824 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
826 MathInset * p = createMathInset(t.cs());
827 p->cell(0).swap(array);
829 parse_into(p->cell(1), FLAG_BLOCK);
// any other control sequence is looked up with in_word_set()
832 else if (t.cs().size()) {
834 latexkeys const * l = in_word_set(t.cs());
// font command with an argument: parse the item, then apply the font to
// every inset in it
836 if (l->token == LM_TK_FONT) {
837 //lyxerr << "starting font\n";
839 parse_into(ar, FLAG_ITEM);
840 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it)
841 (*it)->handleFont(static_cast<MathTextCodes>(l->id));
843 //lyxerr << "ending font\n";
// old-style font switch: changes the font code used from here on
846 else if (l->token == LM_TK_OLDFONT)
847 yyvarcode = static_cast<MathTextCodes>(l->id);
// generic inset created from the name; each of its cells is parsed as an item
850 MathInset * p = createMathInset(t.cs());
851 for (int i = 0; i < p->nargs(); ++i)
852 parse_into(p->cell(i), FLAG_ITEM);
858 MathInset * p = createMathInset(t.cs());
860 for (int i = 0; i < p->nargs(); ++i)
861 parse_into(p->cell(i), FLAG_ITEM);
864 error("Unrecognized token");
865 //lyxerr[Debug::MATHED] << "[" << t << "]\n";
// FLAG_LEAVE is cleared here; see the flag's own comment for its meaning
872 if (flags & FLAG_LEAVE) {
873 flags &= ~FLAG_LEAVE;
879 lyxerr << " Math Panic, expect problems!\n";
880 // Search for the end command.
884 } while (good() && t.cs() != "end");
888 } // anonymous namespace
// Parse the text `str` as the contents of a single math cell. Construction of
// the parser and the return statement are not visible in this listing.
892 MathArray mathed_parse_cell(string const & str)
894 istringstream is(str.c_str());
// flags 0: no special start or stop condition is requested
897 parser.parse_into(ar, 0);
// Parse a macro definition held in the string `str`; the result comes from
// Parser::parse_macro(). Construction of the parser is not visible here.
903 MathMacroTemplate * mathed_parse_macro(string const & str)
905 istringstream is(str.c_str());
907 return parser.parse_macro();
// Parse a macro definition read from the stream `is`; the result comes from
// Parser::parse_macro(). Construction of the parser is not visible here.
910 MathMacroTemplate * mathed_parse_macro(istream & is)
913 return parser.parse_macro();
// Parse a macro definition read through the LyXLex `lex`; the result comes
// from Parser::parse_macro(). Construction of the parser is not visible here.
916 MathMacroTemplate * mathed_parse_macro(LyXLex & lex)
919 return parser.parse_macro();
// Parse a formula held in the string `str`; the result comes from
// Parser::parse_normal(). Construction of the parser is not visible here.
924 MathMatrixInset * mathed_parse_normal(string const & str)
926 istringstream is(str.c_str());
928 return parser.parse_normal();
// Parse a formula read from the stream `is`; the result comes from
// Parser::parse_normal(). Construction of the parser is not visible here.
931 MathMatrixInset * mathed_parse_normal(istream & is)
934 return parser.parse_normal();
// Parse a formula read through the LyXLex `lex`; the result comes from
// Parser::parse_normal(). Construction of the parser is not visible here.
937 MathMatrixInset * mathed_parse_normal(LyXLex & lex)
940 return parser.parse_normal();