3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
23 #pragma implementation
26 #include "math_parser.h"
28 #include "math_inset.h"
29 #include "math_arrayinset.h"
30 #include "math_charinset.h"
31 #include "math_deliminset.h"
32 #include "math_factory.h"
33 #include "math_funcinset.h"
34 #include "math_macro.h"
35 #include "math_macrotable.h"
36 #include "math_macrotemplate.h"
37 #include "math_matrixinset.h"
38 #include "math_rootinset.h"
39 #include "math_scopeinset.h"
40 #include "math_sqrtinset.h"
41 #include "math_scriptinset.h"
42 #include "math_sqrtinset.h"
46 #include "support/lstrings.h"
/// Tell whether an environment name is the starred variant.
/// @param s  environment name, e.g. "equation" or "equation*"
/// @return   true iff s is non-empty and its last character is '*'
bool stared(string const & s)
{
    // Use the string's own size type: storing size() in an `unsigned`
    // can truncate the length on platforms where size_type is wider.
    string::size_type const n = s.size();
    return n != 0 && s[n - 1] == '*';
}
60 MathScriptInset * prevScriptInset(MathArray const & array)
62 MathInset * p = array.back();
63 return (p && p->isScriptInset()) ? static_cast<MathScriptInset *>(p) : 0;
// Obtain the script inset that a following super-/subscript should go into.
// Only part of the body is visible in this listing (lines are elided between
// the numbered ones), so the exact branch structure cannot be read off here.
// Visible logic: start from prevScriptInset(array); look at the array's last
// element b; when b is non-null and isScriptable(), a new MathScriptInset is
// built from (up, !up, b->clone()); another visible path builds one from
// (up, !up) alone.
// NOTE(review): how `limits` is used, what happens to the original last
// element, and what is returned are not visible here -- confirm in the full
// source.
67 MathInset * lastScriptInset(MathArray & array, bool up, int limits)
69 MathScriptInset * p = prevScriptInset(array);
71 MathInset * b = array.back();
72 if (b && b->isScriptable()) {
73 p = new MathScriptInset(up, !up, b->clone());
76 p = new MathScriptInset(up, !up);
90 // These are TeX's catcodes
// Only some enumerators are visible in this listing. The number in each
// trailing comment is the category-code value the enumerator stands for,
// followed by the character class it describes.
92 catEscape, // 0 backslash
102 catSpace, // 10 space
103 catLetter, // 11 a-zA-Z
104 catOther, // 12 none of the above
107 catInvalid // 15 <delete>
// Lookup table giving the category code for each of the 256 character values.
110 CatCode catcode[256];
// Character constants for the opening and closing brace.
112 const unsigned char LM_TK_OPEN = '{';
113 const unsigned char LM_TK_CLOSE = '}';
// Bit flags passed as the `flags` argument of Parser::parse_into; they can be
// OR-ed together (e.g. FLAG_BRACE | FLAG_BRACE_LAST in parse_macro).
// Among the visible entries bit 5 (1 << 5) is not assigned.
116 FLAG_BRACE = 1 << 0, // an opening brace needed
117 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing process
118 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
119 FLAG_END = 1 << 3, // next \\end ends the parsing process
120 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing process
121 FLAG_NEWLINE = 1 << 6, // next \\\\ ends the parsing process
122 FLAG_ITEM = 1 << 7, // read a (possibly braced token)
123 FLAG_BLOCK = 1 << 8, // next block ends the parsing process
124 FLAG_LEAVE = 1 << 9, // leave the loop at the end
// Populate the catcode table (the enclosing function header is not visible
// in this listing).
// Step 1: every character value defaults to catOther.
130 for (int i = 0; i <= 255; ++i)
131 catcode[i] = catOther;
// Step 2: ASCII letters, lower and upper case, become catLetter.
132 for (int i = 'a'; i <= 'z'; ++i)
133 catcode[i] = catLetter;
134 for (int i = 'A'; i <= 'Z'; ++i)
135 catcode[i] = catLetter;
// Step 3: individual characters with a special category.
137 catcode['\\'] = catEscape;
138 catcode['{'] = catBegin;
139 catcode['}'] = catEnd;
140 catcode['$'] = catMath;
141 catcode['&'] = catAlign;
142 catcode['\n'] = catNewline;
143 catcode['#'] = catParameter;
144 catcode['^'] = catSuper;
145 catcode['_'] = catSub;
// DEL (0x7f) is ignored. It is written as a hex escape: the character
// literal here was broken across two lines, and the spelling '\7f' would be
// a multi-character constant (octal \7 followed by 'f') whose value lies
// outside the 256-entry table.
146 catcode['\x7f'] = catIgnore;
147 catcode[' '] = catSpace;
148 catcode['\t'] = catSpace;
149 catcode['\r'] = catSpace;
150 catcode['~'] = catActive;
151 catcode['%'] = catComment;
157 // Helper class for parsing
// A Token carries a control-sequence name (cs_), a single character (char_)
// and a category code (cat_). The class header and the data member
// declarations are not visible in this listing.
// Default token: empty name, character 0, category catIgnore.
163 Token() : cs_(), char_(0), cat_(catIgnore) {}
// Character token with an explicit category; the name stays empty.
165 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
// Control-sequence token: name cs, character 0, category catIgnore.
167 Token(const string & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
// Equality compares character, category and name (defined out of line).
170 bool operator==(Token const & t) const;
// Control-sequence name (empty for character tokens built by the ctors above).
172 string const & cs() const { return cs_; }
// Category code of the token.
174 CatCode cat() const { return cat_; }
// Character stored in the token.
176 char character() const { return char_; }
// The name if it is non-empty, else the character as a one-char string.
178 string asString() const;
189 string Token::asString() const
191 return cs_.size() ? cs_ : string(1, char_);
194 bool Token::operator==(Token const & t) const
196 return char_ == t.char_ && cat_ == t.cat_ && cs_ == t.cs_;
// Stream a Token for debugging. Two output forms are visible: a backslash
// followed by the control-sequence name, and "[character,catcode]".
// NOTE(review): the condition selecting between the two forms and the return
// statement are elided in this listing -- confirm in the full source.
199 ostream & operator<<(ostream & os, Token const & t)
202 os << "\\" << t.cs();
204 os << "[" << t.character() << "," << t.cat() << "]";
// Member declarations of the Parser class (the class header, access
// specifiers and several members are not visible in this listing).
// Construct from a LyXLex: takes its line number and tokenizes its stream.
213 Parser(LyXLex & lex);
// Construct from a plain input stream.
215 Parser(istream & is);
// Parse a \newcommand definition into a macro template.
218 MathMacroTemplate * parse_macro();
// Parse a complete formula (inline, displayed or \begin{...} environment).
220 MathMatrixInset * parse_normal();
// Parse tokens into `array`; `flags` is a combination of the FLAG_* bits.
222 void parse_into(MathArray & array, unsigned flags);
// Line number recorded for diagnostics.
224 int lineno() const { return lineno_; }
// Read an argument delimited by the characters lf and rf.
230 string getArg(char lf, char rf);
// Report a parse error on lyxerr, prefixed with the line number.
234 void error(string const & msg);
// Parse the cells of a grid-like inset row by row.
236 void parse_lines(MathGridInset * p, bool numbered, bool outmost);
// Read one token and look it up as a delimiter keyword.
238 latexkeys const * read_delim();
// Collect input from a stream and tokenize it.
242 void tokenize(istream & is);
// Split a string into tokens appended to tokens_.
244 void tokenize(string const & s);
// Append a token to tokens_.
246 void push_back(Token const & t);
// Token just before the current position (a default Token at the start).
250 Token const & prevToken() const;
// Token at the current position without consuming it.
252 Token const & nextToken() const;
// Token at the current position; advances the position.
254 Token const & getToken();
// NOTE(review): no definition of lex() is visible in this listing.
256 void lex(string const & s);
// The token sequence produced by tokenize().
263 std::vector<Token> tokens_;
275 Parser::Parser(LyXLex & lexer)
276 : lineno_(lexer.getLineNo()), pos_(0)
278 tokenize(lexer.getStream());
// Build a parser from a plain input stream; line number and token position
// both start at 0.
// NOTE(review): the constructor body is elided in this listing.
282 Parser::Parser(istream & is)
283 : lineno_(0), pos_(0)
289 void Parser::push_back(Token const & t)
291 tokens_.push_back(t);
// NOTE(review): only the signature is visible in this listing; the body is
// elided, so its effect cannot be documented from here.
295 void Parser::pop_back()
301 Token const & Parser::prevToken() const
303 static const Token dummy;
304 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
308 Token const & Parser::nextToken() const
310 static const Token dummy;
311 return good() ? tokens_[pos_] : dummy;
315 Token const & Parser::getToken()
317 static const Token dummy;
318 return good() ? tokens_[pos_++] : dummy;
// NOTE(review): only the signature is visible in this listing; the body is
// elided, so its effect cannot be documented from here.
322 void Parser::putback()
328 bool Parser::good() const
330 return pos_ < tokens_.size();
// Consume the current token and return its character.
// NOTE(review): the condition guarding the diagnostic is elided in this
// listing. In the visible lines the read of tokens_[pos_++] follows the
// message with no visible early return, which would index past the end when
// the input is exhausted -- confirm against the full source.
334 char Parser::getChar()
337 lyxerr << "The input stream is not well..." << endl;
338 return tokens_[pos_++].character();
// Read a delimited argument. The visible loop pulls characters with
// getChar() until the closing delimiter `rg` is read or the input runs out.
// NOTE(review): the handling of the opening delimiter `lf` and the assembly
// of the returned string are elided in this listing.
342 string Parser::getArg(char lf, char rg)
350 while ((c = getChar()) != rg && good())
// Gather raw input from `is` and hand it on for tokenization.
// NOTE(review): the reading loop and the final call are elided in this
// listing; only the end-marker check is visible.
357 void Parser::tokenize(istream & is)
359 // eat everything up to the next \end_inset or end of stream
360 // and store it in s for further tokenization
// "\end_inset" is 10 characters long: once s ends with it, the marker is
// cut off again so that s holds only the text before it.
365 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
366 s = s.substr(0, s.size() - 10);
// Split `buffer` into Tokens and append them to tokens_, classifying each
// character through the catcode table. Several lines of the body (variable
// declarations, case labels, loop header) are elided in this listing; the
// visible lines are kept in place.
376 void Parser::tokenize(string const & buffer)
// Guards a one-time setup step (the guarded code is elided in this listing).
378 static bool init_done = false;
// Open-mode flags are bit masks and must be combined with bitwise OR; the
// former logical `||` collapsed them to the boolean value true.
385 istringstream is(buffer, ios::in | ios::binary);
// `c` is filled by is.get(c) and is therefore a plain char, which may be
// signed: it is converted to unsigned char before indexing the 256-entry
// table so that bytes >= 0x80 cannot produce a negative index.
390 switch (catcode[static_cast<unsigned char>(c)]) {
394 if (catcode[static_cast<unsigned char>(c)] == catNewline)
395 ; //push_back(Token("par"));
397 push_back(Token(' ',catSpace));
// Skip ahead to the end of the current line.
404 while (is.get(c) && catcode[static_cast<unsigned char>(c)] != catNewline)
// A letter starts a run of letters; a following run of spaces is consumed
// as well.
413 if (catcode[static_cast<unsigned char>(c)] == catLetter) {
414 while (is.get(c) && catcode[static_cast<unsigned char>(c)] == catLetter)
416 if (catcode[static_cast<unsigned char>(c)] == catSpace)
417 while (is.get(c) && catcode[static_cast<unsigned char>(c)] == catSpace)
// Any other character becomes a token carrying its own category.
426 push_back(Token(c, catcode[static_cast<unsigned char>(c)]));
430 //lyxerr << "\nTokens: ";
431 //for (unsigned i = 0; i < tokens_.size(); ++i)
432 // lyxerr << tokens_[i];
437 void Parser::error(string const & msg)
439 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parse the contents of a grid inset row by row, cell by cell.
//   p        - grid to fill; its column count fixes the cells read per row
//   numbered - value curr_num_ is reset to at the start of every row
//   outmost  - NOTE(review): its use is elided in this listing; presumably
//              it guards the MathMatrixInset-specific block below -- confirm.
// Several lines (loop exits, closing braces) are elided, so the termination
// conditions cannot be read off from here.
443 void Parser::parse_lines(MathGridInset * p, bool numbered, bool outmost)
445 const int cols = p->ncols();
447 // save global variables
448 bool const saved_num = curr_num_;
449 string const saved_label = curr_label_;
// No loop condition: leaving the loop is handled inside the body.
451 for (int row = 0; true; ++row) {
452 // reset global variables
453 curr_num_ = numbered;
457 for (int col = 0; col < cols; ++col) {
458 //lyxerr << "reading cell " << row << " " << col << "\n";
// Cells are addressed row-major: index = col + row * cols.
459 parse_into(p->cell(col + row * cols), FLAG_BLOCK);
// The cell did not end on an alignment-tab token.
462 if (prevToken().cat() != catAlign) {
463 //lyxerr << "less cells read than normal in row/col: "
464 // << row << " " << col << "\n";
// Store the per-row numbering flag, label and optional vertical skip that
// were collected while parsing the row.
470 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
471 m->numbered(row, curr_num_);
472 m->label(row, curr_label_);
473 if (curr_skip_.size()) {
474 m->vskip(LyXLength(curr_skip_), row);
// The row did not end on the control sequence named by a single backslash,
// i.e. the LaTeX line break.
480 if (prevToken() != Token("\\")) {
481 //lyxerr << "no newline here\n";
488 // restore "global" variables
489 curr_num_ = saved_num;
490 curr_label_ = saved_label;
// Parse a macro definition of the form \newcommand{\name}[narg]{body} and
// build a MathMacroTemplate from it.
// NOTE(review): the statements run after each diagnostic and the final
// return are elided in this listing.
494 MathMacroTemplate * Parser::parse_macro()
// Look past leading space tokens (loop body elided).
496 while (nextToken().cat() == catSpace)
499 if (getToken().cs() != "newcommand") {
500 lyxerr << "\\newcommand expected\n";
504 if (getToken().cat() != catBegin) {
505 lyxerr << "'{' expected\n";
// The macro name is the control-sequence name of the next token.
509 string name = getToken().cs();
511 if (getToken().cat() != catEnd) {
512 lyxerr << "'}' expected\n";
// Argument count read from [..]; an empty result means 0 arguments.
516 string arg = getArg('[', ']');
517 int narg = arg.empty() ? 0 : atoi(arg.c_str());
518 //lyxerr << "creating macro " << name << " with " << narg << "args\n";
519 MathMacroTemplate * p = new MathMacroTemplate(name, narg);
// The definition body must be a braced group and parsing ends at its
// closing brace.
520 parse_into(p->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
// Parse one complete formula and return it as a newly allocated
// MathMatrixInset. The first token decides the kind: a math-shift character
// or \( gives a simple formula; another visible branch builds an unnumbered-
// style LM_OT_EQUATION with flags 0; otherwise a \begin{name} environment is
// expected. Return statements and some conditions are elided in this
// listing; the visible lines are kept in place.
525 MathMatrixInset * Parser::parse_normal()
527 Token const & t = getToken();
529 if (t.cat() == catMath || t.cs() == "(") {
530 MathMatrixInset * p = new MathMatrixInset(LM_OT_SIMPLE);
531 parse_into(p->cell(0), 0);
// Anything else has to be a control sequence.
535 if (!t.cs().size()) {
536 lyxerr << "start of math expected, got '" << t << "'\n";
540 string const & cs = t.cs();
545 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQUATION);
546 parse_into(p->cell(0), 0);
547 p->numbered(0, curr_num_);
548 p->label(0, curr_label_);
553 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
557 string const name = getArg('{', '}');
// In LaTeX the starred form of an environment is the unnumbered one, so the
// "numbered" state is the negation of stared(name) in every branch below.
559 if (name == "equation" || name == "equation*") {
560 curr_num_ = !stared(name);
562 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQUATION);
563 parse_into(p->cell(0), FLAG_END);
564 p->numbered(0, curr_num_);
565 p->label(0, curr_label_);
569 if (name == "eqnarray" || name == "eqnarray*") {
570 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQNARRAY);
571 parse_lines(p, !stared(name), true);
575 if (name == "align" || name == "align*") {
576 MathMatrixInset * p = new MathMatrixInset(LM_OT_ALIGN);
577 p->halign(getArg('{', '}'));
578 parse_lines(p, !stared(name), true);
582 if (name == "alignat" || name == "alignat*") {
583 MathMatrixInset * p = new MathMatrixInset(LM_OT_ALIGNAT);
584 p->halign(getArg('{', '}'));
585 parse_lines(p, !stared(name), true);
// No known environment matched.
589 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
594 latexkeys const * Parser::read_delim()
596 Token const & t = getToken();
597 latexkeys const * l = in_word_set(t.asString());
598 return l ? l : in_word_set(".");
// Main parsing routine: consume tokens and append the insets they stand for
// to `array`. `flags` is a combination of the FLAG_* bits and decides both
// what is required at the start (FLAG_BRACE, FLAG_ITEM) and which token ends
// the run (FLAG_BRACE_LAST, FLAG_RIGHT, FLAG_END, FLAG_BRACK_END,
// FLAG_NEWLINE, FLAG_BLOCK, FLAG_LEAVE).
// NOTE(review): large parts of the body (loop header, returns/breaks,
// closing braces, several declarations such as `limits` and `ar`) are elided
// in this listing; comments below describe only what the visible lines show.
602 void Parser::parse_into(MathArray & array, unsigned flags)
// Text code applied to plain characters; changed by old-style font commands.
604 MathTextCodes yyvarcode = LM_TC_MIN;
610 Token const & t = getToken();
// Trace output for every token processed.
612 lyxerr << "t: " << t << " flags: " << flags << "'\n";
613 //array.dump(lyxerr);
// FLAG_ITEM: read one item, either a braced group or a single token.
616 if (flags & FLAG_ITEM) {
618 if (t.cat() == catBegin) {
619 // skip the brace and collect everything to the next matching
621 flags |= FLAG_BRACE_LAST;
624 // handle only this single token, leave the loop if done
// FLAG_BRACE: the run must start with an opening brace; the requirement is
// cleared once it has been checked.
629 if (flags & FLAG_BRACE) {
630 if (t.cat() != catBegin) {
631 error("Expected {. Maybe you forgot to enclose an argument in {}");
635 flags &= ~FLAG_BRACE;
// FLAG_BLOCK: closing brace, alignment tab, line break or \end are tested
// here (the action taken is elided).
640 if (flags & FLAG_BLOCK) {
641 if (t.cat() == catEnd || t.cat() == catAlign || t.cs() == "\\")
643 if (t.cs() == "end") {
// ---- dispatch on the token's category code ----
652 if (t.cat() == catMath)
// Letters become character insets in the current text code.
655 else if (t.cat() == catLetter)
656 array.push_back(new MathCharInset(t.character(), yyvarcode));
// Spaces are kept only while the text code is LM_TC_TEXTRM.
658 else if (t.cat() == catSpace && yyvarcode == LM_TC_TEXTRM)
659 array.push_back(new MathCharInset(' ', yyvarcode));
// Parameter character followed by a digit: macro argument; the digit
// character is converted to its numeric value.
661 else if (t.cat() == catParameter) {
662 Token const & n = getToken();
663 MathMacroArgument * p = new MathMacroArgument(n.character() - '0');
// Opening brace: nested group, parsed recursively up to its closing brace.
667 else if (t.cat() == catBegin) {
668 //lyxerr << " creating ScopeInset\n";
669 array.push_back(new MathScopeInset);
670 parse_into(array.back()->cell(0), FLAG_BRACE_LAST);
// Closing brace: complained about unless FLAG_BRACE_LAST is set.
673 else if (t.cat() == catEnd) {
674 if (!(flags & FLAG_BRACE_LAST))
675 lyxerr << " ##### unexpected end of block\n";
679 else if (t.cat() == catAlign) {
680 lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
// Superscript goes into cell 0, subscript into cell 1 of the script inset
// obtained from lastScriptInset().
684 else if (t.cat() == catSuper)
685 parse_into(lastScriptInset(array, true, limits)->cell(0), FLAG_ITEM);
687 else if (t.cat() == catSub)
688 parse_into(lastScriptInset(array, false, limits)->cell(1), FLAG_ITEM);
// A ']' is tested together with FLAG_BRACK_END (action elided).
690 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
693 else if (t.cat() == catOther)
694 array.push_back(new MathCharInset(t.character(), yyvarcode));
// ---- dispatch on the control-sequence name (actions partly elided) ----
699 else if (t.cs() == "protect")
702 else if (t.cs() == "end")
705 else if (t.cs() == ")")
708 else if (t.cs() == "]")
// Line break: an optional [..] argument is stored in curr_skip_; a debug
// message is written when FLAG_NEWLINE is not set.
711 else if (t.cs() == "\\") {
712 curr_skip_ = getArg('[', ']');
713 if (!(flags & FLAG_NEWLINE))
714 lyxerr[Debug::MATHED]
715 << "found newline unexpectedly, array: '" << array << "'\n";
719 else if (t.cs() == "limits")
722 else if (t.cs() == "nolimits")
725 else if (t.cs() == "nonumber")
728 else if (t.cs() == "number")
// \sqrt: two visible forms -- a MathRootInset whose cell 0 is parsed up to a
// closing bracket and cell 1 as an item, and a MathSqrtInset with a single
// item. The condition choosing between them is elided.
731 else if (t.cs() == "sqrt") {
734 array.push_back(new MathRootInset);
735 parse_into(array.back()->cell(0), FLAG_BRACK_END);
736 parse_into(array.back()->cell(1), FLAG_ITEM);
739 array.push_back(new MathSqrtInset);
740 parse_into(array.back()->cell(0), FLAG_ITEM);
// \left: read the left delimiter, parse up to \right, read the right
// delimiter, and build a delimiter inset from both.
744 else if (t.cs() == "left") {
745 latexkeys const * l = read_delim();
747 parse_into(ar, FLAG_RIGHT);
748 latexkeys const * r = read_delim();
749 MathDelimInset * dl = new MathDelimInset(l, r);
// \right outside of a \left ... \right run is an error.
754 else if (t.cs() == "right") {
755 if (!(flags & FLAG_RIGHT))
756 error("Unmatched right delimiter");
763 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
764 //MathArray tmp = array;
765 //MathSizeInset * p = new MathSizeInset(MathStyles(lval_->id));
766 //array.push_back(p);
767 //parse_into(p->cell(0), FLAG_BRACE_FONT);
// A name with a macro template: clone it and parse one item per argument.
772 if (MathMacroTable::hasTemplate(sval_)) {
773 MathMacro * m = MathMacroTable::cloneTemplate(sval_);
774 for (int i = 0; i < m->nargs(); ++i)
775 parse_into(m->cell(i), FLAG_ITEM);
777 m->metrics(LM_ST_TEXT);
779 array.push_back(new MathFuncInset(sval_));
783 array.push_back(new MathCharInset(ival_, LM_TC_SPECIAL));
// \begin inside a formula: only the "array" environment is handled here.
787 else if (t.cs() == "begin") {
788 string const name = getArg('{', '}');
789 if (name == "array") {
// The appended 'c' guarantees valign[0] exists when no [..] argument was
// given.
790 string const valign = getArg('[', ']') + 'c';
791 string const halign = getArg('{', '}');
// One column per character of the column specification, one row to start.
792 MathArrayInset * m = new MathArrayInset(halign.size(), 1);
793 m->valign(valign[0]);
795 parse_lines(m, false, false);
798 lyxerr[Debug::MATHED] << "unknow math inset begin '" << name << "'\n";
// \label: the argument is parsed as an item and its text form is stored in
// curr_label_.
801 else if (t.cs() == "label") {
803 parse_into(ar, FLAG_ITEM);
806 curr_label_ = os.str();
808 //curr_label_ = getArg('{', '}');
// \choose / \over: everything parsed so far moves into cell 0 of the new
// inset, the rest of the block is parsed into cell 1.
811 else if (t.cs() == "choose" || t.cs() == "over") {
813 MathInset * p = createMathInset(t.cs());
814 p->cell(0).swap(array);
816 parse_into(p->cell(1), FLAG_BLOCK);
// Any other control sequence: look it up in the keyword table.
819 else if (t.cs().size()) {
821 latexkeys const * l = in_word_set(t.cs());
// Font command with an argument: parse the item, then apply the font to
// each element.
823 if (l->token == LM_TK_FONT) {
824 //lyxerr << "starting font\n";
826 parse_into(ar, FLAG_ITEM);
827 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it)
828 (*it)->handleFont(static_cast<MathTextCodes>(l->id));
830 //lyxerr << "ending font\n";
// Old-style font switch: changes the text code for following characters.
833 else if (l->token == LM_TK_OLDFONT)
834 yyvarcode = static_cast<MathTextCodes>(l->id);
// Generic insets: created through the factory, one item parsed per argument.
837 MathInset * p = createMathInset(t.cs());
838 for (int i = 0; i < p->nargs(); ++i)
839 parse_into(p->cell(i), FLAG_ITEM);
845 MathInset * p = createMathInset(t.cs());
847 for (int i = 0; i < p->nargs(); ++i)
848 parse_into(p->cell(i), FLAG_ITEM);
851 error("Unrecognized token");
852 //lyxerr[Debug::MATHED] << "[" << t << "]\n";
// FLAG_LEAVE is cleared once it has been seen (the exit itself is elided).
859 if (flags & FLAG_LEAVE) {
860 flags &= ~FLAG_LEAVE;
// Error recovery: tokens are skipped until \end or the end of input.
866 lyxerr << " Math Panic, expect problems!\n";
867 // Search for the end command.
871 } while (good() && t.cs() != "end");
875 } // anonymous namespace
// Parse the LaTeX math in `str` into a MathArray, using flags 0.
// The input is taken through c_str(), so it ends at the first NUL character.
// NOTE(review): the declarations of `parser` and `ar` and the return
// statement are elided in this listing.
879 MathArray mathed_parse_cell(string const & str)
881 istringstream is(str.c_str());
884 parser.parse_into(ar, 0);
// Public entry points for parsing a macro definition. All three overloads
// end in Parser::parse_macro(); they differ only in the input source.
// NOTE(review): the line constructing `parser` is elided in each overload.
// From a string (read through c_str(), so it ends at the first NUL).
890 MathMacroTemplate * mathed_parse_macro(string const & str)
892 istringstream is(str.c_str());
894 return parser.parse_macro();
// From an input stream.
897 MathMacroTemplate * mathed_parse_macro(istream & is)
900 return parser.parse_macro();
// From a LyXLex.
903 MathMacroTemplate * mathed_parse_macro(LyXLex & lex)
906 return parser.parse_macro();
// Public entry points for parsing a complete formula. All three overloads
// end in Parser::parse_normal(); they differ only in the input source.
// NOTE(review): the line constructing `parser` is elided in each overload.
// From a string (read through c_str(), so it ends at the first NUL).
911 MathMatrixInset * mathed_parse_normal(string const & str)
913 istringstream is(str.c_str());
915 return parser.parse_normal();
// From an input stream.
918 MathMatrixInset * mathed_parse_normal(istream & is)
921 return parser.parse_normal();
// From a LyXLex.
924 MathMatrixInset * mathed_parse_normal(LyXLex & lex)
927 return parser.parse_normal();