3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
20 If someone desperately needs partial "structures" (such as a few cells of
21 an array inset or similar), (s)he could use the following hack as a starting
22 point to write some macros:
27 \def\makeamptab{\catcode`\&=4\relax}
28 \def\makeampletter{\catcode`\&=11\relax}
29 \def\b{\makeampletter\expandafter\makeamptab\bi}
52 #pragma implementation
55 #include "math_parser.h"
56 #include "math_inset.h"
57 #include "math_arrayinset.h"
58 #include "math_braceinset.h"
59 #include "math_charinset.h"
60 #include "math_deliminset.h"
61 #include "math_factory.h"
62 #include "math_funcinset.h"
63 #include "math_kerninset.h"
64 #include "math_macro.h"
65 #include "math_macrotable.h"
66 #include "math_macrotemplate.h"
67 #include "math_hullinset.h"
68 #include "math_rootinset.h"
69 #include "math_sqrtinset.h"
70 #include "math_scriptinset.h"
71 #include "math_specialcharinset.h"
72 #include "math_splitinset.h"
73 #include "math_sqrtinset.h"
74 #include "math_support.h"
77 #include "support/lstrings.h"
// Returns true when s is non-empty and its last character is '*'.
// Used further down to tell starred environment names such as "align*"
// from their unstarred counterparts.
bool stared(string const & s)
{
	// size_type avoids narrowing the string length to unsigned
	string::size_type const n = s.size();
	return n != 0 && s[n - 1] == '*';
}
95 void add(MathArray & ar, char c, MathTextCodes code)
97 ar.push_back(MathAtom(new MathCharInset(c, code)));
// These are TeX's catcodes. The enumerators are listed in TeX's numeric
// order so that each one's value equals the catcode number in its comment.
enum CatCode {
	catEscape,     //  0    backslash
	catBegin,      //  1    {
	catEnd,        //  2    }
	catMath,       //  3    $
	catAlign,      //  4    &
	catNewline,    //  5    end of line
	catParameter,  //  6    #
	catSuper,      //  7    ^
	catSub,        //  8    _
	catIgnore,     //  9    ignored character
	catSpace,      // 10    space
	catLetter,     // 11    a-zA-Z
	catOther,      // 12    none of the above
	catActive,     // 13    ~
	catComment,    // 14    %
	catInvalid     // 15    <delete>
};

// Lookup table: category code of every possible 8-bit character.
CatCode theCatcode[256];

// Returns the category code currently assigned to c. The parameter is
// unsigned char so that characters >= 0x80 index the table correctly.
inline CatCode catcode(unsigned char c)
{
	return theCatcode[c];
}
// Bit flags for the `flags` argument of Parser::parse_into(). They can be
// or-ed together; each one names a condition that ends (or shapes) the
// current parse_into() run.
enum {
	FLAG_BRACE_LAST = 1 << 1,  //  last closing brace ends the parsing process
	FLAG_RIGHT      = 1 << 2,  //  next \\right ends the parsing process
	FLAG_END        = 1 << 3,  //  next \\end ends the parsing process
	FLAG_BRACK_END  = 1 << 4,  //  next closing bracket ends the parsing process
	FLAG_ITEM       = 1 << 7,  //  read a (possibly braced token)
	FLAG_BLOCK      = 1 << 8,  //  next block ends the parsing process
	FLAG_LEAVE      = 1 << 9   //  leave the loop at the end
};
143 for (int i = 0; i <= 255; ++i)
144 theCatcode[i] = catOther;
145 for (int i = 'a'; i <= 'z'; ++i)
146 theCatcode[i] = catLetter;
147 for (int i = 'A'; i <= 'Z'; ++i)
148 theCatcode[i] = catLetter;
150 theCatcode['\\'] = catEscape;
151 theCatcode['{'] = catBegin;
152 theCatcode['}'] = catEnd;
153 theCatcode['$'] = catMath;
154 theCatcode['&'] = catAlign;
155 theCatcode['\n'] = catNewline;
156 theCatcode['#'] = catParameter;
157 theCatcode['^'] = catSuper;
158 theCatcode['_'] = catSub;
159 theCatcode['
\7f'] = catIgnore;
160 theCatcode[' '] = catSpace;
161 theCatcode['\t'] = catSpace;
162 theCatcode['\r'] = catSpace;
163 theCatcode['~'] = catActive;
164 theCatcode['%'] = catComment;
170 // Helper class for parsing
176 Token() : cs_(), char_(0), cat_(catIgnore) {}
178 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
180 Token(const string & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
183 string const & cs() const { return cs_; }
185 CatCode cat() const { return cat_; }
187 char character() const { return char_; }
189 string asString() const;
200 string Token::asString() const
202 return cs_.size() ? cs_ : string(1, char_);
205 bool operator==(Token const & s, Token const & t)
207 return s.character() == t.character()
208 && s.cat() == t.cat() && s.cs() == t.cs();
// Inequality comparison for Tokens.
// NOTE(review): the body of this function is not present in this copy of
// the file (presumably the negation of operator== - confirm).
211 bool operator!=(Token const & s, Token const & t)
// Debug output for a Token. Two forms are visible: a backslash followed
// by the control sequence name, and "[character,catcode]".
// NOTE(review): the condition choosing between the two forms and the
// return statement are missing from this copy - presumably the first form
// is used when t.cs() is non-empty; confirm.
216 ostream & operator<<(ostream & os, Token const & t)
219 os << "\\" << t.cs();
221 os << "[" << t.character() << "," << t.cat() << "]";
// Member declarations of the Parser class.
// NOTE(review): this copy is incomplete - the class header, the access
// specifiers, the closing brace and several members that the definitions
// further down rely on (e.g. pop_back(), putback(), good(), getChar(),
// lineno_, pos_, curr_num_, curr_label_, curr_skip_) are not shown here.

// Construct from a LyXLex: tokenizes the lexer's stream.
230 Parser(LyXLex & lex);
// Construct from a plain input stream.
232 Parser(istream & is);
// Parses a \newcommand definition and hands it to MathMacroTable::create().
235 string parse_macro();
// Parses one complete formula into the given atom.
237 bool parse_normal(MathAtom &);
// Parses tokens into `array`; `flags` is a combination of FLAG_* bits.
239 void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
// Line number recorded at construction time.
241 int lineno() const { return lineno_; }
// Reads an argument delimited by the characters lf and rf.
247 string getArg(char lf, char rf);
// Reports a parse error on lyxerr, prefixed with the line number.
251 void error(string const & msg);
// Parses the rows and columns of a grid-like inset.
253 bool parse_lines(MathAtom & t, bool numbered, bool outmost);
// Collects the stream's text up to "\end_inset" for tokenization.
257 void tokenize(istream & is);
// Splits a string into tokens_.
259 void tokenize(string const & s);
// Appends a token to tokens_.
261 void push_back(Token const & t);
// Token before the current read position (a dummy if there is none).
265 Token const & prevToken() const;
// Token at the current read position, without consuming it.
267 Token const & nextToken() const;
// Token at the current read position; advances the position.
269 Token const & getToken();
// NOTE(review): no definition of lex() is visible in this copy.
271 void lex(string const & s);
// All tokens of the input, in order.
278 std::vector<Token> tokens_;
290 Parser::Parser(LyXLex & lexer)
291 : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
293 tokenize(lexer.getStream());
// Builds a parser from a plain input stream; the line number starts at 0.
// NOTE(review): the constructor body is missing from this copy
// (presumably it tokenizes `is`, like the LyXLex constructor - confirm).
298 Parser::Parser(istream & is)
299 : lineno_(0), pos_(0), curr_num_(false)
305 void Parser::push_back(Token const & t)
307 tokens_.push_back(t);
// Counterpart of push_back().
// NOTE(review): the body is missing from this copy of the file
// (presumably it removes the last element of tokens_ - confirm).
311 void Parser::pop_back()
317 Token const & Parser::prevToken() const
319 static const Token dummy;
320 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
324 Token const & Parser::nextToken() const
326 static const Token dummy;
327 return good() ? tokens_[pos_] : dummy;
331 Token const & Parser::getToken()
333 static const Token dummy;
334 return good() ? tokens_[pos_++] : dummy;
// Un-reads the most recently consumed token.
// NOTE(review): the body is missing from this copy of the file
// (presumably it steps pos_ back by one - confirm).
338 void Parser::putback()
344 bool Parser::good() const
346 return pos_ < tokens_.size();
350 char Parser::getChar()
353 lyxerr << "The input stream is not well..." << endl;
354 return tokens_[pos_++].character();
// Reads an argument delimited by the characters lf (opening) and rg
// (closing) and returns it as a string.
// NOTE(review): most of this function is missing from this copy; only the
// loop header survives. Presumably the argument is optional and the
// characters between the delimiters are accumulated - confirm.
358 string Parser::getArg(char lf, char rg)
// Consume characters until the closing delimiter or the end of input.
366 while ((c = getChar()) != rg && good())
// Collects the text of `is` that is to be tokenized.
// NOTE(review): the read loop and the final hand-off of s are missing
// from this copy of the file.
373 void Parser::tokenize(istream & is)
375 // eat everything up to the next \end_inset or end of stream
376 // and store it in s for further tokenization
// Once s ends in the 10-character marker "\end_inset", strip the marker.
381 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
382 s = s.substr(0, s.size() - 10);
// Splits `buffer` into Tokens and appends them to tokens_, dispatching on
// the category code of each character read.
// NOTE(review): the case labels, braces and several statements of this
// function are missing from this copy of the file.
392 void Parser::tokenize(string const & buffer)
// Guards one-time initialisation (presumably of theCatcode - confirm).
394 static bool init_done = false;
// buffer.c_str() stops at the first embedded NUL character, if any.
401 istringstream is(buffer.c_str(), ios::in | ios::binary);
406 switch (catcode(c)) {
// A second newline would start a new paragraph in TeX; the "par" token
// is deliberately not emitted (the statement is commented out).
410 if (catcode(c) == catNewline)
411 ; //push_back(Token("par"));
413 push_back(Token(' ', catSpace));
// Reads characters until a newline or the end of the stream.
420 while (is.get(c) && catcode(c) != catNewline)
// Control word: gather the run of letters ...
429 if (catcode(c) == catLetter) {
430 while (is.get(c) && catcode(c) == catLetter)
// ... then read the run of spaces that follows it.
432 if (catcode(c) == catSpace)
433 while (is.get(c) && catcode(c) == catSpace)
// Character token, stored together with its category code.
442 push_back(Token(c, catcode(c)));
// Debug dump of the complete token list.
447 lyxerr << "\nTokens: ";
448 for (unsigned i = 0; i < tokens_.size(); ++i)
449 lyxerr << tokens_[i];
// Writes msg to lyxerr, prefixed with the approximate line number that was
// recorded when the parser was constructed.
// NOTE(review): the braces, and possibly one further line, of this
// function are missing from this copy of the file.
455 void Parser::error(string const & msg)
457 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parses the cells of the grid inset held by t, row by row. Each cell is
// read with parse_into(..., FLAG_BLOCK). curr_num_ and curr_label_ are
// saved on entry and restored at the end.
// NOTE(review): the braces, the error returns, the loop exits and the
// condition using `outmost` are missing from this copy of the file.
462 bool Parser::parse_lines(MathAtom & t, bool numbered, bool outmost)
463 MathGridInset * p = t->asGridInset();
465 lyxerr << "error in Parser::parse_lines() 1\n";
// The column count is fixed by the inset.
470 const int cols = p->ncols();
472 // save global variables
473 bool const saved_num = curr_num_;
474 string const saved_label = curr_label_;
// No row limit: the loop has to be left from inside its body.
476 for (int row = 0; true; ++row) {
477 // reset global variables
478 curr_num_ = numbered;
482 for (int col = 0; col < cols; ++col) {
483 //lyxerr << "reading cell " << row << " " << col << "\n";
// Cells are addressed row-major.
484 parse_into(p->cell(col + row * cols), FLAG_BLOCK);
// The cell did not end on '&'.
487 if (prevToken().cat() != catAlign) {
488 //lyxerr << "less cells read than normal in row/col: "
489 // << row << " " << col << "\n";
// Store the numbering flag, label and vertical skip collected while
// parsing this row in the hull inset.
495 MathHullInset * m = t->asMatrixInset();
497 lyxerr << "error in Parser::parse_lines() 2\n";
500 m->numbered(row, curr_num_);
501 m->label(row, curr_label_);
502 if (curr_skip_.size()) {
503 m->vskip(LyXLength(curr_skip_), row);
// The row did not end on the control sequence "\\".
509 if (prevToken() != Token("\\")) {
510 //lyxerr << "no newline here\n";
517 // restore "global" variables
518 curr_num_ = saved_num;
519 curr_label_ = saved_label;
// Parses a macro definition of the form \newcommand{\name}[n]{body} and
// registers it via MathMacroTable::create(name, narg, ar).
// NOTE(review): the braces, the early returns after each error message
// and the final return are missing from this copy; presumably `name` is
// returned, with "{error}" as the failure value - confirm.
525 string Parser::parse_macro()
527 string name = "{error}";
// Skip leading spaces.
529 while (nextToken().cat() == catSpace)
532 if (getToken().cs() != "newcommand") {
533 lyxerr << "\\newcommand expected\n";
537 if (getToken().cat() != catBegin) {
538 lyxerr << "'{' in \\newcommand expected (1)\n";
// The macro name is the control sequence between the first pair of braces.
542 name = getToken().cs();
544 if (getToken().cat() != catEnd) {
545 lyxerr << "'}' expected\n";
// Argument count in [..]; an empty argument means zero. atoi() yields 0
// for non-numeric text.
549 string arg = getArg('[', ']');
550 int narg = arg.empty() ? 0 : atoi(arg.c_str());
552 if (getToken().cat() != catBegin) {
553 lyxerr << "'{' in \\newcommand expected (2)\n";
// The body runs up to the matching closing brace.
558 parse_into(ar, FLAG_BRACE_LAST);
559 MathMacroTable::create(name, narg, ar);
// Parses one complete formula and stores the resulting hull inset in
// `matrix`. The visible branches cover $...$, $$...$$ and
// \begin{<environment>} for the environments listed below.
// NOTE(review): the braces, several conditions and all plain return
// statements are missing from this copy of the file.
564 bool Parser::parse_normal(MathAtom & matrix)
// Skip leading spaces.
566 while (nextToken().cat() == catSpace)
569 Token const & t = getToken();
572 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
573 parse_into(matrix->cell(0), 0);
577 if (t.cat() == catMath) {
578 Token const & n = getToken();
579 if (n.cat() == catMath) {
580 // TeX's $$...$$ syntax for displayed math
581 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
582 MathHullInset * p = matrix->asMatrixInset();
583 parse_into(p->cell(0), 0);
584 p->numbered(0, curr_num_);
585 p->label(0, curr_label_);
587 // simple $...$ stuff
589 matrix = MathAtom(new MathHullInset(LM_OT_SIMPLE));
590 parse_into(matrix->cell(0), 0);
// Anything else must be a control sequence.
595 if (!t.cs().size()) {
596 lyxerr << "start of math expected, got '" << t << "'\n";
600 string const & cs = t.cs();
605 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
606 MathHullInset * p = matrix->asMatrixInset();
607 parse_into(p->cell(0), 0);
608 p->numbered(0, curr_num_);
609 p->label(0, curr_label_);
614 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
// Environment name from \begin{name}.
618 string const name = getArg('{', '}');
620 if (name == "equation" || name == "equation*" || name == "displaymath") {
// Only the plain "equation" environment is numbered.
621 curr_num_ = (name == "equation");
623 matrix = MathAtom(new MathHullInset(LM_OT_EQUATION));
624 MathHullInset * p = matrix->asMatrixInset();
625 parse_into(p->cell(0), FLAG_END);
626 p->numbered(0, curr_num_);
627 p->label(0, curr_label_);
// Multi-row environments: the starred variants are parsed unnumbered.
631 if (name == "eqnarray" || name == "eqnarray*") {
632 matrix = MathAtom(new MathHullInset(LM_OT_EQNARRAY));
633 return parse_lines(matrix, !stared(name), true);
636 if (name == "align" || name == "align*") {
637 matrix = MathAtom(new MathHullInset(LM_OT_ALIGN));
638 return parse_lines(matrix, !stared(name), true);
// The alignat family takes a {n} argument; the inset gets 2*n columns.
641 if (name == "alignat" || name == "alignat*") {
642 int nc = 2 * atoi(getArg('{', '}').c_str());
643 matrix = MathAtom(new MathHullInset(LM_OT_ALIGNAT, nc));
644 return parse_lines(matrix, !stared(name), true);
647 if (name == "xalignat" || name == "xalignat*") {
648 int nc = 2 * atoi(getArg('{', '}').c_str());
649 matrix = MathAtom(new MathHullInset(LM_OT_XALIGNAT, nc));
650 return parse_lines(matrix, !stared(name), true);
653 if (name == "xxalignat") {
654 int nc = 2 * atoi(getArg('{', '}').c_str());
655 matrix = MathAtom(new MathHullInset(LM_OT_XXALIGNAT, nc));
656 return parse_lines(matrix, !stared(name), true);
659 if (name == "multline" || name == "multline*") {
660 matrix = MathAtom(new MathHullInset(LM_OT_MULTLINE));
661 return parse_lines(matrix, !stared(name), true);
664 if (name == "gather" || name == "gather*") {
665 matrix = MathAtom(new MathHullInset(LM_OT_GATHER));
666 return parse_lines(matrix, !stared(name), true);
669 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
// Core parsing loop: consumes tokens with getToken() and appends the
// corresponding atoms to `array`. `flags` is a combination of FLAG_* bits
// that decides when to stop; `code` is the text code given to character
// insets created through add(). Nested constructs are handled by calling
// parse_into() recursively on the cells of the newly created insets.
// NOTE(review): the braces, the loop header, the return/break statements
// and several conditions are missing from this copy of the file.
674 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
680 Token const & t = getToken();
682 //lyxerr << "t: " << t << " flags: " << flags << "'\n";
683 //array.dump(lyxerr);
686 if (flags & FLAG_ITEM) {
688 if (t.cat() == catBegin) {
689 // skip the brace and collect everything to the next matching
691 flags |= FLAG_BRACE_LAST;
694 // handle only this single token, leave the loop if done
// FLAG_BLOCK: a cell ends at '&', at "\\" or at \end.
699 if (flags & FLAG_BLOCK) {
700 if (t.cat() == catAlign || t.cs() == "\\")
702 if (t.cs() == "end") {
711 if (t.cat() == catMath)
714 else if (t.cat() == catLetter)
715 add(array, t.character(), code);
// Spaces are kept only when the current text code is LM_TC_TEXTRM.
717 else if (t.cat() == catSpace && code == LM_TC_TEXTRM)
718 add(array, t.character(), code);
// Macro parameter: the next token's character is taken as the digit.
// NOTE(review): no check is visible that it is in '1'..'9'.
720 else if (t.cat() == catParameter) {
721 Token const & n = getToken();
722 array.push_back(MathAtom(new MathMacroArgument(n.character() - '0')));
725 else if (t.cat() == catBegin) {
727 parse_into(ar, FLAG_BRACE_LAST);
// NOTE(review): #ifndef makes the #warning fire when WITH_WARNINGS is
// *not* defined - confirm this was not meant to be #ifdef.
728 #ifndef WITH_WARNINGS
729 #warning this might be wrong in general!
731 // ignore braces around simple items
732 if (ar.size() == 1 || (ar.size() == 2 && ar.back()->asScriptInset())) {
735 array.push_back(MathAtom(new MathBraceInset));
736 array.back()->cell(0).swap(ar);
740 else if (t.cat() == catEnd) {
741 if (flags & FLAG_BRACE_LAST)
743 //lyxerr << "found '}' unexpectedly, array: '" << array << "'\n";
744 lyxerr << "found '}' unexpectedly\n";
745 add(array, '}', LM_TC_TEX);
748 else if (t.cat() == catAlign) {
749 //lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
750 lyxerr << "found tab unexpectedly\n";
751 add(array, '&', LM_TC_TEX);
// Super-/subscript: reuse the script inset at the end of the array unless
// it already has this script, otherwise append a new one.
754 else if (t.cat() == catSuper || t.cat() == catSub) {
755 bool up = (t.cat() == catSuper);
756 MathScriptInset * p = 0;
758 p = array.back()->asScriptInset();
759 if (!p || p->has(up)) {
760 array.push_back(MathAtom(new MathScriptInset(up)));
761 p = array.back()->asScriptInset();
764 parse_into(p->cell(up), FLAG_ITEM);
769 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
772 else if (t.cat() == catOther)
773 add(array, t.character(), code);
// From here on the token is a control sequence.
778 else if (t.cs() == "protect")
779 // ignore \\protect, will be re-added during output
782 else if (t.cs() == "end")
785 else if (t.cs() == ")")
788 else if (t.cs() == "]")
// A "\\" that was not consumed as a row end: keep its optional [skip]
// argument in curr_skip_.
791 else if (t.cs() == "\\") {
792 curr_skip_ = getArg('[', ']');
793 //lyxerr << "found newline unexpectedly, array: '" << array << "'\n";
794 lyxerr << "found newline unexpectedly\n";
795 array.push_back(createMathInset("\\"));
798 else if (t.cs() == "limits")
801 else if (t.cs() == "nolimits")
804 else if (t.cs() == "nonumber")
807 else if (t.cs() == "number")
// \sqrt: the first form builds a MathRootInset whose cell 0 is read up to
// ']' (FLAG_BRACK_END); the second builds a plain MathSqrtInset.
810 else if (t.cs() == "sqrt") {
813 array.push_back(MathAtom(new MathRootInset));
814 parse_into(array.back()->cell(0), FLAG_BRACK_END);
815 parse_into(array.back()->cell(1), FLAG_ITEM);
818 array.push_back(MathAtom(new MathSqrtInset));
819 parse_into(array.back()->cell(0), FLAG_ITEM);
// \left<l> ... \right<r>: the delimiters are the tokens right after
// \left and after the contents parsed with FLAG_RIGHT.
823 else if (t.cs() == "left") {
824 string l = getToken().asString();
826 parse_into(ar, FLAG_RIGHT);
827 string r = getToken().asString();
828 MathAtom dl(new MathDelimInset(l, r));
833 else if (t.cs() == "right") {
834 if (!(flags & FLAG_RIGHT)) {
835 //lyxerr << "got so far: '" << array << "'\n";
836 error("Unmatched right delimiter");
844 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
845 //MathArray tmp = array;
846 //MathSizeInset * p = new MathSizeInset(MathStyles(lval_->id));
847 //array.push_back(p);
848 //parse_into(p->cell(0), FLAG_BRACE_FONT);
// Nested environments: only "array" and "split" are handled.
854 else if (t.cs() == "begin") {
855 string const name = getArg('{', '}');
856 if (name == "array") {
// Appending 'c' makes valign[0] default to 'c' when no [..] was given.
857 string const valign = getArg('[', ']') + 'c';
858 string const halign = getArg('{', '}');
860 MathAtom(new MathArrayInset(halign.size(), 1, valign[0], halign)));
861 parse_lines(array.back(), false, false);
862 } else if (name == "split") {
863 array.push_back(MathAtom(new MathSplitInset(1)));
864 parse_lines(array.back(), false, false);
866 lyxerr[Debug::MATHED] << "unknow math inset begin '" << name << "'\n";
869 else if (t.cs() == "kern") {
// NOTE(review): this local t hides the outer t for the rest of its scope.
875 Token const & t = getToken();
881 if (isValidLength(s))
884 array.push_back(MathAtom(new MathKernInset(s)));
887 else if (t.cs() == "label") {
888 curr_label_ = getArg('{', '}');
// Infix commands: everything parsed so far becomes cell 0 of the new
// inset and the remaining input is parsed into cell 1.
891 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
892 MathAtom p = createMathInset(t.cs());
893 array.swap(p->cell(0));
894 parse_into(p->cell(1), flags, code);
901 else if (t.cs() == "mbox") {
902 array.push_back(createMathInset(t.cs()));
903 // slurp in the argument of mbox
905 MathBoxInset * p = array.back()->asBoxInset();
// Any other control sequence: look it up in the keyword table.
910 else if (t.cs().size()) {
911 latexkeys const * l = in_word_set(t.cs());
913 if (l->token == LM_TK_FONT) {
914 //lyxerr << "starting font\n";
915 //CatCode catSpaceSave = theCatcode[' '];
916 //if (l->id == LM_TC_TEXTRM) {
917 // // temporarily change catcode
918 // theCatcode[' '] = catLetter;
// Font command: parse its single argument with the font's text code.
922 parse_into(ar, FLAG_ITEM, static_cast<MathTextCodes>(l->id));
925 // undo catcode changes
926 ////theCatcode[' '] = catSpaceSave;
927 //lyxerr << "ending font\n";
// Old-style font switch: changes `code` for the tokens that follow.
930 else if (l->token == LM_TK_OLDFONT) {
931 code = static_cast<MathTextCodes>(l->id);
// Generic inset: parse one item per argument cell.
935 MathAtom p = createMathInset(t.cs());
936 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
937 parse_into(p->cell(i), FLAG_ITEM);
943 MathAtom p = createMathInset(t.cs());
944 for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
945 parse_into(p->cell(i), FLAG_ITEM);
// FLAG_LEAVE is cleared once it has been honoured.
951 if (flags & FLAG_LEAVE) {
952 flags &= ~FLAG_LEAVE;
958 lyxerr << " Math Panic, expect problems!\n";
959 // Search for the end command.
963 } while (good() && t.cs() != "end");
969 } // anonymous namespace
972 void mathed_parse_cell(MathArray & ar, string const & str)
974 istringstream is(str.c_str());
975 mathed_parse_cell(ar, is);
979 void mathed_parse_cell(MathArray & ar, istream & is)
981 Parser(is).parse_into(ar, 0);
986 string mathed_parse_macro(string const & str)
988 istringstream is(str.c_str());
990 return parser.parse_macro();
993 string mathed_parse_macro(istream & is)
996 return parser.parse_macro();
999 string mathed_parse_macro(LyXLex & lex)
1002 return parser.parse_macro();
1007 bool mathed_parse_normal(MathAtom & t, string const & str)
1009 istringstream is(str.c_str());
1011 return parser.parse_normal(t);
1014 bool mathed_parse_normal(MathAtom & t, istream & is)
1017 return parser.parse_normal(t);
1020 bool mathed_parse_normal(MathAtom & t, LyXLex & lex)
1023 return parser.parse_normal(t);