3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
23 #pragma implementation
26 #include "math_parser.h"
28 #include "math_inset.h"
29 #include "math_arrayinset.h"
30 #include "math_charinset.h"
31 #include "math_deliminset.h"
32 #include "math_factory.h"
33 #include "math_funcinset.h"
34 #include "math_kerninset.h"
35 #include "math_macro.h"
36 #include "math_macrotable.h"
37 #include "math_macrotemplate.h"
38 #include "math_matrixinset.h"
39 #include "math_rootinset.h"
40 #include "math_sqrtinset.h"
41 #include "math_scriptinset.h"
42 #include "math_specialcharinset.h"
43 #include "math_sqrtinset.h"
47 #include "support/lstrings.h"
// Returns true when s ends with '*', i.e. names the starred variant of an
// environment such as "equation*". An empty string is never starred.
bool stared(string const & s)
{
	// Keep the length in the string's own size type; storing size() in an
	// 'unsigned' silently narrows it where size_type is wider.
	string::size_type const n = s.size();
	return n > 0 && s[n - 1] == '*';
}
63 MathScriptInset * prevScriptInset(MathArray const & array)
65 MathInset * p = array.back();
66 return (p && p->isScriptInset()) ? static_cast<MathScriptInset *>(p) : 0;
// Obtains the script inset that a following '^' or '_' attaches to.
// It starts from prevScriptInset(array); the visible statements below build
// a new MathScriptInset instead, wrapping a clone of the last inset when that
// inset is scriptable and starting without a nucleus otherwise.
// NOTE(review): the conditions guarding these statements, the use of
// 'limits' and the return statement are not visible here -- confirm.
70 MathInset * lastScriptInset(MathArray & array, bool up, int limits)
72 MathScriptInset * p = prevScriptInset(array);
74 MathInset * b = array.back();
75 if (b && b->isScriptable()) {
76 p = new MathScriptInset(up, !up, b->clone());
79 p = new MathScriptInset(up, !up);
93 // These are TeX's catcodes
// The trailing comment on each enumerator gives its TeX category number
// and the character(s) it stands for.
95 catEscape, // 0 backslash
105 catSpace, // 10 space
106 catLetter, // 11 a-zA-Z
107 catOther, // 12 none of the above
110 catInvalid // 15 <delete>
// Category code of every byte value, indexed by the unsigned character.
113 CatCode theCatcode[256];
// Looks up the category code of byte c in theCatcode. Taking the argument
// as unsigned char keeps the index inside 0..255.
116 inline CatCode catcode(unsigned char c)
118 return theCatcode[c];
// Bit flags passed as 'flags' to Parser::parse_into; each selects a
// condition under which parsing of the current array stops.
// Bit 5 (1 << 5) is not assigned to any flag.
123 FLAG_BRACE = 1 << 0, // an opening brace needed
124 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing process
125 FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
126 FLAG_END = 1 << 3, // next \\end ends the parsing process
127 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing process
128 FLAG_NEWLINE = 1 << 6, // next \\\\ ends the parsing process
129 FLAG_ITEM = 1 << 7, // read a (possibly braced) token
130 FLAG_BLOCK = 1 << 8, // next block ends the parsing process
131 FLAG_LEAVE = 1 << 9 // leave the loop at the end
137 for (int i = 0; i <= 255; ++i)
138 theCatcode[i] = catOther;
139 for (int i = 'a'; i <= 'z'; ++i)
140 theCatcode[i] = catLetter;
141 for (int i = 'A'; i <= 'Z'; ++i)
142 theCatcode[i] = catLetter;
144 theCatcode['\\'] = catEscape;
145 theCatcode['{'] = catBegin;
146 theCatcode['}'] = catEnd;
147 theCatcode['$'] = catMath;
148 theCatcode['&'] = catAlign;
149 theCatcode['\n'] = catNewline;
150 theCatcode['#'] = catParameter;
151 theCatcode['^'] = catSuper;
152 theCatcode['_'] = catSub;
153 theCatcode['
\7f'] = catIgnore;
154 theCatcode[' '] = catSpace;
155 theCatcode['\t'] = catSpace;
156 theCatcode['\r'] = catSpace;
157 theCatcode['~'] = catActive;
158 theCatcode['%'] = catComment;
164 // Helper class for parsing
// A token carries a control-sequence name (cs_), a character (char_) and a
// category code (cat_).
// Default token: empty name, character 0, category catIgnore.
170 Token() : cs_(), char_(0), cat_(catIgnore) {}
// Character token with an explicit category code; the name stays empty.
172 Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
// Control-sequence token named cs; character 0, category catIgnore.
174 Token(const string & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
// Name of the control sequence (empty for tokens built from a character).
177 string const & cs() const { return cs_; }
// Category code of the token.
179 CatCode cat() const { return cat_; }
// Character of the token (0 for tokens built from a name).
181 char character() const { return char_; }
// Text of the token: the name if there is one, else the character.
183 string asString() const;
194 string Token::asString() const
196 return cs_.size() ? cs_ : string(1, char_);
199 bool operator==(Token const & s, Token const & t)
201 return s.character() == t.character()
202 && s.cat() == t.cat() && s.cs() == t.cs();
// Inequality of two tokens.
// NOTE(review): presumably the negation of operator== -- confirm against the body.
205 bool operator!=(Token const & s, Token const & t)
// Writes a token to os for debugging. The visible statements print a
// control sequence as a backslash followed by its name and a character
// token as "[character,catcode]".
// NOTE(review): the condition choosing between the two forms is not
// visible here; presumably it tests for a non-empty cs() -- confirm.
210 ostream & operator<<(ostream & os, Token const & t)
213 os << "\\" << t.cs();
215 os << "[" << t.character() << "," << t.cat() << "]";
// Creates a parser over the stream of a LyXLex, starting at its line number.
224 Parser(LyXLex & lex);
// Creates a parser over a plain input stream.
226 Parser(istream & is);
// Parses a \newcommand definition into a macro template.
229 MathMacroTemplate * parse_macro();
// Parses a whole formula ($...$, \(...\) or a \begin{...} environment).
231 MathMatrixInset * parse_normal();
// Parses tokens into array; 'flags' is a combination of the FLAG_* bits.
233 void parse_into(MathArray & array, unsigned flags, MathTextCodes = LM_TC_MIN);
// Line number used in error messages.
235 int lineno() const { return lineno_; }
// Reads an argument delimited by lf and rf.
241 string getArg(char lf, char rf);
// Reports a parse error on lyxerr.
245 void error(string const & msg);
// Reads the rows and cells of a grid inset.
247 void parse_lines(MathGridInset * p, bool numbered, bool outmost);
// Reads the next token and looks it up as a delimiter.
249 latexkeys const * read_delim();
// Splits the contents of a stream into tokens.
253 void tokenize(istream & is);
// Splits a string into tokens.
255 void tokenize(string const & s);
// Appends a token to the token buffer.
257 void push_back(Token const & t);
// Token just before the read position.
261 Token const & prevToken() const;
// Token at the read position, without consuming it.
263 Token const & nextToken() const;
// Token at the read position; advances the read position.
265 Token const & getToken();
// NOTE(review): no definition of lex() is visible in this section.
267 void lex(string const & s);
// Token buffer; pos_ indexes into it.
274 std::vector<Token> tokens_;
// Takes the starting line number from the lexer, starts reading at token 0
// with numbering off, and tokenizes the lexer's underlying stream.
286 Parser::Parser(LyXLex & lexer)
287 : lineno_(lexer.getLineNo()), pos_(0), curr_num_(false)
289 tokenize(lexer.getStream());
// Starts at line number 0 and token 0 with numbering off.
// NOTE(review): presumably tokenizes 'is' like the LyXLex constructor does
// with its stream -- confirm against the body.
294 Parser::Parser(istream & is)
295 : lineno_(0), pos_(0), curr_num_(false)
// Appends t to the end of the token buffer; the read position is unchanged.
301 void Parser::push_back(Token const & t)
303 tokens_.push_back(t);
// NOTE(review): presumably removes the last token from the buffer, as the
// counterpart of push_back -- confirm against the body.
307 void Parser::pop_back()
313 Token const & Parser::prevToken() const
315 static const Token dummy;
316 return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
320 Token const & Parser::nextToken() const
322 static const Token dummy;
323 return good() ? tokens_[pos_] : dummy;
327 Token const & Parser::getToken()
329 static const Token dummy;
330 return good() ? tokens_[pos_++] : dummy;
// NOTE(review): presumably steps the read position back by one token --
// confirm against the body.
334 void Parser::putback()
// True while the read position still refers to a token in the buffer.
340 bool Parser::good() const
342 return pos_ < tokens_.size();
// Returns the character of the token at the read position and advances
// past it.
// NOTE(review): the message below is presumably guarded by a !good() check;
// no bounds check on the indexing of tokens_ is visible here -- confirm.
346 char Parser::getChar()
349 lyxerr << "The input stream is not well..." << endl;
350 return tokens_[pos_++].character();
// Reads a delimited argument. The visible loop consumes characters through
// getChar() until the closing delimiter rg has been read or the token
// buffer is exhausted.
// NOTE(review): the handling of the opening delimiter lf and the returned
// string are not visible here -- confirm.
354 string Parser::getArg(char lf, char rg)
362 while ((c = getChar()) != rg && good())
// Collects the text of one inset from 'is' for tokenization.
369 void Parser::tokenize(istream & is)
371 // eat everything up to the next \end_inset or end of stream
372 // and store it in s for further tokenization
// "\end_inset" is 10 characters long; once s ends with it, the marker
// itself is cut off again.
377 if (s.size() >= 10 && s.substr(s.size() - 10) == "\\end_inset") {
378 s = s.substr(0, s.size() - 10);
// Splits buffer into tokens, classifying each character by its catcode,
// and appends them to the token buffer.
// NOTE(review): several case labels and statements of the switch are not
// visible here; the notes below cover only the visible lines.
388 void Parser::tokenize(string const & buffer)
// one-time guard; presumably for setting up the catcode table -- confirm
390 static bool init_done = false;
397 istringstream is(buffer, ios::in | ios::binary);
402 switch (catcode(c)) {
406 if (catcode(c) == catNewline)
407 ; //push_back(Token("par"));
409 push_back(Token(' ', catSpace));
// consume characters up to and including the next newline
416 while (is.get(c) && catcode(c) != catNewline)
// a run of letters is read as one unit, and a run of spaces after it is
// consumed as well
425 if (catcode(c) == catLetter) {
426 while (is.get(c) && catcode(c) == catLetter)
428 if (catcode(c) == catSpace)
429 while (is.get(c) && catcode(c) == catSpace)
// character token carrying its own category code
438 push_back(Token(c, catcode(c)));
// dump the complete token buffer to lyxerr
443 lyxerr << "\nTokens: ";
444 for (unsigned i = 0; i < tokens_.size(); ++i)
445 lyxerr << tokens_[i];
// Writes msg to lyxerr, prefixed with the approximate line number held in
// lineno_. Parsing is not aborted here.
451 void Parser::error(string const & msg)
453 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Reads the cells of grid inset p row by row, parsing each cell with
// FLAG_BLOCK. 'numbered' is the numbering state every row starts with.
// curr_num_ and curr_label_ are saved on entry and restored at the end.
// NOTE(review): how the row loop terminates and what 'outmost' selects are
// not visible here; the cast to MathMatrixInset is presumably guarded by
// it -- confirm.
457 void Parser::parse_lines(MathGridInset * p, bool numbered, bool outmost)
459 const int cols = p->ncols();
461 // save global variables
462 bool const saved_num = curr_num_;
463 string const saved_label = curr_label_;
465 for (int row = 0; true; ++row) {
466 // reset global variables
467 curr_num_ = numbered;
471 for (int col = 0; col < cols; ++col) {
472 //lyxerr << "reading cell " << row << " " << col << "\n";
// cells are addressed row-major: index = col + row * cols
473 parse_into(p->cell(col + row * cols), FLAG_BLOCK);
// the cell was not terminated by '&'
476 if (prevToken().cat() != catAlign) {
477 //lyxerr << "less cells read than normal in row/col: "
478 // << row << " " << col << "\n";
// transfer numbering, label and vertical skip collected while parsing the
// row to the matrix inset
484 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
485 m->numbered(row, curr_num_);
486 m->label(row, curr_label_);
487 if (curr_skip_.size()) {
488 m->vskip(LyXLength(curr_skip_), row);
// the row was not terminated by the control sequence "\\"
494 if (prevToken() != Token("\\")) {
495 //lyxerr << "no newline here\n";
502 // restore "global" variables
503 curr_num_ = saved_num;
504 curr_label_ = saved_label;
// Parses a macro definition of the form \newcommand{\name}[nargs]{body}
// and builds a MathMacroTemplate from it; the body is parsed into cell 0.
// NOTE(review): the statements following each error message and the final
// return are not visible here -- confirm.
508 MathMacroTemplate * Parser::parse_macro()
// leading spaces are looked at here (the loop body is not visible)
510 while (nextToken().cat() == catSpace)
513 if (getToken().cs() != "newcommand") {
514 lyxerr << "\\newcommand expected\n";
518 if (getToken().cat() != catBegin) {
519 lyxerr << "'{' expected\n";
// the macro name is the control sequence between the braces
523 string name = getToken().cs();
525 if (getToken().cat() != catEnd) {
526 lyxerr << "'}' expected\n";
// optional [n] argument count; an empty argument means zero arguments
530 string arg = getArg('[', ']');
531 int narg = arg.empty() ? 0 : atoi(arg.c_str());
532 //lyxerr << "creating macro " << name << " with " << narg << "args\n";
533 MathMacroTemplate * p = new MathMacroTemplate(name, narg);
// the body must open with '{' and ends at the matching '}'
534 parse_into(p->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
// Parses one complete formula and builds the matching MathMatrixInset:
// an inline formula, a displayed equation, or one of the environments
// equation, eqnarray, align and alignat (each with a starred variant).
// NOTE(review): the conditions around the second LM_OT_EQUATION branch and
// the "'begin' ... expected" message, as well as all return statements,
// are not visible here -- confirm.
539 MathMatrixInset * Parser::parse_normal()
// leading spaces are looked at here (the loop body is not visible)
541 while (nextToken().cat() == catSpace)
544 Token const & t = getToken();
// '$' or the control sequence "(" opens a simple (inline) formula
546 if (t.cat() == catMath || t.cs() == "(") {
547 MathMatrixInset * p = new MathMatrixInset(LM_OT_SIMPLE);
548 parse_into(p->cell(0), 0);
// anything else has to be a control sequence
552 if (!t.cs().size()) {
553 lyxerr << "start of math expected, got '" << t << "'\n";
557 string const & cs = t.cs();
562 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQUATION);
563 parse_into(p->cell(0), 0);
564 p->numbered(0, curr_num_);
565 p->label(0, curr_label_);
570 lyxerr << "'begin' of un-simple math expected, got '" << cs << "'\n";
// environment name from the braces
574 string const name = getArg('{', '}');
576 if (name == "equation" || name == "equation*") {
// the starred variant is unnumbered
577 curr_num_ = !stared(name);
579 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQUATION);
580 parse_into(p->cell(0), FLAG_END);
581 p->numbered(0, curr_num_);
582 p->label(0, curr_label_);
586 if (name == "eqnarray" || name == "eqnarray*") {
587 MathMatrixInset * p = new MathMatrixInset(LM_OT_EQNARRAY);
588 parse_lines(p, !stared(name), true);
592 if (name == "align" || name == "align*") {
593 MathMatrixInset * p = new MathMatrixInset(LM_OT_ALIGN);
// a further braced argument is read and handed to halign()
594 p->halign(getArg('{', '}'));
595 parse_lines(p, !stared(name), true);
599 if (name == "alignat" || name == "alignat*") {
600 MathMatrixInset * p = new MathMatrixInset(LM_OT_ALIGNAT);
601 p->halign(getArg('{', '}'));
602 parse_lines(p, !stared(name), true);
606 lyxerr[Debug::MATHED] << "1: unknown math environment: " << name << "\n";
611 latexkeys const * Parser::read_delim()
613 Token const & t = getToken();
614 latexkeys const * l = in_word_set(t.asString());
615 return l ? l : in_word_set(".");
// Core of the parser: consumes tokens and appends the corresponding insets
// to 'array'. 'flags' (FLAG_* bits) selects what ends the run; 'code' is the
// font code of the enclosing font command (LM_TC_MIN when none was given).
// NOTE(review): the loop header, many branch bodies and the break/return
// statements are not visible here; the notes below cover only the visible
// lines.
619 void Parser::parse_into(MathArray & array, unsigned flags, MathTextCodes code)
// font code given to plain characters; changed by LM_TK_OLDFONT commands below
621 MathTextCodes yyvarcode = LM_TC_MIN;
627 Token const & t = getToken();
629 //lyxerr << "t: " << t << " flags: " << flags << "'\n";
630 //array.dump(lyxerr);
// FLAG_ITEM: read one item, which is either a braced group or a single token
633 if (flags & FLAG_ITEM) {
635 if (t.cat() == catBegin) {
636 // skip the brace and collect everything to the next matching
638 flags |= FLAG_BRACE_LAST;
641 // handle only this single token, leave the loop if done
// FLAG_BRACE: the first token has to be '{'; the flag is cleared once seen
646 if (flags & FLAG_BRACE) {
647 if (t.cat() != catBegin) {
648 error("Expected {. Maybe you forgot to enclose an argument in {}");
652 flags &= ~FLAG_BRACE;
// FLAG_BLOCK: '&', the control sequence "\\" and "end" are tested here
657 if (flags & FLAG_BLOCK) {
658 if (t.cat() == catAlign || t.cs() == "\\")
660 if (t.cs() == "end") {
// dispatch on the category code of character tokens
669 if (t.cat() == catMath)
672 else if (t.cat() == catLetter)
673 array.push_back(new MathCharInset(t.character(), yyvarcode));
// spaces are kept only inside text-roman font
675 else if (t.cat() == catSpace &&
676 (yyvarcode == LM_TC_TEXTRM || code == LM_TC_TEXTRM))
677 array.push_back(new MathCharInset(' ', yyvarcode));
// macro parameter: the character after '#' is turned into its numeric value
679 else if (t.cat() == catParameter) {
680 Token const & n = getToken();
681 MathMacroArgument * p = new MathMacroArgument(n.character() - '0');
685 else if (t.cat() == catBegin) {
686 array.push_back(new MathCharInset('{', LM_TC_TEX));
689 else if (t.cat() == catEnd) {
690 if (flags & FLAG_BRACE_LAST)
692 array.push_back(new MathCharInset('}', LM_TC_TEX));
695 else if (t.cat() == catAlign) {
696 lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
697 array.push_back(new MathCharInset('&', LM_TC_TEX));
// superscript goes into cell 0 of the script inset, subscript into cell 1
700 else if (t.cat() == catSuper)
701 parse_into(lastScriptInset(array, true, limits)->cell(0), FLAG_ITEM);
703 else if (t.cat() == catSub)
704 parse_into(lastScriptInset(array, false, limits)->cell(1), FLAG_ITEM);
706 else if (t.character() == ']' && (flags & FLAG_BRACK_END))
709 else if (t.cat() == catOther)
710 array.push_back(new MathCharInset(t.character(), yyvarcode));
// from here on: dispatch on the control-sequence name
715 else if (t.cs() == "protect")
718 else if (t.cs() == "end")
721 else if (t.cs() == ")")
724 else if (t.cs() == "]")
// "\\": an optional [..] argument is stored in curr_skip_
727 else if (t.cs() == "\\") {
728 curr_skip_ = getArg('[', ']');
729 if (flags & FLAG_NEWLINE)
731 lyxerr[Debug::MATHED]
732 << "found newline unexpectedly, array: '" << array << "'\n";
733 array.push_back(createMathInset("\\"));
736 else if (t.cs() == "limits")
739 else if (t.cs() == "nolimits")
742 else if (t.cs() == "nonumber")
745 else if (t.cs() == "number")
// \sqrt: a root inset reads cell 0 up to ']' and then cell 1 as an item;
// a sqrt inset reads only cell 0 (the choosing condition is not visible)
748 else if (t.cs() == "sqrt") {
751 array.push_back(new MathRootInset);
752 parse_into(array.back()->cell(0), FLAG_BRACK_END);
753 parse_into(array.back()->cell(1), FLAG_ITEM);
756 array.push_back(new MathSqrtInset);
757 parse_into(array.back()->cell(0), FLAG_ITEM);
// \left <delim> ... \right <delim>
761 else if (t.cs() == "left") {
762 latexkeys const * l = read_delim();
764 parse_into(ar, FLAG_RIGHT);
765 latexkeys const * r = read_delim();
766 MathDelimInset * dl = new MathDelimInset(l, r);
771 else if (t.cs() == "right") {
772 if (!(flags & FLAG_RIGHT))
773 error("Unmatched right delimiter");
780 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
781 //MathArray tmp = array;
782 //MathSizeInset * p = new MathSizeInset(MathStyles(lval_->id));
783 //array.push_back(p);
784 //parse_into(p->cell(0), FLAG_BRACE_FONT);
789 array.push_back(new MathCharInset(ival_, LM_TC_TEX));
// nested environment; only "array" is handled in the visible lines
793 else if (t.cs() == "begin") {
794 string const name = getArg('{', '}');
795 if (name == "array") {
// appending 'c' makes valign[0] default to 'c' when no [..] argument is given
796 string const valign = getArg('[', ']') + 'c';
797 string const halign = getArg('{', '}');
// one column per character of the alignment string, one row to start with
798 MathArrayInset * m = new MathArrayInset(halign.size(), 1);
799 m->valign(valign[0]);
801 parse_lines(m, false, false);
804 lyxerr[Debug::MATHED] << "unknow math inset begin '" << name << "'\n";
807 else if (t.cs() == "kern") {
813 Token const & t = getToken();
819 if (isValidLength(s))
822 array.push_back(new MathKernInset(s));
// \label{...}: the label text is stored in curr_label_
825 else if (t.cs() == "label") {
827 //parse_into(ar, FLAG_ITEM);
829 //ar.write(os, true);
830 //curr_label_ = os.str();
832 curr_label_ = getArg('{', '}');
// infix fraction-like commands: what precedes them becomes cell 0, what
// follows is parsed into cell 1
835 else if (t.cs() == "choose" || t.cs() == "over" || t.cs() == "atop") {
837 MathInset * p = createMathInset(t.cs());
838 // search backward for position of last '{' if any
// NOTE(review): the 'pos >= 0' test needs a signed 'pos'; its declaration
// is not visible here -- confirm.
840 for (pos = array.size() - 1; pos >= 0; --pos) {
841 MathInset * q = array.nextInset(pos);
842 if (q->getChar() == '{')
846 // found it -> use the part after '{' as "numerator", erase the '{'
847 p->cell(0) = MathArray(array, pos + 1, array.size());
848 array.erase(pos, array.size());
850 // not found -> use everything as "numerator"
851 p->cell(0).swap(array);
854 parse_into(p->cell(1), FLAG_BLOCK);
// any other control sequence: look it up in the keyword table
857 else if (t.cs().size()) {
859 latexkeys const * l = in_word_set(t.cs());
// font command with an argument: parse the argument as an item and apply
// the font to each inset in it
861 if (l->token == LM_TK_FONT) {
862 //lyxerr << "starting font\n";
863 //CatCode catSpaceSave = theCatcode[' '];
864 //if (l->id == LM_TC_TEXTRM) {
865 // // temporarily change catcode
866 // theCatcode[' '] = catLetter;
869 MathTextCodes t = static_cast<MathTextCodes>(l->id);
871 parse_into(ar, FLAG_ITEM, t);
872 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it)
873 (*it)->handleFont(t);
876 // undo catcode changes
877 ////theCatcode[' '] = catSpaceSave;
878 //lyxerr << "ending font\n";
// old-style font switch: changes the font code of the characters that follow
881 else if (l->token == LM_TK_OLDFONT)
882 yyvarcode = static_cast<MathTextCodes>(l->id);
// generic inset: create it and parse one item per argument cell
885 MathInset * p = createMathInset(t.cs());
886 for (int i = 0; i < p->nargs(); ++i)
887 parse_into(p->cell(i), FLAG_ITEM);
893 MathInset * p = createMathInset(t.cs());
895 for (int i = 0; i < p->nargs(); ++i)
896 parse_into(p->cell(i), FLAG_ITEM);
899 error("Unrecognized token");
900 //lyxerr[Debug::MATHED] << "[" << t << "]\n";
// FLAG_LEAVE is cleared once it has been seen
907 if (flags & FLAG_LEAVE) {
908 flags &= ~FLAG_LEAVE;
914 lyxerr << " Math Panic, expect problems!\n";
915 // Search for the end command.
919 } while (good() && t.cs() != "end");
923 } // anonymous namespace
// Parses str as the contents of a single math cell; parse_into is run with
// no stop flags.
// NOTE(review): the construction of 'parser' and 'ar' and the return are
// not visible here -- confirm.
927 MathArray mathed_parse_cell(string const & str)
929 istringstream is(str.c_str());
932 parser.parse_into(ar, 0);
// Parses a macro definition held in a string.
// NOTE(review): 'parser' is presumably constructed from 'is' -- confirm.
938 MathMacroTemplate * mathed_parse_macro(string const & str)
940 istringstream is(str.c_str());
942 return parser.parse_macro();
// Parses a macro definition read from an input stream.
// NOTE(review): 'parser' is presumably constructed from 'is' -- confirm.
945 MathMacroTemplate * mathed_parse_macro(istream & is)
948 return parser.parse_macro();
// Parses a macro definition read through a LyXLex.
// NOTE(review): 'parser' is presumably constructed from 'lex' -- confirm.
951 MathMacroTemplate * mathed_parse_macro(LyXLex & lex)
954 return parser.parse_macro();
// Parses a complete formula held in a string.
// NOTE(review): 'parser' is presumably constructed from 'is' -- confirm.
959 MathMatrixInset * mathed_parse_normal(string const & str)
961 istringstream is(str.c_str());
963 return parser.parse_normal();
// Parses a complete formula read from an input stream.
// NOTE(review): 'parser' is presumably constructed from 'is' -- confirm.
966 MathMatrixInset * mathed_parse_normal(istream & is)
969 return parser.parse_normal();
// Parses a complete formula read through a LyXLex.
// NOTE(review): 'parser' is presumably constructed from 'lex' -- confirm.
972 MathMatrixInset * mathed_parse_normal(LyXLex & lex)
975 return parser.parse_normal();