3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
23 #pragma implementation
26 #include "math_parser.h"
28 #include "math_inset.h"
29 #include "math_arrayinset.h"
30 #include "math_charinset.h"
31 #include "math_deliminset.h"
32 #include "math_factory.h"
33 #include "math_fracinset.h"
34 #include "math_funcinset.h"
35 #include "math_macro.h"
36 #include "math_macrotable.h"
37 #include "math_macrotemplate.h"
38 #include "math_matrixinset.h"
39 #include "math_rootinset.h"
40 #include "math_scopeinset.h"
41 #include "math_sqrtinset.h"
42 #include "math_scriptinset.h"
43 #include "math_sqrtinset.h"
47 #include "support/lstrings.h"
// Returns the last element of `array` as a MathScriptInset when that element
// is non-null and reports isScriptInset(); otherwise returns 0.
55 MathScriptInset * prevScriptInset(MathArray const & array)
57 MathInset * p = array.back();
58 return (p && p->isScriptInset()) ? static_cast<MathScriptInset *>(p) : 0;
// Finds or creates the script inset at the end of `array`.
// It first asks prevScriptInset() for an existing script inset. The visible
// statements below also build a new MathScriptInset(up, down), wrapping a clone
// of the last element when that element is non-null and isScriptable().
// NOTE(review): the conditions connecting these statements, the handling of
// `limits`, and the returned value are not visible here -- confirm against the
// full function before relying on this summary.
62 MathInset * lastScriptInset(MathArray & array, bool up, bool down, int limits)
64 MathScriptInset * p = prevScriptInset(array);
66 MathInset * b = array.back();
// Only a scriptable last element is wrapped (cloned) into the new script inset.
67 if (b && b->isScriptable()) {
68 p = new MathScriptInset(up, down, b->clone());
// Otherwise a script inset without a nucleus is created.
71 p = new MathScriptInset(up, down);
85 // These are lexical codes, not semantic
89 LexBOP, // Binary operators or relations
// Lookup table giving the lexical code of each of the 256 possible byte values;
// it is indexed directly with character constants and input bytes.
99 lexcode_enum lexcode[256];
// Token values used for the opening and closing brace; they are the brace
// characters themselves.
102 const unsigned char LM_TK_OPEN = '{';
103 const unsigned char LM_TK_CLOSE = '}';
// Bit flags for the `flags` argument of Parser::parse_into(). Each value is a
// distinct bit so that several can be combined with | (for example
// FLAG_NEWLINE | FLAG_END) and cleared individually with &= ~FLAG_x.
106 FLAG_BRACE = 1 << 0, // an opening brace needed
107 FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing process
108 FLAG_RIGHT = 1 << 2, // next right ends the parsing process
109 FLAG_END = 1 << 3, // next end ends the parsing process
110 FLAG_BRACK_END = 1 << 4, // next closing bracket ends the parsing process
111 FLAG_AMPERSAND = 1 << 5, // next & ends the parsing process
112 FLAG_NEWLINE = 1 << 6, // next \\ ends the parsing process
113 FLAG_ITEM = 1 << 7, // read a (possibly braced token)
114 FLAG_BLOCK = 1 << 8, // next block ends the parsing process
115 FLAG_LEAVE = 1 << 9, // marker for leaving the
// Describes one LaTeX math environment known to the parser. Other code in this
// file reads the members `name`, `typ` and `numbered` of the table entries.
// NOTE(review): only `basename` is declared in the visible lines; the order and
// types of the remaining members should be confirmed against the full struct.
119 struct latex_mathenv_type {
// Environment name shared by the starred and unstarred variants (see the table
// rows, e.g. "equation*" and "equation" both carry "equation").
121 char const * basename;
// Table of the recognised math environments. Each row lists the environment
// string, its base name without the trailing '*', the inset type (LM_OT_*), and
// two integer fields. The rows suggest the first integer is the `numbered` flag
// (0 for starred variants, 1 otherwise).
// NOTE(review): the meaning of the last field is not visible here -- confirm.
127 latex_mathenv_type latex_mathenv[] = {
128 {"math", "math", LM_OT_SIMPLE, 0, 0},
129 {"equation*", "equation", LM_OT_EQUATION, 0, 0},
130 {"equation", "equation", LM_OT_EQUATION, 1, 0},
131 {"eqnarray*", "eqnarray", LM_OT_EQNARRAY, 0, 0},
132 {"eqnarray", "eqnarray", LM_OT_EQNARRAY, 1, 0},
133 {"align*", "align", LM_OT_ALIGN, 0, 1},
134 {"align", "align", LM_OT_ALIGN, 1, 1},
135 {"alignat*", "alignat", LM_OT_ALIGNAT, 0, 1},
136 {"alignat", "alignat", LM_OT_ALIGNAT, 1, 1},
137 {"multline*", "multline", LM_OT_MULTLINE, 0, 1},
138 {"multline", "multline", LM_OT_MULTLINE, 1, 1},
139 {"array", "array", LM_OT_MATRIX, 0, 1}
// Number of rows in latex_mathenv, computed at compile time so that it follows
// the table automatically.
142 int const latex_mathenv_num = sizeof(latex_mathenv)/sizeof(latex_mathenv[0]);
// Initialisation of the lexcode table (the enclosing function header is not
// visible here). A first pass assigns one of LexOther / LexSpace / LexAlpha to
// every byte value 0..255; the tests selecting between them are not visible.
// Individual characters are then overridden explicitly below.
148 for (int i = 0; i <= 255; ++i) {
150 lexcode[i] = LexOther;
152 lexcode[i] = LexSpace;
154 lexcode[i] = LexAlpha;
// Explicit per-character classifications; these override the defaults above.
157 lexcode['\t'] = lexcode['\f'] = lexcode[' '] = LexSpace;
158 lexcode['\n'] = LexNewLine;
159 lexcode['%'] = LexComment;
160 lexcode['#'] = LexArgument;
161 lexcode['$'] = LexMath;
// Binary operators and relations.
162 lexcode['+'] = lexcode['-'] = lexcode['*'] = lexcode['/']
163 = lexcode['<'] = lexcode['>'] = lexcode['='] = LexBOP;
165 lexcode['('] = lexcode[')'] = lexcode['|'] = lexcode['.'] =
166 lexcode['?'] = LexOther;
// The apostrophe and '@' are lexed like letters.
168 lexcode['\''] = lexcode['@'] = LexAlpha;
// Characters that form a token by themselves.
170 lexcode['['] = lexcode[']'] = lexcode['^'] = lexcode['_'] =
171 lexcode['&'] = LexSelf;
173 lexcode['{'] = LexSelf;
174 lexcode['}'] = LexSelf;
// The backslash introduces a control sequence.
176 lexcode['\\'] = LexESC;
182 // Helper class for parsing
// Constructor initialisers: from a LyXLex the stream and current line number
// are taken over; from a plain stream the line number starts at 0. In both
// cases no token is pending for putback.
190 : is_(lex.getStream()), lineno_(lex.getLineNo()), putback_token_(0)
194 : is_(is), lineno_(0), putback_token_(0)
// Parses a \newcommand definition into a newly allocated macro template.
198 MathMacroTemplate * parse_macro();
// Parses a math environment into a newly allocated matrix inset.
200 MathMatrixInset * parse_normal();
// Parses tokens into `array`; `flags` is a combination of FLAG_* bits.
202 void parse_into(MathArray & array, unsigned flags);
// Current value of the parser's line counter.
204 int lineno() const { return lineno_; }
// Stores one token to be consumed before further input is read.
206 void putback(int token);
// Reads an argument delimited by the bracket `lf` and its matching closer.
212 string lexArg(unsigned char lf, bool accept_spaces = false);
// Reads one character from the input stream.
214 unsigned char getuchar();
// Reports a parse error message on lyxerr.
216 void error(string const & msg);
// Parses the rows of a grid-like inset with `col` columns.
218 void parse_lines(MathGridInset * p, int col, bool numbered, bool outmost);
// Reads a delimiter and returns its key entry.
220 latexkeys const * read_delim();
// Key entry of the most recently looked-up control word (set via in_word_set).
231 latexkeys const * lval_;
// Remembers `token` as the pending putback token. The tokenizer tests
// putback_token_ for non-zero before reading input, so a token value of 0
// cannot be put back.
247 void Parser::putback(int token)
249 putback_token_ = token;
// Reads one character and returns it converted to unsigned char, so that it
// can be used directly as an index into the lexcode table.
// NOTE(review): the read itself and the condition guarding the diagnostic below
// are not visible here.
253 unsigned char Parser::getuchar()
257 lyxerr << "The input stream is not well..." << endl;
259 return static_cast<unsigned char>(c);
// Reads an argument enclosed in brackets from the input stream.
//   lf             opening bracket; '{', '[' and '(' are recognised and mapped
//                  to their closing counterparts, anything else is reported
//                  as an unknown bracket.
//   accept_spaces  when true, a plain ' ' passes the character filter below;
//                  other whitespace never does.
// The default value of accept_spaces (false) comes from the in-class
// declaration only. It must not be repeated on this out-of-class definition:
// C++ does not allow a default argument to be specified a second time, even
// with the same value.
263 string Parser::lexArg(unsigned char lf, bool accept_spaces)
// Derive the closing bracket from the opening one; rg stays 0 if lf is unknown.
278 unsigned char rg = 0;
279 if (lf == '{') rg = '}';
280 if (lf == '[') rg = ']';
281 if (lf == '(') rg = ')';
283 lyxerr[Debug::MATHED] << "Math parse error: unknown bracket '"
284 << lf << "'" << endl;
290 unsigned char c = getuchar();
// Character filter: only applies while inside the brackets (depth > 0).
295 if ((!isspace(c) || (c == ' ' && accept_spaces)) && depth > 0)
// Stop once the outermost bracket is closed or the stream fails.
297 } while (depth > 0 && is_.good());
// Tokenizer body. The function header is not visible here; presumably this is
// the yylex() that parse_macro() calls -- confirm. Each input byte is
// classified through the lexcode table and handled by lexical class.
// One-time initialisation guard (what it guards is not visible here).
305 static bool init_done = false;
// A token stored by putback() is handled before any new input is read.
312 if (putback_token_) {
313 int token = putback_token_;
319 unsigned char c = getuchar();
320 //lyxerr << "reading byte: '" << c << "' code: " << lexcode[c] << endl;
322 if (lexcode[c] == LexNewLine) {
// A comment runs to the end of the line (or until the stream fails).
327 if (lexcode[c] == LexComment) {
330 } while (c != '\n' && is_.good()); // eat comments
333 if (lexcode[c] == LexOther) {
// Letters and spaces share one branch here.
338 if (lexcode[c] == LexAlpha || lexcode[c] == LexSpace) {
343 if (lexcode[c] == LexBOP) {
348 if (lexcode[c] == LexMath) {
353 if (lexcode[c] == LexSelf) {
357 if (lexcode[c] == LexArgument) {
360 return LM_TK_ARGUMENT;
// Backslash: the start of a control sequence.
363 if (lexcode[c] == LexESC) {
365 //lyxerr << "reading second byte: '" << c << "' code: " << lexcode[c] << endl;
368 latexkeys const * l = in_word_set(s);
370 //lyxerr << "found key: " << l << endl;
371 //lyxerr << "found key name: " << l->name << endl;
372 //lyxerr << "found key token: " << l->token << endl;
// Control word: consume the run of letters, then any following spaces.
377 if (lexcode[c] == LexAlpha) {
379 while (lexcode[c] == LexAlpha && is_.good()) {
383 while (lexcode[c] == LexSpace && is_.good())
385 if (lexcode[c] != LexSpace)
388 //lyxerr[Debug::MATHED] << "reading: text '" << sval_ << "'\n";
389 //lyxerr << "reading: text '" << sval_ << "'\n";
// Look the collected word up in the keyword table.
390 lval_ = in_word_set(sval_);
// NOTE(review): lval_ is dereferenced below; a null check is presumably done
// on the lines not visible here -- confirm.
// For \begin / \end, read the environment name and search latex_mathenv for it.
394 if (lval_->token == LM_TK_BEGIN || lval_->token == LM_TK_END) {
395 string name = lexArg('{');
397 while (i < latex_mathenv_num && name != latex_mathenv[i].name)
// Writes `msg` to lyxerr as a math parse error, prefixed with the parser's
// line counter (printed with a '~').
409 void Parser::error(string const & msg)
411 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parses the rows of the grid inset `p`, one row per loop iteration.
//   col       number of cells parsed per row
//   numbered  initial numbering state assigned to curr_num_ for every row
//   outmost   NOTE(review): its use is not visible here; presumably it guards
//             the MathMatrixInset-specific bookkeeping below -- confirm.
// curr_num_ and curr_label_ are saved on entry and restored at the end, so that
// nested calls do not disturb the caller's state.
415 void Parser::parse_lines(MathGridInset * p, int col, bool numbered, bool outmost)
417 // save global variables
418 bool const saved_num = curr_num_;
419 string const saved_label = curr_label_;
// The loop has no exit condition of its own; it is left from inside the body
// (on lines not visible here).
421 for (int row = 0; true; ++row) {
422 // reset global variables
423 curr_num_ = numbered;
// Start index: total cell count minus one row's worth of cells.
427 int idx = p->nargs() - p->ncols();
// The first col-1 cells each end at '&'; the last one ends at '\\' or \end.
428 for (int i = 0; i < col - 1; ++i, ++idx)
429 parse_into(p->cell(idx), FLAG_AMPERSAND);
430 parse_into(p->cell(idx), FLAG_NEWLINE | FLAG_END);
// Record per-row numbering, label and extra vertical skip on the matrix inset.
433 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
434 m->numbered(row, curr_num_);
435 m->label(row, curr_label_);
436 if (curr_skip_.size()) {
437 m->vskip(LyXLength(curr_skip_), row);
452 // restore "global" variables
453 curr_num_ = saved_num;
454 curr_label_ = saved_label;
// Parses a macro definition: expects a \newcommand token, then the macro name
// in braces, an optional argument count in square brackets, and the macro body.
// Returns a newly allocated MathMacroTemplate (the return statement and the
// failure path after the diagnostic are not visible here).
458 MathMacroTemplate * Parser::parse_macro()
460 if (yylex() != LM_TK_NEWCOMMAND) {
461 lyxerr << "\\newcommand expected\n";
// Drop the first character of the braced name (presumably the backslash).
// NOTE(review): if `string` is std::string, substr(1) throws std::out_of_range
// when lexArg returns an empty string -- confirm that cannot happen here.
465 string name = lexArg('{').substr(1);
// A missing argument-count bracket means the macro takes no arguments.
466 string arg = lexArg('[');
467 int narg = arg.empty() ? 0 : atoi(arg.c_str());
468 MathMacroTemplate * p = new MathMacroTemplate(name, narg);
// The body must be brace-enclosed; parsing stops at its closing brace.
469 parse_into(p->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
// Parses one math environment into a newly allocated MathMatrixInset whose
// type is taken from the latex_mathenv entry `i`. Handling then depends on that
// type: single-cell forms parse into cell(0), multi-row forms go through
// parse_lines(). Unknown environments are reported on the MATHED debug channel.
// NOTE(review): how `i` is determined and several case labels are not visible
// here.
474 MathMatrixInset * Parser::parse_normal()
476 MathMatrixInset * p = 0;
483 lyxerr[Debug::MATHED]
484 << "reading math environment " << i << " "
485 << latex_mathenv[i].name << "\n";
487 MathInsetTypes typ = latex_mathenv[i].typ;
488 p = new MathMatrixInset(typ);
// Single-cell form parsed with no terminating flags; numbering and label state
// collected during parsing is stored on row 0.
493 curr_num_ = latex_mathenv[i].numbered;
495 parse_into(p->cell(0), 0);
496 p->numbered(0, curr_num_);
497 p->label(0, curr_label_);
// equation: a single cell terminated by \end.
501 case LM_OT_EQUATION: {
502 curr_num_ = latex_mathenv[i].numbered;
504 parse_into(p->cell(0), FLAG_END);
505 p->numbered(0, curr_num_);
506 p->label(0, curr_label_);
// eqnarray: rows of three cells.
510 case LM_OT_EQNARRAY: {
511 parse_lines(p, 3, latex_mathenv[i].numbered, true);
// Rows of two cells; the horizontal alignment is read from a braced argument.
516 p->halign(lexArg('{'));
517 parse_lines(p, 2, latex_mathenv[i].numbered, true);
// alignat: handled like the preceding case.
521 case LM_OT_ALIGNAT: {
522 p->halign(lexArg('{'));
523 parse_lines(p, 2, latex_mathenv[i].numbered, true);
528 lyxerr[Debug::MATHED]
529 << "1: unknown math environment: " << typ << "\n";
536 lyxerr[Debug::MATHED]
537 << "2 unknown math environment: " << t << "\n";
// Reads a delimiter and returns the matching latexkeys entry. The result is
// initialised with the entry for ".".
// NOTE(review): the branches that replace this initial value are not visible
// here; only their commented-out debug output remains in view.
544 latexkeys const * Parser::read_delim()
547 //lyxerr << "found symbol: " << ld << "\n";
548 latexkeys const * l = in_word_set(".");
555 //lyxerr << "found key 1: '" << l << "'\n";
556 //lyxerr << "found key 1: '" << l->name << "'\n";
564 //lyxerr << "found key 2: '" << l->name << "'\n";
571 //lyxerr << "found key 2: '" << l->name << "'\n";
// Core recursive parser: reads tokens and appends the resulting insets to
// `array`. `flags` is a combination of FLAG_* bits selecting what is expected
// first (FLAG_BRACE, FLAG_ITEM) and what ends this parse (closing brace,
// bracket, \right, &, \\, \end, block end). The loop runs while the stream is
// good and the current token is neither LM_TK_END nor 0.
// NOTE(review): most case labels of the token switch are not visible here; the
// comments below describe only the statements that are.
578 void Parser::parse_into(MathArray & array, unsigned flags)
// Font code applied to ordinary characters; changed by font-switching tokens.
580 MathTextCodes yyvarcode = LM_TC_VAR;
587 //lyxerr << "t: " << t << " flags: " << flags << " i: " << ival_
588 // << " '" << sval_ << "'\n";
589 //array.dump(lyxerr);
// FLAG_ITEM: read either a braced group or a single token.
592 if (flags & FLAG_ITEM) {
594 if (t == LM_TK_OPEN) {
595 // skip the brace and collect everything to the next matching
597 flags |= FLAG_BRACE_LAST;
600 // take only this single token
// FLAG_BRACE: an opening brace is mandatory; the flag is cleared afterwards.
605 if (flags & FLAG_BRACE) {
606 if (t != LM_TK_OPEN) {
607 error("Expected {. Maybe you forgot to enclose an argument in {}");
611 flags &= ~FLAG_BRACE;
// FLAG_BLOCK: a closing brace, '&', newline or \end is tested for here.
617 if (flags & FLAG_BLOCK) {
618 if (t == LM_TK_CLOSE || t == '&' ||
619 t == LM_TK_NEWLINE || t == LM_TK_END) {
// Whitespace characters are kept only while the LM_TC_TEXTRM font code is active.
633 if (!isspace(ival_) || yyvarcode == LM_TC_TEXTRM)
634 array.push_back(new MathCharInset(ival_, yyvarcode));
637 case LM_TK_ARGUMENT: {
638 MathMacroArgument * p = new MathMacroArgument(ival_);
639 //p->code(yyvarcode);
645 array.push_back(new MathCharInset(ival_, LM_TC_SPECIAL));
649 array.push_back(new MathCharInset(ival_, LM_TC_CONST));
// A nested group becomes a scope inset, parsed up to its closing brace.
653 //lyxerr << " creating ScopeInset\n";
654 array.push_back(new MathScopeInset);
655 parse_into(array.back()->cell(0), FLAG_BRACE_LAST);
659 if (flags & FLAG_BRACE_LAST)
664 array.push_back(new MathCharInset('[', LM_TC_CONST));
// ']' is tested against FLAG_BRACK_END; it is also inserted as a plain character.
668 if (flags & FLAG_BRACK_END)
671 array.push_back(new MathCharInset(']', LM_TC_CONST));
// Scripts: cell(0) is used with up=true and cell(1) with down=true.
676 lastScriptInset(array, true, false, limits)->cell(0), FLAG_ITEM);
681 lastScriptInset(array, false, true, limits)->cell(1), FLAG_ITEM);
686 //lyxerr << "setting limit to " << limits << "\n";
// '&' is expected only when FLAG_AMPERSAND is set; otherwise it is logged.
690 if (flags & FLAG_AMPERSAND) {
691 flags &= ~FLAG_AMPERSAND;
694 lyxerr[Debug::MATHED]
695 << "found tab unexpectedly, array: '" << array << "'\n";
// Newline: a bracketed argument is read into curr_skip_ first; the newline is
// expected only when FLAG_NEWLINE is set, otherwise it is logged.
700 curr_skip_ = lexArg('[');
701 if (flags & FLAG_NEWLINE) {
702 flags &= ~FLAG_NEWLINE;
705 lyxerr[Debug::MATHED]
706 << "found newline unexpectedly, array: '" << array << "'\n";
714 array.push_back(new MathCharInset(ival_, LM_TC_BOP));
// Roots: a root inset parses cell(0) up to ']' and then cell(1) as an item;
// a square root parses only cell(0) as an item.
719 unsigned char c = getuchar();
721 array.push_back(new MathRootInset);
722 parse_into(array.back()->cell(0), FLAG_BRACK_END);
723 parse_into(array.back()->cell(1), FLAG_ITEM);
726 array.push_back(new MathSqrtInset);
727 parse_into(array.back()->cell(0), FLAG_ITEM);
// Delimiter pair: read the left delimiter, parse the contents with FLAG_RIGHT,
// then read the right delimiter.
734 latexkeys const * l = read_delim();
736 parse_into(ar, FLAG_RIGHT);
737 latexkeys const * r = read_delim();
738 MathDelimInset * dl = new MathDelimInset(l, r);
745 if (flags & FLAG_RIGHT)
747 error("Unmatched right delimiter");
// Font command with argument: parse one item and apply the font to each element.
753 MathTextCodes t = static_cast<MathTextCodes>(lval_->id);
755 parse_into(ar, FLAG_ITEM);
756 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it)
757 (*it)->handleFont(t);
// Font switch: changes the code used for subsequent characters.
763 yyvarcode = static_cast<MathTextCodes>(lval_->id);
768 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
769 //MathArray tmp = array;
770 //MathSizeInset * p = new MathSizeInset(MathStyles(lval_->id));
771 //array.push_back(p);
772 //parse_into(p->cell(0), FLAG_BRACE_FONT);
// Known macro: clone its template and parse one item per argument; otherwise a
// function inset named after the word is inserted.
781 if (MathMacroTable::hasTemplate(sval_)) {
782 MathMacro * m = MathMacroTable::cloneTemplate(sval_);
783 for (int i = 0; i < m->nargs(); ++i)
784 parse_into(m->cell(i), FLAG_ITEM);
786 m->metrics(LM_ST_TEXT);
788 array.push_back(new MathFuncInset(sval_));
// Nested environment: only LM_OT_MATRIX is handled here.
794 MathInsetTypes typ = latex_mathenv[i].typ;
796 if (typ == LM_OT_MATRIX) {
// Appending 'c' guarantees valign[0] exists and defaults to 'c' when the
// bracketed argument is empty.
797 string const valign = lexArg('[') + 'c';
798 string const halign = lexArg('{');
799 //lyxerr << "valign: '" << valign << "'\n";
800 //lyxerr << "halign: '" << halign << "'\n";
// One column per character of the halign string.
801 MathArrayInset * m = new MathArrayInset(halign.size(), 1);
802 m->valign(valign[0]);
805 parse_lines(m, halign.size(), latex_mathenv[i].numbered, false);
807 //lyxerr << "read matrix " << *m << "\n";
810 lyxerr[Debug::MATHED] << "unknow math inset " << typ << "\n";
// The label text is read with spaces accepted.
815 curr_label_ = lexArg('{', true);
// Everything parsed so far is moved into cell(0) of a fraction; the remainder
// up to the end of the block goes into cell(1).
820 MathFracInset * p = new MathFracInset;
821 p->cell(0).swap(array);
823 parse_into(p->cell(1), FLAG_BLOCK);
// Generic inset built by the factory: one item is parsed per cell.
829 MathInset * p = createMathInset(lval_);
831 for (int i = 0; i < p->nargs(); ++i)
832 parse_into(p->cell(i), FLAG_ITEM);
835 error("Unrecognized token");
836 //lyxerr[Debug::MATHED] << "[" << t << " " << sval_ << "]\n";
837 lyxerr << "[" << t << " " << sval_ << "]\n";
840 } // end of big switch
// FLAG_LEAVE is tested after the switch and cleared.
842 if (flags & FLAG_LEAVE) {
843 flags &= ~FLAG_LEAVE;
848 lyxerr << " Math Panic, expect problems!\n";
849 // Search for the end command.
852 } while (is_.good() && t != LM_TK_END && t);
// Hands the line number `lineno` back to the LyXLex object and then checks for
// the "\end_inset" marker, logging a diagnostic on lyxerr.
// NOTE(review): the lexer call that fetches the string and the branch around
// the diagnostic are not visible here.
860 void parse_end(LyXLex & lex, int lineno)
862 // Update line number
863 lex.setLineNo(lineno);
865 // reading of end_inset
868 if (lex.getString() == "\\end_inset")
870 //lyxerr[Debug::MATHED] << "InsetFormula::Read: Garbage before \\end_inset,"
871 lyxerr << "InsetFormula::Read: Garbage before \\end_inset,"
872 " or missing \\end_inset!\n";
876 } // anonymous namespace
// Parses the text in `str` as the contents of a single math cell: the string is
// wrapped in an input stream and parsed with no terminating flags.
// NOTE(review): construction of `parser` / `ar` and the return statement are
// not visible here; presumably `ar` is returned.
880 MathArray mathed_parse_cell(string const & str)
882 istringstream is(str.c_str());
885 parser.parse_into(ar, 0);
// Parses a macro definition from the text in `str` and returns the result of
// Parser::parse_macro().
891 MathMacroTemplate * mathed_parse_macro(string const & str)
893 istringstream is(str.c_str());
895 return parser.parse_macro();
// Stream overload: parses a macro definition from `is` and returns the result
// of Parser::parse_macro().
898 MathMacroTemplate * mathed_parse_macro(istream & is)
901 return parser.parse_macro();
// LyXLex overload: parses a macro definition, then calls parse_end() with the
// parser's line number to update the lexer and check for the inset end marker.
904 MathMacroTemplate * mathed_parse_macro(LyXLex & lex)
907 MathMacroTemplate * p = parser.parse_macro();
908 parse_end(lex, parser.lineno());
// Parses a math environment from the text in `str` and returns the result of
// Parser::parse_normal().
914 MathMatrixInset * mathed_parse_normal(string const & str)
916 istringstream is(str.c_str());
918 return parser.parse_normal();
// Stream overload: parses a math environment from `is` and returns the result
// of Parser::parse_normal().
921 MathMatrixInset * mathed_parse_normal(istream & is)
924 return parser.parse_normal();
// LyXLex overload: parses a math environment, then calls parse_end() with the
// parser's line number to update the lexer and check for the inset end marker.
927 MathMatrixInset * mathed_parse_normal(LyXLex & lex)
930 MathMatrixInset * p = parser.parse_normal();
931 parse_end(lex, parser.lineno());