3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
23 #pragma implementation
26 #include "math_parser.h"
28 #include "math_inset.h"
29 #include "math_arrayinset.h"
30 #include "math_bigopinset.h"
31 #include "math_charinset.h"
32 #include "math_dotsinset.h"
33 #include "math_decorationinset.h"
34 #include "math_deliminset.h"
35 #include "math_fracinset.h"
36 #include "math_funcinset.h"
37 #include "math_funcliminset.h"
38 #include "math_macro.h"
39 #include "math_macrotable.h"
40 #include "math_macrotemplate.h"
41 #include "math_matrixinset.h"
42 #include "math_noglyphinset.h"
43 #include "math_rootinset.h"
44 #include "math_scriptinset.h"
45 #include "math_sizeinset.h"
46 #include "math_spaceinset.h"
47 #include "math_sqrtinset.h"
48 #include "math_stackrelinset.h"
49 #include "math_symbolinset.h"
51 #include "mathed/support.h"
53 #include "support/lstrings.h"
// Returns the last element of `array` as a MathScriptInset when that element
// is non-null and reports isScriptInset(); otherwise returns 0.
// NOTE(review): array.back() is called unconditionally -- confirm that
// MathArray::back() is safe (returns 0) on an empty array.
61 MathScriptInset * prevScriptInset(MathArray const & array)
63 MathInset * p = array.back();
64 return (p && p->isScriptInset()) ? static_cast<MathScriptInset *>(p) : 0;
// Produces the script inset at the end of `array` that a following script
// should be parsed into. Callers in parse_into use cell(0) of the result
// with up = true and cell(1) with down = true.
// Starts from prevScriptInset(array). The visible fallbacks create a new
// MathScriptInset(up, down, ...) around a clone of the last element when that
// element is scriptable, or one without a nucleus otherwise.
// NOTE(review): no use of `limits` is visible in this function -- confirm it
// is applied to the returned inset.
68 MathInset * lastScriptInset(MathArray & array, bool up, bool down, int limits)
70 MathScriptInset * p = prevScriptInset(array);
72 MathInset * b = array.back();
73 if (b && b->isScriptable()) {
// Wrap a copy of the scriptable last element as the nucleus.
74 p = new MathScriptInset(up, down, b->clone());
// Nothing scriptable precedes: script inset without nucleus.
77 p = new MathScriptInset(up, down);
91 // These are lexical codes, not semantic
96 LexBOP, // Binary operators or relations
// Lexical class of every possible byte value; indexed by an unsigned char
// read from the input stream.
108 lexcode_enum lexcode[256];
// Token values for the two brace characters; each equals the character itself.
111 const unsigned char LM_TK_OPEN = '{';
112 const unsigned char LM_TK_CLOSE = '}';
// Bit flags for the `flags` argument of Parser::parse_into. Each value is a
// distinct bit, so several can be OR-ed together (for example
// FLAG_BRACE | FLAG_BRACE_LAST or FLAG_NEWLINE | FLAG_END).
115 FLAG_BRACE = 1 << 0, // A { needed //}
116 FLAG_BRACE_LAST = 1 << 1, // // { Last } ends the parsing process
117 FLAG_RIGHT = 1 << 2, // Next right ends the parsing process
118 FLAG_END = 1 << 3, // Next end ends the parsing process
119 FLAG_BRACE_FONT = 1 << 4, // // { Next } closes a font
120 FLAG_BRACK_END = 1 << 5, // // [ Next ] ends the parsing process
121 FLAG_AMPERSAND = 1 << 6, // Next & ends the parsing process
122 FLAG_NEWLINE = 1 << 7, // Next \\ ends the parsing process
123 FLAG_ITEM = 1 << 8, // read a (possibly braced token)
124 FLAG_LEAVE = 1 << 9, // marker for leaving the parse loop (presumably; parse_into tests and clears it -- TODO confirm)
125 FLAG_OPTARG = 1 << 10 // reads an argument in []
// Describes one LaTeX math environment known to the parser.
// NOTE(review): judging by the initializers of latex_mathenv and by the
// member accesses elsewhere in this file, the fields are name, basename, typ,
// numbered and a fifth integer whose meaning is not evident here -- confirm.
129 struct latex_mathenv_type {
// Environment name without a trailing star (see the table entries).
131 char const * basename;
// Table of the math environments the parser recognises. The lexer looks up
// an environment name here by linear search on the first column.
// Each starred entry shares its second column (the base name) and its type
// with the unstarred entry that follows it; the starred entry has 0 and the
// unstarred entry has 1 in the fourth column (presumably `numbered`).
137 latex_mathenv_type latex_mathenv[] = {
138 {"math", "math", LM_OT_SIMPLE, 0, 0},
139 {"equation*", "equation", LM_OT_EQUATION, 0, 0},
140 {"equation", "equation", LM_OT_EQUATION, 1, 0},
141 {"eqnarray*", "eqnarray", LM_OT_EQNARRAY, 0, 0},
142 {"eqnarray", "eqnarray", LM_OT_EQNARRAY, 1, 0},
143 {"align*", "align", LM_OT_ALIGN, 0, 1},
144 {"align", "align", LM_OT_ALIGN, 1, 1},
145 {"alignat*", "alignat", LM_OT_ALIGNAT, 0, 1},
146 {"alignat", "alignat", LM_OT_ALIGNAT, 1, 1},
147 {"multline*", "multline", LM_OT_MULTLINE, 0, 1},
148 {"multline", "multline", LM_OT_MULTLINE, 1, 1},
149 {"array", "array", LM_OT_MATRIX, 0, 1}
// Number of entries in latex_mathenv, computed at compile time.
152 int const latex_mathenv_num = sizeof(latex_mathenv)/sizeof(latex_mathenv[0]);
// Fills the lexcode table. First every byte value 0..255 receives one of
// LexOther, LexSpace or LexAlpha (the selecting conditions are not visible
// here), then individual characters are overridden below.
158 for (int i = 0; i <= 255; ++i) {
160 lexcode[i] = LexOther;
162 lexcode[i] = LexSpace;
164 lexcode[i] = LexAlpha;
// Tab, form feed and blank are whitespace; newline has its own class.
167 lexcode['\t'] = lexcode['\f'] = lexcode[' '] = LexSpace;
168 lexcode['\n'] = LexNewLine;
169 lexcode['%'] = LexComment;
170 lexcode['#'] = LexArgument;
171 lexcode['$'] = LexMath;
// Arithmetic and relation characters are binary operators.
172 lexcode['+'] = lexcode['-'] = lexcode['*'] = lexcode['/']
173 = lexcode['<'] = lexcode['>'] = lexcode['='] = LexBOP;
175 lexcode['('] = lexcode[')'] = lexcode['|'] = lexcode['.'] =
176 lexcode['?'] = LexOther;
// The apostrophe and the at-sign are classified like letters.
178 lexcode['\''] = lexcode['@'] = LexAlpha;
// Brackets, script markers and the ampersand form the LexSelf class.
180 lexcode['['] = lexcode[']'] = lexcode['^'] = lexcode['_'] =
181 lexcode['&'] = LexSelf;
183 lexcode['\\'] = LexESC;
184 lexcode['{'] = LexOpen;
185 lexcode['}'] = LexClose;
191 // Helper class for parsing
// Reads from the stream of `lex`, starting at the line number `lex` reports.
198 Parser(LyXLex & lex) : is_(lex.getStream()), lineno_(lex.getLineNo()) {}
// Reads from `is`; the line counter starts at 0.
200 Parser(istream & is) : is_(is), lineno_(0) {}
// Parses a macro definition; the input must start with the \newcommand token.
203 MathMacroTemplate * parse_macro();
// Parses a math environment into a newly allocated MathMatrixInset.
205 MathMatrixInset * parse_normal();
// Parses tokens into `array`; `flags` is a combination of the FLAG_* bits.
207 void parse_into(MathArray & array, unsigned flags);
// Current value of the line counter.
209 int lineno() const { return lineno_; }
// Reads an argument opened by the bracket character `lf` (one of the
// characters handled in the definition) and returns it as a string.
215 string lexArg(unsigned char lf, bool accept_spaces = false);
// Returns one byte of input as unsigned char.
217 unsigned char getuchar();
// Reports a parse error, tagged with the current line number, on lyxerr.
219 void error(string const & msg);
// Parses rows of `col` cells each into the grid `p`.
221 void parse_lines(MathGridInset * p, int col, bool numbered, bool outmost);
// Reads a delimiter and returns its key-table entry.
223 latexkeys const * read_delim();
// Key-table entry associated with the most recent token; parse_into passes
// it to the inset constructors.
234 latexkeys const * lval_;
// Returns the value `c` converted to unsigned char, so that it can index
// the lexcode table safely. A diagnostic is written to lyxerr first under a
// condition not visible here (presumably a failed stream -- TODO confirm).
247 unsigned char Parser::getuchar()
251 lyxerr << "The input stream is not well..." << endl;
253 return static_cast<unsigned char>(c);
// Reads an argument delimited by a bracket pair and returns it as a string.
//   lf            opening bracket: one of the characters mapped below
//   accept_spaces when true, blanks are kept; other whitespace is always
//                 rejected by the filter below
// The loop runs until the nesting depth drops to zero or the stream fails.
// NOTE(review): the default argument `= false` repeats the one already given
// on the in-class declaration. ISO C++ forbids redefining a default argument
// in a later declaration, so conforming compilers reject this line -- drop
// the default here.
257 string Parser::lexArg(unsigned char lf, bool accept_spaces = false)
// Closing bracket matching `lf`; stays 0 for an unsupported opener.
272 unsigned char rg = 0;
273 if (lf == '{') rg = '}';
274 if (lf == '[') rg = ']';
275 if (lf == '(') rg = ')';
277 lyxerr[Debug::MATHED] << "Math parse error: unknown bracket '"
278 << lf << "'" << endl;
284 unsigned char c = getuchar();
// Accept non-whitespace bytes, plus blanks if requested, while still inside
// the brackets.
289 if ((!isspace(c) || (c == ' ' && accept_spaces)) && depth > 0)
291 } while (depth > 0 && is_.good());
// Lexer body (presumably Parser::yylex, which parse_macro calls -- the
// function header is not visible here). Reads bytes with getuchar() and
// dispatches on the lexical class stored in lexcode[c].
// One-time initialisation guard (presumably for the lexcode table).
299 static bool init_done = false;
307 unsigned char c = getuchar();
308 //lyxerr << "reading byte: '" << c << "' code: " << lexcode[c] << endl;
310 if (lexcode[c] == LexNewLine) {
313 } else if (lexcode[c] == LexComment) {
316 } while (c != '\n' && is_.good()); // eat comments
317 } else if (lexcode[c] == LexOther) {
318 } else if (lexcode[c] == LexAlpha || lexcode[c] == LexSpace) {
323 } else if (lexcode[c] == LexBOP) {
326 } else if (lexcode[c] == LexMath) {
329 } else if (lexcode[c] == LexSelf) {
331 } else if (lexcode[c] == LexArgument) {
334 return LM_TK_ARGUMENT;
335 } else if (lexcode[c] == LexOpen) {
337 } else if (lexcode[c] == LexClose) {
// Escape character: what follows is looked up in the key table.
339 } else if (lexcode[c] == LexESC) {
341 //lyxerr << "reading second byte: '" << c << "' code: " << lexcode[c] << endl;
344 latexkeys const * l = in_word_set(s);
346 //lyxerr << "found key: " << l << endl;
347 //lyxerr << "found key name: " << l->name << endl;
348 //lyxerr << "found key token: " << l->token << endl;
// Alphabetic command name: loop over the LexAlpha bytes, then over the
// LexSpace bytes that follow.
353 if (lexcode[c] == LexAlpha) {
355 while (lexcode[c] == LexAlpha && is_.good()) {
359 while (lexcode[c] == LexSpace && is_.good())
361 if (lexcode[c] != LexSpace)
364 //lyxerr[Debug::MATHED] << "reading: text '" << sval_ << "'\n";
365 //lyxerr << "reading: text '" << sval_ << "'\n";
366 latexkeys const * l = in_word_set(sval_);
// For begin and end tokens, read the braced environment name and search the
// latex_mathenv table for it linearly.
370 if (l->token == LM_TK_BEGIN || l->token == LM_TK_END) {
371 string name = lexArg('{');
373 while (i < latex_mathenv_num && name != latex_mathenv[i].name)
376 } else if (l->token == LM_TK_SPACE)
// Writes `msg` to lyxerr as a math parse error, prefixed with the
// approximate current line number.
388 void Parser::error(string const & msg)
390 lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
// Parses the rows of a grid environment into `p`.
//   p        grid that receives the cells
//   col      number of cells per row
//   numbered value that curr_num_ is reset to at the start of every row
//   outmost  not used in the visible lines (presumably selects the
//            MathMatrixInset bookkeeping below -- TODO confirm)
// curr_num_ and curr_label_ are saved on entry and restored at the end.
394 void Parser::parse_lines(MathGridInset * p, int col, bool numbered, bool outmost)
396 // save global variables
397 bool const saved_num = curr_num_;
398 string const saved_label = curr_label_;
// No loop condition: termination must happen inside the body.
400 for (int row = 0; true; ++row) {
401 // reset global variables
402 curr_num_ = numbered;
// Index of the first cell of the last row, assuming cells are stored row by
// row -- TODO confirm.
406 int idx = p->nargs() - p->ncols();
// All cells but the last stop at the ampersand; the last cell stops at a
// newline or at the end of the environment.
407 for (int i = 0; i < col - 1; ++i, ++idx)
408 parse_into(p->cell(idx), FLAG_AMPERSAND);
409 parse_into(p->cell(idx), FLAG_NEWLINE | FLAG_END);
// Record numbering, label and optional vertical skip for this row.
412 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
413 m->numbered(row, curr_num_);
414 m->label(row, curr_label_);
415 if (curr_skip_.size()) {
416 m->vskip(LyXLength(curr_skip_), row);
431 // restore "global" variables
432 curr_num_ = saved_num;
433 curr_label_ = saved_label;
// Parses a macro definition and returns a newly allocated template.
// The first token must be LM_TK_NEWCOMMAND; otherwise a message is logged.
// The braced name and the optional bracketed argument count follow, and the
// macro body is parsed into cell(0) of the template.
437 MathMacroTemplate * Parser::parse_macro()
439 if (yylex() != LM_TK_NEWCOMMAND) {
440 lyxerr << "\\newcommand expected\n";
// Drop the first character of the braced name (presumably the backslash).
// NOTE(review): substr(1) throws std::out_of_range when lexArg returns an
// empty string -- confirm that cannot happen or guard it.
444 string name = lexArg('{').substr(1);
445 string arg = lexArg('[');
// A missing bracket argument means zero macro arguments.
446 int narg = arg.empty() ? 0 : atoi(arg.c_str());
447 MathMacroTemplate * p = new MathMacroTemplate(name, narg);
// The body must be enclosed in braces and ends at the matching close brace.
448 parse_into(p->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
// Parses one math environment and returns a newly allocated MathMatrixInset
// of the matching type. `p` starts as 0 and is only assigned once an
// environment has been identified, so callers presumably can receive 0.
// The layout depends on the environment type found in latex_mathenv.
453 MathMatrixInset * Parser::parse_normal()
455 MathMatrixInset * p = 0;
462 lyxerr[Debug::MATHED]
463 << "reading math environment " << i << " "
464 << latex_mathenv[i].name << "\n";
466 MathInsetTypes typ = latex_mathenv[i].typ;
467 p = new MathMatrixInset(typ);
// Single cell, parsed with no termination flags.
472 curr_num_ = latex_mathenv[i].numbered;
474 parse_into(p->cell(0), 0);
475 p->numbered(0, curr_num_);
476 p->label(0, curr_label_);
// Single cell, parsed up to the end token.
480 case LM_OT_EQUATION: {
481 curr_num_ = latex_mathenv[i].numbered;
483 parse_into(p->cell(0), FLAG_END);
484 p->numbered(0, curr_num_);
485 p->label(0, curr_label_);
// Rows of three cells.
489 case LM_OT_EQNARRAY: {
490 parse_lines(p, 3, latex_mathenv[i].numbered, true);
// Braced alignment argument, then rows of two cells.
495 p->halign(lexArg('{'));
496 parse_lines(p, 2, latex_mathenv[i].numbered, true);
500 case LM_OT_ALIGNAT: {
501 p->halign(lexArg('{'));
502 parse_lines(p, 2, latex_mathenv[i].numbered, true);
507 lyxerr[Debug::MATHED]
508 << "1: unknown math environment: " << typ << "\n";
515 lyxerr[Debug::MATHED]
516 << "2 unknown math environment: " << t << "\n";
// Reads a delimiter and returns its key-table entry. The result starts out
// as the entry for "." and is presumably replaced when a specific delimiter
// is recognised -- the deciding code is not visible here.
523 latexkeys const * Parser::read_delim()
526 //lyxerr << "found symbol: " << ld << "\n";
527 latexkeys const * l = in_word_set(".");
534 //lyxerr << "found key 1: '" << l << "'\n";
535 //lyxerr << "found key 1: '" << l->name << "'\n";
543 //lyxerr << "found key 2: '" << l->name << "'\n";
550 //lyxerr << "found key 2: '" << l->name << "'\n";
// Core of the parser: reads tokens and appends the corresponding insets to
// `array`. `flags` is a combination of FLAG_* bits that states what is
// required of the input and which token ends the parse. Several flags are
// cleared again once their condition has been met.
557 void Parser::parse_into(MathArray & array, unsigned flags)
// Text code for plain characters; changed by font commands below.
559 MathTextCodes yyvarcode = LM_TC_VAR;
567 //lyxerr << "t: " << t << " flags: " << flags << " i: " << ival_
568 // << " '" << sval_ << "'\n";
569 //array.dump(lyxerr);
// FLAG_ITEM: read either a braced group or a single token.
572 if (flags & FLAG_ITEM) {
574 if (t == LM_TK_OPEN) {
575 // skip the brace and regard everything to the next matching
579 flags |= FLAG_BRACE_LAST;
581 // regard only this single token
// FLAG_BRACE: an opening brace is mandatory here.
586 if ((flags & FLAG_BRACE) && t != LM_TK_OPEN) {
588 "Expected {. Maybe you forgot to enclose an argument in {}");
// Whitespace is kept only while yyvarcode is LM_TC_TEXTRM.
596 if (!isspace(ival_) || yyvarcode == LM_TC_TEXTRM)
597 array.push_back(new MathCharInset(ival_, yyvarcode));
600 case LM_TK_ARGUMENT: {
601 MathMacroArgument * p = new MathMacroArgument(ival_);
602 //p->code(yyvarcode);
608 array.push_back(new MathCharInset(ival_, LM_TC_SPECIAL));
612 array.push_back(new MathCharInset(ival_, LM_TC_CONST));
// An expected opening brace satisfies FLAG_BRACE; the visible alternative
// stores the brace as a literal TeX character.
617 if (flags & FLAG_BRACE)
618 flags &= ~FLAG_BRACE;
620 array.push_back(new MathCharInset('{', LM_TC_TEX));
626 error("Unmatching braces");
// A closing brace ends a pending font change.
630 if (flags & FLAG_BRACE_FONT) {
631 yyvarcode = LM_TC_VAR;
632 flags &= ~FLAG_BRACE_FONT;
// The outermost closing brace is tested against FLAG_BRACE_LAST.
635 if (brace == 0 && (flags & FLAG_BRACE_LAST))
638 array.push_back(new MathCharInset('}', LM_TC_TEX));
642 array.push_back(new MathCharInset('[', LM_TC_CONST));
646 if (flags & FLAG_BRACK_END)
649 array.push_back(new MathCharInset(']', LM_TC_CONST));
// Scripts: cell(0) is used with up = true, cell(1) with down = true.
654 lastScriptInset(array, true, false, limits)->cell(0), FLAG_ITEM);
659 lastScriptInset(array, false, true, limits)->cell(1), FLAG_ITEM);
664 //lyxerr << "setting limit to " << limits << "\n";
// Column separator: expected only when FLAG_AMPERSAND is set.
668 if (flags & FLAG_AMPERSAND) {
669 flags &= ~FLAG_AMPERSAND;
672 lyxerr[Debug::MATHED]
673 << "found tab unexpectedly, array: '" << array << "'\n";
// Row separator: an optional bracketed skip is stored in curr_skip_.
678 curr_skip_ = lexArg('[');
679 if (flags & FLAG_NEWLINE) {
680 flags &= ~FLAG_NEWLINE;
683 lyxerr[Debug::MATHED]
684 << "found newline unexpectedly, array: '" << array << "'\n";
// Tokens that map directly to one inset built from the key entry lval_.
694 array.push_back(new MathNoglyphInset(lval_));
699 array.push_back(new MathBigopInset(lval_));
704 array.push_back(new MathFuncLimInset(lval_));
709 array.push_back(new MathSymbolInset(lval_));
713 array.push_back(new MathCharInset(ival_, LM_TC_BOP));
718 array.push_back(new MathSpaceInset(ival_));
722 array.push_back(new MathDotsInset(lval_));
// Two-argument constructs: each cell is read as one item.
727 MathStackrelInset * p = new MathStackrelInset;
728 parse_into(p->cell(0), FLAG_ITEM);
729 parse_into(p->cell(1), FLAG_ITEM);
736 MathFracInset * p = new MathFracInset;
737 parse_into(p->cell(0), FLAG_ITEM);
738 parse_into(p->cell(1), FLAG_ITEM);
// Roots: the next byte selects between a root inset, whose first cell is
// read up to the closing bracket, and a plain square root.
745 unsigned char c = getuchar();
747 array.push_back(new MathRootInset);
748 parse_into(array.back()->cell(0), FLAG_BRACK_END);
749 parse_into(array.back()->cell(1), FLAG_ITEM);
752 array.push_back(new MathSqrtInset);
753 parse_into(array.back()->cell(0), FLAG_ITEM);
// Delimited group: left delimiter, contents up to the matching right,
// then the right delimiter.
760 latexkeys const * l = read_delim();
762 parse_into(ar, FLAG_RIGHT);
763 latexkeys const * r = read_delim();
764 MathDelimInset * dl = new MathDelimInset(l, r);
771 if (flags & FLAG_RIGHT)
773 error("Unmatched right delimiter");
// Font command: switch the text code and require a braced argument whose
// closing brace restores the default.
778 yyvarcode = static_cast<MathTextCodes>(lval_->id);
779 flags |= (FLAG_BRACE | FLAG_BRACE_FONT);
784 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
785 //MathArray tmp = array;
786 //MathSizeInset * p = new MathSizeInset(MathStyles(lval_->id));
787 //array.push_back(p);
788 //parse_into(p->cell(0), FLAG_BRACE_FONT);
792 case LM_TK_DECORATION:
794 MathDecorationInset * p = new MathDecorationInset(lval_);
795 parse_into(p->cell(0), FLAG_ITEM);
805 array.push_back(new MathSymbolInset(lval_));
// Known macro names are instantiated and get one item per argument; the
// other visible branch creates a MathFuncInset.
809 if (MathMacroTable::hasTemplate(sval_)) {
810 MathMacro * m = MathMacroTable::cloneTemplate(sval_);
811 for (int i = 0; i < m->nargs(); ++i)
812 parse_into(m->cell(i), FLAG_ITEM);
814 m->metrics(LM_ST_TEXT);
816 array.push_back(new MathFuncInset(sval_));
// Nested environment: for LM_OT_MATRIX an array inset is built.
826 MathInsetTypes typ = latex_mathenv[i].typ;
828 if (typ == LM_OT_MATRIX) {
// Appending c guarantees that valign[0] exists when no bracket argument
// was given.
829 string const valign = lexArg('[') + 'c';
830 string const halign = lexArg('{');
831 //lyxerr << "valign: '" << valign << "'\n";
832 //lyxerr << "halign: '" << halign << "'\n";
// One column per character of the alignment string.
833 MathArrayInset * m = new MathArrayInset(halign.size(), 1);
834 m->valign(valign[0]);
837 parse_lines(m, halign.size(), latex_mathenv[i].numbered, false);
839 //lyxerr << "read matrix " << *m << "\n";
842 lyxerr[Debug::MATHED] << "unknow math inset " << typ << "\n";
847 array.push_back(MathMacroTable::cloneTemplate(lval_->name));
// The label text is read with blanks preserved.
851 curr_label_ = lexArg('{', true);
855 error("Unrecognized token");
856 lyxerr[Debug::MATHED] << "[" << t << " " << sval_ << "]" << endl;
859 } // end of big switch
861 if (flags & FLAG_LEAVE) {
862 flags &= ~FLAG_LEAVE;
867 lyxerr << " Math Panic, expect problems!" << endl;
868 // Search for the end command.
871 } while (is_.good() && t != LM_TK_END && t);
// Finishes reading a formula from a LyXLex: copies the line number reached
// by the parser back into `lex`, then compares the string from `lex` with
// the inset terminator. A diagnostic is logged on the debug channel
// (presumably when the terminator is not found -- TODO confirm).
879 void parse_end(LyXLex & lex, int lineno)
881 // Update line number
882 lex.setLineNo(lineno);
884 // reading of end_inset
887 if (lex.getString() == "\\end_inset")
889 lyxerr[Debug::MATHED] << "InsetFormula::Read: Garbage before \\end_inset,"
890 " or missing \\end_inset!" << endl;
894 } // anonymous namespace
// Parses the math source in `str` with no termination flags and returns the
// resulting MathArray (presumably `ar` -- the return is not visible here).
898 MathArray mathed_parse_cell(string const & str)
900 istringstream is(str.c_str());
903 parser.parse_into(ar, 0);
// Parses a macro definition held in a string and returns the result of
// Parser::parse_macro. `parser` presumably reads from `is`.
909 MathMacroTemplate * mathed_parse_macro(string const & str)
911 istringstream is(str.c_str());
913 return parser.parse_macro();
// Parses a macro definition from an input stream and returns the result of
// Parser::parse_macro. `parser` presumably reads from `is`.
916 MathMacroTemplate * mathed_parse_macro(istream & is)
919 return parser.parse_macro();
// Parses a macro definition from a LyXLex, then hands the line number
// reached by the parser to parse_end, which updates `lex`.
922 MathMacroTemplate * mathed_parse_macro(LyXLex & lex)
925 MathMacroTemplate * p = parser.parse_macro();
926 parse_end(lex, parser.lineno());
// Parses a math environment held in a string and returns the result of
// Parser::parse_normal. `parser` presumably reads from `is`.
932 MathMatrixInset * mathed_parse_normal(string const & str)
934 istringstream is(str.c_str());
936 return parser.parse_normal();
// Parses a math environment from an input stream and returns the result of
// Parser::parse_normal. `parser` presumably reads from `is`.
939 MathMatrixInset * mathed_parse_normal(istream & is)
942 return parser.parse_normal();
// Parses a math environment from a LyXLex, then hands the line number
// reached by the parser to parse_end, which updates `lex`.
945 MathMatrixInset * mathed_parse_normal(LyXLex & lex)
948 MathMatrixInset * p = parser.parse_normal();
949 parse_end(lex, parser.lineno());