3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
23 #pragma implementation
26 #include "math_parser.h"
28 #include "math_inset.h"
29 #include "math_macro.h"
30 #include "math_macrotable.h"
31 #include "math_macrotemplate.h"
32 #include "math_root.h"
33 #include "math_arrayinset.h"
34 #include "math_sqrtinset.h"
35 #include "math_matrixinset.h"
36 #include "math_accentinset.h"
37 #include "math_bigopinset.h"
38 #include "math_funcinset.h"
39 #include "math_spaceinset.h"
40 #include "math_sizeinset.h"
41 #include "math_dotsinset.h"
42 #include "math_fracinset.h"
43 #include "math_deliminset.h"
44 #include "math_decorationinset.h"
46 #include "mathed/support.h"
54 // These are lexical codes, not semantic
60 LexBOP, // Binary operators or relations
// Lexical class of every possible byte value, indexed by the (unsigned)
// character code.
72 lexcode_enum lexcode[256];
// Characters for which the tokenizer returns LM_TK_SPECIAL.
76 char const * latex_special_chars = "#$%&_{}";
// Forward declaration: helpers defined before the implementation
// (e.g. handle_frac, mathed_parse_lines) call this parser recursively.
// `flags` is a bitwise OR of the FLAG_* values.
79 /// Read TeX into data, flags give stop conditions
80 void mathed_parse(MathArray & data, unsigned flags);
// Token values for the brace characters; they are the characters themselves.
85 const char LM_TK_OPEN = '{';
86 const char LM_TK_CLOSE = '}';
// Bit flags for the `flags` argument of mathed_parse(MathArray &, unsigned).
// They are tested and cleared individually with & and &= ~, so several can
// be OR-ed together. Bits 1 and 8 are not assigned.
89 FLAG_BRACE = 1 << 0, // A { needed //}
90 FLAG_BRACE_OPT = 1 << 2, // Optional {
91 FLAG_BRACE_LAST = 1 << 3, // Last } ends the parsing process
92 FLAG_BRACK_ARG = 1 << 4, // Optional [
93 FLAG_RIGHT = 1 << 5, // Next right ends the parsing process
94 FLAG_END = 1 << 6, // Next end ends the parsing process
95 FLAG_BRACE_FONT = 1 << 7, // Next } closes a font
96 FLAG_BRACK_END = 1 << 9, // Next ] ends the parsing process
97 FLAG_AMPERSAND = 1 << 10, // Next & ends the parsing process
98 FLAG_NEWLINE = 1 << 11 // Next \\ ends the parsing process
// Text code given to plain characters as they are inserted.
// mathed_parse(MathArray &, unsigned) resets it to LM_TC_VAR on entry and
// font-changing tokens overwrite it.
114 MathTextCodes yyvarcode;
// Description of one LaTeX math environment known to the parser.
118 struct latex_mathenv_type {
// Name applied to the resulting inset via SetName().
120 char const * basename;
// Table of recognised environments. The columns appear to be: the name as
// written after \begin (compared against yytext), the base name without the
// trailing '*', the inset type, the "numbered" flag (0 for starred
// variants), and a fifth value whose meaning is not evident from this file
// section -- TODO confirm against the struct declaration.
126 latex_mathenv_type latex_mathenv[] = {
127 {"math", "math", LM_OT_SIMPLE, 0, 0},
128 {"equation*", "equation", LM_OT_EQUATION, 0, 0},
129 {"equation", "equation", LM_OT_EQUATION, 1, 0},
130 {"eqnarray*", "eqnarray", LM_OT_EQNARRAY, 0, 0},
131 {"eqnarray", "eqnarray", LM_OT_EQNARRAY, 1, 0},
132 {"align*", "align", LM_OT_ALIGN, 0, 1},
133 {"align", "align", LM_OT_ALIGN, 1, 1},
134 {"alignat*", "alignat", LM_OT_ALIGNAT, 0, 1},
135 {"alignat", "alignat", LM_OT_ALIGNAT, 1, 1},
136 {"multline*", "multline", LM_OT_MULTLINE, 0, 1},
137 {"multline", "multline", LM_OT_MULTLINE, 1, 1},
138 {"array", "array", LM_OT_MATRIX, 0, 1}
// Number of entries in latex_mathenv; used as the upper bound when the
// table is searched by name.
141 int const latex_mathenv_num = sizeof(latex_mathenv)/sizeof(latex_mathenv[0]);
// Report a math parse error on lyxerr.
// msg: human-readable description of the problem.
// The message is prefixed with the current value of yylineno, which is only
// approximate (hence the '~' in the output).
145 void mathPrintError(string const & msg)
147 lyxerr << "Line ~" << yylineno << ": Math parse error: " << msg << endl;
// Fills the lexcode table.
// NOTE(review): presumably the body of LexInitCodes(), which the tokenizer
// calls before classifying characters -- confirm.
// First pass: every byte value 0..255 receives one of the default classes
// LexAlpha, LexDigit, LexSpace or LexNone.
153 for (int i = 0; i <= 255; ++i) {
155 lexcode[i] = LexAlpha;
157 lexcode[i] = LexDigit;
159 lexcode[i] = LexSpace;
161 lexcode[i] = LexNone;
// Second pass: explicit overrides for individual characters.
164 lexcode['\t'] = lexcode['\f'] = lexcode[' '] = LexSpace;
165 lexcode['\n'] = LexNewLine;
166 lexcode['%'] = LexComment;
167 lexcode['#'] = LexArgument;
168 lexcode['+'] = lexcode['-'] = lexcode['*'] = lexcode['/']
169 = lexcode['<'] = lexcode['>'] = lexcode['='] = LexBOP;
171 lexcode['!'] = lexcode[','] = lexcode[':']
172 = lexcode[';'] = LexMathSpace;
174 lexcode['('] = lexcode[')'] = lexcode['|'] = lexcode['.'] =
175 lexcode['?'] = LexOther;
// Apostrophe and '@' are classified like letters.
177 lexcode['\''] = lexcode['@'] = LexAlpha;
179 lexcode['['] = lexcode[']'] = lexcode['^'] = lexcode['_'] =
180 lexcode['&'] = LexSelf;
182 lexcode['\\'] = LexESC;
183 lexcode['{'] = LexOpen;
184 lexcode['}'] = LexClose;
// Read one bracketed argument from the input stream yyis.
// lf: the opening bracket; '{', '[' and '(' are mapped to their closing
//     counterparts '}', ']' and ')'. Some callers pass 0 / '\0' instead.
// accept_spaces: when true, a blank inside the argument also passes the
//     character filter below.
// NOTE(review): callers read yytext right after this call, so the argument
// text is presumably collected there -- confirm.
188 char LexGetArg(char lf, bool accept_spaces = false)
190 while (yyis->good()) {
197 lyxerr << "Math parse error: unexpected '" << c << "'" << endl;
// Derive the closing bracket rg that matches the opening bracket lf.
204 if (lf == '{') rg = '}';
205 if (lf == '[') rg = ']';
206 if (lf == '(') rg = ')';
208 lyxerr << "Math parse error: unknown bracket '" << lf << "'" << endl;
// Only characters above ' ' (and ' ' itself when accept_spaces is set)
// qualify, and only while the bracket count bcnt is still positive.
218 if ((c > ' ' || (c == ' ' && accept_spaces)) && bcnt > 0)
// Stop once the bracket count drops to zero or the stream fails.
220 } while (bcnt > 0 && yyis->good());
// Tokenizer body: reads characters from yyis, classifies them through the
// lexcode table and returns LM_TK_* token codes, leaving token values in
// yylval and/or yytext.
228 static int init_done;
// Make sure the lexcode table is filled in before it is consulted.
// NOTE(review): no assignment to init_done is visible in this block --
// confirm that the table is not rebuilt on every call.
230 if (!init_done) LexInitCodes();
232 while (yyis->good()) {
// A blank while the current text code is LM_TC_TEXTRM is handled before the
// lexcode-based dispatch.
236 if (yyvarcode == LM_TC_TEXTRM && c == ' ') {
239 } else if (lexcode[c] == LexNewLine) {
242 } else if (lexcode[c] == LexComment) {
245 } while (c != '\n' && yyis->good()); // eat comments
// Digits, "other" punctuation and math-space characters share one branch.
246 } else if (lexcode[c] == LexDigit
247 || lexcode[c] == LexOther
248 || lexcode[c] == LexMathSpace) {
251 } else if (lexcode[c] == LexAlpha) {
254 } else if (lexcode[c] == LexBOP) {
257 } else if (lexcode[c] == LexSelf) {
259 } else if (lexcode[c] == LexArgument) {
262 return LM_TK_ARGUMENT;
263 } else if (lexcode[c] == LexOpen) {
265 } else if (lexcode[c] == LexClose) {
// Backslash: the start of a control sequence.
267 } else if (lexcode[c] == LexESC) {
271 return LM_TK_NEWLINE;
274 yylval.i = LM_OT_SIMPLE;
278 yylval.i = LM_OT_SIMPLE;
282 yylval.i = LM_OT_EQUATION;
286 yylval.i = LM_OT_EQUATION;
// One of the characters in latex_special_chars yields LM_TK_SPECIAL.
289 if (contains(latex_special_chars, c)) {
291 return LM_TK_SPECIAL;
293 if (lexcode[c] == LexMathSpace) {
// Find c among the first characters of the four latex_mathspace entries;
// index 0 is used when there is no match.
295 for (i = 0; i < 4 && static_cast<int>(c) != latex_mathspace[i][0]; ++i)
297 yylval.i = (i < 4) ? i : 0;
// Control word: a run of LexAlpha / LexDigit characters, afterwards looked
// up in the keyword table with in_word_set().
300 if (lexcode[c] == LexAlpha || lexcode[c] == LexDigit) {
302 while (lexcode[c] == LexAlpha || lexcode[c] == LexDigit) {
308 lyxerr << "reading: text '" << yytext << "'\n";
309 latexkeys const * l = in_word_set(yytext);
313 if (l->token == LM_TK_BEGIN || l->token == LM_TK_END) {
// Linear search of the environment table for the name held in yytext;
// i == latex_mathenv_num afterwards means "not found".
316 while (i < latex_mathenv_num && yytext != latex_mathenv[i].name)
319 } else if (l->token == LM_TK_SPACE)
331 // Accent hacks only for 0.12. Stolen from Cursor.
// Record a pending accent.
// ac: accent code; it is stored in nestaccent only when it is positive and
//     fewer than 8 accents are already pending (accent is the pending count).
// The visible alternative path resets the pending count to zero.
335 void setAccent(int ac)
337 if (ac > 0 && accent < 8)
338 nestaccent[accent++] = ac;
340 accent = 0; // consumed!
// Wrap a single character in the pending accents.
// c: the character to accent; t: its text code.
// Walks nestaccent from the most recently recorded accent (index accent - 1)
// down to index 0, creating MathAccentInset objects with new: one built
// directly from the character and the others wrapping the previously built
// inset. The pending count is cleared afterwards.
344 MathInset * doAccent(byte c, MathTextCodes t)
348 for (int i = accent - 1; i >= 0; --i) {
350 ac = new MathAccentInset(c, t, nestaccent[i]);
352 ac = new MathAccentInset(ac, nestaccent[i]);
354 accent = 0; // consumed!
// Wrap an existing inset in the pending accents.
// p: the inset to accent.
// Same traversal as the character overload: from index accent - 1 down to 0,
// creating MathAccentInset objects with new, one around p and the others
// around the previously built inset. The pending count is cleared afterwards.
360 MathInset * doAccent(MathInset * p)
364 for (int i = accent - 1; i >= 0; --i) {
366 ac = new MathAccentInset(p, nestaccent[i]);
368 ac = new MathAccentInset(ac, nestaccent[i]);
370 accent = 0; // consumed!
// Append inset m to dat.
// The visible path passes m through doAccent() first, so the appended inset
// is wrapped in the accents recorded by setAccent().
376 void do_insert(MathArray & dat, MathInset * m)
379 dat.push_back(doAccent(m));
// Append character ch with text code fcode to dat.
// Two paths are visible: the character is either wrapped in the pending
// accents via doAccent() and appended as an inset, or appended as a plain
// character.
384 void do_insert(MathArray & dat, byte ch, MathTextCodes fcode)
387 dat.push_back(doAccent(ch, fcode));
389 dat.push_back(ch, fcode);
// Parse a two-argument fraction-like construct.
// dat: the array being built by the caller; t: the inset type given to the
//      new MathFracInset (callers pass LM_OT_ATOP, LM_OT_STACKREL or
//      LM_OT_FRAC).
// Both arguments must be brace groups: FLAG_BRACE requires the opening '{'
// and FLAG_BRACE_LAST makes the matching '}' end each recursive parse.
393 void handle_frac(MathArray & dat, MathInsetTypes t)
395 MathFracInset * p = new MathFracInset(t);
396 mathed_parse(p->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
397 mathed_parse(p->cell(1), FLAG_BRACE | FLAG_BRACE_LAST);
// Return the script inset that a following sub-/superscript should use.
// array: the array currently being built.
// If the last inset of array is missing or is not a script inset, a fresh
// MathScriptInset is allocated instead. The static_cast is safe because p is
// either a new MathScriptInset or has passed the isScriptInset() check.
402 MathScriptInset * lastScriptInset(MathArray & array)
404 MathInset * p = array.back_inset();
405 if (!p || !p->isScriptInset()) {
406 p = new MathScriptInset;
409 return static_cast<MathScriptInset *>(p);
// Numbering flag and label of the equation row currently being parsed.
// The environment parsers set them before parsing a row's cells and copy
// them into the MathMatrixInset afterwards.
416 static bool curr_num;
417 static string curr_label;
// Parse the rows of a grid-like environment into inset.
// inset:    the grid to fill; it is accessed as a MathGridInset and, for
//           numbering and labels, as a MathMatrixInset.
// col:      number of columns to read per row.
// numbered: numbering flag from the environment table.
// outmost:  passed as true for top-level environments and false for a nested
//           array.
// curr_num / curr_label are saved on entry and restored on exit so that a
// nested call does not disturb the values of the enclosing row.
419 void mathed_parse_lines(MathInset * inset, int col, bool numbered, bool outmost)
421 // save global variables
422 bool saved_num = curr_num;
423 string saved_label = curr_label;
425 MathGridInset * p = static_cast<MathGridInset *>(inset);
// The loop has no condition of its own; it is left from inside the body.
426 for (int row = 0; true; ++row) {
427 // reset global variables
429 curr_label = string();
// idx starts ncols() cells before the end of the grid, i.e. at the first
// cell of the last row (assuming row-major cell order -- TODO confirm).
432 int idx = p->nargs() - p->ncols();
// All but the last column end at '&'; the last one ends at '\\' or \end.
433 for (int i = 0; i < col - 1; ++i, ++idx)
434 mathed_parse(p->cell(idx), FLAG_AMPERSAND);
435 mathed_parse(p->cell(idx), FLAG_NEWLINE | FLAG_END);
// Store what was collected for this row while its cells were parsed.
438 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
439 m->numbered(row, curr_num);
440 m->label(row, curr_label);
451 // restore global variables
452 curr_num = saved_num;
453 curr_label = saved_label;
// Top-level parse: reads one macro definition or one math environment from
// the current input and returns the inset built for it.
457 MathInset * mathed_parse()
// Macro definition: becomes a MathMacroTemplate whose body is cell(0).
463 case LM_TK_NEWCOMMAND: {
// Drop the first character of the token text to get the macro name
// (presumably the leading backslash -- confirm).
465 string name = yytext.substr(1);
468 char const c = yyis->peek();
// Number of macro arguments, converted from the text in yytext.
471 na = atoi(yytext.c_str());
474 p = new MathMacroTemplate(name, na);
475 mathed_parse(p->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
476 lyxerr << "LM_TK_NEWCOMMAND: name: " << name << " na: " << na << "\n";
// Math environment: i indexes latex_mathenv.
482 lyxerr << "reading math environment " << i << " "
483 << latex_mathenv[i].name << "\n";
485 MathInsetTypes typ = latex_mathenv[i].typ;
486 p = new MathMatrixInset(typ);
// Single-cell case parsed without any stop flag; numbering and label
// collected during the parse are stored for row 0.
490 curr_num = latex_mathenv[i].numbered;
491 curr_label = string();
492 mathed_parse(p->cell(0), 0);
493 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
494 m->numbered(0, curr_num);
495 m->label(0, curr_label);
// Single-cell equation: parsing stops at the matching \end.
499 case LM_OT_EQUATION: {
500 curr_num = latex_mathenv[i].numbered;
501 curr_label = string();
502 mathed_parse(p->cell(0), FLAG_END);
503 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
504 m->numbered(0, curr_num);
505 m->label(0, curr_label);
// eqnarray rows are read with three columns.
509 case LM_OT_EQNARRAY: {
510 mathed_parse_lines(p, 3, latex_mathenv[i].numbered, true);
// alignat is read with a fixed two columns; the column-count argument is
// not evaluated (see the commented-out atoi below).
514 case LM_OT_ALIGNAT: {
516 //int c = atoi(yytext.c_str());
517 lyxerr << "LM_OT_ALIGNAT: not implemented\n";
518 mathed_parse_lines(p, 2, latex_mathenv[i].numbered, true);
519 lyxerr << "LM_OT_ALIGNAT: par: " << *p << "\n";
524 lyxerr << "1: unknown math environment: " << typ << "\n";
// The inset is named after the environment's base name (no trailing '*').
527 p->SetName(latex_mathenv[i].basename);
533 lyxerr << "2 unknown math environment: " << t << "\n";
// Core recursive parser: consumes tokens and appends the corresponding
// characters and insets to array.
// array: destination for the parsed content.
// flags: bitwise OR of FLAG_* values selecting what must come first (e.g. a
//        '{') and which token ends this invocation; individual bits are
//        cleared as they are satisfied.
// Nested constructs (fractions, roots, scripts, delimiters, macro arguments,
// arrays) are handled by recursive calls on the cells of newly created
// insets.
540 void mathed_parse(MathArray & array, unsigned flags)
// NOTE(review): static, hence shared by all recursive invocations.
545 static int plevel = -1;
// Every invocation starts with the default variable text code.
546 yyvarcode = LM_TC_VAR;
554 //lyxerr << "t: " << t << " flags: " << flags;
555 //array.dump(lyxerr);
// A required '{' is missing -- unless an optional '[' argument is allowed
// and the token is '['.
558 if ((flags & FLAG_BRACE) && t != LM_TK_OPEN) {
559 if (!(flags & FLAG_BRACK_ARG) || t != '[') {
561 "Expected {. Maybe you forgot to enclose an argument in {}");
570 do_insert(array, yylval.i, yyvarcode);
574 array.push_back(new MathMacroArgument(yylval.i));
578 array.push_back(yylval.i, LM_TC_SPECIAL);
582 do_insert(array, yylval.i, LM_TC_CONST);
// Remember the brace level at which the argument of an accent was opened.
// NOTE(review): no bounds check on acc_braces is visible here.
587 if (accent && tprev == LM_TK_ACCENT) {
588 acc_braces[acc_brace++] = brace;
// An optional or required opening brace has been seen: clear the flag.
591 if (flags & FLAG_BRACE_OPT) {
592 flags &= ~FLAG_BRACE_OPT;
596 if (flags & FLAG_BRACE)
597 flags &= ~FLAG_BRACE;
// Otherwise the brace is kept in the array as literal TeX.
599 array.push_back('{', LM_TC_TEX);
605 mathPrintError("Unmatching braces");
// The brace that opened an accent argument is being closed.
609 if (acc_brace && brace == acc_braces[acc_brace - 1] - 1) {
// A closing brace ends a font change: back to the default text code.
613 if (flags & FLAG_BRACE_FONT) {
614 yyvarcode = LM_TC_VAR;
615 flags &= ~FLAG_BRACE_FONT;
// Stop condition: brace depth back at zero while FLAG_BRACE_LAST is set.
618 if (brace == 0 && (flags & FLAG_BRACE_LAST)) {
622 array.push_back('}', LM_TC_TEX);
// '[' either starts an optional argument (read with LexGetArg) or is kept
// as an ordinary character.
626 if (flags & FLAG_BRACK_ARG) {
627 flags &= ~FLAG_BRACK_ARG;
628 char const rg = LexGetArg('[');
630 mathPrintError("Expected ']'");
635 array.push_back('[', LM_TC_CONST);
// ']' either ends this invocation (FLAG_BRACK_END) or is kept as an
// ordinary character.
639 if (flags & FLAG_BRACK_END) {
643 array.push_back(']', LM_TC_CONST);
// Script argument: parsed into the temporary ar and then handed to the
// script inset at the end of array.
649 mathed_parse(ar, FLAG_BRACE_OPT | FLAG_BRACE_LAST);
650 MathScriptInset * p = lastScriptInset(array);
659 mathed_parse(ar, FLAG_BRACE_OPT | FLAG_BRACE_LAST);
660 MathScriptInset * p = lastScriptInset(array);
// Limits setting: a non-zero keyword id maps to 1, zero maps to -1.
668 MathScriptInset * p = lastScriptInset(array);
670 p->limits(yylval.l->id ? 1 : -1);
// '&' is only legal while a grid cell is being parsed (FLAG_AMPERSAND).
676 if (flags & FLAG_AMPERSAND) {
677 flags &= ~FLAG_AMPERSAND;
681 lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
// Likewise '\\' is only legal when FLAG_NEWLINE is set.
687 if (flags & FLAG_NEWLINE) {
688 flags &= ~FLAG_NEWLINE;
692 lyxerr << "found newline unexpectedly, array: '" << array << "'\n";
698 array.push_back(new MathBigopInset(yylval.l->name, yylval.l->id));
// Symbols with an id below 256 are inserted as single characters; others
// become a MathFuncInset carrying the symbol name.
703 if (yylval.l->id < 256) {
704 MathTextCodes tc = MathIsBOPS(yylval.l->id) ? LM_TC_BOPS: LM_TC_SYMB;
705 do_insert(array, yylval.l->id, tc);
707 do_insert(array, new MathFuncInset(yylval.l->name));
711 do_insert(array, yylval.i, LM_TC_BOP);
716 array.push_back(new MathSpaceInset(yylval.i));
720 array.push_back(new MathDotsInset(yylval.l->name, yylval.l->id));
// The three fraction-like constructs share handle_frac().
724 handle_frac(array, LM_OT_ATOP);
728 handle_frac(array, LM_OT_STACKREL);
732 handle_frac(array, LM_OT_FRAC);
// Root with a bracketed argument: cell(0) is read up to ']', cell(1) is the
// brace group that follows.
740 MathRootInset * rt = new MathRootInset;
741 mathed_parse(rt->cell(0), FLAG_BRACK_END);
742 mathed_parse(rt->cell(1), FLAG_BRACE | FLAG_BRACE_LAST);
746 MathSqrtInset * sq = new MathSqrtInset;
747 mathed_parse(sq->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
// Delimiter pair: determine the left delimiter ld, parse the content up to
// the matching right-delimiter token, then determine the right delimiter rd.
758 else if (ld == LM_TK_STR || ld == LM_TK_BOP || ld == LM_TK_SPECIAL)
762 mathed_parse(ar, FLAG_RIGHT);
767 else if (rd == LM_TK_STR || rd == LM_TK_BOP || rd == LM_TK_SPECIAL)
770 MathDelimInset * dl = new MathDelimInset(ld, rd);
// A right-delimiter token is only legal when this invocation was started
// with FLAG_RIGHT.
777 if (flags & FLAG_RIGHT) {
781 mathPrintError("Unmatched right delimiter");
// Font change: switch the text code and require a brace group whose closing
// brace restores the default (see the FLAG_BRACE_FONT handling above).
786 yyvarcode = static_cast<MathTextCodes>(yylval.l->id);
787 flags |= (FLAG_BRACE | FLAG_BRACE_FONT);
792 lyxerr << "LM_TK_STY not implemented\n";
793 //MathArray tmp = array;
794 //MathSizeInset * p = new MathSizeInset(MathStyles(yylval.l->id));
795 //array.push_back(p);
796 //mathed_parse(p->cell(0), FLAG_BRACE_FONT);
803 MathDecorationInset * sq = new MathDecorationInset(yylval.l->id);
804 mathed_parse(sq->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
// Accents are only recorded here; they are applied by the next do_insert().
810 setAccent(yylval.l->id);
820 array.push_back(t, LM_TC_CONST);
822 array.push_back(new MathFuncInset(yylval.l->name));
826 array.push_back(new MathFuncInset(yylval.l->name, LM_OT_FUNCLIM));
// Known macro: clone its template and parse one argument per cell.
// Anything else becomes a function inset of type LM_OT_UNDEF.
830 if (MathMacroTable::hasTemplate(yytext)) {
831 MathMacro * m = MathMacroTable::cloneTemplate(yytext);
832 for (int i = 0; i < m->nargs(); ++i) {
833 mathed_parse(m->cell(i), FLAG_BRACE_OPT | FLAG_BRACE_LAST);
834 lyxerr << "reading cell " << i << " '" << m->cell(i) << "'\n";
838 do_insert(array, new MathFuncInset(yytext, LM_OT_UNDEF));
// Nested environment: only LM_OT_MATRIX is handled here.
848 MathInsetTypes typ = latex_mathenv[i].typ;
850 if (typ == LM_OT_MATRIX) {
// The "\0" literal yields an empty string, so valign[0] is '\0' unless
// valign is assigned in between.
851 string valign = "\0";
852 char rg = LexGetArg(0);
// The column specification text gives one column per character.
858 string halign = yytext;
859 MathArrayInset * mm = new MathArrayInset(halign.size(), 1);
861 mm->valign(valign[0]);
864 mathed_parse_lines(mm, halign.size(), latex_mathenv[i].numbered, false);
865 do_insert(array, mm);
866 //lyxerr << "read matrix " << *mm << "\n";
869 lyxerr << "unknow math inset " << typ << "\n";
874 do_insert(array, MathMacroTable::cloneTemplate(yylval.l->name));
// Argument read with blanks accepted (second parameter true).
879 char const rg = LexGetArg('\0', true);
881 mathPrintError("Expected '{'");
883 lyxerr << "[" << yytext << "]" << endl;
887 //lyxerr << " setting label to " << yytext << "\n";
893 mathPrintError("Unrecognized token");
894 lyxerr << "[" << t << " " << yytext << "]" << endl;
897 } // end of big switch
// Error recovery: tokens are skipped until LM_TK_END or a zero token.
901 lyxerr << " Math Panic, expect problems!" << endl;
902 // Search for the end command.
905 } while (t != LM_TK_END && t);
// An optional brace that never showed up: drop the flag.
909 if (flags & FLAG_BRACE_OPT) {
910 flags &= ~FLAG_BRACE_OPT;
// Entry point for parsing from an input stream.
// is: the stream to parse.
// Returns whatever the parameterless mathed_parse() returns.
918 MathInset * mathed_parse(istream & is)
922 return mathed_parse();
// Entry point for parsing a math inset out of a LyX document lexer.
// lex: the lexer; its stream becomes the parser input and its line counter
//      is kept in step with the parser's yylineno.
// After the inset has been parsed, the next string from lex is checked
// against "\end_inset" and a diagnostic is written to lyxerr.
926 MathInset * mathed_parse(LyXLex & lex)
// Take over the lexer's stream and current line number.
928 yyis = &lex.getStream();
929 yylineno = lex.GetLineNo();
931 MathInset * p = mathed_parse();
933 // Update line number
934 lex.setLineNo(yylineno);
936 // reading of end_inset
939 if (lex.GetString() == "\\end_inset")
941 lyxerr << "InsetFormula::Read: Garbage before \\end_inset,"
942 " or missing \\end_inset!" << endl;