3 * Purpose: Parser for mathed
4 * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
5 * Created: January 1996
6 * Description: Parse LaTeX2e math mode code.
8 * Dependencies: Xlib, XForms
10 * Copyright: 1996, Alejandro Aguilar Sierra
14 * You are free to use and modify this code under the terms of
15 * the GNU General Public Licence version 2 or later.
25 #pragma implementation
28 #include "math_parser.h"
30 #include "math_inset.h"
31 #include "math_arrayinset.h"
33 #include "math_bigopinset.h"
35 #include "math_dotsinset.h"
36 #include "math_decorationinset.h"
37 #include "math_deliminset.h"
38 #include "math_fracinset.h"
39 #include "math_funcinset.h"
41 #include "math_funcliminset.h"
43 #include "math_macro.h"
44 #include "math_macrotable.h"
45 #include "math_macrotemplate.h"
46 #include "math_matrixinset.h"
47 #include "math_rootinset.h"
48 #include "math_scriptinset.h"
49 #include "math_sizeinset.h"
50 #include "math_spaceinset.h"
51 #include "math_sqrtinset.h"
52 #include "math_symbolinset.h"
54 #include "mathed/support.h"
56 #include "support/lstrings.h"
63 // These are lexical codes, not semantic
69 LexBOP, // Binary operators or relations
// Lexical class for every possible byte value; the lexer indexes it with an unsigned char.
82 lexcode_enum lexcode[256];
// Characters tested with contains() in the lexer just before it returns LM_TK_SPECIAL.
85 char const * latex_special_chars = "#$%&_{}";
// Forward declaration; the definition appears further down in this file.
90 void mathed_parse_into(MathArray & array, unsigned flags);
// Returns a character obtained from *is, converted to unsigned char.
// NOTE(review): the read itself and the condition guarding the warning below are
// not visible in this excerpt (presumably a stream-state check -- confirm).
92 unsigned char getuchar(std::istream * is)
97 lyxerr << "The input stream is not well..." << endl;
// Callers use the result as an index into lexcode[], hence the unsigned type.
99 return static_cast<unsigned char>(c);
// Token values for the two brace characters. LM_TK_OPEN is compared against the
// current token in mathed_parse_into().
102 const unsigned char LM_TK_OPEN = '{';
103 const unsigned char LM_TK_CLOSE = '}';
// Bit flags for the 'flags' argument of mathed_parse_into(). Each occupies its own
// bit, so callers combine them with '|' (e.g. FLAG_NEWLINE | FLAG_END).
// The stray "{" / "}" inside some comments only keep brace matching balanced in editors.
106 FLAG_BRACE = 1 << 0, // A { needed //}
107 FLAG_BRACE_LAST = 1 << 1, // // { Last } ends the parsing process
108 FLAG_RIGHT = 1 << 2, // Next right ends the parsing process
109 FLAG_END = 1 << 3, // Next end ends the parsing process
110 FLAG_BRACE_FONT = 1 << 4, // // { Next } closes a font
111 FLAG_BRACK_END = 1 << 5, // // [ Next ] ends the parsing process
112 FLAG_AMPERSAND = 1 << 6, // Next & ends the parsing process
113 FLAG_NEWLINE = 1 << 7, // Next \\ ends the parsing process
114 FLAG_ITEM = 1 << 8, // read a (possibly braced token)
115 FLAG_LEAVE = 1 << 9, // marker for leaving the parse loop (original comment was cut off; presumably the loop in mathed_parse_into -- confirm)
116 FLAG_OPTARG = 1 << 10 // reads an argument in []
// Row type of the latex_mathenv table. Only part of the member list is visible in this excerpt.
135 struct latex_mathenv_type {
// Environment name without a trailing '*' (compare the table rows); it is what
// mathed_parse_normal() passes to setName().
137 char const * basename;
// Table of the math environments the parser knows about.
// Columns, in order:
//   1. name as matched by the lexer (the lexer compares against the .name member),
//   2. basename: the same name with any trailing '*' removed,
//   3. inset type (read elsewhere as .typ),
//   4. numbered flag (read elsewhere as .numbered): 0 for every starred variant and
//      for "math" and "array", 1 for the unstarred equation/eqnarray/align/alignat/multline,
//   5. a fifth value whose member name and meaning are not visible in this excerpt.
143 latex_mathenv_type latex_mathenv[] = {
144 {"math", "math", LM_OT_SIMPLE, 0, 0},
145 {"equation*", "equation", LM_OT_EQUATION, 0, 0},
146 {"equation", "equation", LM_OT_EQUATION, 1, 0},
147 {"eqnarray*", "eqnarray", LM_OT_EQNARRAY, 0, 0},
148 {"eqnarray", "eqnarray", LM_OT_EQNARRAY, 1, 0},
149 {"align*", "align", LM_OT_ALIGN, 0, 1},
150 {"align", "align", LM_OT_ALIGN, 1, 1},
151 {"alignat*", "alignat", LM_OT_ALIGNAT, 0, 1},
152 {"alignat", "alignat", LM_OT_ALIGNAT, 1, 1},
153 {"multline*", "multline", LM_OT_MULTLINE, 0, 1},
154 {"multline", "multline", LM_OT_MULTLINE, 1, 1},
155 {"array", "array", LM_OT_MATRIX, 0, 1}
// Number of rows in latex_mathenv, derived from the array size so it tracks the table
// automatically. The lexer uses it as the upper bound of its name search.
158 int const latex_mathenv_num = sizeof(latex_mathenv)/sizeof(latex_mathenv[0]);
// Reports a math parse error: writes msg to lyxerr, prefixed with the approximate
// current line number taken from yylineno.
162 void mathPrintError(string const & msg)
// Alternative that would route the message to the Debug::MATHED channel; left disabled.
164 //lyxerr[Debug::MATHED] << "Line ~" << yylineno << ": Math parse error: " << msg << endl;
165 lyxerr << "Line ~" << yylineno << ": Math parse error: " << msg << endl;
// Fills lexcode[]: a default class is chosen for each of the 256 byte values, then the
// explicit assignments further down override it for individual characters.
// NOTE(review): the enclosing function header and the conditions that select between
// the three defaults are not visible in this excerpt.
171 for (int i = 0; i <= 255; ++i) {
173 lexcode[i] = LexDigit;
175 lexcode[i] = LexSpace;
177 lexcode[i] = LexAlpha;
// Per-character classes.
180 lexcode['\t'] = lexcode['\f'] = lexcode[' '] = LexSpace;
181 lexcode['\n'] = LexNewLine;
182 lexcode['%'] = LexComment;
183 lexcode['#'] = LexArgument;
184 lexcode['$'] = LexMath;
185 lexcode['+'] = lexcode['-'] = lexcode['*'] = lexcode['/']
186 = lexcode['<'] = lexcode['>'] = lexcode['='] = LexBOP;
188 lexcode['!'] = lexcode[','] = lexcode[':']
189 = lexcode[';'] = LexMathSpace;
191 lexcode['('] = lexcode[')'] = lexcode['|'] = lexcode['.'] =
192 lexcode['?'] = LexOther;
// Apostrophe and '@' are placed in the same class as letters.
194 lexcode['\''] = lexcode['@'] = LexAlpha;
196 lexcode['['] = lexcode[']'] = lexcode['^'] = lexcode['_'] =
197 lexcode['&'] = LexSelf;
// Backslash, '{' and '}' each get a dedicated class.
199 lexcode['\\'] = LexESC;
200 lexcode['{'] = LexOpen;
201 lexcode['}'] = LexClose;
// Reads a delimited argument from the input stream yyis and returns it as a string.
//   lf            - opening delimiter; '{', '[' and '(' are recognised.
//   accept_spaces - when true, a plain space inside the argument passes the filter
//                   below; every other whitespace character is always rejected by it.
// NOTE(review): the lines that track 'depth' and build the result are not visible in
// this excerpt.
205 string lexArg(unsigned char lf, bool accept_spaces = false)
209 while (yyis->good()) {
// rg is the closing delimiter that pairs with lf; it stays 0 for any other lf.
220 unsigned char rg = 0;
221 if (lf == '{') rg = '}';
222 if (lf == '[') rg = ']';
223 if (lf == '(') rg = ')';
// Debug-channel message for an opening delimiter that is not one of the three above.
225 lyxerr[Debug::MATHED] << "Math parse error: unknown bracket '"
226 << lf << "'" << endl;
232 unsigned char c = getuchar(yyis);
// Filter: only while inside the delimiters (depth > 0), and only non-whitespace
// characters or, with accept_spaces, a plain space.
237 if ((!isspace(c) || (c == ' ' && accept_spaces)) && depth > 0)
// Stop once depth has dropped to 0 or the stream is no longer good.
239 } while (depth > 0 && yyis->good());
// Lexer body. The function header is not visible in this excerpt; presumably this is
// the yylex() that mathed_parse_macro() calls -- confirm.
// It reads bytes from yyis, classifies each through lexcode[] and reports tokens, using
// yylval for the token's value.
// init_done persists across calls; NOTE(review): presumably it guards one-time setup of
// lexcode[] -- the code using it is not visible here.
247 static bool init_done = false;
// Main loop: one byte per iteration, dispatched on its lexical class.
254 while (yyis->good()) {
255 unsigned char c = getuchar(yyis);
256 //lyxerr << "reading byte: '" << c << "' code: " << lexcode[c] << endl;
258 if (lexcode[c] == LexNewLine) {
261 } else if (lexcode[c] == LexComment) {
264 } while (c != '\n' && yyis->good()); // eat comments
265 } else if (lexcode[c] == LexDigit
266 || lexcode[c] == LexOther
267 || lexcode[c] == LexMathSpace) {
270 } else if (lexcode[c] == LexAlpha || lexcode[c] == LexSpace) {
273 } else if (lexcode[c] == LexBOP) {
276 } else if (lexcode[c] == LexMath) {
279 } else if (lexcode[c] == LexSelf) {
281 } else if (lexcode[c] == LexArgument) {
284 return LM_TK_ARGUMENT;
285 } else if (lexcode[c] == LexOpen) {
287 } else if (lexcode[c] == LexClose) {
// Backslash (the only character given class LexESC in the table setup).
289 } else if (lexcode[c] == LexESC) {
293 return LM_TK_NEWLINE;
296 yylval.i = LM_OT_SIMPLE;
300 yylval.i = LM_OT_SIMPLE;
304 yylval.i = LM_OT_EQUATION;
308 yylval.i = LM_OT_EQUATION;
// One of the characters listed in latex_special_chars.
315 if (contains(latex_special_chars, c)) {
317 return LM_TK_SPECIAL;
319 if (lexcode[c] == LexMathSpace) {
// Linear search of the first four latex_mathspace entries for one whose first
// character equals c; index 0 is used when none matches.
321 for (i = 0; i < 4 && static_cast<int>(c) != latex_mathspace[i][0]; ++i)
323 yylval.i = (i < 4) ? i : 0;
// A run of LexAlpha characters, followed by a run of LexSpace characters.
// NOTE(review): the loop bodies are not visible; presumably the first run is collected
// into yytext and the second is skipped.
326 if (lexcode[c] == LexAlpha) {
328 while (lexcode[c] == LexAlpha && yyis->good()) {
332 while (lexcode[c] == LexSpace && yyis->good())
334 if (lexcode[c] != LexSpace)
337 //lyxerr[Debug::MATHED] << "reading: text '" << yytext << "'\n";
// Look yytext up in the keyword table.
338 latexkeys const * l = in_word_set(yytext);
// For LM_TK_BEGIN / LM_TK_END, read the brace-delimited environment name and search
// latex_mathenv[] for an entry with that name (i reaches latex_mathenv_num when absent).
342 if (l->token == LM_TK_BEGIN || l->token == LM_TK_END) {
343 string name = lexArg('{');
345 while (i < latex_mathenv_num && name != latex_mathenv[i].name)
348 } else if (l->token == LM_TK_SPACE)
// Returns the last element of array as a MathScriptInset when isScriptInset() says it
// is one; returns 0 otherwise.
360 MathScriptInset * prevScriptInset(MathArray const & array)
// NOTE(review): the null test below suggests back() yields a null pointer for an empty
// array -- confirm against MathArray.
362 MathInset * p = array.back();
// The downcast is only performed after the isScriptInset() check.
363 return (p && p->isScriptInset()) ? static_cast<MathScriptInset *>(p) : 0;
// Obtains the script inset into which mathed_parse_into() parses a superscript
// (cell 0) or subscript (cell 1).
// Starts from prevScriptInset(array). The visible lines also allocate a fresh
// MathScriptInset in two ways: wrapping a clone of the last element when that element
// isScriptable(), or empty otherwise.
// NOTE(review): the conditions around these allocations, the insertion into 'array',
// the use of 'limits' and the return statement are not visible in this excerpt.
367 MathInset * lastScriptInset(MathArray & array, bool up, bool down, int limits)
369 MathScriptInset * p = prevScriptInset(array);
371 MathInset * b = array.back();
372 if (b && b->isScriptable()) {
373 p = new MathScriptInset(up, down, b->clone());
376 p = new MathScriptInset(up, down);
// File-scope parser state for the row currently being parsed: whether it is numbered
// and its label. curr_label is assigned in mathed_parse_into() from lexArg('{', true).
390 static bool curr_num;
391 static string curr_label;
// Parses the rows of a grid inset, 'col' cells per row.
//   inset    - treated as a MathGridInset (unchecked static_cast below).
//   col      - number of cells parsed per row.
//   numbered - NOTE(review): its use is not visible here; presumably it seeds curr_num
//              for each row.
//   outmost  - NOTE(review): its use is not visible here either.
// curr_num and curr_label are saved on entry and restored on exit.
393 void mathed_parse_lines(MathInset * inset, int col,
394 bool numbered, bool outmost)
396 // save global variables
397 bool const saved_num = curr_num;
398 string const saved_label = curr_label;
400 MathGridInset * p = static_cast<MathGridInset *>(inset);
// No loop condition: termination must come from inside the loop (not visible here).
401 for (int row = 0; true; ++row) {
402 // reset global variables
// Index of the first cell of the last row, assuming cells are stored row by row
// (nargs - ncols) -- TODO confirm against MathGridInset.
407 int idx = p->nargs() - p->ncols();
// The first col-1 cells each end at the next '&'...
408 for (int i = 0; i < col - 1; ++i, ++idx)
409 mathed_parse_into(p->cell(idx), FLAG_AMPERSAND);
// ...and the last cell of the row ends at a newline or at the end of the environment.
410 mathed_parse_into(p->cell(idx), FLAG_NEWLINE | FLAG_END);
// Record the numbering and label collected while parsing this row.
// NOTE(review): this cast is only valid for a real MathMatrixInset, yet
// mathed_parse_into() also calls this function with a MathArrayInset and outmost ==
// false; presumably a hidden line guards this block with 'outmost' -- confirm.
413 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
414 m->numbered(row, curr_num);
415 m->label(row, curr_label);
426 // restore global variables
427 curr_num = saved_num;
428 curr_label = saved_label;
// Parses a macro definition from the current input (yyis) into a newly allocated
// MathMacroTemplate.
// Expected input, in order: an LM_TK_NEWCOMMAND token, the macro name in braces, an
// argument count in brackets (empty counts as 0), and the braced body.
// NOTE(review): the return statements are not visible in this excerpt.
432 MathMacroTemplate * mathed_parse_macro()
434 if (yylex() != LM_TK_NEWCOMMAND) {
435 lyxerr << "\\newcommand expected\n";
// substr(1) drops the first character of the braced name (presumably the leading
// backslash of the macro name -- confirm).
// NOTE(review): std::string::substr(1) throws std::out_of_range if lexArg returned an
// empty string.
439 string name = lexArg('{').substr(1);
440 string arg = lexArg('[');
441 int narg = arg.empty() ? 0 : atoi(arg.c_str());
442 MathMacroTemplate * p = new MathMacroTemplate(name, narg);
// The body must start with '{' (FLAG_BRACE); parsing stops at its matching '}' (FLAG_BRACE_LAST).
443 mathed_parse_into(p->cell(0), FLAG_BRACE | FLAG_BRACE_LAST);
// Parses one top-level math environment from the current input (yyis) into a newly
// allocated MathMatrixInset; p starts as 0.
// 'i' indexes latex_mathenv[]. NOTE(review): how 'i' and 't' are obtained, several case
// labels and the return statement are not visible in this excerpt.
448 MathMatrixInset * mathed_parse_normal()
450 MathMatrixInset * p = 0;
457 lyxerr[Debug::MATHED]
458 << "reading math environment " << i << " "
459 << latex_mathenv[i].name << "\n";
461 MathInsetTypes typ = latex_mathenv[i].typ;
462 p = new MathMatrixInset(typ);
// m aliases p; the cast changes nothing because p already has this pointer type.
463 MathMatrixInset * m = static_cast<MathMatrixInset *>(p);
// Single-cell parse with no stop flags (the case label is not visible; presumably the
// simple/inline type). Numbering and label of row 0 are stored afterwards.
467 curr_num = latex_mathenv[i].numbered;
469 mathed_parse_into(m->cell(0), 0);
470 m->numbered(0, curr_num);
471 m->label(0, curr_label);
// Equation: a single cell parsed until the end of the environment (FLAG_END).
475 case LM_OT_EQUATION: {
476 curr_num = latex_mathenv[i].numbered;
478 mathed_parse_into(m->cell(0), FLAG_END);
479 m->numbered(0, curr_num);
480 m->label(0, curr_label);
// Eqnarray: rows of three cells.
484 case LM_OT_EQNARRAY: {
485 mathed_parse_lines(m, 3, latex_mathenv[i].numbered, true);
// Case label not visible here. A brace-delimited alignment argument is read first,
// then rows of two cells.
490 m->halign(lexArg('{'));
491 mathed_parse_lines(m, 2, latex_mathenv[i].numbered, true);
// Alignat: handled the same way as the preceding case.
495 case LM_OT_ALIGNAT: {
496 m->halign(lexArg('{'));
497 mathed_parse_lines(m, 2, latex_mathenv[i].numbered, true);
502 lyxerr[Debug::MATHED]
503 << "1: unknown math environment: " << typ << "\n";
// The inset is named after the unstarred environment name.
506 p->setName(latex_mathenv[i].basename);
512 lyxerr[Debug::MATHED]
513 << "2 unknown math environment: " << t << "\n";
// Builds a two-cell MathFracInset of the given kind; mathed_parse_into() calls it with
// "atop", "stackrel" and "frac". Each cell receives exactly one item (FLAG_ITEM) from
// the input, cell 0 first.
// NOTE(review): the line that hands p over to 'array' is not visible in this excerpt.
520 void handle_frac(MathArray & array, string const & name)
522 MathFracInset * p = new MathFracInset(name);
523 mathed_parse_into(p->cell(0), FLAG_ITEM);
524 mathed_parse_into(p->cell(1), FLAG_ITEM);
// Core recursive parser: reads tokens from the lexer and appends the resulting
// characters and insets to 'array'.
//   array - destination for the parsed material.
//   flags - combination of FLAG_* bits choosing what ends this parse and how the first
//           token is treated; the function modifies its own copy as it goes.
// The function calls itself for nested cells (script, root, decoration and macro
// arguments).
// NOTE(review): most case labels, the declarations of t / brace / limits and the break
// and return statements are not visible in this excerpt; the comments below describe
// only the visible lines.
529 void mathed_parse_into(MathArray & array, unsigned flags)
// Persists across calls. NOTE(review): presumably a recursion-depth counter; its
// updates are not visible here.
531 static int plevel = -1;
// Text code applied to plain characters; font tokens change it and a closing font
// brace resets it (see below).
534 MathTextCodes yyvarcode = LM_TC_VAR;
542 //lyxerr << "t: " << t << " flags: " << flags << " i: " << yylval.i
543 // << " '" << yytext << "'\n";
544 //array.dump(lyxerr);
// FLAG_ITEM: either a braced group or a single token.
547 if (flags & FLAG_ITEM) {
549 if (t == LM_TK_OPEN) {
550 // skip the brace and regard everything to the next matching
554 flags |= FLAG_BRACE_LAST;
556 // regard only this single token
// FLAG_BRACE demands that the current token is an opening brace.
561 if ((flags & FLAG_BRACE) && t != LM_TK_OPEN) {
563 "Expected {. Maybe you forgot to enclose an argument in {}");
// Whitespace characters are dropped unless the current text code is LM_TC_TEXTRM.
571 if (!isspace(yylval.i) || yyvarcode == LM_TC_TEXTRM)
572 array.push_back(yylval.i, yyvarcode);
575 case LM_TK_ARGUMENT: {
576 MathMacroArgument * p = new MathMacroArgument(yylval.i);
583 array.push_back(yylval.i, LM_TC_SPECIAL);
587 array.push_back(yylval.i, LM_TC_CONST);
// Visible here: FLAG_BRACE, if set, is cleared; a literal '{' is pushed as TeX code.
// The condition linking the two is not visible.
592 if (flags & FLAG_BRACE)
593 flags &= ~FLAG_BRACE;
595 array.push_back('{', LM_TC_TEX);
601 mathPrintError("Unmatching braces");
// Closing a font group: restore the default text code and clear the flag.
605 if (flags & FLAG_BRACE_FONT) {
606 yyvarcode = LM_TC_VAR;
607 flags &= ~FLAG_BRACE_FONT;
// Brace count back at zero while FLAG_BRACE_LAST is set (the action taken is not visible here).
610 if (brace == 0 && (flags & FLAG_BRACE_LAST))
613 array.push_back('}', LM_TC_TEX);
617 array.push_back('[', LM_TC_CONST);
621 if (flags & FLAG_BRACK_END)
624 array.push_back(']', LM_TC_CONST);
// 'up' script: one item goes into cell 0 of the script inset at the end of array.
629 lastScriptInset(array, true, false, limits)->cell(0), FLAG_ITEM);
// 'down' script: one item goes into cell 1.
634 lastScriptInset(array, false, true, limits)->cell(1), FLAG_ITEM);
638 limits = yylval.l->id;
639 //lyxerr << "setting limit to " << limits << "\n";
// FLAG_AMPERSAND set: the flag is cleared. The debug message below is for a tab that
// was not expected.
643 if (flags & FLAG_AMPERSAND) {
644 flags &= ~FLAG_AMPERSAND;
648 lyxerr[Debug::MATHED]
649 << "found tab unexpectedly, array: '" << array << "'\n";
// Same pattern for FLAG_NEWLINE.
653 if (flags & FLAG_NEWLINE) {
654 flags &= ~FLAG_NEWLINE;
658 lyxerr[Debug::MATHED]
659 << "found newline unexpectedly, array: '" << array << "'\n";
// Tokens that map directly onto one new inset or one character.
668 array.push_back(new MathBigopInset(yylval.l));
675 array.push_back(new MathFuncLimInset(yylval.l));
681 array.push_back(new MathSymbolInset(yylval.l));
685 array.push_back(yylval.i, LM_TC_BOP);
690 array.push_back(new MathSpaceInset(yylval.i));
694 array.push_back(new MathDotsInset(yylval.l));
698 handle_frac(array, "atop");
702 handle_frac(array, "stackrel");
706 handle_frac(array, "frac");
// Root vs. square root, decided on the next input character (the test is not visible).
// Root: cell 0 is parsed up to ']' (FLAG_BRACK_END), then cell 1 takes one item.
// Sqrt: cell 0 takes one item.
711 unsigned char c = getuchar(yyis);
713 array.push_back(new MathRootInset);
714 mathed_parse_into(array.back()->cell(0), FLAG_BRACK_END);
715 mathed_parse_into(array.back()->cell(1), FLAG_ITEM);
718 array.push_back(new MathSqrtInset);
719 mathed_parse_into(array.back()->cell(0), FLAG_ITEM);
// Delimiter pair: ld and rd are the left and right delimiters; the enclosed material
// is parsed into 'ar' with FLAG_RIGHT, then a MathDelimInset is created from ld and rd.
729 else if (ld == LM_TK_STR || ld == LM_TK_BOP || ld == LM_TK_SPECIAL)
733 mathed_parse_into(ar, FLAG_RIGHT);
738 else if (rd == LM_TK_STR || rd == LM_TK_BOP || rd == LM_TK_SPECIAL)
741 MathDelimInset * dl = new MathDelimInset(ld, rd);
748 if (flags & FLAG_RIGHT) {
752 mathPrintError("Unmatched right delimiter");
// Font token: switch the text code to the token's id and require a following brace
// group whose closing brace ends the font (FLAG_BRACE | FLAG_BRACE_FONT).
757 yyvarcode = static_cast<MathTextCodes>(yylval.l->id);
758 flags |= (FLAG_BRACE | FLAG_BRACE_FONT);
// Style tokens are only logged; the size-inset handling below is disabled.
763 lyxerr[Debug::MATHED] << "LM_TK_STY not implemented\n";
764 //MathArray tmp = array;
765 //MathSizeInset * p = new MathSizeInset(MathStyles(yylval.l->id));
766 //array.push_back(p);
767 //mathed_parse_into(p->cell(0), FLAG_BRACE_FONT);
772 case LM_TK_DECORATION:
774 MathDecorationInset * p = new MathDecorationInset(yylval.l);
775 mathed_parse_into(p->cell(0), FLAG_ITEM);
785 array.push_back(new MathSymbolInset(yylval.l));
// Name with a macro template: clone it and parse one item per macro argument.
// The visible fallback pushes a MathFuncInset for the name instead.
789 if (MathMacroTable::hasTemplate(yytext)) {
790 MathMacro * m = MathMacroTable::cloneTemplate(yytext);
791 for (int i = 0; i < m->nargs(); ++i)
792 mathed_parse_into(m->cell(i), FLAG_ITEM);
794 m->metrics(LM_ST_TEXT);
796 array.push_back(new MathFuncInset(yytext));
// Nested environment: only LM_OT_MATRIX is handled here.
807 MathInsetTypes typ = latex_mathenv[i].typ;
809 if (typ == LM_OT_MATRIX) {
// Appending 'c' guarantees valign[0] exists and makes 'c' the value used when the
// optional [..] argument is empty.
810 string const valign = lexArg('[') + 'c';
811 string const halign = lexArg('{');
812 //lyxerr << "valign: '" << valign << "'\n";
813 //lyxerr << "halign: '" << halign << "'\n";
// halign.size() is used both for the inset's first constructor argument and as the
// per-row cell count (presumably one column per alignment character -- confirm).
814 MathArrayInset * m = new MathArrayInset(halign.size(), 1);
815 m->valign(valign[0]);
818 mathed_parse_lines(m, halign.size(), latex_mathenv[i].numbered, false);
820 //lyxerr << "read matrix " << *m << "\n";
823 lyxerr[Debug::MATHED] << "unknow math inset " << typ << "\n";
828 array.push_back(MathMacroTable::cloneTemplate(yylval.l->name));
// Label: stored in the file-scope curr_label; spaces inside the braces are kept
// (accept_spaces = true).
832 curr_label = lexArg('{', true);
836 mathPrintError("Unrecognized token");
837 lyxerr[Debug::MATHED] << "[" << t << " " << yytext << "]" << endl;
840 } // end of big switch
// FLAG_LEAVE is tested once the switch is done and is cleared when set.
842 if (flags & FLAG_LEAVE) {
843 flags &= ~FLAG_LEAVE;
848 lyxerr << " Math Panic, expect problems!" << endl;
849 // Search for the end command.
// Keep going while the stream is good, the token is not LM_TK_END and the token is non-zero.
852 } while (yyis->good() && t != LM_TK_END && t);
// Parses the LaTeX math text in str into a MathArray, with no stop flags (flags = 0).
// NOTE(review): the declaration of 'ar', the hookup of 'is' to the parser input and the
// return statement are not visible in this excerpt.
863 MathArray mathed_parse_cell(string const & str)
// Wrap the text in an input stream for the stream-based parser.
865 istringstream is(str.c_str());
869 mathed_parse_into(ar, 0);
// String overload: wraps str in an istringstream and forwards to the istream overload.
874 MathMacroTemplate * mathed_parse_macro(string const & str)
876 istringstream is(str.c_str());
877 return mathed_parse_macro(is);
// String overload: wraps str in an istringstream and forwards to the istream overload.
881 MathMatrixInset * mathed_parse_normal(string const & str)
883 istringstream is(str.c_str());
884 return mathed_parse_normal(is);
// Stream overload: delegates to the argument-less mathed_parse_normal().
// NOTE(review): the lines that make 'is' the parser input are not visible in this
// excerpt (presumably they set yyis and yylineno -- confirm).
889 MathMatrixInset * mathed_parse_normal(istream & is)
893 return mathed_parse_normal();
// Stream overload: delegates to the argument-less mathed_parse_macro().
// NOTE(review): the lines that make 'is' the parser input are not visible in this
// excerpt (presumably they set yyis and yylineno -- confirm).
897 MathMacroTemplate * mathed_parse_macro(istream & is)
901 return mathed_parse_macro();
// LyXLex overload: parses a math environment straight from the document lexer's stream.
// NOTE(review): the return statement and the lines between the visible ones are not
// shown in this excerpt.
906 MathMatrixInset * mathed_parse_normal(LyXLex & lex)
// Point the math parser at the lexer's stream and start from its line number.
908 yyis = &lex.getStream();
909 yylineno = lex.getLineNo();
911 MathMatrixInset * p = mathed_parse_normal();
913 // Update line number
914 lex.setLineNo(yylineno);
916 // reading of end_inset
// The debug message below covers the case where "\end_inset" is not what was read.
919 if (lex.getString() == "\\end_inset")
921 lyxerr[Debug::MATHED] << "InsetFormula::Read: Garbage before \\end_inset,"
922 " or missing \\end_inset!" << endl;
// LyXLex overload: parses a macro definition straight from the document lexer's stream.
// Same shape as the LyXLex overload of mathed_parse_normal().
// NOTE(review): the return statement and the lines between the visible ones are not
// shown in this excerpt.
928 MathMacroTemplate * mathed_parse_macro(LyXLex & lex)
// Point the math parser at the lexer's stream and start from its line number.
930 yyis = &lex.getStream();
931 yylineno = lex.getLineNo();
933 MathMacroTemplate * p = mathed_parse_macro();
935 // Update line number
936 lex.setLineNo(yylineno);
938 // reading of end_inset
// The debug message below covers the case where "\end_inset" is not what was read.
941 if (lex.getString() == "\\end_inset")
943 lyxerr[Debug::MATHED] << "InsetFormula::Read: Garbage before \\end_inset,"
944 " or missing \\end_inset!" << endl;