/* * File: math_parser.C * Purpose: Parser for mathed * Author: Alejandro Aguilar Sierra * Created: January 1996 * Description: Parse LaTeX2e math mode code. * * Dependencies: Xlib, XForms * * Copyright: 1996, Alejandro Aguilar Sierra * * Version: 0.8beta. * * You are free to use and modify this code under the terms of * the GNU General Public Licence version 2 or later. */ #include #include #ifdef __GNUG__ #pragma implementation #endif #include "math_parser.h" #include "array.h" #include "math_rowst.h" #include "math_iter.h" #include "math_inset.h" #include "math_macro.h" #include "math_root.h" #include "math_matrixinset.h" #include "math_accentinset.h" #include "math_bigopinset.h" #include "math_funcinset.h" #include "math_spaceinset.h" #include "math_dotsinset.h" #include "math_fracinset.h" #include "math_deliminset.h" #include "math_decorationinset.h" #include "debug.h" #include "support/lyxlib.h" #include "mathed/support.h" using std::istream; using std::endl; #if 0 using std::isalpha; using std::isdigit; using std::isspace; #endif extern MathMatrixInset * create_multiline(short int type, int cols); enum { FLAG_BRACE = 1, // A { needed FLAG_BRACE_ARG = 2, // Next { is argument FLAG_BRACE_OPT = 4, // Optional { FLAG_BRACE_LAST = 8, // Last } ends the parsing process FLAG_BRACK_ARG = 16, // Optional [ FLAG_RIGHT = 32, // Next right ends the parsing process FLAG_END = 64, // Next end ends the parsing process FLAG_BRACE_FONT = 128, // Next } closes a font FLAG_BRACK_END = 256 // Next ] ends the parsing process }; static YYSTYPE yylval; static MathedInsetTypes mathed_env = LM_OT_MIN; string mathed_label; int const latex_mathenv_num = 12; char const * latex_mathenv[latex_mathenv_num] = { "math", "displaymath", "equation", "eqnarray*", "eqnarray", "align*", "align", "alignat*", "alignat", "multline*", "multline", "array" }; char const * latex_special_chars = "#$%&_{}"; // These are lexical codes, not semantic enum lexcode_enum { LexNone, LexESC, LexAlpha, LexDigit, LexBOP, // Binary operators or relations LexMathSpace, LexOpen, LexClose, LexComment, LexArgument, LexSpace, LexNewLine, LexOther, LexSelf }; static lexcode_enum lexcode[256]; #warning Replace with string static char yytext[256]; static int yylineno; static istream * yyis; static bool yy_mtextmode= false; static void mathPrintError(string const & msg) { lyxerr << "Line ~" << yylineno << ": Math parse error: " << msg << endl; } static void LexInitCodes() { for (int i = 0; i <= 255; ++i) { if (isalpha(i)) lexcode[i] = LexAlpha; else if (isdigit(i)) lexcode[i] = LexDigit; else if (isspace(i)) lexcode[i] = LexSpace; else lexcode[i] = LexNone; } lexcode['\t'] = lexcode['\f'] = lexcode[' '] = LexSpace; lexcode['\n'] = LexNewLine; lexcode['%'] = LexComment; lexcode['#'] = LexArgument; lexcode['+'] = lexcode['-'] = lexcode['*'] = lexcode['/'] = lexcode['<'] = lexcode['>'] = lexcode['='] = LexBOP; lexcode['!'] = lexcode[','] = lexcode[':'] = lexcode[';'] = LexMathSpace; lexcode['('] = lexcode[')'] = lexcode['|'] = lexcode['.'] = lexcode['?'] = LexOther; lexcode['\''] = lexcode['@'] = LexAlpha; lexcode['['] = lexcode[']'] = lexcode['^'] = lexcode['_'] = lexcode['&'] = LexSelf; lexcode['\\'] = LexESC; lexcode['{'] = LexOpen; lexcode['}'] = LexClose; } static char LexGetArg(char lf, bool accept_spaces= false) { unsigned char c; char cc; while (yyis->good()) { yyis->get(cc); c = cc; if (c > ' ') { if (!lf) lf = c; else if (c != lf) { lyxerr << "Math parse error: unexpected '" << c << "'" << endl; return '\0'; } break; } } char const rg = (lf == '{') ? '}' : ((lf == '[') ? ']' : ((lf == '(') ? ')' : 0)); if (!rg) { lyxerr << "Math parse error: unknown bracket '" << lf << "'" << endl; return '\0'; } char * p = &yytext[0]; int bcnt = 1; do { yyis->get(cc); c = cc; if (c == lf) ++bcnt; if (c == rg) --bcnt; if ((c > ' ' || (c == ' ' && accept_spaces)) && bcnt > 0) *(p++) = c; } while (bcnt > 0 && yyis->good() && p-yytext < 255); *p = '\0'; return rg; } static int yylex(void) { static int init_done = 0; if (!init_done) LexInitCodes(); unsigned char c; char cc; while (yyis->good()) { yyis->get(cc); c = cc; if (yy_mtextmode && c == ' ') { yylval.i= ' '; return LM_TK_ALPHA; } else if (lexcode[c] == LexNewLine) { ++yylineno; continue; } else if (lexcode[c] == LexComment) { do { yyis->get(cc); c = cc; } while (c != '\n' % yyis->good()); // eat comments } else if (lexcode[c] == LexDigit || lexcode[c] == LexOther || lexcode[c] == LexMathSpace) { yylval.i = c; return LM_TK_STR; } else if (lexcode[c] == LexAlpha) { yylval.i= c; return LM_TK_ALPHA; } else if (lexcode[c] == LexBOP) { yylval.i= c; return LM_TK_BOP; } else if (lexcode[c] == LexSelf) { return c; } else if (lexcode[c] == LexArgument) { yyis->get(cc); c = cc; yylval.i = c - '0'; return LM_TK_ARGUMENT; } else if (lexcode[c] == LexOpen) { return LM_TK_OPEN; } else if (lexcode[c] == LexClose) { return LM_TK_CLOSE; } else if (lexcode[c] == LexESC) { yyis->get(cc); c = cc; if (c == '\\') { return LM_TK_NEWLINE; } if (c == '(') { yylval.i = LM_OT_MIN; return LM_TK_BEGIN; } if (c == ')') { yylval.i = LM_OT_MIN; return LM_TK_END; } if (c == '[') { yylval.i = LM_OT_PAR; return LM_TK_BEGIN; } if (c == ']') { yylval.i = LM_OT_PAR; return LM_TK_END; } if (strchr(latex_special_chars, c)) { yylval.i = c; return LM_TK_SPECIAL; } if (lexcode[c] == LexMathSpace) { int i; for (i = 0; i < 4 && static_cast(c) != latex_mathspace[i][0]; ++i); yylval.i = (i < 4) ? i : 0; return LM_TK_SPACE; } if (lexcode[c] == LexAlpha || lexcode[c] == LexDigit) { char * p = &yytext[0]; while ((lexcode[c] == LexAlpha || lexcode[c] == LexDigit) && p-yytext < 255) { *p = c; yyis->get(cc); c = cc; ++p; } *p = '\0'; if (yyis->good()) yyis->putback(c); latexkeys * l = in_word_set (yytext, strlen(yytext)); if (l) { if (l->token == LM_TK_BEGIN || l->token == LM_TK_END) { int i; LexGetArg('{'); // for (i = 0; i < 5 && strncmp(yytext, latex_mathenv[i], // strlen(latex_mathenv[i])); ++i); for (i = 0; i < latex_mathenv_num && strcmp(yytext, latex_mathenv[i]); ++i); yylval.i = i; } else if (l->token == LM_TK_SPACE) yylval.i = l->id; else yylval.l = l; return l->token; } else { yylval.s = yytext; return LM_TK_UNDEF; } } } } return 0; } static int parse_align(char * hor, char *) { int nc = 0; for (char * c = hor; c && *c > ' '; ++c) ++nc; return nc; } // Accent hacks only for 0.12. Stolen from Cursor. static int accent = 0; static int nestaccent[8]; static void setAccent(int ac) { if (ac > 0 && accent < 8) { nestaccent[accent++] = ac; } else accent = 0; // consumed! } static MathedInset * doAccent(byte c, MathedTextCodes t) { MathedInset * ac = 0; for (int i = accent - 1; i >= 0; --i) { if (i == accent - 1) ac = new MathAccentInset(c, t, nestaccent[i]); else ac = new MathAccentInset(ac, nestaccent[i]); } accent = 0; // consumed! return ac; } static MathedInset * doAccent(MathedInset * p) { MathedInset * ac = 0; for (int i = accent - 1; i >= 0; --i) { if (i == accent - 1) ac = new MathAccentInset(p, nestaccent[i]); else ac = new MathAccentInset(ac, nestaccent[i]); } accent = 0; // consumed! return ac; } MathedArray * mathed_parse(unsigned flags, MathedArray * array, MathParInset ** mtx) { int t = yylex(); int tprev = 0; bool panic = false; static int plevel = -1; static int size = LM_ST_TEXT; MathedTextCodes varcode = LM_TC_VAR; MathedInset * binset = 0; static MathMacroTemplate * macro = 0; int brace = 0; int acc_brace = 0; int acc_braces[8]; MathParInset * mt = (mtx) ? *mtx : 0;//(MathParInset*)0; MathedRowSt * crow = (mt) ? mt->getRowSt() : 0; ++plevel; if (!array) array = new MathedArray; MathedIter data(array); while (t) { if ((flags & FLAG_BRACE) && t != LM_TK_OPEN) { if ((flags & FLAG_BRACK_ARG) && t == '[') { } else { mathPrintError("Expected {. Maybe you forgot to enclose an argument in {}"); panic = true; break; } } MathedInsetTypes fractype = LM_OT_FRAC; switch (t) { case LM_TK_ALPHA: { if (accent) { data.Insert(doAccent(yylval.i, varcode)); } else data.Insert (yylval.i, varcode); //LM_TC_VAR); break; } case LM_TK_ARGUMENT: { if (macro) { data.Insert(macro->getMacroPar(yylval.i-1), LM_TC_INSET); } break; } case LM_TK_NEWCOMMAND: { int na = 0; LexGetArg('{'); string const name(&yytext[1]); // ugly trick to be removed soon (lyx3) char const c = yyis->peek(); if (c == '[') { LexGetArg('['); na = lyx::atoi(yytext); } macro = new MathMacroTemplate(name, na); flags = FLAG_BRACE|FLAG_BRACE_LAST; *mtx = macro; macro->SetData(array); break; } case LM_TK_SPECIAL: { data.Insert (yylval.i, LM_TC_SPECIAL); break; } case LM_TK_STR: { if (accent) { data.Insert(doAccent(yylval.i, LM_TC_CONST)); } else data.Insert (yylval.i, LM_TC_CONST); break; } case LM_TK_OPEN: { ++brace; if (accent && tprev == LM_TK_ACCENT) { acc_braces[acc_brace++] = brace; break; } if (flags & FLAG_BRACE_OPT) { flags &= ~FLAG_BRACE_OPT; flags |= FLAG_BRACE; } if (flags & FLAG_BRACE) flags &= ~FLAG_BRACE; else { data.Insert ('{', LM_TC_TEX); } break; } case LM_TK_CLOSE: { --brace; if (brace < 0) { mathPrintError("Unmatching braces"); panic = true; break; } if (acc_brace && brace == acc_braces[acc_brace-1]-1) { --acc_brace; break; } if (flags & FLAG_BRACE_FONT) { varcode = LM_TC_VAR; yy_mtextmode = false; flags &= ~FLAG_BRACE_FONT; break; } if (brace == 0 && (flags & FLAG_BRACE_LAST)) { --plevel; return array; } else { data.Insert ('}', LM_TC_TEX); } break; } case '[': { if (flags & FLAG_BRACK_ARG) { flags &= ~FLAG_BRACK_ARG; char const rg = LexGetArg('['); if (rg != ']') { mathPrintError("Expected ']'"); panic = true; break; } // if (arg) strcpy(arg, yytext); } else data.Insert ('['); break; } case ']': { if (flags & FLAG_BRACK_END) { --plevel; return array; } else data.Insert (']'); break; } case '^': { MathParInset * p = new MathParInset(size, "", LM_OT_SCRIPT); MathedArray * ar = mathed_parse(FLAG_BRACE_OPT|FLAG_BRACE_LAST, 0); p->SetData(ar); // lyxerr << "UP[" << p->GetStyle() << "]" << endl; data.Insert (p, LM_TC_UP); break; } case '_': { MathParInset * p = new MathParInset(size, "", LM_OT_SCRIPT); MathedArray * ar = mathed_parse(FLAG_BRACE_OPT|FLAG_BRACE_LAST, 0); p->SetData(ar); data.Insert (p, LM_TC_DOWN); break; } case LM_TK_LIMIT: { if (binset) { binset->SetLimits(bool(yylval.l->id)); binset = 0; } break; } case '&': // Tab { if ((flags & FLAG_END) && mt && data.getCol()GetColumns() - 1) { data.setNumCols(mt->GetColumns()); data.Insert('T', LM_TC_TAB); } else mathPrintError("Unexpected tab"); // debug info. [made that conditional -JMarc] if (lyxerr.debugging(Debug::MATHED)) lyxerr << data.getCol() << " " << mt->GetColumns() << endl; break; } case LM_TK_NEWLINE: { if (mt && (flags & FLAG_END)) { if (mt->Permit(LMPF_ALLOW_CR)) { if (crow) { crow->setNext(new MathedRowSt(mt->GetColumns() + 1)); // this leaks crow = crow->getNext(); } data.Insert('K', LM_TC_CR); } else mathPrintError("Unexpected newline"); } break; } case LM_TK_BIGSYM: { binset = new MathBigopInset(yylval.l->name, yylval.l->id); data.Insert(binset); break; } case LM_TK_SYM: { if (yylval.l->id < 256) { MathedTextCodes tc = MathIsBOPS(yylval.l->id) ? LM_TC_BOPS: LM_TC_SYMB; if (accent) { data.Insert(doAccent(yylval.l->id, tc)); } else data.Insert(yylval.l->id, tc); } else { MathFuncInset * bg = new MathFuncInset(yylval.l->name); if (accent) { data.Insert(doAccent(bg)); } else data.Insert(bg, true); } break; } case LM_TK_BOP: { if (accent) { data.Insert(doAccent(yylval.i, LM_TC_BOP)); } else data.Insert(yylval.i, LM_TC_BOP); break; } case LM_TK_STY: { if (mt) { mt->UserSetSize(yylval.l->id); } break; } case LM_TK_SPACE: { if (yylval.i >= 0) { MathSpaceInset * sp = new MathSpaceInset(yylval.i); data.Insert(sp); } break; } case LM_TK_DOTS: { MathDotsInset * p = new MathDotsInset(yylval.l->name, yylval.l->id); data.Insert(p); break; } case LM_TK_STACK: fractype = LM_OT_STACKREL; case LM_TK_FRAC: { MathFracInset * fc = new MathFracInset(fractype); MathedArray * num = mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST); MathedArray * den = mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST); fc->SetData(num, den); data.Insert(fc, LM_TC_ACTIVE_INSET); break; } case LM_TK_SQRT: { MathParInset * rt; char c; yyis->get(c); if (c == '[') { rt = new MathRootInset(size); rt->setArgumentIdx(0); rt->SetData(mathed_parse(FLAG_BRACK_END, 0, &rt)); rt->setArgumentIdx(1); } else { yyis->putback(c); rt = new MathSqrtInset(size); } rt->SetData(mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST, 0, &rt)); data.Insert(rt, LM_TC_ACTIVE_INSET); break; } case LM_TK_LEFT: { int lfd = yylex(); if (lfd == LM_TK_SYM || lfd == LM_TK_STR || lfd == LM_TK_BOP|| lfd == LM_TK_SPECIAL) lfd = (lfd == LM_TK_SYM) ? yylval.l->id: yylval.i; // lyxerr << "L[" << lfd << " " << lfd << "]"; MathedArray * a = mathed_parse(FLAG_RIGHT); int rgd = yylex(); // lyxerr << "R[" << rgd << "]"; if (rgd == LM_TK_SYM || rgd == LM_TK_STR || rgd == LM_TK_BOP || rgd == LM_TK_SPECIAL) rgd = (rgd == LM_TK_SYM) ? yylval.l->id: yylval.i; MathDelimInset * dl = new MathDelimInset(lfd, rgd); dl->SetData(a); data.Insert(dl, LM_TC_ACTIVE_INSET); // lyxerr << "RL[" << lfd << " " << rgd << "]"; break; } case LM_TK_RIGHT: { if (flags & FLAG_RIGHT) { --plevel; return array; } else { mathPrintError("Unmatched right delimiter"); // panic = true; } break; } case LM_TK_FONT: { varcode = static_cast(yylval.l->id); yy_mtextmode = bool(varcode == LM_TC_TEXTRM); flags |= (FLAG_BRACE|FLAG_BRACE_FONT); break; } case LM_TK_WIDE: { MathDecorationInset * sq = new MathDecorationInset(yylval.l->id, size); sq->SetData(mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST)); data.Insert(sq, LM_TC_ACTIVE_INSET); break; } case LM_TK_ACCENT: setAccent(yylval.l->id); break; case LM_TK_NONUM: { if (crow) crow->setNumbered(false); break; } case LM_TK_PMOD: case LM_TK_FUNC: { MathedInset * bg = new MathFuncInset(yylval.l->name); if (accent) { data.Insert(t); } else data.Insert(bg); break; } case LM_TK_FUNCLIM: { data.Insert(new MathFuncInset(yylval.l->name, LM_OT_FUNCLIM)); break; } case LM_TK_UNDEF: { MathMacro * p = MathMacroTable::mathMTable.getMacro(yylval.s); if (p) { if (accent) data.Insert(doAccent(p), p->getTCode()); else data.Insert(p, p->getTCode()); for (int i = 0; p->setArgumentIdx(i); ++i) p->SetData(mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST)); } else { MathedInset * q = new MathFuncInset(yylval.s, LM_OT_UNDEF); if (accent) { data.Insert(doAccent(q)); } else { data.Insert(q); } } break; } case LM_TK_END: { if (mathed_env != yylval.i && yylval.i != LM_OT_MATRIX) mathPrintError("Unmatched environment"); // debug info [made that conditional -JMarc] if (lyxerr.debugging(Debug::MATHED)) lyxerr << "[" << yylval.i << "]" << endl; --plevel; if (mt) { // && (flags & FLAG_END)) { mt->SetData(array); array = 0; } return array; } case LM_TK_BEGIN: { if (yylval.i == LM_OT_MATRIX) { char ar[120], ar2[8]; ar[0] = ar2[0] = '\0'; char rg = LexGetArg(0); if (rg == ']') { strcpy(ar2, yytext); rg = LexGetArg('{'); } strcpy(ar, yytext); int const nc = parse_align(ar, ar2); MathParInset * mm = new MathMatrixInset(nc, 0); mm->SetAlign(ar2[0], ar); data.Insert(mm, LM_TC_ACTIVE_INSET); mathed_parse(FLAG_END, mm->GetData(), &mm); } else if (is_eqn_type(yylval.i)) { if (plevel!= 0) { mathPrintError("Misplaced environment"); break; } if (!mt) { mathPrintError("0 paragraph."); panic = true; } mathed_env = static_cast(yylval.i); if (mathed_env != LM_OT_MIN) { size = LM_ST_DISPLAY; if (is_multiline(mathed_env)) { int cols = 1; if (is_multicolumn(mathed_env)) { if (mathed_env != LM_OT_ALIGNAT && mathed_env != LM_OT_ALIGNATN && yyis->good()) { char c; yyis->get(c); if (c != '%') lyxerr << "Math parse error: unexpected '" << c << "'" << endl; } LexGetArg('{'); cols = strToInt(string(yytext)); } mt = create_multiline(mathed_env, cols); if (mtx) *mtx = mt; flags |= FLAG_END; // data.Insert(' ', LM_TC_TAB); // data.Insert(' ', LM_TC_TAB); // data.Reset(); } mt->SetStyle(size); mt->SetType(mathed_env); crow = mt->getRowSt(); } #ifdef DEBUG lyxerr << "MATH BEGIN[" << mathed_env << "]" << endl; #endif } else { // lyxerr << "MATHCRO[" << yytext << "]"; MathMacro * p = MathMacroTable::mathMTable.getMacro(yytext); if (p) { data.Insert(p, p->getTCode()); p->setArgumentIdx(0); mathed_parse(FLAG_END, p->GetData(), reinterpret_cast(&p)); // for (int i = 0; p->setArgumentIdx(i); ++i) // p->SetData(mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST)); } else mathPrintError("Unrecognized environment"); } break; } case LM_TK_MACRO: { MathedInset * p = MathMacroTable::mathMTable.getMacro(yylval.l->name); if (p) { if (accent) { data.Insert(doAccent(p)); } else data.Insert(p, static_cast(p)->getTCode()); } break; } case LM_TK_LABEL: { char const rg = LexGetArg('\0', true); if (rg != '}') { mathPrintError("Expected '{'"); // debug info lyxerr << "[" << yytext << "]" << endl; panic = true; break; } if (crow) { crow->setLabel(yytext); } else { mathed_label = yytext; } #ifdef DEBUG lyxerr << "Label[" << mathed_label << "]" << endl; #endif break; } default: mathPrintError("Unrecognized token"); // debug info lyxerr << "[" << t << " " << yytext << "]" << endl; break; } tprev = t; if (panic) { lyxerr << " Math Panic, expect problems!" << endl; // Search for the end command. do { t = yylex (); } while (t != LM_TK_END && t); } else t = yylex (); if ((flags & FLAG_BRACE_OPT)/* && t!= '^' && t!= '_'*/) { flags &= ~FLAG_BRACE_OPT; //data.Insert (LM_TC_CLOSE); break; } } --plevel; return array; } void mathed_parser_file(istream & is, int lineno) { yyis = &is; yylineno = lineno; if (!MathMacroTable::built) MathMacroTable::mathMTable.builtinMacros(); } int mathed_parser_lineno() { return yylineno; }