/* * File: math_parser.C * Purpose: Parser for mathed * Author: Alejandro Aguilar Sierra * Created: January 1996 * Description: Parse LaTeX2e math mode code. * * Dependencies: Xlib, XForms * * Copyright: (c) 1996, Alejandro Aguilar Sierra * * Version: 0.8beta. * * You are free to use and modify this code under the terms of * the GNU General Public Licence version 2 or later. */ #include #include #include #ifdef __GNUG__ #pragma implementation "math_parser.h" #endif #include "math_parser.h" #include "math_iter.h" #include "math_inset.h" #include "math_macro.h" #include "math_root.h" #include "debug.h" enum { FLAG_BRACE = 1, // A { needed FLAG_BRACE_ARG = 2, // Next { is argument FLAG_BRACE_OPT = 4, // Optional { FLAG_BRACE_LAST = 8, // Last } ends the parsing process FLAG_BRACK_ARG = 16, // Optional [ FLAG_RIGHT = 32, // Next right ends the parsing process FLAG_END = 64, // Next end ends the parsing process FLAG_BRACE_FONT = 128, // Next } closes a font FLAG_BRACK_END = 256 // Next ] ends the parsing process }; YYSTYPE yylval; static short mathed_env = LM_EN_INTEXT; char * mathed_label = 0; char const * latex_mathenv[] = { "math", "displaymath", "equation", "eqnarray*", "eqnarray", "array" }; char const * latex_mathspace[] = { "!", ",", ":", ";", "quad", "qquad" }; char const * latex_special_chars = "#$%&_{}"; // These are lexical codes, not semantic enum lexcode_enum { LexNone, LexESC, LexAlpha, LexDigit, LexBOP, // Binary operators or relations LexMathSpace, LexOpen, LexClose, LexComment, LexArgument, LexSpace, LexNewLine, LexOther, LexSelf }; static lexcode_enum lexcode[256]; static char yytext[256]; static int yylineno; static FILE * yyin; static bool yy_mtextmode= false; inline char * strnew(char const * s) { char *s1 = new char[strlen(s)+1]; // this leaks when not delete[]'ed strcpy(s1, s); return s1; } static void mathPrintError(char const * msg) { lyxerr << "Line ~" << yylineno << ": Math parse error: " << msg << endl; } static void LexInitCodes() { for (int i = 0; i <= 255; ++i) { if (isalpha(i)) lexcode[i] = LexAlpha; else if (isdigit(i)) lexcode[i] = LexDigit; else if (isspace(i)) lexcode[i] = LexSpace; else lexcode[i] = LexNone; } lexcode['\t'] = lexcode['\f'] = lexcode[' '] = LexSpace; lexcode['\n'] = LexNewLine; lexcode['%'] = LexComment; lexcode['#'] = LexArgument; lexcode['+'] = lexcode['-'] = lexcode['*'] = lexcode['/'] = lexcode['<'] = lexcode['>'] = lexcode['='] = LexBOP; lexcode['!'] = lexcode[','] = lexcode[':'] = lexcode[';'] = LexMathSpace; lexcode['('] = lexcode[')'] = lexcode['|'] = lexcode['.'] = lexcode['?'] = LexOther; lexcode['\'']= LexAlpha; lexcode['['] = lexcode[']'] = lexcode['^'] = lexcode['_'] = lexcode['&'] = LexSelf; lexcode['\\'] = LexESC; lexcode['{'] = LexOpen; lexcode['}'] = LexClose; } static char LexGetArg(char lf, bool accept_spaces= false) { char c, rg, * p = &yytext[0]; int bcnt = 1; while (!feof(yyin)) { c = getc(yyin); if (c>' ') { if (!lf) lf = c; else if (c!= lf) lyxerr << "Math parse error: unexpected '" << c << "'" << endl; break; } } rg = (lf == '{') ? '}': ((lf == '[') ? ']': ((lf == '(') ? ')': 0)); if (!rg) { lyxerr << "Math parse error: unknown bracket '" << lf << "'" << endl; return '\0'; } do { c = getc(yyin); if (c == lf) bcnt++; if (c == rg) bcnt--; if ((c>' ' || (c == ' ' && accept_spaces)) && bcnt>0) *(p++) = c; } while (bcnt>0 && !feof(yyin)); *p = '\0'; return rg; } static int yylex(void) { static int init_done = 0; unsigned char c; if (!init_done) LexInitCodes(); while (!feof(yyin)) { c = getc(yyin); if (yy_mtextmode && c == ' ') { yylval.i= ' '; return LM_TK_ALPHA; } if (lexcode[c] == LexNewLine) { yylineno++; continue; } if (lexcode[c] == LexComment) do c = getc(yyin); while (c!= '\n' % !feof(yyin)); // eat comments if (lexcode[c] == LexDigit || lexcode[c] == LexOther || lexcode[c] == LexMathSpace) { yylval.i= c; return LM_TK_STR; } if (lexcode[c] == LexAlpha) { yylval.i= c; return LM_TK_ALPHA; } if (lexcode[c] == LexBOP) { yylval.i= c; return LM_TK_BOP; } if (lexcode[c] == LexSelf) { return c; } if (lexcode[c] == LexArgument) { c = getc(yyin); yylval.i = c - '0'; return LM_TK_ARGUMENT; } if (lexcode[c] == LexOpen) { return LM_TK_OPEN; } if (lexcode[c] == LexClose) { return LM_TK_CLOSE; } if (lexcode[c] == LexESC) { c = getc(yyin); if (c == '\\') { return LM_TK_NEWLINE; } if (c == '(') { yylval.i = LM_EN_INTEXT; return LM_TK_BEGIN; } if (c == ')') { yylval.i = LM_EN_INTEXT; return LM_TK_END; } if (c == '[') { yylval.i = LM_EN_DISPLAY; return LM_TK_BEGIN; } if (c == ']') { yylval.i = LM_EN_DISPLAY; return LM_TK_END; } if (strchr(latex_special_chars, c)) { yylval.i = c; return LM_TK_SPECIAL; } if (lexcode[c] == LexMathSpace) { int i; for (i= 0; i<4 && c!= latex_mathspace[i][0]; i++); yylval.i = (i<4) ? i: 0; return LM_TK_SPACE; } if (lexcode[c] == LexAlpha || lexcode[c] == LexDigit) { char* p = &yytext[0]; while (lexcode[c] == LexAlpha || lexcode[c] == LexDigit) { *p = c; c = getc(yyin); p++; } *p = '\0'; if (!feof(yyin)) ungetc(c, yyin); latexkeys *l = in_word_set (yytext, strlen(yytext)); if (l) { if (l->token == LM_TK_BEGIN || l->token == LM_TK_END) { int i; LexGetArg('{'); // for (i= 0; i<5 && strncmp(yytext, latex_mathenv[i], // strlen(latex_mathenv[i])); i++); for (i= 0; i<6 && strcmp(yytext, latex_mathenv[i]); i++); yylval.i = i; } else if (l->token == LM_TK_SPACE) yylval.i = l->id; else yylval.l = l; return l->token; } else { yylval.s = yytext; return LM_TK_UNDEF; } } } } return 0; } int parse_align(char * hor, char *) { int nc = 0; for (char * c = hor; c && *c > ' '; ++c) nc++; return nc; } // Accent hacks only for 0.12. Stolen from Cursor. int accent = 0; int nestaccent[8]; void setAccent(int ac) { if (ac > 0 && accent < 8) { nestaccent[accent++] = ac; } else accent = 0; // consumed! } MathedInset * doAccent(byte c, MathedTextCodes t) { MathedInset * ac = 0; for (int i= accent-1; i>= 0; i--) { if (i == accent-1) ac = new MathAccentInset(c, t, nestaccent[i]); else ac = new MathAccentInset(ac, nestaccent[i]); } accent = 0; // consumed! return ac; } MathedInset * doAccent(MathedInset * p) { MathedInset * ac = 0; for (int i= accent-1; i>= 0; i--) { if (i == accent-1) ac = new MathAccentInset(p, nestaccent[i]); else ac = new MathAccentInset(ac, nestaccent[i]); } accent = 0; // consumed! return ac; } LyxArrayBase * mathed_parse(unsigned flags, LyxArrayBase * array, MathParInset ** mtx) { int t = yylex(), tprev = 0; bool panic = false; static int plevel = -1; static int size = LM_ST_TEXT; MathedTextCodes varcode = LM_TC_VAR; MathedInset* binset = 0; static MathMacroTemplate *macro= 0; int brace = 0; int acc_brace = 0; int acc_braces[8]; MathParInset * mt = (mtx) ? *mtx: 0;//(MathParInset*)0; MathedRowSt * crow = (mt) ? mt->getRowSt(): 0; ++plevel; if (!array) array = new LyxArrayBase; MathedIter data(array); while (t) { if ((flags & FLAG_BRACE) && t != LM_TK_OPEN) { if ((flags & FLAG_BRACK_ARG) && t == '[') { } else { mathPrintError("Expected {. Maybe you forgot to enclose an argument in {}"); panic = true; break; } } MathedInsetTypes fractype = LM_OT_FRAC; switch (t) { case LM_TK_ALPHA: { if (accent) { data.Insert(doAccent(yylval.i, varcode)); } else data.Insert (yylval.i, varcode); //LM_TC_VAR); break; } case LM_TK_ARGUMENT: { if (macro) { data.Insert(macro->getMacroPar(yylval.i-1), LM_TC_INSET); } break; } case LM_TK_NEWCOMMAND: { int na = 0; LexGetArg('{'); // This name lives until quitting, for that reason // I didn't care on deleting explicitly. Later I will. char const *name = strnew(&yytext[1]); // ugly trick to be removed soon (lyx3) char c = getc(yyin); ungetc(c, yyin); if (c == '[') { LexGetArg('['); na = atoi(yytext); } macro = new MathMacroTemplate(name, na); flags = FLAG_BRACE|FLAG_BRACE_LAST; *mtx = macro; macro->SetData(array); break; } case LM_TK_SPECIAL: { data.Insert (yylval.i, LM_TC_SPECIAL); break; } case LM_TK_STR: { if (accent) { data.Insert(doAccent(yylval.i, LM_TC_CONST)); } else data.Insert (yylval.i, LM_TC_CONST); break; } case LM_TK_OPEN: { brace++; if (accent && tprev == LM_TK_ACCENT) { acc_braces[acc_brace++] = brace; break; } if (flags & FLAG_BRACE_OPT) { flags &= ~FLAG_BRACE_OPT; flags |= FLAG_BRACE; } if (flags & FLAG_BRACE) flags &= ~FLAG_BRACE; else { data.Insert ('{', LM_TC_TEX); } break; } case LM_TK_CLOSE: { brace--; if (brace < 0) { mathPrintError("Unmatching braces"); panic = true; break; } if (acc_brace && brace == acc_braces[acc_brace-1]-1) { acc_brace--; break; } if (flags & FLAG_BRACE_FONT) { varcode = LM_TC_VAR; yy_mtextmode = false; flags &= ~FLAG_BRACE_FONT; break; } if (brace == 0 && (flags & FLAG_BRACE_LAST)) { plevel--; return array; } else { data.Insert ('}', LM_TC_TEX); } break; } case '[': { if (flags & FLAG_BRACK_ARG) { flags &= ~FLAG_BRACK_ARG; char rg= LexGetArg('['); if (rg!= ']') { mathPrintError("Expected ']'"); panic = true; break; } // if (arg) strcpy(arg, yytext); } else data.Insert ('['); break; } case ']': { if (flags & FLAG_BRACK_END) { plevel--; return array; } else data.Insert (']'); break; } case '^': { MathParInset *p = new MathParInset(size, "", LM_OT_SCRIPT); LyxArrayBase * ar = mathed_parse(FLAG_BRACE_OPT|FLAG_BRACE_LAST, 0); p->SetData(ar); // lyxerr << "UP[" << p->GetStyle() << "]" << endl; data.Insert (p, LM_TC_UP); break; } case '_': { MathParInset * p = new MathParInset(size, "", LM_OT_SCRIPT); LyxArrayBase * ar = mathed_parse(FLAG_BRACE_OPT|FLAG_BRACE_LAST, 0); p->SetData(ar); data.Insert (p, LM_TC_DOWN); break; } case LM_TK_LIMIT: { if (binset) { binset->SetLimits((bool)(yylval.l->id)); binset = 0; } break; } case '&': // Tab { if ((flags & FLAG_END) && mt && data.getCol()GetColumns()-1) { data.setNumCols(mt->GetColumns()); data.Insert('T', LM_TC_TAB); } else mathPrintError("Unexpected tab"); // debug info. [made that conditional -JMarc] if (lyxerr.debugging(Debug::MATHED)) lyxerr << data.getCol() << " " << mt->GetColumns() << endl; break; } case LM_TK_NEWLINE: { if (mt && (flags & FLAG_END)) { if (mt->Permit(LMPF_ALLOW_CR)) { if (crow) { crow->setNext(new MathedRowSt(mt->GetColumns()+1)); // this leaks crow = crow->getNext(); } data.Insert('K', LM_TC_CR); } else mathPrintError("Unexpected newline"); } break; } case LM_TK_BIGSYM: { binset = new MathBigopInset(yylval.l->name, yylval.l->id); data.Insert(binset); break; } case LM_TK_SYM: { if (yylval.l->id < 256) { MathedTextCodes tc = MathIsBOPS(yylval.l->id) ? LM_TC_BOPS: LM_TC_SYMB; if (accent) { data.Insert(doAccent(yylval.l->id, tc)); } else data.Insert (yylval.l->id, tc); } else { MathFuncInset *bg = new MathFuncInset(yylval.l->name); if (accent) { data.Insert(doAccent(bg)); } else data.Insert(bg, true); } break; } case LM_TK_BOP: { if (accent) { data.Insert(doAccent(yylval.i, LM_TC_BOP)); } else data.Insert (yylval.i, LM_TC_BOP); break; } case LM_TK_STY: { if (mt) { mt->UserSetSize(yylval.l->id); } break; } case LM_TK_SPACE: { if (yylval.i>= 0) { MathSpaceInset * sp = new MathSpaceInset(yylval.i); data.Insert(sp); } break; } case LM_TK_DOTS: { MathDotsInset * p = new MathDotsInset(yylval.l->name, yylval.l->id); data.Insert(p); break; } case LM_TK_STACK: fractype = LM_OT_STACKREL; case LM_TK_FRAC: { MathFracInset * fc = new MathFracInset(fractype); LyxArrayBase * num = mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST); LyxArrayBase * den = mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST); fc->SetData(num, den); data.Insert(fc, LM_TC_ACTIVE_INSET); break; } case LM_TK_SQRT: { MathParInset * rt; char c = getc(yyin); if (c == '[') { rt = new MathRootInset(size); rt->setArgumentIdx(0); rt->SetData(mathed_parse(FLAG_BRACK_END, 0, &rt)); rt->setArgumentIdx(1); } else { ungetc(c, yyin); rt = new MathSqrtInset(size); } rt->SetData(mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST, 0, &rt)); data.Insert(rt, LM_TC_ACTIVE_INSET); break; } case LM_TK_LEFT: { int lfd, rgd; lfd = yylex(); if (lfd == LM_TK_SYM || lfd == LM_TK_STR || lfd == LM_TK_BOP|| lfd == LM_TK_SPECIAL) lfd = (lfd == LM_TK_SYM) ? yylval.l->id: yylval.i; // lyxerr << "L[" << lfd << " " << lfd << "]"; LyxArrayBase * a = mathed_parse(FLAG_RIGHT); rgd = yylex(); // lyxerr << "R[" << rgd << "]"; if (rgd == LM_TK_SYM || rgd == LM_TK_STR || rgd == LM_TK_BOP || rgd == LM_TK_SPECIAL) rgd = (rgd == LM_TK_SYM) ? yylval.l->id: yylval.i; MathDelimInset * dl = new MathDelimInset(lfd, rgd); dl->SetData(a); data.Insert(dl, LM_TC_ACTIVE_INSET); // lyxerr << "RL[" << lfd << " " << rgd << "]"; break; } case LM_TK_RIGHT: { if (flags & FLAG_RIGHT) { plevel--; return array; } else { mathPrintError("Unmatched right delimiter"); // panic = true; } break; } case LM_TK_FONT: { varcode = (MathedTextCodes)yylval.l->id; yy_mtextmode = bool(varcode == LM_TC_TEXTRM); flags |= (FLAG_BRACE|FLAG_BRACE_FONT); break; } case LM_TK_WIDE: { MathDecorationInset * sq = new MathDecorationInset(yylval.l->id, size); sq->SetData(mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST)); data.Insert(sq, LM_TC_ACTIVE_INSET); break; } case LM_TK_ACCENT: setAccent(yylval.l->id); break; case LM_TK_NONUM: { if (crow) crow->setNumbered(false); break; } case LM_TK_PMOD: case LM_TK_FUNC: { MathedInset * bg = new MathFuncInset(yylval.l->name); if (accent) { data.Insert(t); } else data.Insert(bg); break; } case LM_TK_FUNCLIM: { data.Insert(new MathFuncInset(yylval.l->name, LM_OT_FUNCLIM)); break; } case LM_TK_UNDEF: { MathMacro * p = MathMacroTable::mathMTable.getMacro(yylval.s); if (p) { if (accent) data.Insert(doAccent(p), p->getTCode()); else data.Insert(p, p->getTCode()); for (int i= 0; p->setArgumentIdx(i); i++) p->SetData(mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST)); } else { MathedInset * q = new MathFuncInset(yylval.s, LM_OT_UNDEF); if (accent) { data.Insert(doAccent(q)); } else { data.Insert(q); } } break; } case LM_TK_END: { if (mathed_env != yylval.i && yylval.i!= LM_EN_ARRAY) mathPrintError("Unmatched environment"); // debug info [made that conditional -JMarc] if (lyxerr.debugging(Debug::MATHED)) lyxerr << "[" << yylval.i << "]" << endl; plevel--; if (mt) { // && (flags & FLAG_END)) { mt->SetData(array); array = 0; } return array; } case LM_TK_BEGIN: { if (yylval.i == LM_EN_ARRAY) { char ar[120], ar2[8]; ar[0] = ar2[0] = '\0'; char rg= LexGetArg(0); if (rg == ']') { strcpy(ar2, yytext); rg = LexGetArg('{'); } strcpy(ar, yytext); int nc = parse_align(ar, ar2); MathParInset * mm = new MathMatrixInset(nc, 0); mm->SetAlign(ar2[0], ar); data.Insert(mm, LM_TC_ACTIVE_INSET); mathed_parse(FLAG_END, mm->GetData(), &mm); } else if (yylval.i>= LM_EN_INTEXT && yylval.i<= LM_EN_EQNARRAY) { if (plevel!= 0) { mathPrintError("Misplaced environment"); break; } if (!mt) { mathPrintError("0 paragraph."); panic = true; } mathed_env = yylval.i; if (mathed_env>= LM_EN_DISPLAY) { size = LM_ST_DISPLAY; if (mathed_env>LM_EN_EQUATION) { mt = new MathMatrixInset(3, -1); mt->SetAlign(' ', "rcl"); if (mtx) *mtx = mt; flags |= FLAG_END; // data.Insert(' ', LM_TC_TAB); // data.Insert(' ', LM_TC_TAB); // data.Reset(); } mt->SetStyle(size); mt->SetType(mathed_env); crow = mt->getRowSt(); } #ifdef DEBUG lyxerr << "MATH BEGIN[" << mathed_env << "]" << endl; #endif } else { // lyxerr << "MATHCRO[" << yytext << "]"; MathMacro * p = MathMacroTable::mathMTable.getMacro(yytext); if (p) { data.Insert(p, p->getTCode()); p->setArgumentIdx(0); mathed_parse(FLAG_END, p->GetData(), (MathParInset**)&p); // for (int i= 0; p->setArgumentIdx(i); i++) // p->SetData(mathed_parse(FLAG_BRACE|FLAG_BRACE_LAST)); } else mathPrintError("Unrecognized environment"); } break; } case LM_TK_MACRO: { MathedInset * p = MathMacroTable::mathMTable.getMacro(yylval.l->name); if (p) { if (accent) { data.Insert(doAccent(p)); } else data.Insert(p, ((MathMacro*)p)->getTCode()); } break; } case LM_TK_LABEL: { char rg = LexGetArg('\0', true); if (rg != '}') { mathPrintError("Expected '{'"); // debug info lyxerr << "[" << yytext << "]" << endl; panic = true; break; } if (crow) { // This is removed by crow's destructor. Bad design? yes, this // will be changed after 0.12 crow->setLabel(strnew(yytext)); } else { // where is this math_label free'ed? // Supposedly in ~formula, another bad hack, // give me some time please. mathed_label = strnew(yytext); } #ifdef DEBUG lyxerr << "Label[" << mathed_label << "]" << endl; #endif break; } default: mathPrintError("Unrecognized token"); // debug info lyxerr << "[" << t << " " << yytext << "]" << endl; break; } tprev = t; if (panic) { lyxerr << " Math Panic, expect problems!" << endl; // Search for the end command. do t = yylex (); while (t != LM_TK_END && t); } else t = yylex (); if ((flags & FLAG_BRACE_OPT)/* && t!= '^' && t!= '_'*/) { flags &= ~FLAG_BRACE_OPT; //data.Insert (LM_TC_CLOSE); break; } } plevel--; return array; } void mathed_parser_file(FILE * file, int lineno) { yyin = file; yylineno = lineno; if (!MathMacroTable::built) MathMacroTable::mathMTable.builtinMacros(); } int mathed_parser_lineno() { return yylineno; }