/*
* File: math_parser.C
* Purpose: Parser for mathed
- * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
+ * Author: Alejandro Aguilar Sierra <asierra@servidor.unam.mx>
* Created: January 1996
* Description: Parse LaTeX2e math mode code.
*
* the GNU General Public Licence version 2 or later.
*/
-/*
+/*
If someone desperately needs partial "structures" (such as a few cells of
an array inset or similar) (s)he could use the following hack as a starting
#include "math_inset.h"
#include "math_arrayinset.h"
#include "math_braceinset.h"
-#include "math_casesinset.h"
+#include "math_boxinset.h"
#include "math_charinset.h"
#include "math_deliminset.h"
#include "math_factory.h"
#include "math_sqrtinset.h"
#include "math_scriptinset.h"
#include "math_specialcharinset.h"
-#include "math_splitinset.h"
#include "math_sqrtinset.h"
#include "math_support.h"
+#include "math_xyarrowinset.h"
#include "lyxlex.h"
#include "debug.h"
-
+#include "support/LAssert.h"
#include "support/lstrings.h"
#include <cctype>
using std::endl;
using std::stack;
using std::fill;
+using std::vector;
+
+//#define FILEDEBUG
namespace {
// These are TeX's catcodes
enum CatCode {
- catEscape, // 0 backslash
+ catEscape, // 0 backslash
catBegin, // 1 {
catEnd, // 2 }
catMath, // 3 $
catParameter, // 6 #
catSuper, // 7 ^
catSub, // 8 _
- catIgnore, // 9
+ catIgnore, // 9
catSpace, // 10 space
catLetter, // 11 a-zA-Z
catOther, // 12 none of the above
catInvalid // 15 <delete>
};
-CatCode theCatcode[256];
+CatCode theCatcode[256];
inline CatCode catcode(unsigned char c)
fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
- theCatcode['\\'] = catEscape;
- theCatcode['{'] = catBegin;
- theCatcode['}'] = catEnd;
- theCatcode['$'] = catMath;
- theCatcode['&'] = catAlign;
- theCatcode['\n'] = catNewline;
- theCatcode['#'] = catParameter;
- theCatcode['^'] = catSuper;
- theCatcode['_'] = catSub;
- theCatcode['\7f'] = catIgnore;
- theCatcode[' '] = catSpace;
- theCatcode['\t'] = catSpace;
- theCatcode['\r'] = catSpace;
- theCatcode['~'] = catActive;
- theCatcode['%'] = catComment;
+ theCatcode['\\'] = catEscape;
+ theCatcode['{'] = catBegin;
+ theCatcode['}'] = catEnd;
+ theCatcode['$'] = catMath;
+ theCatcode['&'] = catAlign;
+ theCatcode['\n'] = catNewline;
+ theCatcode['#'] = catParameter;
+ theCatcode['^'] = catSuper;
+ theCatcode['_'] = catSub;
+ theCatcode['\7f'] = catIgnore;
+ theCatcode[' '] = catSpace;
+ theCatcode['\t'] = catSpace;
+ theCatcode['\r'] = catSpace;
+ theCatcode['~'] = catActive;
+ theCatcode['%'] = catComment;
}
///
bool isCR() const;
-private:
+private:
///
string cs_;
///
//bool operator==(Token const & s, Token const & t)
//{
// return s.character() == t.character()
-// && s.cat() == t.cat() && s.cs() == t.cs();
+// && s.cat() == t.cat() && s.cs() == t.cs();
//}
//
//bool operator!=(Token const & s, Token const & t)
///
bool parse_lines(MathAtom & t, bool numbered, bool outmost);
/// parses {... & ... \\ ... & ... }
- bool parse_lines2(MathAtom & t);
+ bool parse_lines2(MathAtom & t, bool braced);
+ /// dump contents to screen
+ void dump() const;
private:
///
///
void tokenize(string const & s);
///
+ void skipSpaceTokens(istream & is, char c);
+ ///
void push_back(Token const & t);
///
void pop_back();
///
int lineno_;
///
- std::vector<Token> tokens_;
+ vector<Token> tokens_;
///
unsigned pos_;
///
Token const & Parser::getToken() // returns the token at pos_ and advances pos_; returns an empty dummy token when good() is false
{
static const Token dummy; // static fallback object, so a valid reference can be returned even without input
- //lyxerr << "looking at token " << tokens_[pos_] << '\n';
+ //lyxerr << "looking at token " << tokens_[pos_] << " pos: " << pos_ << '\n';
return good() ? tokens_[pos_++] : dummy; // pos_ is only incremented on the good() path
}
}
-string Parser::getArg(char lf, char rg)
+string Parser::getArg(char left, char right) // returns the characters read between 'left' and 'right'; empty string if the next character is not 'left'
{
+ skipSpaces(); // NOTE(review): presumably discards whitespace in front of the argument - confirm against skipSpaces()
+
string result;
char c = getChar();
- if (c != lf)
+ if (c != left) // no opening delimiter: putback() is called and the result stays empty
putback();
- else
- while ((c = getChar()) != rg && good())
+ else
+ while ((c = getChar()) != right && good()) // append characters until 'right' is read or good() becomes false; 'right' itself is not appended
result += c;
return result;
}
+void Parser::skipSpaceTokens(istream & is, char c) // skips c and following characters of 'is' while their catcode is catSpace or catNewline
+{
+ // keep reading while the current character is a space or newline; leave the loop early if the read fails
+ while (catcode(c) == catSpace || catcode(c) == catNewline)
+ if (!is.get(c))
+ break;
+ //lyxerr << "putting back: " << c << "\n";
+ is.putback(c); // hand the character currently held in c back to the stream
+}
+
+
void Parser::tokenize(string const & buffer)
{
static bool init_done = false;
-
+
if (!init_done) {
catInit();
init_done = true;
char c;
while (is.get(c)) {
+ //lyxerr << "reading c: " << c << "\n";
switch (catcode(c)) {
case catNewline: {
- ++lineno_;
+ ++lineno_;
is.get(c);
if (catcode(c) == catNewline)
; //push_back(Token("par"));
else {
push_back(Token(' ', catSpace));
- is.putback(c);
+ is.putback(c);
}
break;
}
case catComment: {
while (is.get(c) && catcode(c) != catNewline)
;
- ++lineno_;
+ ++lineno_;
break;
}
case catEscape: {
is.get(c);
- string s(1, c);
- if (catcode(c) == catLetter) {
- while (is.get(c) && catcode(c) == catLetter)
- s += c;
- if (catcode(c) == catSpace)
- while (is.get(c) && catcode(c) == catSpace)
- ;
- is.putback(c);
- }
- push_back(Token(s));
+ if (!is) {
+ error("unexpected end of input");
+ } else {
+ string s(1, c);
+ if (catcode(c) == catLetter) {
+ // collect letters
+ while (is.get(c) && catcode(c) == catLetter)
+ s += c;
+ skipSpaceTokens(is, c);
+ }
+ push_back(Token(s));
+ }
+ break;
+ }
+
+ case catSuper:
+ case catSub: {
+ push_back(Token(c, catcode(c)));
+ is.get(c);
+ skipSpaceTokens(is, c);
+ break;
+ }
+
+ case catIgnore: {
+ lyxerr << "ignoring a char: " << int(c) << "\n";
break;
}
}
}
-#if 0
+#ifdef FILEDEBUG
+ dump();
+#endif
+}
+
+
+void Parser::dump() const // writes every entry of tokens_ to lyxerr, with a marker in front of the token at pos_
+{
lyxerr << "\nTokens: ";
- for (unsigned i = 0; i < tokens_.size(); ++i)
+ for (unsigned i = 0; i < tokens_.size(); ++i) {
+ if (i == pos_) // mark the current read position
+ lyxerr << " <#> ";
lyxerr << tokens_[i];
+ }
lyxerr << "\n";
-#endif
}
-void Parser::error(string const & msg)
+void Parser::error(string const & msg) // reports msg on lyxerr together with the approximate line number lineno_; returns normally, the exit call below is commented out
{
lyxerr << "Line ~" << lineno_ << ": Math parse error: " << msg << endl;
+ dump(); // additionally print the token list and the current position
//exit(1);
}
bool Parser::parse_lines(MathAtom & t, bool numbered, bool outmost)
-{
+{
MathGridInset * p = t->asGridInset();
if (!p) {
+ dump();
lyxerr << "error in Parser::parse_lines() 1\n";
return false;
}
curr_label_.erase();
// reading a row
- for (MathInset::col_type col = 0; col < p->ncols(); ++col) {
- //lyxerr << "reading cell " << row << " " << col << "\n";
-
+ for (MathInset::col_type col = 0; true; ++col) {
+ //lyxerr << "reading cell " << row << " " << col << " "
+ // << p->ncols() << "\n";
+ //lyxerr << "ncols: " << p->ncols() << "\n";
+
+ if (col >= p->ncols()) {
+ //lyxerr << "adding col " << col << "\n";
+ p->addCol(p->ncols());
+ }
+
MathArray & ar = p->cell(col + row * p->ncols());
parse_into(ar, FLAG_BLOCK);
// remove 'unnecessary' braces:
if (ar.size() == 1 && ar.back()->asBraceInset())
ar = ar.back()->asBraceInset()->cell(0);
+ //lyxerr << "ar: " << ar << "\n";
// break if cell is not followed by an ampersand
if (nextToken().cat() != catAlign) {
// << row << " " << col << "\n";
break;
}
-
+
// skip the ampersand
getToken();
}
}
-bool Parser::parse_lines2(MathAtom & t)
-{
+bool Parser::parse_lines2(MathAtom & t, bool braced)
+{
MathGridInset * p = t->asGridInset();
if (!p) {
lyxerr << "error in Parser::parse_lines() 1\n";
return false;
}
- skipBegin();
-
for (int row = 0; true; ++row) {
// reading a row
for (MathInset::col_type col = 0; true; ++col) {
//lyxerr << "reading cell " << row << " " << col << " " << p->ncols() << "\n";
-
+
if (col >= p->ncols()) {
//lyxerr << "adding col " << col << "\n";
p->addCol(p->ncols());
//lyxerr << "less cells read than normal in row/col: " << row << " " << col << "\n";
break;
}
-
+
// skip the ampersand
getToken();
}
getToken();
}
- // we are finished if the next token is an '}'
- if (nextToken().cat() == catEnd) {
- // skip the end-token
- getToken();
- // leave the 'read a line'-loop
- break;
+ // we are finished if the next token is the one we expected
+ // skip the end-token
+ // leave the 'read a line'-loop
+ if (braced) {
+ if (nextToken().cat() == catEnd) {
+ getToken();
+ break;
+ }
+ } else {
+ if (nextToken().cs() == "end") {
+ getToken();
+ getArg('{','}');
+ break;
+ }
}
// otherwise, we have to start a new row
+
bool Parser::parse_macro(string & name) // parses a \def or \newcommand definition, stores the macro name in 'name' and registers it via MathMacroTable::create; returns false on any parse error
{
+ int nargs = 0; // argument count passed to MathMacroTable::create; only the \newcommand branch ever changes it
name = "{error}"; // value the caller sees if parsing fails before the real name is read
skipSpaces();
- if (getToken().cs() != "newcommand") {
- lyxerr << "\\newcommand expected\n";
- return false;
- }
+ if (nextToken().cs() == "def") { // first accepted form: \def followed by the macro name and a braced body
- if (getToken().cat() != catBegin) {
- lyxerr << "'{' in \\newcommand expected (1)\n";
- return false;
- }
+ getToken(); // consume the "def" token that nextToken() only peeked at
+ name = getToken().cs();
- name = getToken().cs();
+ string pars;
+ while (good() && nextToken().cat() != catBegin) // collect everything between the name and the opening brace
+ pars += getToken().cs(); // NOTE(review): only cs() is appended; tokens without a control-sequence name presumably contribute nothing, so such parameter text may go undetected - confirm
+
+ if (!good()) {
+ lyxerr << "bad stream in parse_macro\n";
+ dump();
+ return false;
+ }
+
+ //lyxerr << "read \\def parameter list '" << pars << "'\n";
+ if (!pars.empty()) {
+ lyxerr << "can't handle non-empty parameter lists\n";
+ dump();
+ return false;
+ }
- if (getToken().cat() != catEnd) {
- lyxerr << "'}' expected\n";
+ } else if (nextToken().cs() == "newcommand") { // second accepted form: \newcommand{name}[nargs]{body}
+
+ getToken();
+
+ if (getToken().cat() != catBegin) {
+ lyxerr << "'{' in \\newcommand expected (1) \n";
+ dump();
+ return false;
+ }
+
+ name = getToken().cs();
+
+ if (getToken().cat() != catEnd) {
+ lyxerr << "'}' expected\n";
+ return false;
+ }
+
+ string arg = getArg('[', ']'); // optional bracketed argument count; getArg returns an empty string when no '[' follows
+ if (!arg.empty())
+ nargs = atoi(arg.c_str());
+
+ } else {
+ lyxerr << "\\newcommand or \\def expected\n";
return false;
}
- string arg = getArg('[', ']');
- int narg = arg.empty() ? 0 : atoi(arg.c_str());
if (getToken().cat() != catBegin) { // both forms must continue with the opening brace of the body
- lyxerr << "'{' in \\newcommand expected (2)\n";
+ lyxerr << "'{' in macro definition expected (2)\n";
return false;
}
- MathArray ar;
- parse_into(ar, FLAG_BRACE_LAST);
+ MathArray ar1;
+ parse_into(ar1, FLAG_BRACE_LAST); // read the macro body into ar1
// we cannot handle recursive stuff at all
MathArray test;
test.push_back(createMathInset(name));
- if (ar.contains(test)) {
+ if (ar1.contains(test)) {
lyxerr << "we cannot handle recursive macros at all.\n";
return false;
}
- MathMacroTable::create(name, narg, ar);
+ MathArray ar2;
+ parse_into(ar2, FLAG_ITEM); // reads one further item after the body; NOTE(review): its meaning is defined by MathMacroTable::create - confirm
+
+ MathMacroTable::create(name, nargs, ar1, ar2);
return true;
}
{
parse_into1(array, flags, code);
// remove 'unnecessary' braces:
- if (array.size() == 1 && array.back()->asBraceInset())
+ if (array.size() == 1 && array.back()->asBraceInset()) {
+ lyxerr << "extra braces removed\n";
array = array.back()->asBraceInset()->cell(0);
+ }
}
while (good()) {
Token const & t = getToken();
-
- //lyxerr << "t: " << t << " flags: " << flags << "\n";
- //array.dump(lyxerr);
- //lyxerr << "\n";
+
+#ifdef FILEDEBUG
+ lyxerr << "t: " << t << " flags: " << flags << "\n";
+ //array.dump();
+ lyxerr << "\n";
+#endif
if (flags & FLAG_ITEM) {
+ if (t.cat() == catSpace)
+ continue;
+
flags &= ~FLAG_ITEM;
- if (t.cat() == catBegin) {
+ if (t.cat() == catBegin) {
// skip the brace and collect everything to the next matching
// closing brace
flags |= FLAG_BRACE_LAST;
continue;
- } else {
- // handle only this single token, leave the loop if done
- flags |= FLAG_LEAVE;
}
+
+ // handle only this single token, leave the loop if done
+ flags |= FLAG_LEAVE;
}
if (flags & FLAG_BLOCK) {
// ignore braces around simple items
if ((ar.size() == 1 && !ar.front()->needsBraces()
|| (ar.size() == 2 && !ar.front()->needsBraces()
- && ar.back()->asScriptInset()))
+ && ar.back()->asScriptInset()))
|| (ar.size() == 0 && array.size() == 0))
{
array.push_back(ar);
else if (t.cat() == catEnd) {
if (flags & FLAG_BRACE_LAST)
return;
+ dump();
lyxerr << "found '}' unexpectedly, array: '" << array << "'\n";
//lyxerr << "found '}' unexpectedly\n";
+ lyx::Assert(0);
add(array, '}', LM_TC_TEX);
}
-
+
else if (t.cat() == catAlign) {
lyxerr << "found tab unexpectedly, array: '" << array << "'\n";
//lyxerr << "found tab unexpectedly\n";
add(array, '&', LM_TC_TEX);
}
-
+
else if (t.cat() == catSuper || t.cat() == catSub) {
bool up = (t.cat() == catSuper);
- MathScriptInset * p = 0;
- if (array.size())
+ MathScriptInset * p = 0;
+ if (array.size())
p = array.back()->asScriptInset();
if (!p || p->has(up)) {
array.push_back(MathAtom(new MathScriptInset(up)));
else if (t.cat() == catOther)
add(array, t.character(), code);
-
+
//
// control sequences
- //
+ //
else if (t.cs() == "protect")
- // ignore \\protect, will be re-added during output
+ // ignore \\protect, will be re-added during output
;
else if (t.cs() == "end")
lyxerr << "found newline unexpectedly\n";
array.push_back(createMathInset("\\"));
}
-
+
else if (t.cs() == "limits")
limits = 1;
-
+
else if (t.cs() == "nolimits")
limits = -1;
-
+
else if (t.cs() == "nonumber")
curr_num_ = false;
parse_into(array.back()->cell(0), FLAG_ITEM);
}
}
-
+
else if (t.cs() == "left") {
string l = getToken().asString();
MathArray ar;
dl->cell(0) = ar;
array.push_back(dl);
}
-
+
else if (t.cs() == "right") {
if (!(flags & FLAG_RIGHT)) {
//lyxerr << "got so far: '" << array << "'\n";
}
else if (t.cs() == "begin") {
- string const name = getArg('{', '}');
- if (name == "array") {
+ string const name = getArg('{', '}');
+ if (name == "array" || name == "subarray") {
string const valign = getArg('[', ']') + 'c';
string const halign = getArg('{', '}');
- array.push_back(MathAtom(new MathArrayInset(valign[0], halign)));
- parse_lines(array.back(), false, false);
- } else if (name == "split") {
- array.push_back(MathAtom(new MathSplitInset(1)));
+ array.push_back(MathAtom(new MathArrayInset(name, valign[0], halign)));
parse_lines(array.back(), false, false);
- } else if (name == "cases") {
- array.push_back(MathAtom(new MathCasesInset));
+ } else if (name == "split" || name == "cases" ||
+ name == "gathered" || name == "aligned") {
+ array.push_back(createMathInset(name));
parse_lines(array.back(), false, false);
- } else
- lyxerr << "unknow math inset begin '" << name << "'\n";
+ } else if (name == "matrix" || name == "pmatrix" || name == "bmatrix" ||
+ name == "vmatrix" || name == "Vmatrix") {
+ array.push_back(createMathInset(name));
+ parse_lines2(array.back(), false);
+ } else
+ lyxerr << "unknow math inset begin '" << name << "'\n";
}
-
+
else if (t.cs() == "kern") {
#ifdef WITH_WARNINGS
#warning A hack...
while (1) {
Token const & t = getToken();
if (!good()) {
- putback();
+ putback();
break;
}
s += t.character();
array.push_back(MathAtom(new MathKernInset(s)));
}
+/*
+ else if (t.cs() == "lyxkern") {
+ MathAtom p = createMathInset(t.cs());
+ parse_into(p->cell(0), flags, code);
+ array.push_back(p);
+ }
+*/
+
else if (t.cs() == "label") {
curr_label_ = getArg('{', '}');
}
return;
}
+ else if (t.cs() == "substack") {
+ array.push_back(createMathInset(t.cs()));
+ skipBegin();
+ parse_lines2(array.back(), true);
+ }
+
else if (t.cs() == "xymatrix") {
array.push_back(createMathInset(t.cs()));
- parse_lines2(array.back());
+ skipBegin();
+ parse_lines2(array.back(), true);
}
- // Disabled
#if 0
- else if (0 && t.cs() == "ar") {
- array.push_back(createMathInset(t.cs()));
- parse_lines2(array.back());
+ // Disabled
+ else if (1 && t.cs() == "ar") {
+ MathXYArrowInset * p = new MathXYArrowInset;
+
+ // try to read target
+ char c = getChar();
+ if (c == '[') {
+ parse_into(p->cell(0), FLAG_BRACK_END);
+ //lyxerr << "read target: " << p->cell(0) << "\n";
+ } else {
+ putback();
+ }
+
+ // try to read label
+ if (nextToken().cat() == catSuper || nextToken().cat() == catSub) {
+ p->up_ = nextToken().cat() == catSuper;
+ getToken();
+ parse_into(p->cell(1), FLAG_ITEM);
+ //lyxerr << "read label: " << p->cell(1) << "\n";
+ }
+
+ array.push_back(MathAtom(p));
+ //lyxerr << "read array: " << array << "\n";
}
+#endif
- else if (t.cs() == "mbox") {
- array.push_back(createMathInset(t.cs()));
+#if 0
+ else if (t.cs() == "mbox" || t.cs() == "text") {
+ //array.push_back(createMathInset(t.cs()));
+ array.push_back(MathAtom(new MathBoxInset(t.cs())));
// slurp in the argument of mbox
-
+
MathBoxInset * p = array.back()->asBoxInset();
//lyx::assert(p);
}
#endif
-
+
else if (t.cs().size()) {
latexkeys const * l = in_word_set(t.cs());
if (l) {
//lyxerr << "starting font\n";
//CatCode catSpaceSave = theCatcode[' '];
//if (l->id == LM_TC_TEXTRM) {
- // // temporarily change catcode
- // theCatcode[' '] = catLetter;
+ // // temporarily change catcode
+ // theCatcode[' '] = catLetter;
//}
MathArray ar;
else {
MathAtom p = createMathInset(t.cs());
- for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
+ for (MathInset::idx_type i = 0; i < p->nargs(); ++i)
parse_into(p->cell(i), FLAG_ITEM);
array.push_back(p);
}
if (panic) {
lyxerr << " Math Panic, expect problems!\n";
- // Search for the end command.
+ // Search for the end command.
Token t;
do {
t = getToken();