X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fmathed%2FMathParser.C;h=cb208ffca3c78798280fe2cb0b0a89a2f54185af;hb=8b7cc1b498b9dc1925647d4a7e7721a569e42dc5;hp=b8268eb72a9647ef18a0498c5152c0962758a777;hpb=368f6a53a87eb5997433ecd0f51e854c99360d28;p=lyx.git diff --git a/src/mathed/MathParser.C b/src/mathed/MathParser.C index b8268eb72a..cb208ffca3 100644 --- a/src/mathed/MathParser.C +++ b/src/mathed/MathParser.C @@ -69,6 +69,9 @@ following hack as starting point to write some macros: #include + +namespace lyx { + using std::endl; using std::fill; @@ -85,7 +88,7 @@ using std::vector; namespace { -InsetMath::mode_type asMode(InsetMath::mode_type oldmode, string const & str) +InsetMath::mode_type asMode(InsetMath::mode_type oldmode, docstring const & str) { //lyxerr << "handling mode: '" << str << "'" << endl; if (str == "mathmode") @@ -96,9 +99,9 @@ InsetMath::mode_type asMode(InsetMath::mode_type oldmode, string const & str) } -bool stared(string const & s) +bool stared(docstring const & s) { - string::size_type const n = s.size(); + size_t const n = s.size(); return n && s[n - 1] == '*'; } @@ -109,7 +112,7 @@ bool stared(string const & s) * environments like "equation" that have a fixed number of rows. */ bool addRow(InsetMathGrid & grid, InsetMathGrid::row_type & cellrow, - string const & vskip) + docstring const & vskip) { ++cellrow; if (cellrow == grid.nrows()) { @@ -125,12 +128,12 @@ bool addRow(InsetMathGrid & grid, InsetMathGrid::row_type & cellrow, --cellrow; lyxerr << "ignoring extra row"; if (!vskip.empty()) - lyxerr << " with extra space " << vskip; + lyxerr << " with extra space " << to_utf8(vskip); lyxerr << '.' << endl; return false; } } - grid.vcrskip(LyXLength(vskip), cellrow - 1); + grid.vcrskip(LyXLength(to_utf8(vskip)), cellrow - 1); return true; } @@ -210,11 +213,19 @@ enum CatCode { catInvalid // 15 }; -CatCode theCatcode[256]; +CatCode theCatcode[128]; -inline CatCode catcode(unsigned char c) +inline CatCode catcode(lyx::char_type c) { + /* The fact that we use unicode internally does not change Knuth's TeX + engine. It is still 7bit only, not even latin1 or something like that. + Therefore, the catcode table needs only to have 128 entries. + Everything not in that range is catOther. + */ + if (c >= 128) + return catOther; + return theCatcode[c]; } @@ -245,26 +256,26 @@ public: /// Token() : cs_(), char_(0), cat_(catIgnore) {} /// - Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {} + Token(char_type c, CatCode cat) : cs_(), char_(c), cat_(cat) {} /// - Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {} + Token(docstring const & cs) : cs_(cs), char_(0), cat_(catIgnore) {} /// - string const & cs() const { return cs_; } + docstring const & cs() const { return cs_; } /// CatCode cat() const { return cat_; } /// - char character() const { return char_; } + char_type character() const { return char_; } /// - string asString() const { return cs_.size() ? cs_ : string(1, char_); } + docstring asString() const { return cs_.size() ? cs_ : docstring(1, char_); } /// - string asInput() const { return cs_.size() ? '\\' + cs_ : string(1, char_); } + docstring asInput() const { return cs_.size() ? '\\' + cs_ : docstring(1, char_); } private: /// - string cs_; + docstring cs_; /// - char char_; + char_type char_; /// CatCode cat_; }; @@ -309,19 +320,20 @@ private: /// void parse2(MathAtom & at, unsigned flags, mode_type mode, bool numbered); /// get arg delimited by 'left' and 'right' - string getArg(char left, char right); + docstring getArg(char_type left, char_type right); /// - char getChar(); + char_type getChar(); /// void error(string const & msg); + void error(docstring const & msg) { error(to_utf8(msg)); } /// dump contents to screen void dump() const; /// void tokenize(istream & is); /// - void tokenize(string const & s); + void tokenize(docstring const & s); /// - void skipSpaceTokens(istream & is, char c); + void skipSpaceTokens(idocstream & is, char_type c); /// void push_back(Token const & t); /// @@ -335,13 +347,13 @@ private: /// skips spaces if any void skipSpaces(); /// - void lex(string const & s); + void lex(docstring const & s); /// bool good() const; /// - string parse_verbatim_item(); + docstring parse_verbatim_item(); /// - string parse_verbatim_option(); + docstring parse_verbatim_option(); /// int lineno_; @@ -350,7 +362,7 @@ private: /// unsigned pos_; /// Stack of active environments - vector environments_; + vector environments_; }; @@ -422,7 +434,7 @@ bool Parser::good() const } -char Parser::getChar() +char_type Parser::getChar() { if (!good()) error("The input stream is not well..."); @@ -430,12 +442,12 @@ char Parser::getChar() } -string Parser::getArg(char left, char right) +docstring Parser::getArg(char_type left, char_type right) { skipSpaces(); - string result; - char c = getChar(); + docstring result; + char_type c = getChar(); if (c != left) putback(); @@ -447,7 +459,7 @@ string Parser::getArg(char left, char right) } -void Parser::skipSpaceTokens(istream & is, char c) +void Parser::skipSpaceTokens(idocstream & is, char_type c) { // skip trailing spaces while (catcode(c) == catSpace || catcode(c) == catNewline) @@ -462,7 +474,7 @@ void Parser::tokenize(istream & is) { // eat everything up to the next \end_inset or end of stream // and store it in s for further tokenization - string s; + std::string s; char c; while (is.get(c)) { s += c; @@ -476,15 +488,15 @@ void Parser::tokenize(istream & is) is.unget(); // tokenize buffer - tokenize(s); + tokenize(from_utf8(s)); } -void Parser::tokenize(string const & buffer) +void Parser::tokenize(docstring const & buffer) { - istringstream is(buffer, ios::in | ios::binary); + idocstringstream is(buffer, ios::in | ios::binary); - char c; + char_type c; while (is.get(c)) { //lyxerr << "reading c: " << c << endl; @@ -515,7 +527,7 @@ void Parser::tokenize(string const & buffer) if (!is) { error("unexpected end of input"); } else { - string s(1, c); + docstring s(1, c); if (catcode(c) == catLetter) { // collect letters while (is.get(c) && catcode(c) == catLetter) @@ -590,10 +602,10 @@ bool Parser::parse(MathAtom & at) } -string Parser::parse_verbatim_option() +docstring Parser::parse_verbatim_option() { skipSpaces(); - string res; + docstring res; if (nextToken().character() == '[') { Token t = getToken(); for (Token t = getToken(); t.character() != ']' && good(); t = getToken()) { @@ -608,10 +620,10 @@ string Parser::parse_verbatim_option() } -string Parser::parse_verbatim_item() +docstring Parser::parse_verbatim_item() { skipSpaces(); - string res; + docstring res; if (nextToken().cat() == catBegin) { Token t = getToken(); for (Token t = getToken(); t.cat() != catEnd && good(); t = getToken()) { @@ -812,9 +824,13 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, cell->back() = MathAtom(new InsetMathScript(cell->back(), up)); InsetMathScript * p = cell->back().nucleus()->asScriptInset(); // special handling of {}-bases + // Test for empty brace inset, otherwise \xxx{\vec{H}}_{0} + // where \xxx is an unknown command gets misparsed to + // \xxx\vec{H}_{0}, and that is invalid LaTeX. // is this always correct? - if (p->nuc().size() == 1 - && p->nuc().back()->asBraceInset()) + if (p->nuc().size() == 1 && + p->nuc().back()->asBraceInset() && + p->nuc().back()->asBraceInset()->cell(0).empty()) p->nuc() = p->nuc().back()->asNestInset()->cell(0); parse(p->cell(p->idxOfScript(up)), FLAG_ITEM, mode); if (limits) { @@ -832,7 +848,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, cell->push_back(MathAtom(new InsetMathChar(t.character()))); else if (t.cat() == catComment) { - string s; + docstring s; while (good()) { Token const & t = getToken(); if (t.cat() == catNewline) @@ -856,15 +872,15 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, t.cs() == "newcommand" || t.cs() == "renewcommand") { - string const type = t.cs(); - string name; + docstring const type = t.cs(); + docstring name; int nargs = 0; if (t.cs() == "def") { // get name name = getToken().cs(); // read parameter - string pars; + docstring pars; while (good() && nextToken().cat() != catBegin) { pars += getToken().cs(); ++nargs; @@ -886,7 +902,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, return; } - string const arg = getArg('[', ']'); + docstring const arg = getArg('[', ']'); if (!arg.empty()) nargs = convert(arg); @@ -930,7 +946,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, else if (t.cs() == "end") { if (flags & FLAG_END) { // eat environment name - string const name = getArg('{', '}'); + docstring const name = getArg('{', '}'); if (environments_.empty()) error("'found \\end{" + name + "}' without matching '\\begin{" + @@ -1065,12 +1081,12 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, // \| and \Vert are equivalent, and InsetMathDelim // can't handle \| // FIXME: fix this in InsetMathDelim itself! - string const l = tl.cs() == "|" ? "Vert" : tl.asString(); + docstring const l = tl.cs() == "|" ? from_ascii("Vert") : tl.asString(); MathArray ar; parse(ar, FLAG_RIGHT, mode); skipSpaces(); Token const & tr = getToken(); - string const r = tr.cs() == "|" ? "Vert" : tr.asString(); + docstring const r = tr.cs() == "|" ? from_ascii("Vert") : tr.asString(); cell->push_back(MathAtom(new InsetMathDelim(l, r, ar))); } @@ -1083,20 +1099,20 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, } else if (t.cs() == "begin") { - string const name = getArg('{', '}'); + docstring const name = getArg('{', '}'); environments_.push_back(name); if (name == "array" || name == "subarray") { - string const valign = parse_verbatim_option() + 'c'; - string const halign = parse_verbatim_item(); - cell->push_back(MathAtom(new InsetMathArray(name, valign[0], halign))); + docstring const valign = parse_verbatim_option() + 'c'; + docstring const halign = parse_verbatim_item(); + cell->push_back(MathAtom(new InsetMathArray(name, (char)valign[0], halign))); parse2(cell->back(), FLAG_END, mode, false); } else if (name == "tabular") { - string const valign = parse_verbatim_option() + 'c'; - string const halign = parse_verbatim_item(); - cell->push_back(MathAtom(new InsetMathTabular(name, valign[0], halign))); + docstring const valign = parse_verbatim_option() + 'c'; + docstring const halign = parse_verbatim_item(); + cell->push_back(MathAtom(new InsetMathTabular(name, (char)valign[0], halign))); parse2(cell->back(), FLAG_END, InsetMath::TEXT_MODE, false); } @@ -1106,10 +1122,10 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, } else if (name == "alignedat") { - string const valign = parse_verbatim_option() + 'c'; + docstring const valign = parse_verbatim_option() + 'c'; // ignore this for a while getArg('{', '}'); - cell->push_back(MathAtom(new InsetMathSplit(name, valign[0]))); + cell->push_back(MathAtom(new InsetMathSplit(name, (char)valign[0]))); parse2(cell->back(), FLAG_END, mode, false); } @@ -1175,8 +1191,8 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, cell->push_back(createInsetMath(name)); parse2(cell->back(), FLAG_END, mode, false); } else if (l->inset == "split") { - string const valign = parse_verbatim_option() + 'c'; - cell->push_back(MathAtom(new InsetMathSplit(name, valign[0]))); + docstring const valign = parse_verbatim_option() + 'c'; + cell->push_back(MathAtom(new InsetMathSplit(name, (char)valign[0]))); parse2(cell->back(), FLAG_END, mode, false); } else { dump(); @@ -1202,7 +1218,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, #ifdef WITH_WARNINGS #warning A hack... #endif - string s; + docstring s; while (true) { Token const & t = getToken(); if (!good()) { @@ -1210,7 +1226,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, break; } s += t.character(); - if (isValidLength(s)) + if (isValidLength(to_utf8(s))) break; } cell->push_back(MathAtom(new InsetMathKern(s))); @@ -1218,7 +1234,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, else if (t.cs() == "label") { // FIXME: This is swallowed in inline formulas - string label = parse_verbatim_item(); + docstring label = parse_verbatim_item(); MathArray ar; asArray(label, ar); if (grid.asHullInset()) { @@ -1239,14 +1255,14 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, } else if (t.cs() == "color") { - string const color = parse_verbatim_item(); + docstring const color = parse_verbatim_item(); cell->push_back(MathAtom(new InsetMathColor(true, color))); parse(cell->back().nucleus()->cell(0), flags, mode); return; } else if (t.cs() == "textcolor") { - string const color = parse_verbatim_item(); + docstring const color = parse_verbatim_item(); cell->push_back(MathAtom(new InsetMathColor(false, color))); parse(cell->back().nucleus()->cell(0), FLAG_ITEM, InsetMath::TEXT_MODE); } @@ -1263,7 +1279,10 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, } else if (t.cs() == "xymatrix") { - cell->push_back(createInsetMath(t.cs())); + odocstringstream os; + while (good() && nextToken().cat() != catBegin) + os << getToken().asInput(); + cell->push_back(createInsetMath(t.cs() + os.str())); parse2(cell->back(), FLAG_ITEM, mode, false); } @@ -1314,14 +1333,13 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, if (l) { if (l->inset == "big") { skipSpaces(); - string const delim = getToken().asInput(); + docstring const delim = getToken().asInput(); if (InsetMathBig::isBigInsetDelim(delim)) cell->push_back(MathAtom( new InsetMathBig(t.cs(), delim))); else { cell->push_back(createInsetMath(t.cs())); - cell->push_back(createInsetMath( - delim.substr(1))); + putback(); } } @@ -1397,9 +1415,9 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags, } // anonymous namespace -void mathed_parse_cell(MathArray & ar, string const & str) +void mathed_parse_cell(MathArray & ar, docstring const & str) { - istringstream is(str); + istringstream is(to_utf8(str)); mathed_parse_cell(ar, is); } @@ -1438,7 +1456,7 @@ void mathed_parse_normal(InsetMathGrid & grid, string const & str) void initParser() { - fill(theCatcode, theCatcode + 256, catOther); + fill(theCatcode, theCatcode + 128, catOther); fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter); fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter); @@ -1458,3 +1476,6 @@ void initParser() theCatcode[int('~')] = catActive; theCatcode[int('%')] = catComment; } + + +} // namespace lyx