git.lyx.org Git - lyx.git/blobdiff - src/mathed/MathParser.C
Convert to unicode.
[lyx.git] / src / mathed / MathParser.C
index 826d61529ec680da64093d3e2c68862a57830c65..cb208ffca3c78798280fe2cb0b0a89a2f54185af 100644 (file)
@@ -39,6 +39,7 @@ following hack as starting point to write some macros:
 #include <config.h>
 
 #include "MathParser.h"
+
 #include "InsetMathArray.h"
 #include "InsetMathBig.h"
 #include "InsetMathBrace.h"
@@ -47,20 +48,19 @@ following hack as starting point to write some macros:
 #include "InsetMathComment.h"
 #include "InsetMathDelim.h"
 #include "InsetMathEnv.h"
-#include "MathFactory.h"
 #include "InsetMathKern.h"
 #include "InsetMathMacro.h"
-#include "MathMacroArgument.h"
-#include "MathMacroTemplate.h"
 #include "InsetMathPar.h"
+#include "InsetMathRef.h"
 #include "InsetMathRoot.h"
 #include "InsetMathScript.h"
+#include "InsetMathSplit.h"
 #include "InsetMathSqrt.h"
-#include "MathSupport.h"
 #include "InsetMathTabular.h"
-
-//#include "insets/insetref.h"
-#include "InsetMathRef.h"
+#include "MathMacroTemplate.h"
+#include "MathFactory.h"
+#include "MathMacroArgument.h"
+#include "MathSupport.h"
 
 #include "lyxlex.h"
 #include "debug.h"
@@ -69,6 +69,9 @@ following hack as starting point to write some macros:
 
 #include <sstream>
 
+
+namespace lyx {
+
 using std::endl;
 using std::fill;
 
@@ -85,7 +88,7 @@ using std::vector;
 
 namespace {
 
-InsetMath::mode_type asMode(InsetMath::mode_type oldmode, string const & str)
+InsetMath::mode_type asMode(InsetMath::mode_type oldmode, docstring const & str)
 {
        //lyxerr << "handling mode: '" << str << "'" << endl;
        if (str == "mathmode")
@@ -96,9 +99,9 @@ InsetMath::mode_type asMode(InsetMath::mode_type oldmode, string const & str)
 }
 
 
-bool stared(string const & s)
+bool stared(docstring const & s)
 {
-       string::size_type const n = s.size();
+       size_t const n = s.size();
        return n && s[n - 1] == '*';
 }
 
@@ -109,7 +112,7 @@ bool stared(string const & s)
  * environments like "equation" that have a fixed number of rows.
  */
 bool addRow(InsetMathGrid & grid, InsetMathGrid::row_type & cellrow,
-           string const & vskip)
+           docstring const & vskip)
 {
        ++cellrow;
        if (cellrow == grid.nrows()) {
@@ -125,12 +128,12 @@ bool addRow(InsetMathGrid & grid, InsetMathGrid::row_type & cellrow,
                        --cellrow;
                        lyxerr << "ignoring extra row";
                        if (!vskip.empty())
-                               lyxerr << " with extra space " << vskip;
+                               lyxerr << " with extra space " << to_utf8(vskip);
                        lyxerr << '.' << endl;
                        return false;
                }
        }
-       grid.vcrskip(LyXLength(vskip), cellrow - 1);
+       grid.vcrskip(LyXLength(to_utf8(vskip)), cellrow - 1);
        return true;
 }
 
@@ -210,11 +213,19 @@ enum CatCode {
        catInvalid     // 15   <delete>
 };
 
-CatCode theCatcode[256];
+CatCode theCatcode[128];
 
 
-inline CatCode catcode(unsigned char c)
+inline CatCode catcode(lyx::char_type c)
 {
+       /* The fact that we use unicode internally does not change Knuth's TeX
+       engine. It is still 7bit only, not even latin1 or something like that.
+       Therefore, the catcode table needs only to have 128 entries.
+       Everything not in that range is catOther.
+       */
+       if (c >= 128)
+               return catOther;
+
        return theCatcode[c];
 }
 
@@ -245,26 +256,26 @@ public:
        ///
        Token() : cs_(), char_(0), cat_(catIgnore) {}
        ///
-       Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
+       Token(char_type c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
        ///
-       Token(string const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
+       Token(docstring const & cs) : cs_(cs), char_(0), cat_(catIgnore) {}
 
        ///
-       string const & cs() const { return cs_; }
+       docstring const & cs() const { return cs_; }
        ///
        CatCode cat() const { return cat_; }
        ///
-       char character() const { return char_; }
+       char_type character() const { return char_; }
        ///
-       string asString() const { return cs_.size() ? cs_ : string(1, char_); }
+       docstring asString() const { return cs_.size() ? cs_ : docstring(1, char_); }
        ///
-       string asInput() const { return cs_.size() ? '\\' + cs_ : string(1, char_); }
+       docstring asInput() const { return cs_.size() ? '\\' + cs_ : docstring(1, char_); }
 
 private:
        ///
-       string cs_;
+       docstring cs_;
        ///
-       char char_;
+       char_type char_;
        ///
        CatCode cat_;
 };
@@ -309,19 +320,20 @@ private:
        ///
        void parse2(MathAtom & at, unsigned flags, mode_type mode, bool numbered);
        /// get arg delimited by 'left' and 'right'
-       string getArg(char left, char right);
+       docstring getArg(char_type left, char_type right);
        ///
-       char getChar();
+       char_type getChar();
        ///
        void error(string const & msg);
+       void error(docstring const & msg) { error(to_utf8(msg)); }
        /// dump contents to screen
        void dump() const;
        ///
        void tokenize(istream & is);
        ///
-       void tokenize(string const & s);
+       void tokenize(docstring const & s);
        ///
-       void skipSpaceTokens(istream & is, char c);
+       void skipSpaceTokens(idocstream & is, char_type c);
        ///
        void push_back(Token const & t);
        ///
@@ -335,13 +347,13 @@ private:
        /// skips spaces if any
        void skipSpaces();
        ///
-       void lex(string const & s);
+       void lex(docstring const & s);
        ///
        bool good() const;
        ///
-       string parse_verbatim_item();
+       docstring parse_verbatim_item();
        ///
-       string parse_verbatim_option();
+       docstring parse_verbatim_option();
 
        ///
        int lineno_;
@@ -350,7 +362,7 @@ private:
        ///
        unsigned pos_;
        /// Stack of active environments
-       vector<string> environments_;
+       vector<docstring> environments_;
 };
 
 
@@ -422,7 +434,7 @@ bool Parser::good() const
 }
 
 
-char Parser::getChar()
+char_type Parser::getChar()
 {
        if (!good())
                error("The input stream is not well...");
@@ -430,12 +442,12 @@ char Parser::getChar()
 }
 
 
-string Parser::getArg(char left, char right)
+docstring Parser::getArg(char_type left, char_type right)
 {
        skipSpaces();
 
-       string result;
-       char c = getChar();
+       docstring result;
+       char_type c = getChar();
 
        if (c != left)
                putback();
@@ -447,7 +459,7 @@ string Parser::getArg(char left, char right)
 }
 
 
-void Parser::skipSpaceTokens(istream & is, char c)
+void Parser::skipSpaceTokens(idocstream & is, char_type c)
 {
        // skip trailing spaces
        while (catcode(c) == catSpace || catcode(c) == catNewline)
@@ -462,7 +474,7 @@ void Parser::tokenize(istream & is)
 {
        // eat everything up to the next \end_inset or end of stream
        // and store it in s for further tokenization
-       string s;
+       std::string s;
        char c;
        while (is.get(c)) {
                s += c;
@@ -476,15 +488,15 @@ void Parser::tokenize(istream & is)
                is.unget();
 
        // tokenize buffer
-       tokenize(s);
+       tokenize(from_utf8(s));
 }
 
 
-void Parser::tokenize(string const & buffer)
+void Parser::tokenize(docstring const & buffer)
 {
-       istringstream is(buffer, ios::in | ios::binary);
+       idocstringstream is(buffer, ios::in | ios::binary);
 
-       char c;
+       char_type c;
        while (is.get(c)) {
                //lyxerr << "reading c: " << c << endl;
 
@@ -515,7 +527,7 @@ void Parser::tokenize(string const & buffer)
                                if (!is) {
                                        error("unexpected end of input");
                                } else {
-                                       string s(1, c);
+                                       docstring s(1, c);
                                        if (catcode(c) == catLetter) {
                                                // collect letters
                                                while (is.get(c) && catcode(c) == catLetter)
@@ -590,10 +602,10 @@ bool Parser::parse(MathAtom & at)
 }
 
 
-string Parser::parse_verbatim_option()
+docstring Parser::parse_verbatim_option()
 {
        skipSpaces();
-       string res;
+       docstring res;
        if (nextToken().character() == '[') {
                Token t = getToken();
                for (Token t = getToken(); t.character() != ']' && good(); t = getToken()) {
@@ -608,10 +620,10 @@ string Parser::parse_verbatim_option()
 }
 
 
-string Parser::parse_verbatim_item()
+docstring Parser::parse_verbatim_item()
 {
        skipSpaces();
-       string res;
+       docstring res;
        if (nextToken().cat() == catBegin) {
                Token t = getToken();
                for (Token t = getToken(); t.cat() != catEnd && good(); t = getToken()) {
@@ -812,9 +824,13 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                                cell->back() = MathAtom(new InsetMathScript(cell->back(), up));
                        InsetMathScript * p = cell->back().nucleus()->asScriptInset();
                        // special handling of {}-bases
+                       // Test for empty brace inset, otherwise \xxx{\vec{H}}_{0}
+                       // where \xxx is an unknown command gets misparsed to
+                       // \xxx\vec{H}_{0}, and that is invalid LaTeX.
                        // is this always correct?
-                       if (p->nuc().size() == 1 
-                           && p->nuc().back()->asBraceInset())
+                       if (p->nuc().size() == 1 &&
+                           p->nuc().back()->asBraceInset() &&
+                           p->nuc().back()->asBraceInset()->cell(0).empty())
                                p->nuc() = p->nuc().back()->asNestInset()->cell(0);
                        parse(p->cell(p->idxOfScript(up)), FLAG_ITEM, mode);
                        if (limits) {
@@ -832,7 +848,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                        cell->push_back(MathAtom(new InsetMathChar(t.character())));
 
                else if (t.cat() == catComment) {
-                       string s;
+                       docstring s;
                        while (good()) {
                                Token const & t = getToken();
                                if (t.cat() == catNewline)
@@ -856,15 +872,15 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                        t.cs() == "newcommand" ||
                        t.cs() == "renewcommand")
                {
-                       string const type = t.cs();
-                       string name;
+                       docstring const type = t.cs();
+                       docstring name;
                        int nargs = 0;
                        if (t.cs() == "def") {
                                // get name
                                name = getToken().cs();
 
                                // read parameter
-                               string pars;
+                               docstring pars;
                                while (good() && nextToken().cat() != catBegin) {
                                        pars += getToken().cs();
                                        ++nargs;
@@ -886,7 +902,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                                        return;
                                }
 
-                               string const arg  = getArg('[', ']');
+                               docstring const arg  = getArg('[', ']');
                                if (!arg.empty())
                                        nargs = convert<int>(arg);
 
@@ -930,7 +946,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                else if (t.cs() == "end") {
                        if (flags & FLAG_END) {
                                // eat environment name
-                               string const name = getArg('{', '}');
+                               docstring const name = getArg('{', '}');
                                if (environments_.empty())
                                        error("'found \\end{" + name +
                                              "}' without matching '\\begin{" +
@@ -1065,12 +1081,12 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                        // \| and \Vert are equivalent, and InsetMathDelim
                        // can't handle \|
                        // FIXME: fix this in InsetMathDelim itself!
-                       string const l = tl.cs() == "|" ? "Vert" : tl.asString();
+                       docstring const l = tl.cs() == "|" ? from_ascii("Vert") : tl.asString();
                        MathArray ar;
                        parse(ar, FLAG_RIGHT, mode);
                        skipSpaces();
                        Token const & tr = getToken();
-                       string const r = tr.cs() == "|" ? "Vert" : tr.asString();
+                       docstring const r = tr.cs() == "|" ? from_ascii("Vert") : tr.asString();
                        cell->push_back(MathAtom(new InsetMathDelim(l, r, ar)));
                }
 
@@ -1083,33 +1099,33 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                }
 
                else if (t.cs() == "begin") {
-                       string const name = getArg('{', '}');
+                       docstring const name = getArg('{', '}');
                        environments_.push_back(name);
 
                        if (name == "array" || name == "subarray") {
-                               string const valign = parse_verbatim_option() + 'c';
-                               string const halign = parse_verbatim_item();
-                               cell->push_back(MathAtom(new InsetMathArray(name, valign[0], halign)));
+                               docstring const valign = parse_verbatim_option() + 'c';
+                               docstring const halign = parse_verbatim_item();
+                               cell->push_back(MathAtom(new InsetMathArray(name, (char)valign[0], halign)));
                                parse2(cell->back(), FLAG_END, mode, false);
                        }
 
                        else if (name == "tabular") {
-                               string const valign = parse_verbatim_option() + 'c';
-                               string const halign = parse_verbatim_item();
-                               cell->push_back(MathAtom(new InsetMathTabular(name, valign[0], halign)));
+                               docstring const valign = parse_verbatim_option() + 'c';
+                               docstring const halign = parse_verbatim_item();
+                               cell->push_back(MathAtom(new InsetMathTabular(name, (char)valign[0], halign)));
                                parse2(cell->back(), FLAG_END, InsetMath::TEXT_MODE, false);
                        }
 
-                       else if (name == "split" || name == "cases" ||
-                                name == "gathered" || name == "aligned") {
+                       else if (name == "split" || name == "cases") {
                                cell->push_back(createInsetMath(name));
                                parse2(cell->back(), FLAG_END, mode, false);
                        }
 
                        else if (name == "alignedat") {
+                               docstring const valign = parse_verbatim_option() + 'c';
                                // ignore this for a while
                                getArg('{', '}');
-                               cell->push_back(createInsetMath(name));
+                               cell->push_back(MathAtom(new InsetMathSplit(name, (char)valign[0])));
                                parse2(cell->back(), FLAG_END, mode, false);
                        }
 
@@ -1174,6 +1190,18 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                                if (l->inset == "matrix") {
                                        cell->push_back(createInsetMath(name));
                                        parse2(cell->back(), FLAG_END, mode, false);
+                               } else if (l->inset == "split") {
+                                       docstring const valign = parse_verbatim_option() + 'c';
+                                       cell->push_back(MathAtom(new InsetMathSplit(name, (char)valign[0])));
+                                       parse2(cell->back(), FLAG_END, mode, false);
+                               } else {
+                                       dump();
+                                       lyxerr << "found math environment `" << name
+                                              << "' in symbols file with unsupported inset `"
+                                              << l->inset << "'." << endl;
+                                       // create generic environment inset
+                                       cell->push_back(MathAtom(new InsetMathEnv(name)));
+                                       parse(cell->back().nucleus()->cell(0), FLAG_ITEM, mode);
                                }
                        }
 
@@ -1190,7 +1218,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
 #ifdef WITH_WARNINGS
 #warning A hack...
 #endif
-                       string s;
+                       docstring s;
                        while (true) {
                                Token const & t = getToken();
                                if (!good()) {
@@ -1198,7 +1226,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                                        break;
                                }
                                s += t.character();
-                               if (isValidLength(s))
+                               if (isValidLength(to_utf8(s)))
                                        break;
                        }
                        cell->push_back(MathAtom(new InsetMathKern(s)));
@@ -1206,7 +1234,7 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
 
                else if (t.cs() == "label") {
                        // FIXME: This is swallowed in inline formulas
-                       string label = parse_verbatim_item();
+                       docstring label = parse_verbatim_item();
                        MathArray ar;
                        asArray(label, ar);
                        if (grid.asHullInset()) {
@@ -1227,14 +1255,14 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                }
 
                else if (t.cs() == "color") {
-                       string const color = parse_verbatim_item();
+                       docstring const color = parse_verbatim_item();
                        cell->push_back(MathAtom(new InsetMathColor(true, color)));
                        parse(cell->back().nucleus()->cell(0), flags, mode);
                        return;
                }
 
                else if (t.cs() == "textcolor") {
-                       string const color = parse_verbatim_item();
+                       docstring const color = parse_verbatim_item();
                        cell->push_back(MathAtom(new InsetMathColor(false, color)));
                        parse(cell->back().nucleus()->cell(0), FLAG_ITEM, InsetMath::TEXT_MODE);
                }
@@ -1251,7 +1279,10 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                }
 
                else if (t.cs() == "xymatrix") {
-                       cell->push_back(createInsetMath(t.cs()));
+                       odocstringstream os;
+                       while (good() && nextToken().cat() != catBegin)
+                               os << getToken().asInput();
+                       cell->push_back(createInsetMath(t.cs() + os.str()));
                        parse2(cell->back(), FLAG_ITEM, mode, false);
                }
 
@@ -1302,14 +1333,13 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                        if (l) {
                                if (l->inset == "big") {
                                        skipSpaces();
-                                       string const delim = getToken().asInput();
+                                       docstring const delim = getToken().asInput();
                                        if (InsetMathBig::isBigInsetDelim(delim))
                                                cell->push_back(MathAtom(
                                                        new InsetMathBig(t.cs(), delim)));
                                        else {
                                                cell->push_back(createInsetMath(t.cs()));
-                                               cell->push_back(createInsetMath(
-                                                               delim.substr(1)));
+                                               putback();
                                        }
                                }
 
@@ -1385,9 +1415,9 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
 } // anonymous namespace
 
 
-void mathed_parse_cell(MathArray & ar, string const & str)
+void mathed_parse_cell(MathArray & ar, docstring const & str)
 {
-       istringstream is(str);
+       istringstream is(to_utf8(str));
        mathed_parse_cell(ar, is);
 }
 
@@ -1426,7 +1456,7 @@ void mathed_parse_normal(InsetMathGrid & grid, string const & str)
 
 void initParser()
 {
-       fill(theCatcode, theCatcode + 256, catOther);
+       fill(theCatcode, theCatcode + 128, catOther);
        fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
        fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);
 
@@ -1446,3 +1476,6 @@ void initParser()
        theCatcode[int('~')]  = catActive;
        theCatcode[int('%')]  = catComment;
 }
+
+
+} // namespace lyx