Fix comment according to Enrico's explanation

[lyx.git] / src / mathed / MathParser.C
diff --git a/src/mathed/MathParser.C b/src/mathed/MathParser.C

index a43a8dba27a0fd396eb6c665b8cb5c91c7632b61..623b9234e87fede82fce8c24a6437066e50c966a 100644 (file)
--- a/src/mathed/MathParser.C
+++ b/src/mathed/MathParser.C
@@ -213,11 +213,22 @@ enum CatCode {
         catInvalid     // 15   <delete>
  };
  
-CatCode theCatcode[256];
+CatCode theCatcode[128];
  
  
-inline CatCode catcode(unsigned char c)
+inline CatCode catcode(char_type c)
  {
+       /* The only characters that are not catOther lie in the pure ASCII
+        * range. Therefore theCatcode has only 128 entries.
+        * TeX itself deals with 8-bit characters, so if needed this table
+        * could be enlarged to 256 entries.
+        * Any larger value does not make sense, since the fact that we use
+        * Unicode internally does not change Knuth's TeX engine.
+        * Apart from that, a table for the full 21-bit UCS4 range would
+        * waste too much memory. */
+       if (c >= 128)
+               return catOther;
+
         return theCatcode[c];
  }
  
@@ -325,7 +336,7 @@ private:
         ///
         void tokenize(docstring const & s);
         ///
-       void skipSpaceTokens(istream & is, char c);
+       void skipSpaceTokens(idocstream & is, char_type c);
         ///
         void push_back(Token const & t);
         ///
@@ -451,7 +462,7 @@ docstring Parser::getArg(char_type left, char_type right)
  }
  
  
-void Parser::skipSpaceTokens(istream & is, char c)
+void Parser::skipSpaceTokens(idocstream & is, char_type c)
  {
         // skip trailing spaces
         while (catcode(c) == catSpace || catcode(c) == catNewline)
@@ -466,7 +477,7 @@ void Parser::tokenize(istream & is)
  {
         // eat everything up to the next \end_inset or end of stream
         // and store it in s for further tokenization
-       docstring s;
+       std::string s;
         char c;
         while (is.get(c)) {
                 s += c;
@@ -480,15 +491,15 @@ void Parser::tokenize(istream & is)
                 is.unget();
  
         // tokenize buffer
-       tokenize(s);
+       tokenize(from_utf8(s));
  }
  
  
  void Parser::tokenize(docstring const & buffer)
  {
-       istringstream is(to_utf8(buffer), ios::in | ios::binary);
+       idocstringstream is(buffer, ios::in | ios::binary);
  
-       char c;
+       char_type c;
         while (is.get(c)) {
                 //lyxerr << "reading c: " << c << endl;
  
@@ -816,9 +827,13 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                                 cell->back() = MathAtom(new InsetMathScript(cell->back(), up));
                         InsetMathScript * p = cell->back().nucleus()->asScriptInset();
                         // special handling of {}-bases
+                       // Test for empty brace inset, otherwise \xxx{\vec{H}}_{0}
+                       // where \xxx is an unknown command gets misparsed to
+                       // \xxx\vec{H}_{0}, and that is invalid LaTeX.
                         // is this always correct?
-                       if (p->nuc().size() == 1 
-                           && p->nuc().back()->asBraceInset())
+                       if (p->nuc().size() == 1 &&
+                           p->nuc().back()->asBraceInset() &&
+                           p->nuc().back()->asBraceInset()->cell(0).empty())
                                 p->nuc() = p->nuc().back()->asNestInset()->cell(0);
                         parse(p->cell(p->idxOfScript(up)), FLAG_ITEM, mode);
                         if (limits) {
@@ -1267,7 +1282,10 @@ void Parser::parse1(InsetMathGrid & grid, unsigned flags,
                 }
  
                 else if (t.cs() == "xymatrix") {
-                       cell->push_back(createInsetMath(t.cs()));
+                       odocstringstream os;
+                       while (good() && nextToken().cat() != catBegin)
+                               os << getToken().asInput();
+                       cell->push_back(createInsetMath(t.cs() + os.str()));
                         parse2(cell->back(), FLAG_ITEM, mode, false);
                 }
  
@@ -1441,7 +1459,7 @@ void mathed_parse_normal(InsetMathGrid & grid, string const & str)
  
  void initParser()
  {
-       fill(theCatcode, theCatcode + 256, catOther);
+       fill(theCatcode, theCatcode + 128, catOther);
         fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
         fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);