catInvalid // 15 <delete>
};
-CatCode theCatcode[256];
+CatCode theCatcode[128];
-inline CatCode catcode(unsigned char c)
+inline CatCode catcode(char_type c)
{
+ /* Look up the CatCode of the character c.
+ *
+ * The only characters that are not catOther lie in the pure ASCII
+ * range. Therefore theCatcode has only 128 entries, and every
+ * c >= 128 is reported as catOther without consulting the table.
+ * TeX itself deals with 8-bit characters, so if needed this table
+ * could be enlarged to 256 entries.
+ * Any larger value does not make sense, since the fact that we use
+ * Unicode internally does not change Knuth's TeX engine.
+ * Apart from that, a table for the full 21-bit UCS4 range would waste
+ * too much memory.
+ *
+ * NOTE(review): the check below guards only the upper bound of the
+ * table. This presumably relies on char_type never holding a
+ * negative value -- confirm against the definition of char_type. */
+ if (c >= 128)
+ return catOther;
+
return theCatcode[c];
}
///
void tokenize(docstring const & s);
///
- void skipSpaceTokens(istream & is, char c);
+ void skipSpaceTokens(idocstream & is, char_type c);
///
void push_back(Token const & t);
///
}
-void Parser::skipSpaceTokens(istream & is, char c)
+void Parser::skipSpaceTokens(idocstream & is, char_type c)
{
// skip trailing spaces
while (catcode(c) == catSpace || catcode(c) == catNewline)
{
// eat everything up to the next \end_inset or end of stream
// and store it in s for further tokenization
- docstring s;
+ std::string s;
char c;
while (is.get(c)) {
s += c;
is.unget();
// tokenize buffer
- tokenize(s);
+ tokenize(from_utf8(s));
}
void Parser::tokenize(docstring const & buffer)
{
- istringstream is(to_utf8(buffer), ios::in | ios::binary);
+ idocstringstream is(buffer, ios::in | ios::binary);
- char c;
+ char_type c;
while (is.get(c)) {
//lyxerr << "reading c: " << c << endl;
cell->back() = MathAtom(new InsetMathScript(cell->back(), up));
InsetMathScript * p = cell->back().nucleus()->asScriptInset();
// special handling of {}-bases
- // is this always correct?
- if (p->nuc().size() == 1
- && p->nuc().back()->asBraceInset())
- p->nuc() = p->nuc().back()->asNestInset()->cell(0);
+ // Here we could remove the brace inset for things
+ // like {a'}^2 and add the braces back in
+ // InsetMathScript::write().
+ // We do not do it, since it is not possible to detect
+ // reliably whether the braces are needed because the
+ // nucleus contains more than one symbol, or whether
+ // they are needed for unknown commands like \xx{a}_0
+ // or \yy{a}{b}_0. This was done in revision 14819
+ // in an unreliable way. See this thread
+ // http://www.mail-archive.com/lyx-devel%40lists.lyx.org/msg104917.html
+ // for more details.
parse(p->cell(p->idxOfScript(up)), FLAG_ITEM, mode);
if (limits) {
p->limits(limits);
}
else if (t.cs() == "xymatrix") {
- cell->push_back(createInsetMath(t.cs()));
+ odocstringstream os;
+ while (good() && nextToken().cat() != catBegin)
+ os << getToken().asInput();
+ cell->push_back(createInsetMath(t.cs() + os.str()));
parse2(cell->back(), FLAG_ITEM, mode, false);
}
void initParser()
{
- fill(theCatcode, theCatcode + 256, catOther);
+ fill(theCatcode, theCatcode + 128, catOther);
fill(theCatcode + 'a', theCatcode + 'z' + 1, catLetter);
fill(theCatcode + 'A', theCatcode + 'Z' + 1, catLetter);