Read UTF-8 TeX documents and translate them to LyX format 249.
There is still no code to detect the input encoding and use it, but that
should be the easiest part (I hope).
git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27563 a592a061-630c-0410-9148-cb99ea01b6c8
#include "Parser.h"
#include <iostream>
#include "Parser.h"
#include <iostream>
theCatcode[int('@')] = catLetter;
}
theCatcode[int('@')] = catLetter;
}
/*!
* Translate a line ending to '\n'.
* \p c must have catcode catNewline, and it must be the last character read
* from \p is.
*/
/*!
* Translate a line ending to '\n'.
* \p c must have catcode catNewline, and it must be the last character read
* from \p is.
*/
-char getNewline(istream & is, char c)
+char getNewline(idocstream & is, char c)
{
// we have to handle 3 different line endings:
// - UNIX (\n)
{
// we have to handle 3 different line endings:
// - UNIX (\n)
// - DOS (\r\n)
if (c == '\r') {
// MAC or DOS
// - DOS (\r\n)
if (c == '\r') {
// MAC or DOS
- if (is.get(c) && c != '\n') {
+ char_type wc;
+ if (is.get(wc) && wc != '\n') {
-}
-
-
-//
-// catcodes
-//
-
-CatCode catcode(unsigned char c)
+CatCode catcode(char_type c)
+ if (c < 256)
+ return theCatcode[(unsigned char)c];
+ return catOther;
else if (t.cat() == catEscape)
os << '\\' << t.cs() << ' ';
else if (t.cat() == catLetter)
else if (t.cat() == catEscape)
os << '\\' << t.cs() << ' ';
else if (t.cat() == catLetter)
else if (t.cat() == catNewline)
os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
else
else if (t.cat() == catNewline)
os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
else
- os << '[' << t.character() << ',' << t.cat() << ']';
+ os << '[' << t.cs() << ',' << t.cat() << ']';
return os;
}
string Token::asString() const
{
return os;
}
string Token::asString() const
{
- return cs_.size() ? cs_ : string(1, char_);
{
if (cat_ == catComment)
return '%' + cs_ + '\n';
{
if (cat_ == catComment)
return '%' + cs_ + '\n';
- if (cat_ == catSpace || cat_ == catNewline)
- return cs_;
- return char_ ? string(1, char_) : '\\' + cs_;
+ if (cat_ == catEscape)
+ return '\\' + cs_;
+ return cs_;
-Parser::Parser(istream & is)
+Parser::Parser(idocstream & is)
: lineno_(0), pos_(0), iss_(0), is_(is)
{
}
Parser::Parser(string const & s)
: lineno_(0), pos_(0), iss_(0), is_(is)
{
}
Parser::Parser(string const & s)
- : lineno_(0), pos_(0), iss_(new istringstream(s)), is_(*iss_)
+ : lineno_(0), pos_(0),
+ iss_(new idocstringstream(from_utf8(s))), is_(*iss_)
{
if (!good())
error("The input stream is not well...");
{
if (!good())
error("The input stream is not well...");
- return tokens_[pos_++].character();
+ return get_token().character();
void Parser::tokenize_one()
{
catInit();
void Parser::tokenize_one()
{
catInit();
- //cerr << "reading c: " << c << "\n";
switch (catcode(c)) {
case catSpace: {
switch (catcode(c)) {
case catSpace: {
while (is_.get(c) && catcode(c) == catSpace)
s += c;
if (catcode(c) != catSpace)
while (is_.get(c) && catcode(c) == catSpace)
s += c;
if (catcode(c) != catSpace)
case catNewline: {
++lineno_;
case catNewline: {
++lineno_;
- string s(1, getNewline(is_, c));
+ docstring s(1, getNewline(is_, c));
while (is_.get(c) && catcode(c) == catNewline) {
++lineno_;
s += getNewline(is_, c);
while (is_.get(c) && catcode(c) == catNewline) {
++lineno_;
s += getNewline(is_, c);
case catComment: {
// We don't treat "%\n" combinations here specially because
// we want to preserve them in the preamble
case catComment: {
// We don't treat "%\n" combinations here specially because
// we want to preserve them in the preamble
while (is_.get(c) && catcode(c) != catNewline)
s += c;
// handle possible DOS line ending
while (is_.get(c) && catcode(c) != catNewline)
s += c;
// handle possible DOS line ending
if (!is_) {
error("unexpected end of input");
} else {
if (!is_) {
error("unexpected end of input");
} else {
if (catcode(c) == catLetter) {
// collect letters
while (is_.get(c) && catcode(c) == catLetter)
if (catcode(c) == catLetter) {
// collect letters
while (is_.get(c) && catcode(c) == catLetter)
- cerr << "ignoring a char: " << int(c) << "\n";
+ cerr << "ignoring a char: " << c << "\n";
- push_back(Token(c, catcode(c)));
+ push_back(Token(docstring(1, c), catcode(c)));
+ //cerr << tokens_.back();
string res;
if (next_token().character() == '[') {
Token t = get_token();
string res;
if (next_token().character() == '[') {
Token t = get_token();
- for (Token t = get_token(); t.character() != ']' && good(); t = get_token()) {
+ for (t = get_token(); t.character() != ']' && good(); t = get_token()) {
if (t.cat() == catBegin) {
putback();
res += '{' + verbatim_item() + '}';
if (t.cat() == catBegin) {
putback();
res += '{' + verbatim_item() + '}';
#ifndef PARSER_H
#define PARSER_H
#ifndef PARSER_H
#define PARSER_H
#include <string>
#include <utility>
#include <string>
#include <utility>
+#include "support/docstream.h"
-CatCode catcode(unsigned char c);
-
-
enum {
FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
enum {
FLAG_BRACE_LAST = 1 << 1, // last closing brace ends the parsing
FLAG_RIGHT = 1 << 2, // next \\right ends the parsing process
class Token {
public:
///
class Token {
public:
///
- Token() : cs_(), char_(0), cat_(catIgnore) {}
- ///
- Token(char c, CatCode cat) : cs_(), char_(c), cat_(cat) {}
+ Token() : cs_(), cat_(catIgnore) {}
- Token(std::string const & cs, CatCode cat) : cs_(cs), char_(0), cat_(cat) {}
+ Token(docstring const & cs, CatCode cat) : cs_(to_utf8(cs)), cat_(cat) {}
///
std::string const & cs() const { return cs_; }
/// Returns the catcode of the token
CatCode cat() const { return cat_; }
///
///
std::string const & cs() const { return cs_; }
/// Returns the catcode of the token
CatCode cat() const { return cat_; }
///
- char character() const { return char_; }
+ char character() const { return cs_.empty() ? 0 : cs_[0]; }
/// Returns the token as string
std::string asString() const;
/// Returns the token verbatim
/// Returns the token as string
std::string asString() const;
/// Returns the token verbatim
- Parser(std::istream & is);
+ Parser(idocstream & is);
///
Parser(std::string const & s);
///
///
Parser(std::string const & s);
///
- std::istringstream * iss_;
+ idocstringstream * iss_;
t.cat() == catAlign ||
t.cat() == catActive ||
t.cat() == catParameter)
t.cat() == catAlign ||
t.cat() == catActive ||
t.cat() == catParameter)
else if (t.cat() == catBegin) {
os << '{';
else if (t.cat() == catBegin) {
os << '{';
void end_preamble(ostream & os, TextClass const & /*textclass*/)
{
os << "#LyX file created by tex2lyx " << PACKAGE_VERSION << "\n"
void end_preamble(ostream & os, TextClass const & /*textclass*/)
{
os << "#LyX file created by tex2lyx " << PACKAGE_VERSION << "\n"
<< "\\begin_document\n"
<< "\\begin_header\n"
<< "\\textclass " << h_textclass << "\n";
<< "\\begin_document\n"
<< "\\begin_header\n"
<< "\\textclass " << h_textclass << "\n";
- else if (t.cat() == catSpace || t.cat() == catNewline)
- os << t.cs();
-
- else if (t.cat() == catLetter ||
- t.cat() == catSuper ||
- t.cat() == catSub ||
- t.cat() == catOther ||
- t.cat() == catActive ||
- t.cat() == catParameter)
- os << t.character();
+ else if (t.cat() == catSpace
+ || t.cat() == catNewline
+ || t.cat() == catLetter
+ || t.cat() == catSuper
+ || t.cat() == catSub
+ || t.cat() == catOther
+ || t.cat() == catActive
+ || t.cat() == catParameter)
+ os << t.cs();
else if (t.cat() == catBegin) {
os << '{';
else if (t.cat() == catBegin) {
os << '{';
#include "TextClass.h"
#include "Layout.h"
#include "TextClass.h"
#include "Layout.h"
-#include "support/lassert.h"
#include "support/convert.h"
#include "support/debug.h"
#include "support/ExceptionMessage.h"
#include "support/filetools.h"
#include "support/convert.h"
#include "support/debug.h"
#include "support/ExceptionMessage.h"
#include "support/filetools.h"
+#include "support/lassert.h"
#include "support/lstrings.h"
#include "support/os.h"
#include "support/Package.h"
#include <cstdlib>
#include "support/lstrings.h"
#include "support/os.h"
#include "support/Package.h"
#include <cstdlib>
#include <iostream>
#include <string>
#include <sstream>
#include <iostream>
#include <string>
#include <sstream>
*/
void read_syntaxfile(FileName const & file_name)
{
*/
void read_syntaxfile(FileName const & file_name)
{
- ifstream is(file_name.toFilesystemEncoding().c_str());
+ ifdocstream is(file_name.toFilesystemEncoding().c_str());
if (!is.good()) {
cerr << "Could not open syntax file \"" << file_name
<< "\" for reading." << endl;
if (!is.good()) {
cerr << "Could not open syntax file \"" << file_name
<< "\" for reading." << endl;
* You must ensure that \p parentFilePath is properly set before calling
* this function!
*/
* You must ensure that \p parentFilePath is properly set before calling
* this function!
*/
-void tex2lyx(istream & is, ostream & os)
+void tex2lyx(idocstream & is, ostream & os)
{
Parser p(is);
//p.dump();
{
Parser p(is);
//p.dump();
os << ss.str();
#ifdef TEST_PARSER
p.reset();
os << ss.str();
#ifdef TEST_PARSER
p.reset();
- ofstream parsertest("parsertest.tex");
+ ofdocstream parsertest("parsertest.tex");
while (p.good())
parsertest << p.get_token().asInput();
// <origfile> and parsertest.tex should now have identical content
while (p.good())
parsertest << p.get_token().asInput();
// <origfile> and parsertest.tex should now have identical content
/// convert TeX from \p infilename to LyX and write it to \p os
bool tex2lyx(FileName const & infilename, ostream & os)
{
/// convert TeX from \p infilename to LyX and write it to \p os
bool tex2lyx(FileName const & infilename, ostream & os)
{
- ifstream is(infilename.toFilesystemEncoding().c_str());
+ ifdocstream is(infilename.toFilesystemEncoding().c_str());
if (!is.good()) {
cerr << "Could not open input file \"" << infilename
<< "\" for reading." << endl;
if (!is.good()) {
cerr << "Could not open input file \"" << infilename
<< "\" for reading." << endl;
t.cat() == catParameter) {
// This translates "&" to "\\&" which may be wrong...
context.check_layout(os);
t.cat() == catParameter) {
// This translates "&" to "\\&" which may be wrong...
context.check_layout(os);
}
else if (p.isParagraph()) {
}
else if (p.isParagraph()) {
else
os << "\\InsetSpace ~\n";
} else
else
os << "\\InsetSpace ~\n";
} else
}
else if (t.cat() == catBegin &&
}
else if (t.cat() == catBegin &&
next.character() == '*') {
p.get_token();
if (p.next_token().cat() == catEnd) {
next.character() == '*') {
p.get_token();
if (p.next_token().cat() == catEnd) {
- os << next.character();
p.get_token();
} else {
p.putback();
p.get_token();
} else {
p.putback();