X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Flyxlex_pimpl.C;h=0ae2b1391c85323f36a6f4d3aef64ef6543879bb;hb=e7f4618bcce770369cf46335c2c7f0164b4b8857;hp=b0587400dfccc9e78bf1c88752cf22c41d0d0001;hpb=b17500c1c47d7bdd4508743c27fb72f0e57a5105;p=lyx.git diff --git a/src/lyxlex_pimpl.C b/src/lyxlex_pimpl.C index b0587400df..0ae2b1391c 100644 --- a/src/lyxlex_pimpl.C +++ b/src/lyxlex_pimpl.C @@ -1,61 +1,97 @@ -#include +/** + * \file lyxlex_pimpl.C + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author Lars Gullik Bjønnes + * \author Jean-Marc Lasgouttes + * \author Jürgen Vigna + * + * Full author contact details are available in file CREDITS. + */ -#ifdef __GNUG__ -#pragma implementation -#endif -#include +#include #include "lyxlex_pimpl.h" -#include "support/lyxalgo.h" -#include "support/filetools.h" + #include "debug.h" -#if 1 // to get NEW_INSETS -#include "lyxparagraph.h" -#endif +#include "support/filetools.h" +#include "support/lyxalgo.h" +#include "support/lstrings.h" +#include "support/types.h" +#include "support/unicode.h" + +#include + + +namespace lyx { + +using support::compare_ascii_no_case; +using support::FileName; +using support::getFormatFromContents; +using support::makeDisplayPath; +using support::split; +using support::subst; + +using std::endl; +using std::getline; +using std::lower_bound; using std::sort; -using std::ostream; +using std::string; using std::ios; using std::istream; -using std::endl; -using std::lower_bound; +using std::ostream; + +namespace { -// namespace { -struct compare_tags { +class compare_tags + : public std::binary_function { +public: // used by lower_bound, sort and sorted - inline - int operator()(keyword_item const & a, keyword_item const & b) const { - return compare_no_case(a.tag, b.tag) < 0; + bool operator()(keyword_item const & a, keyword_item const & b) const + { + // we use the ascii version, because in turkish, 'i' + // is not the lowercase version of 'I', and thus + // turkish locale breaks parsing of tags. + return compare_ascii_no_case(a.tag, b.tag) < 0; } }; -// } // end of anon namespace + +} // end of anon namespace -LyXLex::Pimpl::Pimpl(keyword_item * tab, int num) - : is(&fb__), table(tab), no_items(num), +LyXLex::Pimpl::Pimpl(keyword_item * tab, int num) + : is(&fb_), table(tab), no_items(num), status(0), lineno(0), commentChar('#') { verifyTable(); } -string const LyXLex::Pimpl::GetString() const +string const LyXLex::Pimpl::getString() const { - return string(buff); + return buff; +} + + +docstring const LyXLex::Pimpl::getDocString() const +{ + return from_utf8(buff); } void LyXLex::Pimpl::printError(string const & message) const { - string tmpmsg = subst(message, "$$Token", GetString()); + string const tmpmsg = subst(message, "$$Token", getString()); lyxerr << "LyX: " << tmpmsg << " [around line " << lineno - << " of file " << MakeDisplayPath(name) << ']' << endl; + << " of file " << to_utf8(makeDisplayPath(name)) << ']' << endl; } - + void LyXLex::Pimpl::printTable(ostream & os) { - os << "\nNumber of tags: " << no_items << '\n'; + os << "\nNumber of tags: " << no_items << endl; for (int i= 0; i < no_items; ++i) os << "table[" << i << "]: tag: `" << table[i].tag @@ -72,11 +108,11 @@ void LyXLex::Pimpl::verifyTable() lyxerr << "The table passed to LyXLex is not sorted!\n" << "Tell the developers to fix it!" << endl; // We sort it anyway to avoid problems. - lyxerr << "\nUnsorted:\n"; + lyxerr << "\nUnsorted:" << endl; printTable(lyxerr); sort(table, table + no_items, compare_tags()); - lyxerr << "\nSorted:\n"; + lyxerr << "\nSorted:" << endl; printTable(lyxerr); } } @@ -93,14 +129,14 @@ void LyXLex::Pimpl::pushTable(keyword_item * tab, int num) verifyTable(); } - + void LyXLex::Pimpl::popTable() { if (pushed.empty()) { lyxerr << "LyXLex error: nothing to pop!" << endl; return; } - + pushed_table tmp = pushed.top(); pushed.pop(); table = tmp.table_elem; @@ -108,31 +144,54 @@ void LyXLex::Pimpl::popTable() } -bool LyXLex::Pimpl::setFile(string const & filename) +bool LyXLex::Pimpl::setFile(FileName const & filename) { - // The check only outputs a debug message, because it triggers - // a bug in compaq cxx 6.2, where is_open() returns 'true' for a - // fresh new filebuf. (JMarc) - if (fb__.is_open() || is.tellg() > 0) - lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: " - "file or stream already set." << endl; - fb__.open(filename.c_str(), ios::in); - is.rdbuf(&fb__); - name = filename; - lineno = 0; - return fb__.is_open() && is.good(); + // Check the format of the file. + string const format = getFormatFromContents(filename); + + if (format == "gzip" || format == "zip" || format == "compress") { + lyxerr[Debug::LYXLEX] << "lyxlex: compressed" << endl; + + // The check only outputs a debug message, because it triggers + // a bug in compaq cxx 6.2, where is_open() returns 'true' for + // a fresh new filebuf. (JMarc) + if (!gz_.empty() || istream::off_type(is.tellg()) > -1) + lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: " + "file or stream already set." << endl; + gz_.push(io::gzip_decompressor()); + gz_.push(io::file_source(filename.toFilesystemEncoding())); + is.rdbuf(&gz_); + name = filename.absFilename(); + lineno = 0; + return gz_.component(1)->is_open() && is.good(); + } else { + lyxerr[Debug::LYXLEX] << "lyxlex: UNcompressed" << endl; + + // The check only outputs a debug message, because it triggers + // a bug in compaq cxx 6.2, where is_open() returns 'true' for + // a fresh new filebuf. (JMarc) + if (fb_.is_open() || istream::off_type(is.tellg()) > 0) + lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: " + "file or stream already set." << endl; + fb_.open(filename.toFilesystemEncoding().c_str(), ios::in); + is.rdbuf(&fb_); + name = filename.absFilename(); + lineno = 0; + return fb_.is_open() && is.good(); + } } - + void LyXLex::Pimpl::setStream(istream & i) { - if (fb__.is_open() || is.tellg() > 0) + if (fb_.is_open() || istream::off_type(is.tellg()) > 0) lyxerr[Debug::LYXLEX] << "Error in LyXLex::setStream: " "file or stream already set." << endl; is.rdbuf(i.rdbuf()); lineno = 0; } + void LyXLex::Pimpl::setCommentChar(char c) { commentChar = c; @@ -142,28 +201,18 @@ void LyXLex::Pimpl::setCommentChar(char c) bool LyXLex::Pimpl::next(bool esc /* = false */) { if (!pushTok.empty()) { -#ifndef NEW_INSETS - pushTok.copy(buff, string::npos); - buff[pushTok.length()] = '\0'; - pushTok.erase(); - return true; -#else // There can have been a whole line pushed so // we extract the first word and leaves the rest // in pushTok. (Lgb) - if (pushTok.find(' ') != string::npos) { - string tmp; - pushTok = split(pushTok, tmp, ' '); - tmp.copy(buff, string::npos); - buff[tmp.length()] = '\0'; + if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') { + buff.clear(); + pushTok = split(pushTok, buff, ' '); return true; } else { - pushTok.copy(buff, string::npos); - buff[pushTok.length()] = '\0'; - pushTok.erase(); + buff = pushTok; + pushTok.clear(); return true; - } -#endif + } } if (!esc) { unsigned char c = 0; // getc() returns an int @@ -174,68 +223,61 @@ bool LyXLex::Pimpl::next(bool esc /* = false */) c = cc; if (c == commentChar) { // Read rest of line (fast :-) - // That is not fast... (Lgb) #if 1 - is.getline(buff, sizeof(buff)); + // That is not fast... (Lgb) + string dummy; + getline(is, dummy); + lyxerr[Debug::LYXLEX] << "Comment read: `" << c - << buff << "'" << endl; + << dummy << '\'' << endl; #else - // unfortunately is ignore buggy (Lgb) + // unfortunately ignore is buggy (Lgb) is.ignore(100, '\n'); #endif ++lineno; continue; } - + if (c == '\"') { - int i = -1; + buff.clear(); + do { is.get(cc); c = cc; if (c != '\r') - buff[++i] = c; - } while (c != '\"' && c != '\n' && is && - i != (LEX_MAX_BUFF - 2)); - - if (i == (LEX_MAX_BUFF - 2)) { - printError("Line too long"); - c = '\"'; // Pretend we got a " - ++i; - } - + buff.push_back(c); + } while (c != '\"' && c != '\n' && is); + if (c != '\"') { printError("Missing quote"); if (c == '\n') ++lineno; } - - buff[i] = '\0'; + + buff.resize(buff.size()-1); status = LEX_DATA; - break; + break; } - + if (c == ',') continue; /* Skip ','s */ - + // using relational operators with chars other // than == and != is not safe. And if it is done // the type _have_ to be unsigned. It usually a // lot better to use the functions from cctype if (c > ' ' && is) { - int i = 0; + buff.clear(); + do { - buff[i++] = c; + buff.push_back(c); is.get(cc); c = cc; - } while (c > ' ' && c != ',' && is - && (i != LEX_MAX_BUFF - 1) ); - if (i == LEX_MAX_BUFF - 1) { - printError("Line too long"); - } - buff[i] = '\0'; + } while (c > ' ' && c != ',' && is); + status = LEX_TOKEN; } - + if (c == '\r' && is) { // The Windows support has lead to the // possibility of "\r\n" at the end of @@ -244,57 +286,56 @@ bool LyXLex::Pimpl::next(bool esc /* = false */) is.get(cc); c = cc; } - + if (c == '\n') ++lineno; - + } if (status) return true; - + status = is.eof() ? LEX_FEOF: LEX_UNDEF; - buff[0] = '\0'; + buff.clear(); return false; } else { unsigned char c = 0; // getc() returns an int char cc = 0; - + status = 0; while (is && !status) { is.get(cc); c = cc; - + // skip ','s if (c == ',') continue; - + if (c == '\\') { // escape - int i = 0; + buff.clear(); + do { if (c == '\\') { // escape the next char is.get(cc); c = cc; } - buff[i++] = c; + buff.push_back(c); is.get(cc); c = cc; - } while (c > ' ' && c != ',' && is - && (i != LEX_MAX_BUFF - 1) ); - if (i == LEX_MAX_BUFF - 1) { - printError("Line too long"); - } - buff[i] = '\0'; + } while (c > ' ' && c != ',' && is); + status = LEX_TOKEN; continue; } - + if (c == commentChar) { // Read rest of line (fast :-) - // That is still not fast... (Lgb) #if 1 - is.getline(buff, sizeof(buff)); + // That is still not fast... (Lgb) + string dummy; + getline(is, dummy); + lyxerr[Debug::LYXLEX] << "Comment read: `" << c - << buff << "'" << endl; + << dummy << '\'' << endl; #else // but ignore is also still buggy (Lgb) // This is fast (Lgb) @@ -303,10 +344,11 @@ bool LyXLex::Pimpl::next(bool esc /* = false */) ++lineno; continue; } - + // string if (c == '\"') { - int i = -1; + buff.clear(); + bool escaped = false; do { escaped = false; @@ -317,33 +359,30 @@ bool LyXLex::Pimpl::next(bool esc /* = false */) // escape the next char is.get(cc); c = cc; - escaped = true; + if (c == '\"' || c == '\\') + escaped = true; + else + buff.push_back('\\'); } - buff[++i] = c; - + buff.push_back(c); + if (!escaped && c == '\"') break; - } while (c != '\n' && is && - i != (LEX_MAX_BUFF - 2)); - - if (i == (LEX_MAX_BUFF - 2)) { - printError("Line too long"); - c = '\"'; // Pretend we got a " - ++i; - } - + } while (c != '\n' && is); + if (c != '\"') { printError("Missing quote"); if (c == '\n') ++lineno; } - - buff[i] = '\0'; + + buff.resize(buff.size() -1); status = LEX_DATA; - break; + break; } - + if (c > ' ' && is) { - int i = 0; + buff.clear(); + do { if (c == '\\') { // escape the next char @@ -351,26 +390,22 @@ bool LyXLex::Pimpl::next(bool esc /* = false */) c = cc; //escaped = true; } - buff[i++] = c; + buff.push_back(c); is.get(cc); c = cc; - } while (c > ' ' && c != ',' && is - && (i != LEX_MAX_BUFF-1) ); - if (i == LEX_MAX_BUFF-1) { - printError("Line too long"); - } - buff[i] = '\0'; + } while (c > ' ' && c != ',' && is); + status = LEX_TOKEN; } // new line if (c == '\n') ++lineno; } - + if (status) return true; - + status = is.eof() ? LEX_FEOF : LEX_UNDEF; - buff[0] = '\0'; + buff.clear(); return false; } } @@ -382,8 +417,11 @@ int LyXLex::Pimpl::search_kw(char const * const tag) const keyword_item * res = lower_bound(table, table + no_items, search_tag, compare_tags()); + // use the compare_ascii_no_case instead of compare_no_case, + // because in turkish, 'i' is not the lowercase version of 'I', + // and thus turkish locale breaks parsing of tags. if (res != table + no_items - && !compare_no_case(res->tag, tag)) + && !compare_ascii_no_case(res->tag, tag)) return res->code; return LEX_UNDEF; } @@ -392,48 +430,34 @@ int LyXLex::Pimpl::search_kw(char const * const tag) const int LyXLex::Pimpl::lex() { //NOTE: possible bug. - if (next() && status == LEX_TOKEN) - return search_kw(buff); - else + if (next() && status == LEX_TOKEN) { + return search_kw(getString().c_str()); + } else return status; } - -bool LyXLex::Pimpl::EatLine() + +bool LyXLex::Pimpl::eatLine() { -#ifndef NEW_INSETS - // This is not handling the pushed token - if (!pushTok.empty()) { - pushTok.copy(buff, string::npos); - buff[pushTok.length()] = '\0'; - pushTok.erase(); - return true; - } -#endif - int i = 0; + buff.clear(); + unsigned char c = '\0'; char cc = 0; - while(is && c != '\n' && i != (LEX_MAX_BUFF - 1)) { + while (is && c != '\n') { is.get(cc); c = cc; - lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `" - << c << "'" << endl; + //lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `" + // << c << '\'' << endl; if (c != '\r') - buff[i++] = c; - } - if (i == (LEX_MAX_BUFF - 1) && c != '\n') { - printError("Line too long"); - c = '\n'; // Pretend we had an end of line - --lineno; // but don't increase line counter (netto effect) - ++i; // and preserve last character read. + buff.push_back(c); } + if (c == '\n') { ++lineno; - buff[--i] = '\0'; // i can never be 0 here, so no danger + buff.resize(buff.size() - 1); status = LEX_DATA; return true; } else { - buff[i] = '\0'; return false; } } @@ -442,28 +466,18 @@ bool LyXLex::Pimpl::EatLine() bool LyXLex::Pimpl::nextToken() { if (!pushTok.empty()) { -#ifndef NEW_INSETS - pushTok.copy(buff, string::npos); - buff[pushTok.length()] = '\0'; - pushTok.erase(); - return true; -#else // There can have been a whole line pushed so // we extract the first word and leaves the rest // in pushTok. (Lgb) - if (pushTok.find(' ') != string::npos) { - string tmp; - pushTok = split(pushTok, tmp, ' '); - tmp.copy(buff, string::npos); - buff[tmp.length()] = '\0'; + if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') { + buff.clear(); + pushTok = split(pushTok, buff, ' '); return true; } else { - pushTok.copy(buff, string::npos); - buff[pushTok.length()] = '\0'; - pushTok.erase(); + buff = pushTok; + pushTok.clear(); return true; } -#endif } status = 0; @@ -473,40 +487,34 @@ bool LyXLex::Pimpl::nextToken() is.get(cc); c = cc; if (c >= ' ' && is) { - int i = 0; + buff.clear(); + if (c == '\\') { // first char == '\\' do { - buff[i++] = c; + buff.push_back(c); is.get(cc); c = cc; - } while (c > ' ' && c != '\\' && is - && i != (LEX_MAX_BUFF-1)); + } while (c > ' ' && c != '\\' && is); } else { do { - buff[i++] = c; + buff.push_back(c); is.get(cc); c = cc; - } while (c >= ' ' && c != '\\' && is - && i != (LEX_MAX_BUFF-1)); - } - - if (i == (LEX_MAX_BUFF - 1)) { - printError("Line too long"); + } while (c >= ' ' && c != '\\' && is); } - + if (c == '\\') is.putback(c); // put it back - buff[i] = '\0'; status = LEX_TOKEN; } - + if (c == '\n') ++lineno; - + } if (status) return true; - + status = is.eof() ? LEX_FEOF: LEX_UNDEF; - buff[0] = '\0'; + buff.clear(); return false; } @@ -515,3 +523,6 @@ void LyXLex::Pimpl::pushToken(string const & pt) { pushTok = pt; } + + +} // namespace lyx