3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
7 * \author Jean-Marc Lasgouttes
10 * Full author contact details are available in file CREDITS.
15 #include "lyxlex_pimpl.h"
19 #include "support/filetools.h"
20 #include "support/lyxalgo.h"
21 #include "support/lstrings.h"
22 #include "support/types.h"
23 #include "support/unicode.h"
30 using support::compare_ascii_no_case;
31 using support::FileName;
32 using support::getFormatFromContents;
33 using support::makeDisplayPath;
39 using std::lower_bound;
49 : public std::binary_function<keyword_item, keyword_item, bool> {
// NOTE(review): std::binary_function was deprecated in C++11 and removed
// in C++17; this base class has to be dropped before building with a
// newer language standard.
51 // Used by lower_bound, sort and sorted.
// Returns true when compare_ascii_no_case() orders a.tag before b.tag
// (i.e. its result is negative).
52 bool operator()(keyword_item const & a, keyword_item const & b) const
54 // We use the ASCII version because in Turkish 'i'
55 // is not the lowercase version of 'I', and thus a
56 // Turkish locale breaks parsing of tags.
57 return compare_ascii_no_case(a.tag, b.tag) < 0;
61 } // end of anon namespace
// Construct the lexer implementation around a keyword table.
//   tab - keyword table; the pointer itself is stored in `table`
//   num - number of entries in `tab`, stored in `no_items`
// The stream `is` is constructed from the address of the member buffer
// fb_; `status` and `lineno` start at 0 and '#' is the initial comment
// character.
64 LyXLex::Pimpl::Pimpl(keyword_item * tab, int num)
65 : is(&fb_), table(tab), no_items(num),
66 status(0), lineno(0), commentChar('#')
// Return the current token as a string (by value).
// NOTE(review): the body is not visible here; presumably it returns
// `buff` - confirm.
72 string const LyXLex::Pimpl::getString() const
// Return the token buffer `buff` converted to a docstring by from_utf8().
// assumes `buff` holds UTF-8 encoded bytes - TODO confirm
78 docstring const LyXLex::Pimpl::getDocString() const
80 return from_utf8(buff);
// Report a lexer error on lyxerr.
//   message - text to print; the placeholder "$$Token" is substituted
//             (via subst) with the current token from getString()
// The output is prefixed with "LyX: " and followed by the current
// `lineno` and the display path derived from `name`.
84 void LyXLex::Pimpl::printError(string const & message) const
86 string const tmpmsg = subst(message, "$$Token", getString());
87 lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
88 << " of file " << to_utf8(makeDisplayPath(name)) << ']' << endl;
// Dump the keyword table to `os`: first the number of entries, then,
// for each of the `no_items` entries, its tag and code followed by a
// newline.
92 void LyXLex::Pimpl::printTable(ostream & os)
94 os << "\nNumber of tags: " << no_items << endl;
95 for (int i= 0; i < no_items; ++i)
97 << "]: tag: `" << table[i].tag
98 << "' code:" << table[i].code << '\n';
// Sanity check for the keyword table: when it is not sorted according
// to compare_tags, complain on lyxerr and sort it in place.
// NOTE(review): the "Unsorted:"/"Sorted:" headers suggest the table is
// printed before and after sorting; those calls are not visible here.
103 void LyXLex::Pimpl::verifyTable()
105 // Check if the table is sorted and if not, sort it.
107 && !lyx::sorted(table, table + no_items, compare_tags())) {
108 lyxerr << "The table passed to LyXLex is not sorted!\n"
109 << "Tell the developers to fix it!" << endl;
110 // We sort it anyway to avoid problems.
111 lyxerr << "\nUnsorted:" << endl;
114 sort(table, table + no_items, compare_tags());
115 lyxerr << "\nSorted:" << endl;
// Switch to a different keyword table while remembering the current one.
//   tab - new keyword table
//   num - number of entries in `tab`
// NOTE(review): only the creation of the saved record is visible here;
// presumably it is pushed onto `pushed` and tab/num become the active
// table - confirm.
121 void LyXLex::Pimpl::pushTable(keyword_item * tab, int num)
// Record of the currently active table (pointer and size).
123 pushed_table tmppu(table, no_items);
// Restore the keyword table stored on top of the `pushed` stack.
// An empty stack is reported on lyxerr.
// NOTE(review): the early return after the error message and the removal
// of the top element are not visible here - confirm they exist.
133 void LyXLex::Pimpl::popTable()
135 if (pushed.empty()) {
136 lyxerr << "LyXLex error: nothing to pop!" << endl;
// Copy the saved record, then reinstate its table pointer and size.
140 pushed_table tmp = pushed.top();
142 table = tmp.table_elem;
143 no_items = tmp.table_siz;
// Attach the lexer to the file `filename`.
// The format is probed with getFormatFromContents(); files reported as
// "gzip", "zip" or "compress" are read through the gz_ filter chain, all
// others through the plain file buffer fb_. In both cases `name` is set
// from filename.absFilename().
// Returns true when the underlying file is open and `is` is good.
147 bool LyXLex::Pimpl::setFile(FileName const & filename)
149 // Check the format of the file.
150 string const format = getFormatFromContents(filename);
152 if (format == "gzip" || format == "zip" || format == "compress") {
153 lyxerr[Debug::LYXLEX] << "lyxlex: compressed" << endl;
155 // The check only outputs a debug message, because it triggers
156 // a bug in compaq cxx 6.2, where is_open() returns 'true' for
157 // a fresh new filebuf. (JMarc)
// NOTE(review): this branch compares tellg() against -1 while the
// uncompressed branch and setStream() compare against 0 - confirm that
// the difference is intentional.
158 if (!gz_.empty() || istream::off_type(is.tellg()) > -1)
159 lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: "
160 "file or stream already set." << endl;
// NOTE(review): "zip" and "compress" files are also fed to the gzip
// decompressor - confirm it can actually decode those formats.
161 gz_.push(io::gzip_decompressor());
162 gz_.push(io::file_source(filename.toFilesystemEncoding()));
164 name = filename.absFilename();
// The file_source was pushed second, hence component index 1.
166 return gz_.component<io::file_source>(1)->is_open() && is.good();
168 lyxerr[Debug::LYXLEX] << "lyxlex: UNcompressed" << endl;
170 // The check only outputs a debug message, because it triggers
171 // a bug in compaq cxx 6.2, where is_open() returns 'true' for
172 // a fresh new filebuf. (JMarc)
173 if (fb_.is_open() || istream::off_type(is.tellg()) > 0)
174 lyxerr[Debug::LYXLEX] << "Error in LyXLex::setFile: "
175 "file or stream already set." << endl;
// Open the file read-only through the plain file buffer.
176 fb_.open(filename.toFilesystemEncoding().c_str(), ios::in);
178 name = filename.absFilename();
180 return fb_.is_open() && is.good();
// Use the stream `i` as the lexer input.
// A debug message is logged when a file is already open on fb_ or the
// current stream position is beyond 0.
// NOTE(review): the statements that actually install `i` are not visible
// here.
185 void LyXLex::Pimpl::setStream(istream & i)
187 if (fb_.is_open() || istream::off_type(is.tellg()) > 0)
188 lyxerr[Debug::LYXLEX] << "Error in LyXLex::setStream: "
189 "file or stream already set." << endl;
// Change the character that introduces a comment.
// NOTE(review): the body is not visible here; presumably it stores `c`
// in `commentChar` (initialised to '#' by the constructor) - confirm.
195 void LyXLex::Pimpl::setCommentChar(char c)
// Read the next token, either from previously pushed text (pushTok) or
// from the input stream `is`.
//   esc - defaults to false according to the comment in the signature.
// NOTE(review): this excerpt shows two near-identical scanning loops;
// presumably the second one is the esc == true variant that honours
// backslash escapes - confirm.
// Returns true when the scan left `status` set; otherwise `status`
// becomes LEX_FEOF at end of file, or LEX_UNDEF.
201 bool LyXLex::Pimpl::next(bool esc /* = false */)
203 if (!pushTok.empty()) {
204 // A whole line may have been pushed, so
205 // we extract the first word and leave the rest
207 if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
209 pushTok = split(pushTok, buff, ' ');
218 unsigned char c = 0; // getc() returns an int
// Scan until the stream fails or a token has set `status`.
221 while (is && !status) {
224 if (c == commentChar) {
225 // Read rest of line (fast :-)
227 // That is not fast... (Lgb)
231 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
232 << dummy << '\'' << endl;
234 // unfortunately ignore is buggy (Lgb)
// NOTE(review): ignore() skips at most 100 characters, so a longer
// comment line would not be consumed completely if this call is live
// (it may sit in a disabled preprocessor branch not visible here).
235 is.ignore(100, '\n');
// A quoted string ends at the closing quote, a newline or a stream error.
249 } while (c != '\"' && c != '\n' && is);
252 printError("Missing quote");
// Drop the last character that was appended to buff.
257 buff.resize(buff.size()-1);
263 continue; /* Skip ','s */
265 // Using relational operators with chars other
266 // than == and != is not safe. And if it is done
267 // the type _has_ to be unsigned. It is usually a
268 // lot better to use the functions from cctype.
// A word ends at a space/control character, a comma or a stream error.
276 } while (c > ' ' && c != ',' && is);
281 if (c == '\r' && is) {
282 // The Windows support has led to the
283 // possibility of "\r\n" at the end of
284 // a line. This will stop LyX choking
285 // when it expected to find a '\n'
294 if (status) return true;
296 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
300 unsigned char c = 0; // getc() returns an int
304 while (is && !status) {
309 if (c == ',') continue;
317 // escape the next char
324 } while (c > ' ' && c != ',' && is);
330 if (c == commentChar) {
331 // Read rest of line (fast :-)
333 // That is still not fast... (Lgb)
337 lyxerr[Debug::LYXLEX] << "Comment read: `" << c
338 << dummy << '\'' << endl;
340 // but ignore is also still buggy (Lgb)
341 // This is fast (Lgb)
342 is.ignore(100, '\n');
// Tracks whether the character just read was preceded by an escape.
352 bool escaped = false;
357 if (c == '\r') continue;
359 // escape the next char
362 if (c == '\"' || c == '\\')
365 buff.push_back('\\');
// Only an unescaped quote terminates the quoted string.
369 if (!escaped && c == '\"') break;
370 } while (c != '\n' && is);
373 printError("Missing quote");
378 buff.resize(buff.size() -1);
388 // escape the next char
396 } while (c > ' ' && c != ',' && is);
405 if (status) return true;
407 status = is.eof() ? LEX_FEOF : LEX_UNDEF;
// Look up `tag` in the keyword table with lower_bound and compare_tags.
// A candidate counts as a hit only when compare_ascii_no_case() reports
// its tag equal to `tag` (result 0).
// assumes the table is sorted by compare_tags, as lower_bound requires -
// verifyTable() exists for that purpose; confirm it is always called
// NOTE(review): the values returned for hit and miss are not visible here.
414 int LyXLex::Pimpl::search_kw(char const * const tag) const
// Probe item carrying only the tag; its code field is irrelevant.
416 keyword_item search_tag = { tag, 0 };
418 lower_bound(table, table + no_items,
419 search_tag, compare_tags());
420 // Use compare_ascii_no_case instead of compare_no_case,
421 // because in Turkish 'i' is not the lowercase version of 'I',
422 // and thus a Turkish locale breaks parsing of tags.
423 if (res != table + no_items
424 && !compare_ascii_no_case(res->tag, tag))
// Read the next token with next() and, when `status` is LEX_TOKEN,
// return the result of looking it up with search_kw().
// NOTE(review): the value returned on the other path is not visible here.
430 int LyXLex::Pimpl::lex()
432 //NOTE: possible bug.
433 if (next() && status == LEX_TOKEN) {
434 return search_kw(getString().c_str());
// Read characters from `is` until a newline has been seen or the stream
// fails.
// NOTE(review): the statements that fill `buff` and the return value are
// not visible here; presumably the rest of the line ends up in `buff`.
440 bool LyXLex::Pimpl::eatLine()
444 unsigned char c = '\0';
446 while (is && c != '\n') {
449 //lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `"
450 // << c << '\'' << endl;
// Drop the last character of buff (presumably the trailing newline).
457 buff.resize(buff.size() - 1);
// Read the next token, either from previously pushed text (pushTok) or
// from the input stream `is`, with special handling of a leading
// backslash.
// Returns true when the scan left `status` set; otherwise `status`
// becomes LEX_FEOF at end of file, or LEX_UNDEF.
466 bool LyXLex::Pimpl::nextToken()
468 if (!pushTok.empty()) {
469 // A whole line may have been pushed, so
470 // we extract the first word and leave the rest
472 if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
474 pushTok = split(pushTok, buff, ' ');
// Scan until the stream fails or a token has set `status`.
484 while (is && !status) {
489 if (c >= ' ' && is) {
492 if (c == '\\') { // first char == '\\'
// This loop stops at a space/control character, a backslash or a
// stream error.
497 } while (c > ' ' && c != '\\' && is);
// This loop accepts spaces (c >= ' ') and stops at a control character,
// a backslash or a stream error.
503 } while (c >= ' ' && c != '\\' && is);
506 if (c == '\\') is.putback(c); // put it back
514 if (status) return true;
516 status = is.eof() ? LEX_FEOF: LEX_UNDEF;
// Push text back so that it is returned before further stream input.
// NOTE(review): the body is not visible here; presumably `pt` is stored
// in `pushTok`, which next() and nextToken() consume first - confirm.
522 void LyXLex::Pimpl::pushToken(string const & pt)