]> git.lyx.org Git - lyx.git/blobdiff - src/lyxlex.C
Fix bug 2474; partial fix for 1777. Added last_reference_ member to QRef class and...
[lyx.git] / src / lyxlex.C
index f036360a67669472561ff581c5ff8a9d95bc2d70..24b8072f1235a7a2415ab1e0e3a2cf794f066550 100644 (file)
-//  Generalized simple lexical analizer.
-//  It can be used for simple syntax parsers, like lyxrc,
-//  texclass and others to come.   [asierra30/03/96]
-//
-//   (C) 1996 Lyx Team.
+/**
+ * \file lyxlex.C
+ * This file is part of LyX, the document processor.
+ * Licence details can be found in the file COPYING.
+ *
+ * \author Alejandro Aguilar Sierra
+ * \author Lars Gullik Bjønnes
+ * \author Jean-Marc Lasgouttes
+ * \author John Levon
+ *
+ * Full author contact details are available in file CREDITS.
+ */
 
 #include <config.h>
 
-#include <cstdlib>
+#include "lyxlex.h"
 
-#ifdef __GNUG__
-#pragma implementation "lyxlex.h"
-#endif
+#include "debug.h"
 
-#include "lyxlex.h"
-#include "error.h"
+#include "support/convert.h"
 #include "support/filetools.h"
+#include "support/lstrings.h"
+#include "support/lyxalgo.h"
+#include "support/types.h"
+#include "support/unicode.h"
+
+#include <boost/iostreams/filtering_streambuf.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+#include <boost/iostreams/device/file.hpp>
+#include <boost/utility.hpp>
+
+namespace io = boost::iostreams;
+
+#include <functional>
+#include <istream>
+#include <stack>
+#include <sstream>
+#include <vector>
+
+
+namespace lyx {
+
+using support::compare_ascii_no_case;
+using support::FileName;
+using support::getFormatFromContents;
+using support::isStrDbl;
+using support::isStrInt;
+using support::ltrim;
+using support::makeDisplayPath;
+using support::prefixIs;
+using support::split;
+using support::subst;
+using support::trim;
+
+using std::endl;
+using std::getline;
+using std::lower_bound;
+using std::sort;
+using std::string;
+using std::ios;
+using std::istream;
+using std::ostream;
+
+
+//////////////////////////////////////////////////////////////////////
+//
+// LyXLex::Pimpl
+//
+//////////////////////////////////////////////////////////////////////
+
+
+/// Private implementation of LyXLex: holds the input stream, the current token and the keyword tables.
+class LyXLex::Pimpl : boost::noncopyable {
+public:
+       /// Start with keyword table \p tab of \p num entries; the table is verified (and sorted if necessary).
+       Pimpl(keyword_item * tab, int num);
+       /// Return a copy of the current token text (buff).
+       std::string const getString() const;
+       /// Return the current token text converted with from_utf8().
+       docstring const getDocString() const;
+       /// Write \p message to lyxerr with "$$Token" replaced by the current token, followed by line number and file name.
+       void printError(std::string const & message) const;
+       /// Write the number of tags and each tag/code pair of the active table to \p os.
+       void printTable(std::ostream & os);
+       /// Save the active keyword table on the stack and make \p tab (\p num entries) the active one.
+       void pushTable(keyword_item * tab, int num);
+       /// Restore the most recently pushed keyword table; reports an error on lyxerr if there is none.
+       void popTable();
+       /// Open \p filename for reading (through gzip decompression if its format is detected as compressed); true on success.
+       bool setFile(support::FileName const & filename);
+       /// Read from the stream buffer of \p i and reset the line counter.
+       void setStream(std::istream & i);
+       /// Set the character that introduces a comment (the constructor uses '#').
+       void setCommentChar(char c);
+       /// Read the next token into buff and set status; \p esc selects the variant that handles backslash escapes. False if no token was read.
+       bool next(bool esc = false);
+       /// Look \p tag up in the active table (ASCII, case-insensitive); returns its code or LEX_UNDEF.
+       int search_kw(char const * const tag) const;
+       /// Read the next token with next(); returns its keyword code if it is a LEX_TOKEN, otherwise status.
+       int lex();
+       /// Read up to the next newline into buff ('\r' and the newline are not kept); true if a line was obtained.
+       bool eatLine();
+       /// Read the next token, which ends at a control character or a backslash, into buff; a pushed token is returned first.
+       bool nextToken();
+       /// Test whether the input stream is good. NOTE(review): the implementation only returns is.good(); a pending pushed token is not considered.
+       bool inputAvailable();
+       /// Store the given string as a pushed-back token; next() and nextToken() return it before reading the stream.
+       void pushToken(std::string const &);
+       /// fb_ is only used to open files, the stream is accessed through is.
+       std::filebuf fb_;
+
+       /// gz_ is only used to open files, the stream is accessed through is.
+       io::filtering_istreambuf gz_;
+
+       /// the stream that we use; it reads from fb_, gz_ or the buffer given to setStream().
+       std::istream is;
+       /// Absolute name of the file given to setFile(); shown in error messages.
+       std::string name;
+       /// Active keyword table.
+       keyword_item * table;
+       /// Number of entries in table.
+       int no_items;
+       /// Text of the current token.
+       std::string buff;
+       /// Result of the last read: 0, LEX_TOKEN, LEX_DATA, LEX_FEOF or LEX_UNDEF.
+       int status;
+       /// Line counter, incremented for each newline or comment line read.
+       int lineno;
+       /// Pushed-back token text; consumed before the stream is read again.
+       std::string pushTok;
+       /// Character that starts a comment.
+       char commentChar;
+private:
+       /// Complain on lyxerr and sort the active table if it is not sorted by tag.
+       void verifyTable();
+       /// A saved keyword table: pointer to the first entry plus the number of entries.
+       class pushed_table {
+       public:
+               /// Empty entry: null table, size 0.
+               pushed_table()
+                       : table_elem(0), table_siz(0) {}
+               /// Entry for table \p ki with \p siz elements.
+               pushed_table(keyword_item * ki, int siz)
+                       : table_elem(ki), table_siz(siz) {}
+               /// First entry of the saved table.
+               keyword_item * table_elem;
+               /// Number of entries in the saved table.
+               int table_siz;
+       };
+       /// Tables saved by pushTable(), most recent on top.
+       std::stack<pushed_table> pushed;
+};
+
+
+
+namespace {
+
+class compare_tags
+       : public std::binary_function<keyword_item, keyword_item, bool> {
+public:
+       // orders keyword_items by tag; used by lower_bound, sort and sorted
+       bool operator()(keyword_item const & a, keyword_item const & b) const
+       {
+               // We use the ASCII version because in Turkish 'i'
+               // is not the lowercase version of 'I', and thus a
+               // Turkish locale breaks parsing of tags.
+               return compare_ascii_no_case(a.tag, b.tag) < 0;
+       }
+};
 
-LyXLex::LyXLex(keyword_item * tab, int num)
-       : table(tab), no_items(num)
-{
-       file = 0;
-       owns_file = false;
-       status = 0;
-       pushed = 0;
-}
+} // end of anon namespace
 
 
-void LyXLex::pushTable(keyword_item * tab, int num)
+LyXLex::Pimpl::Pimpl(keyword_item * tab, int num)
+       : is(&fb_), table(tab), no_items(num), // 'is' starts out reading from fb_
+         status(0), lineno(0), commentChar('#') // '#' is the initial comment character
 {
-       pushed_table * tmppu = new pushed_table;
-       tmppu->next = pushed;
-       tmppu->table_elem = table;
-       tmppu->table_siz = no_items;
-       pushed = tmppu;
-       table = tab;
-       no_items = num;
+       verifyTable(); // make sure the initial table is sorted
 }
 
 
-void LyXLex::popTable()
+string const LyXLex::Pimpl::getString() const // current token text, returned by value
 {
-       if (pushed == 0)
-               lyxerr.print("LyXLex error: nothing to pop!");
-
-       pushed_table * tmp;
-       tmp = pushed;
-       table = tmp->table_elem;
-       no_items = tmp->table_siz;
-       tmp->table_elem = 0;
-       pushed = tmp->next;
-       delete tmp;
+       return buff;
 }
 
 
-void LyXLex::printTable()
+docstring const LyXLex::Pimpl::getDocString() const // current token text as a docstring
 {
-       lyxerr.print(string("\nNumber of tags: ") + tostr(no_items));
-       for(int i=0; i<no_items; i++)
-               lyxerr.print(string("table[")+ tostr(i) +
-                              "]:  tag: `" + table[i].tag +
-                              "'  code:" + tostr(table[i].code));
-       lyxerr.print(string());
+       return from_utf8(buff); // buff is converted with from_utf8()
 }
 
 
-void LyXLex::printError(string const & message)
+void LyXLex::Pimpl::printError(string const & message) const // report message on lyxerr with line number and file name
 {
-       string tmpmsg = subst(message, "$$Token", GetString());
-       lyxerr.print("LyX: " + tmpmsg + " [around line " + tostr(lineno) + " of file "
-                     + MakeDisplayPath(name) + ']');
+       string const tmpmsg = subst(message, "$$Token", getString()); // "$$Token" stands for the current token
+       lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
+               << " of file " << to_utf8(makeDisplayPath(name)) << ']' << endl;
 }
 
 
-bool LyXLex::setFile(string const & filename)
+void LyXLex::Pimpl::printTable(ostream & os) // dump the active keyword table to os
 {
-        if (file) 
-               lyxerr.print("Error in LyXLex::setFile: file already set.");
-       file = fopen(filename.c_str(), "r");
-       name = filename;
-       owns_file = true;
-       lineno = 0;
-       return (file ? true : false);
+       os << "\nNumber of tags: " << no_items << endl;
+       for (int i= 0; i < no_items; ++i)
+               os << "table[" << i
+                  << "]:  tag: `" << table[i].tag
+                  << "'  code:" << table[i].code << '\n';
+       os.flush(); // the entries end in '\n', so flush once after the loop
 }
 
 
-void LyXLex::setFile(FILE * f)
+void LyXLex::Pimpl::verifyTable()
 {
-        if (file) 
-               lyxerr.print("Error in LyXLex::setFile: file already set.");
-       file = f;
-       owns_file = false;
-       lineno = 0; // this is bogus if the file already has been read from
+       // Check that the table is sorted by tag (search_kw() looks tags up with lower_bound); if it is not, complain and sort it.
+       if (table
+           && !lyx::sorted(table, table + no_items, compare_tags())) {
+               lyxerr << "The table passed to LyXLex is not sorted!\n"
+                      << "Tell the developers to fix it!" << endl;
+               // We sort it anyway to avoid problems; the table is printed before and after sorting.
+               lyxerr << "\nUnsorted:" << endl;
+               printTable(lyxerr);
+
+               sort(table, table + no_items, compare_tags());
+               lyxerr << "\nSorted:" << endl;
+               printTable(lyxerr);
+       }
 }
 
 
-int LyXLex::lex()
+void LyXLex::Pimpl::pushTable(keyword_item * tab, int num) // make tab the active table, remembering the current one
 {
-       //NOTE: possible bug.
-   if (next() && status==LEX_TOKEN)
-       return search_kw(buff);
-   else
-       return status;
-}
+       pushed_table tmppu(table, no_items); // the table that popTable() will restore
+       pushed.push(tmppu);
 
+       table = tab;
+       no_items = num;
 
-int LyXLex::GetInteger()
-{
-   if (buff[0]>' ')   
-       return atoi(buff);
-   else {
-       printError("Bad integer `$$Token'");
-       return -1;
-   }
+       verifyTable(); // the new table is checked (and sorted if needed) as well
 }
 
 
-float LyXLex::GetFloat()
+void LyXLex::Pimpl::popTable() // go back to the table that was active before the last pushTable()
 {
-   if (buff[0]>' ')   
-       return (float)strtod(buff, (char**)0);
-   else {
-       printError("Bad float `$$Token'");
-       return -1;
-   }
-}
-
+       if (pushed.empty()) {
+               lyxerr << "LyXLex error: nothing to pop!" << endl;
+               return; // nothing saved: the active table is left untouched
+       }
 
-string LyXLex::GetString() const
-{
-       return string(buff);
+       pushed_table tmp = pushed.top(); // most recently saved table
+       pushed.pop();
+       table = tmp.table_elem;
+       no_items = tmp.table_siz;
 }
 
 
-// I would prefer to give a tag number instead of an explicit token
-// here, but it is not possible because Buffer::readLyXformat2 uses
-// explicit tokens (JMarc) 
-string LyXLex::getLongString(string const & endtoken)
+bool LyXLex::Pimpl::setFile(FileName const & filename)
 {
-       string str, prefix;
-       bool firstline = true;
-
-       while (IsOK()) {
-               if (!EatLine())
-                       // blank line in the file being read
-                       continue;
-               
-               string const token = frontStrip(strip(GetString()), " \t");
-               
-               lyxerr.debug("LongString: `"+GetString()+'\'', Error::LEX_PARSER);
-
-               // We do a case independent comparison, like search_kw
-               // does.
-                if (compare_no_case(token, endtoken) != 0) {
-                       string tmpstr = GetString();
-                       if (firstline) {
-                               unsigned int i = 0;
-                               while(i < tmpstr.length()
-                                     && tmpstr[i] == ' ') {
-                                       ++i;
-                                       prefix += ' ';
-                               }
-                               firstline = false;
-                               lyxerr.debug("Prefix = `"+prefix+'\'',
-                                             Error::LEX_PARSER); 
-                       } 
-
-                       if (!prefix.empty() 
-                           && prefixIs(tmpstr, prefix.c_str())) {
-                               tmpstr.erase(0, prefix.length() - 1);
-                       }
-                       str += tmpstr + '\n';
-                }
-               else // token == endtoken
-                       break;
+       // Check the format of the file to decide whether it has to be read through the decompressor.
+       string const format = getFormatFromContents(filename);
+
+       if (format == "gzip" || format == "zip" || format == "compress") {
+               LYXERR(Debug::LYXLEX) << "lyxlex: compressed" << endl;
+
+               // The check only outputs a debug message, because it triggers
+               // a bug in compaq cxx 6.2, where is_open() returns 'true' for
+               // a fresh new filebuf.  (JMarc)
+               if (!gz_.empty() || istream::off_type(is.tellg()) > -1) // NOTE(review): "> -1" here but "> 0" in the uncompressed branch; confirm which is intended
+                       LYXERR(Debug::LYXLEX) << "Error in LyXLex::setFile: "
+                               "file or stream already set." << endl;
+               gz_.push(io::gzip_decompressor());
+               gz_.push(io::file_source(filename.toFilesystemEncoding())); // second element pushed: component 1 if gz_ was empty before
+               is.rdbuf(&gz_);
+               name = filename.absFilename();
+               lineno = 0;
+               return gz_.component<io::file_source>(1)->is_open() && is.good();
+       } else {
+               LYXERR(Debug::LYXLEX) << "lyxlex: UNcompressed" << endl;
+
+               // The check only outputs a debug message, because it triggers
+               // a bug in compaq cxx 6.2, where is_open() returns 'true' for
+               // a fresh new filebuf.  (JMarc)
+               if (fb_.is_open() || istream::off_type(is.tellg()) > 0)
+                       LYXERR(Debug::LYXLEX) << "Error in LyXLex::setFile: "
+                               "file or stream already set." << endl;
+               fb_.open(filename.toFilesystemEncoding().c_str(), ios::in);
+               is.rdbuf(&fb_);
+               name = filename.absFilename();
+               lineno = 0;
+               return fb_.is_open() && is.good();
        }
-       if (!IsOK())
-               printError("Long string not ended by `" + endtoken + '\'');
-
-       return str;
 }
 
 
-bool LyXLex::GetBool()
+void LyXLex::Pimpl::setStream(istream & i)
 {
-   if (compare(buff, "true") == 0)
-       return true;
-   else if (compare(buff, "false") != 0)
-       printError("Bad boolean `$$Token'. Use \"false\" or \"true\"");
-   return false;
+       if (fb_.is_open() || istream::off_type(is.tellg()) > 0) // only reported as a debug message, the stream is replaced anyway
+               LYXERR(Debug::LYXLEX)  << "Error in LyXLex::setStream: "
+                       "file or stream already set." << endl;
+       is.rdbuf(i.rdbuf()); // from now on 'is' reads through i's stream buffer
+       lineno = 0;
 }
 
 
-bool LyXLex::EatLine()
+void LyXLex::Pimpl::setCommentChar(char c)
 {
-       int i=0;
-       int c = '\0'; // getc() returns an int
-
-       while (!feof(file) && c!='\n' && i!=(LEX_MAX_BUFF-1)) {
-               c = getc(file);
-               if (c != '\r')
-                       buff[i++] = c;
-       }
-       if (i==(LEX_MAX_BUFF-1) && c !='\n') {
-               printError("Line too long");
-               c = '\n'; // Pretend we had an end of line
-               --lineno; // but don't increase line counter (netto effect)
-               ++i; // and preserve last character read.
-       }
-       if (c=='\n') {
-               ++lineno;
-               buff[--i] = '\0'; // i can never be 0 here, so no danger
-               status = LEX_DATA;
-               return true;
-       } else {
-               buff[i] = '\0';
-               return false;
-       }
+       commentChar = c; // next() skips the rest of the line when it reads this character
 }
 
 
-int LyXLex::search_kw(char const * const tag) const
+bool LyXLex::Pimpl::next(bool esc /* = false */)
 {
-       int m, k=0 , l= 0, r=no_items;
-
-       while (l < r) {
-               m = (l+r)/2;
-
-               if (lyxerr.debugging(Error::LEX_PARSER)) {
-                       string my_l;
-                       my_l+="LyXLex::search_kw: elem " ;
-                       my_l+= m; 
-                       my_l+=" tag "; 
-                       my_l+=table[m].tag;
-                       my_l+=" search tag ";
-                       my_l+= tag;
-                       lyxerr.print(my_l);
+       if (!pushTok.empty()) {
+               // A whole line may have been pushed, so we
+               // extract the first word and leave the rest
+               // in pushTok. (Lgb)
+               if (pushTok[0] == '\\' && pushTok.find(' ') != string::npos) {
+                       buff.clear();
+                       pushTok = split(pushTok, buff, ' ');
+               } else {
+                       buff = pushTok;
+                       pushTok.clear();
                }
-
-               if (table[m].tag)
-                       k = compare_no_case(table[m].tag, tag);
-               if (k==0)
-                       return table[m].code;
-               else
-                       if (k<0) l = m+1; else r = m;
+               status = LEX_TOKEN;
+               return true;
        }
-       return -1;
-}
-
-
-bool LyXLex::next(bool esc)
-{
-
        if (!esc) {
-               int c; // getc() returns an int
-               int i;
-               
-               
+               unsigned char c = 0; // unsigned, so that the relational tests below are safe
+               char cc = 0;
                status = 0;
-               while (!feof(file) && !status) { 
-                       c = getc(file);
-                       if (c=='#') {
+               while (is && !status) {
+                       is.get(cc);
+                       c = cc; // NOTE(review): if get() failed, cc still holds the previously read character
+                       if (c == commentChar) {
                                // Read rest of line (fast :-)
-                               fgets(buff, sizeof(buff), file);
+#if 1
+                               // That is not fast... (Lgb)
+                               string dummy;
+                               getline(is, dummy);
+
+                               LYXERR(Debug::LYXLEX) << "Comment read: `" << c
+                                                     << dummy << '\'' << endl;
+#else
+                               // unfortunately ignore is buggy (Lgb)
+                               is.ignore(100, '\n');
+#endif
                                ++lineno;
                                continue;
                        }
-                       
-                       if (c=='\"') {
-                               i = -1;
+
+                       if (c == '\"') {
+                               buff.clear();
+
                                do {
-                                       c = getc(file);
+                                       is.get(cc);
+                                       c = cc;
                                        if (c != '\r')
-                                               buff[++i] = c;
-                               } while (c!='\"' && c!='\n' && !feof(file) &&
-                                        i!=(LEX_MAX_BUFF-2));
-                               
-                               if (i==(LEX_MAX_BUFF-2)) {
-                                       printError("Line too long");
-                                       c = '\"'; // Pretend we got a "
-                                       ++i;
-                               }
-                               
-                               if (c!='\"') {
+                                               buff.push_back(c);
+                               } while (c != '\"' && c != '\n' && is);
+
+                               if (c != '\"') {
                                        printError("Missing quote");
-                                       if (c=='\n')
+                                       if (c == '\n')
                                                ++lineno;
                                }
-                               
-                               buff[i] = '\0';
+
+                               buff.resize(buff.size()-1);
                                status = LEX_DATA;
-                               break; 
+                               break;
                        }
-                       
-                       if (c==',')
+
+                       if (c == ',')
                                continue;              /* Skip ','s */
-                       
-                       if (c > ' ' && !feof(file))  {
-                               i = 0;
+
+                               // Using relational operators other than == and !=
+                               // with chars is not safe. And if it is done,
+                               // the type _has_ to be unsigned. It is usually a
+                               // lot better to use the functions from cctype.
+                       if (c > ' ' && is)  {
+                               buff.clear();
+
                                do {
-                                       buff[i++] = c;
-                                       c = getc(file);
-                               } while (c > ' ' && c != ',' && !feof(file) &&
-                                        (i != LEX_MAX_BUFF-1) );
-                               if (i == LEX_MAX_BUFF-1) {
-                                       printError("Line too long");
-                               }
-                               buff[i] = '\0';
+                                       buff.push_back(c);
+                                       is.get(cc);
+                                       c = cc;
+                               } while (c > ' ' && c != ',' && is);
+
                                status = LEX_TOKEN;
                        }
-                       
-                       if (c== '\r' && !feof(file)) {
+
+                       if (c == '\r' && is) {
                                // The Windows support has lead to the
                                // possibility of "\r\n" at the end of
                                // a line.  This will stop LyX choking
                                // when it expected to find a '\n'
-                               c = getc(file);
+                               is.get(cc);
+                               c = cc;
                        }
 
-                       if (c=='\n')
+                       if (c == '\n')
                                ++lineno;
-                       
+
                }
-               if (status) return true;
-               
-               status = (feof(file)) ? LEX_FEOF: LEX_UNDEF;
-               buff[0] = '\0';
+               if (status)
+                       return true;
+
+               status = is.eof() ? LEX_FEOF: LEX_UNDEF;
+               buff.clear();
                return false;
        } else {
-               int c; // getc() returns an int
-               int i;
-               
-               
+               unsigned char c = 0; // unsigned, so that the relational tests below are safe
+               char cc = 0;
+
                status = 0;
-               while (!feof(file) && !status) { 
-                       c = getc(file);
+               while (is && !status) {
+                       is.get(cc);
+                       c = cc;
 
                        // skip ','s
-                       if (c==',') continue;
-                       
-                       if (c=='\\') {
-                               // escape
-                               i = 0;
-                               do {
-                                       if (c == '\\') {
-                                               // escape the next char
-                                               c = getc(file);
-                                       }
-                                       buff[i++] = c;
-                                       c = getc(file);
-                               } while (c > ' ' && c != ',' && !feof(file) &&
-                                        (i != LEX_MAX_BUFF-1) );
-                               if (i == LEX_MAX_BUFF-1) {
-                                       printError("Line too long");
-                               }
-                               buff[i] = '\0';
-                               status = LEX_TOKEN;
+                       if (c == ',') 
                                continue;
-                       }
-                       
-                       if (c=='#') {
+
+                       if (c == commentChar) {
                                // Read rest of line (fast :-)
-                               fgets(buff, sizeof(buff), file);
+#if 1
+                               // That is still not fast... (Lgb)
+                               string dummy;
+                               getline(is, dummy);
+
+                               LYXERR(Debug::LYXLEX) << "Comment read: `" << c
+                                                     << dummy << '\'' << endl;
+#else
+                               // but ignore is also still buggy (Lgb)
+                               // This is fast (Lgb)
+                               is.ignore(100, '\n');
+#endif
                                ++lineno;
                                continue;
                        }
 
                        // string
-                       if (c=='\"') {
-                               i = -1;
+                       if (c == '\"') {
+                               buff.clear();
+
                                bool escaped = false;
                                do {
                                        escaped = false;
-                                       c = getc(file);
+                                       is.get(cc);
+                                       c = cc;
                                        if (c == '\r') continue;
                                        if (c == '\\') {
                                                // escape the next char
-                                               c = getc(file);
-                                               escaped = true;
+                                               is.get(cc);
+                                               c = cc;
+                                               if (c == '\"' || c == '\\')
+                                                       escaped = true;
+                                               else
+                                                       buff.push_back('\\');
                                        }
-                                       buff[++i] = c;
-                               
-                                       if (!escaped && c == '\"') break;
-                               } while (c!='\n' && !feof(file) &&
-                                        i!=(LEX_MAX_BUFF-2));
-                               
-                               if (i==(LEX_MAX_BUFF-2)) {
-                                       printError("Line too long");
-                                       c = '\"'; // Pretend we got a "
-                                       ++i;
-                               }
-                               
-                               if (c!='\"') {
+                                       buff.push_back(c);
+
+                                       if (!escaped && c == '\"')
+                                               break;
+                               } while (c != '\n' && is);
+
+                               if (c != '\"') {
                                        printError("Missing quote");
-                                       if (c=='\n')
+                                       if (c == '\n')
                                                ++lineno;
                                }
-                               
-                               buff[i] = '\0';
+
+                               buff.resize(buff.size() -1);
                                status = LEX_DATA;
-                               break; 
+                               break;
                        }
-                       
-                       if (c > ' ' && !feof(file))  {
-                               i = 0;
+
+                       if (c > ' ' && is) {
+                               buff.clear();
+
                                do {
                                        if (c == '\\') {
                                                // escape the next char
-                                               c = getc(file);
+                                               is.get(cc);
+                                               c = cc;
                                                //escaped = true;
                                        }
-                                       buff[i++] = c;
-                                       c = getc(file);
-                               } while (c > ' ' && c != ',' && !feof(file) &&
-                                        (i != LEX_MAX_BUFF-1) );
-                               if (i == LEX_MAX_BUFF-1) {
-                                       printError("Line too long");
-                               }
-                               buff[i] = '\0';
+                                       buff.push_back(c);
+                                       is.get(cc);
+                                       c = cc;
+                               } while (c > ' ' && c != ',' && is);
+
                                status = LEX_TOKEN;
                        }
-
                        // new line
-                       if (c=='\n')
+                       if (c == '\n')
                                ++lineno;
                }
-               
-               if (status) return true;
-               
-               status = (feof(file)) ? LEX_FEOF: LEX_UNDEF;
-               buff[0] = '\0';
-               return false;   
+
+               if (status)
+                       return true;
+
+               status = is.eof() ? LEX_FEOF : LEX_UNDEF;
+               buff.clear();
+               return false;
        }
 }
 
 
-bool LyXLex::nextToken()
+int LyXLex::Pimpl::search_kw(char const * const tag) const
+{
+       keyword_item search_tag = { tag, 0 }; // search key; compare_tags only looks at the tag
+       keyword_item * res =
+               lower_bound(table, table + no_items,
+                           search_tag, compare_tags());
+       // Use compare_ascii_no_case instead of compare_no_case,
+       // because in Turkish 'i' is not the lowercase version of 'I',
+       // and thus a Turkish locale breaks parsing of tags.
+       if (res != table + no_items
+           && !compare_ascii_no_case(res->tag, tag))
+               return res->code;
+       return LEX_UNDEF; // tag is not in the active table
+}
+
+
+int LyXLex::Pimpl::lex()
+{
+       //NOTE: possible bug.
+       if (next() && status == LEX_TOKEN) 
+               return search_kw(getString().c_str());
+       return status; // no plain token was read: hand back the lexer status instead
+}
+
+
+bool LyXLex::Pimpl::eatLine()
+{
+       buff.clear();
+
+       unsigned char c = '\0';
+       char cc = 0;
+       while (is && c != '\n') {
+               is.get(cc);
+               c = cc;
+               //LYXERR(Debug::LYXLEX) << "LyXLex::EatLine read char: `"
+               //                    << c << '\'' << endl;
+               if (c != '\r')
+                       buff.push_back(c);
+       }
+
+       if (c == '\n') {
+               ++lineno;
+               buff.resize(buff.size() - 1);
+               status = LEX_DATA;
+               return true;
+       } else if (buff.length() > 0) { // last line
+               status = LEX_DATA;
+               return true;
+       } else {
+               return false;
+       }
+}
+
+
+bool LyXLex::Pimpl::nextToken()
 {
-       int c; // getc() returns an int
-       int i;
-        
-        status = 0;
-       while (!feof(file) && !status) { 
-               c = getc(file);
-          
-               if (c >= ' ' && !feof(file))  {
-                       i = 0;
+       if (!pushTok.empty()) {
+               // A whole line may have been pushed, so we
+               // extract the first word and leave the rest
+               // in pushTok. (Lgb)
+               if (pushTok[0] == '\\' && pushTok.find(' ') != string::npos) {
+                       buff.clear();
+                       pushTok = split(pushTok, buff, ' ');
+               } else {
+                       buff = pushTok;
+                       pushTok.clear();
+               }
+               status = LEX_TOKEN;
+               return true;
+       }
+
+       status = 0;
+       while (is && !status) {
+               unsigned char c = 0;
+               char cc = 0;
+               is.get(cc);
+               c = cc;
+               if (c >= ' ' && is) {
+                       buff.clear();
+
                        if (c == '\\') { // first char == '\\'
                                do {
-                                       buff[i++] = c;
-                                       c = getc(file);
-                               } while (c > ' ' && c != '\\' && !feof(file) &&
-                                        i != (LEX_MAX_BUFF-1));
+                                       buff.push_back(c);
+                                       is.get(cc);
+                                       c = cc;
+                               } while (c > ' ' && c != '\\' && is);
                        } else {
                                do {
-                                       buff[i++] = c;
-                                       c = getc(file);
-                               } while (c >= ' ' && c != '\\' && !feof(file)
-                                        && i != (LEX_MAX_BUFF-1));
-                       }
-
-                       if (i == (LEX_MAX_BUFF-1)) {
-                               printError("Line too long");
+                                       buff.push_back(c);
+                                       is.get(cc);
+                                       c = cc;
+                               } while (c >= ' ' && c != '\\' && is);
                        }
 
-                       if (c == '\\') ungetc(c,file); // put it back
-                       buff[i] = '\0';
-                       status = LEX_TOKEN;
+                       if (c == '\\') is.putback(c); // put it back: the backslash starts the next token
+                       status = LEX_TOKEN;
                }
-                 
-               if (c=='\n')
+
+               if (c == '\n')
                        ++lineno;
+
+       }
+       if (status)
+               return true;
+
+       status = is.eof() ? LEX_FEOF: LEX_UNDEF;
+       buff.clear();
+       return false;
+}
+
+
+/// Tells whether the input stream is still in a good state, i.e.
+/// whether is.good() holds.
+bool LyXLex::Pimpl::inputAvailable()
+{
+       bool const stream_good = is.good();
+       return stream_good;
+}
+
+
+/// Stores \p pt in pushTok, replacing whatever was pushed before.
+void LyXLex::Pimpl::pushToken(string const & pt)
+{
+       pushTok.assign(pt);
+}
+
+
+
+
+//////////////////////////////////////////////////////////////////////
+//
+// LyXLex
+//
+//////////////////////////////////////////////////////////////////////
+
+/// Creates the lexer together with its implementation object, which is
+/// constructed from \p tab and \p num.
+LyXLex::LyXLex(keyword_item * tab, int num)
+       : pimpl_(new Pimpl(tab, num))
+{
+}
+
+
+/// Destroys the implementation object held in pimpl_.
+LyXLex::~LyXLex()
+{
+       Pimpl * const impl = pimpl_;
+       delete impl;
+}
+
+
+/// Returns what the implementation's inputAvailable() reports.
+bool LyXLex::isOK() const
+{
+       bool const available = pimpl_->inputAvailable();
+       return available;
+}
+
+
+/// Overwrites the implementation's line counter with \p l.
+void LyXLex::setLineNo(int l)
+{
+       (*pimpl_).lineno = l;
+}
+
+
+/// Returns the implementation's current line counter.
+int LyXLex::getLineNo() const
+{
+       int const current_line = pimpl_->lineno;
+       return current_line;
+}
+
+
+/// Gives access to the input stream owned by the implementation object.
+istream & LyXLex::getStream()
+{
+       istream & input = pimpl_->is;
+       return input;
+}
+
+
+/// Forwards \p tab and \p num to the implementation's pushTable().
+void LyXLex::pushTable(keyword_item * tab, int num)
+{
+       Pimpl & impl = *pimpl_;
+       impl.pushTable(tab, num);
+}
+
+
+/// Forwards to the implementation's popTable().
+void LyXLex::popTable()
+{
+       Pimpl & impl = *pimpl_;
+       impl.popTable();
+}
+
+
+/// Forwards to the implementation's printTable(), passing \p os along.
+void LyXLex::printTable(ostream & os)
+{
+       Pimpl & impl = *pimpl_;
+       impl.printTable(os);
+}
+
+
+/// Forwards \p message to the implementation's printError().
+void LyXLex::printError(string const & message) const
+{
+       Pimpl & impl = *pimpl_;
+       impl.printError(message);
+}
+
+
+/// Forwards \p filename to the implementation's setFile() and returns
+/// its result.
+bool LyXLex::setFile(support::FileName const & filename)
+{
+       bool const ok = pimpl_->setFile(filename);
+       return ok;
+}
+
+
+/// Forwards the stream \p i to the implementation's setStream().
+void LyXLex::setStream(istream & i)
+{
+       Pimpl & impl = *pimpl_;
+       impl.setStream(i);
+}
+
+
+/// Forwards \p c to the implementation's setCommentChar().
+void LyXLex::setCommentChar(char c)
+{
+       Pimpl & impl = *pimpl_;
+       impl.setCommentChar(c);
+}
+
+/// Returns the result of the implementation's lex().
+int LyXLex::lex()
+{
+       int const result = pimpl_->lex();
+       return result;
+}
+
+
+int LyXLex::getInteger() const
+{
+       lastReadOk_ = pimpl_->status == LEX_DATA || pimpl_->status == LEX_TOKEN;
+       if (!lastReadOk_) {
+               pimpl_->printError("integer token missing");
+               return -1;
+       }
+
+       if (isStrInt(pimpl_->getString()))
+               return convert<int>(pimpl_->getString());
+
+       lastReadOk_ = false;
+       pimpl_->printError("Bad integer `$$Token'");
+       return -1;
+}
+
+
+double LyXLex::getFloat() const
+{
+       // replace comma with dot in case the file was written with
+       // the wrong locale (should be rare, but is easy enough to
+       // avoid).
+       lastReadOk_ = pimpl_->status == LEX_DATA || pimpl_->status == LEX_TOKEN;
+       if (!lastReadOk_) {
+               pimpl_->printError("float token missing");
+               return -1;
+       }
+
+       string const str = subst(pimpl_->getString(), ",", ".");
+       if (isStrDbl(str))
+               return convert<double>(str);
+
+       lastReadOk_ = false;
+       pimpl_->printError("Bad float `$$Token'");
+       return -1;
+}
+
+
+/// Returns the current token as a string, or an empty string when the
+/// status is neither LEX_DATA nor LEX_TOKEN. lastReadOk_ records which
+/// of the two cases applied.
+string const LyXLex::getString() const
+{
+       lastReadOk_ = pimpl_->status == LEX_DATA || pimpl_->status == LEX_TOKEN;
+
+       if (!lastReadOk_)
+               return string();
+
+       return pimpl_->getString();
+}
+
+
+/// Returns the current token as a docstring, or an empty docstring when
+/// the status is neither LEX_DATA nor LEX_TOKEN. lastReadOk_ records
+/// which of the two cases applied.
+docstring const LyXLex::getDocString() const
+{
+       lastReadOk_ = pimpl_->status == LEX_DATA || pimpl_->status == LEX_TOKEN;
+
+       if (!lastReadOk_)
+               return docstring();
+
+       return pimpl_->getDocString();
+}
+
+
+// Reads lines until one matches endtoken (compared without regard to case after trim() with " \t") and returns the lines before it, each followed by '\n'.
+// I would prefer to give a tag number instead of an explicit token here, but it is
+// not possible because Buffer::readDocument uses explicit tokens (JMarc)
+string const LyXLex::getLongString(string const & endtoken)
+{
+       string str, prefix;
+       bool firstline = true;
+
+       while (pimpl_->is) { //< eatLine only reads from is, not from pushTok
+               if (!eatLine())
+                       // blank line in the file being read
+                       continue;
+
+               string const token = trim(getString(), " \t");
+
+               LYXERR(Debug::PARSER) << "LongString: `"
+                                     << getString() << '\'' << endl;
+
+               // We do a case independent comparison, like search_kw does.
+               if (compare_ascii_no_case(token, endtoken) == 0)
+                       break;
+
+               string tmpstr = getString();
+               if (firstline) {
+                       string::size_type i(tmpstr.find_first_not_of(' ')); // i is the length of the run of leading spaces
+                       if (i != string::npos)
+                               prefix = tmpstr.substr(0, i);
+                       firstline = false;
+                       LYXERR(Debug::PARSER)
+                               << "Prefix = `" << prefix << "\'" << endl;
+               }
+
+               // further lines in long strings may have the same
+               // whitespace prefix as the first line. Remove it.
+               if (prefix.length() && prefixIs(tmpstr, prefix)) {
+                       tmpstr.erase(0, prefix.length() - 1); // NOTE(review): erases one character less than the prefix -- confirm this is intended
+               }
+
+               str += ltrim(tmpstr, "\t") + '\n'; // presumably ltrim drops leading tabs -- verify against support::ltrim
        }
-        if (status)  return true;
-        
-        status = (feof(file)) ? LEX_FEOF: LEX_UNDEF;
-        buff[0] = '\0';
-        return false;
-}
-
-
-int LyXLex::FindToken(char const * str[])
-{  
-   int i = -1;
-   
-   if (next()) {
-      if (compare(buff, "default")) {
-        for (i = 0; str[i][0] && compare(str[i], buff); ++i);
-        if (!str[i][0]) {
-           printError("Unknown argument `$$Token'");
-           i = -1;
-        }
-      }  
-   } else
-     printError("file ended while scanning string token");
-   return i;
-}
-
-
-int LyXLex::CheckToken(char const * str[], int print_error)
-{  
-   int i = -1;
-   
-   if (compare(buff, "default")) {
-       for (i = 0; str[i][0] && compare(str[i], buff); i++);
-       if (!str[i][0]) {
-           if (print_error)
-               printError("Unknown argument `$$Token'");
-           i = -1;
-       }
-   }
-   return i;
+
+       if (!pimpl_->is) { // the stream is in a failed state here, which is reported as a missing end token
+               printError("Long string not ended by `" + endtoken + '\'');
+       }
+
+       return str;
+}
+
+
+bool LyXLex::getBool() const
+{
+       if (pimpl_->getString() == "true") {
+               lastReadOk_ = true;
+               return true;
+       } else if (pimpl_->getString() != "false") {
+               pimpl_->printError("Bad boolean `$$Token'. "
+                                  "Use \"false\" or \"true\"");
+               lastReadOk_ = false;
+       }
+       lastReadOk_ = true;
+       return false;
+}
+
+
+/// Returns the result of the implementation's eatLine().
+bool LyXLex::eatLine()
+{
+       bool const got_line = pimpl_->eatLine();
+       return got_line;
+}
+
+
+/// Forwards \p esc to the implementation's next() and returns its result.
+bool LyXLex::next(bool esc)
+{
+       bool const got_token = pimpl_->next(esc);
+       return got_token;
 }
+
+
+/// Returns the result of the implementation's nextToken().
+bool LyXLex::nextToken()
+{
+       bool const got_token = pimpl_->nextToken();
+       return got_token;
+}
+
+
+/// Forwards \p pt to the implementation's pushToken().
+void LyXLex::pushToken(string const & pt)
+{
+       Pimpl & impl = *pimpl_;
+       impl.pushToken(pt);
+}
+
+
+/// Yields a non-null pointer when the last read succeeded and a null
+/// pointer otherwise.
+LyXLex::operator void const *() const
+{
+       // This behaviour is NOT the same as the std::streams which would
+       // use fail() here. However, our implementation of getString() et al.
+       // can cause the eof() and fail() bits to be set, even though we
+       // haven't tried to read 'em.
+       if (!lastReadOk_)
+               return 0;
+
+       return this;
+}
+
+
+/// True when the last read did not succeed.
+bool LyXLex::operator!() const
+{
+       bool const last_read_failed = !lastReadOk_;
+       return last_read_failed;
+}
+
+
+/// Advances with next() and stores the token in \p s via getString().
+/// When isOK() is false nothing is read, \p s is left untouched and
+/// lastReadOk_ is set to false.
+LyXLex & LyXLex::operator>>(std::string & s)
+{
+       if (!isOK()) {
+               lastReadOk_ = false;
+               return *this;
+       }
+
+       next();
+       s = getString();
+       return *this;
+}
+
+
+/// Advances with next() and stores the token in \p s via getDocString().
+/// When isOK() is false nothing is read, \p s is left untouched and
+/// lastReadOk_ is set to false.
+LyXLex & LyXLex::operator>>(docstring & s)
+{
+       if (!isOK()) {
+               lastReadOk_ = false;
+               return *this;
+       }
+
+       next();
+       s = getDocString();
+       return *this;
+}
+
+
+/// Advances with next() and stores the token in \p s via getFloat().
+/// When isOK() is false nothing is read, \p s is left untouched and
+/// lastReadOk_ is set to false.
+LyXLex & LyXLex::operator>>(double & s)
+{
+       if (!isOK()) {
+               lastReadOk_ = false;
+               return *this;
+       }
+
+       next();
+       s = getFloat();
+       return *this;
+}
+
+
+/// Advances with next() and stores the token in \p s via getInteger().
+/// When isOK() is false nothing is read, \p s is left untouched and
+/// lastReadOk_ is set to false.
+LyXLex & LyXLex::operator>>(int & s)
+{
+       if (!isOK()) {
+               lastReadOk_ = false;
+               return *this;
+       }
+
+       next();
+       s = getInteger();
+       return *this;
+}
+
+
+/// Advances with next() and stores the int returned by getInteger() in
+/// the unsigned \p s. When isOK() is false nothing is read, \p s is left
+/// untouched and lastReadOk_ is set to false.
+LyXLex & LyXLex::operator>>(unsigned int & s)
+{
+       if (!isOK()) {
+               lastReadOk_ = false;
+               return *this;
+       }
+
+       next();
+       s = getInteger();
+       return *this;
+}
+
+
+/// Advances with next() and stores the token in \p s via getBool().
+/// When isOK() is false nothing is read, \p s is left untouched and
+/// lastReadOk_ is set to false.
+LyXLex & LyXLex::operator>>(bool & s)
+{
+       if (!isOK()) {
+               lastReadOk_ = false;
+               return *this;
+       }
+
+       next();
+       s = getBool();
+       return *this;
+}
+
+
+/// Quotes a string, e.g. for use in preferences files or as an argument
+/// of the "log" dialog: backslashes and double quotes in \p arg are
+/// escaped with a backslash and the result is wrapped in double quotes.
+string const LyXLex::quoteString(string const & arg)
+{
+       // Backslashes are escaped first, so the backslashes added for the
+       // double quotes afterwards are not escaped a second time.
+       string const backslashes_escaped = subst(arg, "\\", "\\\\");
+       string const fully_escaped = subst(backslashes_escaped, "\"", "\\\"");
+       return '"' + fully_escaped + '"';
+}
+
+
+} // namespace lyx