X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Flyxlex.C;h=824173ed83e3f76026d5b89db45b5528201fa0e9;hb=35204f8f33d7400a5fefeffea533fb4cb4097211;hp=c35246550517c7163083d821cdefe58523871399;hpb=132fe5e1322fbc86a32692df51eba78d6b4e479c;p=lyx.git

diff --git a/src/lyxlex.C b/src/lyxlex.C
index c352465505..824173ed83 100644
--- a/src/lyxlex.C
+++ b/src/lyxlex.C
@@ -1,561 +1,328 @@
-//  Generalized simple lexical analizer.
-//  It can be used for simple syntax parsers, like lyxrc,
-//  texclass and others to come.   [asierra30/03/96]
-//
-//   Copyright 1996 Lyx Team.
+/**
+ * \file lyxlex.C
+ * This file is part of LyX, the document processor.
+ * Licence details can be found in the file COPYING.
+ *
+ * \author Alejandro Aguilar Sierra
+ * \author Lars Gullik Bjønnes
+ * \author Jean-Marc Lasgouttes
+ * \author John Levon
+ *
+ * Full author contact details are available in file CREDITS.
+ */
 
 #include <config.h>
 
-#include <algorithm>
-#include <cstdlib>
-
-#ifdef __GNUG__
-#pragma implementation "lyxlex.h"
-#endif
-
 #include "lyxlex.h"
+
 #include "debug.h"
-#include "support/filetools.h"
-#include "support/lyxalgo.h"
+#include "lyxlex_pimpl.h"
 
-using std::ios;
-using std::lower_bound;
-using std::sort;
+#include "support/convert.h"
+#include "support/lstrings.h"
 
+#include <sstream>
 
-// namespace {
-struct compare_tags {
-	// used by lower_bound
-	inline
-	int operator()(keyword_item const & a, char const * const tag) const {
-		return compare_no_case(a.tag, tag) < 0;
-	}
-	// used by sorted and sort
-	inline
-	int operator()(keyword_item const & a, keyword_item const & b) const {
-		return compare_no_case(a.tag, b.tag) < 0;
-	}
-};
-// } // end of anon namespace
+
+namespace lyx {
+
+using support::compare_ascii_no_case;
+using support::isStrDbl;
+using support::isStrInt;
+using support::ltrim;
+using support::prefixIs;
+using support::subst;
+using support::trim;
+
+using std::endl;
+using std::string;
+using std::istream;
+using std::ostream;
 
 
 LyXLex::LyXLex(keyword_item * tab, int num)
-	: is(&fb__), table(tab), no_items(num)
-{
-	status = 0; 
-	pushed = 0;
-	// Check if the table is sorted and if not, sort it.
-	if (table && !sorted(table, table + no_items, compare_tags())) {
-		lyxerr << "The table passed to LyXLex is not sorted!!\n"
-		       << "Tell the developers to fix it!" << endl;
-		// We sort it anyway to avoid problems.
-		lyxerr << "\nUnsorted:\n";
-		printTable(lyxerr);
-		
-		sort(table, table + no_items,
-		     compare_tags());
-		lyxerr << "\nSorted:\n";
-		printTable(lyxerr);
-	}
+	: pimpl_(new Pimpl(tab, num))
+{}
+
+
+LyXLex::~LyXLex()
+{
+	delete pimpl_;
+}
+
+
+bool LyXLex::isOK() const
+{
+	return pimpl_->is.good();
+}
+
+
+void LyXLex::setLineNo(int l)
+{
+	pimpl_->lineno = l;
+}
+
+
+int LyXLex::getLineNo() const
+{
+	return pimpl_->lineno;
+}
+
+
+istream & LyXLex::getStream()
+{
+	return pimpl_->is;
 }
 
 
 void LyXLex::pushTable(keyword_item * tab, int num)
 {
-	pushed_table * tmppu = new pushed_table;
-	tmppu->next = pushed;
-	tmppu->table_elem = table;
-	tmppu->table_siz = no_items;
-	pushed = tmppu;
-	table = tab;
-	no_items = num;
-	// Check if the table is sorted and if not, sort it.
-	if (table && !sorted(table, table + no_items, compare_tags())) {
-		lyxerr << "The table passed to LyXLex is not sorted!!\n"
-		       << "Tell the developers to fix it!" << endl;
-		// We sort it anyway to avoid problems.
-		lyxerr << "\nUnsorted:\n";
-		printTable(lyxerr);
-		
-		sort(table, table + no_items, compare_tags());
-		lyxerr << "\nSorted:\n";
-		printTable(lyxerr);
-	}
+	pimpl_->pushTable(tab, num);
 }
 
 
 void LyXLex::popTable()
 {
-	if (pushed == 0)
-		lyxerr << "LyXLex error: nothing to pop!" << endl;
-
-	pushed_table * tmp;
-	tmp = pushed;
-	table = tmp->table_elem;
-	no_items = tmp->table_siz;
-	tmp->table_elem = 0;
-	pushed = tmp->next;
-	delete tmp;
+	pimpl_->popTable();
 }
 
 
 void LyXLex::printTable(ostream & os)
 {
-	os << "\nNumber of tags: " << no_items << '\n';
-	for(int i= 0; i < no_items; ++i)
-		os << "table[" << i
-		   << "]:  tag: `" << table[i].tag
-		   << "'  code:" << table[i].code << '\n';
-	os.flush();
+	pimpl_->printTable(os);
 }
 
 
 void LyXLex::printError(string const & message) const
 {
-	string tmpmsg = subst(message, "$$Token", GetString());
-	lyxerr << "LyX: " << tmpmsg << " [around line " << lineno
-	       << " of file " << MakeDisplayPath(name) << ']' << endl;
+	pimpl_->printError(message);
 }
 
 
 bool LyXLex::setFile(string const & filename)
 {
-	if (fb__.is_open())
-		lyxerr << "Error in LyXLex::setFile: "
-			"file or stream already set." << endl;
-	fb__.open(filename.c_str(), ios::in);
-	is.rdbuf(&fb__);
-	name = filename;
-	lineno = 0;
-	return fb__.is_open() && is.good();
+	return pimpl_->setFile(filename);
 }
 
 
 void LyXLex::setStream(istream & i)
 {
-	if (fb__.is_open() || is.rdbuf()->in_avail())
-		lyxerr << "Error in LyXLex::setStream: "
-			"file or stream already set." << endl;
-	is.rdbuf(i.rdbuf());
-	lineno = 0;
+	pimpl_->setStream(i);
 }
 
 
+void LyXLex::setCommentChar(char c)
+{
+	pimpl_->setCommentChar(c);
+}
+
 int LyXLex::lex()
 {
-	//NOTE: possible bug.
-	if (next() && status == LEX_TOKEN)
-		return search_kw(buff);
-	else
-		return status;
+	return pimpl_->lex();
 }
 
 
-int LyXLex::GetInteger() const
+int LyXLex::getInteger() const
 {
-	if (buff[0] > ' ')   
-		return atoi(buff);
-	else {
-		printError("Bad integer `$$Token'");
-		return -1;
-	}
+	if (isStrInt(pimpl_->getString()))
+		return convert<int>(pimpl_->getString());
+	pimpl_->printError("Bad integer `$$Token'");
+	return -1;
 }
 
 
-float LyXLex::GetFloat() const
+double LyXLex::getFloat() const
 {
-   if (buff[0] > ' ')   
-       return atof(buff);
-   else {
-	printError("Bad float `$$Token'");
+	// Replace comma with dot in case the file was written under a
+	// locale that uses a decimal comma (should be rare, but it is
+	// cheap to guard against).
+	string const str = subst(pimpl_->getString(), ",", ".");
+	if (isStrDbl(str))
+		return convert<double>(str);
+	pimpl_->printError("Bad float `$$Token'");
 	return -1;
-   }
 }
 
 
-string LyXLex::GetString() const
+string const LyXLex::getString() const
+{
+	return pimpl_->getString();
+}
+
+
+docstring const LyXLex::getDocString() const
 {
-	return string(buff);
+	return pimpl_->getDocString();
 }
 
 
 // I would prefer to give a tag number instead of an explicit token
-// here, but it is not possible because Buffer::readLyXformat2 uses
-// explicit tokens (JMarc) 
-string LyXLex::getLongString(string const & endtoken)
+// here, but it is not possible because Buffer::readDocument uses
+// explicit tokens (JMarc)
+string const LyXLex::getLongString(string const & endtoken)
 {
 	string str, prefix;
 	bool firstline = true;
 
-	while (IsOK()) {
-		if (!EatLine())
+	while (isOK()) {
+		if (!eatLine())
 			// blank line in the file being read
 			continue;
-		
-		string const token = frontStrip(strip(GetString()), " \t");
-		
+
+		string const token = trim(getString(), " \t");
+
 		lyxerr[Debug::PARSER] << "LongString: `"
-				      << GetString() << '\'' << endl;
-
-		// We do a case independent comparison, like search_kw
-		// does.
-                if (compare_no_case(token, endtoken) != 0) {
-			string tmpstr = GetString();
-			if (firstline) {
-				unsigned int i = 0;
-				while(i < tmpstr.length()
-				      && tmpstr[i] == ' ') {
-					++i;
-					prefix += ' ';
-				}
-				firstline = false;
-				lyxerr[Debug::PARSER] << "Prefix = `" << prefix
-						      << '\'' << endl;
-			} 
-
-			if (!prefix.empty() 
-			    && prefixIs(tmpstr, prefix.c_str())) {
-				tmpstr.erase(0, prefix.length() - 1);
-			}
-			str += tmpstr + '\n';
-                }
-		else // token == endtoken
+				      << getString() << '\'' << endl;
+
+		// We do a case-insensitive comparison, like search_kw does.
+		if (compare_ascii_no_case(token, endtoken) == 0)
 			break;
+
+		string tmpstr = getString();
+		if (firstline) {
+			string::size_type i(tmpstr.find_first_not_of(' '));
+			if (i != string::npos)
+				prefix = tmpstr.substr(0, i);
+			firstline = false;
+			lyxerr[Debug::PARSER]
+				<< "Prefix = `" << prefix << "\'" << endl;
+		}
+
+		// further lines in long strings may have the same
+		// whitespace prefix as the first line. Remove it.
+		if (prefix.length() && prefixIs(tmpstr, prefix)) {
+			tmpstr.erase(0, prefix.length() - 1);
+		}
+
+		str += ltrim(tmpstr, "\t") + '\n';
 	}
-	if (!IsOK())
+
+	if (!isOK()) {
 		printError("Long string not ended by `" + endtoken + '\'');
+	}
 
 	return str;
 }
 
 
-bool LyXLex::GetBool() const
+bool LyXLex::getBool() const
 {
-	if (compare(buff, "true") == 0)
+	if (pimpl_->getString() == "true") {
 		return true;
-	else if (compare(buff, "false") != 0)
-		printError("Bad boolean `$$Token'. Use \"false\" or \"true\"");
+	} else if (pimpl_->getString() != "false") {
+		pimpl_->printError("Bad boolean `$$Token'. "
+				   "Use \"false\" or \"true\"");
+	}
 	return false;
 }
 
 
-bool LyXLex::EatLine()
+bool LyXLex::eatLine()
+{
+	return pimpl_->eatLine();
+}
+
+
+bool LyXLex::next(bool esc)
+{
+	return pimpl_->next(esc);
+}
+
+
+bool LyXLex::nextToken()
+{
+	return pimpl_->nextToken();
+}
+
+
+void LyXLex::pushToken(string const & pt)
+{
+	pimpl_->pushToken(pt);
+}
+
+LyXLex::operator void const *() const
+{
+	// This behaviour is NOT the same as that of the std streams, which
+	// would use fail() here. However, our implementation of getString()
+	// et al. can cause the eof() and fail() bits to be set, even though
+	// we haven't tried to read them.
+	return pimpl_->is.bad() ? 0 : this;
+}
+
+
+bool LyXLex::operator!() const
+{
+	return pimpl_->is.bad();
+}
+
+
+LyXLex & LyXLex::operator>>(std::string & s)
 {
-	int i = 0;
-	unsigned char c = '\0';
-	char cc = 0;
-	while(is && c != '\n' && i != (LEX_MAX_BUFF - 1)) {
-		is.get(cc);
-		c = cc;
-		lyxerr[Debug::LYXLEX] << "LyXLex::EatLine read char: `"
-				      << c << "'" << endl;
-		if (c != '\r')
-			buff[i++] = c;
+	if (isOK()) {
+		next();
+		s = getString();
 	}
-	if (i == (LEX_MAX_BUFF - 1) && c != '\n') {
-		printError("Line too long");
-		c = '\n'; // Pretend we had an end of line
-		--lineno; // but don't increase line counter (netto effect)
-		++i; // and preserve last character read.
+	return *this;
+}
+
+
+LyXLex & LyXLex::operator>>(docstring & s)
+{
+	if (isOK()) {
+		next();
+		s = getDocString();
 	}
-	if (c == '\n') {
-		++lineno;
-		buff[--i] = '\0'; // i can never be 0 here, so no danger
-		status = LEX_DATA;
-		return true;
-	} else {
-		buff[i] = '\0';
-		return false;
+	return *this;
+}
+
+
+LyXLex & LyXLex::operator>>(double & s)
+{
+	if (isOK()) {
+		next();
+		s = getFloat();
 	}
+	return *this;
 }
 
 
-int LyXLex::search_kw(char const * const tag) const
+LyXLex & LyXLex::operator>>(int & s)
 {
-	keyword_item * res =
-		lower_bound(table, table + no_items, tag, compare_tags());
-	if (res != table + no_items && !compare_no_case(res->tag, tag))
-		return res->code;
-	return LEX_UNDEF;
+	if (isOK()) {
+		next();
+		s = getInteger();
+	}
+	return *this;
 }
 
 
-bool LyXLex::next(bool esc)
+LyXLex & LyXLex::operator>>(unsigned int & s)
 {
-	if (!esc) {
-		unsigned char c = 0; // getc() returns an int
-		char cc = 0;
-		status = 0;
-		while (is && !status) {
-			is.get(cc);
-			c = cc;
-			if (c == '#') {
-				// Read rest of line (fast :-)
-				is.getline(buff, sizeof(buff));
-				lyxerr[Debug::LYXLEX] << "Comment read: `" << c
-						      << buff << "'" << endl;
-				++lineno;
-				continue;
-			}
-			
-			if (c == '\"') {
-				int i = -1;
-				do {
-					is.get(cc);
-					c = cc;
-					if (c != '\r')
-						buff[++i] = c;
-				} while (c != '\"' && c != '\n' && is &&
-					 i != (LEX_MAX_BUFF - 2));
-				
-				if (i == (LEX_MAX_BUFF - 2)) {
-					printError("Line too long");
-					c = '\"'; // Pretend we got a "
-					++i;
-				}
-				
-				if (c != '\"') {
-					printError("Missing quote");
-					if (c == '\n')
-						++lineno;
-				}
-				
-				buff[i] = '\0';
-				status = LEX_DATA;
-				break; 
-			}
-			
-			if (c == ',')
-				continue;              /* Skip ','s */
-
-			// using relational operators with chars other
-			// than == and != is not safe. And if it is done
-			// the type _have_ to be unsigned. It usually a
-			// lot better to use the functions from cctype
-			if (c > ' ' && is)  {
-				int i = 0;
-				do {
-					buff[i++] = c;
-					is.get(cc);
-					c = cc;
-				} while (c > ' ' && c != ',' && is
-					 && (i != LEX_MAX_BUFF - 1) );
-				if (i == LEX_MAX_BUFF - 1) {
-					printError("Line too long");
-				}
-				buff[i] = '\0';
-				status = LEX_TOKEN;
-			}
-			
-			if (c == '\r' && is) {
-				// The Windows support has lead to the
-				// possibility of "\r\n" at the end of
-				// a line.  This will stop LyX choking
-				// when it expected to find a '\n'
-				is.get(cc);
-				c = cc;
-			}
-
-			if (c == '\n')
-				++lineno;
-			
-		}
-		if (status) return true;
-		
-		status = is.eof() ? LEX_FEOF: LEX_UNDEF;
-		buff[0] = '\0';
-		return false;
-	} else {
-		unsigned char c = 0; // getc() returns an int
-		char cc = 0;
-		
-		status = 0;
-		while (is && !status) {
-			is.get(cc);
-			c = cc;
-
-			// skip ','s
-			if (c == ',') continue;
-			
-			if (c == '\\') {
-				// escape
-				int i = 0;
-				do {
-					if (c == '\\') {
-						// escape the next char
-						is.get(cc);
-						c = cc;
-					}
-					buff[i++] = c;
-					is.get(cc);
-					c = cc;
-				} while (c > ' ' && c != ',' && is
-					 && (i != LEX_MAX_BUFF - 1) );
-				if (i == LEX_MAX_BUFF - 1) {
-					printError("Line too long");
-				}
-				buff[i] = '\0';
-				status = LEX_TOKEN;
-				continue;
-			}
-			
-			if (c == '#') {
-				// Read rest of line (fast :-)
-				is.getline(buff, sizeof(buff));
-				lyxerr[Debug::LYXLEX] << "Comment read: `" << c
-						      << buff << "'" << endl;
-				++lineno;
-				continue;
-			}
-
-			// string
-			if (c == '\"') {
-				int i = -1;
-				bool escaped = false;
-				do {
-					escaped = false;
-					is.get(cc);
-					c = cc;
-					if (c == '\r') continue;
-					if (c == '\\') {
-						// escape the next char
-						is.get(cc);
-						c = cc;
-						escaped = true;
-					}
-					buff[++i] = c;
-				
-					if (!escaped && c == '\"') break;
-				} while (c != '\n' && is &&
-					 i != (LEX_MAX_BUFF - 2));
-				
-				if (i == (LEX_MAX_BUFF - 2)) {
-					printError("Line too long");
-					c = '\"'; // Pretend we got a "
-					++i;
-				}
-				
-				if (c != '\"') {
-					printError("Missing quote");
-					if (c == '\n')
-						++lineno;
-				}
-				
-				buff[i] = '\0';
-				status = LEX_DATA;
-				break; 
-			}
-			
-			if (c > ' ' && is) {
-				int i = 0;
-				do {
-					if (c == '\\') {
-						// escape the next char
-						is.get(cc);
-						c = cc;
-						//escaped = true;
-					}
-					buff[i++] = c;
-					is.get(cc);
-					c = cc;
-				} while (c > ' ' && c != ',' && is
-					 && (i != LEX_MAX_BUFF-1) );
-				if (i == LEX_MAX_BUFF-1) {
-					printError("Line too long");
-				}
-				buff[i] = '\0';
-				status = LEX_TOKEN;
-			}
-			// new line
-			if (c == '\n')
-				++lineno;
-		}
-		
-		if (status) return true;
-		
-		status = is.eof() ? LEX_FEOF : LEX_UNDEF;
-		buff[0] = '\0';
-		return false;
+	if (isOK()) {
+		next();
+		s = getInteger();
 	}
+	return *this;
 }
 
 
-bool LyXLex::nextToken()
+LyXLex & LyXLex::operator>>(bool & s)
 {
-        status = 0;
-	while (is && !status) {
-		unsigned char c = 0;
-		char cc = 0;
-		is.get(cc);
-		c = cc;
-		if (c >= ' ' && is) {
-			int i = 0;
-			if (c == '\\') { // first char == '\\'
-				do {
-					buff[i++] = c;
-					is.get(cc);
-					c = cc;
-				} while (c > ' ' && c != '\\' && is
-					 && i != (LEX_MAX_BUFF-1));
-			} else {
-				do {
-					buff[i++] = c;
-					is.get(cc);
-					c = cc;
-				} while (c >= ' ' && c != '\\' && is
-					 && i != (LEX_MAX_BUFF-1));
-			}
-
-			if (i == (LEX_MAX_BUFF - 1)) {
-				printError("Line too long");
-			}
-
-			if (c == '\\') is.putback(c); // put it back
-			buff[i] = '\0';
-		        status = LEX_TOKEN;
-		}
-		  
-		if (c == '\n')
-			++lineno;
-	
+	if (isOK()) {
+		next();
+		s = getBool();
 	}
-        if (status)  return true;
-        
-        status = is.eof() ? LEX_FEOF: LEX_UNDEF;
-        buff[0] = '\0';
-        return false;
-}
-
-
-int LyXLex::FindToken(char const * str[])
-{  
-   int i = -1;
-   
-   if (next()) {
-      if (compare(buff, "default")) {
-	 for (i = 0; str[i][0] && compare(str[i], buff); ++i);
-	 if (!str[i][0]) {
-	    printError("Unknown argument `$$Token'");
-	    i = -1;
-	 }
-      }  
-   } else
-     printError("file ended while scanning string token");
-   return i;
-}
-
-
-int LyXLex::CheckToken(char const * str[], int print_error)
-{  
-   int i = -1;
-   
-   if (compare(buff, "default")) {
-       for (i = 0; str[i][0] && compare(str[i], buff); ++i);
-       if (!str[i][0]) {
-           if (print_error)
-               printError("Unknown argument `$$Token'");
-           i = -1;
-       }
-   }
-   return i;
+	return *this;
 }
+
+
+/// quotes a string, e.g. for use in preferences files or as an argument of the "log" dialog
+string const LyXLex::quoteString(string const & arg)
+{
+	std::ostringstream os;
+	os << '"' << subst(subst(arg, "\\", "\\\\"), "\"", "\\\"") << '"';
+	return os.str();
+}
+
+
+} // namespace lyx