From: Abdelrazak Younes <younes@lyx.org>
Date: Sat, 28 Oct 2006 15:16:30 +0000 (+0000)
Subject: * LyXLex::Pimpl::buff is now a string.
X-Git-Tag: 1.6.10~12158
X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=451b12d00d4e82215ac07b5204179156dc1d3283;p=features.git

* LyXLex::Pimpl::buff is now a std::string instead of a std::vector<char>.

* unicode.[Ch]:
 - new non-template iconv_convert() function.
 - the iconv_convert() template now uses the function above.

* docstring.C:
 - utf8_to_ucs4(): new function; uses the new non-template iconv_convert().
 - from_utf8(): now uses the new utf8_to_ucs4() function.



git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@15592 a592a061-630c-0410-9148-cb99ea01b6c8
---

diff --git a/src/lyxlex_pimpl.C b/src/lyxlex_pimpl.C
index eb8765db5f..80b757359a 100644
--- a/src/lyxlex_pimpl.C
+++ b/src/lyxlex_pimpl.C
@@ -70,15 +70,13 @@ LyXLex::Pimpl::Pimpl(keyword_item * tab, int num)
 
 string const LyXLex::Pimpl::getString() const
 {
-	return string(buff.begin(), buff.end());
+	return buff;
 }
 
 
 docstring const LyXLex::Pimpl::getDocString() const
 {
-	std::vector<char_type> res = utf8_to_ucs4(buff);
-	docstring dstr(res.begin(), res.end());
-	return dstr;
+	return from_utf8(buff);
 }
 
 
@@ -206,13 +204,12 @@ bool LyXLex::Pimpl::next(bool esc /* = false */)
 		// we extract the first word and leaves the rest
 		// in pushTok. (Lgb)
 		if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
-			string tmp;
-			pushTok = split(pushTok, tmp, ' ');
-			buff.assign(tmp.begin(), tmp.end());
+			buff.clear();
+			pushTok = split(pushTok, buff, ' ');
 			return true;
 		} else {
-			buff.assign(pushTok.begin(), pushTok.end());
-			pushTok.erase();
+			buff = pushTok;
+			pushTok.clear();
 			return true;
 		}
 	}
@@ -256,7 +253,7 @@ bool LyXLex::Pimpl::next(bool esc /* = false */)
 						++lineno;
 				}
 
-				buff.pop_back();
+				buff.resize(buff.size()-1);
 				status = LEX_DATA;
 				break;
 			}
@@ -377,7 +374,7 @@ bool LyXLex::Pimpl::next(bool esc /* = false */)
 						++lineno;
 				}
 
-				buff.pop_back();
+				buff.resize(buff.size() -1);
 				status = LEX_DATA;
 				break;
 			}
@@ -456,7 +453,7 @@ bool LyXLex::Pimpl::eatLine()
 
 	if (c == '\n') {
 		++lineno;
-		buff.pop_back();
+		buff.resize(buff.size() - 1);
 		status = LEX_DATA;
 		return true;
 	} else {
@@ -472,13 +469,12 @@ bool LyXLex::Pimpl::nextToken()
 		// we extract the first word and leaves the rest
 		// in pushTok. (Lgb)
 		if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
-			string tmp;
-			pushTok = split(pushTok, tmp, ' ');
-			buff.assign(tmp.begin(), tmp.end());
+			buff.clear();
+			pushTok = split(pushTok, buff, ' ');
 			return true;
 		} else {
-			buff.assign(pushTok.begin(), pushTok.end());
-			pushTok.erase();
+			buff = pushTok;
+			pushTok.clear();
 			return true;
 		}
 	}
diff --git a/src/lyxlex_pimpl.h b/src/lyxlex_pimpl.h
index 1a66c47725..a5cb4a9d78 100644
--- a/src/lyxlex_pimpl.h
+++ b/src/lyxlex_pimpl.h
@@ -81,7 +81,7 @@ public:
 	///
 	int no_items;
 	///
-	std::vector<char> buff;
+	std::string buff;
 	///
 	int status;
 	///
diff --git a/src/support/docstring.C b/src/support/docstring.C
index 3d40fc48fc..10a4138638 100644
--- a/src/support/docstring.C
+++ b/src/support/docstring.C
@@ -20,6 +20,7 @@
 
 namespace lyx {
 
+
 docstring const from_ascii(char const * ascii)
 {
 	docstring s;
@@ -53,11 +54,33 @@ std::string const to_ascii(docstring const & ucs4)
 }
 
 
+void utf8_to_ucs4(std::string const & utf8, docstring & ucs4)
+{
+	size_t n = utf8.size();
+	// as utf8 is a multi-byte encoding, there would be at most
+	// n characters:
+	ucs4.resize(n);
+	if (n == 0)
+		return;
+
+	int maxoutsize = n * 4;
+	int cd = -1;
+	// basic_string::data() is not recognized by some old gcc version
+	// so we use &(ucs4[0]) instead.
+	char * outbuf = (char *)(&(ucs4[0]));
+	int bytes = iconv_convert(cd, ucs4_codeset, "UTF-8",
+		utf8.c_str(), n, outbuf, maxoutsize);
+
+	// adjust to the real converted size
+	ucs4.resize(bytes/4);
+}
+
+
 docstring const from_utf8(std::string const & utf8)
 {
-	std::vector<lyx::char_type> const ucs4 =
-		utf8_to_ucs4(utf8.data(), utf8.size());
-	return docstring(ucs4.begin(), ucs4.end());
+	docstring ucs4;
+	utf8_to_ucs4(utf8, ucs4);
+	return ucs4;
 }
 
 
diff --git a/src/support/unicode.C b/src/support/unicode.C
index c9b9210a4e..405e145a87 100644
--- a/src/support/unicode.C
+++ b/src/support/unicode.C
@@ -35,22 +35,20 @@ using std::endl;
 	char const * ucs2_codeset = "UCS-2LE";
 #endif
 
-namespace {
-
-template<typename RetType, typename InType>
-std::vector<RetType>
-iconv_convert(iconv_t * cd,
+int iconv_convert(int & cd,
 	      char const * tocode,
 	      char const * fromcode,
-	      InType const * buf,
-	      size_t buflen)
+	      char const * buf,
+	      size_t buflen,
+		  char * outbuf,
+		  size_t maxoutsize)
 {
 	if (buflen == 0)
-		return std::vector<RetType>();
+		return 0;
 
-	if (*cd == (iconv_t)(-1)) {
-		*cd = iconv_open(tocode, fromcode);
-		if (*cd == (iconv_t)(-1)) {
+	if (cd == -1) {
+		cd = (int)(iconv_open(tocode, fromcode));
+		if (cd == -1) {
 			lyxerr << "Error returned from iconv_open" << endl;
 			switch (errno) {
 			case EINVAL:
@@ -66,17 +64,13 @@ iconv_convert(iconv_t * cd,
 		}
 	}
 
-	char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(reinterpret_cast<char const *>(buf));
-	size_t inbytesleft = buflen * sizeof(InType);
-	// The preamble of the user guide is more than 11.500 characters, so we go for 32kb
-	size_t const outsize = 32768;
-	static char out[outsize];
-	char * outbuf = out;
-	size_t outbytesleft = outsize;
+	char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(buf);
+	size_t inbytesleft = buflen;
+	size_t outbytesleft = maxoutsize;
 
-	size_t res = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+	int res = iconv((iconv_t)(cd), &inbuf, &inbytesleft, &outbuf, &outbytesleft);
 
-	if (res == (size_t)(-1)) {
+	if (res == -1) {
 		lyxerr << "Error returned from iconv" << endl;
 		switch (errno) {
 		case E2BIG:
@@ -111,17 +105,43 @@ iconv_convert(iconv_t * cd,
 			break;
 		}
 		// We got an error so we close down the conversion engine
-		if (iconv_close(*cd) == -1) {
+		if (iconv_close((iconv_t)(cd)) == -1) {
 			lyxerr << "Error returned from iconv_close("
 			       << errno << ")" << endl;
 		}
-		*cd = (iconv_t)(-1);
+		cd = -1;
 	}
 
 	//lyxerr << std::dec;
 	//lyxerr << "Inbytesleft: " << inbytesleft << endl;
 	//lyxerr << "Outbytesleft: " << outbytesleft << endl;
-	int bytes = outsize - outbytesleft;
+
+	return maxoutsize - outbytesleft;
+}
+
+
+namespace {
+
+
+template<typename RetType, typename InType>
+std::vector<RetType>
+iconv_convert(int & cd,
+	      char const * tocode,
+	      char const * fromcode,
+	      InType const * buf,
+	      size_t buflen)
+{
+	if (buflen == 0)
+		return std::vector<RetType>();
+
+	char const * inbuf = reinterpret_cast<char const *>(buf);
+	size_t inbytesleft = buflen * sizeof(InType);
+
+	size_t const outsize = 32768;
+	static char out[outsize];
+	char * outbuf = out;
+
+	int bytes = lyx::iconv_convert(cd, tocode, fromcode, inbuf, inbytesleft, outbuf, outsize);
 
 	RetType const * tmp = reinterpret_cast<RetType const *>(out);
 	return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
@@ -142,8 +162,8 @@ std::vector<lyx::char_type> utf8_to_ucs4(std::vector<char> const & utf8str)
 std::vector<lyx::char_type>
 utf8_to_ucs4(char const * utf8str, size_t ls)
 {
-	static iconv_t cd = (iconv_t)(-1);
-	return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, "UTF-8",
+	static int cd = -1;
+	return iconv_convert<lyx::char_type>(cd, ucs4_codeset, "UTF-8",
 					      utf8str, ls);
 }
 
@@ -168,8 +188,8 @@ ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
 std::vector<lyx::char_type>
 ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls)
 {
-	static iconv_t cd = (iconv_t)(-1);
-	return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, ucs2_codeset,
+	static int cd = -1;
+	return iconv_convert<lyx::char_type>(cd, ucs4_codeset, ucs2_codeset,
 					      ucs2str, ls);
 }
 
@@ -194,8 +214,8 @@ ucs4_to_ucs2(std::vector<lyx::char_type> const & ucs4str)
 std::vector<unsigned short>
 ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
 {
-	static iconv_t cd = (iconv_t)(-1);
-	return iconv_convert<unsigned short>(&cd, ucs2_codeset, ucs4_codeset,
+	static int cd = -1;
+	return iconv_convert<unsigned short>(cd, ucs2_codeset, ucs4_codeset,
 					     s, ls);
 }
 
@@ -203,8 +223,8 @@ ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
 std::vector<char>
 ucs4_to_utf8(lyx::char_type c)
 {
-	static iconv_t cd = (iconv_t)(-1);
-	return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset, &c, 1);
+	static int cd = -1;
+	return iconv_convert<char>(cd, "UTF-8", ucs4_codeset, &c, 1);
 }
 
 
@@ -221,8 +241,8 @@ ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str)
 std::vector<char>
 ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
 {
-	static iconv_t cd = (iconv_t)(-1);
-	return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset,
+	static int cd = -1;
+	return iconv_convert<char>(cd, "UTF-8", ucs4_codeset,
 				   ucs4str, ls);
 }
 
@@ -230,10 +250,10 @@ ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
 std::vector<lyx::char_type>
 eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding)
 {
-	static std::map<std::string, iconv_t> cd;
+	static std::map<std::string, int> cd;
 	if (cd.find(encoding) == cd.end())
-		cd[encoding] = (iconv_t)(-1);
-	return iconv_convert<char_type>(&cd[encoding], ucs4_codeset,
+		cd[encoding] = -1;
+	return iconv_convert<char_type>(cd[encoding], ucs4_codeset,
 	                                encoding.c_str(), s, ls);
 }
 
@@ -241,10 +261,10 @@ eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding)
 std::vector<char>
 ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding)
 {
-	static std::map<std::string, iconv_t> cd;
+	static std::map<std::string, int> cd;
 	if (cd.find(encoding) == cd.end())
-		cd[encoding] = (iconv_t)(-1);
-	return iconv_convert<char>(&cd[encoding], encoding.c_str(),
+		cd[encoding] = -1;
+	return iconv_convert<char>(cd[encoding], encoding.c_str(),
 	                           ucs4_codeset, ucs4str, ls);
 }
 
diff --git a/src/support/unicode.h b/src/support/unicode.h
index 7f99f52832..fa9b4c897b 100644
--- a/src/support/unicode.h
+++ b/src/support/unicode.h
@@ -66,6 +66,16 @@ eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding);
 std::vector<char>
 ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding);
 
+/// convert any data from \c fromcode to \c tocode unicode format.
+/// \return the number of bytes of the converted output buffer.
+extern int iconv_convert(int & cd,
+	      char const * tocode,
+	      char const * fromcode,
+	      char const * buf, ///< maximum input buffer
+	      size_t buflen,    ///< maximum input buffer size in bytes
+		  char * outbuf,    ///< maximum output buffer
+		  size_t maxoutsize);    ///< maximum output buffer size in bytes
+
 extern char const * ucs4_codeset;
 extern char const * ucs2_codeset;