* LyXLex::Pimpl::buff is now a string.

author Abdelrazak Younes <younes@lyx.org>

Sat, 28 Oct 2006 15:16:30 +0000 (15:16 +0000)

committer Abdelrazak Younes <younes@lyx.org>

Sat, 28 Oct 2006 15:16:30 +0000 (15:16 +0000)
author Abdelrazak Younes <younes@lyx.org>
Sat, 28 Oct 2006 15:16:30 +0000 (15:16 +0000)
committer Abdelrazak Younes <younes@lyx.org>
Sat, 28 Oct 2006 15:16:30 +0000 (15:16 +0000)
diff --git a/src/lyxlex_pimpl.C b/src/lyxlex_pimpl.C

index eb8765db5fceed0651df36526a3cea0e03409262..80b757359aba6fd8828880adec12a68fc4f02494 100644 (file)
--- a/src/lyxlex_pimpl.C
+++ b/src/lyxlex_pimpl.C
@@ -70,15 +70,13 @@ LyXLex::Pimpl::Pimpl(keyword_item * tab, int num)
  
  string const LyXLex::Pimpl::getString() const
  {
-       return string(buff.begin(), buff.end());
+       return buff;
  }
  
  
  docstring const LyXLex::Pimpl::getDocString() const
  {
-       std::vector<char_type> res = utf8_to_ucs4(buff);
-       docstring dstr(res.begin(), res.end());
-       return dstr;
+       return from_utf8(buff);
  }
  
  
@@ -206,13 +204,12 @@ bool LyXLex::Pimpl::next(bool esc /* = false */)
                 // we extract the first word and leaves the rest
                 // in pushTok. (Lgb)
                 if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
-                       string tmp;
-                       pushTok = split(pushTok, tmp, ' ');
-                       buff.assign(tmp.begin(), tmp.end());
+                       buff.clear();
+                       pushTok = split(pushTok, buff, ' ');
                         return true;
                 } else {
-                       buff.assign(pushTok.begin(), pushTok.end());
-                       pushTok.erase();
+                       buff = pushTok;
+                       pushTok.clear();
                         return true;
                 }
         }
@@ -256,7 +253,7 @@ bool LyXLex::Pimpl::next(bool esc /* = false */)
                                                 ++lineno;
                                 }
  
-                               buff.pop_back();
+                               buff.resize(buff.size()-1);
                                 status = LEX_DATA;
                                 break;
                         }
@@ -377,7 +374,7 @@ bool LyXLex::Pimpl::next(bool esc /* = false */)
                                                 ++lineno;
                                 }
  
-                               buff.pop_back();
+                               buff.resize(buff.size() -1);
                                 status = LEX_DATA;
                                 break;
                         }
@@ -456,7 +453,7 @@ bool LyXLex::Pimpl::eatLine()
  
         if (c == '\n') {
                 ++lineno;
-               buff.pop_back();
+               buff.resize(buff.size() - 1);
                 status = LEX_DATA;
                 return true;
         } else {
@@ -472,13 +469,12 @@ bool LyXLex::Pimpl::nextToken()
                 // we extract the first word and leaves the rest
                 // in pushTok. (Lgb)
                 if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
-                       string tmp;
-                       pushTok = split(pushTok, tmp, ' ');
-                       buff.assign(tmp.begin(), tmp.end());
+                       buff.clear();
+                       pushTok = split(pushTok, buff, ' ');
                         return true;
                 } else {
-                       buff.assign(pushTok.begin(), pushTok.end());
-                       pushTok.erase();
+                       buff = pushTok;
+                       pushTok.clear();
                         return true;
                 }
         }
diff --git a/src/lyxlex_pimpl.h b/src/lyxlex_pimpl.h

index 1a66c47725d0ee1694100bf2043263d4c5d9692f..a5cb4a9d78e9e3ccf908cf9c643374386905d289 100644 (file)
--- a/src/lyxlex_pimpl.h
+++ b/src/lyxlex_pimpl.h
@@ -81,7 +81,7 @@ public:
         ///
         int no_items;
         ///
-       std::vector<char> buff;
+       std::string buff;
         ///
         int status;
         ///
diff --git a/src/support/docstring.C b/src/support/docstring.C

index 3d40fc48fc605c2d3d14253a7791f0a2c1e9f414..10a4138638697c3969b69746263e299b0e24dbe3 100644 (file)
--- a/src/support/docstring.C
+++ b/src/support/docstring.C
@@ -20,6 +20,7 @@
  
  namespace lyx {
  
+
  docstring const from_ascii(char const * ascii)
  {
         docstring s;
@@ -53,11 +54,33 @@ std::string const to_ascii(docstring const & ucs4)
  }
  
  
+void utf8_to_ucs4(std::string const & utf8, docstring & ucs4)
+{
+       size_t n = utf8.size();
+       // UTF-8 uses at least one byte per character, so n bytes decode
+       // to at most n characters:
+       ucs4.resize(n);
+       if (n == 0)
+               return;
+
+       int maxoutsize = n * 4;
+       int cd = -1;
+       // basic_string::data() is not recognized by some old gcc versions,
+       // so we use &(ucs4[0]) instead.
+       char * outbuf = (char *)(&(ucs4[0]));
+       int bytes = iconv_convert(cd, ucs4_codeset, "UTF-8",
+               utf8.c_str(), n, outbuf, maxoutsize);
+
+       // adjust to the real converted size
+       ucs4.resize(bytes/4);
+}
+
+
  docstring const from_utf8(std::string const & utf8)
  {
-       std::vector<lyx::char_type> const ucs4 =
-               utf8_to_ucs4(utf8.data(), utf8.size());
-       return docstring(ucs4.begin(), ucs4.end());
+       docstring ucs4;
+       utf8_to_ucs4(utf8, ucs4);
+       return ucs4;
  }
  
  
diff --git a/src/support/unicode.C b/src/support/unicode.C

index c9b9210a4e9f1e6fe7251ecdbcc931088f7b2392..405e145a8761dfce5879f5a79549e2a94ca1c961 100644 (file)
--- a/src/support/unicode.C
+++ b/src/support/unicode.C
@@ -35,22 +35,20 @@ using std::endl;
         char const * ucs2_codeset = "UCS-2LE";
  #endif
  
-namespace {
-
-template<typename RetType, typename InType>
-std::vector<RetType>
-iconv_convert(iconv_t * cd,
+int iconv_convert(int & cd,
               char const * tocode,
               char const * fromcode,
-             InType const * buf,
-             size_t buflen)
+             char const * buf,
+             size_t buflen,
+                 char * outbuf,
+                 size_t maxoutsize)
  {
         if (buflen == 0)
-               return std::vector<RetType>();
+               return 0;
  
-       if (*cd == (iconv_t)(-1)) {
-               *cd = iconv_open(tocode, fromcode);
-               if (*cd == (iconv_t)(-1)) {
+       if (cd == -1) {
+               cd = (int)(iconv_open(tocode, fromcode));
+               if (cd == -1) {
                         lyxerr << "Error returned from iconv_open" << endl;
                         switch (errno) {
                         case EINVAL:
@@ -66,17 +64,13 @@ iconv_convert(iconv_t * cd,
                 }
         }
  
-       char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(reinterpret_cast<char const *>(buf));
-       size_t inbytesleft = buflen * sizeof(InType);
-       // The preamble of the user guide is more than 11.500 characters, so we go for 32kb
-       size_t const outsize = 32768;
-       static char out[outsize];
-       char * outbuf = out;
-       size_t outbytesleft = outsize;
+       char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(buf);
+       size_t inbytesleft = buflen;
+       size_t outbytesleft = maxoutsize;
  
-       size_t res = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+       int res = iconv((iconv_t)(cd), &inbuf, &inbytesleft, &outbuf, &outbytesleft);
  
-       if (res == (size_t)(-1)) {
+       if (res == -1) {
                 lyxerr << "Error returned from iconv" << endl;
                 switch (errno) {
                 case E2BIG:
@@ -111,17 +105,43 @@ iconv_convert(iconv_t * cd,
                         break;
                 }
                 // We got an error so we close down the conversion engine
-               if (iconv_close(*cd) == -1) {
+               if (iconv_close((iconv_t)(cd)) == -1) {
                         lyxerr << "Error returned from iconv_close("
                                << errno << ")" << endl;
                 }
-               *cd = (iconv_t)(-1);
+               cd = -1;
         }
  
         //lyxerr << std::dec;
         //lyxerr << "Inbytesleft: " << inbytesleft << endl;
         //lyxerr << "Outbytesleft: " << outbytesleft << endl;
-       int bytes = outsize - outbytesleft;
+
+       return maxoutsize - outbytesleft;
+}
+
+
+namespace {
+
+
+template<typename RetType, typename InType>
+std::vector<RetType>
+iconv_convert(int & cd,
+             char const * tocode,
+             char const * fromcode,
+             InType const * buf,
+             size_t buflen)
+{
+       if (buflen == 0)
+               return std::vector<RetType>();
+
+       char const * inbuf = reinterpret_cast<char const *>(buf);
+       size_t inbytesleft = buflen * sizeof(InType);
+
+       size_t const outsize = 32768;
+       static char out[outsize];
+       char * outbuf = out;
+
+       int bytes = lyx::iconv_convert(cd, tocode, fromcode, inbuf, inbytesleft, outbuf, outsize);
  
         RetType const * tmp = reinterpret_cast<RetType const *>(out);
         return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
@@ -142,8 +162,8 @@ std::vector<lyx::char_type> utf8_to_ucs4(std::vector<char> const & utf8str)
  std::vector<lyx::char_type>
  utf8_to_ucs4(char const * utf8str, size_t ls)
  {
-       static iconv_t cd = (iconv_t)(-1);
-       return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, "UTF-8",
+       static int cd = -1;
+       return iconv_convert<lyx::char_type>(cd, ucs4_codeset, "UTF-8",
                                               utf8str, ls);
  }
  
@@ -168,8 +188,8 @@ ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
  std::vector<lyx::char_type>
  ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls)
  {
-       static iconv_t cd = (iconv_t)(-1);
-       return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, ucs2_codeset,
+       static int cd = -1;
+       return iconv_convert<lyx::char_type>(cd, ucs4_codeset, ucs2_codeset,
                                               ucs2str, ls);
  }
  
@@ -194,8 +214,8 @@ ucs4_to_ucs2(std::vector<lyx::char_type> const & ucs4str)
  std::vector<unsigned short>
  ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
  {
-       static iconv_t cd = (iconv_t)(-1);
-       return iconv_convert<unsigned short>(&cd, ucs2_codeset, ucs4_codeset,
+       static int cd = -1;
+       return iconv_convert<unsigned short>(cd, ucs2_codeset, ucs4_codeset,
                                              s, ls);
  }
  
@@ -203,8 +223,8 @@ ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
  std::vector<char>
  ucs4_to_utf8(lyx::char_type c)
  {
-       static iconv_t cd = (iconv_t)(-1);
-       return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset, &c, 1);
+       static int cd = -1;
+       return iconv_convert<char>(cd, "UTF-8", ucs4_codeset, &c, 1);
  }
  
  
@@ -221,8 +241,8 @@ ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str)
  std::vector<char>
  ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
  {
-       static iconv_t cd = (iconv_t)(-1);
-       return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset,
+       static int cd = -1;
+       return iconv_convert<char>(cd, "UTF-8", ucs4_codeset,
                                    ucs4str, ls);
  }
  
@@ -230,10 +250,10 @@ ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
  std::vector<lyx::char_type>
  eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding)
  {
-       static std::map<std::string, iconv_t> cd;
+       static std::map<std::string, int> cd;
         if (cd.find(encoding) == cd.end())
-               cd[encoding] = (iconv_t)(-1);
-       return iconv_convert<char_type>(&cd[encoding], ucs4_codeset,
+               cd[encoding] = -1;
+       return iconv_convert<char_type>(cd[encoding], ucs4_codeset,
                                         encoding.c_str(), s, ls);
  }
  
@@ -241,10 +261,10 @@ eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding)
  std::vector<char>
  ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding)
  {
-       static std::map<std::string, iconv_t> cd;
+       static std::map<std::string, int> cd;
         if (cd.find(encoding) == cd.end())
-               cd[encoding] = (iconv_t)(-1);
-       return iconv_convert<char>(&cd[encoding], encoding.c_str(),
+               cd[encoding] = -1;
+       return iconv_convert<char>(cd[encoding], encoding.c_str(),
                                    ucs4_codeset, ucs4str, ls);
  }
  
diff --git a/src/support/unicode.h b/src/support/unicode.h

index 7f99f528320c5cedf2627faca5c16980b6ff35e8..fa9b4c897b0a4207147fff58a9865db333149957 100644 (file)
--- a/src/support/unicode.h
+++ b/src/support/unicode.h
@@ -66,6 +66,16 @@ eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding);
  std::vector<char>
  ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding);
  
+/// Convert data from the \c fromcode encoding to the \c tocode encoding.
+/// \return the number of bytes written to the output buffer.
+extern int iconv_convert(int & cd,
+             char const * tocode,
+             char const * fromcode,
+             char const * buf, ///< input buffer
+             size_t buflen,    ///< input buffer size in bytes
+                 char * outbuf,    ///< output buffer
+                 size_t maxoutsize);    ///< capacity of the output buffer in bytes
+
  extern char const * ucs4_codeset;
  extern char const * ucs2_codeset;
author	Abdelrazak Younes <younes@lyx.org>
	Sat, 28 Oct 2006 15:16:30 +0000 (15:16 +0000)
committer	Abdelrazak Younes <younes@lyx.org>
	Sat, 28 Oct 2006 15:16:30 +0000 (15:16 +0000)
src/lyxlex_pimpl.C		patch \| blob \| history
src/lyxlex_pimpl.h		patch \| blob \| history
src/support/docstring.C		patch \| blob \| history
src/support/unicode.C		patch \| blob \| history
src/support/unicode.h		patch \| blob \| history