]> git.lyx.org Git - features.git/commitdiff
* LyXLex::Pimpl::buff is now a string.
authorAbdelrazak Younes <younes@lyx.org>
Sat, 28 Oct 2006 15:16:30 +0000 (15:16 +0000)
committerAbdelrazak Younes <younes@lyx.org>
Sat, 28 Oct 2006 15:16:30 +0000 (15:16 +0000)
* unicode.[Ch]:
 - new non-template iconv_convert()
 - the iconv_convert() template now uses the above function.

* docstring.C:
 - utf8_to_ucs4(): new function, use the new iconv_convert() function above.
 - from_utf8(): use utf8_to_ucs4() function above.

git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@15592 a592a061-630c-0410-9148-cb99ea01b6c8

src/lyxlex_pimpl.C
src/lyxlex_pimpl.h
src/support/docstring.C
src/support/unicode.C
src/support/unicode.h

index eb8765db5fceed0651df36526a3cea0e03409262..80b757359aba6fd8828880adec12a68fc4f02494 100644 (file)
@@ -70,15 +70,13 @@ LyXLex::Pimpl::Pimpl(keyword_item * tab, int num)
 
 string const LyXLex::Pimpl::getString() const
 {
-       return string(buff.begin(), buff.end());
+       return buff;
 }
 
 
 docstring const LyXLex::Pimpl::getDocString() const
 {
-       std::vector<char_type> res = utf8_to_ucs4(buff);
-       docstring dstr(res.begin(), res.end());
-       return dstr;
+       return from_utf8(buff);
 }
 
 
@@ -206,13 +204,12 @@ bool LyXLex::Pimpl::next(bool esc /* = false */)
                // we extract the first word and leaves the rest
                // in pushTok. (Lgb)
                if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
-                       string tmp;
-                       pushTok = split(pushTok, tmp, ' ');
-                       buff.assign(tmp.begin(), tmp.end());
+                       buff.clear();
+                       pushTok = split(pushTok, buff, ' ');
                        return true;
                } else {
-                       buff.assign(pushTok.begin(), pushTok.end());
-                       pushTok.erase();
+                       buff = pushTok;
+                       pushTok.clear();
                        return true;
                }
        }
@@ -256,7 +253,7 @@ bool LyXLex::Pimpl::next(bool esc /* = false */)
                                                ++lineno;
                                }
 
-                               buff.pop_back();
+                               buff.resize(buff.size()-1);
                                status = LEX_DATA;
                                break;
                        }
@@ -377,7 +374,7 @@ bool LyXLex::Pimpl::next(bool esc /* = false */)
                                                ++lineno;
                                }
 
-                               buff.pop_back();
+                               buff.resize(buff.size() -1);
                                status = LEX_DATA;
                                break;
                        }
@@ -456,7 +453,7 @@ bool LyXLex::Pimpl::eatLine()
 
        if (c == '\n') {
                ++lineno;
-               buff.pop_back();
+               buff.resize(buff.size() - 1);
                status = LEX_DATA;
                return true;
        } else {
@@ -472,13 +469,12 @@ bool LyXLex::Pimpl::nextToken()
                // we extract the first word and leaves the rest
                // in pushTok. (Lgb)
                if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') {
-                       string tmp;
-                       pushTok = split(pushTok, tmp, ' ');
-                       buff.assign(tmp.begin(), tmp.end());
+                       buff.clear();
+                       pushTok = split(pushTok, buff, ' ');
                        return true;
                } else {
-                       buff.assign(pushTok.begin(), pushTok.end());
-                       pushTok.erase();
+                       buff = pushTok;
+                       pushTok.clear();
                        return true;
                }
        }
index 1a66c47725d0ee1694100bf2043263d4c5d9692f..a5cb4a9d78e9e3ccf908cf9c643374386905d289 100644 (file)
@@ -81,7 +81,7 @@ public:
        ///
        int no_items;
        ///
-       std::vector<char> buff;
+       std::string buff;
        ///
        int status;
        ///
index 3d40fc48fc605c2d3d14253a7791f0a2c1e9f414..10a4138638697c3969b69746263e299b0e24dbe3 100644 (file)
@@ -20,6 +20,7 @@
 
 namespace lyx {
 
+
 docstring const from_ascii(char const * ascii)
 {
        docstring s;
@@ -53,11 +54,33 @@ std::string const to_ascii(docstring const & ucs4)
 }
 
 
+void utf8_to_ucs4(std::string const & utf8, docstring & ucs4)
+{
+       size_t n = utf8.size();
+       // as utf8 is a multi-byte encoding, there would be at most
+       // n characters:
+       ucs4.resize(n);
+       if (n == 0)
+               return;
+
+       int maxoutsize = n * 4;
+       int cd = -1;
+       // basic_string::data() is not recognized by some old gcc version
+       // so we use &(ucs4[0]) instead.
+       char * outbuf = (char *)(&(ucs4[0]));
+       int bytes = iconv_convert(cd, ucs4_codeset, "UTF-8",
+               utf8.c_str(), n, outbuf, maxoutsize);
+
+       // adjust to the real converted size
+       ucs4.resize(bytes/4);
+}
+
+
 docstring const from_utf8(std::string const & utf8)
 {
-       std::vector<lyx::char_type> const ucs4 =
-               utf8_to_ucs4(utf8.data(), utf8.size());
-       return docstring(ucs4.begin(), ucs4.end());
+       docstring ucs4;
+       utf8_to_ucs4(utf8, ucs4);
+       return ucs4;
 }
 
 
index c9b9210a4e9f1e6fe7251ecdbcc931088f7b2392..405e145a8761dfce5879f5a79549e2a94ca1c961 100644 (file)
@@ -35,22 +35,20 @@ using std::endl;
        char const * ucs2_codeset = "UCS-2LE";
 #endif
 
-namespace {
-
-template<typename RetType, typename InType>
-std::vector<RetType>
-iconv_convert(iconv_t * cd,
+int iconv_convert(int & cd,
              char const * tocode,
              char const * fromcode,
-             InType const * buf,
-             size_t buflen)
+             char const * buf,
+             size_t buflen,
+                 char * outbuf,
+                 size_t maxoutsize)
 {
        if (buflen == 0)
-               return std::vector<RetType>();
+               return 0;
 
-       if (*cd == (iconv_t)(-1)) {
-               *cd = iconv_open(tocode, fromcode);
-               if (*cd == (iconv_t)(-1)) {
+       if (cd == -1) {
+               cd = (int)(iconv_open(tocode, fromcode));
+               if (cd == -1) {
                        lyxerr << "Error returned from iconv_open" << endl;
                        switch (errno) {
                        case EINVAL:
@@ -66,17 +64,13 @@ iconv_convert(iconv_t * cd,
                }
        }
 
-       char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(reinterpret_cast<char const *>(buf));
-       size_t inbytesleft = buflen * sizeof(InType);
-       // The preamble of the user guide is more than 11.500 characters, so we go for 32kb
-       size_t const outsize = 32768;
-       static char out[outsize];
-       char * outbuf = out;
-       size_t outbytesleft = outsize;
+       char ICONV_CONST * inbuf = const_cast<char ICONV_CONST *>(buf);
+       size_t inbytesleft = buflen;
+       size_t outbytesleft = maxoutsize;
 
-       size_t res = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+       int res = iconv((iconv_t)(cd), &inbuf, &inbytesleft, &outbuf, &outbytesleft);
 
-       if (res == (size_t)(-1)) {
+       if (res == -1) {
                lyxerr << "Error returned from iconv" << endl;
                switch (errno) {
                case E2BIG:
@@ -111,17 +105,43 @@ iconv_convert(iconv_t * cd,
                        break;
                }
                // We got an error so we close down the conversion engine
-               if (iconv_close(*cd) == -1) {
+               if (iconv_close((iconv_t)(cd)) == -1) {
                        lyxerr << "Error returned from iconv_close("
                               << errno << ")" << endl;
                }
-               *cd = (iconv_t)(-1);
+               cd = -1;
        }
 
        //lyxerr << std::dec;
        //lyxerr << "Inbytesleft: " << inbytesleft << endl;
        //lyxerr << "Outbytesleft: " << outbytesleft << endl;
-       int bytes = outsize - outbytesleft;
+
+       return maxoutsize - outbytesleft;
+}
+
+
+namespace {
+
+
+template<typename RetType, typename InType>
+std::vector<RetType>
+iconv_convert(int & cd,
+             char const * tocode,
+             char const * fromcode,
+             InType const * buf,
+             size_t buflen)
+{
+       if (buflen == 0)
+               return std::vector<RetType>();
+
+       char const * inbuf = reinterpret_cast<char const *>(buf);
+       size_t inbytesleft = buflen * sizeof(InType);
+
+       size_t const outsize = 32768;
+       static char out[outsize];
+       char * outbuf = out;
+
+       int bytes = lyx::iconv_convert(cd, tocode, fromcode, inbuf, inbytesleft, outbuf, outsize);
 
        RetType const * tmp = reinterpret_cast<RetType const *>(out);
        return std::vector<RetType>(tmp, tmp + bytes / sizeof(RetType));
@@ -142,8 +162,8 @@ std::vector<lyx::char_type> utf8_to_ucs4(std::vector<char> const & utf8str)
 std::vector<lyx::char_type>
 utf8_to_ucs4(char const * utf8str, size_t ls)
 {
-       static iconv_t cd = (iconv_t)(-1);
-       return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, "UTF-8",
+       static int cd = -1;
+       return iconv_convert<lyx::char_type>(cd, ucs4_codeset, "UTF-8",
                                              utf8str, ls);
 }
 
@@ -168,8 +188,8 @@ ucs2_to_ucs4(std::vector<unsigned short> const & ucs2str)
 std::vector<lyx::char_type>
 ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls)
 {
-       static iconv_t cd = (iconv_t)(-1);
-       return iconv_convert<lyx::char_type>(&cd, ucs4_codeset, ucs2_codeset,
+       static int cd = -1;
+       return iconv_convert<lyx::char_type>(cd, ucs4_codeset, ucs2_codeset,
                                              ucs2str, ls);
 }
 
@@ -194,8 +214,8 @@ ucs4_to_ucs2(std::vector<lyx::char_type> const & ucs4str)
 std::vector<unsigned short>
 ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
 {
-       static iconv_t cd = (iconv_t)(-1);
-       return iconv_convert<unsigned short>(&cd, ucs2_codeset, ucs4_codeset,
+       static int cd = -1;
+       return iconv_convert<unsigned short>(cd, ucs2_codeset, ucs4_codeset,
                                             s, ls);
 }
 
@@ -203,8 +223,8 @@ ucs4_to_ucs2(lyx::char_type const * s, size_t ls)
 std::vector<char>
 ucs4_to_utf8(lyx::char_type c)
 {
-       static iconv_t cd = (iconv_t)(-1);
-       return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset, &c, 1);
+       static int cd = -1;
+       return iconv_convert<char>(cd, "UTF-8", ucs4_codeset, &c, 1);
 }
 
 
@@ -221,8 +241,8 @@ ucs4_to_utf8(std::vector<lyx::char_type> const & ucs4str)
 std::vector<char>
 ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
 {
-       static iconv_t cd = (iconv_t)(-1);
-       return iconv_convert<char>(&cd, "UTF-8", ucs4_codeset,
+       static int cd = -1;
+       return iconv_convert<char>(cd, "UTF-8", ucs4_codeset,
                                   ucs4str, ls);
 }
 
@@ -230,10 +250,10 @@ ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls)
 std::vector<lyx::char_type>
 eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding)
 {
-       static std::map<std::string, iconv_t> cd;
+       static std::map<std::string, int> cd;
        if (cd.find(encoding) == cd.end())
-               cd[encoding] = (iconv_t)(-1);
-       return iconv_convert<char_type>(&cd[encoding], ucs4_codeset,
+               cd[encoding] = -1;
+       return iconv_convert<char_type>(cd[encoding], ucs4_codeset,
                                        encoding.c_str(), s, ls);
 }
 
@@ -241,10 +261,10 @@ eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding)
 std::vector<char>
 ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding)
 {
-       static std::map<std::string, iconv_t> cd;
+       static std::map<std::string, int> cd;
        if (cd.find(encoding) == cd.end())
-               cd[encoding] = (iconv_t)(-1);
-       return iconv_convert<char>(&cd[encoding], encoding.c_str(),
+               cd[encoding] = -1;
+       return iconv_convert<char>(cd[encoding], encoding.c_str(),
                                   ucs4_codeset, ucs4str, ls);
 }
 
index 7f99f528320c5cedf2627faca5c16980b6ff35e8..fa9b4c897b0a4207147fff58a9865db333149957 100644 (file)
@@ -66,6 +66,16 @@ eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding);
 std::vector<char>
 ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding);
 
+/// Convert \c buflen bytes of \c buf from encoding \c fromcode to \c tocode.
+/// \return the number of bytes written to \c outbuf.
+extern int iconv_convert(int & cd, ///< iconv descriptor; opened here when it is -1, reset to -1 on conversion error
+             char const * tocode,
+             char const * fromcode,
+             char const * buf, ///< input buffer
+             size_t buflen,    ///< input buffer size in bytes
+                 char * outbuf,    ///< output buffer
+                 size_t maxoutsize);    ///< maximum output buffer size in bytes
+
 extern char const * ucs4_codeset;
 extern char const * ucs2_codeset;