From: Abdelrazak Younes Date: Sat, 28 Oct 2006 15:16:30 +0000 (+0000) Subject: * LyXLex::Pimpl::buff is now a string. X-Git-Tag: 1.6.10~12158 X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=451b12d00d4e82215ac07b5204179156dc1d3283;p=features.git * LyXLex::Pimpl::buff is now a string. * unicode.[Ch]: - new non template iconv_convert() - iconv_convert() template use above function. * docstring.C: - utf8_to_ucs4(): new function, use the new iconv_convert() function above. - from_utf8(): use utf8_to_ucs4() function above. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@15592 a592a061-630c-0410-9148-cb99ea01b6c8 --- diff --git a/src/lyxlex_pimpl.C b/src/lyxlex_pimpl.C index eb8765db5f..80b757359a 100644 --- a/src/lyxlex_pimpl.C +++ b/src/lyxlex_pimpl.C @@ -70,15 +70,13 @@ LyXLex::Pimpl::Pimpl(keyword_item * tab, int num) string const LyXLex::Pimpl::getString() const { - return string(buff.begin(), buff.end()); + return buff; } docstring const LyXLex::Pimpl::getDocString() const { - std::vector res = utf8_to_ucs4(buff); - docstring dstr(res.begin(), res.end()); - return dstr; + return from_utf8(buff); } @@ -206,13 +204,12 @@ bool LyXLex::Pimpl::next(bool esc /* = false */) // we extract the first word and leaves the rest // in pushTok. (Lgb) if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') { - string tmp; - pushTok = split(pushTok, tmp, ' '); - buff.assign(tmp.begin(), tmp.end()); + buff.clear(); + pushTok = split(pushTok, buff, ' '); return true; } else { - buff.assign(pushTok.begin(), pushTok.end()); - pushTok.erase(); + buff = pushTok; + pushTok.clear(); return true; } } @@ -256,7 +253,7 @@ bool LyXLex::Pimpl::next(bool esc /* = false */) ++lineno; } - buff.pop_back(); + buff.resize(buff.size()-1); status = LEX_DATA; break; } @@ -377,7 +374,7 @@ bool LyXLex::Pimpl::next(bool esc /* = false */) ++lineno; } - buff.pop_back(); + buff.resize(buff.size() -1); status = LEX_DATA; break; } @@ -456,7 +453,7 @@ bool LyXLex::Pimpl::eatLine() if (c == '\n') { ++lineno; - buff.pop_back(); + buff.resize(buff.size() - 1); status = LEX_DATA; return true; } else { @@ -472,13 +469,12 @@ bool LyXLex::Pimpl::nextToken() // we extract the first word and leaves the rest // in pushTok. (Lgb) if (pushTok.find(' ') != string::npos && pushTok[0] == '\\') { - string tmp; - pushTok = split(pushTok, tmp, ' '); - buff.assign(tmp.begin(), tmp.end()); + buff.clear(); + pushTok = split(pushTok, buff, ' '); return true; } else { - buff.assign(pushTok.begin(), pushTok.end()); - pushTok.erase(); + buff = pushTok; + pushTok.clear(); return true; } } diff --git a/src/lyxlex_pimpl.h b/src/lyxlex_pimpl.h index 1a66c47725..a5cb4a9d78 100644 --- a/src/lyxlex_pimpl.h +++ b/src/lyxlex_pimpl.h @@ -81,7 +81,7 @@ public: /// int no_items; /// - std::vector buff; + std::string buff; /// int status; /// diff --git a/src/support/docstring.C b/src/support/docstring.C index 3d40fc48fc..10a4138638 100644 --- a/src/support/docstring.C +++ b/src/support/docstring.C @@ -20,6 +20,7 @@ namespace lyx { + docstring const from_ascii(char const * ascii) { docstring s; @@ -53,11 +54,33 @@ std::string const to_ascii(docstring const & ucs4) } +void utf8_to_ucs4(std::string const & utf8, docstring & ucs4) +{ + size_t n = utf8.size(); + // as utf8 is a multi-byte encoding, there would be at most + // n characters: + ucs4.resize(n); + if (n == 0) + return; + + int maxoutsize = n * 4; + int cd = -1; + // basic_string::data() is not recognized by some old gcc version + // so we use &(ucs4[0]) instead. + char * outbuf = (char *)(&(ucs4[0])); + int bytes = iconv_convert(cd, ucs4_codeset, "UTF-8", + utf8.c_str(), n, outbuf, maxoutsize); + + // adjust to the real converted size + ucs4.resize(bytes/4); +} + + docstring const from_utf8(std::string const & utf8) { - std::vector const ucs4 = - utf8_to_ucs4(utf8.data(), utf8.size()); - return docstring(ucs4.begin(), ucs4.end()); + docstring ucs4; + utf8_to_ucs4(utf8, ucs4); + return ucs4; } diff --git a/src/support/unicode.C b/src/support/unicode.C index c9b9210a4e..405e145a87 100644 --- a/src/support/unicode.C +++ b/src/support/unicode.C @@ -35,22 +35,20 @@ using std::endl; char const * ucs2_codeset = "UCS-2LE"; #endif -namespace { - -template -std::vector -iconv_convert(iconv_t * cd, +int iconv_convert(int & cd, char const * tocode, char const * fromcode, - InType const * buf, - size_t buflen) + char const * buf, + size_t buflen, + char * outbuf, + size_t maxoutsize) { if (buflen == 0) - return std::vector(); + return 0; - if (*cd == (iconv_t)(-1)) { - *cd = iconv_open(tocode, fromcode); - if (*cd == (iconv_t)(-1)) { + if (cd == -1) { + cd = (int)(iconv_open(tocode, fromcode)); + if (cd == -1) { lyxerr << "Error returned from iconv_open" << endl; switch (errno) { case EINVAL: @@ -66,17 +64,13 @@ iconv_convert(iconv_t * cd, } } - char ICONV_CONST * inbuf = const_cast(reinterpret_cast(buf)); - size_t inbytesleft = buflen * sizeof(InType); - // The preamble of the user guide is more than 11.500 characters, so we go for 32kb - size_t const outsize = 32768; - static char out[outsize]; - char * outbuf = out; - size_t outbytesleft = outsize; + char ICONV_CONST * inbuf = const_cast(buf); + size_t inbytesleft = buflen; + size_t outbytesleft = maxoutsize; - size_t res = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + int res = iconv((iconv_t)(cd), &inbuf, &inbytesleft, &outbuf, &outbytesleft); - if (res == (size_t)(-1)) { + if (res == -1) { lyxerr << "Error returned from iconv" << endl; switch (errno) { case E2BIG: @@ -111,17 +105,43 @@ iconv_convert(iconv_t * cd, break; } // We got an error so we close down the conversion engine - if (iconv_close(*cd) == -1) { + if (iconv_close((iconv_t)(cd)) == -1) { lyxerr << "Error returned from iconv_close(" << errno << ")" << endl; } - *cd = (iconv_t)(-1); + cd = -1; } //lyxerr << std::dec; //lyxerr << "Inbytesleft: " << inbytesleft << endl; //lyxerr << "Outbytesleft: " << outbytesleft << endl; - int bytes = outsize - outbytesleft; + + return maxoutsize - outbytesleft; +} + + +namespace { + + +template +std::vector +iconv_convert(int & cd, + char const * tocode, + char const * fromcode, + InType const * buf, + size_t buflen) +{ + if (buflen == 0) + return std::vector(); + + char const * inbuf = reinterpret_cast(buf); + size_t inbytesleft = buflen * sizeof(InType); + + size_t const outsize = 32768; + static char out[outsize]; + char * outbuf = out; + + int bytes = lyx::iconv_convert(cd, tocode, fromcode, inbuf, inbytesleft, outbuf, outsize); RetType const * tmp = reinterpret_cast(out); return std::vector(tmp, tmp + bytes / sizeof(RetType)); @@ -142,8 +162,8 @@ std::vector utf8_to_ucs4(std::vector const & utf8str) std::vector utf8_to_ucs4(char const * utf8str, size_t ls) { - static iconv_t cd = (iconv_t)(-1); - return iconv_convert(&cd, ucs4_codeset, "UTF-8", + static int cd = -1; + return iconv_convert(cd, ucs4_codeset, "UTF-8", utf8str, ls); } @@ -168,8 +188,8 @@ ucs2_to_ucs4(std::vector const & ucs2str) std::vector ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls) { - static iconv_t cd = (iconv_t)(-1); - return iconv_convert(&cd, ucs4_codeset, ucs2_codeset, + static int cd = -1; + return iconv_convert(cd, ucs4_codeset, ucs2_codeset, ucs2str, ls); } @@ -194,8 +214,8 @@ ucs4_to_ucs2(std::vector const & ucs4str) std::vector ucs4_to_ucs2(lyx::char_type const * s, size_t ls) { - static iconv_t cd = (iconv_t)(-1); - return iconv_convert(&cd, ucs2_codeset, ucs4_codeset, + static int cd = -1; + return iconv_convert(cd, ucs2_codeset, ucs4_codeset, s, ls); } @@ -203,8 +223,8 @@ ucs4_to_ucs2(lyx::char_type const * s, size_t ls) std::vector ucs4_to_utf8(lyx::char_type c) { - static iconv_t cd = (iconv_t)(-1); - return iconv_convert(&cd, "UTF-8", ucs4_codeset, &c, 1); + static int cd = -1; + return iconv_convert(cd, "UTF-8", ucs4_codeset, &c, 1); } @@ -221,8 +241,8 @@ ucs4_to_utf8(std::vector const & ucs4str) std::vector ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls) { - static iconv_t cd = (iconv_t)(-1); - return iconv_convert(&cd, "UTF-8", ucs4_codeset, + static int cd = -1; + return iconv_convert(cd, "UTF-8", ucs4_codeset, ucs4str, ls); } @@ -230,10 +250,10 @@ ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls) std::vector eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding) { - static std::map cd; + static std::map cd; if (cd.find(encoding) == cd.end()) - cd[encoding] = (iconv_t)(-1); - return iconv_convert(&cd[encoding], ucs4_codeset, + cd[encoding] = -1; + return iconv_convert(cd[encoding], ucs4_codeset, encoding.c_str(), s, ls); } @@ -241,10 +261,10 @@ eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding) std::vector ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding) { - static std::map cd; + static std::map cd; if (cd.find(encoding) == cd.end()) - cd[encoding] = (iconv_t)(-1); - return iconv_convert(&cd[encoding], encoding.c_str(), + cd[encoding] = -1; + return iconv_convert(cd[encoding], encoding.c_str(), ucs4_codeset, ucs4str, ls); } diff --git a/src/support/unicode.h b/src/support/unicode.h index 7f99f52832..fa9b4c897b 100644 --- a/src/support/unicode.h +++ b/src/support/unicode.h @@ -66,6 +66,16 @@ eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding); std::vector ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding); +/// convert any data from \c fromcode to \c tocode unicode format. +/// \return the number of bytes of the converted output buffer. +extern int iconv_convert(int & cd, + char const * tocode, + char const * fromcode, + char const * buf, ///< maximum input buffer + size_t buflen, ///< maximum input buffer size in bytes + char * outbuf, ///< maximum output buffer + size_t maxoutsize); ///< maximum output buffer size in bytes + extern char const * ucs4_codeset; extern char const * ucs2_codeset;