From 826c31a80d3ad7dc310790c99ac20427f33fb905 Mon Sep 17 00:00:00 2001 From: Abdelrazak Younes Date: Sun, 29 Oct 2006 21:59:59 +0000 Subject: [PATCH] * unicode.[Ch]: new IconvProcessor class that makes it possible to split the initialization and conversion parts of iconv_convert(). * docstring.C: utf8_to_ucs4() makes use of IconvProcessor instead of iconv_convert(). The IconvProcessor interface will make it possible to switch to a processor other than iconv. git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@15609 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/support/docstring.C | 5 +- src/support/unicode.C | 182 +++++++++++++++++++++++----------------- src/support/unicode.h | 38 ++++++--- 3 files changed, 137 insertions(+), 88 deletions(-) diff --git a/src/support/docstring.C b/src/support/docstring.C index 10a4138638..3930146bc0 100644 --- a/src/support/docstring.C +++ b/src/support/docstring.C @@ -56,6 +56,8 @@ std::string const to_ascii(docstring const & ucs4) void utf8_to_ucs4(std::string const & utf8, docstring & ucs4) { + static IconvProcessor iconv(ucs4_codeset, "UTF-8"); + size_t n = utf8.size(); // as utf8 is a multi-byte encoding, there would be at most // n characters: @@ -68,8 +70,7 @@ void utf8_to_ucs4(std::string const & utf8, docstring & ucs4) // basic_string::data() is not recognized by some old gcc version // so we use &(ucs4[0]) instead.
char * outbuf = (char *)(&(ucs4[0])); - int bytes = iconv_convert(cd, ucs4_codeset, "UTF-8", - utf8.c_str(), n, outbuf, maxoutsize); + int bytes = iconv.convert(utf8.c_str(), n, outbuf, maxoutsize); // adjust to the real converted size ucs4.resize(bytes/4); diff --git a/src/support/unicode.C b/src/support/unicode.C index 405e145a87..f5518b4f0b 100644 --- a/src/support/unicode.C +++ b/src/support/unicode.C @@ -22,11 +22,10 @@ #include #include +using std::endl; namespace lyx { -using std::endl; - #ifdef WORDS_BIGENDIAN char const * ucs4_codeset = "UCS-4BE"; char const * ucs2_codeset = "UCS-2BE"; @@ -35,52 +34,93 @@ using std::endl; char const * ucs2_codeset = "UCS-2LE"; #endif -int iconv_convert(int & cd, - char const * tocode, - char const * fromcode, - char const * buf, - size_t buflen, - char * outbuf, - size_t maxoutsize) +static const iconv_t invalid_cd = (iconv_t)(-1); + + +struct IconvProcessor::Private { + Private(): cd(invalid_cd) {} + iconv_t cd; +}; + + +IconvProcessor::IconvProcessor(char const * tocode, + char const * fromcode): tocode_(tocode), fromcode_(fromcode), + pimpl_(new IconvProcessor::Private) +{ +} + + +IconvProcessor::~IconvProcessor() +{ + if (iconv_close(pimpl_->cd) == -1) { + lyxerr << "Error returned from iconv_close(" + << errno << ")" << endl; + } + delete pimpl_; +} + + +bool IconvProcessor::init() +{ + if (pimpl_->cd != invalid_cd) + return true; + + pimpl_->cd = iconv_open(tocode_.c_str(), fromcode_.c_str()); + if (pimpl_->cd != invalid_cd) + return true; + + lyxerr << "Error returned from iconv_open" << endl; + switch (errno) { + case EINVAL: + lyxerr << "EINVAL The conversion from " << fromcode_ + << " to " << tocode_ + << " is not supported by the implementation." 
+ << endl; + break; + default: + lyxerr << "\tSome other error: " << errno << endl; + break; + } + return false; +} + + +int IconvProcessor::convert(char const * buf, size_t buflen, + char * outbuf, size_t maxoutsize) { if (buflen == 0) return 0; - if (cd == -1) { - cd = (int)(iconv_open(tocode, fromcode)); - if (cd == -1) { - lyxerr << "Error returned from iconv_open" << endl; - switch (errno) { - case EINVAL: - lyxerr << "EINVAL The conversion from " << fromcode - << " to " << tocode - << " is not supported by the implementation." - << endl; - break; - default: - lyxerr << "\tSome other error: " << errno << endl; - break; - } - } + if (pimpl_->cd == invalid_cd) { + if (!init()) + return -1; } char ICONV_CONST * inbuf = const_cast(buf); size_t inbytesleft = buflen; size_t outbytesleft = maxoutsize; - int res = iconv((iconv_t)(cd), &inbuf, &inbytesleft, &outbuf, &outbytesleft); + int res = iconv(pimpl_->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); - if (res == -1) { - lyxerr << "Error returned from iconv" << endl; - switch (errno) { + //lyxerr << std::dec; + //lyxerr << "Inbytesleft: " << inbytesleft << endl; + //lyxerr << "Outbytesleft: " << outbytesleft << endl; + + if (res != -1) + // Everything went well. + return maxoutsize - outbytesleft; + + // There are some errors in the conversion + lyxerr << "Error returned from iconv" << endl; + switch (errno) { case E2BIG: lyxerr << "E2BIG There is not sufficient room at *outbuf." 
<< endl; break; case EILSEQ: lyxerr << "EILSEQ An invalid multibyte sequence" - << " has been encountered in the input.\n" - << "When converting from " << fromcode - << " to " << tocode << ".\n"; + << " has been encountered in the input.\n" + << "When converting from " << fromcode_ + << " to " << tocode_ << ".\n"; lyxerr << "Input: " << std::hex; for (size_t i = 0; i < buflen; ++i) { boost::uint32_t const b = buf[i]; @@ -90,9 +130,9 @@ int iconv_convert(int & cd, break; case EINVAL: lyxerr << "EINVAL An incomplete multibyte sequence" - << " has been encountered in the input.\n" - << "When converting from " << fromcode - << " to " << tocode << ".\n"; + << " has been encountered in the input.\n" + << "When converting from " << fromcode_ + << " to " << tocode_ << ".\n"; lyxerr << "Input: " << std::hex; for (size_t i = 0; i < buflen; ++i) { boost::uint32_t const b = buf[i]; @@ -103,20 +143,14 @@ int iconv_convert(int & cd, default: lyxerr << "\tSome other error: " << errno << endl; break; - } - // We got an error so we close down the conversion engine - if (iconv_close((iconv_t)(cd)) == -1) { - lyxerr << "Error returned from iconv_close(" - << errno << ")" << endl; - } - cd = -1; } - - //lyxerr << std::dec; - //lyxerr << "Inbytesleft: " << inbytesleft << endl; - //lyxerr << "Outbytesleft: " << outbytesleft << endl; - - return maxoutsize - outbytesleft; + // We got an error so we close down the conversion engine + if (iconv_close(pimpl_->cd) == -1) { + lyxerr << "Error returned from iconv_close(" + << errno << ")" << endl; + } + pimpl_->cd = invalid_cd; + return -1; } @@ -125,9 +159,7 @@ namespace { template std::vector -iconv_convert(int & cd, - char const * tocode, - char const * fromcode, +iconv_convert(IconvProcessor & processor, InType const * buf, size_t buflen) { @@ -141,7 +173,7 @@ iconv_convert(int & cd, static char out[outsize]; char * outbuf = out; - int bytes = lyx::iconv_convert(cd, tocode, fromcode, inbuf, inbytesleft, outbuf, outsize); + int bytes = 
processor.convert(inbuf, inbytesleft, outbuf, outsize); RetType const * tmp = reinterpret_cast(out); return std::vector(tmp, tmp + bytes / sizeof(RetType)); @@ -162,9 +194,8 @@ std::vector utf8_to_ucs4(std::vector const & utf8str) std::vector utf8_to_ucs4(char const * utf8str, size_t ls) { - static int cd = -1; - return iconv_convert(cd, ucs4_codeset, "UTF-8", - utf8str, ls); + static IconvProcessor processor(ucs4_codeset, "UTF-8"); + return iconv_convert(processor, utf8str, ls); } @@ -188,9 +219,8 @@ ucs2_to_ucs4(std::vector const & ucs2str) std::vector ucs2_to_ucs4(unsigned short const * ucs2str, size_t ls) { - static int cd = -1; - return iconv_convert(cd, ucs4_codeset, ucs2_codeset, - ucs2str, ls); + static IconvProcessor processor(ucs4_codeset, ucs2_codeset); + return iconv_convert(processor, ucs2str, ls); } @@ -214,17 +244,16 @@ ucs4_to_ucs2(std::vector const & ucs4str) std::vector ucs4_to_ucs2(lyx::char_type const * s, size_t ls) { - static int cd = -1; - return iconv_convert(cd, ucs2_codeset, ucs4_codeset, - s, ls); + static IconvProcessor processor(ucs2_codeset, ucs4_codeset); + return iconv_convert(processor, s, ls); } std::vector ucs4_to_utf8(lyx::char_type c) { - static int cd = -1; - return iconv_convert(cd, "UTF-8", ucs4_codeset, &c, 1); + static IconvProcessor processor("UTF-8", ucs4_codeset); + return iconv_convert(processor, &c, 1); } @@ -241,31 +270,32 @@ ucs4_to_utf8(std::vector const & ucs4str) std::vector ucs4_to_utf8(lyx::char_type const * ucs4str, size_t ls) { - static int cd = -1; - return iconv_convert(cd, "UTF-8", ucs4_codeset, - ucs4str, ls); + static IconvProcessor processor("UTF-8", ucs4_codeset); + return iconv_convert(processor, ucs4str, ls); } std::vector eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding) { - static std::map cd; - if (cd.find(encoding) == cd.end()) - cd[encoding] = -1; - return iconv_convert(cd[encoding], ucs4_codeset, - encoding.c_str(), s, ls); + static std::map processors; + if 
(processors.find(encoding) == processors.end()) { + IconvProcessor processor(ucs4_codeset, encoding.c_str()); + processors.insert(std::make_pair(encoding, processor)); + } + return iconv_convert(processors[encoding], s, ls); } std::vector ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding) { - static std::map cd; - if (cd.find(encoding) == cd.end()) - cd[encoding] = -1; - return iconv_convert(cd[encoding], encoding.c_str(), - ucs4_codeset, ucs4str, ls); + static std::map processors; + if (processors.find(encoding) == processors.end()) { + IconvProcessor processor(encoding.c_str(), ucs4_codeset); + processors.insert(std::make_pair(encoding, processor)); + } + return iconv_convert(processors[encoding], ucs4str, ls); } } // namespace lyx diff --git a/src/support/unicode.h b/src/support/unicode.h index fa9b4c897b..055dd7029d 100644 --- a/src/support/unicode.h +++ b/src/support/unicode.h @@ -15,11 +15,39 @@ #include "support/types.h" +#include #include namespace lyx { +class IconvProcessor +{ +public: + IconvProcessor( + char const * tocode = "", + char const * fromcode = ""); + ~IconvProcessor(); + + /// convert any data from \c fromcode to \c tocode unicode format. + /// \return the number of bytes of the converted output buffer. + int convert( + char const * in_buffer, + size_t in_size, + char * out_buffer, + size_t max_out_size); +private: + /// open iconv. + /// \return true if the processor is ready to use. + bool init(); + + std::string const tocode_; + std::string const fromcode_; + + struct Private; + Private * pimpl_; +}; + // utf8_to_ucs4 // A single codepoint conversion for utf8_to_ucs4 does not make @@ -66,16 +94,6 @@ eightbit_to_ucs4(char const * s, size_t ls, std::string const & encoding); std::vector ucs4_to_eightbit(lyx::char_type const * ucs4str, size_t ls, std::string const & encoding); -/// convert any data from \c fromcode to \c tocode unicode format. 
-/// \return the number of bytes of the converted output buffer. -extern int iconv_convert(int & cd, - char const * tocode, - char const * fromcode, - char const * buf, ///< maximum input buffer - size_t buflen, ///< maximum input buffer size in bytes - char * outbuf, ///< maximum output buffer - size_t maxoutsize); ///< maximum output buffer size in bytes - extern char const * ucs4_codeset; extern char const * ucs2_codeset; -- 2.39.2