From 15be4fb8ef0223583d24151d626a1d51fed87898 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Mon, 4 Sep 2006 14:43:25 +0000 Subject: [PATCH] Fix byte order problems * src/support/unicode.C (bytes_to_ucs4): make it work on big endian machines (bytes_to_ucs2): make it work on little endian machines (ucs2_to_ucs4): ditto (utf8_to_ucs4): Invoke iconv with explicit BE suffix (ucs2_to_ucs4): ditto (ucs4_to_ucs2): ditto (ucs4_to_utf8): ditto * configure.ac: Check for byte order * development/scons/SConstruct: Ditto git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@14890 a592a061-630c-0410-9148-cb99ea01b6c8 --- configure.ac | 3 +++ development/scons/SConstruct | 4 ++++ src/support/unicode.C | 30 +++++++++++++++++++++--------- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index 8f63d1d759..5d1bbb19f7 100644 --- a/configure.ac +++ b/configure.ac @@ -64,6 +64,9 @@ AC_PROG_CC AC_ISC_POSIX AC_AIX +### we need to know the byte order for unicode conversions +AC_C_BIGENDIAN + ### check which frontend we want to use LYX_USE_FRONTENDS diff --git a/development/scons/SConstruct b/development/scons/SConstruct index 97083a8409..c3923ea712 100644 --- a/development/scons/SConstruct +++ b/development/scons/SConstruct @@ -1100,6 +1100,10 @@ int count() (spell_engine is not None, spell_engine, 'Spell engine to use' ), + # we need to know the byte order for unicode conversions + (sys.byteorder == 'big', 'WORDS_BIGENDIAN' + 'Define to 1 if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel and VAX).' + ), ], extra_items = [ ('#define PACKAGE "%s%s"' % (package, program_suffix), diff --git a/src/support/unicode.C b/src/support/unicode.C index 9fbd818ee6..b4b6385f3a 100644 --- a/src/support/unicode.C +++ b/src/support/unicode.C @@ -122,10 +122,17 @@ std::vector bytes_to_ucs4(std::vector const & bytes) boost::uint32_t c; char * cc = reinterpret_cast(&c); +#ifdef WORDS_BIGENDIAN + cc[0] = b1; + cc[1] = b2; + cc[2] = b3; + cc[3] = b4; +#else cc[3] = b1; cc[2] = b2; cc[1] = b3; cc[0] = b4; +#endif if (c > 0xffff) { lyxerr << "Strange ucs4 value encountered\n"; @@ -158,8 +165,13 @@ std::vector bytes_to_ucs2(std::vector const & bytes) unsigned short c; char * cc = reinterpret_cast(&c); +#ifdef WORDS_BIGENDIAN cc[0] = b1; cc[1] = b2; +#else + cc[1] = b1; + cc[0] = b2; +#endif //lyxerr << "0x" // << std::setw(2) << std::setfill('0') << int(b2) @@ -185,7 +197,7 @@ std::vector utf8_to_ucs4(std::vector const & utf8str) //lyxerr << "Res = " << string(res.begin(), res.end()) // << " (" << res.size() << ")" << endl; - std::vector res = iconv_convert("UCS-4", "UTF-8", utf8str); + std::vector res = iconv_convert("UCS-4BE", "UTF-8", utf8str); return bytes_to_ucs4(res); } @@ -200,13 +212,13 @@ ucs2_to_ucs4(std::vector const & ucs2str) //lyxerr << std::hex; for (; cit != end; ++cit) { unsigned short s = *cit; - in.push_back(static_cast(s & 0x00ff)); in.push_back(static_cast((s & 0xff00) >> 8)); - lyxerr << std::setw(2) << std::setfill('0') << (s & 0x00ff) << endl; + in.push_back(static_cast(s & 0x00ff)); lyxerr << std::setw(2) << std::setfill('0') << ((s & 0xff00) >> 8) << endl; + lyxerr << std::setw(2) << std::setfill('0') << (s & 0x00ff) << endl; } - std::vector res = iconv_convert("UCS-4", "UCS-2", in); + std::vector res = iconv_convert("UCS-4BE", "UCS-2BE", in); return bytes_to_ucs4(res); } @@ -224,7 +236,7 @@ ucs4_to_ucs2(std::vector const & ucs4str) in.push_back(static_cast((s & 0x0000ff00) >> 8)); in.push_back(static_cast(s & 0x000000ff)); } - std::vector res = iconv_convert("UCS-2", "UCS-4", in); + std::vector res = iconv_convert("UCS-2BE", "UCS-4BE", in); return bytes_to_ucs2(res); } @@ -239,7 +251,7 @@ ucs4_to_ucs2(boost::uint32_t const * s, size_t ls) in.push_back(static_cast((s[i] & 0x0000ff00) >> 8)); in.push_back(static_cast(s[i] & 0x000000ff)); } - std::vector res = iconv_convert("UCS-2", "UCS-4", in); + std::vector res = iconv_convert("UCS-2BE", "UCS-4BE", in); return bytes_to_ucs2(res); } @@ -252,7 +264,7 @@ ucs4_to_ucs2(boost::uint32_t c) in.push_back(static_cast((c & 0x00ff0000) >> 16)); in.push_back(static_cast((c & 0x0000ff00) >> 8)); in.push_back(static_cast(c & 0x000000ff)); - std::vector res = iconv_convert("UCS-2", "UCS-4", in); + std::vector res = iconv_convert("UCS-2BE", "UCS-4BE", in); std::vector us = bytes_to_ucs2(res); if (!us.empty()) return us[0]; @@ -273,7 +285,7 @@ std::vector ucs4_to_utf8(std::vector const & ucs4str) in.push_back(static_cast((s & 0x0000ff00) >> 8)); in.push_back(static_cast(s & 0x000000ff)); } - std::vector res = iconv_convert("UTF-8", "UCS-4", in); + std::vector res = iconv_convert("UTF-8", "UCS-4BE", in); return res; } @@ -285,6 +297,6 @@ std::vector ucs4_to_utf8(boost::uint32_t c) in.push_back(static_cast((c & 0x00ff0000) >> 16)); in.push_back(static_cast((c & 0x0000ff00) >> 8)); in.push_back(static_cast(c & 0x000000ff)); - std::vector res = iconv_convert("UTF-8", "UCS-4", in); + std::vector res = iconv_convert("UTF-8", "UCS-4BE", in); return res; } -- 2.39.2