From: Georg Baum Date: Mon, 13 Nov 2006 09:53:25 +0000 (+0000) Subject: Use wctype character classification functions if possible X-Git-Tag: 1.6.10~11900 X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=a116e04b8b75b847bfcb5114cc4d92ce3cc2386a;p=features.git Use wctype character classification functions if possible * src/buffer.C (Buffer::insertStringAsLines): Uncomment isPrintable test * src/support/lstrings.C (compare_no_case): Use char_type and not int for the docstring version (ascii_tolower): Convert to a template (compare_ascii_no_case): Do not use a template anymore, because we need int for the string version and char_type for the docstring version as intermediate type (lowercase): Use towlower if possible (uppercase): Use towupper if possible * src/support/textutils.h (isLetterChar): Use iswalpha if possible (isPrintable): Use iswprint if possible (isPrintableNonspace): Use iswprint and iswspace if possible (isDigit): Use iswdigit if possible * src/paragraph.C (Paragraph::asString): remove obsolete FIXME (Paragraph::transformChar): add FIXME * configure.ac: Add definition of LIBC_WCTYPE_USES_UCS4 to config.h * development/cmake/config.h.cmake: ditto * development/scons/SConstruct: ditto git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@15893 a592a061-630c-0410-9148-cb99ea01b6c8 --- diff --git a/configure.ac b/configure.ac index 8f96cbae63..a8679d4ff5 100644 --- a/configure.ac +++ b/configure.ac @@ -383,6 +383,10 @@ int mkstemp(char*); # define WANT_GETFILEATTRIBUTESEX_WRAPPER 1 #endif +#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4 +# define LIBC_WCTYPE_USES_UCS4 +#endif + #endif ]) diff --git a/development/cmake/config.h.cmake b/development/cmake/config.h.cmake index f08c853ef6..d7f3886f12 100644 --- a/development/cmake/config.h.cmake +++ b/development/cmake/config.h.cmake @@ -158,6 +158,10 @@ # define WANT_GETFILEATTRIBUTESEX_WRAPPER 1 #endif +#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4 +# define LIBC_WCTYPE_USES_UCS4 +#endif + #if 
defined(MAKE_INTL_LIB) && defined(_MSC_VER) #define __attribute__(x) #define inline diff --git a/development/scons/SConstruct b/development/scons/SConstruct index bca2d69b26..8e23e71b86 100644 --- a/development/scons/SConstruct +++ b/development/scons/SConstruct @@ -895,6 +895,10 @@ utils.createConfigFile(conf, # define WANT_GETFILEATTRIBUTESEX_WRAPPER 1 #endif +#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4 +# define LIBC_WCTYPE_USES_UCS4 +#endif + #endif ''' ) diff --git a/src/buffer.C b/src/buffer.C index cf93290a14..32960215b9 100644 --- a/src/buffer.C +++ b/src/buffer.C @@ -554,11 +554,9 @@ void Buffer::insertStringAsLines(ParagraphList & pars, } space_inserted = true; } -/* FIXME: not needed anymore? } else if (!isPrintable(*cit)) { // Ignore unprintables continue; -*/ } else { // just insert the character par.insertChar(pos, *cit, font, params().trackChanges); diff --git a/src/paragraph.C b/src/paragraph.C index efb49e8f13..c0c53c3e52 100644 --- a/src/paragraph.C +++ b/src/paragraph.C @@ -1392,7 +1392,6 @@ docstring const Paragraph::asString(Buffer const & buffer, for (pos_type i = beg; i < end; ++i) { value_type const c = getUChar(buffer.params(), i); - // FIXME: isPrintable does not work for lyx::char_type if (isPrintable(c)) os.put(c); else if (c == META_INSET) @@ -1570,6 +1569,7 @@ char_type Paragraph::transformChar(char_type c, pos_type pos) const { if (!Encodings::is_arabic(c)) if (lyxrc.font_norm_type == LyXRC::ISO_8859_6_8 && isDigit(c)) + // FIXME UNICODE What does this do? return c + (0xb0 - '0'); else return c; diff --git a/src/support/lstrings.C b/src/support/lstrings.C index 9761da6d34..324f33e1ee 100644 --- a/src/support/lstrings.C +++ b/src/support/lstrings.C @@ -32,6 +32,16 @@ #include #include +#ifdef LIBC_WCTYPE_USES_UCS4 +// We can use the libc ctype functions because we unset the LC_CTYPE +// category of the current locale in gettext.C +#include <wctype.h> +#else +// Steal some code from somewhere else, e.g. 
glib (look at gunicode.h) +// The code that we currently use does not really work. +#endif + + using lyx::docstring; using std::transform; @@ -76,8 +86,8 @@ int compare_no_case(docstring const & s, docstring const & s2) docstring::const_iterator p2 = s2.begin(); while (p != s.end() && p2 != s2.end()) { - int const lc1 = tolower(*p); - int const lc2 = tolower(*p2); + char_type const lc1 = lowercase(*p); + char_type const lc2 = lowercase(*p2); if (lc1 != lc2) return (lc1 < lc2) ? -1 : 1; ++p; @@ -94,18 +104,20 @@ int compare_no_case(docstring const & s, docstring const & s2) namespace { -int ascii_tolower(int c) { +template<typename Char> +Char ascii_tolower(Char c) { if (c >= 'A' && c <= 'Z') return c - 'A' + 'a'; return c; } +} -template<typename String> inline -int do_compare_ascii_no_case(String const & s, String const & s2) + +int compare_ascii_no_case(string const & s, string const & s2) { - typename String::const_iterator p = s.begin(); - typename String::const_iterator p2 = s2.begin(); + string::const_iterator p = s.begin(); + string::const_iterator p2 = s2.begin(); while (p != s.end() && p2 != s2.end()) { int const lc1 = ascii_tolower(*p); @@ -123,18 +135,26 @@ int do_compare_ascii_no_case(String const & s, String const & s2) return 1; } -} - -int compare_ascii_no_case(string const & s, string const & s2) +int compare_ascii_no_case(docstring const & s, docstring const & s2) { - return do_compare_ascii_no_case(s, s2); -} + docstring::const_iterator p = s.begin(); + docstring::const_iterator p2 = s2.begin(); + while (p != s.end() && p2 != s2.end()) { + char_type const lc1 = ascii_tolower(*p); + char_type const lc2 = ascii_tolower(*p2); + if (lc1 != lc2) + return (lc1 < lc2) ? 
-1 : 1; + ++p; + ++p2; + } -int compare_ascii_no_case(docstring const & s, docstring const & s2) -{ - return do_compare_ascii_no_case(s, s2); + if (s.size() == s2.size()) + return 0; + if (s.size() < s2.size()) + return -1; + return 1; } @@ -300,7 +320,9 @@ char uppercase(char c) return char(toupper(c)); } -// FIXME for lowercase() and uppercase() function below: + +// FIXME UNICODE +// for lowercase() and uppercase() function below when wchar_t is not used: // 1) std::tolower() and std::toupper() are templates that // compile fine with char_type. With the test (c >= 256) we // do not trust these function to do the right thing with @@ -310,19 +332,27 @@ char uppercase(char c) char_type lowercase(char_type c) { +#ifdef LIBC_WCTYPE_USES_UCS4 + return towlower(c); +#else if (c >= 256) return c; return tolower(c); +#endif } char_type uppercase(char_type c) { +#ifdef LIBC_WCTYPE_USES_UCS4 + return towupper(c); +#else if (c >= 256) return c; return toupper(c); +#endif } diff --git a/src/support/textutils.h b/src/support/textutils.h index c0f3850a6c..3d70c69df1 100644 --- a/src/support/textutils.h +++ b/src/support/textutils.h @@ -17,12 +17,21 @@ #include "support/types.h" +#ifdef LIBC_WCTYPE_USES_UCS4 +// We can use the libc ctype functions because we unset the LC_CTYPE +// category of the current locale in gettext.C +#include <wctype.h> +#else +// Steal some code from somewhere else, e.g. glib (look at gunicode.h) +// The code that we currently use does not really work. +#endif + namespace lyx { /// return true if the char is a line separator inline -bool isLineSeparatorChar(lyx::char_type c) +bool isLineSeparatorChar(char_type c) { return c == ' '; } @@ -30,34 +39,55 @@ bool isLineSeparatorChar(lyx::char_type c) /// return true if a char is alphabetical (including accented chars) inline -bool isLetterChar(lyx::char_type c) +bool isLetterChar(char_type c) { +#ifdef LIBC_WCTYPE_USES_UCS4 + return iswalpha(c); +#else + // FIXME UNICODE This is wrong! 
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= 192 && c < 256); // in iso-8859-x these are accented chars +#endif } -/// return true if the char is printable (masked to 7-bit ASCII) +/// return true if the char is printable inline -bool isPrintable(lyx::char_type c) +bool isPrintable(char_type c) { +#ifdef LIBC_WCTYPE_USES_UCS4 + return iswprint(c); +#else + // FIXME UNICODE This is wrong! return (c & 127) >= ' '; +#endif } -/// return true if the char is printable and not a space (masked to 7-bit ASCII) +/// return true if the char is printable and not a space inline -bool isPrintableNonspace(lyx::char_type c) +bool isPrintableNonspace(char_type c) { +#ifdef LIBC_WCTYPE_USES_UCS4 + return iswprint(c) && !iswspace(c); +#else + // FIXME UNICODE This is wrong! return (c & 127) > ' '; +#endif } + /// return true if a unicode char is a digit. inline -bool isDigit(lyx::char_type ch) +bool isDigit(char_type c) { - return ch >= '0' && ch <= '9'; +#ifdef LIBC_WCTYPE_USES_UCS4 + return iswdigit(c); +#else + // FIXME UNICODE This is wrong! + return c >= '0' && c <= '9'; +#endif }