Use wctype character classification functions if possible

author Georg Baum <Georg.Baum@post.rwth-aachen.de>

Mon, 13 Nov 2006 09:53:25 +0000 (09:53 +0000)

committer Georg Baum <Georg.Baum@post.rwth-aachen.de>

Mon, 13 Nov 2006 09:53:25 +0000 (09:53 +0000)
author Georg Baum <Georg.Baum@post.rwth-aachen.de>
Mon, 13 Nov 2006 09:53:25 +0000 (09:53 +0000)
committer Georg Baum <Georg.Baum@post.rwth-aachen.de>
Mon, 13 Nov 2006 09:53:25 +0000 (09:53 +0000)
diff --git a/configure.ac b/configure.ac

index 8f96cbae6322b5aae093405b37cd121b794d2e9f..a8679d4ff5c5f74a23837ea005d4596468f691c6 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -383,6 +383,10 @@ int mkstemp(char*);
  #  define WANT_GETFILEATTRIBUTESEX_WRAPPER 1
  #endif
  
+#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4
+#  define LIBC_WCTYPE_USES_UCS4
+#endif
+
  #endif
  ])
  
diff --git a/development/cmake/config.h.cmake b/development/cmake/config.h.cmake

index f08c853ef65e32dddb8269404313addcb7a3985f..d7f3886f122cf905b62c8b0550ed72ff19349e92 100644 (file)
--- a/development/cmake/config.h.cmake
+++ b/development/cmake/config.h.cmake
@@ -158,6 +158,10 @@
  #  define WANT_GETFILEATTRIBUTESEX_WRAPPER 1
  #endif
  
+#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4
+#  define LIBC_WCTYPE_USES_UCS4
+#endif
+
  #if defined(MAKE_INTL_LIB) && defined(_MSC_VER)
  #define __attribute__(x)
  #define inline
diff --git a/development/scons/SConstruct b/development/scons/SConstruct

index bca2d69b26d90a05613d30985d1dfba00674a09a..8e23e71b86445a4e1f00d605597a932683c2cf58 100644 (file)
--- a/development/scons/SConstruct
+++ b/development/scons/SConstruct
@@ -895,6 +895,10 @@ utils.createConfigFile(conf,
  #  define WANT_GETFILEATTRIBUTESEX_WRAPPER 1
  #endif
  
+#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4
+#  define LIBC_WCTYPE_USES_UCS4
+#endif
+
  #endif
  '''
  )
diff --git a/src/buffer.C b/src/buffer.C

index cf93290a146ae648b51733d393d9cab4c61a7fac..32960215b9eff2d7e94671dacb93dcec823e54b0 100644 (file)
--- a/src/buffer.C
+++ b/src/buffer.C
@@ -554,11 +554,9 @@ void Buffer::insertStringAsLines(ParagraphList & pars,
                                 }
                                 space_inserted = true;
                         }
-/* FIXME: not needed anymore?
                 } else if (!isPrintable(*cit)) {
                         // Ignore unprintables
                         continue;
-*/
                 } else {
                         // just insert the character
                         par.insertChar(pos, *cit, font, params().trackChanges);
diff --git a/src/paragraph.C b/src/paragraph.C

index efb49e8f1377f21cccb5b9c6502bdc3ace5e7d48..c0c53c3e524220eed2aefacf3aef80680facef39 100644 (file)
--- a/src/paragraph.C
+++ b/src/paragraph.C
@@ -1392,7 +1392,6 @@ docstring const Paragraph::asString(Buffer const & buffer,
  
         for (pos_type i = beg; i < end; ++i) {
                 value_type const c = getUChar(buffer.params(), i);
-               // FIXME: isPrintable does not work for lyx::char_type
                 if (isPrintable(c))
                         os.put(c);
                 else if (c == META_INSET)
@@ -1570,6 +1569,7 @@ char_type Paragraph::transformChar(char_type c, pos_type pos) const
  {
         if (!Encodings::is_arabic(c))
                 if (lyxrc.font_norm_type == LyXRC::ISO_8859_6_8 && isDigit(c))
+                       // FIXME UNICODE What does this do?
                         return c + (0xb0 - '0');
                 else
                         return c;
diff --git a/src/support/lstrings.C b/src/support/lstrings.C

index 9761da6d34ba6add3551cc7c2177020a49992b4e..324f33e1ee6a7f9d1c7c24f2eef2cace92c984f9 100644 (file)
--- a/src/support/lstrings.C
+++ b/src/support/lstrings.C
@@ -32,6 +32,16 @@
  #include <algorithm>
  #include <sstream>
  
+#ifdef LIBC_WCTYPE_USES_UCS4
+// We can use the libc ctype functions because we unset the LC_CTYPE
+// category of the current locale in gettext.C
+#include <wctype.h>
+#else
+// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
+// The code that we currently use does not really work.
+#endif
+
+
  using lyx::docstring;
  
  using std::transform;
@@ -76,8 +86,8 @@ int compare_no_case(docstring const & s, docstring const & s2)
         docstring::const_iterator p2 = s2.begin();
  
         while (p != s.end() && p2 != s2.end()) {
-               int const lc1 = tolower(*p);
-               int const lc2 = tolower(*p2);
+               char_type const lc1 = lowercase(*p);
+               char_type const lc2 = lowercase(*p2);
                 if (lc1 != lc2)
                         return (lc1 < lc2) ? -1 : 1;
                 ++p;
@@ -94,18 +104,20 @@ int compare_no_case(docstring const & s, docstring const & s2)
  
  namespace {
  
-int ascii_tolower(int c) {
+template<typename Char>
+Char ascii_tolower(Char c) {
         if (c >= 'A' && c <= 'Z')
                 return c - 'A' + 'a';
         return c;
  }
  
+}
  
-template<typename String> inline
-int do_compare_ascii_no_case(String const & s, String const & s2)
+
+int compare_ascii_no_case(string const & s, string const & s2)
  {
-       typename String::const_iterator p = s.begin();
-       typename String::const_iterator p2 = s2.begin();
+       string::const_iterator p = s.begin();
+       string::const_iterator p2 = s2.begin();
  
         while (p != s.end() && p2 != s2.end()) {
                 int const lc1 = ascii_tolower(*p);
@@ -123,18 +135,26 @@ int do_compare_ascii_no_case(String const & s, String const & s2)
         return 1;
  }
  
-}
  
-
-int compare_ascii_no_case(string const & s, string const & s2)
+int compare_ascii_no_case(docstring const & s, docstring const & s2)
  {
-       return do_compare_ascii_no_case(s, s2);
-}
+       docstring::const_iterator p = s.begin();
+       docstring::const_iterator p2 = s2.begin();
  
+       while (p != s.end() && p2 != s2.end()) {
+               char_type const lc1 = ascii_tolower(*p);
+               char_type const lc2 = ascii_tolower(*p2);
+               if (lc1 != lc2)
+                       return (lc1 < lc2) ? -1 : 1;
+               ++p;
+               ++p2;
+       }
  
-int compare_ascii_no_case(docstring const & s, docstring const & s2)
-{
-       return do_compare_ascii_no_case(s, s2);
+       if (s.size() == s2.size())
+               return 0;
+       if (s.size() < s2.size())
+               return -1;
+       return 1;
  }
  
  
@@ -300,7 +320,9 @@ char uppercase(char c)
         return char(toupper(c));
  }
  
-// FIXME for lowercase() and uppercase() function below:
+
+// FIXME UNICODE
+// for lowercase() and uppercase() function below when wchar_t is not used:
  // 1) std::tolower() and std::toupper() are templates that
  // compile fine with char_type. With the test (c >= 256) we
  // do not trust these function to do the right thing with
@@ -310,19 +332,27 @@ char uppercase(char c)
  
  char_type lowercase(char_type c)
  {
+#ifdef LIBC_WCTYPE_USES_UCS4
+       return towlower(c);
+#else
         if (c >= 256)
                 return c;
  
         return tolower(c);
+#endif
  }
  
  
  char_type uppercase(char_type c)
  {
+#ifdef LIBC_WCTYPE_USES_UCS4
+       return towupper(c);
+#else
         if (c >= 256)
                 return c;
  
         return toupper(c);
+#endif
  }
  
  
diff --git a/src/support/textutils.h b/src/support/textutils.h

index c0f3850a6c04938ec05e8091a5d9e13a486fd47c..3d70c69df121a5c60c435f3873fbed7b95f26f9d 100644 (file)
--- a/src/support/textutils.h
+++ b/src/support/textutils.h
@@ -17,12 +17,21 @@
  
  #include "support/types.h"
  
+#ifdef LIBC_WCTYPE_USES_UCS4
+// We can use the libc ctype functions because we unset the LC_CTYPE
+// category of the current locale in gettext.C
+#include <wctype.h>
+#else
+// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
+// The code that we currently use does not really work.
+#endif
+
  
  namespace lyx {
  
  /// return true if the char is a line separator
  inline
-bool isLineSeparatorChar(lyx::char_type c)
+bool isLineSeparatorChar(char_type c)
  {
         return c == ' ';
  }
@@ -30,34 +39,55 @@ bool isLineSeparatorChar(lyx::char_type c)
  
  /// return true if a char is alphabetical (including accented chars)
  inline
-bool isLetterChar(lyx::char_type c)
+bool isLetterChar(char_type c)
  {
+#ifdef LIBC_WCTYPE_USES_UCS4
+       return iswalpha(c);
+#else
+       // FIXME UNICODE This is wrong!
         return (c >= 'A' && c <= 'Z')
                 || (c >= 'a' && c <= 'z')
                 || (c >= 192 && c < 256); // in iso-8859-x these are accented chars
+#endif
  }
  
  
-/// return true if the char is printable (masked to 7-bit ASCII)
+/// return true if the char is printable
  inline
-bool isPrintable(lyx::char_type c)
+bool isPrintable(char_type c)
  {
+#ifdef LIBC_WCTYPE_USES_UCS4
+       return iswprint(c);
+#else
+       // FIXME UNICODE This is wrong!
         return (c & 127) >= ' ';
+#endif
  }
  
  
-/// return true if the char is printable and not a space (masked to 7-bit ASCII)
+/// return true if the char is printable and not a space
  inline
-bool isPrintableNonspace(lyx::char_type c)
+bool isPrintableNonspace(char_type c)
  {
+#ifdef LIBC_WCTYPE_USES_UCS4
+       return iswprint(c) && !iswspace(c);
+#else
+       // FIXME UNICODE This is wrong!
         return (c & 127) > ' ';
+#endif
  }
  
+
  /// return true if a unicode char is a digit.
  inline
-bool isDigit(lyx::char_type ch)
+bool isDigit(char_type c)
  {
-       return ch >= '0' && ch <= '9';
+#ifdef LIBC_WCTYPE_USES_UCS4
+       return iswdigit(c);
+#else
+       // FIXME UNICODE This is wrong!
+       return c >= '0' && c <= '9';
+#endif
  }
author	Georg Baum <Georg.Baum@post.rwth-aachen.de>
	Mon, 13 Nov 2006 09:53:25 +0000 (09:53 +0000)
committer	Georg Baum <Georg.Baum@post.rwth-aachen.de>
	Mon, 13 Nov 2006 09:53:25 +0000 (09:53 +0000)
configure.ac		patch | blob | history
development/cmake/config.h.cmake		patch | blob | history
development/scons/SConstruct		patch | blob | history
src/buffer.C		patch | blob | history
src/paragraph.C		patch | blob | history
src/support/lstrings.C		patch | blob | history
src/support/textutils.h		patch | blob | history