From: Georg Baum <Georg.Baum@post.rwth-aachen.de>
Date: Mon, 13 Nov 2006 09:53:25 +0000 (+0000)
Subject: Use wctype character classification functions if possible
X-Git-Tag: 1.6.10~11900
X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=a116e04b8b75b847bfcb5114cc4d92ce3cc2386a;p=features.git

Use wctype character classification functions if possible

	* src/buffer.C
	(Buffer::insertStringAsLines): Uncomment isPrintable test

	* src/support/lstrings.C
	(compare_no_case): Use char_type and not int for the docstring version
	(ascii_tolower): Convert to a template
	(compare_ascii_no_case): Do not use a template anymore, because the
	intermediate type must be int for the string version and char_type
	for the docstring version
	(lowercase): Use towlower if possible
	(uppercase): Use towupper if possible

	* src/support/textutils.h
	(isLetterChar): Use iswalpha if possible
	(isPrintable): Use iswprint if possible
	(isPrintableNonspace): Use iswprint and iswspace if possible
	(isDigit): Use iswdigit if possible

	* src/paragraph.C
	(Paragraph::asString): Remove obsolete FIXME
	(Paragraph::transformChar): Add FIXME

	* configure.ac: Add definition of LIBC_WCTYPE_USES_UCS4 to config.h

	* development/cmake/config.h.cmake: ditto

	* development/scons/SConstruct: ditto


git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@15893 a592a061-630c-0410-9148-cb99ea01b6c8
---

diff --git a/configure.ac b/configure.ac
index 8f96cbae63..a8679d4ff5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -383,6 +383,10 @@ int mkstemp(char*);
 #  define WANT_GETFILEATTRIBUTESEX_WRAPPER 1
 #endif
 
+#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4
+#  define LIBC_WCTYPE_USES_UCS4
+#endif
+
 #endif
 ])
 
diff --git a/development/cmake/config.h.cmake b/development/cmake/config.h.cmake
index f08c853ef6..d7f3886f12 100644
--- a/development/cmake/config.h.cmake
+++ b/development/cmake/config.h.cmake
@@ -158,6 +158,10 @@
 #  define WANT_GETFILEATTRIBUTESEX_WRAPPER 1
 #endif
 
+#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4
+#  define LIBC_WCTYPE_USES_UCS4
+#endif
+
 #if defined(MAKE_INTL_LIB) && defined(_MSC_VER)
 #define __attribute__(x)
 #define inline
diff --git a/development/scons/SConstruct b/development/scons/SConstruct
index bca2d69b26..8e23e71b86 100644
--- a/development/scons/SConstruct
+++ b/development/scons/SConstruct
@@ -895,6 +895,10 @@ utils.createConfigFile(conf,
 #  define WANT_GETFILEATTRIBUTESEX_WRAPPER 1
 #endif
 
+#if defined(HAVE_WCHAR_T) && SIZEOF_WCHAR_T == 4
+#  define LIBC_WCTYPE_USES_UCS4
+#endif
+
 #endif
 '''
 )
diff --git a/src/buffer.C b/src/buffer.C
index cf93290a14..32960215b9 100644
--- a/src/buffer.C
+++ b/src/buffer.C
@@ -554,11 +554,9 @@ void Buffer::insertStringAsLines(ParagraphList & pars,
 				}
 				space_inserted = true;
 			}
-/* FIXME: not needed anymore?
 		} else if (!isPrintable(*cit)) {
 			// Ignore unprintables
 			continue;
-*/
 		} else {
 			// just insert the character
 			par.insertChar(pos, *cit, font, params().trackChanges);
diff --git a/src/paragraph.C b/src/paragraph.C
index efb49e8f13..c0c53c3e52 100644
--- a/src/paragraph.C
+++ b/src/paragraph.C
@@ -1392,7 +1392,6 @@ docstring const Paragraph::asString(Buffer const & buffer,
 
 	for (pos_type i = beg; i < end; ++i) {
 		value_type const c = getUChar(buffer.params(), i);
-		// FIXME: isPrintable does not work for lyx::char_type
 		if (isPrintable(c))
 			os.put(c);
 		else if (c == META_INSET)
@@ -1570,6 +1569,7 @@ char_type Paragraph::transformChar(char_type c, pos_type pos) const
 {
 	if (!Encodings::is_arabic(c))
 		if (lyxrc.font_norm_type == LyXRC::ISO_8859_6_8 && isDigit(c))
+			// FIXME UNICODE What does this do?
 			return c + (0xb0 - '0');
 		else
 			return c;
diff --git a/src/support/lstrings.C b/src/support/lstrings.C
index 9761da6d34..324f33e1ee 100644
--- a/src/support/lstrings.C
+++ b/src/support/lstrings.C
@@ -32,6 +32,16 @@
 #include <algorithm>
 #include <sstream>
 
+#ifdef LIBC_WCTYPE_USES_UCS4
+// We can use the libc ctype functions because we unset the LC_CTYPE
+// category of the current locale in gettext.C
+#include <wctype.h>
+#else
+// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
+// The code that we currently use does not really work.
+#endif
+
+
 using lyx::docstring;
 
 using std::transform;
@@ -76,8 +86,8 @@ int compare_no_case(docstring const & s, docstring const & s2)
 	docstring::const_iterator p2 = s2.begin();
 
 	while (p != s.end() && p2 != s2.end()) {
-		int const lc1 = tolower(*p);
-		int const lc2 = tolower(*p2);
+		char_type const lc1 = lowercase(*p);
+		char_type const lc2 = lowercase(*p2);
 		if (lc1 != lc2)
 			return (lc1 < lc2) ? -1 : 1;
 		++p;
@@ -94,18 +104,20 @@ int compare_no_case(docstring const & s, docstring const & s2)
 
 namespace {
 
-int ascii_tolower(int c) {
+template<typename Char>
+Char ascii_tolower(Char c) {
 	if (c >= 'A' && c <= 'Z')
 		return c - 'A' + 'a';
 	return c;
 }
 
+}
 
-template<typename String> inline
-int do_compare_ascii_no_case(String const & s, String const & s2)
+
+int compare_ascii_no_case(string const & s, string const & s2)
 {
-	typename String::const_iterator p = s.begin();
-	typename String::const_iterator p2 = s2.begin();
+	string::const_iterator p = s.begin();
+	string::const_iterator p2 = s2.begin();
 
 	while (p != s.end() && p2 != s2.end()) {
 		int const lc1 = ascii_tolower(*p);
@@ -123,18 +135,26 @@ int do_compare_ascii_no_case(String const & s, String const & s2)
 	return 1;
 }
 
-}
 
-
-int compare_ascii_no_case(string const & s, string const & s2)
+int compare_ascii_no_case(docstring const & s, docstring const & s2)
 {
-	return do_compare_ascii_no_case(s, s2);
-}
+	docstring::const_iterator p = s.begin();
+	docstring::const_iterator p2 = s2.begin();
 
+	while (p != s.end() && p2 != s2.end()) {
+		char_type const lc1 = ascii_tolower(*p);
+		char_type const lc2 = ascii_tolower(*p2);
+		if (lc1 != lc2)
+			return (lc1 < lc2) ? -1 : 1;
+		++p;
+		++p2;
+	}
 
-int compare_ascii_no_case(docstring const & s, docstring const & s2)
-{
-	return do_compare_ascii_no_case(s, s2);
+	if (s.size() == s2.size())
+		return 0;
+	if (s.size() < s2.size())
+		return -1;
+	return 1;
 }
 
 
@@ -300,7 +320,9 @@ char uppercase(char c)
 	return char(toupper(c));
 }
 
-// FIXME for lowercase() and uppercase() function below:
+
+// FIXME UNICODE
+// for the lowercase() and uppercase() functions below when wchar_t is not used:
 // 1) std::tolower() and std::toupper() are templates that
 // compile fine with char_type. With the test (c >= 256) we
 // do not trust these function to do the right thing with
@@ -310,19 +332,27 @@ char uppercase(char c)
 
 char_type lowercase(char_type c)
 {
+#ifdef LIBC_WCTYPE_USES_UCS4
+	return towlower(c);
+#else
 	if (c >= 256)
 		return c;
 
 	return tolower(c);
+#endif
 }
 
 
 char_type uppercase(char_type c)
 {
+#ifdef LIBC_WCTYPE_USES_UCS4
+	return towupper(c);
+#else
 	if (c >= 256)
 		return c;
 
 	return toupper(c);
+#endif
 }
 
 
diff --git a/src/support/textutils.h b/src/support/textutils.h
index c0f3850a6c..3d70c69df1 100644
--- a/src/support/textutils.h
+++ b/src/support/textutils.h
@@ -17,12 +17,21 @@
 
 #include "support/types.h"
 
+#ifdef LIBC_WCTYPE_USES_UCS4
+// We can use the libc ctype functions because we unset the LC_CTYPE
+// category of the current locale in gettext.C
+#include <wctype.h>
+#else
+// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
+// The code that we currently use does not really work.
+#endif
+
 
 namespace lyx {
 
 /// return true if the char is a line separator
 inline
-bool isLineSeparatorChar(lyx::char_type c)
+bool isLineSeparatorChar(char_type c)
 {
 	return c == ' ';
 }
@@ -30,34 +39,55 @@ bool isLineSeparatorChar(lyx::char_type c)
 
 /// return true if a char is alphabetical (including accented chars)
 inline
-bool isLetterChar(lyx::char_type c)
+bool isLetterChar(char_type c)
 {
+#ifdef LIBC_WCTYPE_USES_UCS4
+	return iswalpha(c);
+#else
+	// FIXME UNICODE This is wrong!
 	return (c >= 'A' && c <= 'Z')
 		|| (c >= 'a' && c <= 'z')
 		|| (c >= 192 && c < 256); // in iso-8859-x these are accented chars
+#endif
 }
 
 
-/// return true if the char is printable (masked to 7-bit ASCII)
+/// return true if the char is printable
 inline
-bool isPrintable(lyx::char_type c)
+bool isPrintable(char_type c)
 {
+#ifdef LIBC_WCTYPE_USES_UCS4
+	return iswprint(c);
+#else
+	// FIXME UNICODE This is wrong!
 	return (c & 127) >= ' ';
+#endif
 }
 
 
-/// return true if the char is printable and not a space (masked to 7-bit ASCII)
+/// return true if the char is printable and not a space
 inline
-bool isPrintableNonspace(lyx::char_type c)
+bool isPrintableNonspace(char_type c)
 {
+#ifdef LIBC_WCTYPE_USES_UCS4
+	return iswprint(c) && !iswspace(c);
+#else
+	// FIXME UNICODE This is wrong!
 	return (c & 127) > ' ';
+#endif
 }
 
+
 /// return true if a unicode char is a digit.
 inline
-bool isDigit(lyx::char_type ch)
+bool isDigit(char_type c)
 {
-	return ch >= '0' && ch <= '9';
+#ifdef LIBC_WCTYPE_USES_UCS4
+	return iswdigit(c);
+#else
+	// FIXME UNICODE This is wrong!
+	return c >= '0' && c <= '9';
+#endif
 }