#include <algorithm>
#include <sstream>
+#ifdef LIBC_WCTYPE_USES_UCS4
+// We can use the libc ctype functions because we unset the LC_CTYPE
+// category of the current locale in gettext.C
+#include <wctype.h>
+#else
+// FIXME: Borrow a real Unicode case conversion from somewhere else,
+// e.g. glib (look at gunicode.h). The fallback code that we currently
+// use does not really work.
+#endif
+
+
using lyx::docstring;
using std::transform;
docstring::const_iterator p2 = s2.begin();
while (p != s.end() && p2 != s2.end()) {
- int const lc1 = tolower(*p);
- int const lc2 = tolower(*p2);
+ char_type const lc1 = lowercase(*p);
+ char_type const lc2 = lowercase(*p2);
if (lc1 != lc2)
return (lc1 < lc2) ? -1 : 1;
++p;
namespace {
-int ascii_tolower(int c) {
+template<typename Char>
+Char ascii_tolower(Char c) { // maps 'A'..'Z' to 'a'..'z'; any other value is returned unchanged
if (c >= 'A' && c <= 'Z')
return c - 'A' + 'a';
return c;
}
+}
-template<typename String> inline
-int do_compare_ascii_no_case(String const & s, String const & s2)
+
+int compare_ascii_no_case(string const & s, string const & s2)
{
- typename String::const_iterator p = s.begin();
- typename String::const_iterator p2 = s2.begin();
+ string::const_iterator p = s.begin();
+ string::const_iterator p2 = s2.begin();
while (p != s.end() && p2 != s2.end()) {
int const lc1 = ascii_tolower(*p);
return 1;
}
-}
-
-int compare_ascii_no_case(string const & s, string const & s2)
+int compare_ascii_no_case(docstring const & s, docstring const & s2)
{
- return do_compare_ascii_no_case(s, s2);
-}
+ docstring::const_iterator p = s.begin();
+ docstring::const_iterator p2 = s2.begin();
+ while (p != s.end() && p2 != s2.end()) { // walk both strings in parallel
+ char_type const lc1 = ascii_tolower(*p);
+ char_type const lc2 = ascii_tolower(*p2);
+ if (lc1 != lc2)
+ return (lc1 < lc2) ? -1 : 1; // the first differing character decides
+ ++p;
+ ++p2;
+ }
-int compare_ascii_no_case(docstring const & s, docstring const & s2)
-{
- return do_compare_ascii_no_case(s, s2);
+ if (s.size() == s2.size()) // no difference found in the common prefix
+ return 0;
+ if (s.size() < s2.size()) // the shorter string compares as smaller
+ return -1;
+ return 1;
}
}
+namespace {
+
+// Return true if c is one of the hexadecimal digits
+// '0'-'9', 'a'-'f' or 'A'-'F'.
+inline
+bool isHexChar(char_type c)
+{
+	if (c >= '0' && c <= '9')
+		return true;
+	if (c >= 'a' && c <= 'f')
+		return true;
+	return c >= 'A' && c <= 'F';
+}
+
+} // anon namespace
+
+
+// Return true if every character of str is a hexadecimal digit.
+// A leading "0x" or "0X" is skipped, but only if at least one more
+// character follows it. An empty string is accepted as well.
+bool isHex(docstring const & str)
+{
+	bool const prefixed = str.length() > 2 && str[0] == '0' &&
+		(str[1] == 'x' || str[1] == 'X');
+
+	docstring::const_iterator it = str.begin() + (prefixed ? 2 : 0);
+	docstring::const_iterator const end = str.end();
+	while (it != end) {
+		if (!isHexChar(*it))
+			return false;
+		++it;
+	}
+	return true;
+}
+
+
+// Convert the hexadecimal number at the start of str to an int.
+// The string is converted with to_ascii() and parsed with sscanf's "%x"
+// conversion. Returns 0 if no hexadecimal number could be read.
+int hexToInt(docstring const & str)
+{
+	string const s = to_ascii(str);
+	// "%x" stores its result through an unsigned int pointer. The target
+	// is initialized because sscanf leaves it untouched when nothing
+	// matches (e.g. for an empty string).
+	unsigned int h = 0;
+	if (sscanf(s.c_str(), "%x", &h) != 1)
+		return 0;
+	return static_cast<int>(h);
+}
+
+
+// Return true if str contains no character with a value of 0x80
+// or above. This also holds for an empty string.
+bool isAscii(docstring const & str)
+{
+	docstring::const_iterator const end = str.end();
+	for (docstring::const_iterator it = str.begin(); it != end; ++it) {
+		if (*it >= 0x80)
+			return false;
+	}
+	return true;
+}
+
+
char lowercase(char c)
{
return char(tolower(c));
return char(toupper(c));
}
-// FIXME for lowercase() and uppercase() function below:
+
+// FIXME UNICODE
+// for lowercase() and uppercase() function below when wchar_t is not used:
// 1) std::tolower() and std::toupper() are templates that
// compile fine with char_type. With the test (c >= 256) we
// do not trust these function to do the right thing with
char_type lowercase(char_type c)
{
+#ifdef LIBC_WCTYPE_USES_UCS4 // use the wide character function from <wctype.h>
+ return towlower(c);
+#else // fallback: values of 256 and above are returned unchanged
if (c >= 256)
return c;
return tolower(c);
+#endif
}
char_type uppercase(char_type c)
{
+#ifdef LIBC_WCTYPE_USES_UCS4 // use the wide character function from <wctype.h>
+ return towupper(c);
+#else // fallback: values of 256 and above are returned unchanged
if (c >= 256)
return c;
return toupper(c);
+#endif
}
string const ltrim(string const & a, char const * p)
{
BOOST_ASSERT(p);
-
if (a.empty() || !*p)
return a;
-
string::size_type l = a.find_first_not_of(p);
-
if (l == string::npos)
return string();
-
return a.substr(l, string::npos);
}
+// Return a copy of a without its leading characters that occur in p.
+// p must not be null.
+docstring const ltrim(docstring const & a, char const * p)
+{
+	BOOST_ASSERT(p);
+	// Nothing to strip from an empty string or with an empty set.
+	if (a.empty() || !*p)
+		return a;
+	size_t const first = a.find_first_not_of(from_ascii(p));
+	if (first != docstring::npos)
+		return a.substr(first, docstring::npos);
+	// Every character of a is contained in p.
+	return docstring();
+}
+
namespace {
template<typename String, typename Char> inline
}
-// This function escapes 8-bit characters and other problematic
-// characters that cause problems in latex labels.
docstring const escape(docstring const & lab)
{
- lyx::char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
- '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+ char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
docstring enc;
for (docstring::size_type i = 0; i < lab.length(); ++i) {
- lyx::char_type c = lab[i];
- // FIXME We must change the following algorithm for UCS4
- // chars, but that will be a file format change.
+ char_type c = lab[i];
if (c >= 128 || c == '=' || c == '%') {
+ // Although char_type is a 32 bit type we know that
+ // UCS4 occupies only 21 bits, so we don't need to
+ // encode bigger values. Test for 2^24 because we
+ // can encode that with the 6 hex digits that are
+ // needed for 21 bits anyway.
+ BOOST_ASSERT(c < (1 << 24));
enc += '=';
- enc += hexdigit[c>>4];
- enc += hexdigit[c & 15];
+ enc += hexdigit[(c>>20) & 15];
+ enc += hexdigit[(c>>16) & 15];
+ enc += hexdigit[(c>>12) & 15];
+ enc += hexdigit[(c>> 8) & 15];
+ enc += hexdigit[(c>> 4) & 15];
+ enc += hexdigit[ c & 15];
} else {
enc += c;
}