hopefully fix tex2lyx linking.

[lyx.git] / src / support / lstrings.C
diff --git a/src/support/lstrings.C b/src/support/lstrings.C

index a142dae8fb7dc50d5d63dcee42c570240ecf36a7..324f33e1ee6a7f9d1c7c24f2eef2cace92c984f9 100644 (file)
--- a/src/support/lstrings.C
+++ b/src/support/lstrings.C
@@ -32,6 +32,16 @@
  #include <algorithm>
  #include <sstream>
  
+#ifdef LIBC_WCTYPE_USES_UCS4
+// We can use the libc ctype functions because we unset the LC_CTYPE
+// category of the current locale in gettext.C
+#include <wctype.h>
+#else
+// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
+// The code that we currently use does not really work.
+#endif
+
+
  using lyx::docstring;
  
  using std::transform;
@@ -76,8 +86,8 @@ int compare_no_case(docstring const & s, docstring const & s2)
         docstring::const_iterator p2 = s2.begin();
  
         while (p != s.end() && p2 != s2.end()) {
-               int const lc1 = tolower(*p);
-               int const lc2 = tolower(*p2);
+               char_type const lc1 = lowercase(*p);
+               char_type const lc2 = lowercase(*p2);
                 if (lc1 != lc2)
                         return (lc1 < lc2) ? -1 : 1;
                 ++p;
@@ -93,11 +103,14 @@ int compare_no_case(docstring const & s, docstring const & s2)
  
  
  namespace {
-       int ascii_tolower(int c) {
-               if (c >= 'A' && c <= 'Z')
-                       return c - 'A' + 'a';
-               return c;
-       }
+
+/// Maps 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
+template<typename Char>
+Char ascii_tolower(Char c) {
+       bool const is_upper = c >= 'A' && c <= 'Z';
+       return is_upper ? c - 'A' + 'a' : c;
+}
+
  }
  
  
@@ -123,6 +136,28 @@ int compare_ascii_no_case(string const & s, string const & s2)
  }
  
  
+int compare_ascii_no_case(docstring const & s, docstring const & s2)
+{
+       docstring::const_iterator p = s.begin();
+       docstring::const_iterator p2 = s2.begin();
+
+       while (p != s.end() && p2 != s2.end()) {
+               char_type const lc1 = ascii_tolower(*p);
+               char_type const lc2 = ascii_tolower(*p2);
+               if (lc1 != lc2)
+                       return (lc1 < lc2) ? -1 : 1;
+               ++p;
+               ++p2;
+       }
+
+       if (s.size() == s2.size())
+               return 0;
+       if (s.size() < s2.size())
+               return -1;
+       return 1;
+}
+
+
  int compare_no_case(string const & s, string const & s2, unsigned int len)
  {
         string::const_iterator p = s.begin();
@@ -211,6 +246,69 @@ bool isStrDbl(string const & str)
  }
  
  
+namespace {
+
+inline
+bool isHexChar(char_type c)
+{
+       return c == '0' ||
+               c == '1' ||
+               c == '2' ||
+               c == '3' ||
+               c == '4' ||
+               c == '5' ||
+               c == '6' ||
+               c == '7' ||
+               c == '8' ||
+               c == '9' ||
+               c == 'a' || c == 'A' ||
+               c == 'b' || c == 'B' ||
+               c == 'c' || c == 'C' ||
+               c == 'd' || c == 'D' ||
+               c == 'e' || c == 'E' ||
+               c == 'f' || c == 'F';
+}
+
+} // anon namespace
+
+
+bool isHex(docstring const & str)
+{
+       int index = 0;
+
+       if (str.length() > 2 && str[0] == '0' &&
+           (str[1] == 'x' || str[1] == 'X'))
+               index = 2;
+
+       int const len = str.length();
+
+       for (; index < len; ++index) {
+               if (!isHexChar(str[index]))
+                       return false;
+       }
+       return true;
+}
+
+
+/// Reads \a str as a hexadecimal number; yields 0 when nothing can be parsed.
+int hexToInt(docstring const & str)
+{
+       unsigned int h = 0; // %x stores through unsigned int *; 0 if no match
+       sscanf(to_ascii(str).c_str(), "%x", &h);
+       return static_cast<int>(h);
+}
+
+
+/// True if no character of \a str is 0x80 or above; true for empty \a str.
+bool isAscii(docstring const & str)
+{
+       for (docstring::const_iterator it = str.begin(); it != str.end(); ++it)
+               if (*it >= 0x80)
+                       return false;
+       return true;
+}
+
+
  char lowercase(char c)
  {
         return char(tolower(c));
@@ -222,7 +320,9 @@ char uppercase(char c)
         return char(toupper(c));
  }
  
-// FIXME for lowercase() and uppercase() function below:
+
+// FIXME UNICODE
+// for the lowercase() and uppercase() functions below when wchar_t is not used:
  // 1) std::tolower() and std::toupper() are templates that
  // compile fine with char_type. With the test (c >= 256) we
  // do not trust these function to do the right thing with
@@ -232,19 +332,27 @@ char uppercase(char c)
  
  char_type lowercase(char_type c)
  {
+#ifdef LIBC_WCTYPE_USES_UCS4
+       return towlower(c); // wide-character routine from <wctype.h>
+#else
         if (c >= 256) // values from 256 upwards are handed back untouched
                 return c;
  
         return tolower(c); // only reached for c below 256
+#endif
  }
  
  
  char_type uppercase(char_type c)
  {
+#ifdef LIBC_WCTYPE_USES_UCS4
+       return towupper(c); // wide-character routine from <wctype.h>
+#else
         if (c >= 256) // values from 256 upwards are handed back untouched
                 return c;
  
         return toupper(c); // only reached for c below 256
+#endif
  }
  
  
@@ -314,6 +422,18 @@ bool prefixIs(string const & a, string const & pre)
  }
  
  
+bool prefixIs(docstring const & a, docstring const & pre)
+{
+       docstring::size_type const prelen = pre.length();
+       docstring::size_type const alen = a.length();
+
+       if (prelen > alen || a.empty())
+               return false;
+       else
+               return a.compare(0, prelen, pre) == 0;
+}
+
+
  bool suffixIs(string const & a, char c)
  {
         if (a.empty()) return false;
@@ -411,6 +531,7 @@ int tokenPos(string const & a, char delim, string const & tok)
  
  namespace {
  
+/// Substitute all \a oldchar with \a newchar
  template<typename Ch> inline
  std::basic_string<Ch> const subst_char(std::basic_string<Ch> const & a,
                 Ch oldchar, Ch newchar)
@@ -426,6 +547,7 @@ std::basic_string<Ch> const subst_char(std::basic_string<Ch> const & a,
  }
  
  
+/// substitutes all instances of \a oldstr with \a newstr
  template<typename String> inline
  String const subst_string(String const & a,
                 String const & oldstr, String const & newstr)
@@ -472,6 +594,25 @@ docstring const subst(docstring const & a,
  }
  
  
+docstring const trim(docstring const & a, char const * p)
+{
+       BOOST_ASSERT(p);
+
+       if (a.empty() || !*p)
+               return a;
+
+       docstring s = lyx::from_ascii(p);
+       docstring::size_type r = a.find_last_not_of(s);
+       docstring::size_type l = a.find_first_not_of(s);
+
+       // Is this the minimal test? (lgb)
+       if (r == docstring::npos && l == docstring::npos)
+               return docstring();
+
+       return a.substr(l, r - l + 1);
+}
+
+
  string const trim(string const & a, char const * p)
  {
         BOOST_ASSERT(p);
@@ -510,26 +651,36 @@ string const rtrim(string const & a, char const * p)
  string const ltrim(string const & a, char const * p)
  {
         BOOST_ASSERT(p);
-
         if (a.empty() || !*p) // nothing to strip, or no characters to strip with
                 return a;
-
         string::size_type l = a.find_first_not_of(p); // first character to keep
-
         if (l == string::npos) // a holds only characters from p
                 return string();
-
         return a.substr(l, string::npos);
  }
  
  
-string const split(string const & a, string & piece, char delim)
+docstring const ltrim(docstring const & a, char const * p)
  {
-       string tmp;
-       string::size_type i = a.find(delim);
+       BOOST_ASSERT(p);
+       if (a.empty() || !*p) // nothing to strip, or no characters to strip with
+               return a;
+       size_t l = a.find_first_not_of(from_ascii(p)); // first character to keep
+       if (l == docstring::npos) // a holds only characters from p
+               return docstring();
+       return a.substr(l, docstring::npos);
+}
+
+namespace {
+
+template<typename String, typename Char> inline
+String const doSplit(String const & a, String & piece, Char delim)
+{
+       String tmp;
+       typename String::size_type i = a.find(delim);
         if (i == a.length() - 1) {
                 piece = a.substr(0, i);
-       } else if (i != string::npos) {
+       } else if (i != String::npos) {
                 piece = a.substr(0, i);
                 tmp = a.substr(i + 1);
         } else if (i == 0) {
@@ -541,6 +692,20 @@ string const split(string const & a, string & piece, char delim)
         return tmp;
  }
  
+}
+
+
+string const split(string const & a, string & piece, char delim)
+{
+       return doSplit(a, piece, delim); // forwards to the doSplit template helper
+}
+
+
+docstring const split(docstring const & a, docstring & piece, char_type delim)
+{
+       return doSplit(a, piece, delim); // forwards to the doSplit template helper
+}
+
  
  string const split(string const & a, char delim)
  {
@@ -567,19 +732,27 @@ string const rsplit(string const & a, string & piece, char delim)
  }
  
  
-// This function escapes 8-bit characters and other problematic
-// characters that cause problems in latex labels.
-string const escape(string const & lab)
+docstring const escape(docstring const & lab)
  {
-       char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
-                             '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
-       string enc;
-       for (string::size_type i = 0; i < lab.length(); ++i) {
-               unsigned char c= lab[i];
+       char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
+                                  '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+       docstring enc;
+       for (docstring::size_type i = 0; i < lab.length(); ++i) {
+               char_type c = lab[i];
                 if (c >= 128 || c == '=' || c == '%') {
+                       // Although char_type is a 32 bit type we know that
+                       // UCS4 occupies only 21 bits, so we don't need to
+                       // encode bigger values. Test for 2^24 because we
+                       // can encode that with the 6 hex digits that are
+                       // needed for 21 bits anyway.
+                       BOOST_ASSERT(c < (1 << 24));
                         enc += '=';
-                       enc += hexdigit[c>>4];
-                       enc += hexdigit[c & 15];
+                       enc += hexdigit[(c>>20) & 15];
+                       enc += hexdigit[(c>>16) & 15];
+                       enc += hexdigit[(c>>12) & 15];
+                       enc += hexdigit[(c>> 8) & 15];
+                       enc += hexdigit[(c>> 4) & 15];
+                       enc += hexdigit[ c      & 15];
                 } else {
                         enc += c;
                 }