#include <algorithm>
#include <sstream>
+#ifdef LIBC_WCTYPE_USES_UCS4
+// We can use the libc ctype functions because we unset the LC_CTYPE
+// category of the current locale in gettext.C
+#include <wctype.h>
+#else
+// Steal some code from somewhere else, e.g. glib (look at gunicode.h)
+// The code that we currently use does not really work.
+#endif
+
+
using lyx::docstring;
using std::transform;
docstring::const_iterator p2 = s2.begin();
while (p != s.end() && p2 != s2.end()) {
- int const lc1 = tolower(*p);
- int const lc2 = tolower(*p2);
+ char_type const lc1 = lowercase(*p);
+ char_type const lc2 = lowercase(*p2);
if (lc1 != lc2)
return (lc1 < lc2) ? -1 : 1;
++p;
namespace {
- int ascii_tolower(int c) {
- if (c >= 'A' && c <= 'Z')
- return c - 'A' + 'a';
- return c;
- }
+
+/// Fold the ASCII capitals 'A'..'Z' to lower case; any other value is
+/// returned untouched. No locale is consulted.
+template<typename Char>
+Char ascii_tolower(Char c) {
+	bool const is_ascii_capital = c >= 'A' && c <= 'Z';
+	return is_ascii_capital ? c - 'A' + 'a' : c;
+}
+
}
}
+/// Three-way comparison of \a s and \a s2 that ignores the case of the
+/// ASCII letters only (characters are folded with ascii_tolower()).
+/// \return -1, 0 or 1 when \a s sorts before, equal to or after \a s2.
+int compare_ascii_no_case(docstring const & s, docstring const & s2)
+{
+	docstring::const_iterator it1 = s.begin();
+	docstring::const_iterator it2 = s2.begin();
+
+	// Walk both strings in lock-step; the first position at which the
+	// folded characters differ decides the result.
+	for (; it1 != s.end() && it2 != s2.end(); ++it1, ++it2) {
+		char_type const folded1 = ascii_tolower(*it1);
+		char_type const folded2 = ascii_tolower(*it2);
+		if (folded1 < folded2)
+			return -1;
+		if (folded2 < folded1)
+			return 1;
+	}
+
+	// One string is a prefix of the other: the shorter one sorts first.
+	if (s.size() < s2.size())
+		return -1;
+	return s.size() == s2.size() ? 0 : 1;
+}
+
+
int compare_no_case(string const & s, string const & s2, unsigned int len)
{
string::const_iterator p = s.begin();
}
+namespace {
+
+/// True when \a c is one of the ASCII hexadecimal digits
+/// '0'..'9', 'a'..'f' or 'A'..'F'.
+inline
+bool isHexChar(char_type c)
+{
+	if (c >= '0' && c <= '9')
+		return true;
+	if (c >= 'a' && c <= 'f')
+		return true;
+	return c >= 'A' && c <= 'F';
+}
+
+} // anon namespace
+
+
+/// Check whether \a str consists solely of hexadecimal digits.
+/// A leading "0x" or "0X" is skipped, but only when at least one more
+/// character follows it, so a bare "0x" is rejected. An empty string
+/// yields true because there is no offending character.
+bool isHex(docstring const & str)
+{
+	// Use the string's own size type: storing length() in an int
+	// narrows the value and mixes signed and unsigned arithmetic.
+	docstring::size_type const len = str.length();
+
+	bool const has_prefix = len > 2 && str[0] == '0' &&
+		(str[1] == 'x' || str[1] == 'X');
+
+	for (docstring::size_type i = has_prefix ? 2 : 0; i < len; ++i) {
+		if (!isHexChar(str[i]))
+			return false;
+	}
+	return true;
+}
+
+
+/// Parse the leading hexadecimal number of \a str (after conversion
+/// with to_ascii()) using the "%x" conversion of sscanf.
+/// \return the parsed value, or 0 when \a str does not start with a
+/// hexadecimal number.
+int hexToInt(docstring const & str)
+{
+	string const s = to_ascii(str);
+	// "%x" stores through an unsigned int pointer. Initialise the
+	// target and check the match count so that a failed conversion
+	// no longer returns an indeterminate value.
+	unsigned int h = 0;
+	if (sscanf(s.c_str(), "%x", &h) != 1)
+		return 0;
+	return static_cast<int>(h);
+}
+
+
+/// True when every character of \a str is below 0x80, i.e. lies in the
+/// 7-bit ASCII range. An empty string yields true.
+bool isAscii(docstring const & str)
+{
+	// Iterate instead of indexing with an int, which would narrow
+	// length() and compare signed against unsigned.
+	docstring::const_iterator const end = str.end();
+	for (docstring::const_iterator it = str.begin(); it != end; ++it) {
+		if (*it >= 0x80)
+			return false;
+	}
+	return true;
+}
+
+
char lowercase(char c)
{
return char(tolower(c));
return char(toupper(c));
}
-// FIXME for lowercase() and uppercase() function below:
+
+// FIXME UNICODE
+// for lowercase() and uppercase() function below when wchar_t is not used:
// 1) std::tolower() and std::toupper() are templates that
// compile fine with char_type. With the test (c >= 256) we
// do not trust these function to do the right thing with
char_type lowercase(char_type c)
{
+	// Two compile-time variants: with LIBC_WCTYPE_USES_UCS4 the value
+	// is handed to towlower(); otherwise only values below 256 go
+	// through tolower() and anything larger is returned unchanged.
+#ifdef LIBC_WCTYPE_USES_UCS4
+ return towlower(c);
+#else
if (c >= 256)
return c;
return tolower(c);
+#endif
}
char_type uppercase(char_type c)
{
+	// Two compile-time variants: with LIBC_WCTYPE_USES_UCS4 the value
+	// is handed to towupper(); otherwise only values below 256 go
+	// through toupper() and anything larger is returned unchanged.
+#ifdef LIBC_WCTYPE_USES_UCS4
+ return towupper(c);
+#else
if (c >= 256)
return c;
return toupper(c);
+#endif
}
}
+/// Does \a a start with \a pre?
+/// An empty \a a never matches, not even against an empty \a pre.
+bool prefixIs(docstring const & a, docstring const & pre)
+{
+	docstring::size_type const prelen = pre.length();
+
+	// Reject early: nothing to match in, or the prefix cannot fit.
+	if (a.empty() || prelen > a.length())
+		return false;
+
+	return a.compare(0, prelen, pre) == 0;
+}
+
+
bool suffixIs(string const & a, char c)
{
if (a.empty()) return false;
namespace {
+/// Substitute all \a oldchar with \a newchar
template<typename Ch> inline
std::basic_string<Ch> const subst_char(std::basic_string<Ch> const & a,
Ch oldchar, Ch newchar)
}
+/// substitutes all instances of \a oldstr with \a newstr
template<typename String> inline
String const subst_string(String const & a,
String const & oldstr, String const & newstr)
}
+/// Remove from both ends of \a a every character that occurs in the
+/// ASCII set \a p. \a p must not be null. When \a a is empty or \a p
+/// is the empty string, \a a is returned unchanged.
+docstring const trim(docstring const & a, char const * p)
+{
+	BOOST_ASSERT(p);
+
+	if (a.empty() || !*p)
+		return a;
+
+	docstring const strip_set = lyx::from_ascii(p);
+
+	// No character outside the set means the whole string is trimmed
+	// away; in that case the search from the back fails as well.
+	docstring::size_type const first = a.find_first_not_of(strip_set);
+	if (first == docstring::npos)
+		return docstring();
+
+	docstring::size_type const last = a.find_last_not_of(strip_set);
+	return a.substr(first, last - first + 1);
+}
+
+
string const trim(string const & a, char const * p)
{
BOOST_ASSERT(p);
string const ltrim(string const & a, char const * p)
{
+	// Strip from the front of a every character that occurs in p.
+	// p must not be null; an empty a or empty p returns a unchanged,
+	// and a string made up only of characters from p becomes empty.
BOOST_ASSERT(p);
-
if (a.empty() || !*p)
return a;
-
string::size_type l = a.find_first_not_of(p);
-
if (l == string::npos)
return string();
-
return a.substr(l, string::npos);
}
-string const split(string const & a, string & piece, char delim)
+docstring const ltrim(docstring const & a, char const * p)
{
- string tmp;
- string::size_type i = a.find(delim);
+	// Strip from the front of a every character that occurs in the
+	// ASCII set p (converted with from_ascii). p must not be null;
+	// an empty a or empty p returns a unchanged, and a string made up
+	// only of characters from p becomes empty.
+ BOOST_ASSERT(p);
+ if (a.empty() || !*p)
+ return a;
+ size_t l = a.find_first_not_of(from_ascii(p));
+ if (l == docstring::npos)
+ return docstring();
+ return a.substr(l, docstring::npos);
+}
+
+namespace {
+
+template<typename String, typename Char> inline
+String const doSplit(String const & a, String & piece, Char delim)
+{
+ String tmp;
+ typename String::size_type i = a.find(delim);
if (i == a.length() - 1) {
piece = a.substr(0, i);
- } else if (i != string::npos) {
+ } else if (i != String::npos) {
piece = a.substr(0, i);
tmp = a.substr(i + 1);
} else if (i == 0) {
return tmp;
}
+}
+
+
+/// Narrow-string front end of doSplit(): forwards \a a, \a piece and
+/// \a delim unchanged and returns doSplit()'s result.
+string const split(string const & a, string & piece, char delim)
+{
+	return doSplit<string, char>(a, piece, delim);
+}
+
+
+/// docstring front end of doSplit(): forwards \a a, \a piece and
+/// \a delim unchanged and returns doSplit()'s result.
+docstring const split(docstring const & a, docstring & piece, char_type delim)
+{
+	return doSplit<docstring, char_type>(a, piece, delim);
+}
+
string const split(string const & a, char delim)
{
}
-// This function escapes 8-bit characters and other problematic
-// characters that cause problems in latex labels.
-string const escape(string const & lab)
+docstring const escape(docstring const & lab)
{
- char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
- '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
- string enc;
- for (string::size_type i = 0; i < lab.length(); ++i) {
- unsigned char c= lab[i];
+ char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+ docstring enc;
+ for (docstring::size_type i = 0; i < lab.length(); ++i) {
+ char_type c = lab[i];
if (c >= 128 || c == '=' || c == '%') {
+ // Although char_type is a 32 bit type we know that
+ // UCS4 occupies only 21 bits, so we don't need to
+ // encode bigger values. Test for 2^24 because we
+ // can encode that with the 6 hex digits that are
+ // needed for 21 bits anyway.
+ BOOST_ASSERT(c < (1 << 24));
enc += '=';
- enc += hexdigit[c>>4];
- enc += hexdigit[c & 15];
+ enc += hexdigit[(c>>20) & 15];
+ enc += hexdigit[(c>>16) & 15];
+ enc += hexdigit[(c>>12) & 15];
+ enc += hexdigit[(c>> 8) & 15];
+ enc += hexdigit[(c>> 4) & 15];
+ enc += hexdigit[ c & 15];
} else {
enc += c;
}
}
+template<>
docstring bformat(docstring const & fmt, char * arg1)
{
return (boost::basic_format<char_type>(fmt) % arg1).str();
docstring bformat(docstring const & fmt, char * arg1)
{
+	// fmt must contain the "%1$s" placeholder (asserted below). The
+	// placeholder is substituted with arg1 converted by from_ascii(),
+	// then the escaped "%%" is turned into a single "%".
BOOST_ASSERT(contains(fmt, lyx::from_ascii("%1$s")));
- docstring const str = subst(fmt, lyx::from_ascii("%1$s"), arg1);
+ docstring const str = subst(fmt, lyx::from_ascii("%1$s"), lyx::from_ascii(arg1));
return subst(str, lyx::from_ascii("%%"), lyx::from_ascii("%"));
}
+
+
template<>
docstring bformat(docstring const & fmt, docstring arg1, docstring arg2)
{
{
+ // fmt must contain both positional placeholders. They are
+ // substituted one after the other and finally the escaped "%%"
+ // is turned into a single "%".
BOOST_ASSERT(contains(fmt, lyx::from_ascii("%1$s")));
BOOST_ASSERT(contains(fmt, lyx::from_ascii("%2$s")));
- docstring str = subst(fmt, lyx::from_ascii("%1$s"), arg1);
+ docstring str = subst(fmt, lyx::from_ascii("%1$s"), lyx::from_ascii(arg1));
- str = subst(fmt, lyx::from_ascii("%2$s"), arg2);
+ // Continue from str, not fmt: restarting from fmt threw away the
+ // "%1$s" substitution made on the previous line.
+ str = subst(str, lyx::from_ascii("%2$s"), arg2);
return subst(str, lyx::from_ascii("%%"), lyx::from_ascii("%"));
}