src/support/lstrings.C

   1 /* This file is part of
   2  * ======================================================
   3  *
   4  *           LyX, The Document Processor
   5  *
   6  *           Copyright 1995 Matthias Ettrich
   7  *           Copyright 1995-2001 The LyX Team.
   8  *
   9  * ====================================================== */
  10
  11 #include <config.h>
  12
  13 #ifdef __GNUG__
  14 #pragma implementation
  15 #endif
  16
  17 #include <algorithm>
  18
  19 #include <cctype>
  20 #include <cstdlib>
  21
  22 #include "LString.h"
  23 #include "lstrings.h"
  24 #include "LRegex.h"
  25 #include "LAssert.h"
  26
  27 using std::count;
  28 using std::transform;
  29
  30 #ifndef CXX_GLOBAL_CSTD
  31 using std::tolower;
  32 using std::toupper;
  33 using std::strlen;
  34 #endif
  35
  36
  37 int compare_no_case(string const & s, string const & s2)
  38 {
  39         string::const_iterator p = s.begin();
  40         string::const_iterator p2 = s2.begin();
  41
  42         while (p != s.end() && p2 != s2.end()) {
  43                 int const lc1 = tolower(*p);
  44                 int const lc2 = tolower(*p2);
  45                 if (lc1 != lc2)
  46                         return (lc1 < lc2) ? -1 : 1;
  47                 ++p;
  48                 ++p2;
  49         }
  50
  51         if (s.size() == s2.size())
  52                 return 0;
  53         if (s.size() < s2.size())
  54                 return -1;
  55         return 1;
  56 }
  57
  58
  59 namespace {
  60         int ascii_tolower(int c) {
  61                 if (c >= 'A' && c <= 'Z')
  62                         return c - 'A' + 'a';
  63                 return c;
  64         }
  65 }
  66
  67
  68 int compare_ascii_no_case(string const & s, string const & s2)
  69 {
  70         string::const_iterator p = s.begin();
  71         string::const_iterator p2 = s2.begin();
  72
  73         while (p != s.end() && p2 != s2.end()) {
  74                 int const lc1 = ascii_tolower(*p);
  75                 int const lc2 = ascii_tolower(*p2);
  76                 if (lc1 != lc2)
  77                         return (lc1 < lc2) ? -1 : 1;
  78                 ++p;
  79                 ++p2;
  80         }
  81
  82         if (s.size() == s2.size())
  83                 return 0;
  84         if (s.size() < s2.size())
  85                 return -1;
  86         return 1;
  87 }
  88
  89
  90 int compare_no_case(string const & s, string const & s2, unsigned int len)
  91 {
  92         string::const_iterator p = s.begin();
  93         string::const_iterator p2 = s2.begin();
  94         unsigned int i = 0;
  95         while (i < len && p != s.end() && p2 != s2.end()) {
  96                 int const lc1 = tolower(*p);
  97                 int const lc2 = tolower(*p2);
  98                 if (lc1 != lc2)
  99                         return (lc1 < lc2) ? -1 : 1;
 100                 ++i;
 101                 ++p;
 102                 ++p2;
 103         }
 104
 105         if (s.size() >= len && s2.size() >= len)
 106                 return 0;
 107         if (s.size() < s2.size())
 108                 return -1;
 109         return 1;
 110 }
 111
 112
 113 bool isStrInt(string const & str)
 114 {
 115         if (str.empty()) return false;
 116
 117         // Remove leading and trailing white space chars.
 118         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 119         if (tmpstr.empty()) return false;
 120
 121         string::const_iterator cit = tmpstr.begin();
 122         if ((*cit) == '-') ++cit;
 123         string::const_iterator end = tmpstr.end();
 124         for (; cit != end; ++cit) {
 125                 if (!isdigit((*cit))) return false;
 126         }
 127         return true;
 128 }
 129
 130
 131 bool isStrUnsignedInt(string const & str)
 132 {
 133         if (str.empty()) return false;
 134
 135         // Remove leading and trailing white space chars.
 136         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 137         if (tmpstr.empty()) return false;
 138
 139         string::const_iterator cit = tmpstr.begin();
 140         string::const_iterator end = tmpstr.end();
 141         for (; cit != end; ++cit) {
 142                 if (!isdigit((*cit))) return false;
 143         }
 144         return true;
 145 }
 146
 147
 148 int strToInt(string const & str)
 149 {
 150         if (isStrInt(str)) {
 151                 // Remove leading and trailing white space chars.
 152                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 153                 // Do the conversion proper.
 154                 return lyx::atoi(tmpstr);
 155         } else {
 156                 return 0;
 157         }
 158 }
 159
 160
 161 unsigned int strToUnsignedInt(string const & str)
 162 {
 163         if (isStrUnsignedInt(str)) {
 164                 // Remove leading and trailing white space chars.
 165                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 166                 // Do the conversion proper.
 167                 return lyx::atoi(tmpstr);
 168         } else {
 169                 return 0;
 170         }
 171 }
 172
 173
 174 bool isStrDbl(string const & str)
 175 {
 176         if (str.empty()) return false;
 177
 178         // Remove leading and trailing white space chars.
 179         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 180         if (tmpstr.empty()) return false;
 181         //      if (1 < tmpstr.count('.')) return false;
 182
 183         string::const_iterator cit = tmpstr.begin();
 184         bool found_dot(false);
 185         if ((*cit) == '-') ++cit;
 186         string::const_iterator end = tmpstr.end();
 187         for (; cit != end; ++cit) {
 188                 if (!isdigit((*cit))
 189                     && '.' != (*cit)) {
 190                         return false;
 191                 }
 192                 if ('.' == (*cit)) {
 193                         if (found_dot) {
 194                                 return false;
 195                         } else {
 196                                 found_dot = true;
 197                         }
 198                 }
 199         }
 200         return true;
 201 }
 202
 203
 204 double strToDbl(string const & str)
 205 {
 206         if (isStrDbl(str)) {
 207                 // Remove leading and trailing white space chars.
 208                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 209                 // Do the conversion proper.
 210                 return ::atof(tmpstr.c_str());
 211         } else {
 212                 return 0.0;
 213         }
 214 }
 215
 216
 217 char lowercase(char c)
 218 {
 219         return char( tolower(c) );
 220 }
 221
 222
 223 char uppercase(char c)
 224 {
 225         return char( toupper(c) );
 226 }
 227
 228
 229 namespace {
 230
 231 // since we cannot use std::tolower and std::toupper directly in the
 232 // calls to std::transform yet, we use these helper clases. (Lgb)
 233
 234 struct local_lowercase {
 235         char operator()(char c) const {
 236                 return tolower(c);
 237         }
 238 };
 239
 240 struct local_uppercase {
 241         char operator()(char c) const {
 242                 return toupper(c);
 243         }
 244 };
 245
 246 } // end of anon namespace
 247
 248 string const lowercase(string const & a)
 249 {
 250         string tmp(a);
 251         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 252         return tmp;
 253 }
 254
 255 string const uppercase(string const & a)
 256 {
 257         string tmp(a);
 258         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 259         return tmp;
 260 }
 261
 262
 263 bool prefixIs(string const & a, char const * pre)
 264 {
 265         lyx::Assert(pre);
 266
 267         size_t const l = strlen(pre);
 268         string::size_type const alen = a.length();
 269
 270         if (l > alen || a.empty())
 271                 return false;
 272         else {
 273 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 274                 // Delete this code when the compilers get a bit better.
 275                 return ::strncmp(a.c_str(), pre, l) == 0;
 276 #else
 277                 // This is the code that we really want to use
 278                 // but until gcc ships with a basic_string that
 279                 // implements std::string correctly we have to
 280                 // use the code above.
 281                 return a.compare(0, l, pre, l) == 0;
 282 #endif
 283         }
 284 }
 285
 286
 287 bool prefixIs(string const & a, string const & pre)
 288 {
 289         string::size_type const prelen = pre.length();
 290         string::size_type const alen = a.length();
 291
 292         if (prelen > alen || a.empty())
 293                 return false;
 294         else {
 295 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 296                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 297 #else
 298                 return a.compare(0, prelen, pre) == 0;
 299 #endif
 300         }
 301 }
 302
 303
 304 bool suffixIs(string const & a, char c)
 305 {
 306         if (a.empty()) return false;
 307         return a[a.length() - 1] == c;
 308 }
 309
 310
 311 bool suffixIs(string const & a, char const * suf)
 312 {
 313         lyx::Assert(suf);
 314
 315         size_t const suflen = strlen(suf);
 316         string::size_type const alen = a.length();
 317
 318         if (suflen > alen)
 319                 return false;
 320         else {
 321 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 322                 // Delete this code when the compilers get a bit better.
 323                 string tmp(a, alen - suflen);
 324                 return ::strncmp(tmp.c_str(), suf, suflen) == 0;
 325 #else
 326                 // This is the code that we really want to use
 327                 // but until gcc ships with a basic_string that
 328                 // implements std::string correctly we have to
 329                 // use the code above.
 330                 return a.compare(alen - suflen, suflen, suf) == 0;
 331 #endif
 332         }
 333 }
 334
 335
 336 bool suffixIs(string const & a, string const & suf)
 337 {
 338         string::size_type const suflen = suf.length();
 339         string::size_type const alen = a.length();
 340
 341         if (suflen > alen) {
 342                 return false;
 343         } else {
 344 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 345                 string tmp(a, alen - suflen);
 346                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 347 #else
 348                 return a.compare(alen - suflen, suflen, suf) == 0;
 349 #endif
 350         }
 351 }
 352
 353
 354 bool contains(char const * a, string const & b)
 355 {
 356         lyx::Assert(a);
 357         string const at(a);
 358         return contains(at, b);
 359 }
 360
 361
 362 bool contains(string const & a, char const * b)
 363 {
 364         lyx::Assert(b);
 365         string const bt(b);
 366         return contains(a, bt);
 367 }
 368
 369
 370 bool contains(string const & a, string const & b)
 371 {
 372         if (a.empty())
 373                 return false;
 374         return a.find(b) != string::npos;
 375 }
 376
 377
 378 bool contains(string const & a, char b)
 379 {
 380         if (a.empty())
 381                 return false;
 382         return a.find(b) != string::npos;
 383 }
 384
 385
 386 bool contains(char const * a, char const * b)
 387 {
 388         lyx::Assert(a && b);
 389         string const at(a);
 390         string const bt(b);
 391         return contains(at, bt);
 392 }
 393
 394
 395 bool containsOnly(string const & s, char const * cset)
 396 {
 397         lyx::Assert(cset);
 398
 399         return s.find_first_not_of(cset) == string::npos;
 400 }
 401
 402
 403 bool containsOnly(string const & s, string const & cset)
 404 {
 405         return s.find_first_not_of(cset) == string::npos;
 406 }
 407
 408
 409 bool containsOnly(char const * s, char const * cset)
 410 {
 411         lyx::Assert(s && cset);
 412
 413         return string(s).find_first_not_of(cset) == string::npos;
 414 }
 415
 416
 417 bool containsOnly(char const * s, string const & cset)
 418 {
 419         lyx::Assert(s);
 420
 421         return string(s).find_first_not_of(cset) == string::npos;
 422 }
 423
 424
 425 string::size_type countChar(string const & a, char c)
 426 {
 427 #ifdef HAVE_STD_COUNT
 428         return count(a.begin(), a.end(), c);
 429 #else
 430         unsigned int n = 0;
 431         count(a.begin(), a.end(), c, n);
 432         return n;
 433 #endif
 434 }
 435
 436
 437 // ale970405+lasgoutt-970425
 438 // rewritten to use new string (Lgb)
 439 string const token(string const & a, char delim, int n)
 440 {
 441         if (a.empty()) return string();
 442
 443         string::size_type k = 0;
 444         string::size_type i = 0;
 445
 446         // Find delimiter or end of string
 447         for (; n--;)
 448                 if ((i = a.find(delim, i)) == string::npos)
 449                         break;
 450                 else
 451                         ++i; // step delim
 452         // i is now the n'th delim (or string::npos)
 453         if (i == string::npos) return string();
 454         k = a.find(delim, i);
 455         // k is now the n'th + 1 delim (or string::npos)
 456
 457         return a.substr(i, k - i);
 458 }
 459
 460
 461 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 462 // rewritten to use new string (Lgb)
 463 int tokenPos(string const & a, char delim, string const & tok)
 464 {
 465         int i = 0;
 466         string str(a);
 467         string tmptok;
 468
 469         while (!str.empty()) {
 470                 str = split(str, tmptok, delim);
 471                 if (tok == tmptok)
 472                         return i;
 473                 ++i;
 474         }
 475         return -1;
 476 }
 477
 478
 479 bool regexMatch(string const & a, string const & pattern)
 480 {
 481         // We massage the pattern a bit so that the usual
 482         // shell pattern we all are used to will work.
 483         // One nice thing about using a real regex is that
 484         // things like "*.*[^~]" will work also.
 485         // build the regex string.
 486         string regex(pattern);
 487         regex = subst(regex, ".", "\\.");
 488         regex = subst(regex, "*", ".*");
 489         LRegex reg(regex);
 490         return reg.exact_match(a);
 491 }
 492
 493
 494 string const subst(string const & a, char oldchar, char newchar)
 495 {
 496         string tmp(a);
 497         string::iterator lit = tmp.begin();
 498         string::iterator end = tmp.end();
 499         for (; lit != end; ++lit)
 500                 if ((*lit) == oldchar)
 501                         (*lit) = newchar;
 502         return tmp;
 503 }
 504
 505
 506 string const subst(string const & a,
 507                    char const * oldstr, string const & newstr)
 508 {
 509         lyx::Assert(oldstr);
 510
 511         string lstr(a);
 512         string::size_type i = 0;
 513         string::size_type olen = strlen(oldstr);
 514         while((i = lstr.find(oldstr, i)) != string::npos) {
 515                 lstr.replace(i, olen, newstr);
 516                 i += newstr.length(); // We need to be sure that we dont
 517                 // use the same i over and over again.
 518         }
 519         return lstr;
 520 }
 521
 522
 523 string const subst(string const & a,
 524                    string const & oldstr, string const & newstr)
 525 {
 526         string lstr(a);
 527         string::size_type i = 0;
 528         string::size_type const olen = oldstr.length();
 529         while((i = lstr.find(oldstr, i)) != string::npos) {
 530                 lstr.replace(i, olen, newstr);
 531                 i += newstr.length(); // We need to be sure that we dont
 532                 // use the same i over and over again.
 533         }
 534         return lstr;
 535 }
 536
 537
 538 string const strip(string const & a, char c)
 539 {
 540         if (a.empty()) return a;
 541         string tmp(a);
 542         string::size_type i = tmp.find_last_not_of(c);
 543         if (i == a.length() - 1) return tmp; // no c's at end of a
 544         if (i != string::npos)
 545                 tmp.erase(i + 1, string::npos);
 546 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 547         /// Needed for broken string::find_last_not_of
 548         else if (tmp[0] != c) {
 549                 if (a.length() == 1) return tmp;
 550                 tmp.erase(1, string::npos);
 551         }
 552 #endif
 553         else
 554                 tmp.erase(); // only c in the whole string
 555         return tmp;
 556 }
 557
 558
 559 string const frontStrip(string const & a, char const * p)
 560 {
 561         lyx::Assert(p);
 562
 563         if (a.empty() || !*p) return a;
 564         string tmp(a);
 565         string::size_type i = tmp.find_first_not_of(p);
 566         if (i > 0)
 567                 tmp.erase(0, i);
 568         return tmp;
 569 }
 570
 571
 572 string const frontStrip(string const & a, char c)
 573 {
 574         if (a.empty()) return a;
 575         string tmp(a);
 576         string::size_type i = tmp.find_first_not_of(c);
 577         if (i > 0)
 578                 tmp.erase(0, i);
 579         return tmp;
 580 }
 581
 582
 583 string const split(string const & a, string & piece, char delim)
 584 {
 585         string tmp;
 586         string::size_type i = a.find(delim);
 587         if (i == a.length() - 1) {
 588                 piece = a.substr(0, i);
 589         } else if (i != string::npos) {
 590                 piece = a.substr(0, i);
 591                 tmp = a.substr(i + 1);
 592         } else if (i == 0) {
 593                 piece.erase();
 594                 tmp = a.substr(i + 1);
 595         } else {
 596                 piece = a;
 597         }
 598         return tmp;
 599 }
 600
 601
 602 string const split(string const & a, char delim)
 603 {
 604         string tmp;
 605         string::size_type i = a.find(delim);
 606         if (i != string::npos) // found delim
 607                 tmp = a.substr(i + 1);
 608         return tmp;
 609 }
 610
 611
 612 // ale970521
 613 string const rsplit(string const & a, string & piece, char delim)
 614 {
 615         string tmp;
 616         string::size_type i = a.rfind(delim);
 617         if (i != string::npos) { // delimiter was found
 618                 piece = a.substr(0, i);
 619                 tmp = a.substr(i + 1);
 620         } else { // delimter was not found
 621                 piece.erase();
 622         }
 623         return tmp;
 624 }
 625
 626
 627 // This function escapes 8-bit characters and other problematic
 628 // characters that cause problems in latex labels.
 629 string const escape(string const & lab)
 630 {
 631         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 632                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 633         string enc;
 634         for (string::size_type i = 0; i < lab.length(); ++i) {
 635                 unsigned char c= lab[i];
 636                 if (c >= 128 || c == '=' || c == '%') {
 637                         enc += '=';
 638                         enc += hexdigit[c>>4];
 639                         enc += hexdigit[c & 15];
 640                 } else {
 641                         enc += c;
 642                 }
 643         }
 644         return enc;
 645 }