src/support/lstrings.C

   1 /* This file is part of
   2  * ======================================================
   3  *
   4  *           LyX, The Document Processor
   5  *
   6  *           Copyright 1995 Matthias Ettrich
   7  *           Copyright 1995-2001 The LyX Team.
   8  *
   9  * ====================================================== */
  10
  11 #include <config.h>
  12
  13 #ifdef __GNUG__
  14 #pragma implementation
  15 #endif
  16
  17 #include <algorithm>
  18
  19 #include <cctype>
  20 #include <cstdlib>
  21
  22 #include "LString.h"
  23 #include "lstrings.h"
  24 #include "LRegex.h"
  25 #include "LAssert.h"
  26 #include "debug.h"
  27
  28 using std::count;
  29 using std::transform;
  30
  31 #ifndef CXX_GLOBAL_CSTD
  32 using std::tolower;
  33 using std::toupper;
  34 using std::strlen;
  35 #endif
  36
  37
  38 int compare_no_case(string const & s, string const & s2)
  39 {
  40         string::const_iterator p = s.begin();
  41         string::const_iterator p2 = s2.begin();
  42
  43         while (p != s.end() && p2 != s2.end()) {
  44                 int const lc1 = tolower(*p);
  45                 int const lc2 = tolower(*p2);
  46                 if (lc1 != lc2)
  47                         return (lc1 < lc2) ? -1 : 1;
  48                 ++p;
  49                 ++p2;
  50         }
  51
  52         if (s.size() == s2.size())
  53                 return 0;
  54         if (s.size() < s2.size())
  55                 return -1;
  56         return 1;
  57 }
  58
  59
  60 namespace {
  61         int ascii_tolower(int c) {
  62                 if (c >= 'A' && c <= 'Z')
  63                         return c - 'A' + 'a';
  64                 return c;
  65         }
  66 }
  67
  68
  69 int compare_ascii_no_case(string const & s, string const & s2)
  70 {
  71         string::const_iterator p = s.begin();
  72         string::const_iterator p2 = s2.begin();
  73
  74         while (p != s.end() && p2 != s2.end()) {
  75                 int const lc1 = ascii_tolower(*p);
  76                 int const lc2 = ascii_tolower(*p2);
  77                 if (lc1 != lc2)
  78                         return (lc1 < lc2) ? -1 : 1;
  79                 ++p;
  80                 ++p2;
  81         }
  82
  83         if (s.size() == s2.size())
  84                 return 0;
  85         if (s.size() < s2.size())
  86                 return -1;
  87         return 1;
  88 }
  89
  90
  91 int compare_no_case(string const & s, string const & s2, unsigned int len)
  92 {
  93         string::const_iterator p = s.begin();
  94         string::const_iterator p2 = s2.begin();
  95         unsigned int i = 0;
  96         while (i < len && p != s.end() && p2 != s2.end()) {
  97                 int const lc1 = tolower(*p);
  98                 int const lc2 = tolower(*p2);
  99                 if (lc1 != lc2)
 100                         return (lc1 < lc2) ? -1 : 1;
 101                 ++i;
 102                 ++p;
 103                 ++p2;
 104         }
 105
 106         if (s.size() >= len && s2.size() >= len)
 107                 return 0;
 108         if (s.size() < s2.size())
 109                 return -1;
 110         return 1;
 111 }
 112
 113
 114 bool isStrInt(string const & str)
 115 {
 116         if (str.empty()) return false;
 117
 118         // Remove leading and trailing white space chars.
 119         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 120         if (tmpstr.empty()) return false;
 121
 122         string::const_iterator cit = tmpstr.begin();
 123         if ((*cit) == '-') ++cit;
 124         string::const_iterator end = tmpstr.end();
 125         for (; cit != end; ++cit) {
 126                 if (!isdigit((*cit))) return false;
 127         }
 128         return true;
 129 }
 130
 131
 132 bool isStrUnsignedInt(string const & str)
 133 {
 134         if (str.empty()) return false;
 135
 136         // Remove leading and trailing white space chars.
 137         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 138         if (tmpstr.empty()) return false;
 139
 140         string::const_iterator cit = tmpstr.begin();
 141         string::const_iterator end = tmpstr.end();
 142         for (; cit != end; ++cit) {
 143                 if (!isdigit((*cit))) return false;
 144         }
 145         return true;
 146 }
 147
 148
 149 int strToInt(string const & str)
 150 {
 151         if (isStrInt(str)) {
 152                 // Remove leading and trailing white space chars.
 153                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 154                 // Do the conversion proper.
 155                 return lyx::atoi(tmpstr);
 156         } else {
 157                 return 0;
 158         }
 159 }
 160
 161
 162 unsigned int strToUnsignedInt(string const & str)
 163 {
 164         if (isStrUnsignedInt(str)) {
 165                 // Remove leading and trailing white space chars.
 166                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 167                 // Do the conversion proper.
 168                 return lyx::atoi(tmpstr);
 169         } else {
 170                 return 0;
 171         }
 172 }
 173
 174
 175 bool isStrDbl(string const & str)
 176 {
 177         if (str.empty()) return false;
 178
 179         // Remove leading and trailing white space chars.
 180         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 181         if (tmpstr.empty()) return false;
 182         //      if (1 < tmpstr.count('.')) return false;
 183
 184         string::const_iterator cit = tmpstr.begin();
 185         bool found_dot(false);
 186         if ((*cit) == '-') ++cit;
 187         string::const_iterator end = tmpstr.end();
 188         for (; cit != end; ++cit) {
 189                 if (!isdigit((*cit))
 190                     && '.' != (*cit)) {
 191                         return false;
 192                 }
 193                 if ('.' == (*cit)) {
 194                         if (found_dot) {
 195                                 return false;
 196                         } else {
 197                                 found_dot = true;
 198                         }
 199                 }
 200         }
 201         return true;
 202 }
 203
 204
 205 double strToDbl(string const & str)
 206 {
 207         if (isStrDbl(str)) {
 208                 // Remove leading and trailing white space chars.
 209                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 210                 // Do the conversion proper.
 211                 return ::atof(tmpstr.c_str());
 212         } else {
 213                 return 0.0;
 214         }
 215 }
 216
 217
 218 char lowercase(char c)
 219 {
 220         return char(tolower(c));
 221 }
 222
 223
 224 char uppercase(char c)
 225 {
 226         return char(toupper(c));
 227 }
 228
 229
 230 namespace {
 231
 232 // since we cannot use std::tolower and std::toupper directly in the
 233 // calls to std::transform yet, we use these helper clases. (Lgb)
 234
 235 struct local_lowercase {
 236         char operator()(char c) const {
 237                 return tolower(c);
 238         }
 239 };
 240
 241 struct local_uppercase {
 242         char operator()(char c) const {
 243                 return toupper(c);
 244         }
 245 };
 246
 247 } // end of anon namespace
 248
 249 string const lowercase(string const & a)
 250 {
 251         string tmp(a);
 252         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 253         return tmp;
 254 }
 255
 256 string const uppercase(string const & a)
 257 {
 258         string tmp(a);
 259         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 260         return tmp;
 261 }
 262
 263
 264 bool prefixIs(string const & a, char const * pre)
 265 {
 266         lyx::Assert(pre);
 267
 268         size_t const l = strlen(pre);
 269         string::size_type const alen = a.length();
 270
 271         if (l > alen || a.empty())
 272                 return false;
 273         else {
 274 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 275                 // Delete this code when the compilers get a bit better.
 276                 return ::strncmp(a.c_str(), pre, l) == 0;
 277 #else
 278                 // This is the code that we really want to use
 279                 // but until gcc ships with a basic_string that
 280                 // implements std::string correctly we have to
 281                 // use the code above.
 282                 return a.compare(0, l, pre, l) == 0;
 283 #endif
 284         }
 285 }
 286
 287
 288 bool prefixIs(string const & a, string const & pre)
 289 {
 290         string::size_type const prelen = pre.length();
 291         string::size_type const alen = a.length();
 292
 293         if (prelen > alen || a.empty())
 294                 return false;
 295         else {
 296 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 297                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 298 #else
 299                 return a.compare(0, prelen, pre) == 0;
 300 #endif
 301         }
 302 }
 303
 304
 305 bool suffixIs(string const & a, char c)
 306 {
 307         if (a.empty()) return false;
 308         return a[a.length() - 1] == c;
 309 }
 310
 311
 312 bool suffixIs(string const & a, char const * suf)
 313 {
 314         lyx::Assert(suf);
 315
 316         size_t const suflen = strlen(suf);
 317         string::size_type const alen = a.length();
 318
 319         if (suflen > alen)
 320                 return false;
 321         else {
 322 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 323                 // Delete this code when the compilers get a bit better.
 324                 string tmp(a, alen - suflen);
 325                 return ::strncmp(tmp.c_str(), suf, suflen) == 0;
 326 #else
 327                 // This is the code that we really want to use
 328                 // but until gcc ships with a basic_string that
 329                 // implements std::string correctly we have to
 330                 // use the code above.
 331                 return a.compare(alen - suflen, suflen, suf) == 0;
 332 #endif
 333         }
 334 }
 335
 336
 337 bool suffixIs(string const & a, string const & suf)
 338 {
 339         string::size_type const suflen = suf.length();
 340         string::size_type const alen = a.length();
 341
 342         if (suflen > alen) {
 343                 return false;
 344         } else {
 345 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 346                 string tmp(a, alen - suflen);
 347                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 348 #else
 349                 return a.compare(alen - suflen, suflen, suf) == 0;
 350 #endif
 351         }
 352 }
 353
 354
 355 bool contains(char const * a, string const & b)
 356 {
 357         lyx::Assert(a);
 358         string const at(a);
 359         return contains(at, b);
 360 }
 361
 362
 363 bool contains(string const & a, char const * b)
 364 {
 365         lyx::Assert(b);
 366         string const bt(b);
 367         return contains(a, bt);
 368 }
 369
 370
 371 bool contains(string const & a, string const & b)
 372 {
 373         if (a.empty())
 374                 return false;
 375         return a.find(b) != string::npos;
 376 }
 377
 378
 379 bool contains(string const & a, char b)
 380 {
 381         if (a.empty())
 382                 return false;
 383         return a.find(b) != string::npos;
 384 }
 385
 386
 387 bool contains(char const * a, char const * b)
 388 {
 389         lyx::Assert(a && b);
 390         string const at(a);
 391         string const bt(b);
 392         return contains(at, bt);
 393 }
 394
 395
 396 bool containsOnly(string const & s, char const * cset)
 397 {
 398         lyx::Assert(cset);
 399
 400         return s.find_first_not_of(cset) == string::npos;
 401 }
 402
 403
 404 bool containsOnly(string const & s, string const & cset)
 405 {
 406         return s.find_first_not_of(cset) == string::npos;
 407 }
 408
 409
 410 bool containsOnly(char const * s, char const * cset)
 411 {
 412         lyx::Assert(s && cset);
 413
 414         return string(s).find_first_not_of(cset) == string::npos;
 415 }
 416
 417
 418 bool containsOnly(char const * s, string const & cset)
 419 {
 420         lyx::Assert(s);
 421
 422         return string(s).find_first_not_of(cset) == string::npos;
 423 }
 424
 425
 426 // ale970405+lasgoutt-970425
 427 // rewritten to use new string (Lgb)
 428 string const token(string const & a, char delim, int n)
 429 {
 430         if (a.empty()) return string();
 431
 432         string::size_type k = 0;
 433         string::size_type i = 0;
 434
 435         // Find delimiter or end of string
 436         for (; n--;)
 437                 if ((i = a.find(delim, i)) == string::npos)
 438                         break;
 439                 else
 440                         ++i; // step delim
 441         // i is now the n'th delim (or string::npos)
 442         if (i == string::npos) return string();
 443         k = a.find(delim, i);
 444         // k is now the n'th + 1 delim (or string::npos)
 445
 446         return a.substr(i, k - i);
 447 }
 448
 449
 450 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 451 // rewritten to use new string (Lgb)
 452 int tokenPos(string const & a, char delim, string const & tok)
 453 {
 454         int i = 0;
 455         string str(a);
 456         string tmptok;
 457
 458         while (!str.empty()) {
 459                 str = split(str, tmptok, delim);
 460                 if (tok == tmptok)
 461                         return i;
 462                 ++i;
 463         }
 464         return -1;
 465 }
 466
 467
 468 bool regexMatch(string const & a, string const & pattern)
 469 {
 470         // We massage the pattern a bit so that the usual
 471         // shell pattern we all are used to will work.
 472         // One nice thing about using a real regex is that
 473         // things like "*.*[^~]" will work also.
 474         // build the regex string.
 475         string regex(pattern);
 476         regex = subst(regex, ".", "\\.");
 477         regex = subst(regex, "*", ".*");
 478         LRegex reg(regex);
 479         return reg.exact_match(a);
 480 }
 481
 482
 483 string const subst(string const & a, char oldchar, char newchar)
 484 {
 485         string tmp(a);
 486         string::iterator lit = tmp.begin();
 487         string::iterator end = tmp.end();
 488         for (; lit != end; ++lit)
 489                 if ((*lit) == oldchar)
 490                         (*lit) = newchar;
 491         return tmp;
 492 }
 493
 494
 495 string const subst(string const & a,
 496                    char const * oldstr, string const & newstr)
 497 {
 498         lyx::Assert(oldstr);
 499
 500         string lstr(a);
 501         string::size_type i = 0;
 502         string::size_type olen = strlen(oldstr);
 503         while ((i = lstr.find(oldstr, i)) != string::npos) {
 504                 lstr.replace(i, olen, newstr);
 505                 i += newstr.length(); // We need to be sure that we dont
 506                 // use the same i over and over again.
 507         }
 508         return lstr;
 509 }
 510
 511
 512 string const subst(string const & a,
 513                    string const & oldstr, string const & newstr)
 514 {
 515         string lstr(a);
 516         string::size_type i = 0;
 517         string::size_type const olen = oldstr.length();
 518         while ((i = lstr.find(oldstr, i)) != string::npos) {
 519                 lstr.replace(i, olen, newstr);
 520                 i += newstr.length(); // We need to be sure that we dont
 521                 // use the same i over and over again.
 522         }
 523         return lstr;
 524 }
 525
 526
 527 string const strip(string const & a, char c)
 528 {
 529         if (a.empty()) return a;
 530         string tmp(a);
 531         string::size_type i = tmp.find_last_not_of(c);
 532         if (i == a.length() - 1) return tmp; // no c's at end of a
 533         if (i != string::npos)
 534                 tmp.erase(i + 1, string::npos);
 535 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 536         /// Needed for broken string::find_last_not_of
 537         else if (tmp[0] != c) {
 538                 if (a.length() == 1) return tmp;
 539                 tmp.erase(1, string::npos);
 540         }
 541 #endif
 542         else
 543                 tmp.erase(); // only c in the whole string
 544         return tmp;
 545 }
 546
 547
 548 string const frontStrip(string const & a, char const * p)
 549 {
 550         lyx::Assert(p);
 551
 552         if (a.empty() || !*p) return a;
 553         string tmp(a);
 554         string::size_type i = tmp.find_first_not_of(p);
 555         if (i > 0)
 556                 tmp.erase(0, i);
 557         return tmp;
 558 }
 559
 560
 561 string const frontStrip(string const & a, char c)
 562 {
 563         if (a.empty()) return a;
 564         string tmp(a);
 565         string::size_type i = tmp.find_first_not_of(c);
 566         if (i > 0)
 567                 tmp.erase(0, i);
 568         return tmp;
 569 }
 570
 571
 572 string const split(string const & a, string & piece, char delim)
 573 {
 574         string tmp;
 575         string::size_type i = a.find(delim);
 576         if (i == a.length() - 1) {
 577                 piece = a.substr(0, i);
 578         } else if (i != string::npos) {
 579                 piece = a.substr(0, i);
 580                 tmp = a.substr(i + 1);
 581         } else if (i == 0) {
 582                 piece.erase();
 583                 tmp = a.substr(i + 1);
 584         } else {
 585                 piece = a;
 586         }
 587         return tmp;
 588 }
 589
 590
 591 string const split(string const & a, char delim)
 592 {
 593         string tmp;
 594         string::size_type i = a.find(delim);
 595         if (i != string::npos) // found delim
 596                 tmp = a.substr(i + 1);
 597         return tmp;
 598 }
 599
 600
 601 // ale970521
 602 string const rsplit(string const & a, string & piece, char delim)
 603 {
 604         string tmp;
 605         string::size_type i = a.rfind(delim);
 606         if (i != string::npos) { // delimiter was found
 607                 piece = a.substr(0, i);
 608                 tmp = a.substr(i + 1);
 609         } else { // delimter was not found
 610                 piece.erase();
 611         }
 612         return tmp;
 613 }
 614
 615
 616 // This function escapes 8-bit characters and other problematic
 617 // characters that cause problems in latex labels.
 618 string const escape(string const & lab)
 619 {
 620         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 621                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 622         string enc;
 623         for (string::size_type i = 0; i < lab.length(); ++i) {
 624                 unsigned char c= lab[i];
 625                 if (c >= 128 || c == '=' || c == '%') {
 626                         enc += '=';
 627                         enc += hexdigit[c>>4];
 628                         enc += hexdigit[c & 15];
 629                 } else {
 630                         enc += c;
 631                 }
 632         }
 633         return enc;
 634 }