src/support/lstrings.C

   1 /* This file is part of
   2  * ======================================================
   3  *
   4  *           LyX, The Document Processor
   5  *
   6  *           Copyright 1995 Matthias Ettrich
   7  *           Copyright 1995-2001 The LyX Team.
   8  *
   9  * ====================================================== */
  10
  11 #include <config.h>
  12
  13 #ifdef __GNUG__
  14 #pragma implementation
  15 #endif
  16
  17 #include "LString.h"
  18 #include "lstrings.h"
  19 #include "LAssert.h"
  20 #include "debug.h"
  21
  22 #include <boost/regex.hpp>
  23
  24 #include <algorithm>
  25
  26 #include <cctype>
  27 #include <cstdlib>
  28
  29 using std::count;
  30 using std::transform;
  31 using std::vector;
  32
  33 #ifndef CXX_GLOBAL_CSTD
  34 using std::atof;
  35 using std::isdigit;
  36 using std::strlen;
  37 using std::tolower;
  38 using std::toupper;
  39 #endif
  40
  41
  /// Case-insensitive three-way comparison of \p s and \p s2.
  /// Characters are folded with tolower() before being compared.
  /// \return -1 if \p s sorts before \p s2, 1 if it sorts after, 0 if
  ///         both are equal ignoring case. When one string is a prefix
  ///         of the other, the shorter one sorts first.
  int compare_no_case(string const & s, string const & s2)
  {
          string::const_iterator p = s.begin();
          string::const_iterator p2 = s2.begin();
          string::const_iterator const end = s.end();
          string::const_iterator const end2 = s2.end();

          for (; p != end && p2 != end2; ++p, ++p2) {
                  // tolower() is only defined for values representable as
                  // unsigned char (or EOF); a plain char may be negative,
                  // so convert it first.
                  int const lc1 = tolower(static_cast<unsigned char>(*p));
                  int const lc2 = tolower(static_cast<unsigned char>(*p2));
                  if (lc1 != lc2)
                          return (lc1 < lc2) ? -1 : 1;
          }

          // The common prefix is equal: decide on the lengths.
          if (s.size() == s2.size())
                  return 0;
          return (s.size() < s2.size()) ? -1 : 1;
  }
  62
  63
  namespace {
          /// Locale-independent lower-casing: maps 'A'..'Z' onto
          /// 'a'..'z' and returns every other value unchanged.
          int ascii_tolower(int c) {
                  bool const is_upper = ('A' <= c) && (c <= 'Z');
                  return is_upper ? c + ('a' - 'A') : c;
          }
  }
  71
  72
  73 int compare_ascii_no_case(string const & s, string const & s2)
  74 {
  75         string::const_iterator p = s.begin();
  76         string::const_iterator p2 = s2.begin();
  77
  78         while (p != s.end() && p2 != s2.end()) {
  79                 int const lc1 = ascii_tolower(*p);
  80                 int const lc2 = ascii_tolower(*p2);
  81                 if (lc1 != lc2)
  82                         return (lc1 < lc2) ? -1 : 1;
  83                 ++p;
  84                 ++p2;
  85         }
  86
  87         if (s.size() == s2.size())
  88                 return 0;
  89         if (s.size() < s2.size())
  90                 return -1;
  91         return 1;
  92 }
  93
  94
  /// Case-insensitive three-way comparison of at most the first \p len
  /// characters of \p s and \p s2 (folding is done with tolower()).
  /// \return -1, 0 or 1. If both strings agree on their first \p len
  ///         characters the result is 0; if a string ends before
  ///         \p len characters were compared, the shorter string sorts
  ///         first and equally long strings compare equal.
  int compare_no_case(string const & s, string const & s2, unsigned int len)
  {
          string::const_iterator p = s.begin();
          string::const_iterator p2 = s2.begin();
          unsigned int i = 0;

          for (; i < len && p != s.end() && p2 != s2.end(); ++i, ++p, ++p2) {
                  // tolower() requires a value representable as unsigned
                  // char; a plain char may be negative.
                  int const lc1 = tolower(static_cast<unsigned char>(*p));
                  int const lc2 = tolower(static_cast<unsigned char>(*p2));
                  if (lc1 != lc2)
                          return (lc1 < lc2) ? -1 : 1;
          }

          // All requested characters were compared and matched.
          if (i == len)
                  return 0;

          // At least one string ended early. Equal lengths mean the
          // strings are identical (ignoring case), which must yield 0
          // and not 1.
          if (s.size() == s2.size())
                  return 0;
          return (s.size() < s2.size()) ? -1 : 1;
  }
 116
 117
 118 bool isStrInt(string const & str)
 119 {
 120         if (str.empty()) return false;
 121
 122         // Remove leading and trailing white space chars.
 123         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 124         if (tmpstr.empty()) return false;
 125
 126         string::const_iterator cit = tmpstr.begin();
 127         if ((*cit) == '-') ++cit;
 128         string::const_iterator end = tmpstr.end();
 129         for (; cit != end; ++cit) {
 130                 if (!isdigit((*cit))) return false;
 131         }
 132         return true;
 133 }
 134
 135
 136 bool isStrUnsignedInt(string const & str)
 137 {
 138         if (str.empty()) return false;
 139
 140         // Remove leading and trailing white space chars.
 141         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 142         if (tmpstr.empty()) return false;
 143
 144         string::const_iterator cit = tmpstr.begin();
 145         string::const_iterator end = tmpstr.end();
 146         for (; cit != end; ++cit) {
 147                 if (!isdigit((*cit))) return false;
 148         }
 149         return true;
 150 }
 151
 152
 153 int strToInt(string const & str)
 154 {
 155         if (isStrInt(str)) {
 156                 // Remove leading and trailing white space chars.
 157                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 158                 // Do the conversion proper.
 159                 return lyx::atoi(tmpstr);
 160         } else {
 161                 return 0;
 162         }
 163 }
 164
 165
 166 unsigned int strToUnsignedInt(string const & str)
 167 {
 168         if (isStrUnsignedInt(str)) {
 169                 // Remove leading and trailing white space chars.
 170                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 171                 // Do the conversion proper.
 172                 return lyx::atoi(tmpstr);
 173         } else {
 174                 return 0;
 175         }
 176 }
 177
 178
 179 bool isStrDbl(string const & str)
 180 {
 181         if (str.empty()) return false;
 182
 183         // Remove leading and trailing white space chars.
 184         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 185         if (tmpstr.empty()) return false;
 186         //      if (1 < tmpstr.count('.')) return false;
 187
 188         string::const_iterator cit = tmpstr.begin();
 189         bool found_dot(false);
 190         if ((*cit) == '-') ++cit;
 191         string::const_iterator end = tmpstr.end();
 192         for (; cit != end; ++cit) {
 193                 if (!isdigit((*cit))
 194                     && '.' != (*cit)) {
 195                         return false;
 196                 }
 197                 if ('.' == (*cit)) {
 198                         if (found_dot) {
 199                                 return false;
 200                         } else {
 201                                 found_dot = true;
 202                         }
 203                 }
 204         }
 205         return true;
 206 }
 207
 208
 209 double strToDbl(string const & str)
 210 {
 211         if (isStrDbl(str)) {
 212                 // Remove leading and trailing white space chars.
 213                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 214                 // Do the conversion proper.
 215                 return ::atof(tmpstr.c_str());
 216         } else {
 217                 return 0.0;
 218         }
 219 }
 220
 221
 /// Returns the lower-case form of \p c as given by tolower().
 char lowercase(char c)
 {
         // tolower() requires a value representable as unsigned char; a
         // plain char may be negative, so convert it first.
         return char(tolower(static_cast<unsigned char>(c)));
 }
 226
 227
 /// Returns the upper-case form of \p c as given by toupper().
 char uppercase(char c)
 {
         // toupper() requires a value representable as unsigned char; a
         // plain char may be negative, so convert it first.
         return char(toupper(static_cast<unsigned char>(c)));
 }
 232
 233
 namespace {

 // Since we cannot use std::tolower and std::toupper directly in the
 // calls to std::transform yet, we use these helper classes. (Lgb)

 /// Function object that lower-cases a single character with tolower().
 struct local_lowercase {
         char operator()(char c) const {
                 // The <cctype> functions require a value representable
                 // as unsigned char; a plain char may be negative.
                 return static_cast<char>(
                         tolower(static_cast<unsigned char>(c)));
         }
 };

 /// Function object that upper-cases a single character with toupper().
 struct local_uppercase {
         char operator()(char c) const {
                 // See local_lowercase for the reason behind the cast.
                 return static_cast<char>(
                         toupper(static_cast<unsigned char>(c)));
         }
 };

 } // end of anon namespace
 252
 253 string const lowercase(string const & a)
 254 {
 255         string tmp(a);
 256         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 257         return tmp;
 258 }
 259
 260 string const uppercase(string const & a)
 261 {
 262         string tmp(a);
 263         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 264         return tmp;
 265 }
 266
 267
 268 bool prefixIs(string const & a, char const * pre)
 269 {
 270         lyx::Assert(pre);
 271
 272         size_t const l = strlen(pre);
 273         string::size_type const alen = a.length();
 274
 275         if (l > alen || a.empty())
 276                 return false;
 277         else {
 278 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 279                 // Delete this code when the compilers get a bit better.
 280                 return ::strncmp(a.c_str(), pre, l) == 0;
 281 #else
 282                 // This is the code that we really want to use
 283                 // but until gcc ships with a basic_string that
 284                 // implements std::string correctly we have to
 285                 // use the code above.
 286                 return a.compare(0, l, pre, l) == 0;
 287 #endif
 288         }
 289 }
 290
 291
 /// Tells whether \p a starts with \p pre.
 /// \return false when \p a is empty (even if \p pre is empty too) or
 ///         shorter than \p pre; otherwise whether the leading
 ///         characters of \p a equal \p pre.
 bool prefixIs(string const & a, string const & pre)
 {
         string::size_type const prelen = pre.length();

         if (a.empty() || prelen > a.length())
                 return false;

 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
         // Fallback for string classes with an unreliable compare().
         return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 #else
         return a.compare(0, prelen, pre) == 0;
 #endif
 }
 307
 308
 /// Tells whether the last character of \p a is \p c.
 /// \return false for an empty string.
 bool suffixIs(string const & a, char c)
 {
         return !a.empty() && *a.rbegin() == c;
 }
 314
 315
 316 bool suffixIs(string const & a, char const * suf)
 317 {
 318         lyx::Assert(suf);
 319
 320         size_t const suflen = strlen(suf);
 321         string::size_type const alen = a.length();
 322
 323         if (suflen > alen)
 324                 return false;
 325         else {
 326 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 327                 // Delete this code when the compilers get a bit better.
 328                 string tmp(a, alen - suflen);
 329                 return ::strncmp(tmp.c_str(), suf, suflen) == 0;
 330 #else
 331                 // This is the code that we really want to use
 332                 // but until gcc ships with a basic_string that
 333                 // implements std::string correctly we have to
 334                 // use the code above.
 335                 return a.compare(alen - suflen, suflen, suf) == 0;
 336 #endif
 337         }
 338 }
 339
 340
 /// Tells whether \p a ends with \p suf.
 /// \return false when \p suf is longer than \p a; otherwise whether
 ///         the trailing characters of \p a equal \p suf.
 bool suffixIs(string const & a, string const & suf)
 {
         string::size_type const suflen = suf.length();
         string::size_type const alen = a.length();

         if (suflen > alen)
                 return false;

         // Offset of the candidate suffix inside a.
         string::size_type const start = alen - suflen;

 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
         // Fallback for string classes with an unreliable compare().
         return ::strncmp(a.c_str() + start, suf.c_str(), suflen) == 0;
 #else
         return a.compare(start, suflen, suf) == 0;
 #endif
 }
 357
 358
 359 bool contains(char const * a, string const & b)
 360 {
 361         lyx::Assert(a);
 362         string const at(a);
 363         return contains(at, b);
 364 }
 365
 366
 367 bool contains(string const & a, char const * b)
 368 {
 369         lyx::Assert(b);
 370         string const bt(b);
 371         return contains(a, bt);
 372 }
 373
 374
 /// Tells whether \p b occurs somewhere inside \p a.
 /// \return false when \p a is empty, even if \p b is empty as well.
 bool contains(string const & a, string const & b)
 {
         return !a.empty() && a.find(b) != string::npos;
 }
 381
 382
 /// Tells whether the character \p b occurs in \p a.
 /// \return false when \p a is empty.
 bool contains(string const & a, char b)
 {
         return !a.empty() && a.find(b) != string::npos;
 }
 389
 390
 391 bool contains(char const * a, char const * b)
 392 {
 393         lyx::Assert(a && b);
 394         string const at(a);
 395         string const bt(b);
 396         return contains(at, bt);
 397 }
 398
 399
 400 bool containsOnly(string const & s, char const * cset)
 401 {
 402         lyx::Assert(cset);
 403
 404         return s.find_first_not_of(cset) == string::npos;
 405 }
 406
 407
 /// Tells whether every character of \p s is one of the characters of
 /// \p cset. An empty \p s yields true.
 bool containsOnly(string const & s, string const & cset)
 {
         string::size_type const offender = s.find_first_not_of(cset);
         return offender == string::npos;
 }
 412
 413
 414 bool containsOnly(char const * s, char const * cset)
 415 {
 416         lyx::Assert(s && cset);
 417
 418         return string(s).find_first_not_of(cset) == string::npos;
 419 }
 420
 421
 422 bool containsOnly(char const * s, string const & cset)
 423 {
 424         lyx::Assert(s);
 425
 426         return string(s).find_first_not_of(cset) == string::npos;
 427 }
 428
 429
 // ale970405+lasgoutt-970425
 // rewritten to use new string (Lgb)
 /// Returns the \p n'th (0-based) field of \p a, where fields are
 /// separated by \p delim. An empty string is returned when \p a is
 /// empty or has fewer than \p n delimiters.
 string const token(string const & a, char delim, int n)
 {
         if (a.empty()) return string();

         // Step over the first n delimiters; start ends up just behind
         // the n'th one.
         string::size_type start = 0;
         while (n-- != 0) {
                 string::size_type const hit = a.find(delim, start);
                 if (hit == string::npos)
                         return string();
                 start = hit + 1;
         }

         // The field runs up to the next delimiter, or to the end of the
         // string when there is none (stop is then string::npos).
         string::size_type const stop = a.find(delim, start);
         return a.substr(start, stop - start);
 }
 452
 453
 454 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 455 // rewritten to use new string (Lgb)
 456 int tokenPos(string const & a, char delim, string const & tok)
 457 {
 458         int i = 0;
 459         string str(a);
 460         string tmptok;
 461
 462         while (!str.empty()) {
 463                 str = split(str, tmptok, delim);
 464                 if (tok == tmptok)
 465                         return i;
 466                 ++i;
 467         }
 468         return -1;
 469 }
 470
 471
 472 bool regexMatch(string const & a, string const & pattern)
 473 {
 474         // We massage the pattern a bit so that the usual
 475         // shell pattern we all are used to will work.
 476         // One nice thing about using a real regex is that
 477         // things like "*.*[^~]" will work also.
 478         // build the regex string.
 479         string regex(pattern);
 480         regex = subst(regex, ".", "\\.");
 481         regex = subst(regex, "*", ".*");
 482         boost::regex reg(regex);
 483         return boost::regex_match(a, reg);
 484 }
 485
 486
 /// Returns a copy of \p a in which every occurrence of \p oldchar has
 /// been replaced by \p newchar.
 string const subst(string const & a, char oldchar, char newchar)
 {
         string result(a);
         string::size_type const len = result.length();
         for (string::size_type k = 0; k < len; ++k) {
                 if (result[k] == oldchar)
                         result[k] = newchar;
         }
         return result;
 }
 497
 498
 499 string const subst(string const & a,
 500                    char const * oldstr, string const & newstr)
 501 {
 502         lyx::Assert(oldstr);
 503
 504         string lstr(a);
 505         string::size_type i = 0;
 506         string::size_type olen = strlen(oldstr);
 507         while ((i = lstr.find(oldstr, i)) != string::npos) {
 508                 lstr.replace(i, olen, newstr);
 509                 i += newstr.length(); // We need to be sure that we dont
 510                 // use the same i over and over again.
 511         }
 512         return lstr;
 513 }
 514
 515
 /// Returns a copy of \p a in which every non-overlapping occurrence of
 /// \p oldstr has been replaced by \p newstr. Text that was inserted by
 /// a replacement is not scanned again. If \p oldstr is empty, \p a is
 /// returned unchanged.
 string const subst(string const & a,
                    string const & oldstr, string const & newstr)
 {
         // An empty pattern matches at every position without consuming
         // anything, which would make the loop below run forever.
         if (oldstr.empty())
                 return a;

         string lstr(a);
         string::size_type i = 0;
         string::size_type const olen = oldstr.length();
         while ((i = lstr.find(oldstr, i)) != string::npos) {
                 lstr.replace(i, olen, newstr);
                 // Continue behind the inserted text so that we do not
                 // use the same i over and over again.
                 i += newstr.length();
         }
         return lstr;
 }
 529
 530
 /// Returns a copy of \p a with all trailing occurrences of \p c
 /// removed. A string consisting only of \p c becomes empty.
 string const strip(string const & a, char c)
 {
         if (a.empty()) return a;

         // Position of the last character that has to be kept.
         string::size_type const last = a.find_last_not_of(c);

         if (last == a.length() - 1)
                 return a; // no c's at end of a
         if (last != string::npos)
                 return a.substr(0, last + 1);

 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
         /// Needed for broken string::find_last_not_of, which may report
         /// "not found" although the first character differs from c.
         if (a[0] != c)
                 return a.substr(0, 1);
 #endif
         return string(); // only c in the whole string
 }
 550
 551
 552 string const frontStrip(string const & a, char const * p)
 553 {
 554         lyx::Assert(p);
 555
 556         if (a.empty() || !*p) return a;
 557         string tmp(a);
 558         string::size_type i = tmp.find_first_not_of(p);
 559         if (i > 0)
 560                 tmp.erase(0, i);
 561         return tmp;
 562 }
 563
 564
 /// Returns a copy of \p a with all leading occurrences of \p c
 /// removed. A string consisting only of \p c becomes empty.
 string const frontStrip(string const & a, char c)
 {
         if (a.empty()) return a;

         string::size_type const first = a.find_first_not_of(c);
         if (first == string::npos)
                 return string(); // only c in the whole string
         return a.substr(first);
 }
 574
 575
 /// Splits \p a at the first occurrence of \p delim.
 /// \param piece receives the text in front of the delimiter, or the
 ///        whole of \p a when the delimiter does not occur.
 /// \return the text behind the delimiter; empty when the delimiter
 ///         does not occur or is the last character.
 /// \p piece may refer to the same object as \p a.
 string const split(string const & a, string & piece, char delim)
 {
         string rest;
         string::size_type const i = a.find(delim);
         if (i != string::npos) {
                 // Take the remainder first: assigning to piece would
                 // invalidate i if piece and a are the same string.
                 rest = a.substr(i + 1);
                 piece = a.substr(0, i);
         } else {
                 piece = a;
         }
         return rest;
 }
 593
 594
 /// Returns the part of \p a behind the first occurrence of \p delim,
 /// or an empty string when \p delim does not occur.
 string const split(string const & a, char delim)
 {
         string::size_type const pos = a.find(delim);
         if (pos == string::npos)
                 return string(); // delim not found
         return a.substr(pos + 1);
 }
 603
 604
 // ale970521
 /// Splits \p a at the last occurrence of \p delim.
 /// \param piece receives the text in front of the delimiter; it is
 ///        emptied when the delimiter does not occur.
 /// \return the text behind the delimiter; empty when the delimiter
 ///         does not occur or is the last character.
 /// \p piece may refer to the same object as \p a.
 string const rsplit(string const & a, string & piece, char delim)
 {
         string rest;
         string::size_type const i = a.rfind(delim);
         if (i != string::npos) { // delimiter was found
                 // Take the remainder first: assigning to piece would
                 // invalidate i if piece and a are the same string.
                 rest = a.substr(i + 1);
                 piece = a.substr(0, i);
         } else { // delimiter was not found
                 piece.erase();
         }
         return rest;
 }
 618
 619
 // This function escapes 8-bit characters and other problematic
 // characters that cause problems in latex labels.
 /// Every character with a value of 128 or above, as well as '=' and
 /// '%', is replaced by '=' followed by its two-digit upper-case
 /// hexadecimal code; all other characters are copied unchanged.
 string const escape(string const & lab)
 {
         static char const hexdigit[] = "0123456789ABCDEF";

         string enc;
         string::const_iterator const end = lab.end();
         for (string::const_iterator it = lab.begin(); it != end; ++it) {
                 unsigned char const c = *it;
                 bool const plain = c < 128 && c != '=' && c != '%';
                 if (plain) {
                         enc += c;
                         continue;
                 }
                 enc += '=';
                 enc += hexdigit[c >> 4];  // high nibble
                 enc += hexdigit[c & 15];  // low nibble
         }
         return enc;
 }
 639
 640
 641 /// gives a vector of stringparts which have the delimiter delim
 642 vector<string> const getVectorFromString(string const & str,
 643                                          string const & delim)
 644 {
 645     vector<string> vec;
 646     if (str.empty())
 647         return vec;
 648     string keys(strip(str));
 649     for(;;) {
 650         string::size_type const idx = keys.find(delim);
 651         if (idx == string::npos) {
 652             vec.push_back(frontStrip(keys));
 653             break;
 654         }
 655         string const key = strip(frontStrip(keys.substr(0, idx)));
 656         if (!key.empty())
 657             vec.push_back(key);
 658         string::size_type const start = idx + delim.size();
 659         keys = keys.substr(start);
 660     }
 661     return vec;
 662 }
 663
 664 // the same vice versa
 665 string const getStringFromVector(vector<string> const & vec,
 666                                  string const & delim)
 667 {
 668         string str;
 669         int i = 0;
 670         for (vector<string>::const_iterator it = vec.begin();
 671              it != vec.end(); ++it) {
 672                 string item = strip(frontStrip(*it));
 673                 if (item.empty()) continue;
 674
 675                 if (i++ > 0) str += delim;
 676                 str += item;
 677         }
 678         return str;
 679 }