src/support/lstrings.C

   1 /* This file is part of
   2  * ======================================================
   3  *
   4  *           LyX, The Document Processor
   5  *
   6  *           Copyright 1995 Matthias Ettrich
   7  *           Copyright 1995-2001 The LyX Team.
   8  *
   9  * ====================================================== */
  10
  11 #include <config.h>
  12
  13 #ifdef __GNUG__
  14 #pragma implementation
  15 #endif
  16
  17 #include "LString.h"
  18 #include "lstrings.h"
  19 #include "LAssert.h"
  20 #include "debug.h"
  21
  22 #include <boost/regex.hpp>
  23
  24 #include <algorithm>
  25
  26 #include <cctype>
  27 #include <cstdlib>
  28
  29 using std::count;
  30 using std::transform;
  31 using std::vector;
  32
  33 #ifndef CXX_GLOBAL_CSTD
  34 using std::atof;
  35 using std::isdigit;
  36 using std::strlen;
  37 using std::tolower;
  38 using std::toupper;
  39 #endif
  40
  41
  42 int compare_no_case(string const & s, string const & s2)
  43 {
  44         string::const_iterator p = s.begin();
  45         string::const_iterator p2 = s2.begin();
  46
  47         while (p != s.end() && p2 != s2.end()) {
  48                 int const lc1 = tolower(*p);
  49                 int const lc2 = tolower(*p2);
  50                 if (lc1 != lc2)
  51                         return (lc1 < lc2) ? -1 : 1;
  52                 ++p;
  53                 ++p2;
  54         }
  55
  56         if (s.size() == s2.size())
  57                 return 0;
  58         if (s.size() < s2.size())
  59                 return -1;
  60         return 1;
  61 }
  62
  63
  64 namespace {
  65         int ascii_tolower(int c) {
  66                 if (c >= 'A' && c <= 'Z')
  67                         return c - 'A' + 'a';
  68                 return c;
  69         }
  70 }
  71
  72
  73 int compare_ascii_no_case(string const & s, string const & s2)
  74 {
  75         string::const_iterator p = s.begin();
  76         string::const_iterator p2 = s2.begin();
  77
  78         while (p != s.end() && p2 != s2.end()) {
  79                 int const lc1 = ascii_tolower(*p);
  80                 int const lc2 = ascii_tolower(*p2);
  81                 if (lc1 != lc2)
  82                         return (lc1 < lc2) ? -1 : 1;
  83                 ++p;
  84                 ++p2;
  85         }
  86
  87         if (s.size() == s2.size())
  88                 return 0;
  89         if (s.size() < s2.size())
  90                 return -1;
  91         return 1;
  92 }
  93
  94
  95 int compare_no_case(string const & s, string const & s2, unsigned int len)
  96 {
  97         string::const_iterator p = s.begin();
  98         string::const_iterator p2 = s2.begin();
  99         unsigned int i = 0;
 100         while (i < len && p != s.end() && p2 != s2.end()) {
 101                 int const lc1 = tolower(*p);
 102                 int const lc2 = tolower(*p2);
 103                 if (lc1 != lc2)
 104                         return (lc1 < lc2) ? -1 : 1;
 105                 ++i;
 106                 ++p;
 107                 ++p2;
 108         }
 109
 110         if (s.size() >= len && s2.size() >= len)
 111                 return 0;
 112         if (s.size() < s2.size())
 113                 return -1;
 114         return 1;
 115 }
 116
 117
 118 bool isStrInt(string const & str)
 119 {
 120         if (str.empty()) return false;
 121
 122         // Remove leading and trailing white space chars.
 123         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 124         if (tmpstr.empty()) return false;
 125
 126         string::const_iterator cit = tmpstr.begin();
 127         if ((*cit) == '-') ++cit;
 128         string::const_iterator end = tmpstr.end();
 129         for (; cit != end; ++cit) {
 130                 if (!isdigit((*cit))) return false;
 131         }
 132         return true;
 133 }
 134
 135
 136 bool isStrUnsignedInt(string const & str)
 137 {
 138         if (str.empty()) return false;
 139
 140         // Remove leading and trailing white space chars.
 141         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 142         if (tmpstr.empty()) return false;
 143
 144         string::const_iterator cit = tmpstr.begin();
 145         string::const_iterator end = tmpstr.end();
 146         for (; cit != end; ++cit) {
 147                 if (!isdigit((*cit))) return false;
 148         }
 149         return true;
 150 }
 151
 152
 153 int strToInt(string const & str)
 154 {
 155         if (isStrInt(str)) {
 156                 // Remove leading and trailing white space chars.
 157                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 158                 // Do the conversion proper.
 159                 return lyx::atoi(tmpstr);
 160         } else {
 161                 return 0;
 162         }
 163 }
 164
 165
 166 unsigned int strToUnsignedInt(string const & str)
 167 {
 168         if (isStrUnsignedInt(str)) {
 169                 // Remove leading and trailing white space chars.
 170                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 171                 // Do the conversion proper.
 172                 return lyx::atoi(tmpstr);
 173         } else {
 174                 return 0;
 175         }
 176 }
 177
 178
 179 bool isStrDbl(string const & str)
 180 {
 181         if (str.empty()) return false;
 182
 183         // Remove leading and trailing white space chars.
 184         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 185         if (tmpstr.empty()) return false;
 186         //      if (1 < tmpstr.count('.')) return false;
 187
 188         string::const_iterator cit = tmpstr.begin();
 189         bool found_dot(false);
 190         if ((*cit) == '-') ++cit;
 191         string::const_iterator end = tmpstr.end();
 192         for (; cit != end; ++cit) {
 193                 if (!isdigit((*cit))
 194                     && '.' != (*cit)) {
 195                         return false;
 196                 }
 197                 if ('.' == (*cit)) {
 198                         if (found_dot) {
 199                                 return false;
 200                         } else {
 201                                 found_dot = true;
 202                         }
 203                 }
 204         }
 205         return true;
 206 }
 207
 208
 209 double strToDbl(string const & str)
 210 {
 211         if (isStrDbl(str)) {
 212                 // Remove leading and trailing white space chars.
 213                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 214                 // Do the conversion proper.
 215                 return ::atof(tmpstr.c_str());
 216         } else {
 217                 return 0.0;
 218         }
 219 }
 220
 221
 222 char lowercase(char c)
 223 {
 224         return char(tolower(c));
 225 }
 226
 227
 228 char uppercase(char c)
 229 {
 230         return char(toupper(c));
 231 }
 232
 233
 234 namespace {
 235
 236 // since we cannot use std::tolower and std::toupper directly in the
 237 // calls to std::transform yet, we use these helper clases. (Lgb)
 238
 239 struct local_lowercase {
 240         char operator()(char c) const {
 241                 return tolower(c);
 242         }
 243 };
 244
 245 struct local_uppercase {
 246         char operator()(char c) const {
 247                 return toupper(c);
 248         }
 249 };
 250
 251 struct local_ascii_lowercase {
 252         char operator()(char c) const {
 253                 return ascii_tolower(c);
 254         }
 255 };
 256
 257 } // end of anon namespace
 258
 259 string const lowercase(string const & a)
 260 {
 261         string tmp(a);
 262         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 263         return tmp;
 264 }
 265
 266 string const uppercase(string const & a)
 267 {
 268         string tmp(a);
 269         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 270         return tmp;
 271 }
 272
 273
 274 string const ascii_lowercase(string const & a)
 275 {
 276         string tmp(a);
 277         transform(tmp.begin(), tmp.end(), tmp.begin(),
 278                   local_ascii_lowercase());
 279         return tmp;
 280 }
 281
 282
 283 bool prefixIs(string const & a, char const * pre)
 284 {
 285         lyx::Assert(pre);
 286
 287         size_t const l = strlen(pre);
 288         string::size_type const alen = a.length();
 289
 290         if (l > alen || a.empty())
 291                 return false;
 292         else {
 293 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 294                 // Delete this code when the compilers get a bit better.
 295                 return ::strncmp(a.c_str(), pre, l) == 0;
 296 #else
 297                 // This is the code that we really want to use
 298                 // but until gcc ships with a basic_string that
 299                 // implements std::string correctly we have to
 300                 // use the code above.
 301                 return a.compare(0, l, pre, l) == 0;
 302 #endif
 303         }
 304 }
 305
 306
 307 bool prefixIs(string const & a, string const & pre)
 308 {
 309         string::size_type const prelen = pre.length();
 310         string::size_type const alen = a.length();
 311
 312         if (prelen > alen || a.empty())
 313                 return false;
 314         else {
 315 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 316                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 317 #else
 318                 return a.compare(0, prelen, pre) == 0;
 319 #endif
 320         }
 321 }
 322
 323
 324 bool suffixIs(string const & a, char c)
 325 {
 326         if (a.empty()) return false;
 327         return a[a.length() - 1] == c;
 328 }
 329
 330
 331 bool suffixIs(string const & a, char const * suf)
 332 {
 333         lyx::Assert(suf);
 334
 335         size_t const suflen = strlen(suf);
 336         string::size_type const alen = a.length();
 337
 338         if (suflen > alen)
 339                 return false;
 340         else {
 341 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 342                 // Delete this code when the compilers get a bit better.
 343                 string tmp(a, alen - suflen);
 344                 return ::strncmp(tmp.c_str(), suf, suflen) == 0;
 345 #else
 346                 // This is the code that we really want to use
 347                 // but until gcc ships with a basic_string that
 348                 // implements std::string correctly we have to
 349                 // use the code above.
 350                 return a.compare(alen - suflen, suflen, suf) == 0;
 351 #endif
 352         }
 353 }
 354
 355
 356 bool suffixIs(string const & a, string const & suf)
 357 {
 358         string::size_type const suflen = suf.length();
 359         string::size_type const alen = a.length();
 360
 361         if (suflen > alen) {
 362                 return false;
 363         } else {
 364 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 365                 string tmp(a, alen - suflen);
 366                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 367 #else
 368                 return a.compare(alen - suflen, suflen, suf) == 0;
 369 #endif
 370         }
 371 }
 372
 373
 374 bool contains(char const * a, string const & b)
 375 {
 376         lyx::Assert(a);
 377         string const at(a);
 378         return contains(at, b);
 379 }
 380
 381
 382 bool contains(string const & a, char const * b)
 383 {
 384         lyx::Assert(b);
 385         string const bt(b);
 386         return contains(a, bt);
 387 }
 388
 389
 390 bool contains(string const & a, string const & b)
 391 {
 392         if (a.empty())
 393                 return false;
 394         return a.find(b) != string::npos;
 395 }
 396
 397
 398 bool contains(string const & a, char b)
 399 {
 400         if (a.empty())
 401                 return false;
 402         return a.find(b) != string::npos;
 403 }
 404
 405
 406 bool contains(char const * a, char const * b)
 407 {
 408         lyx::Assert(a && b);
 409         string const at(a);
 410         string const bt(b);
 411         return contains(at, bt);
 412 }
 413
 414
 415 bool containsOnly(string const & s, char const * cset)
 416 {
 417         lyx::Assert(cset);
 418
 419         return s.find_first_not_of(cset) == string::npos;
 420 }
 421
 422
 423 bool containsOnly(string const & s, string const & cset)
 424 {
 425         return s.find_first_not_of(cset) == string::npos;
 426 }
 427
 428
 429 bool containsOnly(char const * s, char const * cset)
 430 {
 431         lyx::Assert(s && cset);
 432
 433         return string(s).find_first_not_of(cset) == string::npos;
 434 }
 435
 436
 437 bool containsOnly(char const * s, string const & cset)
 438 {
 439         lyx::Assert(s);
 440
 441         return string(s).find_first_not_of(cset) == string::npos;
 442 }
 443
 444
 445 // ale970405+lasgoutt-970425
 446 // rewritten to use new string (Lgb)
 447 string const token(string const & a, char delim, int n)
 448 {
 449         if (a.empty()) return string();
 450
 451         string::size_type k = 0;
 452         string::size_type i = 0;
 453
 454         // Find delimiter or end of string
 455         for (; n--;)
 456                 if ((i = a.find(delim, i)) == string::npos)
 457                         break;
 458                 else
 459                         ++i; // step delim
 460         // i is now the n'th delim (or string::npos)
 461         if (i == string::npos) return string();
 462         k = a.find(delim, i);
 463         // k is now the n'th + 1 delim (or string::npos)
 464
 465         return a.substr(i, k - i);
 466 }
 467
 468
 469 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 470 // rewritten to use new string (Lgb)
 471 int tokenPos(string const & a, char delim, string const & tok)
 472 {
 473         int i = 0;
 474         string str(a);
 475         string tmptok;
 476
 477         while (!str.empty()) {
 478                 str = split(str, tmptok, delim);
 479                 if (tok == tmptok)
 480                         return i;
 481                 ++i;
 482         }
 483         return -1;
 484 }
 485
 486
 487 bool regexMatch(string const & a, string const & pattern)
 488 {
 489         // We massage the pattern a bit so that the usual
 490         // shell pattern we all are used to will work.
 491         // One nice thing about using a real regex is that
 492         // things like "*.*[^~]" will work also.
 493         // build the regex string.
 494         string regex(pattern);
 495         regex = subst(regex, ".", "\\.");
 496         regex = subst(regex, "*", ".*");
 497         boost::regex reg(regex);
 498         return boost::regex_match(a, reg);
 499 }
 500
 501
 502 string const subst(string const & a, char oldchar, char newchar)
 503 {
 504         string tmp(a);
 505         string::iterator lit = tmp.begin();
 506         string::iterator end = tmp.end();
 507         for (; lit != end; ++lit)
 508                 if ((*lit) == oldchar)
 509                         (*lit) = newchar;
 510         return tmp;
 511 }
 512
 513
 514 string const subst(string const & a,
 515                    char const * oldstr, string const & newstr)
 516 {
 517         lyx::Assert(oldstr);
 518
 519         string lstr(a);
 520         string::size_type i = 0;
 521         string::size_type olen = strlen(oldstr);
 522         while ((i = lstr.find(oldstr, i)) != string::npos) {
 523                 lstr.replace(i, olen, newstr);
 524                 i += newstr.length(); // We need to be sure that we dont
 525                 // use the same i over and over again.
 526         }
 527         return lstr;
 528 }
 529
 530
 531 string const subst(string const & a,
 532                    string const & oldstr, string const & newstr)
 533 {
 534         string lstr(a);
 535         string::size_type i = 0;
 536         string::size_type const olen = oldstr.length();
 537         while ((i = lstr.find(oldstr, i)) != string::npos) {
 538                 lstr.replace(i, olen, newstr);
 539                 i += newstr.length(); // We need to be sure that we dont
 540                 // use the same i over and over again.
 541         }
 542         return lstr;
 543 }
 544
 545
 546 string const strip(string const & a, char c)
 547 {
 548         if (a.empty()) return a;
 549         string tmp(a);
 550         string::size_type i = tmp.find_last_not_of(c);
 551         if (i == a.length() - 1) return tmp; // no c's at end of a
 552         if (i != string::npos)
 553                 tmp.erase(i + 1, string::npos);
 554 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 555         /// Needed for broken string::find_last_not_of
 556         else if (tmp[0] != c) {
 557                 if (a.length() == 1) return tmp;
 558                 tmp.erase(1, string::npos);
 559         }
 560 #endif
 561         else
 562                 tmp.erase(); // only c in the whole string
 563         return tmp;
 564 }
 565
 566
 567 string const frontStrip(string const & a, char const * p)
 568 {
 569         lyx::Assert(p);
 570
 571         if (a.empty() || !*p) return a;
 572         string tmp(a);
 573         string::size_type i = tmp.find_first_not_of(p);
 574         if (i > 0)
 575                 tmp.erase(0, i);
 576         return tmp;
 577 }
 578
 579
 580 string const frontStrip(string const & a, char c)
 581 {
 582         if (a.empty()) return a;
 583         string tmp(a);
 584         string::size_type i = tmp.find_first_not_of(c);
 585         if (i > 0)
 586                 tmp.erase(0, i);
 587         return tmp;
 588 }
 589
 590
 591 string const split(string const & a, string & piece, char delim)
 592 {
 593         string tmp;
 594         string::size_type i = a.find(delim);
 595         if (i == a.length() - 1) {
 596                 piece = a.substr(0, i);
 597         } else if (i != string::npos) {
 598                 piece = a.substr(0, i);
 599                 tmp = a.substr(i + 1);
 600         } else if (i == 0) {
 601                 piece.erase();
 602                 tmp = a.substr(i + 1);
 603         } else {
 604                 piece = a;
 605         }
 606         return tmp;
 607 }
 608
 609
 610 string const split(string const & a, char delim)
 611 {
 612         string tmp;
 613         string::size_type i = a.find(delim);
 614         if (i != string::npos) // found delim
 615                 tmp = a.substr(i + 1);
 616         return tmp;
 617 }
 618
 619
 620 // ale970521
 621 string const rsplit(string const & a, string & piece, char delim)
 622 {
 623         string tmp;
 624         string::size_type i = a.rfind(delim);
 625         if (i != string::npos) { // delimiter was found
 626                 piece = a.substr(0, i);
 627                 tmp = a.substr(i + 1);
 628         } else { // delimter was not found
 629                 piece.erase();
 630         }
 631         return tmp;
 632 }
 633
 634
 635 // This function escapes 8-bit characters and other problematic
 636 // characters that cause problems in latex labels.
 637 string const escape(string const & lab)
 638 {
 639         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 640                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 641         string enc;
 642         for (string::size_type i = 0; i < lab.length(); ++i) {
 643                 unsigned char c= lab[i];
 644                 if (c >= 128 || c == '=' || c == '%') {
 645                         enc += '=';
 646                         enc += hexdigit[c>>4];
 647                         enc += hexdigit[c & 15];
 648                 } else {
 649                         enc += c;
 650                 }
 651         }
 652         return enc;
 653 }
 654
 655
 656 /// gives a vector of stringparts which have the delimiter delim
 657 vector<string> const getVectorFromString(string const & str,
 658                                          string const & delim)
 659 {
 660     vector<string> vec;
 661     if (str.empty())
 662         return vec;
 663     string keys(strip(str));
 664     for(;;) {
 665         string::size_type const idx = keys.find(delim);
 666         if (idx == string::npos) {
 667             vec.push_back(frontStrip(keys));
 668             break;
 669         }
 670         string const key = strip(frontStrip(keys.substr(0, idx)));
 671         if (!key.empty())
 672             vec.push_back(key);
 673         string::size_type const start = idx + delim.size();
 674         keys = keys.substr(start);
 675     }
 676     return vec;
 677 }
 678
 679 // the same vice versa
 680 string const getStringFromVector(vector<string> const & vec,
 681                                  string const & delim)
 682 {
 683         string str;
 684         int i = 0;
 685         for (vector<string>::const_iterator it = vec.begin();
 686              it != vec.end(); ++it) {
 687                 string item = strip(frontStrip(*it));
 688                 if (item.empty()) continue;
 689
 690                 if (i++ > 0) str += delim;
 691                 str += item;
 692         }
 693         return str;
 694 }