src/support/lstrings.C

   1 /* This file is part of
   2  * ======================================================
   3  *
   4  *           LyX, The Document Processor
   5  *
   6  *           Copyright 1995 Matthias Ettrich
   7  *           Copyright 1995-2001 The LyX Team.
   8  *
   9  * ====================================================== */
  10
  11 #include <config.h>
  12
  13 #ifdef __GNUG__
  14 #pragma implementation
  15 #endif
  16
  17 #include <algorithm>
  18
  19 #include <cctype>
  20 #include <cstdlib>
  21
  22 #include "LString.h"
  23 #include "lstrings.h"
  24 #include "LRegex.h"
  25 #include "LAssert.h"
  26 #include "debug.h"
  27
  28 using std::count;
  29 using std::transform;
  30 using std::vector;
  31
  32 #ifndef CXX_GLOBAL_CSTD
  33 using std::tolower;
  34 using std::toupper;
  35 using std::strlen;
  36 #endif
  37
  38
  39 int compare_no_case(string const & s, string const & s2)
  40 {
  41         string::const_iterator p = s.begin();
  42         string::const_iterator p2 = s2.begin();
  43
  44         while (p != s.end() && p2 != s2.end()) {
  45                 int const lc1 = tolower(*p);
  46                 int const lc2 = tolower(*p2);
  47                 if (lc1 != lc2)
  48                         return (lc1 < lc2) ? -1 : 1;
  49                 ++p;
  50                 ++p2;
  51         }
  52
  53         if (s.size() == s2.size())
  54                 return 0;
  55         if (s.size() < s2.size())
  56                 return -1;
  57         return 1;
  58 }
  59
  60
  61 namespace {
  62         int ascii_tolower(int c) {
  63                 if (c >= 'A' && c <= 'Z')
  64                         return c - 'A' + 'a';
  65                 return c;
  66         }
  67 }
  68
  69
  70 int compare_ascii_no_case(string const & s, string const & s2)
  71 {
  72         string::const_iterator p = s.begin();
  73         string::const_iterator p2 = s2.begin();
  74
  75         while (p != s.end() && p2 != s2.end()) {
  76                 int const lc1 = ascii_tolower(*p);
  77                 int const lc2 = ascii_tolower(*p2);
  78                 if (lc1 != lc2)
  79                         return (lc1 < lc2) ? -1 : 1;
  80                 ++p;
  81                 ++p2;
  82         }
  83
  84         if (s.size() == s2.size())
  85                 return 0;
  86         if (s.size() < s2.size())
  87                 return -1;
  88         return 1;
  89 }
  90
  91
  92 int compare_no_case(string const & s, string const & s2, unsigned int len)
  93 {
  94         string::const_iterator p = s.begin();
  95         string::const_iterator p2 = s2.begin();
  96         unsigned int i = 0;
  97         while (i < len && p != s.end() && p2 != s2.end()) {
  98                 int const lc1 = tolower(*p);
  99                 int const lc2 = tolower(*p2);
 100                 if (lc1 != lc2)
 101                         return (lc1 < lc2) ? -1 : 1;
 102                 ++i;
 103                 ++p;
 104                 ++p2;
 105         }
 106
 107         if (s.size() >= len && s2.size() >= len)
 108                 return 0;
 109         if (s.size() < s2.size())
 110                 return -1;
 111         return 1;
 112 }
 113
 114
 115 bool isStrInt(string const & str)
 116 {
 117         if (str.empty()) return false;
 118
 119         // Remove leading and trailing white space chars.
 120         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 121         if (tmpstr.empty()) return false;
 122
 123         string::const_iterator cit = tmpstr.begin();
 124         if ((*cit) == '-') ++cit;
 125         string::const_iterator end = tmpstr.end();
 126         for (; cit != end; ++cit) {
 127                 if (!isdigit((*cit))) return false;
 128         }
 129         return true;
 130 }
 131
 132
 133 bool isStrUnsignedInt(string const & str)
 134 {
 135         if (str.empty()) return false;
 136
 137         // Remove leading and trailing white space chars.
 138         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 139         if (tmpstr.empty()) return false;
 140
 141         string::const_iterator cit = tmpstr.begin();
 142         string::const_iterator end = tmpstr.end();
 143         for (; cit != end; ++cit) {
 144                 if (!isdigit((*cit))) return false;
 145         }
 146         return true;
 147 }
 148
 149
 150 int strToInt(string const & str)
 151 {
 152         if (isStrInt(str)) {
 153                 // Remove leading and trailing white space chars.
 154                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 155                 // Do the conversion proper.
 156                 return lyx::atoi(tmpstr);
 157         } else {
 158                 return 0;
 159         }
 160 }
 161
 162
 163 unsigned int strToUnsignedInt(string const & str)
 164 {
 165         if (isStrUnsignedInt(str)) {
 166                 // Remove leading and trailing white space chars.
 167                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 168                 // Do the conversion proper.
 169                 return lyx::atoi(tmpstr);
 170         } else {
 171                 return 0;
 172         }
 173 }
 174
 175
 176 bool isStrDbl(string const & str)
 177 {
 178         if (str.empty()) return false;
 179
 180         // Remove leading and trailing white space chars.
 181         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 182         if (tmpstr.empty()) return false;
 183         //      if (1 < tmpstr.count('.')) return false;
 184
 185         string::const_iterator cit = tmpstr.begin();
 186         bool found_dot(false);
 187         if ((*cit) == '-') ++cit;
 188         string::const_iterator end = tmpstr.end();
 189         for (; cit != end; ++cit) {
 190                 if (!isdigit((*cit))
 191                     && '.' != (*cit)) {
 192                         return false;
 193                 }
 194                 if ('.' == (*cit)) {
 195                         if (found_dot) {
 196                                 return false;
 197                         } else {
 198                                 found_dot = true;
 199                         }
 200                 }
 201         }
 202         return true;
 203 }
 204
 205
 206 double strToDbl(string const & str)
 207 {
 208         if (isStrDbl(str)) {
 209                 // Remove leading and trailing white space chars.
 210                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 211                 // Do the conversion proper.
 212                 return ::atof(tmpstr.c_str());
 213         } else {
 214                 return 0.0;
 215         }
 216 }
 217
 218
 219 char lowercase(char c)
 220 {
 221         return char(tolower(c));
 222 }
 223
 224
 225 char uppercase(char c)
 226 {
 227         return char(toupper(c));
 228 }
 229
 230
 231 namespace {
 232
 233 // since we cannot use std::tolower and std::toupper directly in the
 234 // calls to std::transform yet, we use these helper clases. (Lgb)
 235
 236 struct local_lowercase {
 237         char operator()(char c) const {
 238                 return tolower(c);
 239         }
 240 };
 241
 242 struct local_uppercase {
 243         char operator()(char c) const {
 244                 return toupper(c);
 245         }
 246 };
 247
 248 } // end of anon namespace
 249
 250 string const lowercase(string const & a)
 251 {
 252         string tmp(a);
 253         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 254         return tmp;
 255 }
 256
 257 string const uppercase(string const & a)
 258 {
 259         string tmp(a);
 260         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 261         return tmp;
 262 }
 263
 264
 265 bool prefixIs(string const & a, char const * pre)
 266 {
 267         lyx::Assert(pre);
 268
 269         size_t const l = strlen(pre);
 270         string::size_type const alen = a.length();
 271
 272         if (l > alen || a.empty())
 273                 return false;
 274         else {
 275 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 276                 // Delete this code when the compilers get a bit better.
 277                 return ::strncmp(a.c_str(), pre, l) == 0;
 278 #else
 279                 // This is the code that we really want to use
 280                 // but until gcc ships with a basic_string that
 281                 // implements std::string correctly we have to
 282                 // use the code above.
 283                 return a.compare(0, l, pre, l) == 0;
 284 #endif
 285         }
 286 }
 287
 288
 289 bool prefixIs(string const & a, string const & pre)
 290 {
 291         string::size_type const prelen = pre.length();
 292         string::size_type const alen = a.length();
 293
 294         if (prelen > alen || a.empty())
 295                 return false;
 296         else {
 297 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 298                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 299 #else
 300                 return a.compare(0, prelen, pre) == 0;
 301 #endif
 302         }
 303 }
 304
 305
 306 bool suffixIs(string const & a, char c)
 307 {
 308         if (a.empty()) return false;
 309         return a[a.length() - 1] == c;
 310 }
 311
 312
 313 bool suffixIs(string const & a, char const * suf)
 314 {
 315         lyx::Assert(suf);
 316
 317         size_t const suflen = strlen(suf);
 318         string::size_type const alen = a.length();
 319
 320         if (suflen > alen)
 321                 return false;
 322         else {
 323 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 324                 // Delete this code when the compilers get a bit better.
 325                 string tmp(a, alen - suflen);
 326                 return ::strncmp(tmp.c_str(), suf, suflen) == 0;
 327 #else
 328                 // This is the code that we really want to use
 329                 // but until gcc ships with a basic_string that
 330                 // implements std::string correctly we have to
 331                 // use the code above.
 332                 return a.compare(alen - suflen, suflen, suf) == 0;
 333 #endif
 334         }
 335 }
 336
 337
 338 bool suffixIs(string const & a, string const & suf)
 339 {
 340         string::size_type const suflen = suf.length();
 341         string::size_type const alen = a.length();
 342
 343         if (suflen > alen) {
 344                 return false;
 345         } else {
 346 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 347                 string tmp(a, alen - suflen);
 348                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 349 #else
 350                 return a.compare(alen - suflen, suflen, suf) == 0;
 351 #endif
 352         }
 353 }
 354
 355
 356 bool contains(char const * a, string const & b)
 357 {
 358         lyx::Assert(a);
 359         string const at(a);
 360         return contains(at, b);
 361 }
 362
 363
 364 bool contains(string const & a, char const * b)
 365 {
 366         lyx::Assert(b);
 367         string const bt(b);
 368         return contains(a, bt);
 369 }
 370
 371
 372 bool contains(string const & a, string const & b)
 373 {
 374         if (a.empty())
 375                 return false;
 376         return a.find(b) != string::npos;
 377 }
 378
 379
 380 bool contains(string const & a, char b)
 381 {
 382         if (a.empty())
 383                 return false;
 384         return a.find(b) != string::npos;
 385 }
 386
 387
 388 bool contains(char const * a, char const * b)
 389 {
 390         lyx::Assert(a && b);
 391         string const at(a);
 392         string const bt(b);
 393         return contains(at, bt);
 394 }
 395
 396
 397 bool containsOnly(string const & s, char const * cset)
 398 {
 399         lyx::Assert(cset);
 400
 401         return s.find_first_not_of(cset) == string::npos;
 402 }
 403
 404
 405 bool containsOnly(string const & s, string const & cset)
 406 {
 407         return s.find_first_not_of(cset) == string::npos;
 408 }
 409
 410
 411 bool containsOnly(char const * s, char const * cset)
 412 {
 413         lyx::Assert(s && cset);
 414
 415         return string(s).find_first_not_of(cset) == string::npos;
 416 }
 417
 418
 419 bool containsOnly(char const * s, string const & cset)
 420 {
 421         lyx::Assert(s);
 422
 423         return string(s).find_first_not_of(cset) == string::npos;
 424 }
 425
 426
 427 // ale970405+lasgoutt-970425
 428 // rewritten to use new string (Lgb)
 429 string const token(string const & a, char delim, int n)
 430 {
 431         if (a.empty()) return string();
 432
 433         string::size_type k = 0;
 434         string::size_type i = 0;
 435
 436         // Find delimiter or end of string
 437         for (; n--;)
 438                 if ((i = a.find(delim, i)) == string::npos)
 439                         break;
 440                 else
 441                         ++i; // step delim
 442         // i is now the n'th delim (or string::npos)
 443         if (i == string::npos) return string();
 444         k = a.find(delim, i);
 445         // k is now the n'th + 1 delim (or string::npos)
 446
 447         return a.substr(i, k - i);
 448 }
 449
 450
 451 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 452 // rewritten to use new string (Lgb)
 453 int tokenPos(string const & a, char delim, string const & tok)
 454 {
 455         int i = 0;
 456         string str(a);
 457         string tmptok;
 458
 459         while (!str.empty()) {
 460                 str = split(str, tmptok, delim);
 461                 if (tok == tmptok)
 462                         return i;
 463                 ++i;
 464         }
 465         return -1;
 466 }
 467
 468
 469 bool regexMatch(string const & a, string const & pattern)
 470 {
 471         // We massage the pattern a bit so that the usual
 472         // shell pattern we all are used to will work.
 473         // One nice thing about using a real regex is that
 474         // things like "*.*[^~]" will work also.
 475         // build the regex string.
 476         string regex(pattern);
 477         regex = subst(regex, ".", "\\.");
 478         regex = subst(regex, "*", ".*");
 479         LRegex reg(regex);
 480         return reg.exact_match(a);
 481 }
 482
 483
 484 string const subst(string const & a, char oldchar, char newchar)
 485 {
 486         string tmp(a);
 487         string::iterator lit = tmp.begin();
 488         string::iterator end = tmp.end();
 489         for (; lit != end; ++lit)
 490                 if ((*lit) == oldchar)
 491                         (*lit) = newchar;
 492         return tmp;
 493 }
 494
 495
 496 string const subst(string const & a,
 497                    char const * oldstr, string const & newstr)
 498 {
 499         lyx::Assert(oldstr);
 500
 501         string lstr(a);
 502         string::size_type i = 0;
 503         string::size_type olen = strlen(oldstr);
 504         while ((i = lstr.find(oldstr, i)) != string::npos) {
 505                 lstr.replace(i, olen, newstr);
 506                 i += newstr.length(); // We need to be sure that we dont
 507                 // use the same i over and over again.
 508         }
 509         return lstr;
 510 }
 511
 512
 513 string const subst(string const & a,
 514                    string const & oldstr, string const & newstr)
 515 {
 516         string lstr(a);
 517         string::size_type i = 0;
 518         string::size_type const olen = oldstr.length();
 519         while ((i = lstr.find(oldstr, i)) != string::npos) {
 520                 lstr.replace(i, olen, newstr);
 521                 i += newstr.length(); // We need to be sure that we dont
 522                 // use the same i over and over again.
 523         }
 524         return lstr;
 525 }
 526
 527
 528 string const strip(string const & a, char c)
 529 {
 530         if (a.empty()) return a;
 531         string tmp(a);
 532         string::size_type i = tmp.find_last_not_of(c);
 533         if (i == a.length() - 1) return tmp; // no c's at end of a
 534         if (i != string::npos)
 535                 tmp.erase(i + 1, string::npos);
 536 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 537         /// Needed for broken string::find_last_not_of
 538         else if (tmp[0] != c) {
 539                 if (a.length() == 1) return tmp;
 540                 tmp.erase(1, string::npos);
 541         }
 542 #endif
 543         else
 544                 tmp.erase(); // only c in the whole string
 545         return tmp;
 546 }
 547
 548
 549 string const frontStrip(string const & a, char const * p)
 550 {
 551         lyx::Assert(p);
 552
 553         if (a.empty() || !*p) return a;
 554         string tmp(a);
 555         string::size_type i = tmp.find_first_not_of(p);
 556         if (i > 0)
 557                 tmp.erase(0, i);
 558         return tmp;
 559 }
 560
 561
 562 string const frontStrip(string const & a, char c)
 563 {
 564         if (a.empty()) return a;
 565         string tmp(a);
 566         string::size_type i = tmp.find_first_not_of(c);
 567         if (i > 0)
 568                 tmp.erase(0, i);
 569         return tmp;
 570 }
 571
 572
 573 string const split(string const & a, string & piece, char delim)
 574 {
 575         string tmp;
 576         string::size_type i = a.find(delim);
 577         if (i == a.length() - 1) {
 578                 piece = a.substr(0, i);
 579         } else if (i != string::npos) {
 580                 piece = a.substr(0, i);
 581                 tmp = a.substr(i + 1);
 582         } else if (i == 0) {
 583                 piece.erase();
 584                 tmp = a.substr(i + 1);
 585         } else {
 586                 piece = a;
 587         }
 588         return tmp;
 589 }
 590
 591
 592 string const split(string const & a, char delim)
 593 {
 594         string tmp;
 595         string::size_type i = a.find(delim);
 596         if (i != string::npos) // found delim
 597                 tmp = a.substr(i + 1);
 598         return tmp;
 599 }
 600
 601
 602 // ale970521
 603 string const rsplit(string const & a, string & piece, char delim)
 604 {
 605         string tmp;
 606         string::size_type i = a.rfind(delim);
 607         if (i != string::npos) { // delimiter was found
 608                 piece = a.substr(0, i);
 609                 tmp = a.substr(i + 1);
 610         } else { // delimter was not found
 611                 piece.erase();
 612         }
 613         return tmp;
 614 }
 615
 616
 617 // This function escapes 8-bit characters and other problematic
 618 // characters that cause problems in latex labels.
 619 string const escape(string const & lab)
 620 {
 621         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 622                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 623         string enc;
 624         for (string::size_type i = 0; i < lab.length(); ++i) {
 625                 unsigned char c= lab[i];
 626                 if (c >= 128 || c == '=' || c == '%') {
 627                         enc += '=';
 628                         enc += hexdigit[c>>4];
 629                         enc += hexdigit[c & 15];
 630                 } else {
 631                         enc += c;
 632                 }
 633         }
 634         return enc;
 635 }
 636
 637
 638 /// gives a vector of stringparts which have the delimiter delim
 639 vector<string> const getVectorFromString(string const & str,
 640                                          string const & delim)
 641 {
 642     vector<string> vec;
 643     if (str.empty())
 644         return vec;
 645     string keys(strip(str));
 646     for(;;) {
 647         string::size_type const idx = keys.find(delim);
 648         if (idx == string::npos) {
 649             vec.push_back(frontStrip(keys));
 650             break;
 651         }
 652         string const key = strip(frontStrip(keys.substr(0, idx)));
 653         if (!key.empty())
 654             vec.push_back(key);
 655         string::size_type const start = idx + delim.size();
 656         keys = keys.substr(start);
 657     }
 658     return vec;
 659 }
 660
 661 // the same vice versa
 662 string const getStringFromVector(vector<string> const & vec,
 663                                  string const & delim)
 664 {
 665         string str;
 666         int i = 0;
 667         for (vector<string>::const_iterator it = vec.begin();
 668              it != vec.end(); ++it) {
 669                 string item = strip(frontStrip(*it));
 670                 if (item.empty()) continue;
 671
 672                 if (i++ > 0) str += delim;
 673                 str += item;
 674         }
 675         return str;
 676 }
 677