src/support/lstrings.C

   1 /* This file is part of
   2  * ======================================================
   3  *
   4  *           LyX, The Document Processor
   5  *
   6  *           Copyright 1995 Matthias Ettrich
   7  *           Copyright 1995-2001 The LyX Team.
   8  *
   9  * ====================================================== */
  10
  11 #include <config.h>
  12
  13 #ifdef __GNUG__
  14 #pragma implementation
  15 #endif
  16
  17 #include "LString.h"
  18 #include "lstrings.h"
  19 #include "LAssert.h"
  20 #include "debug.h"
  21
  22 #include <boost/regex.hpp>
  23
  24 #include <algorithm>
  25
  26 #include <cctype>
  27 #include <cstdlib>
  28
  29 using std::count;
  30 using std::transform;
  31 using std::vector;
  32
  33 #ifndef CXX_GLOBAL_CSTD
  34 using std::atof;
  35 using std::isdigit;
  36 using std::strlen;
  37 using std::tolower;
  38 using std::toupper;
  39 #endif
  40
  41
  42 int compare_no_case(string const & s, string const & s2)
  43 {
  44         string::const_iterator p = s.begin();
  45         string::const_iterator p2 = s2.begin();
  46
  47         while (p != s.end() && p2 != s2.end()) {
  48                 int const lc1 = tolower(*p);
  49                 int const lc2 = tolower(*p2);
  50                 if (lc1 != lc2)
  51                         return (lc1 < lc2) ? -1 : 1;
  52                 ++p;
  53                 ++p2;
  54         }
  55
  56         if (s.size() == s2.size())
  57                 return 0;
  58         if (s.size() < s2.size())
  59                 return -1;
  60         return 1;
  61 }
  62
  63
  64 namespace {
  65         int ascii_tolower(int c) {
  66                 if (c >= 'A' && c <= 'Z')
  67                         return c - 'A' + 'a';
  68                 return c;
  69         }
  70 }
  71
  72
  73 int compare_ascii_no_case(string const & s, string const & s2)
  74 {
  75         string::const_iterator p = s.begin();
  76         string::const_iterator p2 = s2.begin();
  77
  78         while (p != s.end() && p2 != s2.end()) {
  79                 int const lc1 = ascii_tolower(*p);
  80                 int const lc2 = ascii_tolower(*p2);
  81                 if (lc1 != lc2)
  82                         return (lc1 < lc2) ? -1 : 1;
  83                 ++p;
  84                 ++p2;
  85         }
  86
  87         if (s.size() == s2.size())
  88                 return 0;
  89         if (s.size() < s2.size())
  90                 return -1;
  91         return 1;
  92 }
  93
  94
  95 int compare_no_case(string const & s, string const & s2, unsigned int len)
  96 {
  97         string::const_iterator p = s.begin();
  98         string::const_iterator p2 = s2.begin();
  99         unsigned int i = 0;
 100         while (i < len && p != s.end() && p2 != s2.end()) {
 101                 int const lc1 = tolower(*p);
 102                 int const lc2 = tolower(*p2);
 103                 if (lc1 != lc2)
 104                         return (lc1 < lc2) ? -1 : 1;
 105                 ++i;
 106                 ++p;
 107                 ++p2;
 108         }
 109
 110         if (s.size() >= len && s2.size() >= len)
 111                 return 0;
 112         if (s.size() < s2.size())
 113                 return -1;
 114         return 1;
 115 }
 116
 117
 118 bool isStrInt(string const & str)
 119 {
 120         if (str.empty()) return false;
 121
 122         // Remove leading and trailing white space chars.
 123         string const tmpstr = frontStrip(strip(str));
 124         if (tmpstr.empty()) return false;
 125
 126         string::const_iterator cit = tmpstr.begin();
 127         if ((*cit) == '-') ++cit;
 128         string::const_iterator end = tmpstr.end();
 129         for (; cit != end; ++cit) {
 130                 if (!isdigit((*cit))) return false;
 131         }
 132         return true;
 133 }
 134
 135
 136 bool isStrUnsignedInt(string const & str)
 137 {
 138         if (str.empty()) return false;
 139
 140         // Remove leading and trailing white space chars.
 141         string const tmpstr = frontStrip(strip(str));
 142         if (tmpstr.empty()) return false;
 143
 144         string::const_iterator cit = tmpstr.begin();
 145         string::const_iterator end = tmpstr.end();
 146         for (; cit != end; ++cit) {
 147                 if (!isdigit((*cit))) return false;
 148         }
 149         return true;
 150 }
 151
 152
 153 int strToInt(string const & str)
 154 {
 155         if (isStrInt(str)) {
 156                 // Remove leading and trailing white space chars.
 157                 string const tmpstr = frontStrip(strip(str));
 158                 // Do the conversion proper.
 159                 return lyx::atoi(tmpstr);
 160         } else {
 161                 return 0;
 162         }
 163 }
 164
 165
 166 unsigned int strToUnsignedInt(string const & str)
 167 {
 168         if (isStrUnsignedInt(str)) {
 169                 // Remove leading and trailing white space chars.
 170                 string const tmpstr = frontStrip(strip(str));
 171                 // Do the conversion proper.
 172                 return lyx::atoi(tmpstr);
 173         } else {
 174                 return 0;
 175         }
 176 }
 177
 178
 179 bool isStrDbl(string const & str)
 180 {
 181         if (str.empty()) return false;
 182
 183         // Remove leading and trailing white space chars.
 184         string const tmpstr = frontStrip(strip(str));
 185         if (tmpstr.empty()) return false;
 186         //      if (1 < tmpstr.count('.')) return false;
 187
 188         string::const_iterator cit = tmpstr.begin();
 189         bool found_dot(false);
 190         if ((*cit) == '-') ++cit;
 191         string::const_iterator end = tmpstr.end();
 192         for (; cit != end; ++cit) {
 193                 if (!isdigit((*cit))
 194                     && '.' != (*cit)) {
 195                         return false;
 196                 }
 197                 if ('.' == (*cit)) {
 198                         if (found_dot) {
 199                                 return false;
 200                         } else {
 201                                 found_dot = true;
 202                         }
 203                 }
 204         }
 205         return true;
 206 }
 207
 208
 209 double strToDbl(string const & str)
 210 {
 211         if (isStrDbl(str)) {
 212                 // Remove leading and trailing white space chars.
 213                 string const tmpstr = frontStrip(strip(str));
 214                 // Do the conversion proper.
 215                 return ::atof(tmpstr.c_str());
 216         } else {
 217                 return 0.0;
 218         }
 219 }
 220
 221
 222 char lowercase(char c)
 223 {
 224         return char(tolower(c));
 225 }
 226
 227
 228 char uppercase(char c)
 229 {
 230         return char(toupper(c));
 231 }
 232
 233
 234 namespace {
 235
 236 // since we cannot use std::tolower and std::toupper directly in the
 237 // calls to std::transform yet, we use these helper clases. (Lgb)
 238
 239 struct local_lowercase {
 240         char operator()(char c) const {
 241                 return tolower(c);
 242         }
 243 };
 244
 245 struct local_uppercase {
 246         char operator()(char c) const {
 247                 return toupper(c);
 248         }
 249 };
 250
 251 struct local_ascii_lowercase {
 252         char operator()(char c) const {
 253                 return ascii_tolower(c);
 254         }
 255 };
 256
 257 } // end of anon namespace
 258
 259 string const lowercase(string const & a)
 260 {
 261         string tmp(a);
 262         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 263         return tmp;
 264 }
 265
 266 string const uppercase(string const & a)
 267 {
 268         string tmp(a);
 269         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 270         return tmp;
 271 }
 272
 273
 274 string const ascii_lowercase(string const & a)
 275 {
 276         string tmp(a);
 277         transform(tmp.begin(), tmp.end(), tmp.begin(),
 278                   local_ascii_lowercase());
 279         return tmp;
 280 }
 281
 282
 283 bool prefixIs(string const & a, char const * pre)
 284 {
 285         lyx::Assert(pre);
 286
 287         size_t const l = strlen(pre);
 288         string::size_type const alen = a.length();
 289
 290         if (l > alen || a.empty())
 291                 return false;
 292         else {
 293 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 294                 // Delete this code when the compilers get a bit better.
 295                 return ::strncmp(a.c_str(), pre, l) == 0;
 296 #else
 297                 // This is the code that we really want to use
 298                 // but until gcc ships with a basic_string that
 299                 // implements std::string correctly we have to
 300                 // use the code above.
 301                 return a.compare(0, l, pre, l) == 0;
 302 #endif
 303         }
 304 }
 305
 306
 307 bool prefixIs(string const & a, string const & pre)
 308 {
 309         string::size_type const prelen = pre.length();
 310         string::size_type const alen = a.length();
 311
 312         if (prelen > alen || a.empty())
 313                 return false;
 314         else {
 315 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 316                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 317 #else
 318                 return a.compare(0, prelen, pre) == 0;
 319 #endif
 320         }
 321 }
 322
 323
 324 bool suffixIs(string const & a, char c)
 325 {
 326         if (a.empty()) return false;
 327         return a[a.length() - 1] == c;
 328 }
 329
 330
 331 bool suffixIs(string const & a, char const * suf)
 332 {
 333         lyx::Assert(suf);
 334
 335         size_t const suflen = strlen(suf);
 336         string::size_type const alen = a.length();
 337
 338         if (suflen > alen)
 339                 return false;
 340         else {
 341 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 342                 // Delete this code when the compilers get a bit better.
 343                 string tmp(a, alen - suflen);
 344                 return ::strncmp(tmp.c_str(), suf, suflen) == 0;
 345 #else
 346                 // This is the code that we really want to use
 347                 // but until gcc ships with a basic_string that
 348                 // implements std::string correctly we have to
 349                 // use the code above.
 350                 return a.compare(alen - suflen, suflen, suf) == 0;
 351 #endif
 352         }
 353 }
 354
 355
 356 bool suffixIs(string const & a, string const & suf)
 357 {
 358         string::size_type const suflen = suf.length();
 359         string::size_type const alen = a.length();
 360
 361         if (suflen > alen) {
 362                 return false;
 363         } else {
 364 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 365                 string tmp(a, alen - suflen);
 366                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 367 #else
 368                 return a.compare(alen - suflen, suflen, suf) == 0;
 369 #endif
 370         }
 371 }
 372
 373
 374 bool contains(string const & a, string const & b)
 375 {
 376         if (a.empty())
 377                 return false;
 378         return a.find(b) != string::npos;
 379 }
 380
 381
 382 bool contains(string const & a, char b)
 383 {
 384         if (a.empty())
 385                 return false;
 386         return a.find(b) != string::npos;
 387 }
 388
 389
 390 bool containsOnly(string const & s, string const & cset)
 391 {
 392         return s.find_first_not_of(cset) == string::npos;
 393 }
 394
 395
 396 // ale970405+lasgoutt-970425
 397 // rewritten to use new string (Lgb)
 398 string const token(string const & a, char delim, int n)
 399 {
 400         if (a.empty()) return string();
 401
 402         string::size_type k = 0;
 403         string::size_type i = 0;
 404
 405         // Find delimiter or end of string
 406         for (; n--;)
 407                 if ((i = a.find(delim, i)) == string::npos)
 408                         break;
 409                 else
 410                         ++i; // step delim
 411         // i is now the n'th delim (or string::npos)
 412         if (i == string::npos) return string();
 413         k = a.find(delim, i);
 414         // k is now the n'th + 1 delim (or string::npos)
 415
 416         return a.substr(i, k - i);
 417 }
 418
 419
 420 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 421 // rewritten to use new string (Lgb)
 422 int tokenPos(string const & a, char delim, string const & tok)
 423 {
 424         int i = 0;
 425         string str(a);
 426         string tmptok;
 427
 428         while (!str.empty()) {
 429                 str = split(str, tmptok, delim);
 430                 if (tok == tmptok)
 431                         return i;
 432                 ++i;
 433         }
 434         return -1;
 435 }
 436
 437
 438 bool regexMatch(string const & a, string const & pattern)
 439 {
 440         // We massage the pattern a bit so that the usual
 441         // shell pattern we all are used to will work.
 442         // One nice thing about using a real regex is that
 443         // things like "*.*[^~]" will work also.
 444         // build the regex string.
 445         string regex(pattern);
 446         regex = subst(regex, ".", "\\.");
 447         regex = subst(regex, "*", ".*");
 448         boost::regex reg(regex);
 449         return boost::regex_match(a, reg);
 450 }
 451
 452
 453 string const subst(string const & a, char oldchar, char newchar)
 454 {
 455         string tmp(a);
 456         string::iterator lit = tmp.begin();
 457         string::iterator end = tmp.end();
 458         for (; lit != end; ++lit)
 459                 if ((*lit) == oldchar)
 460                         (*lit) = newchar;
 461         return tmp;
 462 }
 463
 464
 465 string const subst(string const & a,
 466                    char const * oldstr, string const & newstr)
 467 {
 468         lyx::Assert(oldstr);
 469
 470         string lstr(a);
 471         string::size_type i = 0;
 472         string::size_type olen = strlen(oldstr);
 473         while ((i = lstr.find(oldstr, i)) != string::npos) {
 474                 lstr.replace(i, olen, newstr);
 475                 i += newstr.length(); // We need to be sure that we dont
 476                 // use the same i over and over again.
 477         }
 478         return lstr;
 479 }
 480
 481
 482 string const subst(string const & a,
 483                    string const & oldstr, string const & newstr)
 484 {
 485         string lstr(a);
 486         string::size_type i = 0;
 487         string::size_type const olen = oldstr.length();
 488         while ((i = lstr.find(oldstr, i)) != string::npos) {
 489                 lstr.replace(i, olen, newstr);
 490                 i += newstr.length(); // We need to be sure that we dont
 491                 // use the same i over and over again.
 492         }
 493         return lstr;
 494 }
 495
 496
 497 string const strip(string const & a, char const * p)
 498 {
 499         lyx::Assert(p);
 500
 501         if (a.empty() || !*p) return a;
 502         string tmp(a);
 503         string::size_type i = tmp.find_last_not_of(p);
 504         if (i == a.length() - 1) return tmp; // no c's at end of a
 505         if (i != string::npos)
 506                 tmp.erase(i + 1, string::npos);
 507 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 508         // Ok This code is now suspect... (Lgb)
 509         /// Needed for broken string::find_last_not_of
 510         else if (tmp[0] != p[0]) {
 511                 if (a.length() == 1) return tmp;
 512                 tmp.erase(1, string::npos);
 513         }
 514 #endif
 515         else
 516                 tmp.erase(); // only chars from p in the whole string
 517         return tmp;
 518 }
 519
 520
 521 string const frontStrip(string const & a, char const * p)
 522 {
 523         lyx::Assert(p);
 524
 525         if (a.empty() || !*p) return a;
 526         string tmp(a);
 527         string::size_type i = tmp.find_first_not_of(p);
 528         if (i > 0)
 529                 tmp.erase(0, i);
 530         return tmp;
 531 }
 532
 533
 534 string const split(string const & a, string & piece, char delim)
 535 {
 536         string tmp;
 537         string::size_type i = a.find(delim);
 538         if (i == a.length() - 1) {
 539                 piece = a.substr(0, i);
 540         } else if (i != string::npos) {
 541                 piece = a.substr(0, i);
 542                 tmp = a.substr(i + 1);
 543         } else if (i == 0) {
 544                 piece.erase();
 545                 tmp = a.substr(i + 1);
 546         } else {
 547                 piece = a;
 548         }
 549         return tmp;
 550 }
 551
 552
 553 string const split(string const & a, char delim)
 554 {
 555         string tmp;
 556         string::size_type i = a.find(delim);
 557         if (i != string::npos) // found delim
 558                 tmp = a.substr(i + 1);
 559         return tmp;
 560 }
 561
 562
 563 // ale970521
 564 string const rsplit(string const & a, string & piece, char delim)
 565 {
 566         string tmp;
 567         string::size_type i = a.rfind(delim);
 568         if (i != string::npos) { // delimiter was found
 569                 piece = a.substr(0, i);
 570                 tmp = a.substr(i + 1);
 571         } else { // delimter was not found
 572                 piece.erase();
 573         }
 574         return tmp;
 575 }
 576
 577
 578 // This function escapes 8-bit characters and other problematic
 579 // characters that cause problems in latex labels.
 580 string const escape(string const & lab)
 581 {
 582         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 583                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 584         string enc;
 585         for (string::size_type i = 0; i < lab.length(); ++i) {
 586                 unsigned char c= lab[i];
 587                 if (c >= 128 || c == '=' || c == '%') {
 588                         enc += '=';
 589                         enc += hexdigit[c>>4];
 590                         enc += hexdigit[c & 15];
 591                 } else {
 592                         enc += c;
 593                 }
 594         }
 595         return enc;
 596 }
 597
 598
 599 /// gives a vector of stringparts which have the delimiter delim
 600 vector<string> const getVectorFromString(string const & str,
 601                                          string const & delim)
 602 {
 603     vector<string> vec;
 604     if (str.empty())
 605         return vec;
 606     string keys(strip(str));
 607     for(;;) {
 608         string::size_type const idx = keys.find(delim);
 609         if (idx == string::npos) {
 610             vec.push_back(frontStrip(keys));
 611             break;
 612         }
 613         string const key = strip(frontStrip(keys.substr(0, idx)));
 614         if (!key.empty())
 615             vec.push_back(key);
 616         string::size_type const start = idx + delim.size();
 617         keys = keys.substr(start);
 618     }
 619     return vec;
 620 }
 621
 622 // the same vice versa
 623 string const getStringFromVector(vector<string> const & vec,
 624                                  string const & delim)
 625 {
 626         string str;
 627         int i = 0;
 628         for (vector<string>::const_iterator it = vec.begin();
 629              it != vec.end(); ++it) {
 630                 string item = strip(frontStrip(*it));
 631                 if (item.empty()) continue;
 632
 633                 if (i++ > 0) str += delim;
 634                 str += item;
 635         }
 636         return str;
 637 }