src/support/lstrings.C

   1 /* This file is part of
   2  * ======================================================
   3  *
   4  *           LyX, The Document Processor
   5  *
   6  *           Copyright 1995 Matthias Ettrich
   7  *           Copyright 1995-2001 The LyX Team.
   8  *
   9  * ====================================================== */
  10
  11 #include <config.h>
  12
  13 #ifdef __GNUG__
  14 #pragma implementation
  15 #endif
  16
  17 #include "LString.h"
  18 #include "lstrings.h"
  19 #include "LAssert.h"
  20 #include "debug.h"
  21
  22 #include <boost/regex.hpp>
  23
  24 #include <algorithm>
  25
  26 #include <cctype>
  27 #include <cstdlib>
  28
  29 using std::count;
  30 using std::transform;
  31 using std::vector;
  32
  33 #ifndef CXX_GLOBAL_CSTD
  34 using std::tolower;
  35 using std::toupper;
  36 using std::strlen;
  37 #endif
  38
  39
  40 int compare_no_case(string const & s, string const & s2)
  41 {
  42         string::const_iterator p = s.begin();
  43         string::const_iterator p2 = s2.begin();
  44
  45         while (p != s.end() && p2 != s2.end()) {
  46                 int const lc1 = tolower(*p);
  47                 int const lc2 = tolower(*p2);
  48                 if (lc1 != lc2)
  49                         return (lc1 < lc2) ? -1 : 1;
  50                 ++p;
  51                 ++p2;
  52         }
  53
  54         if (s.size() == s2.size())
  55                 return 0;
  56         if (s.size() < s2.size())
  57                 return -1;
  58         return 1;
  59 }
  60
  61
  62 namespace {
  63         int ascii_tolower(int c) {
  64                 if (c >= 'A' && c <= 'Z')
  65                         return c - 'A' + 'a';
  66                 return c;
  67         }
  68 }
  69
  70
  71 int compare_ascii_no_case(string const & s, string const & s2)
  72 {
  73         string::const_iterator p = s.begin();
  74         string::const_iterator p2 = s2.begin();
  75
  76         while (p != s.end() && p2 != s2.end()) {
  77                 int const lc1 = ascii_tolower(*p);
  78                 int const lc2 = ascii_tolower(*p2);
  79                 if (lc1 != lc2)
  80                         return (lc1 < lc2) ? -1 : 1;
  81                 ++p;
  82                 ++p2;
  83         }
  84
  85         if (s.size() == s2.size())
  86                 return 0;
  87         if (s.size() < s2.size())
  88                 return -1;
  89         return 1;
  90 }
  91
  92
  93 int compare_no_case(string const & s, string const & s2, unsigned int len)
  94 {
  95         string::const_iterator p = s.begin();
  96         string::const_iterator p2 = s2.begin();
  97         unsigned int i = 0;
  98         while (i < len && p != s.end() && p2 != s2.end()) {
  99                 int const lc1 = tolower(*p);
 100                 int const lc2 = tolower(*p2);
 101                 if (lc1 != lc2)
 102                         return (lc1 < lc2) ? -1 : 1;
 103                 ++i;
 104                 ++p;
 105                 ++p2;
 106         }
 107
 108         if (s.size() >= len && s2.size() >= len)
 109                 return 0;
 110         if (s.size() < s2.size())
 111                 return -1;
 112         return 1;
 113 }
 114
 115
 116 bool isStrInt(string const & str)
 117 {
 118         if (str.empty()) return false;
 119
 120         // Remove leading and trailing white space chars.
 121         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 122         if (tmpstr.empty()) return false;
 123
 124         string::const_iterator cit = tmpstr.begin();
 125         if ((*cit) == '-') ++cit;
 126         string::const_iterator end = tmpstr.end();
 127         for (; cit != end; ++cit) {
 128                 if (!isdigit((*cit))) return false;
 129         }
 130         return true;
 131 }
 132
 133
 134 bool isStrUnsignedInt(string const & str)
 135 {
 136         if (str.empty()) return false;
 137
 138         // Remove leading and trailing white space chars.
 139         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 140         if (tmpstr.empty()) return false;
 141
 142         string::const_iterator cit = tmpstr.begin();
 143         string::const_iterator end = tmpstr.end();
 144         for (; cit != end; ++cit) {
 145                 if (!isdigit((*cit))) return false;
 146         }
 147         return true;
 148 }
 149
 150
 151 int strToInt(string const & str)
 152 {
 153         if (isStrInt(str)) {
 154                 // Remove leading and trailing white space chars.
 155                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 156                 // Do the conversion proper.
 157                 return lyx::atoi(tmpstr);
 158         } else {
 159                 return 0;
 160         }
 161 }
 162
 163
 164 unsigned int strToUnsignedInt(string const & str)
 165 {
 166         if (isStrUnsignedInt(str)) {
 167                 // Remove leading and trailing white space chars.
 168                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 169                 // Do the conversion proper.
 170                 return lyx::atoi(tmpstr);
 171         } else {
 172                 return 0;
 173         }
 174 }
 175
 176
 177 bool isStrDbl(string const & str)
 178 {
 179         if (str.empty()) return false;
 180
 181         // Remove leading and trailing white space chars.
 182         string const tmpstr = frontStrip(strip(str, ' '), ' ');
 183         if (tmpstr.empty()) return false;
 184         //      if (1 < tmpstr.count('.')) return false;
 185
 186         string::const_iterator cit = tmpstr.begin();
 187         bool found_dot(false);
 188         if ((*cit) == '-') ++cit;
 189         string::const_iterator end = tmpstr.end();
 190         for (; cit != end; ++cit) {
 191                 if (!isdigit((*cit))
 192                     && '.' != (*cit)) {
 193                         return false;
 194                 }
 195                 if ('.' == (*cit)) {
 196                         if (found_dot) {
 197                                 return false;
 198                         } else {
 199                                 found_dot = true;
 200                         }
 201                 }
 202         }
 203         return true;
 204 }
 205
 206
 207 double strToDbl(string const & str)
 208 {
 209         if (isStrDbl(str)) {
 210                 // Remove leading and trailing white space chars.
 211                 string const tmpstr = frontStrip(strip(str, ' '), ' ');
 212                 // Do the conversion proper.
 213                 return ::atof(tmpstr.c_str());
 214         } else {
 215                 return 0.0;
 216         }
 217 }
 218
 219
 220 char lowercase(char c)
 221 {
 222         return char(tolower(c));
 223 }
 224
 225
 226 char uppercase(char c)
 227 {
 228         return char(toupper(c));
 229 }
 230
 231
 232 namespace {
 233
 234 // since we cannot use std::tolower and std::toupper directly in the
 235 // calls to std::transform yet, we use these helper clases. (Lgb)
 236
 237 struct local_lowercase {
 238         char operator()(char c) const {
 239                 return tolower(c);
 240         }
 241 };
 242
 243 struct local_uppercase {
 244         char operator()(char c) const {
 245                 return toupper(c);
 246         }
 247 };
 248
 249 } // end of anon namespace
 250
 251 string const lowercase(string const & a)
 252 {
 253         string tmp(a);
 254         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 255         return tmp;
 256 }
 257
 258 string const uppercase(string const & a)
 259 {
 260         string tmp(a);
 261         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 262         return tmp;
 263 }
 264
 265
 266 bool prefixIs(string const & a, char const * pre)
 267 {
 268         lyx::Assert(pre);
 269
 270         size_t const l = strlen(pre);
 271         string::size_type const alen = a.length();
 272
 273         if (l > alen || a.empty())
 274                 return false;
 275         else {
 276 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 277                 // Delete this code when the compilers get a bit better.
 278                 return ::strncmp(a.c_str(), pre, l) == 0;
 279 #else
 280                 // This is the code that we really want to use
 281                 // but until gcc ships with a basic_string that
 282                 // implements std::string correctly we have to
 283                 // use the code above.
 284                 return a.compare(0, l, pre, l) == 0;
 285 #endif
 286         }
 287 }
 288
 289
 290 bool prefixIs(string const & a, string const & pre)
 291 {
 292         string::size_type const prelen = pre.length();
 293         string::size_type const alen = a.length();
 294
 295         if (prelen > alen || a.empty())
 296                 return false;
 297         else {
 298 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 299                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 300 #else
 301                 return a.compare(0, prelen, pre) == 0;
 302 #endif
 303         }
 304 }
 305
 306
 307 bool suffixIs(string const & a, char c)
 308 {
 309         if (a.empty()) return false;
 310         return a[a.length() - 1] == c;
 311 }
 312
 313
 314 bool suffixIs(string const & a, char const * suf)
 315 {
 316         lyx::Assert(suf);
 317
 318         size_t const suflen = strlen(suf);
 319         string::size_type const alen = a.length();
 320
 321         if (suflen > alen)
 322                 return false;
 323         else {
 324 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 325                 // Delete this code when the compilers get a bit better.
 326                 string tmp(a, alen - suflen);
 327                 return ::strncmp(tmp.c_str(), suf, suflen) == 0;
 328 #else
 329                 // This is the code that we really want to use
 330                 // but until gcc ships with a basic_string that
 331                 // implements std::string correctly we have to
 332                 // use the code above.
 333                 return a.compare(alen - suflen, suflen, suf) == 0;
 334 #endif
 335         }
 336 }
 337
 338
 339 bool suffixIs(string const & a, string const & suf)
 340 {
 341         string::size_type const suflen = suf.length();
 342         string::size_type const alen = a.length();
 343
 344         if (suflen > alen) {
 345                 return false;
 346         } else {
 347 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 348                 string tmp(a, alen - suflen);
 349                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 350 #else
 351                 return a.compare(alen - suflen, suflen, suf) == 0;
 352 #endif
 353         }
 354 }
 355
 356
 357 bool contains(char const * a, string const & b)
 358 {
 359         lyx::Assert(a);
 360         string const at(a);
 361         return contains(at, b);
 362 }
 363
 364
 365 bool contains(string const & a, char const * b)
 366 {
 367         lyx::Assert(b);
 368         string const bt(b);
 369         return contains(a, bt);
 370 }
 371
 372
 373 bool contains(string const & a, string const & b)
 374 {
 375         if (a.empty())
 376                 return false;
 377         return a.find(b) != string::npos;
 378 }
 379
 380
 381 bool contains(string const & a, char b)
 382 {
 383         if (a.empty())
 384                 return false;
 385         return a.find(b) != string::npos;
 386 }
 387
 388
 389 bool contains(char const * a, char const * b)
 390 {
 391         lyx::Assert(a && b);
 392         string const at(a);
 393         string const bt(b);
 394         return contains(at, bt);
 395 }
 396
 397
 398 bool containsOnly(string const & s, char const * cset)
 399 {
 400         lyx::Assert(cset);
 401
 402         return s.find_first_not_of(cset) == string::npos;
 403 }
 404
 405
 406 bool containsOnly(string const & s, string const & cset)
 407 {
 408         return s.find_first_not_of(cset) == string::npos;
 409 }
 410
 411
 412 bool containsOnly(char const * s, char const * cset)
 413 {
 414         lyx::Assert(s && cset);
 415
 416         return string(s).find_first_not_of(cset) == string::npos;
 417 }
 418
 419
 420 bool containsOnly(char const * s, string const & cset)
 421 {
 422         lyx::Assert(s);
 423
 424         return string(s).find_first_not_of(cset) == string::npos;
 425 }
 426
 427
 428 // ale970405+lasgoutt-970425
 429 // rewritten to use new string (Lgb)
 430 string const token(string const & a, char delim, int n)
 431 {
 432         if (a.empty()) return string();
 433
 434         string::size_type k = 0;
 435         string::size_type i = 0;
 436
 437         // Find delimiter or end of string
 438         for (; n--;)
 439                 if ((i = a.find(delim, i)) == string::npos)
 440                         break;
 441                 else
 442                         ++i; // step delim
 443         // i is now the n'th delim (or string::npos)
 444         if (i == string::npos) return string();
 445         k = a.find(delim, i);
 446         // k is now the n'th + 1 delim (or string::npos)
 447
 448         return a.substr(i, k - i);
 449 }
 450
 451
 452 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 453 // rewritten to use new string (Lgb)
 454 int tokenPos(string const & a, char delim, string const & tok)
 455 {
 456         int i = 0;
 457         string str(a);
 458         string tmptok;
 459
 460         while (!str.empty()) {
 461                 str = split(str, tmptok, delim);
 462                 if (tok == tmptok)
 463                         return i;
 464                 ++i;
 465         }
 466         return -1;
 467 }
 468
 469
 470 bool regexMatch(string const & a, string const & pattern)
 471 {
 472         // We massage the pattern a bit so that the usual
 473         // shell pattern we all are used to will work.
 474         // One nice thing about using a real regex is that
 475         // things like "*.*[^~]" will work also.
 476         // build the regex string.
 477         string regex(pattern);
 478         regex = subst(regex, ".", "\\.");
 479         regex = subst(regex, "*", ".*");
 480         boost::regex reg(regex);
 481         return boost::regex_match(a, reg);
 482 }
 483
 484
 485 string const subst(string const & a, char oldchar, char newchar)
 486 {
 487         string tmp(a);
 488         string::iterator lit = tmp.begin();
 489         string::iterator end = tmp.end();
 490         for (; lit != end; ++lit)
 491                 if ((*lit) == oldchar)
 492                         (*lit) = newchar;
 493         return tmp;
 494 }
 495
 496
 497 string const subst(string const & a,
 498                    char const * oldstr, string const & newstr)
 499 {
 500         lyx::Assert(oldstr);
 501
 502         string lstr(a);
 503         string::size_type i = 0;
 504         string::size_type olen = strlen(oldstr);
 505         while ((i = lstr.find(oldstr, i)) != string::npos) {
 506                 lstr.replace(i, olen, newstr);
 507                 i += newstr.length(); // We need to be sure that we dont
 508                 // use the same i over and over again.
 509         }
 510         return lstr;
 511 }
 512
 513
 514 string const subst(string const & a,
 515                    string const & oldstr, string const & newstr)
 516 {
 517         string lstr(a);
 518         string::size_type i = 0;
 519         string::size_type const olen = oldstr.length();
 520         while ((i = lstr.find(oldstr, i)) != string::npos) {
 521                 lstr.replace(i, olen, newstr);
 522                 i += newstr.length(); // We need to be sure that we dont
 523                 // use the same i over and over again.
 524         }
 525         return lstr;
 526 }
 527
 528
 529 string const strip(string const & a, char c)
 530 {
 531         if (a.empty()) return a;
 532         string tmp(a);
 533         string::size_type i = tmp.find_last_not_of(c);
 534         if (i == a.length() - 1) return tmp; // no c's at end of a
 535         if (i != string::npos)
 536                 tmp.erase(i + 1, string::npos);
 537 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 538         /// Needed for broken string::find_last_not_of
 539         else if (tmp[0] != c) {
 540                 if (a.length() == 1) return tmp;
 541                 tmp.erase(1, string::npos);
 542         }
 543 #endif
 544         else
 545                 tmp.erase(); // only c in the whole string
 546         return tmp;
 547 }
 548
 549
 550 string const frontStrip(string const & a, char const * p)
 551 {
 552         lyx::Assert(p);
 553
 554         if (a.empty() || !*p) return a;
 555         string tmp(a);
 556         string::size_type i = tmp.find_first_not_of(p);
 557         if (i > 0)
 558                 tmp.erase(0, i);
 559         return tmp;
 560 }
 561
 562
 563 string const frontStrip(string const & a, char c)
 564 {
 565         if (a.empty()) return a;
 566         string tmp(a);
 567         string::size_type i = tmp.find_first_not_of(c);
 568         if (i > 0)
 569                 tmp.erase(0, i);
 570         return tmp;
 571 }
 572
 573
 574 string const split(string const & a, string & piece, char delim)
 575 {
 576         string tmp;
 577         string::size_type i = a.find(delim);
 578         if (i == a.length() - 1) {
 579                 piece = a.substr(0, i);
 580         } else if (i != string::npos) {
 581                 piece = a.substr(0, i);
 582                 tmp = a.substr(i + 1);
 583         } else if (i == 0) {
 584                 piece.erase();
 585                 tmp = a.substr(i + 1);
 586         } else {
 587                 piece = a;
 588         }
 589         return tmp;
 590 }
 591
 592
 593 string const split(string const & a, char delim)
 594 {
 595         string tmp;
 596         string::size_type i = a.find(delim);
 597         if (i != string::npos) // found delim
 598                 tmp = a.substr(i + 1);
 599         return tmp;
 600 }
 601
 602
 603 // ale970521
 604 string const rsplit(string const & a, string & piece, char delim)
 605 {
 606         string tmp;
 607         string::size_type i = a.rfind(delim);
 608         if (i != string::npos) { // delimiter was found
 609                 piece = a.substr(0, i);
 610                 tmp = a.substr(i + 1);
 611         } else { // delimter was not found
 612                 piece.erase();
 613         }
 614         return tmp;
 615 }
 616
 617
 618 // This function escapes 8-bit characters and other problematic
 619 // characters that cause problems in latex labels.
 620 string const escape(string const & lab)
 621 {
 622         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 623                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 624         string enc;
 625         for (string::size_type i = 0; i < lab.length(); ++i) {
 626                 unsigned char c= lab[i];
 627                 if (c >= 128 || c == '=' || c == '%') {
 628                         enc += '=';
 629                         enc += hexdigit[c>>4];
 630                         enc += hexdigit[c & 15];
 631                 } else {
 632                         enc += c;
 633                 }
 634         }
 635         return enc;
 636 }
 637
 638
 639 /// gives a vector of stringparts which have the delimiter delim
 640 vector<string> const getVectorFromString(string const & str,
 641                                          string const & delim)
 642 {
 643     vector<string> vec;
 644     if (str.empty())
 645         return vec;
 646     string keys(strip(str));
 647     for(;;) {
 648         string::size_type const idx = keys.find(delim);
 649         if (idx == string::npos) {
 650             vec.push_back(frontStrip(keys));
 651             break;
 652         }
 653         string const key = strip(frontStrip(keys.substr(0, idx)));
 654         if (!key.empty())
 655             vec.push_back(key);
 656         string::size_type const start = idx + delim.size();
 657         keys = keys.substr(start);
 658     }
 659     return vec;
 660 }
 661
 662 // the same vice versa
 663 string const getStringFromVector(vector<string> const & vec,
 664                                  string const & delim)
 665 {
 666         string str;
 667         int i = 0;
 668         for (vector<string>::const_iterator it = vec.begin();
 669              it != vec.end(); ++it) {
 670                 string item = strip(frontStrip(*it));
 671                 if (item.empty()) continue;
 672
 673                 if (i++ > 0) str += delim;
 674                 str += item;
 675         }
 676         return str;
 677 }