src/support/lstrings.C

   1 /**
   2  * \file lstrings.C
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author Jean-Marc Lasgouttes
   8  *
   9  * Full author contact details are available in file CREDITS
  10  */
  11
  12 #include <config.h>
  13
  14 #ifdef __GNUG__
  15 #pragma implementation
  16 #endif
  17
  18 #include "LString.h"
  19 #include "lstrings.h"
  20 #include "LAssert.h"
  21 #include "debug.h"
  22
  23 #include <boost/regex.hpp>
  24
  25 #include <algorithm>
  26
  27 #include <cctype>
  28 #include <cstdlib>
  29
  30 using std::count;
  31 using std::transform;
  32 using std::vector;
  33
  34 #ifndef CXX_GLOBAL_CSTD
  35 using std::atof;
  36 using std::isdigit;
  37 using std::strlen;
  38 using std::tolower;
  39 using std::toupper;
  40 #endif
  41
  42
  43 int compare_no_case(string const & s, string const & s2)
  44 {
  45         string::const_iterator p = s.begin();
  46         string::const_iterator p2 = s2.begin();
  47
  48         while (p != s.end() && p2 != s2.end()) {
  49                 int const lc1 = tolower(*p);
  50                 int const lc2 = tolower(*p2);
  51                 if (lc1 != lc2)
  52                         return (lc1 < lc2) ? -1 : 1;
  53                 ++p;
  54                 ++p2;
  55         }
  56
  57         if (s.size() == s2.size())
  58                 return 0;
  59         if (s.size() < s2.size())
  60                 return -1;
  61         return 1;
  62 }
  63
  64
  65 namespace {
  66         int ascii_tolower(int c) {
  67                 if (c >= 'A' && c <= 'Z')
  68                         return c - 'A' + 'a';
  69                 return c;
  70         }
  71 }
  72
  73
  74 int compare_ascii_no_case(string const & s, string const & s2)
  75 {
  76         string::const_iterator p = s.begin();
  77         string::const_iterator p2 = s2.begin();
  78
  79         while (p != s.end() && p2 != s2.end()) {
  80                 int const lc1 = ascii_tolower(*p);
  81                 int const lc2 = ascii_tolower(*p2);
  82                 if (lc1 != lc2)
  83                         return (lc1 < lc2) ? -1 : 1;
  84                 ++p;
  85                 ++p2;
  86         }
  87
  88         if (s.size() == s2.size())
  89                 return 0;
  90         if (s.size() < s2.size())
  91                 return -1;
  92         return 1;
  93 }
  94
  95
  96 int compare_no_case(string const & s, string const & s2, unsigned int len)
  97 {
  98         string::const_iterator p = s.begin();
  99         string::const_iterator p2 = s2.begin();
 100         unsigned int i = 0;
 101         while (i < len && p != s.end() && p2 != s2.end()) {
 102                 int const lc1 = tolower(*p);
 103                 int const lc2 = tolower(*p2);
 104                 if (lc1 != lc2)
 105                         return (lc1 < lc2) ? -1 : 1;
 106                 ++i;
 107                 ++p;
 108                 ++p2;
 109         }
 110
 111         if (s.size() >= len && s2.size() >= len)
 112                 return 0;
 113         if (s.size() < s2.size())
 114                 return -1;
 115         return 1;
 116 }
 117
 118
 119 bool isStrInt(string const & str)
 120 {
 121         if (str.empty()) return false;
 122
 123         // Remove leading and trailing white space chars.
 124         string const tmpstr = trim(str);
 125         if (tmpstr.empty()) return false;
 126
 127         string::const_iterator cit = tmpstr.begin();
 128         if ((*cit) == '-') ++cit;
 129         string::const_iterator end = tmpstr.end();
 130         for (; cit != end; ++cit) {
 131                 if (!isdigit((*cit))) return false;
 132         }
 133         return true;
 134 }
 135
 136
 137 bool isStrUnsignedInt(string const & str)
 138 {
 139         if (str.empty()) return false;
 140
 141         // Remove leading and trailing white space chars.
 142         string const tmpstr = trim(str);
 143         if (tmpstr.empty()) return false;
 144
 145         string::const_iterator cit = tmpstr.begin();
 146         string::const_iterator end = tmpstr.end();
 147         for (; cit != end; ++cit) {
 148                 if (!isdigit((*cit))) return false;
 149         }
 150         return true;
 151 }
 152
 153
 154 int strToInt(string const & str)
 155 {
 156         if (isStrInt(str)) {
 157                 // Remove leading and trailing white space chars.
 158                 string const tmpstr = trim(str);
 159                 // Do the conversion proper.
 160                 return lyx::atoi(tmpstr);
 161         } else {
 162                 return 0;
 163         }
 164 }
 165
 166
 167 unsigned int strToUnsignedInt(string const & str)
 168 {
 169         if (isStrUnsignedInt(str)) {
 170                 // Remove leading and trailing white space chars.
 171                 string const tmpstr = trim(str);
 172                 // Do the conversion proper.
 173                 return lyx::atoi(tmpstr);
 174         } else {
 175                 return 0;
 176         }
 177 }
 178
 179
 180 bool isStrDbl(string const & str)
 181 {
 182         if (str.empty()) return false;
 183
 184         // Remove leading and trailing white space chars.
 185         string const tmpstr = trim(str);
 186         if (tmpstr.empty()) return false;
 187         //      if (1 < tmpstr.count('.')) return false;
 188
 189         string::const_iterator cit = tmpstr.begin();
 190         bool found_dot(false);
 191         if ((*cit) == '-') ++cit;
 192         string::const_iterator end = tmpstr.end();
 193         for (; cit != end; ++cit) {
 194                 if (!isdigit((*cit))
 195                     && '.' != (*cit)) {
 196                         return false;
 197                 }
 198                 if ('.' == (*cit)) {
 199                         if (found_dot) {
 200                                 return false;
 201                         } else {
 202                                 found_dot = true;
 203                         }
 204                 }
 205         }
 206         return true;
 207 }
 208
 209
 210 double strToDbl(string const & str)
 211 {
 212         if (isStrDbl(str)) {
 213                 // Remove leading and trailing white space chars.
 214                 string const tmpstr = trim(str);
 215                 // Do the conversion proper.
 216                 return ::atof(tmpstr.c_str());
 217         } else {
 218                 return 0.0;
 219         }
 220 }
 221
 222
 223 char lowercase(char c)
 224 {
 225         return char(tolower(c));
 226 }
 227
 228
 229 char uppercase(char c)
 230 {
 231         return char(toupper(c));
 232 }
 233
 234
 235 namespace {
 236
 237 // since we cannot use std::tolower and std::toupper directly in the
 238 // calls to std::transform yet, we use these helper clases. (Lgb)
 239
 240 struct local_lowercase {
 241         char operator()(char c) const {
 242                 return tolower(c);
 243         }
 244 };
 245
 246 struct local_uppercase {
 247         char operator()(char c) const {
 248                 return toupper(c);
 249         }
 250 };
 251
 252 struct local_ascii_lowercase {
 253         char operator()(char c) const {
 254                 return ascii_tolower(c);
 255         }
 256 };
 257
 258 } // end of anon namespace
 259
 260 string const lowercase(string const & a)
 261 {
 262         string tmp(a);
 263         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 264         return tmp;
 265 }
 266
 267 string const uppercase(string const & a)
 268 {
 269         string tmp(a);
 270         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 271         return tmp;
 272 }
 273
 274
 275 string const ascii_lowercase(string const & a)
 276 {
 277         string tmp(a);
 278         transform(tmp.begin(), tmp.end(), tmp.begin(),
 279                   local_ascii_lowercase());
 280         return tmp;
 281 }
 282
 283
 284 bool prefixIs(string const & a, char const * pre)
 285 {
 286         lyx::Assert(pre);
 287
 288         size_t const l = strlen(pre);
 289         string::size_type const alen = a.length();
 290
 291         if (l > alen || a.empty())
 292                 return false;
 293         else {
 294 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 295                 // Delete this code when the compilers get a bit better.
 296                 return ::strncmp(a.c_str(), pre, l) == 0;
 297 #else
 298                 // This is the code that we really want to use
 299                 // but until gcc ships with a basic_string that
 300                 // implements std::string correctly we have to
 301                 // use the code above.
 302                 return a.compare(0, l, pre, l) == 0;
 303 #endif
 304         }
 305 }
 306
 307
 308 bool prefixIs(string const & a, string const & pre)
 309 {
 310         string::size_type const prelen = pre.length();
 311         string::size_type const alen = a.length();
 312
 313         if (prelen > alen || a.empty())
 314                 return false;
 315         else {
 316 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 317                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 318 #else
 319                 return a.compare(0, prelen, pre) == 0;
 320 #endif
 321         }
 322 }
 323
 324
 325 bool suffixIs(string const & a, char c)
 326 {
 327         if (a.empty()) return false;
 328         return a[a.length() - 1] == c;
 329 }
 330
 331
 332 bool suffixIs(string const & a, char const * suf)
 333 {
 334         lyx::Assert(suf);
 335
 336         size_t const suflen = strlen(suf);
 337         string::size_type const alen = a.length();
 338
 339         if (suflen > alen)
 340                 return false;
 341         else {
 342 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 343                 // Delete this code when the compilers get a bit better.
 344                 string tmp(a, alen - suflen);
 345                 return ::strncmp(tmp.c_str(), suf, suflen) == 0;
 346 #else
 347                 // This is the code that we really want to use
 348                 // but until gcc ships with a basic_string that
 349                 // implements std::string correctly we have to
 350                 // use the code above.
 351                 return a.compare(alen - suflen, suflen, suf) == 0;
 352 #endif
 353         }
 354 }
 355
 356
 357 bool suffixIs(string const & a, string const & suf)
 358 {
 359         string::size_type const suflen = suf.length();
 360         string::size_type const alen = a.length();
 361
 362         if (suflen > alen) {
 363                 return false;
 364         } else {
 365 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 366                 string tmp(a, alen - suflen);
 367                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 368 #else
 369                 return a.compare(alen - suflen, suflen, suf) == 0;
 370 #endif
 371         }
 372 }
 373
 374
 375 bool contains(string const & a, string const & b)
 376 {
 377         if (a.empty())
 378                 return false;
 379         return a.find(b) != string::npos;
 380 }
 381
 382
 383 bool contains(string const & a, char b)
 384 {
 385         if (a.empty())
 386                 return false;
 387         return a.find(b) != string::npos;
 388 }
 389
 390
 391 bool containsOnly(string const & s, string const & cset)
 392 {
 393         return s.find_first_not_of(cset) == string::npos;
 394 }
 395
 396
 397 // ale970405+lasgoutt-970425
 398 // rewritten to use new string (Lgb)
 399 string const token(string const & a, char delim, int n)
 400 {
 401         if (a.empty()) return string();
 402
 403         string::size_type k = 0;
 404         string::size_type i = 0;
 405
 406         // Find delimiter or end of string
 407         for (; n--;)
 408                 if ((i = a.find(delim, i)) == string::npos)
 409                         break;
 410                 else
 411                         ++i; // step delim
 412         // i is now the n'th delim (or string::npos)
 413         if (i == string::npos) return string();
 414         k = a.find(delim, i);
 415         // k is now the n'th + 1 delim (or string::npos)
 416
 417         return a.substr(i, k - i);
 418 }
 419
 420
 421 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 422 // rewritten to use new string (Lgb)
 423 int tokenPos(string const & a, char delim, string const & tok)
 424 {
 425         int i = 0;
 426         string str(a);
 427         string tmptok;
 428
 429         while (!str.empty()) {
 430                 str = split(str, tmptok, delim);
 431                 if (tok == tmptok)
 432                         return i;
 433                 ++i;
 434         }
 435         return -1;
 436 }
 437
 438
 439 bool regexMatch(string const & a, string const & pattern)
 440 {
 441         // We massage the pattern a bit so that the usual
 442         // shell pattern we all are used to will work.
 443         // One nice thing about using a real regex is that
 444         // things like "*.*[^~]" will work also.
 445         // build the regex string.
 446         string regex(pattern);
 447         regex = subst(regex, ".", "\\.");
 448         regex = subst(regex, "*", ".*");
 449         boost::regex reg(regex);
 450         return boost::regex_match(a, reg);
 451 }
 452
 453
 454 string const subst(string const & a, char oldchar, char newchar)
 455 {
 456         string tmp(a);
 457         string::iterator lit = tmp.begin();
 458         string::iterator end = tmp.end();
 459         for (; lit != end; ++lit)
 460                 if ((*lit) == oldchar)
 461                         (*lit) = newchar;
 462         return tmp;
 463 }
 464
 465
 466 string const subst(string const & a,
 467                    char const * oldstr, string const & newstr)
 468 {
 469         lyx::Assert(oldstr);
 470
 471         string lstr(a);
 472         string::size_type i = 0;
 473         string::size_type olen = strlen(oldstr);
 474         while ((i = lstr.find(oldstr, i)) != string::npos) {
 475                 lstr.replace(i, olen, newstr);
 476                 i += newstr.length(); // We need to be sure that we dont
 477                 // use the same i over and over again.
 478         }
 479         return lstr;
 480 }
 481
 482
 483 string const subst(string const & a,
 484                    string const & oldstr, string const & newstr)
 485 {
 486         string lstr(a);
 487         string::size_type i = 0;
 488         string::size_type const olen = oldstr.length();
 489         while ((i = lstr.find(oldstr, i)) != string::npos) {
 490                 lstr.replace(i, olen, newstr);
 491                 i += newstr.length(); // We need to be sure that we dont
 492                 // use the same i over and over again.
 493         }
 494         return lstr;
 495 }
 496
 497
 498 string const trim(string const & a, char const * p)
 499 {
 500         lyx::Assert(p);
 501
 502         if (a.empty() || !*p)
 503                 return a;
 504
 505         string::size_type r = a.find_last_not_of(p);
 506         string::size_type l = a.find_first_not_of(p);
 507
 508         // Is this the minimal test? (lgb)
 509         if (r == string::npos && l == string::npos)
 510                 return string();
 511
 512         return a.substr(l, r - l + 1);
 513 }
 514
 515
 516 string const rtrim(string const & a, char const * p)
 517 {
 518         lyx::Assert(p);
 519
 520         if (a.empty() || !*p)
 521                 return a;
 522
 523         string::size_type r = a.find_last_not_of(p);
 524
 525         // Is this test really needed? (Lgb)
 526         if (r == string::npos)
 527                 return string();
 528
 529         return a.substr(0, r + 1);
 530 }
 531
 532
 533 string const ltrim(string const & a, char const * p)
 534 {
 535         lyx::Assert(p);
 536
 537         if (a.empty() || !*p)
 538                 return a;
 539
 540         string::size_type l = a.find_first_not_of(p);
 541
 542         if (l == string::npos)
 543                 return string();
 544
 545         return a.substr(l, string::npos);
 546 }
 547
 548
 549 string const split(string const & a, string & piece, char delim)
 550 {
 551         string tmp;
 552         string::size_type i = a.find(delim);
 553         if (i == a.length() - 1) {
 554                 piece = a.substr(0, i);
 555         } else if (i != string::npos) {
 556                 piece = a.substr(0, i);
 557                 tmp = a.substr(i + 1);
 558         } else if (i == 0) {
 559                 piece.erase();
 560                 tmp = a.substr(i + 1);
 561         } else {
 562                 piece = a;
 563         }
 564         return tmp;
 565 }
 566
 567
 568 string const split(string const & a, char delim)
 569 {
 570         string tmp;
 571         string::size_type i = a.find(delim);
 572         if (i != string::npos) // found delim
 573                 tmp = a.substr(i + 1);
 574         return tmp;
 575 }
 576
 577
 578 // ale970521
 579 string const rsplit(string const & a, string & piece, char delim)
 580 {
 581         string tmp;
 582         string::size_type i = a.rfind(delim);
 583         if (i != string::npos) { // delimiter was found
 584                 piece = a.substr(0, i);
 585                 tmp = a.substr(i + 1);
 586         } else { // delimter was not found
 587                 piece.erase();
 588         }
 589         return tmp;
 590 }
 591
 592
 593 // This function escapes 8-bit characters and other problematic
 594 // characters that cause problems in latex labels.
 595 string const escape(string const & lab)
 596 {
 597         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 598                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 599         string enc;
 600         for (string::size_type i = 0; i < lab.length(); ++i) {
 601                 unsigned char c= lab[i];
 602                 if (c >= 128 || c == '=' || c == '%') {
 603                         enc += '=';
 604                         enc += hexdigit[c>>4];
 605                         enc += hexdigit[c & 15];
 606                 } else {
 607                         enc += c;
 608                 }
 609         }
 610         return enc;
 611 }
 612
 613
 614 /// gives a vector of stringparts which have the delimiter delim
 615 vector<string> const getVectorFromString(string const & str,
 616                                          string const & delim)
 617 {
 618     vector<string> vec;
 619     if (str.empty())
 620         return vec;
 621     string keys(rtrim(str));
 622     for(;;) {
 623         string::size_type const idx = keys.find(delim);
 624         if (idx == string::npos) {
 625             vec.push_back(ltrim(keys));
 626             break;
 627         }
 628         string const key = trim(keys.substr(0, idx));
 629         if (!key.empty())
 630             vec.push_back(key);
 631         string::size_type const start = idx + delim.size();
 632         keys = keys.substr(start);
 633     }
 634     return vec;
 635 }
 636
 637 // the same vice versa
 638 string const getStringFromVector(vector<string> const & vec,
 639                                  string const & delim)
 640 {
 641         string str;
 642         int i = 0;
 643         for (vector<string>::const_iterator it = vec.begin();
 644              it != vec.end(); ++it) {
 645                 string item = trim(*it);
 646                 if (item.empty()) continue;
 647
 648                 if (i++ > 0) str += delim;
 649                 str += item;
 650         }
 651         return str;
 652 }