src/support/lstrings.C

   1 /**
   2  * \file lstrings.C
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author Jean-Marc Lasgouttes
   8  *
   9  * Full author contact details are available in file CREDITS
  10  */
  11
  12 #include <config.h>
  13
  14 #include "LString.h"
  15 #include "lstrings.h"
  16 #include "LAssert.h"
  17 #include "debug.h"
  18 #include "BoostFormat.h"
  19
  20 #include <boost/regex.hpp>
  21 #include <boost/tokenizer.hpp>
  22
  23 #include <algorithm>
  24
  25 #include <cctype>
  26 #include <cstdlib>
  27
  28 using std::count;
  29 using std::transform;
  30 using std::vector;
  31
  32 #ifndef CXX_GLOBAL_CSTD
  33 using std::atof;
  34 using std::isdigit;
  35 using std::strlen;
  36 using std::tolower;
  37 using std::toupper;
  38 #endif
  39
  40
  41 int compare_no_case(string const & s, string const & s2)
  42 {
  43         string::const_iterator p = s.begin();
  44         string::const_iterator p2 = s2.begin();
  45
  46         while (p != s.end() && p2 != s2.end()) {
  47                 int const lc1 = tolower(*p);
  48                 int const lc2 = tolower(*p2);
  49                 if (lc1 != lc2)
  50                         return (lc1 < lc2) ? -1 : 1;
  51                 ++p;
  52                 ++p2;
  53         }
  54
  55         if (s.size() == s2.size())
  56                 return 0;
  57         if (s.size() < s2.size())
  58                 return -1;
  59         return 1;
  60 }
  61
  62
  63 namespace {
  64         int ascii_tolower(int c) {
  65                 if (c >= 'A' && c <= 'Z')
  66                         return c - 'A' + 'a';
  67                 return c;
  68         }
  69 }
  70
  71
  72 int compare_ascii_no_case(string const & s, string const & s2)
  73 {
  74         string::const_iterator p = s.begin();
  75         string::const_iterator p2 = s2.begin();
  76
  77         while (p != s.end() && p2 != s2.end()) {
  78                 int const lc1 = ascii_tolower(*p);
  79                 int const lc2 = ascii_tolower(*p2);
  80                 if (lc1 != lc2)
  81                         return (lc1 < lc2) ? -1 : 1;
  82                 ++p;
  83                 ++p2;
  84         }
  85
  86         if (s.size() == s2.size())
  87                 return 0;
  88         if (s.size() < s2.size())
  89                 return -1;
  90         return 1;
  91 }
  92
  93
  94 int compare_no_case(string const & s, string const & s2, unsigned int len)
  95 {
  96         string::const_iterator p = s.begin();
  97         string::const_iterator p2 = s2.begin();
  98         unsigned int i = 0;
  99         while (i < len && p != s.end() && p2 != s2.end()) {
 100                 int const lc1 = tolower(*p);
 101                 int const lc2 = tolower(*p2);
 102                 if (lc1 != lc2)
 103                         return (lc1 < lc2) ? -1 : 1;
 104                 ++i;
 105                 ++p;
 106                 ++p2;
 107         }
 108
 109         if (s.size() >= len && s2.size() >= len)
 110                 return 0;
 111         if (s.size() < s2.size())
 112                 return -1;
 113         return 1;
 114 }
 115
 116
 117 bool isStrInt(string const & str)
 118 {
 119         if (str.empty()) return false;
 120
 121         // Remove leading and trailing white space chars.
 122         string const tmpstr = trim(str);
 123         if (tmpstr.empty()) return false;
 124
 125         string::const_iterator cit = tmpstr.begin();
 126         if ((*cit) == '-') ++cit;
 127         string::const_iterator end = tmpstr.end();
 128         for (; cit != end; ++cit) {
 129                 if (!isdigit((*cit))) return false;
 130         }
 131         return true;
 132 }
 133
 134
 135 bool isStrUnsignedInt(string const & str)
 136 {
 137         if (str.empty()) return false;
 138
 139         // Remove leading and trailing white space chars.
 140         string const tmpstr = trim(str);
 141         if (tmpstr.empty()) return false;
 142
 143         string::const_iterator cit = tmpstr.begin();
 144         string::const_iterator end = tmpstr.end();
 145         for (; cit != end; ++cit) {
 146                 if (!isdigit((*cit))) return false;
 147         }
 148         return true;
 149 }
 150
 151
 152 int strToInt(string const & str)
 153 {
 154         if (isStrInt(str)) {
 155                 // Remove leading and trailing white space chars.
 156                 string const tmpstr = trim(str);
 157                 // Do the conversion proper.
 158                 return lyx::atoi(tmpstr);
 159         } else {
 160                 return 0;
 161         }
 162 }
 163
 164
 165 unsigned int strToUnsignedInt(string const & str)
 166 {
 167         if (isStrUnsignedInt(str)) {
 168                 // Remove leading and trailing white space chars.
 169                 string const tmpstr = trim(str);
 170                 // Do the conversion proper.
 171                 return lyx::atoi(tmpstr);
 172         } else {
 173                 return 0;
 174         }
 175 }
 176
 177
 178 bool isStrDbl(string const & str)
 179 {
 180         if (str.empty()) return false;
 181
 182         // Remove leading and trailing white space chars.
 183         string const tmpstr = trim(str);
 184         if (tmpstr.empty()) return false;
 185         //      if (1 < tmpstr.count('.')) return false;
 186
 187         string::const_iterator cit = tmpstr.begin();
 188         bool found_dot(false);
 189         if ((*cit) == '-') ++cit;
 190         string::const_iterator end = tmpstr.end();
 191         for (; cit != end; ++cit) {
 192                 if (!isdigit((*cit))
 193                     && '.' != (*cit)) {
 194                         return false;
 195                 }
 196                 if ('.' == (*cit)) {
 197                         if (found_dot) {
 198                                 return false;
 199                         } else {
 200                                 found_dot = true;
 201                         }
 202                 }
 203         }
 204         return true;
 205 }
 206
 207
 208 double strToDbl(string const & str)
 209 {
 210         if (isStrDbl(str)) {
 211                 // Remove leading and trailing white space chars.
 212                 string const tmpstr = trim(str);
 213                 // Do the conversion proper.
 214                 return ::atof(tmpstr.c_str());
 215         } else {
 216                 return 0.0;
 217         }
 218 }
 219
 220
 221 char lowercase(char c)
 222 {
 223         return char(tolower(c));
 224 }
 225
 226
 227 char uppercase(char c)
 228 {
 229         return char(toupper(c));
 230 }
 231
 232
 233 namespace {
 234
 235 // since we cannot use std::tolower and std::toupper directly in the
 236 // calls to std::transform yet, we use these helper clases. (Lgb)
 237
 238 struct local_lowercase {
 239         char operator()(char c) const {
 240                 return tolower(c);
 241         }
 242 };
 243
 244 struct local_uppercase {
 245         char operator()(char c) const {
 246                 return toupper(c);
 247         }
 248 };
 249
 250 struct local_ascii_lowercase {
 251         char operator()(char c) const {
 252                 return ascii_tolower(c);
 253         }
 254 };
 255
 256 } // end of anon namespace
 257
 258 string const lowercase(string const & a)
 259 {
 260         string tmp(a);
 261         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 262         return tmp;
 263 }
 264
 265 string const uppercase(string const & a)
 266 {
 267         string tmp(a);
 268         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 269         return tmp;
 270 }
 271
 272
 273 string const ascii_lowercase(string const & a)
 274 {
 275         string tmp(a);
 276         transform(tmp.begin(), tmp.end(), tmp.begin(),
 277                   local_ascii_lowercase());
 278         return tmp;
 279 }
 280
 281
 282 bool prefixIs(string const & a, char const * pre)
 283 {
 284         lyx::Assert(pre);
 285
 286         size_t const l = strlen(pre);
 287         string::size_type const alen = a.length();
 288
 289         if (l > alen || a.empty())
 290                 return false;
 291         else {
 292 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 293                 // Delete this code when the compilers get a bit better.
 294                 return ::strncmp(a.c_str(), pre, l) == 0;
 295 #else
 296                 // This is the code that we really want to use
 297                 // but until gcc ships with a basic_string that
 298                 // implements std::string correctly we have to
 299                 // use the code above.
 300                 return a.compare(0, l, pre, l) == 0;
 301 #endif
 302         }
 303 }
 304
 305
 306 bool prefixIs(string const & a, string const & pre)
 307 {
 308         string::size_type const prelen = pre.length();
 309         string::size_type const alen = a.length();
 310
 311         if (prelen > alen || a.empty())
 312                 return false;
 313         else {
 314 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 315                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 316 #else
 317                 return a.compare(0, prelen, pre) == 0;
 318 #endif
 319         }
 320 }
 321
 322
 323 bool suffixIs(string const & a, char c)
 324 {
 325         if (a.empty()) return false;
 326         return a[a.length() - 1] == c;
 327 }
 328
 329
 330 bool suffixIs(string const & a, char const * suf)
 331 {
 332         lyx::Assert(suf);
 333
 334         size_t const suflen = strlen(suf);
 335         string::size_type const alen = a.length();
 336
 337         if (suflen > alen)
 338                 return false;
 339         else {
 340 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 341                 // Delete this code when the compilers get a bit better.
 342                 string tmp(a, alen - suflen);
 343                 return ::strncmp(tmp.c_str(), suf, suflen) == 0;
 344 #else
 345                 // This is the code that we really want to use
 346                 // but until gcc ships with a basic_string that
 347                 // implements std::string correctly we have to
 348                 // use the code above.
 349                 return a.compare(alen - suflen, suflen, suf) == 0;
 350 #endif
 351         }
 352 }
 353
 354
 355 bool suffixIs(string const & a, string const & suf)
 356 {
 357         string::size_type const suflen = suf.length();
 358         string::size_type const alen = a.length();
 359
 360         if (suflen > alen) {
 361                 return false;
 362         } else {
 363 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 364                 string tmp(a, alen - suflen);
 365                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 366 #else
 367                 return a.compare(alen - suflen, suflen, suf) == 0;
 368 #endif
 369         }
 370 }
 371
 372
 373 bool contains(string const & a, string const & b)
 374 {
 375         if (a.empty())
 376                 return false;
 377         return a.find(b) != string::npos;
 378 }
 379
 380
 381 bool contains(string const & a, char b)
 382 {
 383         if (a.empty())
 384                 return false;
 385         return a.find(b) != string::npos;
 386 }
 387
 388
 389 bool containsOnly(string const & s, string const & cset)
 390 {
 391         return s.find_first_not_of(cset) == string::npos;
 392 }
 393
 394
 395 // ale970405+lasgoutt-970425
 396 // rewritten to use new string (Lgb)
 397 string const token(string const & a, char delim, int n)
 398 {
 399         if (a.empty()) return string();
 400
 401         string::size_type k = 0;
 402         string::size_type i = 0;
 403
 404         // Find delimiter or end of string
 405         for (; n--;)
 406                 if ((i = a.find(delim, i)) == string::npos)
 407                         break;
 408                 else
 409                         ++i; // step delim
 410         // i is now the n'th delim (or string::npos)
 411         if (i == string::npos) return string();
 412         k = a.find(delim, i);
 413         // k is now the n'th + 1 delim (or string::npos)
 414
 415         return a.substr(i, k - i);
 416 }
 417
 418
 419 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 420 // rewritten to use new string (Lgb)
 421 int tokenPos(string const & a, char delim, string const & tok)
 422 {
 423         int i = 0;
 424         string str(a);
 425         string tmptok;
 426
 427         while (!str.empty()) {
 428                 str = split(str, tmptok, delim);
 429                 if (tok == tmptok)
 430                         return i;
 431                 ++i;
 432         }
 433         return -1;
 434 }
 435
 436
 437 bool regexMatch(string const & a, string const & pattern)
 438 {
 439         // We massage the pattern a bit so that the usual
 440         // shell pattern we all are used to will work.
 441         // One nice thing about using a real regex is that
 442         // things like "*.*[^~]" will work also.
 443         // build the regex string.
 444         string regex(pattern);
 445         regex = subst(regex, ".", "\\.");
 446         regex = subst(regex, "*", ".*");
 447         boost::regex reg(STRCONV(regex));
 448         return boost::regex_match(STRCONV(a), reg);
 449 }
 450
 451
 452 string const subst(string const & a, char oldchar, char newchar)
 453 {
 454         string tmp(a);
 455         string::iterator lit = tmp.begin();
 456         string::iterator end = tmp.end();
 457         for (; lit != end; ++lit)
 458                 if ((*lit) == oldchar)
 459                         (*lit) = newchar;
 460         return tmp;
 461 }
 462
 463
 464 string const subst(string const & a,
 465                    char const * oldstr, string const & newstr)
 466 {
 467         lyx::Assert(oldstr);
 468
 469         string lstr(a);
 470         string::size_type i = 0;
 471         string::size_type olen = strlen(oldstr);
 472         while ((i = lstr.find(oldstr, i)) != string::npos) {
 473                 lstr.replace(i, olen, newstr);
 474                 i += newstr.length(); // We need to be sure that we dont
 475                 // use the same i over and over again.
 476         }
 477         return lstr;
 478 }
 479
 480
 481 string const subst(string const & a,
 482                    string const & oldstr, string const & newstr)
 483 {
 484         string lstr(a);
 485         string::size_type i = 0;
 486         string::size_type const olen = oldstr.length();
 487         while ((i = lstr.find(oldstr, i)) != string::npos) {
 488                 lstr.replace(i, olen, newstr);
 489                 i += newstr.length(); // We need to be sure that we dont
 490                 // use the same i over and over again.
 491         }
 492         return lstr;
 493 }
 494
 495
 496 string const trim(string const & a, char const * p)
 497 {
 498         lyx::Assert(p);
 499
 500         if (a.empty() || !*p)
 501                 return a;
 502
 503         string::size_type r = a.find_last_not_of(p);
 504         string::size_type l = a.find_first_not_of(p);
 505
 506         // Is this the minimal test? (lgb)
 507         if (r == string::npos && l == string::npos)
 508                 return string();
 509
 510         return a.substr(l, r - l + 1);
 511 }
 512
 513
 514 string const rtrim(string const & a, char const * p)
 515 {
 516         lyx::Assert(p);
 517
 518         if (a.empty() || !*p)
 519                 return a;
 520
 521         string::size_type r = a.find_last_not_of(p);
 522
 523         // Is this test really needed? (Lgb)
 524         if (r == string::npos)
 525                 return string();
 526
 527         return a.substr(0, r + 1);
 528 }
 529
 530
 531 string const ltrim(string const & a, char const * p)
 532 {
 533         lyx::Assert(p);
 534
 535         if (a.empty() || !*p)
 536                 return a;
 537
 538         string::size_type l = a.find_first_not_of(p);
 539
 540         if (l == string::npos)
 541                 return string();
 542
 543         return a.substr(l, string::npos);
 544 }
 545
 546
 547 string const split(string const & a, string & piece, char delim)
 548 {
 549         string tmp;
 550         string::size_type i = a.find(delim);
 551         if (i == a.length() - 1) {
 552                 piece = a.substr(0, i);
 553         } else if (i != string::npos) {
 554                 piece = a.substr(0, i);
 555                 tmp = a.substr(i + 1);
 556         } else if (i == 0) {
 557                 piece.erase();
 558                 tmp = a.substr(i + 1);
 559         } else {
 560                 piece = a;
 561         }
 562         return tmp;
 563 }
 564
 565
 566 string const split(string const & a, char delim)
 567 {
 568         string tmp;
 569         string::size_type i = a.find(delim);
 570         if (i != string::npos) // found delim
 571                 tmp = a.substr(i + 1);
 572         return tmp;
 573 }
 574
 575
 576 // ale970521
 577 string const rsplit(string const & a, string & piece, char delim)
 578 {
 579         string tmp;
 580         string::size_type i = a.rfind(delim);
 581         if (i != string::npos) { // delimiter was found
 582                 piece = a.substr(0, i);
 583                 tmp = a.substr(i + 1);
 584         } else { // delimter was not found
 585                 piece.erase();
 586         }
 587         return tmp;
 588 }
 589
 590
 591 // This function escapes 8-bit characters and other problematic
 592 // characters that cause problems in latex labels.
 593 string const escape(string const & lab)
 594 {
 595         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 596                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 597         string enc;
 598         for (string::size_type i = 0; i < lab.length(); ++i) {
 599                 unsigned char c= lab[i];
 600                 if (c >= 128 || c == '=' || c == '%') {
 601                         enc += '=';
 602                         enc += hexdigit[c>>4];
 603                         enc += hexdigit[c & 15];
 604                 } else {
 605                         enc += c;
 606                 }
 607         }
 608         return enc;
 609 }
 610
 611
 612 /// gives a vector of stringparts which have the delimiter delim
 613 vector<string> const getVectorFromString(string const & str,
 614                                          string const & delim)
 615 {
 616 // Lars would like this code to go, but for now his replacement (below)
 617 // doesn't fullfil the same function. I have, therefore, reactivated the
 618 // old code for now. Angus 11 Nov 2002.
 619 #if 1
 620         vector<string> vec;
 621         if (str.empty())
 622                 return vec;
 623         string keys(rtrim(str));
 624         for(;;) {
 625                 string::size_type const idx = keys.find(delim);
 626                 if (idx == string::npos) {
 627                         vec.push_back(ltrim(keys));
 628                         break;
 629                 }
 630                 string const key = trim(keys.substr(0, idx));
 631                 if (!key.empty())
 632                         vec.push_back(key);
 633                 string::size_type const start = idx + delim.size();
 634                 keys = keys.substr(start);
 635         }
 636         return vec;
 637 #else
 638         boost::char_separator<char> sep(delim.c_str());
 639         boost::tokenizer<boost::char_separator<char> > tokens(str, sep);
 640 #ifndef USE_INCLUDED_STRING
 641         return vector<string>(tokens.begin(), tokens.end());
 642 #else
 643         vector<string> vec;
 644         using boost::tokenizer;
 645         using boost::char_separator;
 646
 647         tokenizer<char_separator<char> >::iterator it = tokens.begin();
 648         tokenizer<char_separator<char> >::iterator end = tokens.end();
 649         for (; it != end; ++it) {
 650                 vec.push_back(STRCONV((*it)));
 651         }
 652         return vec;
 653 #endif
 654 #endif
 655 }
 656
 657
 658 // the same vice versa
 659 string const getStringFromVector(vector<string> const & vec,
 660                                  string const & delim)
 661 {
 662         string str;
 663         int i = 0;
 664         for (vector<string>::const_iterator it = vec.begin();
 665              it != vec.end(); ++it) {
 666                 string item = trim(*it);
 667                 if (item.empty()) continue;
 668
 669                 if (i++ > 0) str += delim;
 670                 str += item;
 671         }
 672         return str;
 673 }
 674
 675
 676 #if USE_BOOST_FORMAT
 677
 678 string bformat(char const * fmt, string const & arg1)
 679 {
 680         return STRCONV((boost::format(fmt) % STRCONV(arg1)).str());
 681 }
 682
 683
 684 string bformat(char const * fmt, string const & arg1, string const & arg2)
 685 {
 686         return STRCONV((boost::format(fmt) % STRCONV(arg1) % STRCONV(arg2)).str());
 687 }
 688
 689 string bformat(char const * fmt, string const & arg1, string const & arg2,
 690         string const & arg3, string const & arg4)
 691 {
 692         return STRCONV((boost::format(fmt) % STRCONV(arg1) % STRCONV(arg2)
 693                 % STRCONV(arg3) % STRCONV(arg4)).str());
 694 }
 695
 696 #else
 697
 698 string bformat(char const * fmt, string const & arg1)
 699 {
 700         Assert(contains(fmt, "%1$s"));
 701         string const str = subst(fmt, "%1$s", arg1);
 702         return subst(str, "%%", "%");
 703 }
 704
 705
 706 string bformat(char const * fmt, string const & arg1, string const & arg2)
 707 {
 708         Assert(contains(fmt, "%1$s"));
 709         Assert(contains(fmt, "%2$s"));
 710         string str = subst(fmt, "%1$s", arg1);
 711         str = subst(str, "%2$s", arg2);
 712         return subst(str, "%%", "%");
 713 }
 714
 715
 716 string bformat(char const * fmt, string const & arg1, string const & arg2,
 717         string const & arg3, string const & arg4)
 718 {
 719         Assert(contains(fmt, "%1$s"));
 720         Assert(contains(fmt, "%2$s"));
 721         Assert(contains(fmt, "%3$s"));
 722         Assert(contains(fmt, "%4$s"));
 723         string str = subst(fmt, "%1$s", arg1);
 724         str = subst(str, "%2$s", arg2);
 725         str = subst(str, "%3$s", arg3);
 726         str = subst(str, "%4$s", arg4);
 727         return subst(str, "%%", "%");
 728 }
 729
 730 #endif