src/support/lstrings.C

   1 /**
   2  * \file lstrings.C
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author Jean-Marc Lasgouttes
   8  *
   9  * Full author contact details are available in file CREDITS
  10  */
  11
  12 #include <config.h>
  13
  14 #include "LString.h"
  15 #include "lstrings.h"
  16 #include "LAssert.h"
  17 #include "Lsstream.h"
  18 #include "debug.h"
  19 #include "BoostFormat.h"
  20
  21 #include <boost/regex.hpp>
  22 #include <boost/tokenizer.hpp>
  23
  24 #include <algorithm>
  25
  26 #include <cctype>
  27 #include <cstdlib>
  28
  29 using std::count;
  30 using std::transform;
  31 using std::vector;
  32
  33 #ifndef CXX_GLOBAL_CSTD
  34 using std::atof;
  35 using std::isdigit;
  36 using std::strlen;
  37 using std::tolower;
  38 using std::toupper;
  39 #endif
  40
  41
  42 int compare_no_case(string const & s, string const & s2)
  43 {
  44         string::const_iterator p = s.begin();
  45         string::const_iterator p2 = s2.begin();
  46
  47         while (p != s.end() && p2 != s2.end()) {
  48                 int const lc1 = tolower(*p);
  49                 int const lc2 = tolower(*p2);
  50                 if (lc1 != lc2)
  51                         return (lc1 < lc2) ? -1 : 1;
  52                 ++p;
  53                 ++p2;
  54         }
  55
  56         if (s.size() == s2.size())
  57                 return 0;
  58         if (s.size() < s2.size())
  59                 return -1;
  60         return 1;
  61 }
  62
  63
  64 namespace {
  65         int ascii_tolower(int c) {
  66                 if (c >= 'A' && c <= 'Z')
  67                         return c - 'A' + 'a';
  68                 return c;
  69         }
  70 }
  71
  72
  73 int compare_ascii_no_case(string const & s, string const & s2)
  74 {
  75         string::const_iterator p = s.begin();
  76         string::const_iterator p2 = s2.begin();
  77
  78         while (p != s.end() && p2 != s2.end()) {
  79                 int const lc1 = ascii_tolower(*p);
  80                 int const lc2 = ascii_tolower(*p2);
  81                 if (lc1 != lc2)
  82                         return (lc1 < lc2) ? -1 : 1;
  83                 ++p;
  84                 ++p2;
  85         }
  86
  87         if (s.size() == s2.size())
  88                 return 0;
  89         if (s.size() < s2.size())
  90                 return -1;
  91         return 1;
  92 }
  93
  94
  95 int compare_no_case(string const & s, string const & s2, unsigned int len)
  96 {
  97         string::const_iterator p = s.begin();
  98         string::const_iterator p2 = s2.begin();
  99         unsigned int i = 0;
 100         while (i < len && p != s.end() && p2 != s2.end()) {
 101                 int const lc1 = tolower(*p);
 102                 int const lc2 = tolower(*p2);
 103                 if (lc1 != lc2)
 104                         return (lc1 < lc2) ? -1 : 1;
 105                 ++i;
 106                 ++p;
 107                 ++p2;
 108         }
 109
 110         if (s.size() >= len && s2.size() >= len)
 111                 return 0;
 112         if (s.size() < s2.size())
 113                 return -1;
 114         return 1;
 115 }
 116
 117
 118 bool isStrInt(string const & str)
 119 {
 120         if (str.empty()) return false;
 121
 122         // Remove leading and trailing white space chars.
 123         string const tmpstr = trim(str);
 124         if (tmpstr.empty()) return false;
 125
 126         string::const_iterator cit = tmpstr.begin();
 127         if ((*cit) == '-') ++cit;
 128         string::const_iterator end = tmpstr.end();
 129         for (; cit != end; ++cit) {
 130                 if (!isdigit((*cit))) return false;
 131         }
 132         return true;
 133 }
 134
 135
 136 bool isStrUnsignedInt(string const & str)
 137 {
 138         if (str.empty()) return false;
 139
 140         // Remove leading and trailing white space chars.
 141         string const tmpstr = trim(str);
 142         if (tmpstr.empty()) return false;
 143
 144         string::const_iterator cit = tmpstr.begin();
 145         string::const_iterator end = tmpstr.end();
 146         for (; cit != end; ++cit) {
 147                 if (!isdigit((*cit))) return false;
 148         }
 149         return true;
 150 }
 151
 152
 153 int strToInt(string const & str)
 154 {
 155         if (isStrInt(str)) {
 156                 // Remove leading and trailing white space chars.
 157                 string const tmpstr = trim(str);
 158                 // Do the conversion proper.
 159                 return lyx::atoi(tmpstr);
 160         } else {
 161                 return 0;
 162         }
 163 }
 164
 165
 166 unsigned int strToUnsignedInt(string const & str)
 167 {
 168         if (isStrUnsignedInt(str)) {
 169                 // Remove leading and trailing white space chars.
 170                 string const tmpstr = trim(str);
 171                 // Do the conversion proper.
 172                 return lyx::atoi(tmpstr);
 173         } else {
 174                 return 0;
 175         }
 176 }
 177
 178
 179 bool isStrDbl(string const & str)
 180 {
 181         if (str.empty()) return false;
 182
 183         // Remove leading and trailing white space chars.
 184         string const tmpstr = trim(str);
 185         if (tmpstr.empty()) return false;
 186         //      if (1 < tmpstr.count('.')) return false;
 187
 188         string::const_iterator cit = tmpstr.begin();
 189         bool found_dot(false);
 190         if ((*cit) == '-') ++cit;
 191         string::const_iterator end = tmpstr.end();
 192         for (; cit != end; ++cit) {
 193                 if (!isdigit((*cit))
 194                     && '.' != (*cit)) {
 195                         return false;
 196                 }
 197                 if ('.' == (*cit)) {
 198                         if (found_dot) {
 199                                 return false;
 200                         } else {
 201                                 found_dot = true;
 202                         }
 203                 }
 204         }
 205         return true;
 206 }
 207
 208
 209 double strToDbl(string const & str)
 210 {
 211         if (isStrDbl(str)) {
 212                 // Remove leading and trailing white space chars.
 213                 string const tmpstr = trim(str);
 214                 // Do the conversion proper.
 215                 return ::atof(tmpstr.c_str());
 216         } else {
 217                 return 0.0;
 218         }
 219 }
 220
 221
 222 char lowercase(char c)
 223 {
 224         return char(tolower(c));
 225 }
 226
 227
 228 char uppercase(char c)
 229 {
 230         return char(toupper(c));
 231 }
 232
 233
 234 namespace {
 235
 236 // since we cannot use std::tolower and std::toupper directly in the
 237 // calls to std::transform yet, we use these helper clases. (Lgb)
 238
 239 struct local_lowercase {
 240         char operator()(char c) const {
 241                 return tolower(c);
 242         }
 243 };
 244
 245 struct local_uppercase {
 246         char operator()(char c) const {
 247                 return toupper(c);
 248         }
 249 };
 250
 251 struct local_ascii_lowercase {
 252         char operator()(char c) const {
 253                 return ascii_tolower(c);
 254         }
 255 };
 256
 257 } // end of anon namespace
 258
 259 string const lowercase(string const & a)
 260 {
 261         string tmp(a);
 262         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 263         return tmp;
 264 }
 265
 266 string const uppercase(string const & a)
 267 {
 268         string tmp(a);
 269         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 270         return tmp;
 271 }
 272
 273
 274 string const ascii_lowercase(string const & a)
 275 {
 276         string tmp(a);
 277         transform(tmp.begin(), tmp.end(), tmp.begin(),
 278                   local_ascii_lowercase());
 279         return tmp;
 280 }
 281
 282
 283 bool prefixIs(string const & a, char const * pre)
 284 {
 285         lyx::Assert(pre);
 286
 287         size_t const l = strlen(pre);
 288         string::size_type const alen = a.length();
 289
 290         if (l > alen || a.empty())
 291                 return false;
 292         else {
 293 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 294                 // Delete this code when the compilers get a bit better.
 295                 return ::strncmp(a.c_str(), pre, l) == 0;
 296 #else
 297                 // This is the code that we really want to use
 298                 // but until gcc ships with a basic_string that
 299                 // implements std::string correctly we have to
 300                 // use the code above.
 301                 return a.compare(0, l, pre, l) == 0;
 302 #endif
 303         }
 304 }
 305
 306
 307 bool prefixIs(string const & a, string const & pre)
 308 {
 309         string::size_type const prelen = pre.length();
 310         string::size_type const alen = a.length();
 311
 312         if (prelen > alen || a.empty())
 313                 return false;
 314         else {
 315 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 316                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 317 #else
 318                 return a.compare(0, prelen, pre) == 0;
 319 #endif
 320         }
 321 }
 322
 323
 324 bool suffixIs(string const & a, char c)
 325 {
 326         if (a.empty()) return false;
 327         return a[a.length() - 1] == c;
 328 }
 329
 330
 331 bool suffixIs(string const & a, char const * suf)
 332 {
 333         lyx::Assert(suf);
 334
 335         size_t const suflen = strlen(suf);
 336         string::size_type const alen = a.length();
 337
 338         if (suflen > alen)
 339                 return false;
 340         else {
 341 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 342                 // Delete this code when the compilers get a bit better.
 343                 string tmp(a, alen - suflen);
 344                 return ::strncmp(tmp.c_str(), suf, suflen) == 0;
 345 #else
 346                 // This is the code that we really want to use
 347                 // but until gcc ships with a basic_string that
 348                 // implements std::string correctly we have to
 349                 // use the code above.
 350                 return a.compare(alen - suflen, suflen, suf) == 0;
 351 #endif
 352         }
 353 }
 354
 355
 356 bool suffixIs(string const & a, string const & suf)
 357 {
 358         string::size_type const suflen = suf.length();
 359         string::size_type const alen = a.length();
 360
 361         if (suflen > alen) {
 362                 return false;
 363         } else {
 364 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 365                 string tmp(a, alen - suflen);
 366                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 367 #else
 368                 return a.compare(alen - suflen, suflen, suf) == 0;
 369 #endif
 370         }
 371 }
 372
 373
 374 bool contains(string const & a, string const & b)
 375 {
 376         if (a.empty())
 377                 return false;
 378         return a.find(b) != string::npos;
 379 }
 380
 381
 382 bool contains(string const & a, char b)
 383 {
 384         if (a.empty())
 385                 return false;
 386         return a.find(b) != string::npos;
 387 }
 388
 389
 390 bool containsOnly(string const & s, string const & cset)
 391 {
 392         return s.find_first_not_of(cset) == string::npos;
 393 }
 394
 395
 396 // ale970405+lasgoutt-970425
 397 // rewritten to use new string (Lgb)
 398 string const token(string const & a, char delim, int n)
 399 {
 400         if (a.empty()) return string();
 401
 402         string::size_type k = 0;
 403         string::size_type i = 0;
 404
 405         // Find delimiter or end of string
 406         for (; n--;)
 407                 if ((i = a.find(delim, i)) == string::npos)
 408                         break;
 409                 else
 410                         ++i; // step delim
 411         // i is now the n'th delim (or string::npos)
 412         if (i == string::npos) return string();
 413         k = a.find(delim, i);
 414         // k is now the n'th + 1 delim (or string::npos)
 415
 416         return a.substr(i, k - i);
 417 }
 418
 419
 420 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 421 // rewritten to use new string (Lgb)
 422 int tokenPos(string const & a, char delim, string const & tok)
 423 {
 424         int i = 0;
 425         string str(a);
 426         string tmptok;
 427
 428         while (!str.empty()) {
 429                 str = split(str, tmptok, delim);
 430                 if (tok == tmptok)
 431                         return i;
 432                 ++i;
 433         }
 434         return -1;
 435 }
 436
 437
 438 bool regexMatch(string const & a, string const & pattern)
 439 {
 440         // We massage the pattern a bit so that the usual
 441         // shell pattern we all are used to will work.
 442         // One nice thing about using a real regex is that
 443         // things like "*.*[^~]" will work also.
 444         // build the regex string.
 445         string regex(pattern);
 446         regex = subst(regex, ".", "\\.");
 447         regex = subst(regex, "*", ".*");
 448         boost::regex reg(STRCONV(regex));
 449         return boost::regex_match(STRCONV(a), reg);
 450 }
 451
 452
 453 string const subst(string const & a, char oldchar, char newchar)
 454 {
 455         string tmp(a);
 456         string::iterator lit = tmp.begin();
 457         string::iterator end = tmp.end();
 458         for (; lit != end; ++lit)
 459                 if ((*lit) == oldchar)
 460                         (*lit) = newchar;
 461         return tmp;
 462 }
 463
 464
 465 string const subst(string const & a,
 466                    char const * oldstr, string const & newstr)
 467 {
 468         lyx::Assert(oldstr);
 469
 470         string lstr(a);
 471         string::size_type i = 0;
 472         string::size_type olen = strlen(oldstr);
 473         while ((i = lstr.find(oldstr, i)) != string::npos) {
 474                 lstr.replace(i, olen, newstr);
 475                 i += newstr.length(); // We need to be sure that we dont
 476                 // use the same i over and over again.
 477         }
 478         return lstr;
 479 }
 480
 481
 482 string const subst(string const & a,
 483                    string const & oldstr, string const & newstr)
 484 {
 485         string lstr = a;
 486         string::size_type i = 0;
 487         string::size_type const olen = oldstr.length();
 488         while ((i = lstr.find(oldstr, i)) != string::npos) {
 489                 lstr.replace(i, olen, newstr);
 490                 i += newstr.length(); // We need to be sure that we dont
 491                 // use the same i over and over again.
 492         }
 493         return lstr;
 494 }
 495
 496
 497 string const trim(string const & a, char const * p)
 498 {
 499         lyx::Assert(p);
 500
 501         if (a.empty() || !*p)
 502                 return a;
 503
 504         string::size_type r = a.find_last_not_of(p);
 505         string::size_type l = a.find_first_not_of(p);
 506
 507         // Is this the minimal test? (lgb)
 508         if (r == string::npos && l == string::npos)
 509                 return string();
 510
 511         return a.substr(l, r - l + 1);
 512 }
 513
 514
 515 string const rtrim(string const & a, char const * p)
 516 {
 517         lyx::Assert(p);
 518
 519         if (a.empty() || !*p)
 520                 return a;
 521
 522         string::size_type r = a.find_last_not_of(p);
 523
 524         // Is this test really needed? (Lgb)
 525         if (r == string::npos)
 526                 return string();
 527
 528         return a.substr(0, r + 1);
 529 }
 530
 531
 532 string const ltrim(string const & a, char const * p)
 533 {
 534         lyx::Assert(p);
 535
 536         if (a.empty() || !*p)
 537                 return a;
 538
 539         string::size_type l = a.find_first_not_of(p);
 540
 541         if (l == string::npos)
 542                 return string();
 543
 544         return a.substr(l, string::npos);
 545 }
 546
 547
 548 string const split(string const & a, string & piece, char delim)
 549 {
 550         string tmp;
 551         string::size_type i = a.find(delim);
 552         if (i == a.length() - 1) {
 553                 piece = a.substr(0, i);
 554         } else if (i != string::npos) {
 555                 piece = a.substr(0, i);
 556                 tmp = a.substr(i + 1);
 557         } else if (i == 0) {
 558                 piece.erase();
 559                 tmp = a.substr(i + 1);
 560         } else {
 561                 piece = a;
 562         }
 563         return tmp;
 564 }
 565
 566
 567 string const split(string const & a, char delim)
 568 {
 569         string tmp;
 570         string::size_type i = a.find(delim);
 571         if (i != string::npos) // found delim
 572                 tmp = a.substr(i + 1);
 573         return tmp;
 574 }
 575
 576
 577 // ale970521
 578 string const rsplit(string const & a, string & piece, char delim)
 579 {
 580         string tmp;
 581         string::size_type i = a.rfind(delim);
 582         if (i != string::npos) { // delimiter was found
 583                 piece = a.substr(0, i);
 584                 tmp = a.substr(i + 1);
 585         } else { // delimiter was not found
 586                 piece.erase();
 587         }
 588         return tmp;
 589 }
 590
 591
 592 // This function escapes 8-bit characters and other problematic
 593 // characters that cause problems in latex labels.
 594 string const escape(string const & lab)
 595 {
 596         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 597                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 598         string enc;
 599         for (string::size_type i = 0; i < lab.length(); ++i) {
 600                 unsigned char c= lab[i];
 601                 if (c >= 128 || c == '=' || c == '%') {
 602                         enc += '=';
 603                         enc += hexdigit[c>>4];
 604                         enc += hexdigit[c & 15];
 605                 } else {
 606                         enc += c;
 607                 }
 608         }
 609         return enc;
 610 }
 611
 612
 613 /// gives a vector of stringparts which have the delimiter delim
 614 vector<string> const getVectorFromString(string const & str,
 615                                          string const & delim)
 616 {
 617 // Lars would like this code to go, but for now his replacement (below)
 618 // doesn't fullfil the same function. I have, therefore, reactivated the
 619 // old code for now. Angus 11 Nov 2002.
 620 #if 1
 621         vector<string> vec;
 622         if (str.empty())
 623                 return vec;
 624         string keys(rtrim(str));
 625         for(;;) {
 626                 string::size_type const idx = keys.find(delim);
 627                 if (idx == string::npos) {
 628                         vec.push_back(ltrim(keys));
 629                         break;
 630                 }
 631                 string const key = trim(keys.substr(0, idx));
 632                 if (!key.empty())
 633                         vec.push_back(key);
 634                 string::size_type const start = idx + delim.size();
 635                 keys = keys.substr(start);
 636         }
 637         return vec;
 638 #else
 639         boost::char_separator<char> sep(delim.c_str());
 640         boost::tokenizer<boost::char_separator<char> > tokens(str, sep);
 641 #ifndef USE_INCLUDED_STRING
 642         return vector<string>(tokens.begin(), tokens.end());
 643 #else
 644         vector<string> vec;
 645         using boost::tokenizer;
 646         using boost::char_separator;
 647
 648         tokenizer<char_separator<char> >::iterator it = tokens.begin();
 649         tokenizer<char_separator<char> >::iterator end = tokens.end();
 650         for (; it != end; ++it) {
 651                 vec.push_back(STRCONV((*it)));
 652         }
 653         return vec;
 654 #endif
 655 #endif
 656 }
 657
 658
 659 // the same vice versa
 660 string const getStringFromVector(vector<string> const & vec,
 661                                  string const & delim)
 662 {
 663         string str;
 664         int i = 0;
 665         for (vector<string>::const_iterator it = vec.begin();
 666              it != vec.end(); ++it) {
 667                 string item = trim(*it);
 668                 if (item.empty()) continue;
 669
 670                 if (i++ > 0) str += delim;
 671                 str += item;
 672         }
 673         return str;
 674 }
 675
 676
 677 #if USE_BOOST_FORMAT
 678
 679 string bformat(char const * fmt, string const & arg1)
 680 {
 681         return STRCONV((boost::format(fmt) % STRCONV(arg1)).str());
 682 }
 683
 684
 685 string bformat(char const * fmt, string const & arg1, string const & arg2)
 686 {
 687         return STRCONV((boost::format(fmt) % STRCONV(arg1) % STRCONV(arg2)).str());
 688 }
 689
 690 string bformat(char const * fmt, string const & arg1, string const & arg2,
 691         string const & arg3, string const & arg4)
 692 {
 693         return STRCONV((boost::format(fmt) % STRCONV(arg1) % STRCONV(arg2)
 694                 % STRCONV(arg3) % STRCONV(arg4)).str());
 695 }
 696
 697 #else
 698
 699 string bformat(char const * fmt, string const & arg1)
 700 {
 701         lyx::Assert(contains(fmt, "%1$s"));
 702         string const str = subst(fmt, "%1$s", arg1);
 703         return subst(str, "%%", "%");
 704 }
 705
 706
 707 string bformat(char const * fmt, string const & arg1, string const & arg2)
 708 {
 709         lyx::Assert(contains(fmt, "%1$s"));
 710         lyx::Assert(contains(fmt, "%2$s"));
 711         string str = subst(fmt, "%1$s", arg1);
 712         str = subst(str, "%2$s", arg2);
 713         return subst(str, "%%", "%");
 714 }
 715
 716
 717 string bformat(char const * fmt, string const & arg1, string const & arg2,
 718         string const & arg3, string const & arg4)
 719 {
 720         lyx::Assert(contains(fmt, "%1$s"));
 721         lyx::Assert(contains(fmt, "%2$s"));
 722         lyx::Assert(contains(fmt, "%3$s"));
 723         lyx::Assert(contains(fmt, "%4$s"));
 724         string str = subst(fmt, "%1$s", arg1);
 725         str = subst(str, "%2$s", arg2);
 726         str = subst(str, "%3$s", arg3);
 727         str = subst(str, "%4$s", arg4);
 728         return subst(str, "%%", "%");
 729 }
 730
 731 #endif