src/support/lstrings.C

   1 /**
   2  * \file lstrings.C
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author Jean-Marc Lasgouttes
   8  *
   9  * Full author contact details are available in file CREDITS
  10  */
  11
  12 #include <config.h>
  13
  14 #include "LString.h"
  15 #include "lstrings.h"
  16 #include "LAssert.h"
  17 #include "Lsstream.h"
  18 #include "debug.h"
  19 #include "BoostFormat.h"
  20
  21 #include <boost/regex.hpp>
  22 #include <boost/tokenizer.hpp>
  23
  24 #include <algorithm>
  25
  26 #include <cctype>
  27 #include <cstdlib>
  28
  29 using std::count;
  30 using std::transform;
  31 using std::vector;
  32
  33 #ifndef CXX_GLOBAL_CSTD
  34 using std::atof;
  35 using std::isdigit;
  36 using std::strlen;
  37 using std::tolower;
  38 using std::toupper;
  39 #endif
  40
  41
  42 int compare_no_case(string const & s, string const & s2)
  43 {
  44         string::const_iterator p = s.begin();
  45         string::const_iterator p2 = s2.begin();
  46
  47         while (p != s.end() && p2 != s2.end()) {
  48                 int const lc1 = tolower(*p);
  49                 int const lc2 = tolower(*p2);
  50                 if (lc1 != lc2)
  51                         return (lc1 < lc2) ? -1 : 1;
  52                 ++p;
  53                 ++p2;
  54         }
  55
  56         if (s.size() == s2.size())
  57                 return 0;
  58         if (s.size() < s2.size())
  59                 return -1;
  60         return 1;
  61 }
  62
  63
  64 namespace {
  65         int ascii_tolower(int c) {
  66                 if (c >= 'A' && c <= 'Z')
  67                         return c - 'A' + 'a';
  68                 return c;
  69         }
  70 }
  71
  72
  73 int compare_ascii_no_case(string const & s, string const & s2)
  74 {
  75         string::const_iterator p = s.begin();
  76         string::const_iterator p2 = s2.begin();
  77
  78         while (p != s.end() && p2 != s2.end()) {
  79                 int const lc1 = ascii_tolower(*p);
  80                 int const lc2 = ascii_tolower(*p2);
  81                 if (lc1 != lc2)
  82                         return (lc1 < lc2) ? -1 : 1;
  83                 ++p;
  84                 ++p2;
  85         }
  86
  87         if (s.size() == s2.size())
  88                 return 0;
  89         if (s.size() < s2.size())
  90                 return -1;
  91         return 1;
  92 }
  93
  94
  95 int compare_no_case(string const & s, string const & s2, unsigned int len)
  96 {
  97         string::const_iterator p = s.begin();
  98         string::const_iterator p2 = s2.begin();
  99         unsigned int i = 0;
 100         while (i < len && p != s.end() && p2 != s2.end()) {
 101                 int const lc1 = tolower(*p);
 102                 int const lc2 = tolower(*p2);
 103                 if (lc1 != lc2)
 104                         return (lc1 < lc2) ? -1 : 1;
 105                 ++i;
 106                 ++p;
 107                 ++p2;
 108         }
 109
 110         if (s.size() >= len && s2.size() >= len)
 111                 return 0;
 112         if (s.size() < s2.size())
 113                 return -1;
 114         return 1;
 115 }
 116
 117
 118 bool isStrInt(string const & str)
 119 {
 120         if (str.empty()) return false;
 121
 122         // Remove leading and trailing white space chars.
 123         string const tmpstr = trim(str);
 124         if (tmpstr.empty()) return false;
 125
 126         string::const_iterator cit = tmpstr.begin();
 127         if ((*cit) == '-') ++cit;
 128         string::const_iterator end = tmpstr.end();
 129         for (; cit != end; ++cit) {
 130                 if (!isdigit((*cit))) return false;
 131         }
 132         return true;
 133 }
 134
 135
 136 bool isStrUnsignedInt(string const & str)
 137 {
 138         if (str.empty()) return false;
 139
 140         // Remove leading and trailing white space chars.
 141         string const tmpstr = trim(str);
 142         if (tmpstr.empty()) return false;
 143
 144         string::const_iterator cit = tmpstr.begin();
 145         string::const_iterator end = tmpstr.end();
 146         for (; cit != end; ++cit) {
 147                 if (!isdigit((*cit))) return false;
 148         }
 149         return true;
 150 }
 151
 152
 153 int strToInt(string const & str)
 154 {
 155         if (isStrInt(str)) {
 156                 // Remove leading and trailing white space chars.
 157                 string const tmpstr = trim(str);
 158                 // Do the conversion proper.
 159                 return lyx::atoi(tmpstr);
 160         } else {
 161                 return 0;
 162         }
 163 }
 164
 165
 166 unsigned int strToUnsignedInt(string const & str)
 167 {
 168         if (isStrUnsignedInt(str)) {
 169                 // Remove leading and trailing white space chars.
 170                 string const tmpstr = trim(str);
 171                 // Do the conversion proper.
 172                 return lyx::atoi(tmpstr);
 173         } else {
 174                 return 0;
 175         }
 176 }
 177
 178
 179 bool isStrDbl(string const & str)
 180 {
 181         if (str.empty()) return false;
 182
 183         // Remove leading and trailing white space chars.
 184         string const tmpstr = trim(str);
 185         if (tmpstr.empty()) return false;
 186         //      if (1 < tmpstr.count('.')) return false;
 187
 188         string::const_iterator cit = tmpstr.begin();
 189         bool found_dot(false);
 190         if ((*cit) == '-') ++cit;
 191         string::const_iterator end = tmpstr.end();
 192         for (; cit != end; ++cit) {
 193                 if (!isdigit((*cit))
 194                     && '.' != (*cit)) {
 195                         return false;
 196                 }
 197                 if ('.' == (*cit)) {
 198                         if (found_dot) {
 199                                 return false;
 200                         } else {
 201                                 found_dot = true;
 202                         }
 203                 }
 204         }
 205         return true;
 206 }
 207
 208
 209 double strToDbl(string const & str)
 210 {
 211         if (isStrDbl(str)) {
 212                 // Remove leading and trailing white space chars.
 213                 string const tmpstr = trim(str);
 214                 // Do the conversion proper.
 215                 return ::atof(tmpstr.c_str());
 216         } else {
 217                 return 0.0;
 218         }
 219 }
 220
 221
 222 char lowercase(char c)
 223 {
 224         return char(tolower(c));
 225 }
 226
 227
 228 char uppercase(char c)
 229 {
 230         return char(toupper(c));
 231 }
 232
 233
 234 namespace {
 235
 236 // since we cannot use std::tolower and std::toupper directly in the
 237 // calls to std::transform yet, we use these helper clases. (Lgb)
 238
 239 struct local_lowercase {
 240         char operator()(char c) const {
 241                 return tolower(c);
 242         }
 243 };
 244
 245 struct local_uppercase {
 246         char operator()(char c) const {
 247                 return toupper(c);
 248         }
 249 };
 250
 251 struct local_ascii_lowercase {
 252         char operator()(char c) const {
 253                 return ascii_tolower(c);
 254         }
 255 };
 256
 257 } // end of anon namespace
 258
 259 string const lowercase(string const & a)
 260 {
 261         string tmp(a);
 262         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 263         return tmp;
 264 }
 265
 266 string const uppercase(string const & a)
 267 {
 268         string tmp(a);
 269         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 270         return tmp;
 271 }
 272
 273
 274 string const ascii_lowercase(string const & a)
 275 {
 276         string tmp(a);
 277         transform(tmp.begin(), tmp.end(), tmp.begin(),
 278                   local_ascii_lowercase());
 279         return tmp;
 280 }
 281
 282
 283 bool prefixIs(string const & a, string const & pre)
 284 {
 285         string::size_type const prelen = pre.length();
 286         string::size_type const alen = a.length();
 287
 288         if (prelen > alen || a.empty())
 289                 return false;
 290         else {
 291 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 292                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 293 #else
 294                 return a.compare(0, prelen, pre) == 0;
 295 #endif
 296         }
 297 }
 298
 299
 300 bool suffixIs(string const & a, char c)
 301 {
 302         if (a.empty()) return false;
 303         return a[a.length() - 1] == c;
 304 }
 305
 306
 307 bool suffixIs(string const & a, string const & suf)
 308 {
 309         string::size_type const suflen = suf.length();
 310         string::size_type const alen = a.length();
 311
 312         if (suflen > alen) {
 313                 return false;
 314         } else {
 315 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 316                 string tmp(a, alen - suflen);
 317                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 318 #else
 319                 return a.compare(alen - suflen, suflen, suf) == 0;
 320 #endif
 321         }
 322 }
 323
 324
 325 bool contains(string const & a, string const & b)
 326 {
 327         if (a.empty())
 328                 return false;
 329         return a.find(b) != string::npos;
 330 }
 331
 332
 333 bool contains(string const & a, char b)
 334 {
 335         if (a.empty())
 336                 return false;
 337         return a.find(b) != string::npos;
 338 }
 339
 340
 341 bool containsOnly(string const & s, string const & cset)
 342 {
 343         return s.find_first_not_of(cset) == string::npos;
 344 }
 345
 346
 347 // ale970405+lasgoutt-970425
 348 // rewritten to use new string (Lgb)
 349 string const token(string const & a, char delim, int n)
 350 {
 351         if (a.empty()) return string();
 352
 353         string::size_type k = 0;
 354         string::size_type i = 0;
 355
 356         // Find delimiter or end of string
 357         for (; n--;)
 358                 if ((i = a.find(delim, i)) == string::npos)
 359                         break;
 360                 else
 361                         ++i; // step delim
 362         // i is now the n'th delim (or string::npos)
 363         if (i == string::npos) return string();
 364         k = a.find(delim, i);
 365         // k is now the n'th + 1 delim (or string::npos)
 366
 367         return a.substr(i, k - i);
 368 }
 369
 370
 371 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 372 // rewritten to use new string (Lgb)
 373 int tokenPos(string const & a, char delim, string const & tok)
 374 {
 375         int i = 0;
 376         string str(a);
 377         string tmptok;
 378
 379         while (!str.empty()) {
 380                 str = split(str, tmptok, delim);
 381                 if (tok == tmptok)
 382                         return i;
 383                 ++i;
 384         }
 385         return -1;
 386 }
 387
 388
 389 bool regexMatch(string const & a, string const & pattern)
 390 {
 391         // We massage the pattern a bit so that the usual
 392         // shell pattern we all are used to will work.
 393         // One nice thing about using a real regex is that
 394         // things like "*.*[^~]" will work also.
 395         // build the regex string.
 396         string regex(pattern);
 397         regex = subst(regex, ".", "\\.");
 398         regex = subst(regex, "*", ".*");
 399         boost::regex reg(STRCONV(regex));
 400         return boost::regex_match(STRCONV(a), reg);
 401 }
 402
 403
 404 string const subst(string const & a, char oldchar, char newchar)
 405 {
 406         string tmp(a);
 407         string::iterator lit = tmp.begin();
 408         string::iterator end = tmp.end();
 409         for (; lit != end; ++lit)
 410                 if ((*lit) == oldchar)
 411                         (*lit) = newchar;
 412         return tmp;
 413 }
 414
 415
 416 string const subst(string const & a,
 417                    string const & oldstr, string const & newstr)
 418 {
 419         string lstr = a;
 420         string::size_type i = 0;
 421         string::size_type const olen = oldstr.length();
 422         while ((i = lstr.find(oldstr, i)) != string::npos) {
 423                 lstr.replace(i, olen, newstr);
 424                 i += newstr.length(); // We need to be sure that we dont
 425                 // use the same i over and over again.
 426         }
 427         return lstr;
 428 }
 429
 430
 431 string const trim(string const & a, char const * p)
 432 {
 433         lyx::Assert(p);
 434
 435         if (a.empty() || !*p)
 436                 return a;
 437
 438         string::size_type r = a.find_last_not_of(p);
 439         string::size_type l = a.find_first_not_of(p);
 440
 441         // Is this the minimal test? (lgb)
 442         if (r == string::npos && l == string::npos)
 443                 return string();
 444
 445         return a.substr(l, r - l + 1);
 446 }
 447
 448
 449 string const rtrim(string const & a, char const * p)
 450 {
 451         lyx::Assert(p);
 452
 453         if (a.empty() || !*p)
 454                 return a;
 455
 456         string::size_type r = a.find_last_not_of(p);
 457
 458         // Is this test really needed? (Lgb)
 459         if (r == string::npos)
 460                 return string();
 461
 462         return a.substr(0, r + 1);
 463 }
 464
 465
 466 string const ltrim(string const & a, char const * p)
 467 {
 468         lyx::Assert(p);
 469
 470         if (a.empty() || !*p)
 471                 return a;
 472
 473         string::size_type l = a.find_first_not_of(p);
 474
 475         if (l == string::npos)
 476                 return string();
 477
 478         return a.substr(l, string::npos);
 479 }
 480
 481
 482 string const split(string const & a, string & piece, char delim)
 483 {
 484         string tmp;
 485         string::size_type i = a.find(delim);
 486         if (i == a.length() - 1) {
 487                 piece = a.substr(0, i);
 488         } else if (i != string::npos) {
 489                 piece = a.substr(0, i);
 490                 tmp = a.substr(i + 1);
 491         } else if (i == 0) {
 492                 piece.erase();
 493                 tmp = a.substr(i + 1);
 494         } else {
 495                 piece = a;
 496         }
 497         return tmp;
 498 }
 499
 500
 501 string const split(string const & a, char delim)
 502 {
 503         string tmp;
 504         string::size_type i = a.find(delim);
 505         if (i != string::npos) // found delim
 506                 tmp = a.substr(i + 1);
 507         return tmp;
 508 }
 509
 510
 511 // ale970521
 512 string const rsplit(string const & a, string & piece, char delim)
 513 {
 514         string tmp;
 515         string::size_type i = a.rfind(delim);
 516         if (i != string::npos) { // delimiter was found
 517                 piece = a.substr(0, i);
 518                 tmp = a.substr(i + 1);
 519         } else { // delimiter was not found
 520                 piece.erase();
 521         }
 522         return tmp;
 523 }
 524
 525
 526 // This function escapes 8-bit characters and other problematic
 527 // characters that cause problems in latex labels.
 528 string const escape(string const & lab)
 529 {
 530         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 531                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 532         string enc;
 533         for (string::size_type i = 0; i < lab.length(); ++i) {
 534                 unsigned char c= lab[i];
 535                 if (c >= 128 || c == '=' || c == '%') {
 536                         enc += '=';
 537                         enc += hexdigit[c>>4];
 538                         enc += hexdigit[c & 15];
 539                 } else {
 540                         enc += c;
 541                 }
 542         }
 543         return enc;
 544 }
 545
 546
 547 /// gives a vector of stringparts which have the delimiter delim
 548 vector<string> const getVectorFromString(string const & str,
 549                                          string const & delim)
 550 {
 551 // Lars would like this code to go, but for now his replacement (below)
 552 // doesn't fullfil the same function. I have, therefore, reactivated the
 553 // old code for now. Angus 11 Nov 2002.
 554 #if 1
 555         vector<string> vec;
 556         if (str.empty())
 557                 return vec;
 558         string keys(rtrim(str));
 559         for(;;) {
 560                 string::size_type const idx = keys.find(delim);
 561                 if (idx == string::npos) {
 562                         vec.push_back(ltrim(keys));
 563                         break;
 564                 }
 565                 string const key = trim(keys.substr(0, idx));
 566                 if (!key.empty())
 567                         vec.push_back(key);
 568                 string::size_type const start = idx + delim.size();
 569                 keys = keys.substr(start);
 570         }
 571         return vec;
 572 #else
 573         boost::char_separator<char> sep(delim.c_str());
 574         boost::tokenizer<boost::char_separator<char> > tokens(str, sep);
 575 #ifndef USE_INCLUDED_STRING
 576         return vector<string>(tokens.begin(), tokens.end());
 577 #else
 578         vector<string> vec;
 579         using boost::tokenizer;
 580         using boost::char_separator;
 581
 582         tokenizer<char_separator<char> >::iterator it = tokens.begin();
 583         tokenizer<char_separator<char> >::iterator end = tokens.end();
 584         for (; it != end; ++it) {
 585                 vec.push_back(STRCONV((*it)));
 586         }
 587         return vec;
 588 #endif
 589 #endif
 590 }
 591
 592
 593 // the same vice versa
 594 string const getStringFromVector(vector<string> const & vec,
 595                                  string const & delim)
 596 {
 597         string str;
 598         int i = 0;
 599         for (vector<string>::const_iterator it = vec.begin();
 600              it != vec.end(); ++it) {
 601                 string item = trim(*it);
 602                 if (item.empty())
 603                         continue;
 604                 if (i++ > 0)
 605                         str += delim;
 606                 str += item;
 607         }
 608         return str;
 609 }
 610
 611
 612 #if USE_BOOST_FORMAT
 613
 614 string bformat(string const & fmt, string const & arg1)
 615 {
 616         return STRCONV((boost::format(fmt) % STRCONV(arg1)).str());
 617 }
 618
 619
 620 string bformat(string const & fmt, string const & arg1, string const & arg2)
 621 {
 622         return STRCONV((boost::format(fmt) % STRCONV(arg1) % STRCONV(arg2)).str());
 623 }
 624
 625
 626 string bformat(string const & fmt, string const & arg1, string const & arg2,
 627         string const & arg3)
 628 {
 629         return STRCONV((boost::format(fmt) % STRCONV(arg1) % STRCONV(arg2)
 630                 % STRCONV(arg3)).str());
 631 }
 632
 633
 634 string bformat(string const & fmt, string const & arg1, string const & arg2,
 635         string const & arg3, string const & arg4)
 636 {
 637         return STRCONV((boost::format(fmt) % STRCONV(arg1) % STRCONV(arg2)
 638                 % STRCONV(arg3) % STRCONV(arg4)).str());
 639 }
 640
 641 #else
 642
 643 string bformat(string const & fmt, string const & arg1)
 644 {
 645         lyx::Assert(contains(fmt, "%1$s"));
 646         string const str = subst(fmt, "%1$s", arg1);
 647         return subst(str, "%%", "%");
 648 }
 649
 650
 651 string bformat(string const & fmt, string const & arg1, string const & arg2)
 652 {
 653         lyx::Assert(contains(fmt, "%1$s"));
 654         lyx::Assert(contains(fmt, "%2$s"));
 655         string str = subst(fmt, "%1$s", arg1);
 656         str = subst(str, "%2$s", arg2);
 657         return subst(str, "%%", "%");
 658 }
 659
 660
 661 string bformat(string const & fmt, string const & arg1, string const & arg2,
 662         string const & arg3)
 663 {
 664         lyx::Assert(contains(fmt, "%1$s"));
 665         lyx::Assert(contains(fmt, "%2$s"));
 666         lyx::Assert(contains(fmt, "%3$s"));
 667         string str = subst(fmt, "%1$s", arg1);
 668         str = subst(str, "%2$s", arg2);
 669         str = subst(str, "%3$s", arg3);
 670         return subst(str, "%%", "%");
 671 }
 672
 673
 674 string bformat(string const & fmt, string const & arg1, string const & arg2,
 675         string const & arg3, string const & arg4)
 676 {
 677         lyx::Assert(contains(fmt, "%1$s"));
 678         lyx::Assert(contains(fmt, "%2$s"));
 679         lyx::Assert(contains(fmt, "%3$s"));
 680         lyx::Assert(contains(fmt, "%4$s"));
 681         string str = subst(fmt, "%1$s", arg1);
 682         str = subst(str, "%2$s", arg2);
 683         str = subst(str, "%3$s", arg3);
 684         str = subst(str, "%4$s", arg4);
 685         return subst(str, "%%", "%");
 686 }
 687
 688 #endif