src/support/lstrings.C

   1 /**
   2  * \file lstrings.C
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author Jean-Marc Lasgouttes
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "support/std_string.h"
  15 #include "lstrings.h"
  16 #include "LAssert.h"
  17 #include "support/std_sstream.h"
  18 #include "debug.h"
  19 #include "BoostFormat.h"
  20 #include "lyxlib.h"
  21 #include "tostr.h"
  22
  23 #include <boost/regex.hpp>
  24 #include <boost/tokenizer.hpp>
  25
  26 #include <algorithm>
  27
  28 #include <cctype>
  29 #include <cstdlib>
  30
  31 using std::count;
  32 using std::transform;
  33 using std::vector;
  34
  35 #ifndef CXX_GLOBAL_CSTD
  36 using std::atof;
  37 using std::isdigit;
  38 using std::strlen;
  39 using std::tolower;
  40 using std::toupper;
  41 #endif
  42
  43 namespace lyx {
  44 namespace support {
  45
  46 int compare_no_case(string const & s, string const & s2)
  47 {
  48         string::const_iterator p = s.begin();
  49         string::const_iterator p2 = s2.begin();
  50
  51         while (p != s.end() && p2 != s2.end()) {
  52                 int const lc1 = tolower(*p);
  53                 int const lc2 = tolower(*p2);
  54                 if (lc1 != lc2)
  55                         return (lc1 < lc2) ? -1 : 1;
  56                 ++p;
  57                 ++p2;
  58         }
  59
  60         if (s.size() == s2.size())
  61                 return 0;
  62         if (s.size() < s2.size())
  63                 return -1;
  64         return 1;
  65 }
  66
  67
  68 namespace {
  69         int ascii_tolower(int c) {
  70                 if (c >= 'A' && c <= 'Z')
  71                         return c - 'A' + 'a';
  72                 return c;
  73         }
  74 }
  75
  76
  77 int compare_ascii_no_case(string const & s, string const & s2)
  78 {
  79         string::const_iterator p = s.begin();
  80         string::const_iterator p2 = s2.begin();
  81
  82         while (p != s.end() && p2 != s2.end()) {
  83                 int const lc1 = ascii_tolower(*p);
  84                 int const lc2 = ascii_tolower(*p2);
  85                 if (lc1 != lc2)
  86                         return (lc1 < lc2) ? -1 : 1;
  87                 ++p;
  88                 ++p2;
  89         }
  90
  91         if (s.size() == s2.size())
  92                 return 0;
  93         if (s.size() < s2.size())
  94                 return -1;
  95         return 1;
  96 }
  97
  98
  99 int compare_no_case(string const & s, string const & s2, unsigned int len)
 100 {
 101         string::const_iterator p = s.begin();
 102         string::const_iterator p2 = s2.begin();
 103         unsigned int i = 0;
 104         while (i < len && p != s.end() && p2 != s2.end()) {
 105                 int const lc1 = tolower(*p);
 106                 int const lc2 = tolower(*p2);
 107                 if (lc1 != lc2)
 108                         return (lc1 < lc2) ? -1 : 1;
 109                 ++i;
 110                 ++p;
 111                 ++p2;
 112         }
 113
 114         if (s.size() >= len && s2.size() >= len)
 115                 return 0;
 116         if (s.size() < s2.size())
 117                 return -1;
 118         return 1;
 119 }
 120
 121
 122 bool isStrInt(string const & str)
 123 {
 124         if (str.empty()) return false;
 125
 126         // Remove leading and trailing white space chars.
 127         string const tmpstr = trim(str);
 128         if (tmpstr.empty()) return false;
 129
 130         string::const_iterator cit = tmpstr.begin();
 131         if ((*cit) == '-') ++cit;
 132         string::const_iterator end = tmpstr.end();
 133         for (; cit != end; ++cit) {
 134                 if (!isdigit((*cit))) return false;
 135         }
 136         return true;
 137 }
 138
 139
 140 bool isStrUnsignedInt(string const & str)
 141 {
 142         if (str.empty()) return false;
 143
 144         // Remove leading and trailing white space chars.
 145         string const tmpstr = trim(str);
 146         if (tmpstr.empty()) return false;
 147
 148         string::const_iterator cit = tmpstr.begin();
 149         string::const_iterator end = tmpstr.end();
 150         for (; cit != end; ++cit) {
 151                 if (!isdigit((*cit))) return false;
 152         }
 153         return true;
 154 }
 155
 156
 157 int strToInt(string const & str)
 158 {
 159         if (isStrInt(str)) {
 160                 // Remove leading and trailing white space chars.
 161                 string const tmpstr = trim(str);
 162                 // Do the conversion proper.
 163                 return atoi(tmpstr);
 164         } else {
 165                 return 0;
 166         }
 167 }
 168
 169
 170 unsigned int strToUnsignedInt(string const & str)
 171 {
 172         if (isStrUnsignedInt(str)) {
 173                 // Remove leading and trailing white space chars.
 174                 string const tmpstr = trim(str);
 175                 // Do the conversion proper.
 176                 return atoi(tmpstr);
 177         } else {
 178                 return 0;
 179         }
 180 }
 181
 182
 183 bool isStrDbl(string const & str)
 184 {
 185         if (str.empty()) return false;
 186
 187         // Remove leading and trailing white space chars.
 188         string const tmpstr = trim(str);
 189         if (tmpstr.empty()) return false;
 190         //      if (1 < tmpstr.count('.')) return false;
 191
 192         string::const_iterator cit = tmpstr.begin();
 193         bool found_dot(false);
 194         if ((*cit) == '-') ++cit;
 195         string::const_iterator end = tmpstr.end();
 196         for (; cit != end; ++cit) {
 197                 if (!isdigit((*cit))
 198                     && '.' != (*cit)) {
 199                         return false;
 200                 }
 201                 if ('.' == (*cit)) {
 202                         if (found_dot) {
 203                                 return false;
 204                         } else {
 205                                 found_dot = true;
 206                         }
 207                 }
 208         }
 209         return true;
 210 }
 211
 212
 213 double strToDbl(string const & str)
 214 {
 215         if (isStrDbl(str)) {
 216                 // Remove leading and trailing white space chars.
 217                 string const tmpstr = trim(str);
 218                 // Do the conversion proper.
 219                 return ::atof(tmpstr.c_str());
 220         } else {
 221                 return 0.0;
 222         }
 223 }
 224
 225
 226 char lowercase(char c)
 227 {
 228         return char(tolower(c));
 229 }
 230
 231
 232 char uppercase(char c)
 233 {
 234         return char(toupper(c));
 235 }
 236
 237
 238 namespace {
 239
 240 // since we cannot use std::tolower and std::toupper directly in the
 241 // calls to std::transform yet, we use these helper clases. (Lgb)
 242
 243 struct local_lowercase {
 244         char operator()(char c) const {
 245                 return tolower(c);
 246         }
 247 };
 248
 249 struct local_uppercase {
 250         char operator()(char c) const {
 251                 return toupper(c);
 252         }
 253 };
 254
 255 struct local_ascii_lowercase {
 256         char operator()(char c) const {
 257                 return ascii_tolower(c);
 258         }
 259 };
 260
 261 } // end of anon namespace
 262
 263 string const lowercase(string const & a)
 264 {
 265         string tmp(a);
 266         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 267         return tmp;
 268 }
 269
 270 string const uppercase(string const & a)
 271 {
 272         string tmp(a);
 273         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 274         return tmp;
 275 }
 276
 277
 278 string const ascii_lowercase(string const & a)
 279 {
 280         string tmp(a);
 281         transform(tmp.begin(), tmp.end(), tmp.begin(),
 282                   local_ascii_lowercase());
 283         return tmp;
 284 }
 285
 286
 287 bool prefixIs(string const & a, string const & pre)
 288 {
 289         string::size_type const prelen = pre.length();
 290         string::size_type const alen = a.length();
 291
 292         if (prelen > alen || a.empty())
 293                 return false;
 294         else {
 295 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 296                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 297 #else
 298                 return a.compare(0, prelen, pre) == 0;
 299 #endif
 300         }
 301 }
 302
 303
 304 bool suffixIs(string const & a, char c)
 305 {
 306         if (a.empty()) return false;
 307         return a[a.length() - 1] == c;
 308 }
 309
 310
 311 bool suffixIs(string const & a, string const & suf)
 312 {
 313         string::size_type const suflen = suf.length();
 314         string::size_type const alen = a.length();
 315
 316         if (suflen > alen) {
 317                 return false;
 318         } else {
 319 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 320                 string tmp(a, alen - suflen);
 321                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 322 #else
 323                 return a.compare(alen - suflen, suflen, suf) == 0;
 324 #endif
 325         }
 326 }
 327
 328
 329 bool contains(string const & a, string const & b)
 330 {
 331         if (a.empty())
 332                 return false;
 333         return a.find(b) != string::npos;
 334 }
 335
 336
 337 bool contains(string const & a, char b)
 338 {
 339         if (a.empty())
 340                 return false;
 341         return a.find(b) != string::npos;
 342 }
 343
 344
 345 bool containsOnly(string const & s, string const & cset)
 346 {
 347         return s.find_first_not_of(cset) == string::npos;
 348 }
 349
 350
 351 // ale970405+lasgoutt-970425
 352 // rewritten to use new string (Lgb)
 353 string const token(string const & a, char delim, int n)
 354 {
 355         if (a.empty()) return string();
 356
 357         string::size_type k = 0;
 358         string::size_type i = 0;
 359
 360         // Find delimiter or end of string
 361         for (; n--;)
 362                 if ((i = a.find(delim, i)) == string::npos)
 363                         break;
 364                 else
 365                         ++i; // step delim
 366         // i is now the n'th delim (or string::npos)
 367         if (i == string::npos) return string();
 368         k = a.find(delim, i);
 369         // k is now the n'th + 1 delim (or string::npos)
 370
 371         return a.substr(i, k - i);
 372 }
 373
 374
 375 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 376 // rewritten to use new string (Lgb)
 377 int tokenPos(string const & a, char delim, string const & tok)
 378 {
 379         int i = 0;
 380         string str(a);
 381         string tmptok;
 382
 383         while (!str.empty()) {
 384                 str = split(str, tmptok, delim);
 385                 if (tok == tmptok)
 386                         return i;
 387                 ++i;
 388         }
 389         return -1;
 390 }
 391
 392
 393 bool regexMatch(string const & a, string const & pattern)
 394 {
 395         // We massage the pattern a bit so that the usual
 396         // shell pattern we all are used to will work.
 397         // One nice thing about using a real regex is that
 398         // things like "*.*[^~]" will work also.
 399         // build the regex string.
 400         string regex(pattern);
 401         regex = subst(regex, ".", "\\.");
 402         regex = subst(regex, "*", ".*");
 403         boost::regex reg(STRCONV(regex));
 404         return boost::regex_match(STRCONV(a), reg);
 405 }
 406
 407
 408 string const subst(string const & a, char oldchar, char newchar)
 409 {
 410         string tmp(a);
 411         string::iterator lit = tmp.begin();
 412         string::iterator end = tmp.end();
 413         for (; lit != end; ++lit)
 414                 if ((*lit) == oldchar)
 415                         (*lit) = newchar;
 416         return tmp;
 417 }
 418
 419
 420 string const subst(string const & a,
 421                    string const & oldstr, string const & newstr)
 422 {
 423         string lstr = a;
 424         string::size_type i = 0;
 425         string::size_type const olen = oldstr.length();
 426         while ((i = lstr.find(oldstr, i)) != string::npos) {
 427                 lstr.replace(i, olen, newstr);
 428                 i += newstr.length(); // We need to be sure that we dont
 429                 // use the same i over and over again.
 430         }
 431         return lstr;
 432 }
 433
 434
 435 string const trim(string const & a, char const * p)
 436 {
 437         Assert(p);
 438
 439         if (a.empty() || !*p)
 440                 return a;
 441
 442         string::size_type r = a.find_last_not_of(p);
 443         string::size_type l = a.find_first_not_of(p);
 444
 445         // Is this the minimal test? (lgb)
 446         if (r == string::npos && l == string::npos)
 447                 return string();
 448
 449         return a.substr(l, r - l + 1);
 450 }
 451
 452
 453 string const rtrim(string const & a, char const * p)
 454 {
 455         Assert(p);
 456
 457         if (a.empty() || !*p)
 458                 return a;
 459
 460         string::size_type r = a.find_last_not_of(p);
 461
 462         // Is this test really needed? (Lgb)
 463         if (r == string::npos)
 464                 return string();
 465
 466         return a.substr(0, r + 1);
 467 }
 468
 469
 470 string const ltrim(string const & a, char const * p)
 471 {
 472         Assert(p);
 473
 474         if (a.empty() || !*p)
 475                 return a;
 476
 477         string::size_type l = a.find_first_not_of(p);
 478
 479         if (l == string::npos)
 480                 return string();
 481
 482         return a.substr(l, string::npos);
 483 }
 484
 485
 486 string const split(string const & a, string & piece, char delim)
 487 {
 488         string tmp;
 489         string::size_type i = a.find(delim);
 490         if (i == a.length() - 1) {
 491                 piece = a.substr(0, i);
 492         } else if (i != string::npos) {
 493                 piece = a.substr(0, i);
 494                 tmp = a.substr(i + 1);
 495         } else if (i == 0) {
 496                 piece.erase();
 497                 tmp = a.substr(i + 1);
 498         } else {
 499                 piece = a;
 500         }
 501         return tmp;
 502 }
 503
 504
 505 string const split(string const & a, char delim)
 506 {
 507         string tmp;
 508         string::size_type i = a.find(delim);
 509         if (i != string::npos) // found delim
 510                 tmp = a.substr(i + 1);
 511         return tmp;
 512 }
 513
 514
 515 // ale970521
 516 string const rsplit(string const & a, string & piece, char delim)
 517 {
 518         string tmp;
 519         string::size_type i = a.rfind(delim);
 520         if (i != string::npos) { // delimiter was found
 521                 piece = a.substr(0, i);
 522                 tmp = a.substr(i + 1);
 523         } else { // delimiter was not found
 524                 piece.erase();
 525         }
 526         return tmp;
 527 }
 528
 529
 530 // This function escapes 8-bit characters and other problematic
 531 // characters that cause problems in latex labels.
 532 string const escape(string const & lab)
 533 {
 534         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 535                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 536         string enc;
 537         for (string::size_type i = 0; i < lab.length(); ++i) {
 538                 unsigned char c= lab[i];
 539                 if (c >= 128 || c == '=' || c == '%') {
 540                         enc += '=';
 541                         enc += hexdigit[c>>4];
 542                         enc += hexdigit[c & 15];
 543                 } else {
 544                         enc += c;
 545                 }
 546         }
 547         return enc;
 548 }
 549
 550
 551 /// gives a vector of stringparts which have the delimiter delim
 552 vector<string> const getVectorFromString(string const & str,
 553                                          string const & delim)
 554 {
 555 // Lars would like this code to go, but for now his replacement (below)
 556 // doesn't fullfil the same function. I have, therefore, reactivated the
 557 // old code for now. Angus 11 Nov 2002.
 558 #if 1
 559         vector<string> vec;
 560         if (str.empty())
 561                 return vec;
 562         string keys(rtrim(str));
 563         for(;;) {
 564                 string::size_type const idx = keys.find(delim);
 565                 if (idx == string::npos) {
 566                         vec.push_back(ltrim(keys));
 567                         break;
 568                 }
 569                 string const key = trim(keys.substr(0, idx));
 570                 if (!key.empty())
 571                         vec.push_back(key);
 572                 string::size_type const start = idx + delim.size();
 573                 keys = keys.substr(start);
 574         }
 575         return vec;
 576 #else
 577         boost::char_separator<char> sep(delim.c_str());
 578         boost::tokenizer<boost::char_separator<char> > tokens(str, sep);
 579 #ifndef USE_INCLUDED_STRING
 580         return vector<string>(tokens.begin(), tokens.end());
 581 #else
 582         vector<string> vec;
 583         using boost::tokenizer;
 584         using boost::char_separator;
 585
 586         tokenizer<char_separator<char> >::iterator it = tokens.begin();
 587         tokenizer<char_separator<char> >::iterator end = tokens.end();
 588         for (; it != end; ++it) {
 589                 vec.push_back(STRCONV((*it)));
 590         }
 591         return vec;
 592 #endif
 593 #endif
 594 }
 595
 596
 597 // the same vice versa
 598 string const getStringFromVector(vector<string> const & vec,
 599                                  string const & delim)
 600 {
 601         string str;
 602         int i = 0;
 603         for (vector<string>::const_iterator it = vec.begin();
 604              it != vec.end(); ++it) {
 605                 string item = trim(*it);
 606                 if (item.empty())
 607                         continue;
 608                 if (i++ > 0)
 609                         str += delim;
 610                 str += item;
 611         }
 612         return str;
 613 }
 614
 615
 616 #if USE_BOOST_FORMAT
 617
 618 string bformat(string const & fmt, string const & arg1)
 619 {
 620         return STRCONV((boost::format(STRCONV(fmt)) % STRCONV(arg1)).str());
 621 }
 622
 623
 624 string bformat(string const & fmt, string const & arg1, string const & arg2)
 625 {
 626         return STRCONV((boost::format(STRCONV(fmt)) % STRCONV(arg1)
 627                 % STRCONV(arg2)).str());
 628 }
 629
 630
 631 string bformat(string const & fmt, int arg1, int arg2)
 632 {
 633         return STRCONV((boost::format(STRCONV(fmt)) % arg1 % arg2).str());
 634 }
 635
 636
 637 string bformat(string const & fmt, string const & arg1, string const & arg2,
 638         string const & arg3)
 639 {
 640         return STRCONV((boost::format(STRCONV(fmt)) % STRCONV(arg1)
 641                 % STRCONV(arg2) % STRCONV(arg3)).str());
 642 }
 643
 644
 645 string bformat(string const & fmt, string const & arg1, string const & arg2,
 646         string const & arg3, string const & arg4)
 647 {
 648         return STRCONV((boost::format(STRCONV(fmt)) % STRCONV(arg1)
 649                 % STRCONV(arg2) % STRCONV(arg3) % STRCONV(arg4)).str());
 650 }
 651
 652
 653 string bformat(string const & fmt, string const & arg1, string const & arg2,
 654         string const & arg3, string const & arg4, string const & arg5)
 655 {
 656         return STRCONV((boost::format(STRCONV(fmt)) % STRCONV(arg1)
 657                 % STRCONV(arg2) % STRCONV(arg3) % STRCONV(arg4)
 658                 % STRCONV(arg5)).str());
 659 }
 660
 661 #else
 662
 663 string bformat(string const & fmt, string const & arg1)
 664 {
 665         Assert(contains(fmt, "%1$s"));
 666         string const str = subst(fmt, "%1$s", arg1);
 667         return subst(str, "%%", "%");
 668 }
 669
 670
 671 string bformat(string const & fmt, string const & arg1, string const & arg2)
 672 {
 673         Assert(contains(fmt, "%1$s"));
 674         Assert(contains(fmt, "%2$s"));
 675         string str = subst(fmt, "%1$s", arg1);
 676         str = subst(str, "%2$s", arg2);
 677         return subst(str, "%%", "%");
 678 }
 679
 680
 681 string bformat(string const & fmt, int arg1, int arg2)
 682 {
 683         Assert(contains(fmt, "%1$d"));
 684         Assert(contains(fmt, "%2$d"));
 685         string str = subst(fmt, "%1$d", tostr(arg1));
 686         str = subst(str, "%2$d", tostr(arg2));
 687         return subst(str, "%%", "%");
 688 }
 689
 690
 691 string bformat(string const & fmt, string const & arg1, string const & arg2,
 692         string const & arg3)
 693 {
 694         Assert(contains(fmt, "%1$s"));
 695         Assert(contains(fmt, "%2$s"));
 696         Assert(contains(fmt, "%3$s"));
 697         string str = subst(fmt, "%1$s", arg1);
 698         str = subst(str, "%2$s", arg2);
 699         str = subst(str, "%3$s", arg3);
 700         return subst(str, "%%", "%");
 701 }
 702
 703
 704 string bformat(string const & fmt, string const & arg1, string const & arg2,
 705         string const & arg3, string const & arg4)
 706 {
 707         Assert(contains(fmt, "%1$s"));
 708         Assert(contains(fmt, "%2$s"));
 709         Assert(contains(fmt, "%3$s"));
 710         Assert(contains(fmt, "%4$s"));
 711         string str = subst(fmt, "%1$s", arg1);
 712         str = subst(str, "%2$s", arg2);
 713         str = subst(str, "%3$s", arg3);
 714         str = subst(str, "%4$s", arg4);
 715         return subst(str, "%%", "%");
 716 }
 717
 718
 719 string bformat(string const & fmt, string const & arg1, string const & arg2,
 720         string const & arg3, string const & arg4, string const & arg5)
 721 {
 722         Assert(contains(fmt, "%1$s"));
 723         Assert(contains(fmt, "%2$s"));
 724         Assert(contains(fmt, "%3$s"));
 725         Assert(contains(fmt, "%4$s"));
 726         Assert(contains(fmt, "%5$s"));
 727         string str = subst(fmt, "%1$s", arg1);
 728         str = subst(str, "%2$s", arg2);
 729         str = subst(str, "%3$s", arg3);
 730         str = subst(str, "%4$s", arg4);
 731         str = subst(str, "%5$s", arg5);
 732         return subst(str, "%%", "%");
 733 }
 734
 735 #endif
 736
 737 } // namespace support
 738 } // namespace lyx