src/support/lstrings.C

   1 /**
   2  * \file lstrings.C
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author Jean-Marc Lasgouttes
   8  *
   9  * Full author contact details are available in file CREDITS.
  10  */
  11
  12 #include <config.h>
  13
  14 #include "support/lstrings.h"
  15 #include "support/lyxlib.h"
  16 #include "support/convert.h"
  17
  18 #include "debug.h"
  19
  20 #include <boost/tokenizer.hpp>
  21 #include <boost/assert.hpp>
  22
  23 #ifndef I_AM_NOT_AFRAID_OF_HEADER_LIBRARIES
  24 #if USE_BOOST_FORMAT
  25 #include <boost/format.hpp>
  26 #endif
  27 #endif
  28
  29 #include <cctype>
  30 #include <cstdlib>
  31
  32 #include <algorithm>
  33 #include <sstream>
  34
  35 using std::transform;
  36 using std::string;
  37 using std::vector;
  38
  39 #ifndef CXX_GLOBAL_CSTD
  40 using std::atof;
  41 using std::isdigit;
  42 using std::tolower;
  43 using std::toupper;
  44 #endif
  45
  46
  47 namespace lyx {
  48 namespace support {
  49
  50 int compare_no_case(string const & s, string const & s2)
  51 {
  52         string::const_iterator p = s.begin();
  53         string::const_iterator p2 = s2.begin();
  54
  55         while (p != s.end() && p2 != s2.end()) {
  56                 int const lc1 = tolower(*p);
  57                 int const lc2 = tolower(*p2);
  58                 if (lc1 != lc2)
  59                         return (lc1 < lc2) ? -1 : 1;
  60                 ++p;
  61                 ++p2;
  62         }
  63
  64         if (s.size() == s2.size())
  65                 return 0;
  66         if (s.size() < s2.size())
  67                 return -1;
  68         return 1;
  69 }
  70
  71
  72 namespace {
  73         int ascii_tolower(int c) {
  74                 if (c >= 'A' && c <= 'Z')
  75                         return c - 'A' + 'a';
  76                 return c;
  77         }
  78 }
  79
  80
  81 int compare_ascii_no_case(string const & s, string const & s2)
  82 {
  83         string::const_iterator p = s.begin();
  84         string::const_iterator p2 = s2.begin();
  85
  86         while (p != s.end() && p2 != s2.end()) {
  87                 int const lc1 = ascii_tolower(*p);
  88                 int const lc2 = ascii_tolower(*p2);
  89                 if (lc1 != lc2)
  90                         return (lc1 < lc2) ? -1 : 1;
  91                 ++p;
  92                 ++p2;
  93         }
  94
  95         if (s.size() == s2.size())
  96                 return 0;
  97         if (s.size() < s2.size())
  98                 return -1;
  99         return 1;
 100 }
 101
 102
 103 int compare_no_case(string const & s, string const & s2, unsigned int len)
 104 {
 105         string::const_iterator p = s.begin();
 106         string::const_iterator p2 = s2.begin();
 107         unsigned int i = 0;
 108         while (i < len && p != s.end() && p2 != s2.end()) {
 109                 int const lc1 = tolower(*p);
 110                 int const lc2 = tolower(*p2);
 111                 if (lc1 != lc2)
 112                         return (lc1 < lc2) ? -1 : 1;
 113                 ++i;
 114                 ++p;
 115                 ++p2;
 116         }
 117
 118         if (s.size() >= len && s2.size() >= len)
 119                 return 0;
 120         if (s.size() < s2.size())
 121                 return -1;
 122         return 1;
 123 }
 124
 125
 126 bool isStrInt(string const & str)
 127 {
 128         if (str.empty()) return false;
 129
 130         // Remove leading and trailing white space chars.
 131         string const tmpstr = trim(str);
 132         if (tmpstr.empty()) return false;
 133
 134         string::const_iterator cit = tmpstr.begin();
 135         if ((*cit) == '-') ++cit;
 136         string::const_iterator end = tmpstr.end();
 137         for (; cit != end; ++cit) {
 138                 if (!isdigit((*cit))) return false;
 139         }
 140         return true;
 141 }
 142
 143
 144 bool isStrUnsignedInt(string const & str)
 145 {
 146         if (str.empty()) return false;
 147
 148         // Remove leading and trailing white space chars.
 149         string const tmpstr = trim(str);
 150         if (tmpstr.empty()) return false;
 151
 152         string::const_iterator cit = tmpstr.begin();
 153         string::const_iterator end = tmpstr.end();
 154         for (; cit != end; ++cit) {
 155                 if (!isdigit((*cit))) return false;
 156         }
 157         return true;
 158 }
 159
 160
 161 int strToInt(string const & str)
 162 {
 163         if (isStrInt(str)) {
 164                 // Remove leading and trailing white space chars.
 165                 string const tmpstr = trim(str);
 166                 // Do the conversion proper.
 167                 return atoi(tmpstr);
 168         } else {
 169                 return 0;
 170         }
 171 }
 172
 173
 174 unsigned int strToUnsignedInt(string const & str)
 175 {
 176         if (isStrUnsignedInt(str)) {
 177                 // Remove leading and trailing white space chars.
 178                 string const tmpstr = trim(str);
 179                 // Do the conversion proper.
 180                 return atoi(tmpstr);
 181         } else {
 182                 return 0;
 183         }
 184 }
 185
 186
 187 bool isStrDbl(string const & str)
 188 {
 189         if (str.empty()) return false;
 190
 191         // Remove leading and trailing white space chars.
 192         string const tmpstr = trim(str);
 193         if (tmpstr.empty()) return false;
 194         //      if (1 < tmpstr.count('.')) return false;
 195
 196         string::const_iterator cit = tmpstr.begin();
 197         bool found_dot(false);
 198         if ((*cit) == '-') ++cit;
 199         string::const_iterator end = tmpstr.end();
 200         for (; cit != end; ++cit) {
 201                 if (!isdigit((*cit))
 202                     && '.' != (*cit)) {
 203                         return false;
 204                 }
 205                 if ('.' == (*cit)) {
 206                         if (found_dot) {
 207                                 return false;
 208                         } else {
 209                                 found_dot = true;
 210                         }
 211                 }
 212         }
 213         return true;
 214 }
 215
 216
 217 double strToDbl(string const & str)
 218 {
 219         if (isStrDbl(str)) {
 220                 // Remove leading and trailing white space chars.
 221                 string const tmpstr = trim(str);
 222                 // Do the conversion proper.
 223                 return ::atof(tmpstr.c_str());
 224         } else {
 225                 return 0.0;
 226         }
 227 }
 228
 229
 230 char lowercase(char c)
 231 {
 232         return char(tolower(c));
 233 }
 234
 235
 236 char uppercase(char c)
 237 {
 238         return char(toupper(c));
 239 }
 240
 241
 242 namespace {
 243
 244 // since we cannot use std::tolower and std::toupper directly in the
 245 // calls to std::transform yet, we use these helper clases. (Lgb)
 246
 247 struct local_lowercase {
 248         char operator()(char c) const {
 249                 return tolower(c);
 250         }
 251 };
 252
 253 struct local_uppercase {
 254         char operator()(char c) const {
 255                 return toupper(c);
 256         }
 257 };
 258
 259 struct local_ascii_lowercase {
 260         char operator()(char c) const {
 261                 return ascii_tolower(c);
 262         }
 263 };
 264
 265 } // end of anon namespace
 266
 267 string const lowercase(string const & a)
 268 {
 269         string tmp(a);
 270         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 271         return tmp;
 272 }
 273
 274 string const uppercase(string const & a)
 275 {
 276         string tmp(a);
 277         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 278         return tmp;
 279 }
 280
 281
 282 string const ascii_lowercase(string const & a)
 283 {
 284         string tmp(a);
 285         transform(tmp.begin(), tmp.end(), tmp.begin(),
 286                   local_ascii_lowercase());
 287         return tmp;
 288 }
 289
 290
 291 bool prefixIs(string const & a, string const & pre)
 292 {
 293         string::size_type const prelen = pre.length();
 294         string::size_type const alen = a.length();
 295
 296         if (prelen > alen || a.empty())
 297                 return false;
 298         else {
 299 #if defined(STD_STRING_IS_GOOD)
 300                 return a.compare(0, prelen, pre) == 0;
 301 #else
 302                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 303 #endif
 304         }
 305 }
 306
 307
 308 bool suffixIs(string const & a, char c)
 309 {
 310         if (a.empty()) return false;
 311         return a[a.length() - 1] == c;
 312 }
 313
 314
 315 bool suffixIs(string const & a, string const & suf)
 316 {
 317         string::size_type const suflen = suf.length();
 318         string::size_type const alen = a.length();
 319
 320         if (suflen > alen) {
 321                 return false;
 322         } else {
 323 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 324                 string tmp(a, alen - suflen);
 325                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 326 #else
 327                 return a.compare(alen - suflen, suflen, suf) == 0;
 328 #endif
 329         }
 330 }
 331
 332
 333 bool containsOnly(string const & s, string const & cset)
 334 {
 335         return s.find_first_not_of(cset) == string::npos;
 336 }
 337
 338
 339 // ale970405+lasgoutt-970425
 340 // rewritten to use new string (Lgb)
 341 string const token(string const & a, char delim, int n)
 342 {
 343         if (a.empty()) return string();
 344
 345         string::size_type k = 0;
 346         string::size_type i = 0;
 347
 348         // Find delimiter or end of string
 349         for (; n--;)
 350                 if ((i = a.find(delim, i)) == string::npos)
 351                         break;
 352                 else
 353                         ++i; // step delim
 354         // i is now the n'th delim (or string::npos)
 355         if (i == string::npos) return string();
 356         k = a.find(delim, i);
 357         // k is now the n'th + 1 delim (or string::npos)
 358
 359         return a.substr(i, k - i);
 360 }
 361
 362
 363 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 364 // rewritten to use new string (Lgb)
 365 int tokenPos(string const & a, char delim, string const & tok)
 366 {
 367         int i = 0;
 368         string str(a);
 369         string tmptok;
 370
 371         while (!str.empty()) {
 372                 str = split(str, tmptok, delim);
 373                 if (tok == tmptok)
 374                         return i;
 375                 ++i;
 376         }
 377         return -1;
 378 }
 379
 380
 381 string const subst(string const & a, char oldchar, char newchar)
 382 {
 383         string tmp(a);
 384         string::iterator lit = tmp.begin();
 385         string::iterator end = tmp.end();
 386         for (; lit != end; ++lit)
 387                 if ((*lit) == oldchar)
 388                         (*lit) = newchar;
 389         return tmp;
 390 }
 391
 392
 393 string const subst(string const & a,
 394                    string const & oldstr, string const & newstr)
 395 {
 396         string lstr = a;
 397         string::size_type i = 0;
 398         string::size_type const olen = oldstr.length();
 399         while ((i = lstr.find(oldstr, i)) != string::npos) {
 400                 lstr.replace(i, olen, newstr);
 401                 i += newstr.length(); // We need to be sure that we dont
 402                 // use the same i over and over again.
 403         }
 404         return lstr;
 405 }
 406
 407
 408 string const trim(string const & a, char const * p)
 409 {
 410         BOOST_ASSERT(p);
 411
 412         if (a.empty() || !*p)
 413                 return a;
 414
 415         string::size_type r = a.find_last_not_of(p);
 416         string::size_type l = a.find_first_not_of(p);
 417
 418         // Is this the minimal test? (lgb)
 419         if (r == string::npos && l == string::npos)
 420                 return string();
 421
 422         return a.substr(l, r - l + 1);
 423 }
 424
 425
 426 string const rtrim(string const & a, char const * p)
 427 {
 428         BOOST_ASSERT(p);
 429
 430         if (a.empty() || !*p)
 431                 return a;
 432
 433         string::size_type r = a.find_last_not_of(p);
 434
 435         // Is this test really needed? (Lgb)
 436         if (r == string::npos)
 437                 return string();
 438
 439         return a.substr(0, r + 1);
 440 }
 441
 442
 443 string const ltrim(string const & a, char const * p)
 444 {
 445         BOOST_ASSERT(p);
 446
 447         if (a.empty() || !*p)
 448                 return a;
 449
 450         string::size_type l = a.find_first_not_of(p);
 451
 452         if (l == string::npos)
 453                 return string();
 454
 455         return a.substr(l, string::npos);
 456 }
 457
 458
 459 string const split(string const & a, string & piece, char delim)
 460 {
 461         string tmp;
 462         string::size_type i = a.find(delim);
 463         if (i == a.length() - 1) {
 464                 piece = a.substr(0, i);
 465         } else if (i != string::npos) {
 466                 piece = a.substr(0, i);
 467                 tmp = a.substr(i + 1);
 468         } else if (i == 0) {
 469                 piece.erase();
 470                 tmp = a.substr(i + 1);
 471         } else {
 472                 piece = a;
 473         }
 474         return tmp;
 475 }
 476
 477
 478 string const split(string const & a, char delim)
 479 {
 480         string tmp;
 481         string::size_type i = a.find(delim);
 482         if (i != string::npos) // found delim
 483                 tmp = a.substr(i + 1);
 484         return tmp;
 485 }
 486
 487
 488 // ale970521
 489 string const rsplit(string const & a, string & piece, char delim)
 490 {
 491         string tmp;
 492         string::size_type i = a.rfind(delim);
 493         if (i != string::npos) { // delimiter was found
 494                 piece = a.substr(0, i);
 495                 tmp = a.substr(i + 1);
 496         } else { // delimiter was not found
 497                 piece.erase();
 498         }
 499         return tmp;
 500 }
 501
 502
 503 // This function escapes 8-bit characters and other problematic
 504 // characters that cause problems in latex labels.
 505 string const escape(string const & lab)
 506 {
 507         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 508                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 509         string enc;
 510         for (string::size_type i = 0; i < lab.length(); ++i) {
 511                 unsigned char c= lab[i];
 512                 if (c >= 128 || c == '=' || c == '%') {
 513                         enc += '=';
 514                         enc += hexdigit[c>>4];
 515                         enc += hexdigit[c & 15];
 516                 } else {
 517                         enc += c;
 518                 }
 519         }
 520         return enc;
 521 }
 522
 523
 524 /// gives a vector of stringparts which have the delimiter delim
 525 vector<string> const getVectorFromString(string const & str,
 526                                          string const & delim)
 527 {
 528 // Lars would like this code to go, but for now his replacement (below)
 529 // doesn't fullfil the same function. I have, therefore, reactivated the
 530 // old code for now. Angus 11 Nov 2002.
 531 #if 1
 532         vector<string> vec;
 533         if (str.empty())
 534                 return vec;
 535         string keys(rtrim(str));
 536         for(;;) {
 537                 string::size_type const idx = keys.find(delim);
 538                 if (idx == string::npos) {
 539                         vec.push_back(ltrim(keys));
 540                         break;
 541                 }
 542                 string const key = trim(keys.substr(0, idx));
 543                 if (!key.empty())
 544                         vec.push_back(key);
 545                 string::size_type const start = idx + delim.size();
 546                 keys = keys.substr(start);
 547         }
 548         return vec;
 549 #else
 550         boost::char_separator<char> sep(delim.c_str());
 551         boost::tokenizer<boost::char_separator<char> > tokens(str, sep);
 552         return vector<string>(tokens.begin(), tokens.end());
 553 #endif
 554 }
 555
 556
 557 // the same vice versa
 558 string const getStringFromVector(vector<string> const & vec,
 559                                  string const & delim)
 560 {
 561         string str;
 562         int i = 0;
 563         for (vector<string>::const_iterator it = vec.begin();
 564              it != vec.end(); ++it) {
 565                 string item = trim(*it);
 566                 if (item.empty())
 567                         continue;
 568                 if (i++ > 0)
 569                         str += delim;
 570                 str += item;
 571         }
 572         return str;
 573 }
 574
 575
 576 #ifndef I_AM_NOT_AFRAID_OF_HEADER_LIBRARIES
 577 #if USE_BOOST_FORMAT
 578
 579 template<>
 580 string bformat(string const & fmt, int arg1)
 581 {
 582         return (boost::format(fmt) % arg1).str();
 583 }
 584
 585
 586 template<>
 587 string bformat(string const & fmt, long arg1)
 588 {
 589         return (boost::format(fmt) % arg1).str();
 590 }
 591
 592
 593 template<>
 594 string bformat(string const & fmt, unsigned int arg1)
 595 {
 596         return (boost::format(fmt) % arg1).str();
 597 }
 598
 599
 600 template<>
 601 string bformat<string>(string const & fmt, string arg1)
 602 {
 603         return (boost::format(fmt) % arg1).str();
 604 }
 605
 606
 607 template<>
 608 string bformat(string const & fmt, char * arg1)
 609 {
 610         return (boost::format(fmt) % arg1).str();
 611 }
 612
 613
 614 template<>
 615 string bformat(string const & fmt, int arg1, int arg2)
 616 {
 617         return (boost::format(fmt) % arg1 % arg2).str();
 618 }
 619
 620
 621 template<>
 622 string bformat(string const & fmt, string arg1, string arg2)
 623 {
 624         return (boost::format(fmt) % arg1 % arg2).str();
 625 }
 626
 627
 628 template<>
 629 string bformat(string const & fmt, char const * arg1, string arg2)
 630 {
 631         return (boost::format(fmt) % arg1 % arg2).str();
 632 }
 633
 634
 635 template<>
 636 string bformat(string const & fmt, string arg1, string arg2, string arg3)
 637 {
 638         return (boost::format(fmt) % arg1 % arg2 % arg3).str();
 639 }
 640
 641
 642 template<>
 643 string bformat(string const & fmt,
 644                string arg1, string arg2, string arg3, string arg4)
 645 {
 646         return (boost::format(fmt) % arg1 % arg2 % arg3 % arg4).str();
 647 }
 648
 649 #else
 650
 651 template<>
 652 string bformat(string const & fmt, int arg1)
 653 {
 654         BOOST_ASSERT(contains(fmt, "%1$d"));
 655         string const str = subst(fmt, "%1$d", convert<string>(arg1));
 656         return subst(str, "%%", "%");
 657 }
 658
 659
 660 template<>
 661 string bformat(string const & fmt, long arg1)
 662 {
 663         BOOST_ASSERT(contains(fmt, "%1$d"));
 664         string const str = subst(fmt, "%1$d", convert<string>(arg1));
 665         return subst(str, "%%", "%");
 666 }
 667
 668
 669 template<>
 670 string bformat(string const & fmt, unsigned int arg1)
 671 {
 672         BOOST_ASSERT(contains(fmt, "%1$d"));
 673         string const str = subst(fmt, "%1$d", convert<string>(arg1));
 674         return subst(str, "%%", "%");
 675 }
 676
 677
 678 template<>
 679 string bformat(string const & fmt, string arg1)
 680 {
 681         BOOST_ASSERT(contains(fmt, "%1$s"));
 682         string const str = subst(fmt, "%1$s", arg1);
 683         return subst(str, "%%", "%");
 684 }
 685
 686
 687 template<>
 688 string bformat(string const & fmt, char * arg1)
 689 {
 690         BOOST_ASSERT(contains(fmt, "%1$s"));
 691         string const str = subst(fmt, "%1$s", arg1);
 692         return subst(str, "%%", "%");
 693 }
 694 template<>
 695 string bformat(string const & fmt, string arg1, string arg2)
 696 {
 697         BOOST_ASSERT(contains(fmt, "%1$s"));
 698         BOOST_ASSERT(contains(fmt, "%2$s"));
 699         string str = subst(fmt, "%1$s", arg1);
 700         str = subst(str, "%2$s", arg2);
 701         return subst(str, "%%", "%");
 702 }
 703
 704
 705 template<>
 706 string bformat(string const & fmt, char const * arg1, string arg2)
 707 {
 708         BOOST_ASSERT(contains(fmt, "%1$s"));
 709         BOOST_ASSERT(contains(fmt, "%2$s"));
 710         string str = subst(fmt, "%1$s", arg1);
 711         str = subst(fmt, "%2$s", arg2);
 712         return subst(str, "%%", "%");
 713 }
 714
 715
 716 template<>
 717 string bformat(string const & fmt, int arg1, int arg2)
 718 {
 719         BOOST_ASSERT(contains(fmt, "%1$d"));
 720         BOOST_ASSERT(contains(fmt, "%2$d"));
 721         string str = subst(fmt, "%1$d", convert<string>(arg1));
 722         str = subst(str, "%2$d", convert<string>(arg2));
 723         return subst(str, "%%", "%");
 724 }
 725
 726
 727 template<>
 728 string bformat(string const & fmt, string arg1, string arg2, string arg3)
 729 {
 730         BOOST_ASSERT(contains(fmt, "%1$s"));
 731         BOOST_ASSERT(contains(fmt, "%2$s"));
 732         BOOST_ASSERT(contains(fmt, "%3$s"));
 733         string str = subst(fmt, "%1$s", arg1);
 734         str = subst(str, "%2$s", arg2);
 735         str = subst(str, "%3$s", arg3);
 736         return subst(str, "%%", "%");
 737 }
 738
 739
 740 template<>
 741 string bformat(string const & fmt,
 742                string arg1, string arg2, string arg3, string arg4)
 743 {
 744         BOOST_ASSERT(contains(fmt, "%1$s"));
 745         BOOST_ASSERT(contains(fmt, "%2$s"));
 746         BOOST_ASSERT(contains(fmt, "%3$s"));
 747         BOOST_ASSERT(contains(fmt, "%4$s"));
 748         string str = subst(fmt, "%1$s", arg1);
 749         str = subst(str, "%2$s", arg2);
 750         str = subst(str, "%3$s", arg3);
 751         str = subst(str, "%4$s", arg4);
 752         return subst(str, "%%", "%");
 753 }
 754
 755 #endif
 756 #endif
 757
 758 } // namespace support
 759 } // namespace lyx