src/support/lstrings.C

   1 /**
   2  * \file lstrings.C
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author Jean-Marc Lasgouttes
   8  *
   9  * Full author contact details are available in file CREDITS
  10  */
  11
  12 #include <config.h>
  13
  14 #include "LString.h"
  15 #include "lstrings.h"
  16 #include "LAssert.h"
  17 #include "debug.h"
  18
  19 #include <boost/regex.hpp>
  20 #include <boost/tokenizer.hpp>
  21
  22 #include <algorithm>
  23
  24 #include <cctype>
  25 #include <cstdlib>
  26
  27 using std::count;
  28 using std::transform;
  29 using std::vector;
  30
  31 #ifndef CXX_GLOBAL_CSTD
  32 using std::atof;
  33 using std::isdigit;
  34 using std::strlen;
  35 using std::tolower;
  36 using std::toupper;
  37 #endif
  38
  39
  40 int compare_no_case(string const & s, string const & s2)
  41 {
  42         string::const_iterator p = s.begin();
  43         string::const_iterator p2 = s2.begin();
  44
  45         while (p != s.end() && p2 != s2.end()) {
  46                 int const lc1 = tolower(*p);
  47                 int const lc2 = tolower(*p2);
  48                 if (lc1 != lc2)
  49                         return (lc1 < lc2) ? -1 : 1;
  50                 ++p;
  51                 ++p2;
  52         }
  53
  54         if (s.size() == s2.size())
  55                 return 0;
  56         if (s.size() < s2.size())
  57                 return -1;
  58         return 1;
  59 }
  60
  61
  62 namespace {
  63         int ascii_tolower(int c) {
  64                 if (c >= 'A' && c <= 'Z')
  65                         return c - 'A' + 'a';
  66                 return c;
  67         }
  68 }
  69
  70
  71 int compare_ascii_no_case(string const & s, string const & s2)
  72 {
  73         string::const_iterator p = s.begin();
  74         string::const_iterator p2 = s2.begin();
  75
  76         while (p != s.end() && p2 != s2.end()) {
  77                 int const lc1 = ascii_tolower(*p);
  78                 int const lc2 = ascii_tolower(*p2);
  79                 if (lc1 != lc2)
  80                         return (lc1 < lc2) ? -1 : 1;
  81                 ++p;
  82                 ++p2;
  83         }
  84
  85         if (s.size() == s2.size())
  86                 return 0;
  87         if (s.size() < s2.size())
  88                 return -1;
  89         return 1;
  90 }
  91
  92
  93 int compare_no_case(string const & s, string const & s2, unsigned int len)
  94 {
  95         string::const_iterator p = s.begin();
  96         string::const_iterator p2 = s2.begin();
  97         unsigned int i = 0;
  98         while (i < len && p != s.end() && p2 != s2.end()) {
  99                 int const lc1 = tolower(*p);
 100                 int const lc2 = tolower(*p2);
 101                 if (lc1 != lc2)
 102                         return (lc1 < lc2) ? -1 : 1;
 103                 ++i;
 104                 ++p;
 105                 ++p2;
 106         }
 107
 108         if (s.size() >= len && s2.size() >= len)
 109                 return 0;
 110         if (s.size() < s2.size())
 111                 return -1;
 112         return 1;
 113 }
 114
 115
 116 bool isStrInt(string const & str)
 117 {
 118         if (str.empty()) return false;
 119
 120         // Remove leading and trailing white space chars.
 121         string const tmpstr = trim(str);
 122         if (tmpstr.empty()) return false;
 123
 124         string::const_iterator cit = tmpstr.begin();
 125         if ((*cit) == '-') ++cit;
 126         string::const_iterator end = tmpstr.end();
 127         for (; cit != end; ++cit) {
 128                 if (!isdigit((*cit))) return false;
 129         }
 130         return true;
 131 }
 132
 133
 134 bool isStrUnsignedInt(string const & str)
 135 {
 136         if (str.empty()) return false;
 137
 138         // Remove leading and trailing white space chars.
 139         string const tmpstr = trim(str);
 140         if (tmpstr.empty()) return false;
 141
 142         string::const_iterator cit = tmpstr.begin();
 143         string::const_iterator end = tmpstr.end();
 144         for (; cit != end; ++cit) {
 145                 if (!isdigit((*cit))) return false;
 146         }
 147         return true;
 148 }
 149
 150
 151 int strToInt(string const & str)
 152 {
 153         if (isStrInt(str)) {
 154                 // Remove leading and trailing white space chars.
 155                 string const tmpstr = trim(str);
 156                 // Do the conversion proper.
 157                 return lyx::atoi(tmpstr);
 158         } else {
 159                 return 0;
 160         }
 161 }
 162
 163
 164 unsigned int strToUnsignedInt(string const & str)
 165 {
 166         if (isStrUnsignedInt(str)) {
 167                 // Remove leading and trailing white space chars.
 168                 string const tmpstr = trim(str);
 169                 // Do the conversion proper.
 170                 return lyx::atoi(tmpstr);
 171         } else {
 172                 return 0;
 173         }
 174 }
 175
 176
 177 bool isStrDbl(string const & str)
 178 {
 179         if (str.empty()) return false;
 180
 181         // Remove leading and trailing white space chars.
 182         string const tmpstr = trim(str);
 183         if (tmpstr.empty()) return false;
 184         //      if (1 < tmpstr.count('.')) return false;
 185
 186         string::const_iterator cit = tmpstr.begin();
 187         bool found_dot(false);
 188         if ((*cit) == '-') ++cit;
 189         string::const_iterator end = tmpstr.end();
 190         for (; cit != end; ++cit) {
 191                 if (!isdigit((*cit))
 192                     && '.' != (*cit)) {
 193                         return false;
 194                 }
 195                 if ('.' == (*cit)) {
 196                         if (found_dot) {
 197                                 return false;
 198                         } else {
 199                                 found_dot = true;
 200                         }
 201                 }
 202         }
 203         return true;
 204 }
 205
 206
 207 double strToDbl(string const & str)
 208 {
 209         if (isStrDbl(str)) {
 210                 // Remove leading and trailing white space chars.
 211                 string const tmpstr = trim(str);
 212                 // Do the conversion proper.
 213                 return ::atof(tmpstr.c_str());
 214         } else {
 215                 return 0.0;
 216         }
 217 }
 218
 219
 220 char lowercase(char c)
 221 {
 222         return char(tolower(c));
 223 }
 224
 225
 226 char uppercase(char c)
 227 {
 228         return char(toupper(c));
 229 }
 230
 231
 232 namespace {
 233
 234 // since we cannot use std::tolower and std::toupper directly in the
 235 // calls to std::transform yet, we use these helper clases. (Lgb)
 236
 237 struct local_lowercase {
 238         char operator()(char c) const {
 239                 return tolower(c);
 240         }
 241 };
 242
 243 struct local_uppercase {
 244         char operator()(char c) const {
 245                 return toupper(c);
 246         }
 247 };
 248
 249 struct local_ascii_lowercase {
 250         char operator()(char c) const {
 251                 return ascii_tolower(c);
 252         }
 253 };
 254
 255 } // end of anon namespace
 256
 257 string const lowercase(string const & a)
 258 {
 259         string tmp(a);
 260         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 261         return tmp;
 262 }
 263
 264 string const uppercase(string const & a)
 265 {
 266         string tmp(a);
 267         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 268         return tmp;
 269 }
 270
 271
 272 string const ascii_lowercase(string const & a)
 273 {
 274         string tmp(a);
 275         transform(tmp.begin(), tmp.end(), tmp.begin(),
 276                   local_ascii_lowercase());
 277         return tmp;
 278 }
 279
 280
 281 bool prefixIs(string const & a, char const * pre)
 282 {
 283         lyx::Assert(pre);
 284
 285         size_t const l = strlen(pre);
 286         string::size_type const alen = a.length();
 287
 288         if (l > alen || a.empty())
 289                 return false;
 290         else {
 291 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 292                 // Delete this code when the compilers get a bit better.
 293                 return ::strncmp(a.c_str(), pre, l) == 0;
 294 #else
 295                 // This is the code that we really want to use
 296                 // but until gcc ships with a basic_string that
 297                 // implements std::string correctly we have to
 298                 // use the code above.
 299                 return a.compare(0, l, pre, l) == 0;
 300 #endif
 301         }
 302 }
 303
 304
 305 bool prefixIs(string const & a, string const & pre)
 306 {
 307         string::size_type const prelen = pre.length();
 308         string::size_type const alen = a.length();
 309
 310         if (prelen > alen || a.empty())
 311                 return false;
 312         else {
 313 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 314                 return ::strncmp(a.c_str(), pre.c_str(), prelen) == 0;
 315 #else
 316                 return a.compare(0, prelen, pre) == 0;
 317 #endif
 318         }
 319 }
 320
 321
 322 bool suffixIs(string const & a, char c)
 323 {
 324         if (a.empty()) return false;
 325         return a[a.length() - 1] == c;
 326 }
 327
 328
 329 bool suffixIs(string const & a, char const * suf)
 330 {
 331         lyx::Assert(suf);
 332
 333         size_t const suflen = strlen(suf);
 334         string::size_type const alen = a.length();
 335
 336         if (suflen > alen)
 337                 return false;
 338         else {
 339 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 340                 // Delete this code when the compilers get a bit better.
 341                 string tmp(a, alen - suflen);
 342                 return ::strncmp(tmp.c_str(), suf, suflen) == 0;
 343 #else
 344                 // This is the code that we really want to use
 345                 // but until gcc ships with a basic_string that
 346                 // implements std::string correctly we have to
 347                 // use the code above.
 348                 return a.compare(alen - suflen, suflen, suf) == 0;
 349 #endif
 350         }
 351 }
 352
 353
 354 bool suffixIs(string const & a, string const & suf)
 355 {
 356         string::size_type const suflen = suf.length();
 357         string::size_type const alen = a.length();
 358
 359         if (suflen > alen) {
 360                 return false;
 361         } else {
 362 #if !defined(USE_INCLUDED_STRING) && !defined(STD_STRING_IS_GOOD)
 363                 string tmp(a, alen - suflen);
 364                 return ::strncmp(tmp.c_str(), suf.c_str(), suflen) == 0;
 365 #else
 366                 return a.compare(alen - suflen, suflen, suf) == 0;
 367 #endif
 368         }
 369 }
 370
 371
 372 bool contains(string const & a, string const & b)
 373 {
 374         if (a.empty())
 375                 return false;
 376         return a.find(b) != string::npos;
 377 }
 378
 379
 380 bool contains(string const & a, char b)
 381 {
 382         if (a.empty())
 383                 return false;
 384         return a.find(b) != string::npos;
 385 }
 386
 387
 388 bool containsOnly(string const & s, string const & cset)
 389 {
 390         return s.find_first_not_of(cset) == string::npos;
 391 }
 392
 393
 394 // ale970405+lasgoutt-970425
 395 // rewritten to use new string (Lgb)
 396 string const token(string const & a, char delim, int n)
 397 {
 398         if (a.empty()) return string();
 399
 400         string::size_type k = 0;
 401         string::size_type i = 0;
 402
 403         // Find delimiter or end of string
 404         for (; n--;)
 405                 if ((i = a.find(delim, i)) == string::npos)
 406                         break;
 407                 else
 408                         ++i; // step delim
 409         // i is now the n'th delim (or string::npos)
 410         if (i == string::npos) return string();
 411         k = a.find(delim, i);
 412         // k is now the n'th + 1 delim (or string::npos)
 413
 414         return a.substr(i, k - i);
 415 }
 416
 417
 418 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 419 // rewritten to use new string (Lgb)
 420 int tokenPos(string const & a, char delim, string const & tok)
 421 {
 422         int i = 0;
 423         string str(a);
 424         string tmptok;
 425
 426         while (!str.empty()) {
 427                 str = split(str, tmptok, delim);
 428                 if (tok == tmptok)
 429                         return i;
 430                 ++i;
 431         }
 432         return -1;
 433 }
 434
 435
 436 bool regexMatch(string const & a, string const & pattern)
 437 {
 438         // We massage the pattern a bit so that the usual
 439         // shell pattern we all are used to will work.
 440         // One nice thing about using a real regex is that
 441         // things like "*.*[^~]" will work also.
 442         // build the regex string.
 443         string regex(pattern);
 444         regex = subst(regex, ".", "\\.");
 445         regex = subst(regex, "*", ".*");
 446         boost::regex reg(STRCONV(regex));
 447         return boost::regex_match(STRCONV(a), reg);
 448 }
 449
 450
 451 string const subst(string const & a, char oldchar, char newchar)
 452 {
 453         string tmp(a);
 454         string::iterator lit = tmp.begin();
 455         string::iterator end = tmp.end();
 456         for (; lit != end; ++lit)
 457                 if ((*lit) == oldchar)
 458                         (*lit) = newchar;
 459         return tmp;
 460 }
 461
 462
 463 string const subst(string const & a,
 464                    char const * oldstr, string const & newstr)
 465 {
 466         lyx::Assert(oldstr);
 467
 468         string lstr(a);
 469         string::size_type i = 0;
 470         string::size_type olen = strlen(oldstr);
 471         while ((i = lstr.find(oldstr, i)) != string::npos) {
 472                 lstr.replace(i, olen, newstr);
 473                 i += newstr.length(); // We need to be sure that we dont
 474                 // use the same i over and over again.
 475         }
 476         return lstr;
 477 }
 478
 479
 480 string const subst(string const & a,
 481                    string const & oldstr, string const & newstr)
 482 {
 483         string lstr(a);
 484         string::size_type i = 0;
 485         string::size_type const olen = oldstr.length();
 486         while ((i = lstr.find(oldstr, i)) != string::npos) {
 487                 lstr.replace(i, olen, newstr);
 488                 i += newstr.length(); // We need to be sure that we dont
 489                 // use the same i over and over again.
 490         }
 491         return lstr;
 492 }
 493
 494
 495 string const trim(string const & a, char const * p)
 496 {
 497         lyx::Assert(p);
 498
 499         if (a.empty() || !*p)
 500                 return a;
 501
 502         string::size_type r = a.find_last_not_of(p);
 503         string::size_type l = a.find_first_not_of(p);
 504
 505         // Is this the minimal test? (lgb)
 506         if (r == string::npos && l == string::npos)
 507                 return string();
 508
 509         return a.substr(l, r - l + 1);
 510 }
 511
 512
 513 string const rtrim(string const & a, char const * p)
 514 {
 515         lyx::Assert(p);
 516
 517         if (a.empty() || !*p)
 518                 return a;
 519
 520         string::size_type r = a.find_last_not_of(p);
 521
 522         // Is this test really needed? (Lgb)
 523         if (r == string::npos)
 524                 return string();
 525
 526         return a.substr(0, r + 1);
 527 }
 528
 529
 530 string const ltrim(string const & a, char const * p)
 531 {
 532         lyx::Assert(p);
 533
 534         if (a.empty() || !*p)
 535                 return a;
 536
 537         string::size_type l = a.find_first_not_of(p);
 538
 539         if (l == string::npos)
 540                 return string();
 541
 542         return a.substr(l, string::npos);
 543 }
 544
 545
 546 string const split(string const & a, string & piece, char delim)
 547 {
 548         string tmp;
 549         string::size_type i = a.find(delim);
 550         if (i == a.length() - 1) {
 551                 piece = a.substr(0, i);
 552         } else if (i != string::npos) {
 553                 piece = a.substr(0, i);
 554                 tmp = a.substr(i + 1);
 555         } else if (i == 0) {
 556                 piece.erase();
 557                 tmp = a.substr(i + 1);
 558         } else {
 559                 piece = a;
 560         }
 561         return tmp;
 562 }
 563
 564
 565 string const split(string const & a, char delim)
 566 {
 567         string tmp;
 568         string::size_type i = a.find(delim);
 569         if (i != string::npos) // found delim
 570                 tmp = a.substr(i + 1);
 571         return tmp;
 572 }
 573
 574
 575 // ale970521
 576 string const rsplit(string const & a, string & piece, char delim)
 577 {
 578         string tmp;
 579         string::size_type i = a.rfind(delim);
 580         if (i != string::npos) { // delimiter was found
 581                 piece = a.substr(0, i);
 582                 tmp = a.substr(i + 1);
 583         } else { // delimter was not found
 584                 piece.erase();
 585         }
 586         return tmp;
 587 }
 588
 589
 590 // This function escapes 8-bit characters and other problematic
 591 // characters that cause problems in latex labels.
 592 string const escape(string const & lab)
 593 {
 594         char hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 595                               '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 596         string enc;
 597         for (string::size_type i = 0; i < lab.length(); ++i) {
 598                 unsigned char c= lab[i];
 599                 if (c >= 128 || c == '=' || c == '%') {
 600                         enc += '=';
 601                         enc += hexdigit[c>>4];
 602                         enc += hexdigit[c & 15];
 603                 } else {
 604                         enc += c;
 605                 }
 606         }
 607         return enc;
 608 }
 609
 610
 611 /// gives a vector of stringparts which have the delimiter delim
 612 vector<string> const getVectorFromString(string const & str,
 613                                          string const & delim)
 614 {
 615 // Lars would like this code to go, but for now his replacement (below)
 616 // doesn't fullfil the same function. I have, therefore, reactivated the
 617 // old code for now. Angus 11 Nov 2002.
 618 #if 1
 619         vector<string> vec;
 620         if (str.empty())
 621                 return vec;
 622         string keys(rtrim(str));
 623         for(;;) {
 624                 string::size_type const idx = keys.find(delim);
 625                 if (idx == string::npos) {
 626                         vec.push_back(ltrim(keys));
 627                         break;
 628                 }
 629                 string const key = trim(keys.substr(0, idx));
 630                 if (!key.empty())
 631                         vec.push_back(key);
 632                 string::size_type const start = idx + delim.size();
 633                 keys = keys.substr(start);
 634         }
 635         return vec;
 636 #else
 637         boost::char_separator<char> sep(delim.c_str());
 638         boost::tokenizer<boost::char_separator<char> > tokens(str, sep);
 639 #ifndef USE_INCLUDED_STRING
 640         return vector<string>(tokens.begin(), tokens.end());
 641 #else
 642         vector<string> vec;
 643         using boost::tokenizer;
 644         using boost::char_separator;
 645
 646         tokenizer<char_separator<char> >::iterator it = tokens.begin();
 647         tokenizer<char_separator<char> >::iterator end = tokens.end();
 648         for (; it != end; ++it) {
 649                 vec.push_back(STRCONV((*it)));
 650         }
 651         return vec;
 652 #endif
 653 #endif
 654 }
 655
 656
 657 // the same vice versa
 658 string const getStringFromVector(vector<string> const & vec,
 659                                  string const & delim)
 660 {
 661         string str;
 662         int i = 0;
 663         for (vector<string>::const_iterator it = vec.begin();
 664              it != vec.end(); ++it) {
 665                 string item = trim(*it);
 666                 if (item.empty()) continue;
 667
 668                 if (i++ > 0) str += delim;
 669                 str += item;
 670         }
 671         return str;
 672 }