src/support/lstrings.cpp

   1 /**
   2  * \file lstrings.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Lars Gullik Bjønnes
   7  * \author Jean-Marc Lasgouttes
   8  * \author Dekel Tsur
   9  *
  10  * Full author contact details are available in file CREDITS.
  11  */
  12
  13 #include <config.h>
  14
  15 #include "support/lstrings.h"
  16
  17 #include "support/convert.h"
  18 #include "support/qstring_helpers.h"
  19 #include "support/textutils.h"
  20
  21 #include <boost/tokenizer.hpp>
  22 #include "support/assert.h"
  23
  24 #include <QString>
  25 #include <QVector>
  26
  27 #include <algorithm>
  28
  29 using namespace std;
  30
  31 namespace lyx {
  32
  33 // Using this allows us to have docstring default arguments in headers
  34 // without #include "support/docstring" there.
  35 docstring const & empty_docstring()
  36 {
  37         static docstring s;
  38         return s;
  39 }
  40
  /// Hand out a reference to one shared, empty string.
  /// Headers can use it as a string default argument without having to
  /// #include <string>.
  string const & empty_string()
  {
          static string empty_value;
          return empty_value;
  }
  48
  49 /**
  50  * Convert a QChar into a UCS4 character.
  51  * This is a hack (it does only make sense for the common part of the UCS4
  52  * and UTF16 encodings) and should not be used.
  53  * This does only exist because of performance reasons (a real conversion
  54  * using iconv is too slow on windows).
  55  */
  56 static inline char_type qchar_to_ucs4(QChar const & qchar)
  57 {
  58         LASSERT(is_utf16(static_cast<char_type>(qchar.unicode())), /**/);
  59         return static_cast<char_type>(qchar.unicode());
  60 }
  61
  62
  63
  64 QString toqstr(char const * str)
  65 {
  66         return QString::fromUtf8(str);
  67 }
  68
  69 QString toqstr(std::string const & str)
  70 {
  71         return toqstr(str.c_str());
  72 }
  73
  74
  75 QString toqstr(docstring const & ucs4)
  76 {
  77         // If possible we let qt do the work, since this version does not
  78         // need to be superfast.
  79         return QString::fromUcs4((uint const *)ucs4.data(), ucs4.length());
  80 }
  81
  82 QString toqstr(char_type ucs4)
  83 {
  84         union { char_type c; uint i; } u = { ucs4 };
  85         return QString::fromUcs4(&u.i, 1);
  86 }
  87
  88 docstring qstring_to_ucs4(QString const & qstr)
  89 {
  90         if (qstr.isEmpty())
  91                 return docstring();
  92         QVector<uint> const ucs4 = qstr.toUcs4();
  93         return docstring((char_type const *)(ucs4.constData()), ucs4.size());
  94 }
  95
  96 std::string fromqstr(QString const & str)
  97 {
  98         return str.isEmpty() ? std::string() : std::string(str.toUtf8());
  99 }
 100
 101
 102 /**
 103  * Convert a UCS4 character into a QChar.
 104  * This is a hack (it does only make sense for the common part of the UCS4
 105  * and UTF16 encodings) and should not be used.
 106  * This does only exist because of performance reasons (a real conversion
 107  * using iconv is too slow on windows).
 108  */
 109 static inline QChar const ucs4_to_qchar(char_type const ucs4)
 110 {
 111         LASSERT(is_utf16(ucs4), /**/);
 112         return QChar(static_cast<unsigned short>(ucs4));
 113 }
 114
 115
 116 namespace {
 117         /// Maximum valid UCS4 code point
 118         char_type const ucs4_max = 0x10ffff;
 119 }
 120
 121
 122 bool isLetterChar(char_type c)
 123 {
 124         if (!is_utf16(c)) {
 125                 if (c > ucs4_max)
 126                         // outside the UCS4 range
 127                         return false;
 128                 // assume that all non-utf16 characters are letters
 129                 return true;
 130         }
 131         return ucs4_to_qchar(c).isLetter();
 132 }
 133
 134
 135 bool isAlphaASCII(char_type c)
 136 {
 137         return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
 138 }
 139
 140
 141 bool isPrintable(char_type c)
 142 {
 143         if (!is_utf16(c)) {
 144                 if (c > ucs4_max)
 145                         // outside the UCS4 range
 146                         return false;
 147                 // assume that all non-utf16 characters are printable
 148                 return true;
 149         }
 150         return ucs4_to_qchar(c).isPrint();
 151 }
 152
 153
 154 bool isPrintableNonspace(char_type c)
 155 {
 156         if (!is_utf16(c)) {
 157                 if (c > ucs4_max)
 158                         // outside the UCS4 range
 159                         return false;
 160                 // assume that all non-utf16 characters are printable and
 161                 // no space
 162                 return true;
 163         }
 164         QChar const qc = ucs4_to_qchar(c);
 165         return qc.isPrint() && !qc.isSpace();
 166 }
 167
 168
 169 bool isSpace(char_type c)
 170 {
 171         if (!is_utf16(c)) {
 172                 // assume that no non-utf16 character is a space
 173                 // c outside the UCS4 range is catched as well
 174                 return false;
 175         }
 176         QChar const qc = ucs4_to_qchar(c);
 177         return qc.isSpace();
 178 }
 179
 180
 181 bool isDigit(char_type c)
 182 {
 183         if (!is_utf16(c))
 184                 // assume that no non-utf16 character is a digit
 185                 // c outside the UCS4 range is catched as well
 186                 return false;
 187         return ucs4_to_qchar(c).isDigit();
 188 }
 189
 190
 191 bool isDigitASCII(char_type c)
 192 {
 193         return '0' <= c && c <= '9';
 194 }
 195
 196 namespace support {
 197
 198 int compare_no_case(docstring const & s, docstring const & s2)
 199 {
 200         docstring::const_iterator p = s.begin();
 201         docstring::const_iterator p2 = s2.begin();
 202
 203         while (p != s.end() && p2 != s2.end()) {
 204                 char_type const lc1 = lowercase(*p);
 205                 char_type const lc2 = lowercase(*p2);
 206                 if (lc1 != lc2)
 207                         return (lc1 < lc2) ? -1 : 1;
 208                 ++p;
 209                 ++p2;
 210         }
 211
 212         if (s.size() == s2.size())
 213                 return 0;
 214         if (s.size() < s2.size())
 215                 return -1;
 216         return 1;
 217 }
 218
 219
 namespace {

 /// Map the ASCII letters A-Z to a-z; any other value is returned as is.
 template<typename Char>
 Char ascii_tolower(Char c) {
         bool const is_upper = c >= 'A' && c <= 'Z';
         return is_upper ? static_cast<Char>(c - 'A' + 'a') : c;
 }

 }
 230
 231
 232 int compare_ascii_no_case(string const & s, string const & s2)
 233 {
 234         string::const_iterator p = s.begin();
 235         string::const_iterator p2 = s2.begin();
 236
 237         while (p != s.end() && p2 != s2.end()) {
 238                 int const lc1 = ascii_tolower(*p);
 239                 int const lc2 = ascii_tolower(*p2);
 240                 if (lc1 != lc2)
 241                         return (lc1 < lc2) ? -1 : 1;
 242                 ++p;
 243                 ++p2;
 244         }
 245
 246         if (s.size() == s2.size())
 247                 return 0;
 248         if (s.size() < s2.size())
 249                 return -1;
 250         return 1;
 251 }
 252
 253
 254 int compare_ascii_no_case(docstring const & s, docstring const & s2)
 255 {
 256         docstring::const_iterator p = s.begin();
 257         docstring::const_iterator p2 = s2.begin();
 258
 259         while (p != s.end() && p2 != s2.end()) {
 260                 char_type const lc1 = ascii_tolower(*p);
 261                 char_type const lc2 = ascii_tolower(*p2);
 262                 if (lc1 != lc2)
 263                         return (lc1 < lc2) ? -1 : 1;
 264                 ++p;
 265                 ++p2;
 266         }
 267
 268         if (s.size() == s2.size())
 269                 return 0;
 270         if (s.size() < s2.size())
 271                 return -1;
 272         return 1;
 273 }
 274
 275
 276 bool isStrInt(string const & str)
 277 {
 278         if (str.empty())
 279                 return false;
 280
 281         // Remove leading and trailing white space chars.
 282         string const tmpstr = trim(str);
 283         if (tmpstr.empty())
 284                 return false;
 285
 286         string::const_iterator cit = tmpstr.begin();
 287         if ((*cit) == '-')
 288                 ++cit;
 289
 290         string::const_iterator end = tmpstr.end();
 291         for (; cit != end; ++cit)
 292                 if (!isdigit((*cit)))
 293                         return false;
 294
 295         return true;
 296 }
 297
 298
 299 bool isStrUnsignedInt(string const & str)
 300 {
 301         if (str.empty())
 302                 return false;
 303
 304         // Remove leading and trailing white space chars.
 305         string const tmpstr = trim(str);
 306         if (tmpstr.empty())
 307                 return false;
 308
 309         string::const_iterator cit = tmpstr.begin();
 310         string::const_iterator end = tmpstr.end();
 311         for (; cit != end; ++cit)
 312                 if (!isdigit((*cit)))
 313                         return false;
 314
 315         return true;
 316 }
 317
 318
 319 bool isStrDbl(string const & str)
 320 {
 321         if (str.empty())
 322                 return false;
 323
 324         // Remove leading and trailing white space chars.
 325         string const tmpstr = trim(str);
 326         if (tmpstr.empty())
 327                 return false;
 328         //      if (tmpstr.count('.') > 1) return false;
 329
 330         string::const_iterator cit = tmpstr.begin();
 331         bool found_dot = false;
 332         if (*cit == '-')
 333                 ++cit;
 334         string::const_iterator end = tmpstr.end();
 335         for (; cit != end; ++cit) {
 336                 if (!isdigit(*cit) && *cit != '.')
 337                         return false;
 338                 if ('.' == (*cit)) {
 339                         if (found_dot)
 340                                 return false;
 341                         found_dot = true;
 342                 }
 343         }
 344         return true;
 345 }
 346
 347
 348 static bool isHexChar(char_type c)
 349 {
 350         return c == '0' ||
 351                 c == '1' ||
 352                 c == '2' ||
 353                 c == '3' ||
 354                 c == '4' ||
 355                 c == '5' ||
 356                 c == '6' ||
 357                 c == '7' ||
 358                 c == '8' ||
 359                 c == '9' ||
 360                 c == 'a' || c == 'A' ||
 361                 c == 'b' || c == 'B' ||
 362                 c == 'c' || c == 'C' ||
 363                 c == 'd' || c == 'D' ||
 364                 c == 'e' || c == 'E' ||
 365                 c == 'f' || c == 'F';
 366 }
 367
 368
 369 bool isHex(docstring const & str)
 370 {
 371         int index = 0;
 372
 373         if (str.length() > 2 && str[0] == '0' &&
 374             (str[1] == 'x' || str[1] == 'X'))
 375                 index = 2;
 376
 377         int const len = str.length();
 378
 379         for (; index < len; ++index) {
 380                 if (!isHexChar(str[index]))
 381                         return false;
 382         }
 383         return true;
 384 }
 385
 386
 387 int hexToInt(docstring const & str)
 388 {
 389         string s = to_ascii(str);
 390         int h;
 391         sscanf(s.c_str(), "%x", &h);
 392         return h;
 393 }
 394
 395
 396 bool isAscii(docstring const & str)
 397 {
 398         int const len = str.length();
 399         for (int i = 0; i < len; ++i)
 400                 if (str[i] >= 0x80)
 401                         return false;
 402         return true;
 403 }
 404
 405
 /// Tell whether every byte of \a str, taken as unsigned, is below 0x80.
 bool isAscii(string const & str)
 {
         string::const_iterator const end = str.end();
         for (string::const_iterator it = str.begin(); it != end; ++it)
                 if (static_cast<unsigned char>(*it) > 0x7f)
                         return false;
         return true;
 }
 414
 415
 416 char lowercase(char c)
 417 {
 418         LASSERT(static_cast<unsigned char>(c) < 0x80, /**/);
 419         return char(tolower(c));
 420 }
 421
 422
 423 char uppercase(char c)
 424 {
 425         LASSERT(static_cast<unsigned char>(c) < 0x80, /**/);
 426         return char(toupper(c));
 427 }
 428
 429
 430 char_type lowercase(char_type c)
 431 {
 432         if (!is_utf16(c))
 433                 // We don't know how to lowercase a non-utf16 char
 434                 return c;
 435         return qchar_to_ucs4(ucs4_to_qchar(c).toLower());
 436 }
 437
 438
 439 char_type uppercase(char_type c)
 440 {
 441         if (!is_utf16(c))
 442                 // We don't know how to uppercase a non-utf16 char
 443                 return c;
 444         return qchar_to_ucs4(ucs4_to_qchar(c).toUpper());
 445 }
 446
 447
 448 namespace {
 449
 450 // since we cannot use tolower and toupper directly in the
 451 // calls to transform yet, we use these helper clases. (Lgb)
 452
 453 struct local_lowercase {
 454         char_type operator()(char_type c) const {
 455                 if (!is_utf16(c))
 456                         // We don't know how to lowercase a non-utf16 char
 457                         return c;
 458                 return qchar_to_ucs4(ucs4_to_qchar(c).toLower());
 459         }
 460 };
 461
 462 struct local_uppercase {
 463         char_type operator()(char_type c) const {
 464                 if (!is_utf16(c))
 465                         // We don't know how to uppercase a non-utf16 char
 466                         return c;
 467                 return qchar_to_ucs4(ucs4_to_qchar(c).toUpper());
 468         }
 469 };
 470
 471 template<typename Char> struct local_ascii_lowercase {
 472         Char operator()(Char c) const { return ascii_tolower(c); }
 473 };
 474
 475 } // end of anon namespace
 476
 477 docstring const lowercase(docstring const & a)
 478 {
 479         docstring tmp(a);
 480         transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
 481         return tmp;
 482 }
 483
 484
 485 docstring const uppercase(docstring const & a)
 486 {
 487         docstring tmp(a);
 488         transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
 489         return tmp;
 490 }
 491
 492
 493 string const ascii_lowercase(string const & a)
 494 {
 495         string tmp(a);
 496         transform(tmp.begin(), tmp.end(), tmp.begin(),
 497                   local_ascii_lowercase<char>());
 498         return tmp;
 499 }
 500
 501
 502 docstring const ascii_lowercase(docstring const & a)
 503 {
 504         docstring tmp(a);
 505         transform(tmp.begin(), tmp.end(), tmp.begin(),
 506                   local_ascii_lowercase<char_type>());
 507         return tmp;
 508 }
 509
 510
 511 bool prefixIs(docstring const & a, char_type c)
 512 {
 513         if (a.empty())
 514                 return false;
 515         return a[0] == c;
 516 }
 517
 518
 /// Tell whether \a a starts with \a pre.
 /// An empty \a a never matches, not even an empty \a pre.
 bool prefixIs(string const & a, string const & pre)
 {
         if (a.empty())
                 return false;
         if (pre.length() > a.length())
                 return false;
         return a.compare(0, pre.length(), pre) == 0;
 }
 525
 526
 527 bool prefixIs(docstring const & a, docstring const & pre)
 528 {
 529         size_t const prelen = pre.length();
 530         size_t const alen = a.length();
 531         return prelen <= alen && !a.empty() && a.compare(0, prelen, pre) == 0;
 532 }
 533
 534
 /// Tell whether \a a is non-empty and ends with the character \a c.
 bool suffixIs(string const & a, char c)
 {
         return !a.empty() && a[a.length() - 1] == c;
 }
 540
 541
 542 bool suffixIs(docstring const & a, char_type c)
 543 {
 544         if (a.empty())
 545                 return false;
 546         return a[a.length() - 1] == c;
 547 }
 548
 549
 /// Tell whether \a a ends with \a suf.
 bool suffixIs(string const & a, string const & suf)
 {
         if (suf.length() > a.length())
                 return false;
         size_t const start = a.length() - suf.length();
         return a.compare(start, suf.length(), suf) == 0;
 }
 556
 557
 /// Tell whether every character of \a s also occurs in \a cset.
 bool containsOnly(string const & s, string const & cset)
 {
         string::const_iterator const end = s.end();
         for (string::const_iterator it = s.begin(); it != end; ++it)
                 if (cset.find(*it) == string::npos)
                         return false;
         return true;
 }
 562
 563
 // ale970405+lasgoutt-970425
 // rewritten to use new string (Lgb)
 /// Return the \a n'th (counted from 0) field of \a a, where fields are
 /// separated by \a delim. An empty string is returned when \a a is empty
 /// or holds fewer than \a n delimiters.
 string const token(string const & a, char delim, int n)
 {
         if (a.empty())
                 return string();

         // Step over n delimiters; start then sits right behind the n'th.
         size_t start = 0;
         while (n--) {
                 start = a.find(delim, start);
                 if (start == string::npos)
                         return string();
                 ++start; // step delim
         }

         // stop is the next delimiter (or string::npos)
         size_t const stop = a.find(delim, start);
         return a.substr(start, stop - start);
 }
 591
 592
 593 docstring const token(docstring const & a, char_type delim, int n)
 594 {
 595         if (a.empty())
 596                 return docstring();
 597
 598         size_t k = 0;
 599         size_t i = 0;
 600
 601         // Find delimiter or end of string
 602         for (; n--;) {
 603                 if ((i = a.find(delim, i)) == docstring::npos)
 604                         break;
 605                 else
 606                         ++i; // step delim
 607         }
 608
 609         // i is now the n'th delim (or string::npos)
 610         if (i == docstring::npos)
 611                 return docstring();
 612
 613         k = a.find(delim, i);
 614         // k is now the n'th + 1 delim (or string::npos)
 615
 616         return a.substr(i, k - i);
 617 }
 618
 619
 620 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 621 // rewritten to use new string (Lgb)
 622 int tokenPos(string const & a, char delim, string const & tok)
 623 {
 624         int i = 0;
 625         string str = a;
 626         string tmptok;
 627
 628         while (!str.empty()) {
 629                 str = split(str, tmptok, delim);
 630                 if (tok == tmptok)
 631                         return i;
 632                 ++i;
 633         }
 634         return -1;
 635 }
 636
 637
 638 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
 639 // rewritten to use new string (Lgb)
 640 int tokenPos(docstring const & a, char_type delim, docstring const & tok)
 641 {
 642         int i = 0;
 643         docstring str = a;
 644         docstring tmptok;
 645
 646         while (!str.empty()) {
 647                 str = split(str, tmptok, delim);
 648                 if (tok == tmptok)
 649                         return i;
 650                 ++i;
 651         }
 652         return -1;
 653 }
 654
 655
 656 namespace {
 657
 658 /// Substitute all \a oldchar with \a newchar
 659 template<typename Ch> inline
 660 basic_string<Ch> const subst_char(basic_string<Ch> const & a,
 661                 Ch oldchar, Ch newchar)
 662 {
 663         typedef basic_string<Ch> String;
 664         String tmp(a);
 665         typename String::iterator lit = tmp.begin();
 666         typename String::iterator end = tmp.end();
 667         for (; lit != end; ++lit)
 668                 if ((*lit) == oldchar)
 669                         (*lit) = newchar;
 670         return tmp;
 671 }
 672
 673 /// Substitute all \a oldchar with \a newchar
 674 docstring const subst_char(docstring const & a,
 675         docstring::value_type oldchar, docstring::value_type newchar)
 676 {
 677         docstring tmp(a);
 678         docstring::iterator lit = tmp.begin();
 679         docstring::iterator end = tmp.end();
 680         for (; lit != end; ++lit)
 681                 if ((*lit) == oldchar)
 682                         (*lit) = newchar;
 683         return tmp;
 684 }
 685
 686
 687 /// substitutes all instances of \a oldstr with \a newstr
 688 template<typename String> inline
 689 String const subst_string(String const & a,
 690                 String const & oldstr, String const & newstr)
 691 {
 692         LASSERT(!oldstr.empty(), /**/);
 693         String lstr = a;
 694         size_t i = 0;
 695         size_t const olen = oldstr.length();
 696         while ((i = lstr.find(oldstr, i)) != string::npos) {
 697                 lstr.replace(i, olen, newstr);
 698                 i += newstr.length(); // We need to be sure that we dont
 699                 // use the same i over and over again.
 700         }
 701         return lstr;
 702 }
 703
 704 docstring const subst_string(docstring const & a,
 705                 docstring const & oldstr, docstring const & newstr)
 706 {
 707         LASSERT(!oldstr.empty(), /**/);
 708         docstring lstr = a;
 709         size_t i = 0;
 710         size_t const olen = oldstr.length();
 711         while ((i = lstr.find(oldstr, i)) != string::npos) {
 712                 lstr.replace(i, olen, newstr);
 713                 i += newstr.length(); // We need to be sure that we dont
 714                 // use the same i over and over again.
 715         }
 716         return lstr;
 717 }
 718
 719 }
 720
 721
 722 string const subst(string const & a, char oldchar, char newchar)
 723 {
 724         return subst_char(a, oldchar, newchar);
 725 }
 726
 727
 728 docstring const subst(docstring const & a,
 729                 char_type oldchar, char_type newchar)
 730 {
 731         return subst_char(a, oldchar, newchar);
 732 }
 733
 734
 735 string const subst(string const & a,
 736                 string const & oldstr, string const & newstr)
 737 {
 738         return subst_string(a, oldstr, newstr);
 739 }
 740
 741
 742 docstring const subst(docstring const & a,
 743                 docstring const & oldstr, docstring const & newstr)
 744 {
 745         return subst_string(a, oldstr, newstr);
 746 }
 747
 748
 749 docstring const trim(docstring const & a, char const * p)
 750 {
 751         LASSERT(p, /**/);
 752
 753         if (a.empty() || !*p)
 754                 return a;
 755
 756         docstring s = from_ascii(p);
 757         size_t r = a.find_last_not_of(s);
 758         size_t l = a.find_first_not_of(s);
 759
 760         // Is this the minimal test? (lgb)
 761         if (r == docstring::npos && l == docstring::npos)
 762                 return docstring();
 763
 764         return a.substr(l, r - l + 1);
 765 }
 766
 767
 768 string const trim(string const & a, char const * p)
 769 {
 770         LASSERT(p, /**/);
 771
 772         if (a.empty() || !*p)
 773                 return a;
 774
 775         size_t r = a.find_last_not_of(p);
 776         size_t l = a.find_first_not_of(p);
 777
 778         // Is this the minimal test? (lgb)
 779         if (r == string::npos && l == string::npos)
 780                 return string();
 781
 782         return a.substr(l, r - l + 1);
 783 }
 784
 785
 786 string const rtrim(string const & a, char const * p)
 787 {
 788         LASSERT(p, /**/);
 789
 790         if (a.empty() || !*p)
 791                 return a;
 792
 793         size_t r = a.find_last_not_of(p);
 794
 795         // Is this test really needed? (Lgb)
 796         if (r == string::npos)
 797                 return string();
 798
 799         return a.substr(0, r + 1);
 800 }
 801
 802
 803 docstring const rtrim(docstring const & a, char const * p)
 804 {
 805         LASSERT(p, /**/);
 806
 807         if (a.empty() || !*p)
 808                 return a;
 809
 810         size_t r = a.find_last_not_of(from_ascii(p));
 811
 812         // Is this test really needed? (Lgb)
 813         if (r == docstring::npos)
 814                 return docstring();
 815
 816         return a.substr(0, r + 1);
 817 }
 818
 819
 820 string const ltrim(string const & a, char const * p)
 821 {
 822         LASSERT(p, /**/);
 823         if (a.empty() || !*p)
 824                 return a;
 825         size_t l = a.find_first_not_of(p);
 826         if (l == string::npos)
 827                 return string();
 828         return a.substr(l, string::npos);
 829 }
 830
 831
 832 docstring const ltrim(docstring const & a, char const * p)
 833 {
 834         LASSERT(p, /**/);
 835         if (a.empty() || !*p)
 836                 return a;
 837         size_t l = a.find_first_not_of(from_ascii(p));
 838         if (l == docstring::npos)
 839                 return docstring();
 840         return a.substr(l, docstring::npos);
 841 }
 842
 843 namespace {
 844
 845 template<typename String, typename Char> inline
 846 String const doSplit(String const & a, String & piece, Char delim)
 847 {
 848         String tmp;
 849         size_t i = a.find(delim);
 850         if (i == a.length() - 1) {
 851                 piece = a.substr(0, i);
 852         } else if (i != String::npos) {
 853                 piece = a.substr(0, i);
 854                 tmp = a.substr(i + 1);
 855         } else if (i == 0) {
 856                 piece.erase();
 857                 tmp = a.substr(i + 1);
 858         } else {
 859                 piece = a;
 860         }
 861         return tmp;
 862 }
 863
 864 template<typename Char> inline
 865 docstring const doSplit(docstring const & a, docstring & piece, Char delim)
 866 {
 867         docstring tmp;
 868         size_t i = a.find(delim);
 869         if (i == a.length() - 1) {
 870                 piece = a.substr(0, i);
 871         } else if (i != docstring::npos) {
 872                 piece = a.substr(0, i);
 873                 tmp = a.substr(i + 1);
 874         } else if (i == 0) {
 875                 piece.erase();
 876                 tmp = a.substr(i + 1);
 877         } else {
 878                 piece = a;
 879         }
 880         return tmp;
 881 }
 882
 883 } // anon
 884
 885
 886 string const split(string const & a, string & piece, char delim)
 887 {
 888         return doSplit(a, piece, delim);
 889 }
 890
 891
 892 docstring const split(docstring const & a, docstring & piece, char_type delim)
 893 {
 894         return doSplit(a, piece, delim);
 895 }
 896
 897
 /// Return the part of \a a behind the first \a delim, or an empty string
 /// if \a delim does not occur.
 string const split(string const & a, char delim)
 {
         size_t const pos = a.find(delim);
         if (pos == string::npos) // delim not found
                 return string();
         return a.substr(pos + 1);
 }
 906
 907
 // ale970521
 /// Split \a a at the last occurrence of \a delim: the part in front goes
 /// to \a piece and the part behind is returned. If \a delim does not
 /// occur, \a piece is cleared and an empty string is returned.
 string const rsplit(string const & a, string & piece, char delim)
 {
         size_t const pos = a.rfind(delim);
         if (pos == string::npos) {
                 // delimiter was not found
                 piece.erase();
                 return string();
         }
         // delimiter was found
         piece = a.substr(0, pos);
         return a.substr(pos + 1);
 }
 921
 922
 923 docstring const escape(docstring const & lab)
 924 {
 925         char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
 926                                    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 927         docstring enc;
 928         for (size_t i = 0; i < lab.length(); ++i) {
 929                 char_type c = lab[i];
 930                 if (c >= 128 || c == '=' || c == '%') {
 931                         // Although char_type is a 32 bit type we know that
 932                         // UCS4 occupies only 21 bits, so we don't need to
 933                         // encode bigger values. Test for 2^24 because we
 934                         // can encode that with the 6 hex digits that are
 935                         // needed for 21 bits anyway.
 936                         LASSERT(c < (1 << 24), /**/);
 937                         enc += '=';
 938                         enc += hexdigit[(c>>20) & 15];
 939                         enc += hexdigit[(c>>16) & 15];
 940                         enc += hexdigit[(c>>12) & 15];
 941                         enc += hexdigit[(c>> 8) & 15];
 942                         enc += hexdigit[(c>> 4) & 15];
 943                         enc += hexdigit[ c      & 15];
 944                 } else {
 945                         enc += c;
 946                 }
 947         }
 948         return enc;
 949 }
 950
 951
 namespace {

 /// Cut \a str into pieces at every occurrence of \a delim.
 /// Pieces in front of a delimiter are trimmed and dropped when empty;
 /// the piece behind the last delimiter is left-trimmed and always added.
 /// An empty \a str gives an empty vector.
 template<typename String> vector<String> const
 getVectorFromStringT(String const & str, String const & delim)
 {
 // Lars would like this code to go, but for now his replacement (below)
 // doesn't fulfil the same function. I have, therefore, reactivated the
 // old code for now. Angus 11 Nov 2002.
 #if 1
         vector<String> items;
         if (str.empty())
                 return items;
         String rest = rtrim(str);
         for (;;) {
                 size_t const cut = rest.find(delim);
                 if (cut == String::npos)
                         break;
                 String const item = trim(rest.substr(0, cut));
                 if (!item.empty())
                         items.push_back(item);
                 // continue behind the delimiter
                 rest = rest.substr(cut + delim.size());
         }
         // whatever follows the last delimiter
         items.push_back(ltrim(rest));
         return items;
 #else
         typedef boost::char_separator<typename String::value_type> Separator;
         typedef boost::tokenizer<Separator, typename String::const_iterator, String> Tokenizer;
         Separator sep(delim.c_str());
         Tokenizer tokens(str, sep);
         return vector<String>(tokens.begin(), tokens.end());
 #endif
 }

 } // namespace anon
 988
 989
 990 vector<string> const getVectorFromString(string const & str,
 991                                          string const & delim)
 992 {
 993         return getVectorFromStringT<string>(str, delim);
 994 }
 995
 996
 997 vector<docstring> const getVectorFromString(docstring const & str,
 998                                             docstring const & delim)
 999 {
1000         return getVectorFromStringT<docstring>(str, delim);
1001 }
1002
1003
1004 // the same vice versa
1005 string const getStringFromVector(vector<string> const & vec,
1006                                  string const & delim)
1007 {
1008         string str;
1009         int i = 0;
1010         for (vector<string>::const_iterator it = vec.begin();
1011              it != vec.end(); ++it) {
1012                 string item = trim(*it);
1013                 if (item.empty())
1014                         continue;
1015                 if (i++ > 0)
1016                         str += delim;
1017                 str += item;
1018         }
1019         return str;
1020 }
1021
1022
/// Look up \a search_token in the array \a str, whose end is marked by an
/// empty string.
/// \return the index of the first matching entry, or -1 if none matches.
int findToken(char const * const str[], string const & search_token)
{
        for (int i = 0; str[i][0]; ++i)
                if (str[i] == search_token)
                        return i;
        // ran into the terminating empty string
        return -1;
}
1033
1034
1035 docstring const externalLineEnding(docstring const & str)
1036 {
1037 #if defined(__APPLE__)
1038         // The MAC clipboard uses \r for lineendings, and we use \n
1039         return subst(str, '\n', '\r');
1040 #elif defined (_WIN32) || (defined (__CYGWIN__) && defined (X_DISPLAY_MISSING))
1041         // Windows clipboard uses \r\n for lineendings, and we use \n
1042         return subst(str, from_ascii("\n"), from_ascii("\r\n"));
1043 #else
1044         return str;
1045 #endif
1046 }
1047
1048
1049 docstring const internalLineEnding(docstring const & str)
1050 {
1051         docstring const s = subst(str, from_ascii("\r\n"), from_ascii("\n"));
1052         return subst(s, '\r', '\n');
1053 }
1054
1055
1056 template<>
1057 docstring bformat(docstring const & fmt, int arg1)
1058 {
1059         LASSERT(contains(fmt, from_ascii("%1$d")), /**/);
1060         docstring const str = subst(fmt, from_ascii("%1$d"), convert<docstring>(arg1));
1061         return subst(str, from_ascii("%%"), from_ascii("%"));
1062 }
1063
1064
1065 template<>
1066 docstring bformat(docstring const & fmt, long arg1)
1067 {
1068         LASSERT(contains(fmt, from_ascii("%1$d")), /**/);
1069         docstring const str = subst(fmt, from_ascii("%1$d"), convert<docstring>(arg1));
1070         return subst(str, from_ascii("%%"), from_ascii("%"));
1071 }
1072
1073
1074 template<>
1075 docstring bformat(docstring const & fmt, unsigned int arg1)
1076 {
1077         LASSERT(contains(fmt, from_ascii("%1$d")), /**/);
1078         docstring const str = subst(fmt, from_ascii("%1$d"), convert<docstring>(arg1));
1079         return subst(str, from_ascii("%%"), from_ascii("%"));
1080 }
1081
1082
1083 template<>
1084 docstring bformat(docstring const & fmt, docstring arg1)
1085 {
1086         LASSERT(contains(fmt, from_ascii("%1$s")), /**/);
1087         docstring const str = subst(fmt, from_ascii("%1$s"), arg1);
1088         return subst(str, from_ascii("%%"), from_ascii("%"));
1089 }
1090
1091
1092 template<>
1093 docstring bformat(docstring const & fmt, char * arg1)
1094 {
1095         LASSERT(contains(fmt, from_ascii("%1$s")), /**/);
1096         docstring const str = subst(fmt, from_ascii("%1$s"), from_ascii(arg1));
1097         return subst(str, from_ascii("%%"), from_ascii("%"));
1098 }
1099
1100
1101 template<>
1102 docstring bformat(docstring const & fmt, docstring arg1, docstring arg2)
1103 {
1104         LASSERT(contains(fmt, from_ascii("%1$s")), /**/);
1105         LASSERT(contains(fmt, from_ascii("%2$s")), /**/);
1106         docstring str = subst(fmt, from_ascii("%1$s"), arg1);
1107         str = subst(str, from_ascii("%2$s"), arg2);
1108         return subst(str, from_ascii("%%"), from_ascii("%"));
1109 }
1110
1111
1112 template<>
1113 docstring bformat(docstring const & fmt, char const * arg1, docstring arg2)
1114 {
1115         LASSERT(contains(fmt, from_ascii("%1$s")), /**/);
1116         LASSERT(contains(fmt, from_ascii("%2$s")), /**/);
1117         docstring str = subst(fmt, from_ascii("%1$s"), from_ascii(arg1));
1118         str = subst(fmt, from_ascii("%2$s"), arg2);
1119         return subst(str, from_ascii("%%"), from_ascii("%"));
1120 }
1121
1122
1123 template<>
1124 docstring bformat(docstring const & fmt, int arg1, int arg2)
1125 {
1126         LASSERT(contains(fmt, from_ascii("%1$d")), /**/);
1127         LASSERT(contains(fmt, from_ascii("%2$d")), /**/);
1128         docstring str = subst(fmt, from_ascii("%1$d"), convert<docstring>(arg1));
1129         str = subst(str, from_ascii("%2$d"), convert<docstring>(arg2));
1130         return subst(str, from_ascii("%%"), from_ascii("%"));
1131 }
1132
1133
1134 template<>
1135 docstring bformat(docstring const & fmt, docstring arg1, docstring arg2, docstring arg3)
1136 {
1137         LASSERT(contains(fmt, from_ascii("%1$s")), /**/);
1138         LASSERT(contains(fmt, from_ascii("%2$s")), /**/);
1139         LASSERT(contains(fmt, from_ascii("%3$s")), /**/);
1140         docstring str = subst(fmt, from_ascii("%1$s"), arg1);
1141         str = subst(str, from_ascii("%2$s"), arg2);
1142         str = subst(str, from_ascii("%3$s"), arg3);
1143         return subst(str, from_ascii("%%"), from_ascii("%"));
1144 }
1145
1146
1147 template<>
1148 docstring bformat(docstring const & fmt,
1149                docstring arg1, docstring arg2, docstring arg3, docstring arg4)
1150 {
1151         LASSERT(contains(fmt, from_ascii("%1$s")), /**/);
1152         LASSERT(contains(fmt, from_ascii("%2$s")), /**/);
1153         LASSERT(contains(fmt, from_ascii("%3$s")), /**/);
1154         LASSERT(contains(fmt, from_ascii("%4$s")), /**/);
1155         docstring str = subst(fmt, from_ascii("%1$s"), arg1);
1156         str = subst(str, from_ascii("%2$s"), arg2);
1157         str = subst(str, from_ascii("%3$s"), arg3);
1158         str = subst(str, from_ascii("%4$s"), arg4);
1159         return subst(str, from_ascii("%%"), from_ascii("%"));
1160 }
1161
1162 } // namespace support
1163 } // namespace lyx