3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Lars Gullik Bjønnes
7 * \author Jean-Marc Lasgouttes
9 * Full author contact details are available in file CREDITS.
14 #include "support/lstrings.h"
15 #include "support/lyxlib.h"
16 #include "support/convert.h"
20 #include <boost/tokenizer.hpp>
21 #include <boost/assert.hpp>
23 #ifndef I_AM_NOT_AFRAID_OF_HEADER_LIBRARIES
25 #include <boost/format.hpp>
39 #ifndef CXX_GLOBAL_CSTD
/// Case-insensitive three-way comparison of two byte strings.
/// Characters are folded with tolower() and compared pairwise; when one
/// string is a prefix of the other, the shorter one sorts first.
/// \return -1 if \p s sorts before \p s2, 1 if it sorts after, 0 if equal.
int compare_no_case(std::string const & s, std::string const & s2)
{
	std::string::const_iterator p = s.begin();
	std::string::const_iterator p2 = s2.begin();

	while (p != s.end() && p2 != s2.end()) {
		// The <cctype> functions require a value representable as
		// unsigned char; a plain (possibly negative) char is not.
		int const lc1 = tolower(static_cast<unsigned char>(*p));
		int const lc2 = tolower(static_cast<unsigned char>(*p2));
		if (lc1 != lc2)
			return (lc1 < lc2) ? -1 : 1;
		++p;
		++p2;
	}

	if (s.size() == s2.size())
		return 0;
	if (s.size() < s2.size())
		return -1;
	return 1;
}
71 int compare_no_case(docstring const & s, docstring const & s2)
73 docstring::const_iterator p = s.begin();
74 docstring::const_iterator p2 = s2.begin();
76 while (p != s.end() && p2 != s2.end()) {
77 int const lc1 = tolower(*p);
78 int const lc2 = tolower(*p2);
80 return (lc1 < lc2) ? -1 : 1;
85 if (s.size() == s2.size())
87 if (s.size() < s2.size())
/// Locale-independent lowercase conversion: maps 'A'..'Z' onto 'a'..'z'
/// and returns every other value unchanged.
int ascii_tolower(int c) {
	if (c >= 'A' && c <= 'Z')
		return c - 'A' + 'a';
	return c;
}
102 int compare_ascii_no_case(string const & s, string const & s2)
104 string::const_iterator p = s.begin();
105 string::const_iterator p2 = s2.begin();
107 while (p != s.end() && p2 != s2.end()) {
108 int const lc1 = ascii_tolower(*p);
109 int const lc2 = ascii_tolower(*p2);
111 return (lc1 < lc2) ? -1 : 1;
116 if (s.size() == s2.size())
118 if (s.size() < s2.size())
/// Case-insensitive three-way comparison of at most the first \p len
/// characters of two strings.
/// \return 0 if the compared ranges are equal, -1 if \p s sorts before
/// \p s2, 1 if it sorts after.
int compare_no_case(std::string const & s, std::string const & s2, unsigned int len)
{
	std::string::const_iterator p = s.begin();
	std::string::const_iterator p2 = s2.begin();
	unsigned int i = 0;
	while (i < len && p != s.end() && p2 != s2.end()) {
		// The <cctype> functions require a value representable as
		// unsigned char; a plain (possibly negative) char is not.
		int const lc1 = tolower(static_cast<unsigned char>(*p));
		int const lc2 = tolower(static_cast<unsigned char>(*p2));
		if (lc1 != lc2)
			return (lc1 < lc2) ? -1 : 1;
		++i;
		++p;
		++p2;
	}

	// All of the first len characters matched.
	if (i == len)
		return 0;
	// At least one string ended before len characters were compared.
	// Two strings of the same length are then identical and must
	// compare equal (this case used to yield 1).
	if (s.size() == s2.size())
		return 0;
	return (s.size() < s2.size()) ? -1 : 1;
}
147 bool isStrInt(string const & str)
149 if (str.empty()) return false;
151 // Remove leading and trailing white space chars.
152 string const tmpstr = trim(str);
153 if (tmpstr.empty()) return false;
155 string::const_iterator cit = tmpstr.begin();
156 if ((*cit) == '-') ++cit;
157 string::const_iterator end = tmpstr.end();
158 for (; cit != end; ++cit) {
159 if (!isdigit((*cit))) return false;
165 bool isStrUnsignedInt(string const & str)
167 if (str.empty()) return false;
169 // Remove leading and trailing white space chars.
170 string const tmpstr = trim(str);
171 if (tmpstr.empty()) return false;
173 string::const_iterator cit = tmpstr.begin();
174 string::const_iterator end = tmpstr.end();
175 for (; cit != end; ++cit) {
176 if (!isdigit((*cit))) return false;
182 bool isStrDbl(string const & str)
184 if (str.empty()) return false;
186 // Remove leading and trailing white space chars.
187 string const tmpstr = trim(str);
188 if (tmpstr.empty()) return false;
189 // if (1 < tmpstr.count('.')) return false;
191 string::const_iterator cit = tmpstr.begin();
192 bool found_dot(false);
193 if ((*cit) == '-') ++cit;
194 string::const_iterator end = tmpstr.end();
195 for (; cit != end; ++cit) {
/// Lowercase a single char with tolower().
char lowercase(char c)
{
	// tolower() is only defined for unsigned char values (and EOF).
	return char(tolower(static_cast<unsigned char>(c)));
}
/// Uppercase a single char with toupper().
char uppercase(char c)
{
	// toupper() is only defined for unsigned char values (and EOF).
	return char(toupper(static_cast<unsigned char>(c)));
}
// FIXME for the lowercase() and uppercase() functions below:
// 1) std::tolower() and std::toupper() are templates that
// compile fine with char_type. With the test (c >= 256) we
// do not trust these functions to do the right thing with
// unicode chars.
// 2) these functions use the current locale, which is wrong
// if it is not latin1 based (latin1 is a subset of UCS4).
231 char_type lowercase(char_type c)
240 char_type uppercase(char_type c)
// Since we cannot use std::tolower and std::toupper directly in the
// calls to std::transform yet, we use these helper classes. (Lgb)
/// Function object that lowercases one char with tolower(); meant to be
/// passed to std::transform.
struct local_lowercase {
	char operator()(char c) const {
		// tolower() is only defined for unsigned char values.
		return char(tolower(static_cast<unsigned char>(c)));
	}
};
/// Function object that uppercases one char with toupper(); meant to be
/// passed to std::transform.
struct local_uppercase {
	char operator()(char c) const {
		// toupper() is only defined for unsigned char values.
		return char(toupper(static_cast<unsigned char>(c)));
	}
};
266 struct local_ascii_lowercase {
267 char operator()(char c) const {
268 return ascii_tolower(c);
272 } // end of anon namespace
274 string const lowercase(string const & a)
277 transform(tmp.begin(), tmp.end(), tmp.begin(), local_lowercase());
281 string const uppercase(string const & a)
284 transform(tmp.begin(), tmp.end(), tmp.begin(), local_uppercase());
289 string const ascii_lowercase(string const & a)
292 transform(tmp.begin(), tmp.end(), tmp.begin(),
293 local_ascii_lowercase());
/// Tells whether \p a starts with \p pre.
/// An empty \p a never matches, not even an empty prefix.
bool prefixIs(std::string const & a, std::string const & pre)
{
	std::string::size_type const prelen = pre.length();
	std::string::size_type const alen = a.length();

	if (prelen > alen || a.empty())
		return false;
	// std::string::compare looks at exactly prelen characters, whereas
	// the former strncmp() fallback stopped at an embedded NUL.
	return a.compare(0, prelen, pre) == 0;
}
/// Tells whether the last character of \p a is \p c.
/// \return false for an empty string.
bool suffixIs(std::string const & a, char c)
{
	if (a.empty())
		return false;
	return a[a.length() - 1] == c;
}
/// Tells whether \p a ends with \p suf.
bool suffixIs(std::string const & a, std::string const & suf)
{
	std::string::size_type const suflen = suf.length();
	std::string::size_type const alen = a.length();

	if (suflen > alen)
		return false;
	// Compare in place; no temporary copy of the tail is needed.
	return a.compare(alen - suflen, suflen, suf) == 0;
}
/// Tells whether every character of \p s is a member of \p cset.
/// An empty \p s trivially satisfies this.
bool containsOnly(std::string const & s, std::string const & cset)
{
	return s.find_first_not_of(cset) == std::string::npos;
}
// ale970405+lasgoutt-970425
// rewritten to use new string (Lgb)
/// Returns the \p n'th (0-based) field of \p a, fields being separated
/// by \p delim. An empty string is returned when \p a is empty or has
/// fewer than n + 1 fields.
std::string const token(std::string const & a, char delim, int n)
{
	if (a.empty())
		return std::string();

	std::string::size_type i = 0;

	// Skip n delimiters; give up if the string runs out of them.
	for (; n--;) {
		i = a.find(delim, i);
		if (i == std::string::npos)
			return std::string();
		++i; // step over the delimiter
	}
	// i is now the start of the requested field.
	std::string::size_type const k = a.find(delim, i);
	// k is the delimiter ending the field (or string::npos).

	return a.substr(i, k - i);
}
370 docstring const token(docstring const & a, char_type delim, int n)
372 if (a.empty()) return docstring();
374 string::size_type k = 0;
375 string::size_type i = 0;
377 // Find delimiter or end of string
379 if ((i = a.find(delim, i)) == docstring::npos)
383 // i is now the n'th delim (or string::npos)
384 if (i == docstring::npos) return docstring();
385 k = a.find(delim, i);
386 // k is now the n'th + 1 delim (or string::npos)
388 return a.substr(i, k - i);
392 // this could probably be faster and/or cleaner, but it seems to work (JMarc)
393 // rewritten to use new string (Lgb)
394 int tokenPos(string const & a, char delim, string const & tok)
400 while (!str.empty()) {
401 str = split(str, tmptok, delim);
/// Returns a copy of \p a in which every occurrence of \p oldchar has
/// been replaced by \p newchar.
template<typename Ch> inline
std::basic_string<Ch> const subst_char(std::basic_string<Ch> const & a,
		Ch oldchar, Ch newchar)
{
	typedef std::basic_string<Ch> String;
	String tmp(a);
	typename String::iterator lit = tmp.begin();
	typename String::iterator end = tmp.end();
	for (; lit != end; ++lit)
		if ((*lit) == oldchar)
			(*lit) = newchar;
	return tmp;
}
/// Returns a copy of \p a in which every non-overlapping occurrence of
/// \p oldstr has been replaced by \p newstr. \p oldstr must not be empty.
template<typename String> inline
String const subst_string(String const & a,
		String const & oldstr, String const & newstr)
{
	BOOST_ASSERT(!oldstr.empty());
	String lstr = a;
	typename String::size_type i = 0;
	typename String::size_type const olen = oldstr.length();
	// Compare against the npos of the string type actually in use.
	while ((i = lstr.find(oldstr, i)) != String::npos) {
		lstr.replace(i, olen, newstr);
		// Continue after the inserted text so that a newstr which
		// contains oldstr is not matched over and over again.
		i += newstr.length();
	}
	return lstr;
}
446 string const subst(string const & a, char oldchar, char newchar)
448 return subst_char(a, oldchar, newchar);
452 docstring const subst(docstring const & a,
453 char_type oldchar, char_type newchar)
455 return subst_char(a, oldchar, newchar);
459 string const subst(string const & a,
460 string const & oldstr, string const & newstr)
462 return subst_string(a, oldstr, newstr);
466 docstring const subst(docstring const & a,
467 docstring const & oldstr, docstring const & newstr)
469 return subst_string(a, oldstr, newstr);
/// Returns \p a with all leading and trailing characters that occur in
/// the NUL-terminated set \p p removed. \p p must not be null; if \p a or
/// \p p is empty, \p a is returned unchanged.
std::string const trim(std::string const & a, char const * p)
{
	BOOST_ASSERT(p);

	if (a.empty() || !*p)
		return a;

	std::string::size_type const r = a.find_last_not_of(p);
	std::string::size_type const l = a.find_first_not_of(p);

	// Either both searches fail or both succeed, so testing one of
	// them suffices: the string consists of trimmed characters only.
	if (l == std::string::npos)
		return std::string();

	return a.substr(l, r - l + 1);
}
/// Returns \p a with all trailing characters that occur in the
/// NUL-terminated set \p p removed. \p p must not be null; if \p a or
/// \p p is empty, \p a is returned unchanged.
std::string const rtrim(std::string const & a, char const * p)
{
	BOOST_ASSERT(p);

	if (a.empty() || !*p)
		return a;

	std::string::size_type const r = a.find_last_not_of(p);

	// Nothing but trimmed characters in the string.
	if (r == std::string::npos)
		return std::string();

	return a.substr(0, r + 1);
}
/// Returns \p a with all leading characters that occur in the
/// NUL-terminated set \p p removed. \p p must not be null; if \p a or
/// \p p is empty, \p a is returned unchanged.
std::string const ltrim(std::string const & a, char const * p)
{
	BOOST_ASSERT(p);

	if (a.empty() || !*p)
		return a;

	std::string::size_type const l = a.find_first_not_of(p);

	// Nothing but trimmed characters in the string.
	if (l == std::string::npos)
		return std::string();

	return a.substr(l, std::string::npos);
}
/// Splits \p a at the first occurrence of \p delim.
/// \param piece receives the text before the delimiter, or all of \p a
/// when no delimiter is present.
/// \return the text after the delimiter (empty if there is none).
std::string const split(std::string const & a, std::string & piece, char delim)
{
	std::string tmp;
	std::string::size_type const i = a.find(delim);
	if (i != std::string::npos) {
		piece = a.substr(0, i);
		// Yields an empty string when the delimiter is the last char.
		tmp = a.substr(i + 1);
	} else {
		piece = a;
	}
	return tmp;
}
/// Returns the part of \p a that follows the first occurrence of
/// \p delim, or an empty string when \p delim does not occur.
std::string const split(std::string const & a, char delim)
{
	std::string tmp;
	std::string::size_type const i = a.find(delim);
	if (i != std::string::npos) // found delim
		tmp = a.substr(i + 1);
	return tmp;
}
/// Splits \p a at the last occurrence of \p delim.
/// \param piece receives the text before the delimiter, or is emptied
/// when no delimiter is present.
/// \return the text after the delimiter (empty if there is none).
std::string const rsplit(std::string const & a, std::string & piece, char delim)
{
	std::string tmp;
	std::string::size_type const i = a.rfind(delim);
	if (i != std::string::npos) { // delimiter was found
		piece = a.substr(0, i);
		tmp = a.substr(i + 1);
	} else { // delimiter was not found
		piece.erase();
	}
	return tmp;
}
// This function escapes 8-bit characters and other problematic
// characters that cause problems in latex labels.
/// Every byte >= 128 as well as '=' and '%' is replaced by '=' followed
/// by its two-digit uppercase hexadecimal value; all other bytes are
/// copied unchanged.
std::string const escape(std::string const & lab)
{
	static char const hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
					   '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
	std::string enc;
	for (std::string::size_type i = 0; i < lab.length(); ++i) {
		unsigned char c = lab[i];
		if (c >= 128 || c == '=' || c == '%') {
			enc += '=';
			enc += hexdigit[c >> 4];
			enc += hexdigit[c & 15];
		} else {
			enc += char(c);
		}
	}
	return enc;
}
589 /// gives a vector of stringparts which have the delimiter delim
590 vector<string> const getVectorFromString(string const & str,
591 string const & delim)
593 // Lars would like this code to go, but for now his replacement (below)
594 // doesn't fullfil the same function. I have, therefore, reactivated the
595 // old code for now. Angus 11 Nov 2002.
600 string keys = rtrim(str);
602 string::size_type const idx = keys.find(delim);
603 if (idx == string::npos) {
604 vec.push_back(ltrim(keys));
607 string const key = trim(keys.substr(0, idx));
610 string::size_type const start = idx + delim.size();
611 keys = keys.substr(start);
615 boost::char_separator<char> sep(delim.c_str());
616 boost::tokenizer<boost::char_separator<char> > tokens(str, sep);
617 return vector<string>(tokens.begin(), tokens.end());
622 // the same vice versa
623 string const getStringFromVector(vector<string> const & vec,
624 string const & delim)
628 for (vector<string>::const_iterator it = vec.begin();
629 it != vec.end(); ++it) {
630 string item = trim(*it);
/// Looks \p search_token up in the array \p str, which must be
/// terminated by an empty string.
/// \return the index of the first matching entry, or -1 if none matches.
int findToken(char const * const str[], std::string const & search_token)
{
	int i = 0;

	while (str[i][0] && str[i] != search_token)
		++i;
	// Stopped on the empty terminator: not found.
	if (!str[i][0])
		i = -1;
	return i;
}
653 docstring const externalLineEnding(docstring const & str)
655 #if defined(__APPLE__)
656 // The MAC clipboard uses \r for lineendings, and we use \n
657 return subst(str, '\n', '\r');
658 #elif defined (_WIN32) || (defined (__CYGWIN__) && defined (X_DISPLAY_MISSING))
659 // Windows clipboard uses \r\n for lineendings, and we use \n
660 return subst(str, lyx::from_ascii("\n"), lyx::from_ascii("\r\n"));
667 docstring const internalLineEnding(docstring const & str)
669 docstring const s = subst(str,
670 lyx::from_ascii("\r\n"), lyx::from_ascii("\n"));
671 return subst(s, '\r', '\n');
675 #ifndef I_AM_NOT_AFRAID_OF_HEADER_LIBRARIES
679 string bformat(string const & fmt, int arg1)
681 return (boost::format(fmt) % arg1).str();
686 string bformat(string const & fmt, long arg1)
688 return (boost::format(fmt) % arg1).str();
693 string bformat(string const & fmt, unsigned int arg1)
695 return (boost::format(fmt) % arg1).str();
700 string bformat<string>(string const & fmt, string arg1)
702 return (boost::format(fmt) % arg1).str();
707 string bformat(string const & fmt, char * arg1)
709 return (boost::format(fmt) % arg1).str();
714 string bformat(string const & fmt, int arg1, int arg2)
716 return (boost::format(fmt) % arg1 % arg2).str();
721 string bformat(string const & fmt, string arg1, string arg2)
723 return (boost::format(fmt) % arg1 % arg2).str();
728 string bformat(string const & fmt, char const * arg1, string arg2)
730 return (boost::format(fmt) % arg1 % arg2).str();
735 string bformat(string const & fmt, string arg1, string arg2, string arg3)
737 return (boost::format(fmt) % arg1 % arg2 % arg3).str();
742 string bformat(string const & fmt,
743 string arg1, string arg2, string arg3, string arg4)
745 return (boost::format(fmt) % arg1 % arg2 % arg3 % arg4).str();
751 string bformat(string const & fmt, int arg1)
753 BOOST_ASSERT(contains(fmt, "%1$d"));
754 string const str = subst(fmt, "%1$d", convert<string>(arg1));
755 return subst(str, "%%", "%");
760 string bformat(string const & fmt, long arg1)
762 BOOST_ASSERT(contains(fmt, "%1$d"));
763 string const str = subst(fmt, "%1$d", convert<string>(arg1));
764 return subst(str, "%%", "%");
769 string bformat(string const & fmt, unsigned int arg1)
771 BOOST_ASSERT(contains(fmt, "%1$d"));
772 string const str = subst(fmt, "%1$d", convert<string>(arg1));
773 return subst(str, "%%", "%");
778 string bformat(string const & fmt, string arg1)
780 BOOST_ASSERT(contains(fmt, "%1$s"));
781 string const str = subst(fmt, "%1$s", arg1);
782 return subst(str, "%%", "%");
787 string bformat(string const & fmt, char * arg1)
789 BOOST_ASSERT(contains(fmt, "%1$s"));
790 string const str = subst(fmt, "%1$s", arg1);
791 return subst(str, "%%", "%");
794 string bformat(string const & fmt, string arg1, string arg2)
796 BOOST_ASSERT(contains(fmt, "%1$s"));
797 BOOST_ASSERT(contains(fmt, "%2$s"));
798 string str = subst(fmt, "%1$s", arg1);
799 str = subst(str, "%2$s", arg2);
800 return subst(str, "%%", "%");
805 string bformat(string const & fmt, char const * arg1, string arg2)
807 BOOST_ASSERT(contains(fmt, "%1$s"));
808 BOOST_ASSERT(contains(fmt, "%2$s"));
809 string str = subst(fmt, "%1$s", arg1);
810 str = subst(fmt, "%2$s", arg2);
811 return subst(str, "%%", "%");
816 string bformat(string const & fmt, int arg1, int arg2)
818 BOOST_ASSERT(contains(fmt, "%1$d"));
819 BOOST_ASSERT(contains(fmt, "%2$d"));
820 string str = subst(fmt, "%1$d", convert<string>(arg1));
821 str = subst(str, "%2$d", convert<string>(arg2));
822 return subst(str, "%%", "%");
827 string bformat(string const & fmt, string arg1, string arg2, string arg3)
829 BOOST_ASSERT(contains(fmt, "%1$s"));
830 BOOST_ASSERT(contains(fmt, "%2$s"));
831 BOOST_ASSERT(contains(fmt, "%3$s"));
832 string str = subst(fmt, "%1$s", arg1);
833 str = subst(str, "%2$s", arg2);
834 str = subst(str, "%3$s", arg3);
835 return subst(str, "%%", "%");
840 string bformat(string const & fmt,
841 string arg1, string arg2, string arg3, string arg4)
843 BOOST_ASSERT(contains(fmt, "%1$s"));
844 BOOST_ASSERT(contains(fmt, "%2$s"));
845 BOOST_ASSERT(contains(fmt, "%3$s"));
846 BOOST_ASSERT(contains(fmt, "%4$s"));
847 string str = subst(fmt, "%1$s", arg1);
848 str = subst(str, "%2$s", arg2);
849 str = subst(str, "%3$s", arg3);
850 str = subst(str, "%4$s", arg4);
851 return subst(str, "%%", "%");
857 } // namespace support