4 * This file is part of LyX, the document processor.
5 * Licence details can be found in the file COPYING.
7 * \author Lars Gullik Bjønnes
8 * \author Jean-Marc Lasgouttes
10 * Full author contact details are available in file CREDITS.
12 * A collection of string helper functions that work with strings.
13 * Some of these would certainly benefit from a rewrite/optimization.
19 #include "support/docstring.h"
27 /// Compare \p s and \p s2, ignoring the case.
28 /// Does not depend on the locale.
/// \param s  first string of the comparison.
/// \param s2 second string of the comparison.
/// \return an integer describing how \p s orders relative to \p s2;
/// presumably negative/zero/positive as with strcmp() (TODO: confirm
/// against the implementation, which is not part of this header).
29 int compare_no_case(docstring const & s, docstring const & s2);
31 /// Compare \p s and \p s2 using the collating rules of the current locale.
/// In contrast to compare_no_case(), the result therefore depends on the
/// locale that is active when the function is called.
/// \return an integer describing how \p s orders relative to \p s2;
/// presumably negative/zero/positive as with strcmp() (TODO: confirm).
32 int compare_locale(docstring const & s, docstring const & s2);
34 /// Compare \p s and \p s2, ignoring the case of ASCII characters only.
/// This is the std::string overload.
/// \return an integer describing how \p s orders relative to \p s2;
/// presumably negative/zero/positive as with strcmp() (TODO: confirm).
35 int compare_ascii_no_case(std::string const & s, std::string const & s2);
37 /// Compare \p s and \p s2, ignoring the case of ASCII characters only.
/// This is the docstring overload.
/// \return an integer describing how \p s orders relative to \p s2;
/// presumably negative/zero/positive as with strcmp() (TODO: confirm).
38 int compare_ascii_no_case(docstring const & s, docstring const & s2);
/// Presumably: does the std::string represent a (signed) integer value?
/// The accepted syntax is defined by the implementation -- TODO confirm.
41 bool isStrInt(std::string const & str);
43 /// Does the std::string represent an unsigned integer value?
44 bool isStrUnsignedInt(std::string const & str);
/// Presumably: does the std::string represent a floating point (double)
/// value? The accepted syntax is defined by the implementation -- TODO confirm.
47 bool isStrDbl(std::string const & str);
49 /// Does the string contain a digit?
/// NOTE(review): the name suggests that only the ASCII digits '0'-'9' are
/// taken into account -- confirm against the implementation.
/// \return true if \p str contains a digit.
50 bool hasDigitASCII(docstring const & str);
/// Presumably: is the given character a hexadecimal digit? (TODO: confirm
/// which characters and letter cases the implementation accepts.)
52 bool isHexChar(char_type);
/// Presumably: does \p str consist of hexadecimal digits? (TODO: confirm,
/// including the result for an empty string.)
54 bool isHex(docstring const & str);
/// Presumably: converts the hexadecimal number spelled out in \p str to an
/// int. The behaviour for input that is not valid hexadecimal cannot be
/// told from this declaration -- TODO confirm.
56 int hexToInt(docstring const & str);
58 /// Is \p str pure ASCII? (docstring overload)
/// \return true if \p str is pure ASCII.
59 bool isAscii(docstring const & str);
61 /// Is \p str pure ASCII? (std::string overload)
/// \return true if \p str is pure ASCII.
62 bool isAscii(std::string const & str);
65 * Changes the case of \p c to lowercase.
66 * Don't use this for non-ASCII characters, since it depends on the locale.
67 * This overloaded function is only implemented because the char_type variant
68 * would be used otherwise, and we assert in this function that \p c is in
/// NOTE(review): the sentence above is cut short; it presumably ends with
/// "the ASCII range" -- confirm.
/// \param c the character to convert; must not be a non-ASCII character.
/// \return the lowercase form of \p c.
71 char lowercase(char c);
74 * Changes the case of \p c to uppercase.
75 * Don't use this for non-ASCII characters, since it depends on the locale.
76 * This overloaded function is only implemented because the char_type variant
77 * would be used otherwise, and we assert in this function that \p c is in
/// NOTE(review): the sentence above is cut short; it presumably ends with
/// "the ASCII range" -- confirm.
/// \param c the character to convert; must not be a non-ASCII character.
/// \return the uppercase form of \p c.
80 char uppercase(char c);
82 /// Changes the case of \p c to lowercase.
83 /// Does not depend on the locale.
/// \param c the character to convert.
/// \return the lowercase form of \p c.
84 char_type lowercase(char_type c);
86 /// Changes the case of \p c to uppercase.
87 /// Does not depend on the locale.
/// \param c the character to convert.
/// \return the uppercase form of \p c.
88 char_type uppercase(char_type c);
90 /// Checks if the supplied character is lower-case.
/// \param ch the character to test.
/// \return true if \p ch is a lower-case character.
91 bool isLowerCase(char_type ch);
93 /// Checks if the supplied character is upper-case.
/// \param ch the character to test.
/// \return true if \p ch is an upper-case character.
94 bool isUpperCase(char_type ch);
96 /// Same as lowercase(), but ignores locale.
/// The argument is taken by const reference and the result is returned by
/// value, so the input string is not modified. (std::string overload)
97 std::string const ascii_lowercase(std::string const &);
/// docstring overload of ascii_lowercase(); same contract as the
/// std::string overload.
98 docstring const ascii_lowercase(docstring const &);
100 /// Changes the case of \p s to lowercase.
101 /// Does not depend on the locale.
/// \return the converted text as a new docstring; \p s is passed by const
/// reference and is left untouched.
102 docstring const lowercase(docstring const & s);
103 // Currently unused, but the code is there if needed.
104 // std::string const lowercase(std::string const & s);
106 /// Changes the case of \p s to uppercase.
107 /// Does not depend on the locale.
/// \return the converted text as a new docstring; \p s is passed by const
/// reference and is left untouched.
108 docstring const uppercase(docstring const & s);
110 /// Returns the superscript of \p c or \p c if no superscript exists.
111 /// Does not depend on the locale.
/// \param c the character to look up.
/// \return the superscript variant of \p c, or \p c unchanged when there is none.
112 char_type superscript(char_type c);
114 /// Returns the subscript of \p c or \p c if no subscript exists.
115 /// Does not depend on the locale.
/// \param c the character to look up.
/// \return the subscript variant of \p c, or \p c unchanged when there is none.
116 char_type subscript(char_type c);
118 /// Does str start with c?
/// \return true if \p str starts with the character \p c.
119 bool prefixIs(docstring const & str, char_type c);
121 /// Does str start with pre?
/// \return true if \p str starts with \p pre. (std::string overload)
122 bool prefixIs(std::string const & str, std::string const & pre);
/// Does str start with pre? (docstring overload)
123 bool prefixIs(docstring const & str, docstring const & pre);
125 /// Does the string end with this char?
/// First argument: the string to inspect; second argument: the character
/// expected at its end. (std::string overload)
126 bool suffixIs(std::string const &, char);
/// Does the string end with this char? (docstring overload)
127 bool suffixIs(docstring const &, char_type);
129 /// Does the string end with this suffix?
/// First argument: the string to inspect; second argument: the suffix
/// expected at its end. (std::string overload)
130 bool suffixIs(std::string const &, std::string const &);
/// Does the string end with this suffix? (docstring overload)
131 bool suffixIs(docstring const &, docstring const &);
133 /// Is b contained in a?
/// \return true if \p b occurs as a substring of \p a, i.e. if a.find(b)
/// does not report std::string::npos.
134 inline bool contains(std::string const & a, std::string const & b)
136 return a.find(b) != std::string::npos;
/// Is b contained in a? (docstring overload)
/// \return true if \p b occurs as a substring of \p a, i.e. if a.find(b)
/// does not report docstring::npos.
139 inline bool contains(docstring const & a, docstring const & b)
141 return a.find(b) != docstring::npos;
/// Is the character b contained in a? (std::string overload)
/// \return true if a.find(b) locates the character \p b somewhere in \p a.
144 inline bool contains(std::string const & a, char b)
146 return a.find(b) != std::string::npos;
/// Is the character b contained in a? (docstring overload)
/// \return true if a.find(b) locates the character \p b somewhere in \p a.
149 inline bool contains(docstring const & a, char_type b)
151 return a.find(b) != docstring::npos;
/// Presumably: does the first string consist exclusively of characters that
/// occur in the second string? The parameters are unnamed in this
/// declaration, so their roles should be verified against the
/// implementation -- TODO confirm.
155 bool containsOnly(std::string const &, std::string const &);
157 /** Extracts a token from this string at the nth delim.
158 Doesn't modify the original string. Similar to strtok.
161 token("a;bc;d", ';', 1) == "bc";
162 token("a;bc;d", ';', 2) == "d";
/// \param a     the string to take the token from (not modified).
/// \param delim the character that separates tokens.
/// \param n     zero-based index of the wanted token; in the examples
///              above, index 1 selects the second token "bc".
/// \return the selected token. (std::string overload)
165 std::string const token(std::string const & a, char delim, int n);
/// docstring overload of token(); same parameters and meaning.
167 docstring const token(docstring const & a, char_type delim, int n);
169 /** Search a token in this string using the delim.
170 Doesn't modify the original string. Returns -1 in case of
174 tokenPos("a;bc;d", ';', "bc") == 1;
175 tokenPos("a;bc;d", ';', "d") == 2;
/// \param a     the string whose tokens are searched (not modified).
/// \param delim the character that separates tokens.
/// \param tok   the token to look for.
/// \return the zero-based index of \p tok among the tokens of \p a (see the
/// examples above), or -1 as described above. (std::string overload)
178 int tokenPos(std::string const & a, char delim, std::string const & tok);
/// docstring overload of tokenPos(); same parameters and meaning.
179 int tokenPos(docstring const & a, char_type delim, docstring const & tok);
182 /// Substitute all \a oldchar with \a newchar
/// \return the result of the substitution; \a a is passed by const
/// reference and is not modified. (std::string overload)
183 std::string const subst(std::string const & a, char oldchar, char newchar);
185 /// Substitute all \a oldchar with \a newchar
/// \return the result of the substitution; \a a is passed by const
/// reference and is not modified. (docstring overload)
186 docstring const subst(docstring const & a, char_type oldchar, char_type newchar);
188 /// Substitutes all instances of \a oldstr with \a newstr
/// \return the result of the substitution; \a a is passed by const
/// reference and is not modified. (std::string overload)
189 std::string const subst(std::string const & a,
190 std::string const & oldstr, std::string const & newstr);
192 /// Substitutes all instances of \a oldstr with \a newstr
/// \return the result of the substitution; \a a is passed by const
/// reference and is not modified. (docstring overload)
193 docstring const subst(docstring const & a,
194 docstring const & oldstr, docstring const & newstr);
196 /// Count all occurrences of char \a chr inside \a str
/// \return the number of occurrences. (std::string overload)
197 int count_char(std::string const & str, char chr);
199 /// Count all occurrences of char \a chr inside \a str
/// \return the number of occurrences. (docstring overload)
200 int count_char(docstring const & str, docstring::value_type chr);
202 /** Count all occurrences of binary chars inside \a str.
203 It is assumed that \a str is utf-8 encoded and that a binary char
204 belongs to the unicode class names Zl, Zp, Cc, Cf, Cs, Co, or Cn
205 (excluding white space characters such as '\t', '\n', '\v', '\f', '\r').
206 See http://www.unicode.org/Public/6.2.0/ucd/UnicodeData.txt
/// \param str the utf-8 encoded text to examine.
/// \return the number of binary chars, as defined above, found in \a str.
208 int count_bin_chars(std::string const & str);
210 /** Trims characters off the end and beginning of a string.
212 trim("ccabccc", "c") == "ab".
/// \param a the string to trim (not modified; the result is returned).
/// \param p C string naming the character(s) to strip; defaults to a
///          single space. (docstring overload)
215 docstring const trim(docstring const & a, char const * p = " ");
217 /** Trims characters off the end and beginning of a string.
219 trim("ccabccc", "c") == "ab".
/// \param a the string to trim (not modified; the result is returned).
/// \param p C string naming the character(s) to strip; defaults to a
///          single space. (std::string overload)
222 std::string const trim(std::string const & a, char const * p = " ");
224 /** Trims characters off the end of a string, removing any character
227 rtrim("abcde", "dec") == "ab".
/// \param a the string to trim (not modified; the result is returned).
/// \param p C string used as a set of characters to strip: in the example
///          above "dec" removes the trailing 'c', 'd' and 'e' regardless of
///          their order. Defaults to a single space.
230 std::string const rtrim(std::string const & a, char const * p = " ");
/// docstring overload of rtrim(); same parameters and meaning.
231 docstring const rtrim(docstring const & a, char const * p = " ");
233 /** Trims characters off the beginning of a string.
235 ("abbabcdef", "ab") = "cdef"
/// \param a the string to trim (not modified; the result is returned).
/// \param p C string used as a set of characters to strip: in the example
///          above "ab" removes the whole leading run "abbab". Defaults to a
///          single space.
238 std::string const ltrim(std::string const & a, char const * p = " ");
/// docstring overload of ltrim(); same parameters and meaning.
239 docstring const ltrim(docstring const & a, char const * p = " ");
241 /** Splits the string given in the first argument at the first occurrence
242 of the third argument, delim.
243 What precedes delim is returned in the second argument, piece; this
244 will be the whole of the string if no delimiter is found.
245 The return value is what follows delim, if anything. So the return
246 value is the null string if no delimiter is found.
247 'a' and 'piece' must be different variables.
251 ret = split(s1, s2, ';') -> ret = "bc", s2 == "a"
/// \param a     the string to split (not modified).
/// \param piece output: receives the part of \p a in front of \p delim.
/// \param delim the delimiter character.
/// \return the part of \p a behind the first \p delim. (std::string overload)
254 std::string const split(std::string const & a, std::string & piece, char delim);
/// docstring overload of split(); same parameters and meaning.
255 docstring const split(docstring const & a, docstring & piece, char_type delim);
257 /// Same as split but does not return a piece
/// \param a     the string to split (not modified).
/// \param delim the delimiter character.
/// \return what the three-argument split() returns for the same \p a and
/// \p delim.
258 std::string const split(std::string const & a, char delim);
260 /// Same as split but uses the last delim.
/// \param a     the string to split (not modified).
/// \param piece output parameter, as for split().
/// \param delim the delimiter character; its last occurrence in \p a is used.
/// (std::string overload)
261 std::string const rsplit(std::string const & a, std::string & piece, char delim);
/// docstring overload of the three-argument rsplit().
262 docstring const rsplit(docstring const & a, docstring & piece, char_type delim);
/// Two-argument rsplit() without a \p piece output parameter; presumably the
/// counterpart of the two-argument split() -- TODO confirm.
263 docstring const rsplit(docstring const & a, char_type delim);
265 /// Escapes non ASCII chars and other problematic characters that cause
266 /// problems in latex labels.
/// \param lab the label text to escape (not modified).
/// \return the escaped label. The escape syntax itself is defined by the
/// implementation and is not visible from this declaration.
267 docstring const escape(docstring const & lab);
269 /// Truncates a string with an ellipsis at the end. Leaves str unchanged and
270 /// returns false if it is shorter than len. Otherwise resizes str to len, with
271 /// U+2026 HORIZONTAL ELLIPSIS at the end, and returns true.
273 /// Warning (Unicode): The cases where we want to truncate the text and it does
274 /// not end up converted into a QString for UI display must be really
275 /// rare. Whenever possible, we should prefer calling QFontMetrics::elidedText()
276 /// instead, which takes into account the actual length on the screen and the
277 /// layout direction (RTL or LTR). Or a similar function taking into account the
278 /// font metrics from the buffer view, which still has to be defined. Or set up
279 /// the widgets such that Qt elides the string automatically with the exact
280 /// needed width. Recall that not only graphemes vary greatly in width, but also
281 /// can be made of several code points. See:
282 /// <http://utf8everywhere.org/#myth.strlen>
284 /// What is acceptable is when we know that the string is probably going to be
285 /// elided by Qt anyway, and len is chosen such that our own ellipsis will only
286 /// be displayed in worst-case scenarios.
288 /// FIXME: apply those principles in the current code.
/// \param str the string to truncate; modified in place when truncation
///            happens.
/// \param len the size \p str is resized to when it is truncated. This is
///            the size of the docstring, not a grapheme count or an
///            on-screen width (see the warning above).
/// \return true if \p str was truncated, false if it was left unchanged.
290 bool truncateWithEllipsis(docstring & str, size_t const len);
292 /// Word-wraps the provided docstring, returning a line-broken string
293 /// of width no wider than width, with the string broken at spaces.
294 /// If the string cannot be broken appropriately, it returns something
295 /// with "..." at the end, again no wider than width.
296 /// We assume here that str does not contain newlines.
297 /// If indent is positive, then the first line is indented that many
298 /// spaces. If it is negative, then successive lines are indented, as
299 /// if the first line were "outdented".
301 /// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is
302 /// subject to the same warning and FIXME as above.
/// \param str    the text to wrap; must not contain newlines.
/// \param indent indentation in spaces, see above; defaults to 0.
/// \param width  maximum width of a line; defaults to 80.
/// \return the wrapped text.
304 docstring wrap(docstring const & str, int const indent = 0,
305 size_t const width = 80);
307 /// Like the preceding, except it is intended to operate on strings
308 /// that may contain embedded newlines.
309 /// \param maxlines Don't return more than maxlines lines. If maxlines
310 /// is 0, we return everything.
312 /// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is
313 /// subject to the same warning and FIXME as above.
/// \param str    the text to wrap; may contain embedded newlines.
/// \param indent indentation in spaces; defaults to 0.
/// \param width  maximum width of a line; defaults to 80.
/// The default for \p maxlines is 10.
315 docstring wrapParas(docstring const & str, int const indent = 0,
316 size_t const width = 80, size_t const maxlines = 10);
318 /// Gives a vector of the parts of \p str that are separated by the delimiter \p delim.
319 /// If \p keepempty is true, empty strings will be pushed to the vector as well
/// \param str       the string to take apart.
/// \param delim     the delimiter; defaults to ",".
/// \param keepempty whether empty parts are kept; defaults to false.
/// (std::string overload)
320 std::vector<std::string> const getVectorFromString(std::string const & str,
321 std::string const & delim = std::string(","),
322 bool keepempty = false);
/// docstring overload of getVectorFromString(); same parameters and
/// defaults (the default delimiter is from_ascii(",")).
323 std::vector<docstring> const getVectorFromString(docstring const & str,
324 docstring const & delim = from_ascii(","), bool keepempty = false);
326 /// The same vice versa, i.e. the reverse direction of getVectorFromString():
/// produces one string from the elements of \p vec, using \p delim as the
/// delimiter.
/// \param vec   the string parts.
/// \param delim the delimiter; defaults to ",". (std::string overload)
327 std::string const getStringFromVector(std::vector<std::string> const & vec,
328 std::string const & delim = std::string(","));
/// docstring overload of getStringFromVector(); the default delimiter is
/// from_ascii(",").
329 docstring const getStringFromVector(std::vector<docstring> const & vec,
330 docstring const & delim = from_ascii(","));
332 /// Search \p search_token in \p str and return the position if it is
333 /// found, else -1. The last item in \p str must be "".
/// \param str          array of C strings to search; its final entry must
///                     be the empty string (presumably the end marker, as
///                     no length is passed -- TODO confirm).
/// \param search_token the string to look for.
/// \return the position of \p search_token in \p str, or -1 if not found.
334 int findToken(char const * const str[], std::string const & search_token);
337 /// Format a floating point number with at least 6 significant digits, but
338 /// without scientific notation.
339 /// Scientific notation would be invalid in some contexts, such as lengths for
340 /// LaTeX. Simply using std::ostream with std::fixed would produce results
341 /// like "1000000.000000", and precision control would not be that easy either.
/// \return the textual representation of the given number.
342 std::string formatFPNumber(double);
/// Primary bformat() templates taking a format string \p fmt plus one to
/// four further arguments. Only declarations are given here; no generic
/// definition is visible in this header.
/// NOTE(review): presumably the arguments are substituted into placeholders
/// of \p fmt; the placeholder syntax is not visible here -- TODO confirm.
/// \return the formatted docstring.
345 template <class Arg1>
346 docstring bformat(docstring const & fmt, Arg1);
/// Two-argument form of bformat().
348 template <class Arg1, class Arg2>
349 docstring bformat(docstring const & fmt, Arg1, Arg2);
/// Three-argument form of bformat().
351 template <class Arg1, class Arg2, class Arg3>
352 docstring bformat(docstring const & fmt, Arg1, Arg2, Arg3);
/// Four-argument form of bformat().
354 template <class Arg1, class Arg2, class Arg3, class Arg4>
355 docstring bformat(docstring const & fmt, Arg1, Arg2, Arg3, Arg4);
/// Explicit specializations of bformat() for specific argument type
/// combinations. These are declarations only; the definitions are not part
/// of this header.
// One-argument specializations.
358 template<> docstring bformat(docstring const & fmt, int arg1);
359 template<> docstring bformat(docstring const & fmt, long arg1);
// The long long specialization is only declared when LYX_USE_LONG_LONG is defined.
360 #ifdef LYX_USE_LONG_LONG
361 template<> docstring bformat(docstring const & fmt, long long arg1);
363 template<> docstring bformat(docstring const & fmt, unsigned int arg1);
364 template<> docstring bformat(docstring const & fmt, docstring arg1);
365 template<> docstring bformat(docstring const & fmt, char * arg1);
// Two-argument specializations.
366 template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2);
367 template<> docstring bformat(docstring const & fmt, docstring arg1, int arg2);
368 template<> docstring bformat(docstring const & fmt, char const * arg1, docstring arg2);
369 template<> docstring bformat(docstring const & fmt, int arg1, int arg2);
// Three- and four-argument specializations (docstring arguments only).
370 template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2, docstring arg3);
371 template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2, docstring arg3, docstring arg4);
374 } // namespace support