src/support/lstrings.h

   1 // -*- C++ -*-
   2 /**
   3  * \file lstrings.h
   4  * This file is part of LyX, the document processor.
   5  * Licence details can be found in the file COPYING.
   6  *
   7  * \author Lars Gullik Bjønnes
   8  * \author Jean-Marc Lasgouttes
   9  *
  10  * Full author contact details are available in file CREDITS.
  11  *
  12  * A collection of string helper functions that work with strings.
  13  * Some of these would certainly benefit from a rewrite/optimization.
  14  */
  15
  16 #ifndef LSTRINGS_H
  17 #define LSTRINGS_H
  18
  19 #include "support/docstring.h"
  20 #include "support/types.h"
  21
  22 #include <vector>
  23
  24
  25 namespace lyx {
  26 namespace support {
  27
  28 /// Case-insensitive comparison of \p s and \p s2.
  29 /// Does not depend on the locale. NOTE(review): the sign convention of the int result is not stated here; presumably strcmp-like — confirm.
  30 int compare_no_case(docstring const & s, docstring const & s2);
  31
  32 /// Compare \p s and \p s2 using the collating rules of the current locale.
  33 int compare_locale(docstring const & s, docstring const & s2);
  34
  35 /// std::string overload: compare \p s and \p s2, ignoring the case of ASCII characters only.
  36 int compare_ascii_no_case(std::string const & s, std::string const & s2);
  37
  38 /// docstring overload: compare \p s and \p s2, ignoring the case of ASCII characters only.
  39 int compare_ascii_no_case(docstring const & s, docstring const & s2);
  40
  41 /// Does \p str represent an integer value? NOTE(review): inferred from the name; the accepted syntax is defined by the implementation.
  42 bool isStrInt(std::string const & str);
  43
  44 /// Does \p str represent an unsigned integer value?
  45 bool isStrUnsignedInt(std::string const & str);
  46
  47 /// Does \p str represent a floating point value? NOTE(review): inferred from the name; the accepted syntax is defined by the implementation.
  48 bool isStrDbl(std::string const & str);
  49
  50 /// Does \p str contain a digit? NOTE(review): the name suggests that only ASCII digits count — confirm.
  51 bool hasDigitASCII(docstring const & str);
  52 /// NOTE(review): undocumented; presumably tests whether the character is a hexadecimal digit.
  53 bool isHexChar(char_type);
  54 /// NOTE(review): undocumented; presumably tests whether \p str consists of hexadecimal digits only.
  55 bool isHex(docstring const & str);
  56 /// NOTE(review): undocumented; presumably parses \p str as a hexadecimal number. Behavior on invalid input is not visible here.
  57 unsigned int hexToInt(docstring const & str);
  58
  59 /// Is \p str pure ASCII? (docstring overload)
  60 bool isAscii(docstring const & str);
  61
  62 /// Is \p str pure ASCII? (std::string overload)
  63 bool isAscii(std::string const & str);
  64
  65 /**
  66  * Returns the lowercase form of \p c.
  67  * Don't use this for non-ASCII characters, since it depends on the locale.
  68  * This overload is only implemented because a call with a plain char would
  69  * otherwise use the char_type variant; this function asserts that \p c is
  70  * in the ASCII range.
  71  */
  72 char lowercase(char c);
  73
  74 /**
  75  * Returns the uppercase form of \p c.
  76  * Don't use this for non-ASCII characters, since it depends on the locale.
  77  * This overload is only implemented because a call with a plain char would
  78  * otherwise use the char_type variant; this function asserts that \p c is
  79  * in the ASCII range.
  80  */
  81 char uppercase(char c);
  82
  83 /// Returns the lowercase form of \p c.
  84 /// Does not depend on the locale.
  85 char_type lowercase(char_type c);
  86
  87 /// Returns the uppercase form of \p c.
  88 /// Does not depend on the locale.
  89 char_type uppercase(char_type c);
  90
  91 /// Checks whether the supplied character is lower-case.
  92 bool isLowerCase(char_type ch);
  93
  94 /// Checks whether the supplied character is upper-case.
  95 bool isUpperCase(char_type ch);
  96
  97 /// Like lowercase(), but ignores the locale. NOTE(review): presumably only ASCII letters are converted (per the name) — confirm.
  98 std::string const ascii_lowercase(std::string const &);
  99 docstring const ascii_lowercase(docstring const &);
 100
 101 /// Returns a lowercase copy of \p s.
 102 /// Does not depend on the locale.
 103 docstring const lowercase(docstring const & s);
 104 // The std::string variant is currently unused, but the code is there if needed:
 105 // std::string const lowercase(std::string const & s);
 106
 107 /// Returns an uppercase copy of \p s.
 108 /// Does not depend on the locale.
 109 docstring const uppercase(docstring const & s);
 110
 111 /// Returns the superscript form of \p c, or \p c itself if no superscript exists.
 112 /// Does not depend on the locale.
 113 char_type superscript(char_type c);
 114
 115 /// Returns the subscript form of \p c, or \p c itself if no subscript exists.
 116 /// Does not depend on the locale.
 117 char_type subscript(char_type c);
 118
 119 /// Does \p str start with the character \p c?
 120 bool prefixIs(docstring const & str, char_type c);
 121
 122 /// Does \p str start with the string \p pre?
 123 bool prefixIs(std::string const & str, std::string const & pre);
 124 bool prefixIs(docstring const & str, docstring const & pre);
 125
 126 /// Does the string (first argument) end with the given character (second argument)?
 127 bool suffixIs(std::string const &, char);
 128 bool suffixIs(docstring const &, char_type);
 129
 130 /// Does the string (first argument) end with the given suffix (second argument)?
 131 bool suffixIs(std::string const &, std::string const &);
 132 bool suffixIs(docstring const &, docstring const &);
 133
 134 /// Substring test: true when \p b occurs somewhere in \p a (an empty \p b is always found).
 135 inline bool contains(std::string const & a, std::string const & b)
 136 {
 137         return std::string::npos != a.find(b);
 138 }
 139 /// Substring test for docstrings: true when \p b occurs somewhere in \p a.
 140 inline bool contains(docstring const & a, docstring const & b)
 141 {
 142         return docstring::npos != a.find(b);
 143 }
 144 /// Character test: true when the character \p b occurs somewhere in \p a.
 145 inline bool contains(std::string const & a, char b)
 146 {
 147         return std::string::npos != a.find(b);
 148 }
 149 /// Character test for docstrings: true when the character \p b occurs somewhere in \p a.
 150 inline bool contains(docstring const & a, char_type b)
 151 {
 152         return docstring::npos != a.find(b);
 153 }
 154
 155 /// Returns true if the first argument consists only of the ASCII characters
 156 /// given in the second argument.
 157 bool containsOnly(std::string const &, std::string const &);
 158 /// docstring overload; NOTE(review): presumably the same test, with the allowed characters still given as a std::string — confirm.
 159 bool containsOnly(docstring const &, std::string const &);
 160
 161 /** Extracts the \p n-th token of \p a, where tokens are separated by \p delim.
 162     Doesn't modify the original string. Similar to strtok.
 163     Example (\p n counts from 0, so 1 selects the second token):
 164     \code
 165     token("a;bc;d", ';', 1) == "bc";
 166     token("a;bc;d", ';', 2) == "d";
 167     \endcode
 168 */
 169 std::string const token(std::string const & a, char delim, int n);
 170 /// docstring overload of token(). NOTE(review): presumably the same semantics as the std::string version — confirm.
 171 docstring const token(docstring const & a, char_type delim, int n);
 172
 173 /** Searches \p a, split at \p delim, for the token \p tok and returns its
 174     zero-based position. Doesn't modify the original string. Returns -1 in
 175     case of failure (presumably: \p tok is not among the tokens — confirm).
 176     Example:
 177     \code
 178     tokenPos("a;bc;d", ';', "bc") == 1;
 179     tokenPos("a;bc;d", ';', "d") == 2;
 180     \endcode
 181 */
 182 int tokenPos(std::string const & a, char delim, std::string const & tok);
 183 int tokenPos(docstring const & a, char_type delim, docstring const & tok);
 184
 185
 186 /// Returns a copy of \p a with every \p oldchar replaced by \p newchar.
 187 std::string const subst(std::string const & a, char oldchar, char newchar);
 188
 189 /// Returns a copy of \p a with every \p oldchar replaced by \p newchar (docstring overload).
 190 docstring const subst(docstring const & a, char_type oldchar, char_type newchar);
 191
 192 /// Returns a copy of \p a with all instances of \p oldstr replaced by \p newstr.
 193 std::string const subst(std::string const & a,
 194                    std::string const & oldstr, std::string const & newstr);
 195
 196 /// Returns a copy of \p a with all instances of \p oldstr replaced by \p newstr (docstring overload).
 197 docstring const subst(docstring const & a,
 198                 docstring const & oldstr, docstring const & newstr);
 199
 200 /// Counts all occurrences of the character \p chr inside \p str.
 201 int count_char(std::string const & str, char chr);
 202
 203 /// Counts all occurrences of the character \p chr inside \p str (docstring overload).
 204 int count_char(docstring const & str, docstring::value_type chr);
 205
 206 /** Counts all occurrences of binary characters inside \p str.
 207     It is assumed that \p str is UTF-8 encoded and that a binary character
 208     belongs to one of the Unicode classes Zl, Zp, Cc, Cf, Cs, Co, or Cn
 209     (excluding white space characters such as '\t', '\n', '\v', '\f', '\r').
 210     See http://www.unicode.org/Public/6.2.0/ucd/UnicodeData.txt
 211 */
 212 int count_bin_chars(std::string const & str);
 213
 214 /** Trims the characters in \p p (default: a space) off the end and beginning of \p a.
 215     \code
 216     trim("ccabccc", "c") == "ab".
 217     \endcode
 218 */
 219 docstring const trim(docstring const & a, char const * p = " ");
 220
 221 /** Trims the characters in \p p (default: a space) off the end and beginning of \p a.
 222     \code
 223     trim("ccabccc", "c") == "ab".
 224     \endcode
 225 */
 226 std::string const trim(std::string const & a, char const * p = " ");
 227
 228 /** Trims characters off the end of a string, removing any character
 229     in \p p (default: a space).
 230     \code
 231     rtrim("abcde", "dec") == "ab".
 232     \endcode
 233 */
 234 std::string const rtrim(std::string const & a, char const * p = " ");
 235 docstring const rtrim(docstring const & a, char const * p = " ");
 236
 237 /** Trims characters off the beginning of a string, removing any character in \p p (default: a space).
 238     \code
 239     ltrim("abbabcdef", "ab") == "cdef".
 240     \endcode
 241 */
 242 std::string const ltrim(std::string const & a, char const * p = " ");
 243 docstring const ltrim(docstring const & a, char const * p = " ");
 244
 245 /** Splits the string given in the first argument at the first occurrence
 246     of the third argument, delim.
 247     What precedes delim is returned in the second argument, piece; this
 248     will be the whole of the string if no delimiter is found.
 249     The return value is what follows delim, if anything. So the return
 250     value is the empty string if no delimiter is found.
 251     'a' and 'piece' must be different variables.
 252     Example:
 253     \code
 254     s1 = "a;bc"; s2 = ""
 255     ret = split(s1, s2, ';') -> ret == "bc", s2 == "a"
 256     \endcode
 257  */
 258 std::string const split(std::string const & a, std::string & piece, char delim);
 259 docstring const split(docstring const & a, docstring & piece, char_type delim);
 260
 261 /// Same as split, but does not return a piece.
 262 std::string const split(std::string const & a, char delim);
 263
 264 /// Same as split, but uses the last delim.
 265 std::string const rsplit(std::string const & a, std::string & piece, char delim);
 266 docstring const rsplit(docstring const & a, docstring & piece, char_type delim);
 267 docstring const rsplit(docstring const & a, char_type delim);
 268
 269 /// Escapes non-ASCII characters and other problematic characters that cause
 270 /// problems in LaTeX labels.
 271 docstring const escape(docstring const & lab);
 272
 273 /// Groups the contents of the argument \p arg if needed. NOTE(review): \p arg is a non-const reference, so it may be modified; the role of \p l and \p r is not documented here — confirm.
 274 docstring const protectArgument(docstring & arg, char const l = '[',
 275                           char const r = ']');
 276
 277 /// Truncates \p str with an ellipsis at the end.  Leaves \p str unchanged and
 278 /// returns false if it is shorter than \p len. Otherwise resizes \p str to \p len,
 279 /// with U+2026 HORIZONTAL ELLIPSIS at the end, and returns true.
 280 /// If \p mid is true, the ellipsis is put in the middle of the string instead, and
 281 /// the first and last half are prepended/appended to it.
 282 ///
 283 /// Warning (Unicode): The cases where we want to truncate the text and it does
 284 /// not end up converted into a QString for UI display must be really
 285 /// rare. Whenever possible, we should prefer calling QFontMetrics::elidedText()
 286 /// instead, which takes into account the actual length on the screen and the
 287 /// layout direction (RTL or LTR). Or a similar function taking into account the
 288 /// font metrics from the buffer view, which still has to be defined. Or set up
 289 /// the widgets such that Qt elides the string automatically with the exact
 290 /// needed width. Recall that graphemes not only vary greatly in width, but can
 291 /// also be made of several code points. See:
 292 /// <http://utf8everywhere.org/#myth.strlen>
 293 ///
 294 /// What is acceptable is when we know that the string is probably going to be
 295 /// elided by Qt anyway, and \p len is chosen such that our own ellipsis will only
 296 /// be displayed in worst-case scenarios.
 297 ///
 298 /// FIXME: apply those principles in the current code.
 299 ///
 300 bool truncateWithEllipsis(docstring & str, size_t const len,
 301                           bool const mid = false);
 302
 303 /// Word-wraps the provided docstring, returning a line-broken string
 304 /// no wider than \p width, with the string broken at spaces.
 305 /// If the string cannot be broken appropriately, it returns something
 306 /// with "..." at the end, again no wider than \p width.
 307 /// We assume here that \p str does not contain newlines.
 308 /// If \p indent is positive, then the first line is indented that many
 309 /// spaces. If it is negative, then successive lines are indented, as
 310 /// if the first line were "outdented".
 311 ///
 312 /// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is
 313 /// subject to the same warning and FIXME as truncateWithEllipsis().
 314 ///
 315 docstring wrap(docstring const & str, int const indent = 0,
 316                size_t const width = 80);
 317
 318 /// Like wrap(), except it is intended to operate on strings
 319 /// that may contain embedded newlines.
 320 /// \param maxlines Don't return more than maxlines lines. If maxlines
 321 ///    is 0, we return everything.
 322 ///
 323 /// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is
 324 /// subject to the same warning and FIXME as truncateWithEllipsis().
 325 ///
 326 docstring wrapParas(docstring const & str, int const indent = 0,
 327                     size_t const width = 80, size_t const maxlines = 10);
 328
 329 /// Splits \p str at the delimiter \p delim and returns the parts as a vector.
 330 /// If \p keepempty is true, empty strings will be pushed to the vector as well.
 331 /// If \p trimit is true, leading and trailing whitespace will be trimmed from
 332 /// all values. Note that this can affect what counts as "empty".
 333 /// NOTE: If you want to split a string on whitespace, then do:
 334 ///    getVectorFromString(str, " ", false, true);
 335 std::vector<std::string> const getVectorFromString(std::string const & str,
 336         std::string const & delim = std::string(","),
 337         bool keepempty = false, bool trimit = true);
 338 std::vector<docstring> const getVectorFromString(docstring const & str,
 339         docstring const & delim = from_ascii(","),
 340         bool keepempty = false, bool trimit = true);
 341
 342 /// The reverse operation: builds one string from the elements of \p vec, using \p delim as the delimiter.
 343 std::string const getStringFromVector(std::vector<std::string> const & vec,
 344                                  std::string const & delim = std::string(","));
 345 docstring const getStringFromVector(std::vector<docstring> const & vec,
 346                                  docstring const & delim = from_ascii(","));
 347
 348 /// Searches the array \p str for \p search_token and returns its position if it
 349 /// is found, else -1. The last item in \p str must be "" (presumably the end marker — confirm).
 350 int findToken(char const * const str[], std::string const & search_token);
 351
 352
 353 /// Formats a floating point number with at least 6 significant digits, but
 354 /// without scientific notation.
 355 /// Scientific notation would be invalid in some contexts, such as lengths for
 356 /// LaTeX. Simply using std::ostream with std::fixed would produce results
 357 /// like "1000000.000000", and precision control would not be that easy either.
 358 std::string formatFPNumber(double);
 359
 360 /// Returns a URI/URL-style percent-encoded copy of the string \p in.
 361 /// \p ex is a string of characters that are excluded from the transformation.
 362 docstring to_percent_encoding(docstring const & in, docstring const & ex = docstring());
 363
 364 /// Returns a string decoded from a URI/URL-style percent-encoded string \p in.
 365 std::string from_percent_encoding(std::string const & in);
 366 /// bformat(): overloads taking a format string \p fmt and one to five arguments. NOTE(review): the placeholder syntax and substitution rules are not documented in this header — see the implementation.
 367 docstring bformat(docstring const & fmt, int arg1);
 368 docstring bformat(docstring const & fmt, long arg1);
 369 #ifdef HAVE_LONG_LONG_INT
 370 docstring bformat(docstring const & fmt, long long arg1);
 371 #endif
 372 docstring bformat(docstring const & fmt, unsigned int arg1);
 373 docstring bformat(docstring const & fmt, pit_type arg1);
 374 docstring bformat(docstring const & fmt, docstring const & arg1);
 375 docstring bformat(docstring const & fmt, char * arg1);
 376 docstring bformat(docstring const & fmt, docstring const & arg1, docstring const & arg2);
 377 docstring bformat(docstring const & fmt, docstring const & arg1, int arg2);
 378 docstring bformat(docstring const & fmt, char const * arg1, docstring const & arg2);
 379 docstring bformat(docstring const & fmt, int arg1, int arg2);
 380 docstring bformat(docstring const & fmt, docstring const & arg1, docstring const & arg2, docstring const & arg3);
 381 docstring bformat(docstring const & fmt, docstring const & arg1, docstring const & arg2, docstring const & arg3, docstring const & arg4);
 382 docstring bformat(docstring const & fmt, docstring const & arg1, docstring const & arg2, docstring const & arg3, docstring const & arg4, docstring const & arg5);
 383
 384
 385 } // namespace support
 386 } // namespace lyx
 387
 388 #endif