src/support/lstrings.h

   1 // -*- C++ -*-
   2 /**
   3  * \file lstrings.h
   4  * This file is part of LyX, the document processor.
   5  * Licence details can be found in the file COPYING.
   6  *
   7  * \author Lars Gullik Bjønnes
   8  * \author Jean-Marc Lasgouttes
   9  *
  10  * Full author contact details are available in file CREDITS.
  11  *
  12  * A collection of helper functions that work with std::string and docstring.
  13  * Some of these would certainly benefit from a rewrite/optimization.
  14  */
  15
  16 #ifndef LSTRINGS_H
  17 #define LSTRINGS_H
  18
  19 #include "support/docstring.h"
  20
  21 #include <vector>
  22
  23
  24 namespace lyx {
  25 namespace support {
  26
  27 /// Compare \p s and \p s2, ignoring the case.
  28 /// Does not depend on the locale.
     /// NOTE(review): the meaning of the int result is not visible in this
     /// header; presumably strcmp-like (negative / zero / positive) -- confirm
     /// in the implementation. The same applies to the other compare_*
     /// functions declared below.
  29 int compare_no_case(docstring const & s, docstring const & s2);
  30
  31 /// Compare \p s and \p s2 using the collating rules of the current locale.
  32 int compare_locale(docstring const & s, docstring const & s2);
  33
  34 /// Compare \p s and \p s2, ignoring the case of ASCII characters only.
  35 int compare_ascii_no_case(std::string const & s, std::string const & s2);
  36
  37 /// Compare \p s and \p s2, ignoring the case of ASCII characters only (docstring overload).
  38 int compare_ascii_no_case(docstring const & s, docstring const & s2);
  39
  40 /// Presumably: does the std::string represent an integer value?
     /// NOTE(review): inferred from the name and from isStrUnsignedInt() below;
     /// the accepted syntax (sign, surrounding whitespace) is not visible here.
  41 bool isStrInt(std::string const & str);
  42
  43 /// does the std::string represent an unsigned integer value ?
  44 bool isStrUnsignedInt(std::string const & str);
  45
  46 /// Presumably: does the std::string represent a floating point (double) value?
     /// NOTE(review): inferred from the name; the accepted syntax is not visible here.
  47 bool isStrDbl(std::string const & str);
  48
  49 /// does the string contain a digit? (ASCII digits only, judging by the name -- TODO confirm)
  50 bool hasDigitASCII(docstring const & str);
  51
     /// Presumably: is the character a hexadecimal digit? (inferred from the name -- TODO confirm)
  52 bool isHexChar(char_type);
  53
     /// Presumably: does \p str consist of hexadecimal digits? (inferred from the name;
     /// whether a prefix such as "0x" is accepted is not visible here -- TODO confirm)
  54 bool isHex(docstring const & str);
  55
     /// Presumably: converts the hexadecimal number in \p str to an int.
     /// NOTE(review): the result for input that is not valid hex is not visible here.
  56 int hexToInt(docstring const & str);
  57
  58 /// is \p str pure ascii?
  59 bool isAscii(docstring const & str);
  60
  61 /// is \p str pure ascii? (std::string overload)
  62 bool isAscii(std::string const & str);
  63
  64 /**
  65  * Changes the case of \p c to lowercase.
  66  * ASCII only: the result for other characters would depend on the locale,
  67  * and the function asserts that \p c is in the ASCII range.
  68  * This overload exists only because a call with a plain char would otherwise
  69  * be resolved to the char_type variant.
  70  */
  71 char lowercase(char c);
  72
  73 /**
  74  * Changes the case of \p c to uppercase.
  75  * ASCII only: the result for other characters would depend on the locale,
  76  * and the function asserts that \p c is in the ASCII range.
  77  * This overload exists only because a call with a plain char would otherwise
  78  * be resolved to the char_type variant.
  79  */
  80 char uppercase(char c);
  81
  82 /// Changes the case of \p c to lowercase.
  83 /// Does not depend on the locale.
  84 char_type lowercase(char_type c);
  85
  86 /// Changes the case of \p c to uppercase.
  87 /// Does not depend on the locale.
  88 char_type uppercase(char_type c);
  89
  90 /// Checks if the supplied character \p ch is lower-case
  91 bool isLowerCase(char_type ch);
  92
  93 /// Checks if the supplied character \p ch is upper-case
  94 bool isUpperCase(char_type ch);
  95
  96 /// Same as lowercase(), but ignores the locale (applies to both overloads below).
     /// NOTE(review): judging by the name only ASCII characters are converted -- TODO confirm.
  97 std::string const ascii_lowercase(std::string const &);
  98 docstring const ascii_lowercase(docstring const &);
  99
 100 /// Changes the case of \p s to lowercase.
 101 /// Does not depend on the locale.
 102 docstring const lowercase(docstring const & s);
 103 // The std::string variant is currently unused, but the code is there if needed.
 104 // std::string const lowercase(std::string const & s);
 105
 106 /// Changes the case of \p s to uppercase.
 107 /// Does not depend on the locale.
 108 docstring const uppercase(docstring const & s);
 109
 110 /// Returns the superscript of \p c, or \p c itself if no superscript exists.
 111 /// Does not depend on the locale.
 112 char_type superscript(char_type c);
 113
 114 /// Returns the subscript of \p c, or \p c itself if no subscript exists.
 115 /// Does not depend on the locale.
 116 char_type subscript(char_type c);
 117
 118 /// Does \p str start with the character \p c?
     /// (Only a docstring/char_type variant is declared in this part of the header.)
 119 bool prefixIs(docstring const & str, char_type c);
 120
 121 /// Does \p str start with the string \p pre? (std::string and docstring overloads)
 122 bool prefixIs(std::string const & str, std::string const & pre);
 123 bool prefixIs(docstring const & str, docstring const & pre);
 124
 125 /// Does the string (first argument) end with this char (second argument)?
 126 bool suffixIs(std::string const &, char);
 127 bool suffixIs(docstring const &, char_type);
 128
 129 /// Does the string (first argument) end with this suffix (second argument)?
 130 bool suffixIs(std::string const &, std::string const &);
 131 bool suffixIs(docstring const &, docstring const &);
 132
 133 /// Is b contained in a?
 134 inline bool contains(std::string const & a, std::string const & b)
 135 {
 136         return a.find(b) != std::string::npos;
 137 }
 138
 139 inline bool contains(docstring const & a, docstring const & b)
 140 {
 141         return a.find(b) != docstring::npos;
 142 }
 143
 144 inline bool contains(std::string const & a, char b)
 145 {
 146         return a.find(b) != std::string::npos;
 147 }
 148
 149 inline bool contains(docstring const & a, char_type b)
 150 {
 151         return a.find(b) != docstring::npos;
 152 }
 153
 154 /// Returns true if the first argument is made of ascii chars given in the
 155 /// second argument.
 156 bool containsOnly(std::string const &, std::string const &);
 157 /// Overload taking a docstring as first argument; the allowed characters are still passed as a std::string.
 158 bool containsOnly(docstring const &, std::string const &);
 159
 160 /** Extracts a token from this string at the nth delim.
 161     Doesn't modify the original string. Similar to strtok.
         As the examples show, \p n counts the tokens starting at 0.
 162     Example:
 163     \code
 164     token("a;bc;d", ';', 1) == "bc";
 165     token("a;bc;d", ';', 2) == "d";
 166     \endcode
 167 */
 168 std::string const token(std::string const & a, char delim, int n);
 169
     /// docstring overload of token().
 170 docstring const token(docstring const & a, char_type delim, int n);
 171
 172 /** Search a token in this string using the delim.
 173     Doesn't modify the original string. Returns -1 in case of
 174     failure.
         As the examples show, the returned position counts tokens starting at 0.
 175     Example:
 176     \code
 177     tokenPos("a;bc;d", ';', "bc") == 1;
 178     tokenPos("a;bc;d", ';', "d") == 2;
 179     \endcode
 180 */
 181 int tokenPos(std::string const & a, char delim, std::string const & tok);
 182 int tokenPos(docstring const & a, char_type delim, docstring const & tok);
 183
 184
 185 /// Substitute all \a oldchar with \a newchar in \a a; the result is returned.
 186 std::string const subst(std::string const & a, char oldchar, char newchar);
 187
 188 /// Substitute all \a oldchar with \a newchar in \a a; the result is returned.
 189 docstring const subst(docstring const & a, char_type oldchar, char_type newchar);
 190
 191 /// Substitutes all instances of \a oldstr with \a newstr in \a a; the result is returned.
 192 std::string const subst(std::string const & a,
 193                    std::string const & oldstr, std::string const & newstr);
 194
 195 /// Substitutes all instances of \a oldstr with \a newstr in \a a; the result is returned.
 196 docstring const subst(docstring const & a,
 197                 docstring const & oldstr, docstring const & newstr);
 198
 199 /// Count all occurrences of char \a chr inside \a str
 200 int count_char(std::string const & str, char chr);
 201
 202 /// Count all occurrences of char \a chr inside \a str
 203 int count_char(docstring const & str, docstring::value_type chr);
 204
 205 /** Count all occurrences of binary chars inside \a str.
 206     It is assumed that \a str is utf-8 encoded and that a binary char
 207     belongs to one of the Unicode general categories Zl, Zp, Cc, Cf, Cs, Co, or Cn
 208     (excluding white space characters such as '\t', '\n', '\v', '\f', '\r').
 209     See http://www.unicode.org/Public/6.2.0/ucd/UnicodeData.txt
 210 */
 211 int count_bin_chars(std::string const & str);
 212
 213 /** Trims characters off the end and beginning of a string.
         \p p is the set of characters to trim (default: a single space).
 214     \code
 215     trim("ccabccc", "c") == "ab"
 216     \endcode
 217 */
 218 docstring const trim(docstring const & a, char const * p = " ");
 219
 220 /** Trims characters off the end and beginning of a string.
         \p p is the set of characters to trim (default: a single space).
 221     \code
 222     trim("ccabccc", "c") == "ab"
 223     \endcode
 224 */
 225 std::string const trim(std::string const & a, char const * p = " ");
 226
 227 /** Trims characters off the end of a string, removing any character
 228     in \p p (default: a single space).
 229     \code
 230     rtrim("abcde", "dec") == "ab"
 231     \endcode
 232 */
 233 std::string const rtrim(std::string const & a, char const * p = " ");
 234 docstring const rtrim(docstring const & a, char const * p = " ");
 235
 236 /** Trims characters off the beginning of a string.
         \p p is the set of characters to trim (default: a single space).
 237     \code
 238     ltrim("abbabcdef", "ab") == "cdef"
 239     \endcode
 240 */
 241 std::string const ltrim(std::string const & a, char const * p = " ");
 242 docstring const ltrim(docstring const & a, char const * p = " ");
 243
 244 /** Splits the string given in the first argument at the first occurrence
 245     of the third argument, delim.
 246     What precedes delim is returned in the second argument, piece; this
 247     will be the whole of the string if no delimiter is found.
 248     The return value is what follows delim, if anything. So the return
 249     value is the empty string if no delimiter is found.
 250     'a' and 'piece' must be different variables.
 251     Examples:
 252     \code
 253     s1 = "a;bc"; s2 = ""
 254     ret = split(s1, s2, ';') -> ret == "bc", s2 == "a"
 255     \endcode
 256  */
 257 std::string const split(std::string const & a, std::string & piece, char delim);
 258 docstring const split(docstring const & a, docstring & piece, char_type delim);
 259
 260 /// Same as split but does not return a piece
 261 std::string const split(std::string const & a, char delim);
 262
 263 /// Same as split but uses the last delim.
 264 std::string const rsplit(std::string const & a, std::string & piece, char delim);
 265 docstring const rsplit(docstring const & a, docstring & piece, char_type delim);
     /// Presumably the same as rsplit but does not return a piece (by analogy with
     /// the two-argument split above -- TODO confirm).
 266 docstring const rsplit(docstring const & a, char_type delim);
 267
 268 /// Escapes non-ASCII chars and other problematic characters that cause
 269 /// problems in LaTeX labels.
 270 docstring const escape(docstring const & lab);
 271
 272 /// Group contents of an argument if needed
     /// \p l and \p r default to '[' and ']'; presumably they are the left and
     /// right delimiters of the argument being protected (TODO confirm).
     /// NOTE(review): \p arg is taken by non-const reference; whether it is
     /// modified is not visible in this header.
 273 docstring const protectArgument(docstring & arg, char const l = '[',
 274                           char const r = ']');
 275
 276 /// Truncates a string with an ellipsis at the end.  Leaves \p str unchanged and
 277 /// returns false if it is shorter than \p len. Otherwise resizes \p str to \p len, with
 278 /// U+2026 HORIZONTAL ELLIPSIS at the end, and returns true.
 279 ///
 280 /// Warning (Unicode): The cases where we want to truncate the text and it does
 281 /// not end up converted into a QString for UI display must be really
 282 /// rare. Whenever possible, we should prefer calling QFontMetrics::elidedText()
 283 /// instead, which takes into account the actual length on the screen and the
 284 /// layout direction (RTL or LTR). Or a similar function taking into account the
 285 /// font metrics from the buffer view, which still has to be defined. Or set up
 286 /// the widgets such that Qt elides the string automatically with the exact
 287 /// needed width. Recall that graphemes not only vary greatly in width, but
 288 /// can also be made of several code points. See:
 289 /// <http://utf8everywhere.org/#myth.strlen>
 290 ///
 291 /// What is acceptable is when we know that the string is probably going to be
 292 /// elided by Qt anyway, and len is chosen such that our own ellipsis will only
 293 /// be displayed in worst-case scenarios.
 294 ///
 295 /// FIXME: apply those principles in the current code.
 296 ///
 297 bool truncateWithEllipsis(docstring & str, size_t const len);
 298
 299 /// Word-wraps the provided docstring, returning a line-broken string
 300 /// whose lines are no wider than \p width, with the string broken at spaces.
 301 /// If the string cannot be broken appropriately, it returns something
 302 /// with "..." at the end, again no wider than \p width.
 303 /// We assume here that \p str does not contain newlines.
 304 /// If \p indent is positive, then the first line is indented that many
 305 /// spaces. If it is negative, then successive lines are indented, as
 306 /// if the first line were "outdented".
 307 ///
 308 /// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is
 309 /// subject to the same warning and FIXME as truncateWithEllipsis().
 310 ///
 311 docstring wrap(docstring const & str, int const indent = 0,
 312                size_t const width = 80);
 313
 314 /// Like wrap(), except it is intended to operate on strings
 315 /// that may contain embedded newlines.
 316 /// \param maxlines Don't return more than maxlines lines. If maxlines
 317 ///    is 0, we return everything.
 318 ///
 319 /// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is
 320 /// subject to the same warning and FIXME as truncateWithEllipsis().
 321 ///
 322 docstring wrapParas(docstring const & str, int const indent = 0,
 323                     size_t const width = 80, size_t const maxlines = 10);
 324
 325 /// Gives a vector of the parts of \p str that are separated by the delimiter \p delim (default ",").
 326 /// If \p keepempty is true, empty strings will be pushed to the vector as well
 327 std::vector<std::string> const getVectorFromString(std::string const & str,
 328                                               std::string const & delim = std::string(","),
 329                                               bool keepempty = false);
 330 std::vector<docstring> const getVectorFromString(docstring const & str,
 331                 docstring const & delim = from_ascii(","), bool keepempty = false);
 332
 333 /// The reverse of getVectorFromString(): builds one string from the elements of \p vec, using \p delim (default ",").
 334 std::string const getStringFromVector(std::vector<std::string> const & vec,
 335                                  std::string const & delim = std::string(","));
 336 docstring const getStringFromVector(std::vector<docstring> const & vec,
 337                                  docstring const & delim = from_ascii(","));
 338
 339 /// Search \p search_token in \p str and return the position if it is
 340 /// found, else -1. The last item in \p str must be "".
 341 int findToken(char const * const str[], std::string const & search_token);
 342
 343
 344 /// Format a floating point number with at least 6 significant digits, but
 345 /// without scientific notation.
 346 /// Scientific notation would be invalid in some contexts, such as lengths for
 347 /// LaTeX. Simply using std::ostream with std::fixed would produce results
 348 /// like "1000000.000000", and precision control would not be that easy either.
 349 std::string formatFPNumber(double);
 350
 351
     /// bformat() overloads: each takes a format \p fmt plus one to four arguments
     /// and returns a docstring. Presumably the arguments are substituted into
     /// \p fmt (inferred from the names -- TODO confirm).
     /// NOTE(review): the placeholder syntax recognised in \p fmt is not visible
     /// in this header; see the implementation.
     /// NOTE(review): the single-argument C-string overload takes a non-const
     /// char *, whereas the two-argument one takes char const *.
 352 docstring bformat(docstring const & fmt, int arg1);
 353 docstring bformat(docstring const & fmt, long arg1);
     // The long long overload is only declared when LYX_USE_LONG_LONG is defined.
 354 #ifdef LYX_USE_LONG_LONG
 355 docstring bformat(docstring const & fmt, long long arg1);
 356 #endif
 357 docstring bformat(docstring const & fmt, unsigned int arg1);
 358 docstring bformat(docstring const & fmt, docstring const & arg1);
 359 docstring bformat(docstring const & fmt, char * arg1);
 360 docstring bformat(docstring const & fmt, docstring const & arg1, docstring const & arg2);
 361 docstring bformat(docstring const & fmt, docstring const & arg1, int arg2);
 362 docstring bformat(docstring const & fmt, char const * arg1, docstring const & arg2);
 363 docstring bformat(docstring const & fmt, int arg1, int arg2);
 364 docstring bformat(docstring const & fmt, docstring const & arg1, docstring const & arg2, docstring const & arg3);
 365 docstring bformat(docstring const & fmt, docstring const & arg1, docstring const & arg2, docstring const & arg3, docstring const & arg4);
 366
 367
 368 } // namespace support
 369 } // namespace lyx
 370
 371 #endif