X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2Flstrings.h;h=ac310c59c43dcc598e6a6016c447e91a0bc274d2;hb=8d640dc77608bedddb5b00982c23665584f52d21;hp=b419798e11311b8b4264dc13f0fe836ab5bc18bf;hpb=1fd5e5ed974338edef45216c3e3a3f678b180676;p=lyx.git diff --git a/src/support/lstrings.h b/src/support/lstrings.h index b419798e11..ac310c59c4 100644 --- a/src/support/lstrings.h +++ b/src/support/lstrings.h @@ -1,187 +1,377 @@ // -*- C++ -*- - -/** This is a collection of string helper functions that works - together with string (and later also with STL String. Some of these - would certainly benefit from a rewrite/optimization. -*/ +/** + * \file lstrings.h + * This file is part of LyX, the document processor. + * Licence details can be found in the file COPYING. + * + * \author Lars Gullik Bjønnes + * \author Jean-Marc Lasgouttes + * + * Full author contact details are available in file CREDITS. + * + * A collection of string helper functions that works with string. + * Some of these would certainly benefit from a rewrite/optimization. + */ #ifndef LSTRINGS_H #define LSTRINGS_H -#include +#include "support/docstring.h" -#include "LAssert.h" +#include -//#warning verify this please. Lgb -/// -template -size_t lstrlen(T const * t) -{ - Assert(t); // we don't want null pointers - size_t count = 0; - T const * r = t; - while(*r != 0) ++r, ++count; - return count; -} +namespace lyx { +namespace support { -//#warning verify this please. Lgb -/// -template -T * lstrchr(T const * t, int c) -{ - Assert(t); // we don't want null pointers - T * r = const_cast(t); - while(*r != 0) { - if (*r == c) - return r; - else - ++r; - } - return 0; -} +/// Compare \p s and \p s2, ignoring the case. +/// Does not depend on the locale. +int compare_no_case(docstring const & s, docstring const & s2); -#include -#include "LString.h" +/// Compare \p s and \p s2 using the collating rules of the current locale. 
+int compare_locale(docstring const & s, docstring const & s2); +/// Compare \p s and \p s2, ignoring the case of ASCII characters only. +int compare_ascii_no_case(std::string const & s, std::string const & s2); -/// -int compare_no_case(string const & s, string const & s2); +/// Compare \p s and \p s2, ignoring the case of ASCII characters only. +int compare_ascii_no_case(docstring const & s, docstring const & s2); /// -int compare_no_case(string const & s, string const & s2, unsigned int len); +bool isStrInt(std::string const & str); + +/// does the std::string represent an unsigned integer value ? +bool isStrUnsignedInt(std::string const & str); /// -inline int compare(char const * a, char const * b) -{ - return strcmp(a, b); -} +bool isStrDbl(std::string const & str); +/// does the string contain a digit? +bool hasDigitASCII(docstring const & str); -/// -inline int compare(char const * a, char const * b, unsigned int len) -{ - return strncmp(a, b, len); -} +bool isHexChar(char_type); +bool isHex(docstring const & str); -/// -bool isStrInt(string const & str); +int hexToInt(docstring const & str); -/// -int strToInt(string const & str); +/// is \p str pure ascii? +bool isAscii(docstring const & str); -/// -string lowercase(string const &); +/// is \p str pure ascii? +bool isAscii(std::string const & str); -/// -string uppercase(string const &); +/** + * Changes the case of \p c to lowercase. + * Don't use this for non-ASCII characters, since it depends on the locale. + * This overloaded function is only implemented because the char_type variant + * would be used otherwise, and we assert in this function that \p c is in + * the ASCII range. + */ +char lowercase(char c); -/// int to string -string tostr(int i); +/** + * Changes the case of \p c to uppercase. + * Don't use this for non-ASCII characters, since it depends on the locale. 
+ * This overloaded function is only implemented because the char_type variant + * would be used otherwise, and we assert in this function that \p c is in + * the ASCII range. + */ +char uppercase(char c); -/// -string tostr(unsigned int); +/// Changes the case of \p c to lowercase. +/// Does not depend on the locale. +char_type lowercase(char_type c); -/// long to string -string tostr(long l); +/// Changes the case of \p c to uppercase. +/// Does not depend on the locale. +char_type uppercase(char_type c); -/// -string tostr(unsigned long l); +/// Checks if the supplied character is lower-case +bool isLowerCase(char_type ch); -/// -string tostr(char c); +/// Checks if the supplied character is upper-case +bool isUpperCase(char_type ch); -/// void * to string -string tostr(void * v); +/// same as lowercase(), but ignores locale +std::string const ascii_lowercase(std::string const &); +docstring const ascii_lowercase(docstring const &); -/// bool to string -string tostr(bool b); +/// Changes the case of \p s to lowercase. +/// Does not depend on the locale. +docstring const lowercase(docstring const & s); +// Currently unused, but the code is there if needed. +// std::string const lowercase(std::string const & s); -/// -string tostr(double d); +/// Changes the case of \p s to uppercase. +/// Does not depend on the locale. +docstring const uppercase(docstring const & s); -/// Does the string start with this prefix? -bool prefixIs(string const &, char const *); +/// Returns the superscript of \p c or \p c if no superscript exists. +/// Does not depend on the locale. +char_type superscript(char_type c); + +/// Returns the subscript of \p c or \p c if no subscript exists. +/// Does not depend on the locale. +char_type subscript(char_type c); + +/// Does str start with c? +bool prefixIs(docstring const & str, char_type c); + +/// Does str start with pre? 
+bool prefixIs(std::string const & str, std::string const & pre); +bool prefixIs(docstring const & str, docstring const & pre); /// Does the string end with this char? -bool suffixIs(string const &, char); +bool suffixIs(std::string const &, char); +bool suffixIs(docstring const &, char_type); /// Does the string end with this suffix? -bool suffixIs(string const &, char const *); +bool suffixIs(std::string const &, std::string const &); +bool suffixIs(docstring const &, docstring const &); -/// -bool contains(char const * a, string const & b); +/// Is b contained in a? +inline bool contains(std::string const & a, std::string const & b) +{ + return a.find(b) != std::string::npos; +} -/// -bool contains(string const & a, char const * b); +inline bool contains(docstring const & a, docstring const & b) +{ + return a.find(b) != docstring::npos; +} -/// -bool contains(string const & a, string const & b); +inline bool contains(std::string const & a, char b) +{ + return a.find(b) != std::string::npos; +} -/// -bool contains(char const * a, char const * b); +inline bool contains(docstring const & a, char_type b) +{ + return a.find(b) != docstring::npos; +} -/// Counts how many of character c there is in a -unsigned int countChar(string const & a, char const c); +/// +bool containsOnly(std::string const &, std::string const &); /** Extracts a token from this string at the nth delim. - Doesn't modify the original string. Similar to strtok. - Example: - #"a;bc;d".token(';', 1) == "bc";# - #"a;bc;d".token(';', 2) == "d";# + Doesn't modify the original string. Similar to strtok. + Example: + \code + token("a;bc;d", ';', 1) == "bc"; + token("a;bc;d", ';', 2) == "d"; + \endcode */ -string token(string const & a, char delim, int n); +std::string const token(std::string const & a, char delim, int n); +docstring const token(docstring const & a, char_type delim, int n); /** Search a token in this string using the delim. - Doesn't modify the original string. 
Returns -1 in case of - failure. - Example: - #"a;bc;d".tokenPos(';', "bc") == 1;# - #"a;bc;d".token(';', "d") == 2;# + Doesn't modify the original string. Returns -1 in case of + failure. + Example: + \code + tokenPos("a;bc;d", ';', "bc") == 1; + tokenPos("a;bc;d", ';', "d") == 2; + \endcode */ -int tokenPos(string const & a, char delim, string const & tok); +int tokenPos(std::string const & a, char delim, std::string const & tok); +int tokenPos(docstring const & a, char_type delim, docstring const & tok); + + +/// Substitute all \a oldchar with \a newchar +std::string const subst(std::string const & a, char oldchar, char newchar); +/// Substitute all \a oldchar with \a newchar +docstring const subst(docstring const & a, char_type oldchar, char_type newchar); -/** Compares a string and a (simple) regular expression - The only element allowed is "*" for any string of characters - */ -bool regexMatch(string const & a, string const & pattern); +/// substitutes all instances of \a oldstr with \a newstr +std::string const subst(std::string const & a, + std::string const & oldstr, std::string const & newstr); -/// Substitute all "oldchar"s with "newchar" -string subst(string const & a, char oldchar, char newchar); +/// substitutes all instances of \a oldstr with \a newstr +docstring const subst(docstring const & a, + docstring const & oldstr, docstring const & newstr); -/// Substitutes all instances of oldstr with newstr -string subst(string const & a, - char const * oldstr, string const & newstr); +/// Count all occurrences of char \a chr inside \a str +int count_char(std::string const & str, char chr); -/** Strips characters off the end of a string. - #"abccc".strip('c') = "ab".# - */ -string strip(string const & a, char const c = ' '); +/// Count all occurrences of char \a chr inside \a str +int count_char(docstring const & str, docstring::value_type chr); + +/** Count all occurrences of binary chars inside \a str.
+ It is assumed that \a str is UTF-8 encoded and that a binary char + belongs to one of the Unicode general categories Zl, Zp, Cc, Cf, Cs, Co, or Cn + (excluding white space characters such as '\t', '\n', '\v', '\f', '\r'). + See http://www.unicode.org/Public/6.2.0/ucd/UnicodeData.txt +*/ +int count_bin_chars(std::string const & str); + +/** Trims characters off the end and beginning of a string. + \code + trim("ccabccc", "c") == "ab". + \endcode +*/ +docstring const trim(docstring const & a, char const * p = " "); -/** Strips characters of the beginning of a string. - #"cccba".frontstrip('c') = "ba"#. */ -string frontStrip(string const & a, char const c = ' '); +/** Trims characters off the end and beginning of a string. + \code + trim("ccabccc", "c") == "ab". + \endcode +*/ +std::string const trim(std::string const & a, char const * p = " "); -/** Strips characters off the beginning of a string. - #"ababcdef".frontstrip("ab") = "cdef"# .*/ -string frontStrip(string const & a, char const * p); +/** Trims characters off the end of a string, removing any character + in p. + \code + rtrim("abcde", "dec") == "ab". + \endcode +*/ +std::string const rtrim(std::string const & a, char const * p = " "); +docstring const rtrim(docstring const & a, char const * p = " "); -/** Splits the string by the first delim. - Splits the string by the first appearance of delim. - The leading string up to delim is returned in piece (not including - delim), while the original string is cut from after the delimiter. - Example: - #s1= ""; s2= "a;bc".split(s1, ';') -> s1 == "a"; s2 == "bc";# - */ -string split(string const & a, string & piece, char delim); +/** Trims characters off the beginning of a string. + \code + ltrim("abbabcdef", "ab") == "cdef" + \endcode +*/ +std::string const ltrim(std::string const & a, char const * p = " "); +docstring const ltrim(docstring const & a, char const * p = " "); + +/** Splits the string given in the first argument at the first occurrence + of the third argument, delim.
+ What precedes delim is returned in the second argument, piece; this + will be the whole of the string if no delimiter is found. + The return value is what follows delim, if anything. So the return + value is the null string if no delimiter is found. + 'a' and 'piece' must be different variables. + Examples: + \code + s1= "a;bc"; s2= "" + ret = split(s1, s2, ';') -> ret = "bc", s2 == "a" + \endcode + */ +std::string const split(std::string const & a, std::string & piece, char delim); +docstring const split(docstring const & a, docstring & piece, char_type delim); /// Same as split but does not return a piece -string split(string const & a, char delim); +std::string const split(std::string const & a, char delim); /// Same as split but uses the last delim. -string rsplit(string const & a, string & piece, char delim); +std::string const rsplit(std::string const & a, std::string & piece, char delim); +docstring const rsplit(docstring const & a, docstring & piece, char_type delim); +docstring const rsplit(docstring const & a, char_type delim); + +/// Escapes non ASCII chars and other problematic characters that cause +/// problems in latex labels. +docstring const escape(docstring const & lab); + +/// Truncates a string with an ellipsis at the end. Leaves str unchanged and +/// returns false if it is shorter than len. Otherwise resizes str to len, with +/// U+2026 HORIZONTAL ELLIPSIS at the end, and returns true. +/// +/// Warning (Unicode): The cases where we want to truncate the text and it does +/// not end up converted into a QString for UI display must be really +/// rare. Whenever possible, we should prefer calling QFontMetrics::elidedText() +/// instead, which takes into account the actual length on the screen and the +/// layout direction (RTL or LTR). Or a similar function taking into account the +/// font metrics from the buffer view, which still has to be defined. 
Or set up +/// the widgets such that Qt elides the string automatically with the exact +/// needed width. Recall that graphemes not only vary greatly in width, but can +/// also be made of several code points. See: +/// +/// +/// What is acceptable is when we know that the string is probably going to be +/// elided by Qt anyway, and len is chosen such that our own ellipsis will only +/// be displayed in worst-case scenarios. +/// +/// FIXME: apply those principles in the current code. +/// +bool truncateWithEllipsis(docstring & str, size_t const len); + +/// Word-wraps the provided docstring, returning a line-broken string +/// of width no wider than width, with the string broken at spaces. +/// If the string cannot be broken appropriately, it returns something +/// with "..." at the end, again no wider than width. +/// We assume here that str does not contain newlines. +/// If indent is positive, then the first line is indented that many +/// spaces. If it is negative, then successive lines are indented, as +/// if the first line were "outdented". +/// +/// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is +/// subject to the same warning and FIXME as above. +/// +docstring wrap(docstring const & str, int const indent = 0, + size_t const width = 80); + +/// Like the preceding, except it is intended to operate on strings +/// that may contain embedded newlines. +/// \param maxlines Don't return more than maxlines lines. If maxlines +/// is 0, we return everything. +/// +/// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is +/// subject to the same warning and FIXME as above.
+/// +docstring wrapParas(docstring const & str, int const indent = 0, + size_t const width = 80, size_t const maxlines = 10); + +/// Splits \p str at the delimiter \p delim and returns the parts as a vector. +/// If \p keepempty is true, empty strings will be pushed to the vector as well +std::vector const getVectorFromString(std::string const & str, + std::string const & delim = std::string(","), + bool keepempty = false); +std::vector const getVectorFromString(docstring const & str, + docstring const & delim = from_ascii(","), bool keepempty = false); + +/// The reverse operation: joins the elements of \p vec, separated by \p delim. +std::string const getStringFromVector(std::vector const & vec, + std::string const & delim = std::string(",")); +docstring const getStringFromVector(std::vector const & vec, + docstring const & delim = from_ascii(",")); + +/// Search \p search_token in \p str and return the position if it is +/// found, else -1. The last item in \p str must be "". +int findToken(char const * const str[], std::string const & search_token); + + +/// Format a floating point number with at least 6 significant digits, but +/// without scientific notation. +/// Scientific notation would be invalid in some contexts, such as lengths for +/// LaTeX. Simply using std::ostream with std::fixed would produce results +/// like "1000000.000000", and precision control would not be that easy either.
+std::string formatFPNumber(double); + + +template +docstring bformat(docstring const & fmt, Arg1); + +template +docstring bformat(docstring const & fmt, Arg1, Arg2); + +template +docstring bformat(docstring const & fmt, Arg1, Arg2, Arg3); + +template +docstring bformat(docstring const & fmt, Arg1, Arg2, Arg3, Arg4); + + +template<> docstring bformat(docstring const & fmt, int arg1); +template<> docstring bformat(docstring const & fmt, long arg1); +#ifdef LYX_USE_LONG_LONG +template<> docstring bformat(docstring const & fmt, long long arg1); +#endif +template<> docstring bformat(docstring const & fmt, unsigned int arg1); +template<> docstring bformat(docstring const & fmt, docstring arg1); +template<> docstring bformat(docstring const & fmt, char * arg1); +template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2); +template<> docstring bformat(docstring const & fmt, docstring arg1, int arg2); +template<> docstring bformat(docstring const & fmt, char const * arg1, docstring arg2); +template<> docstring bformat(docstring const & fmt, int arg1, int arg2); +template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2, docstring arg3); +template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2, docstring arg3, docstring arg4); + + +} // namespace support +} // namespace lyx #endif