X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2Flstrings.h;h=ac310c59c43dcc598e6a6016c447e91a0bc274d2;hb=57b69a5efddf9f3c148007322f00dad6c253a2ed;hp=837658a13d3e18c29f237ea9609ae83c09c503b3;hpb=2faa06b829b08ec2321590dccf24cfff262ea470;p=lyx.git diff --git a/src/support/lstrings.h b/src/support/lstrings.h index 837658a13d..ac310c59c4 100644 --- a/src/support/lstrings.h +++ b/src/support/lstrings.h @@ -4,7 +4,7 @@ * This file is part of LyX, the document processor. * Licence details can be found in the file COPYING. * - * \author Lars Gullik Bjønnes + * \author Lars Gullik Bjønnes * \author Jean-Marc Lasgouttes * * Full author contact details are available in file CREDITS. @@ -16,107 +16,143 @@ #ifndef LSTRINGS_H #define LSTRINGS_H +#include "support/docstring.h" + #include -#include "LString.h" namespace lyx { namespace support { -/// -int compare_no_case(string const & s, string const & s2); +/// Compare \p s and \p s2, ignoring the case. +/// Does not depend on the locale. +int compare_no_case(docstring const & s, docstring const & s2); -/// -int compare_ascii_no_case(string const & s, string const & s2); +/// Compare \p s and \p s2 using the collating rules of the current locale. +int compare_locale(docstring const & s, docstring const & s2); -/// -int compare_no_case(string const & s, string const & s2, unsigned int len); +/// Compare \p s and \p s2, ignoring the case of ASCII characters only. +int compare_ascii_no_case(std::string const & s, std::string const & s2); -/// -inline -int compare(char const * a, char const * b) -{ -#ifndef CXX_GLOBAL_CSTD - return std::strcmp(a, b); -#else - return strcmp(a, b); -#endif -} +/// Compare \p s and \p s2, ignoring the case of ASCII characters only. +int compare_ascii_no_case(docstring const & s, docstring const & s2); /// -inline -int compare(char const * a, char const * b, unsigned int len) -{ -#ifndef CXX_GLOBAL_CSTD - return std::strncmp(a, b, len); -#else - return strncmp(a, b, len); -#endif -} +bool isStrInt(std::string const & str); + +/// does the std::string represent an unsigned integer value ? +bool isStrUnsignedInt(std::string const & str); /// -bool isStrInt(string const & str); +bool isStrDbl(std::string const & str); -/// does the string represent an unsigned integer value ? -bool isStrUnsignedInt(string const & str); +/// does the string contain a digit? +bool hasDigitASCII(docstring const & str); -/// -int strToInt(string const & str); +bool isHexChar(char_type); -/// convert string to an unsigned integer -unsigned int strToUnsignedInt(string const & str); +bool isHex(docstring const & str); -/// -bool isStrDbl(string const & str); +int hexToInt(docstring const & str); -/// -double strToDbl(string const & str); +/// is \p str pure ascii? +bool isAscii(docstring const & str); -/// +/// is \p str pure ascii? +bool isAscii(std::string const & str); + +/** + * Changes the case of \p c to lowercase. + * Don't use this for non-ASCII characters, since it depends on the locale. + * This overloaded function is only implemented because the char_type variant + * would be used otherwise, and we assert in this function that \p c is in + * the ASCII range. + */ char lowercase(char c); -/// +/** + * Changes the case of \p c to uppercase. + * Don't use this for non-ASCII characters, since it depends on the locale. + * This overloaded function is only implemented because the char_type variant + * would be used otherwise, and we assert in this function that \p c is in + * the ASCII range. + */ char uppercase(char c); +/// Changes the case of \p c to lowercase. +/// Does not depend on the locale. +char_type lowercase(char_type c); + +/// Changes the case of \p c to uppercase. +/// Does not depend on the locale. +char_type uppercase(char_type c); + +/// Checks if the supplied character is lower-case +bool isLowerCase(char_type ch); + +/// Checks if the supplied character is upper-case +bool isUpperCase(char_type ch); + /// same as lowercase(), but ignores locale -string const ascii_lowercase(string const &); +std::string const ascii_lowercase(std::string const &); +docstring const ascii_lowercase(docstring const &); -/// -string const lowercase(string const &); +/// Changes the case of \p s to lowercase. +/// Does not depend on the locale. +docstring const lowercase(docstring const & s); +// Currently unused, but the code is there if needed. +// std::string const lowercase(std::string const & s); -/// -string const uppercase(string const &); +/// Changes the case of \p s to uppercase. +/// Does not depend on the locale. +docstring const uppercase(docstring const & s); + +/// Returns the superscript of \p c or \p c if no superscript exists. +/// Does not depend on the locale. +char_type superscript(char_type c); + +/// Returns the subscript of \p c or \p c if no subscript exists. +/// Does not depend on the locale. +char_type subscript(char_type c); -/// Does the string start with this prefix? -bool prefixIs(string const &, string const &); +/// Does str start with c? +bool prefixIs(docstring const & str, char_type c); + +/// Does str start with pre? +bool prefixIs(std::string const & str, std::string const & pre); +bool prefixIs(docstring const & str, docstring const & pre); /// Does the string end with this char? -bool suffixIs(string const &, char); +bool suffixIs(std::string const &, char); +bool suffixIs(docstring const &, char_type); /// Does the string end with this suffix? -bool suffixIs(string const &, string const &); - -/// -bool contains(string const & a, string const & b); +bool suffixIs(std::string const &, std::string const &); +bool suffixIs(docstring const &, docstring const &); -/// -bool contains(string const & a, char b); +/// Is b contained in a? +inline bool contains(std::string const & a, std::string const & b) +{ + return a.find(b) != std::string::npos; +} -/// This should probably we rewritten to be more general. -class contains_functor { -public: - typedef string first_argument_type; - typedef string second_argument_type; - typedef bool result_type; +inline bool contains(docstring const & a, docstring const & b) +{ + return a.find(b) != docstring::npos; +} - bool operator()(string const & haystack, string const & needle) const { - return contains(haystack, needle); - } -}; +inline bool contains(std::string const & a, char b) +{ + return a.find(b) != std::string::npos; +} +inline bool contains(docstring const & a, char_type b) +{ + return a.find(b) != docstring::npos; +} /// -bool containsOnly(string const &, string const &); +bool containsOnly(std::string const &, std::string const &); /** Extracts a token from this string at the nth delim. Doesn't modify the original string. Similar to strtok. @@ -126,8 +162,9 @@ bool containsOnly(string const &, string const &); token("a;bc;d", ';', 2) == "d"; \endcode */ -string const token(string const & a, char delim, int n); +std::string const token(std::string const & a, char delim, int n); +docstring const token(docstring const & a, char_type delim, int n); /** Search a token in this string using the delim. Doesn't modify the original string. Returns -1 in case of @@ -138,82 +175,201 @@ string const token(string const & a, char delim, int n); tokenPos("a;bc;d", ';', "d") == 2; \endcode */ -int tokenPos(string const & a, char delim, string const & tok); +int tokenPos(std::string const & a, char delim, std::string const & tok); +int tokenPos(docstring const & a, char_type delim, docstring const & tok); -/** Compares a string and a (simple) regular expression - The only element allowed is "*" for any string of characters - */ -bool regexMatch(string const & a, string const & pattern); +/// Substitute all \a oldchar with \a newchar +std::string const subst(std::string const & a, char oldchar, char newchar); /// Substitute all \a oldchar with \a newchar -string const subst(string const & a, char oldchar, char newchar); +docstring const subst(docstring const & a, char_type oldchar, char_type newchar); + +/// substitutes all instances of \a oldstr with \a newstr +std::string const subst(std::string const & a, + std::string const & oldstr, std::string const & newstr); /// substitutes all instances of \a oldstr with \a newstr -string const subst(string const & a, - string const & oldstr, string const & newstr); +docstring const subst(docstring const & a, + docstring const & oldstr, docstring const & newstr); + +/// Count all occurences of char \a chr inside \a str +int count_char(std::string const & str, char chr); + +/// Count all occurences of char \a chr inside \a str +int count_char(docstring const & str, docstring::value_type chr); + +/** Count all occurences of binary chars inside \a str. + It is assumed that \a str is utf-8 encoded and that a binary char + belongs to the unicode class names Zl, Zp, Cc, Cf, Cs, Co, or Cn + (excluding white space characters such as '\t', '\n', '\v', '\f', '\r'). + See http://www.unicode.org/Public/6.2.0/ucd/UnicodeData.txt +*/ +int count_bin_chars(std::string const & str); /** Trims characters off the end and beginning of a string. \code trim("ccabccc", "c") == "ab". \endcode */ -string const trim(string const & a, char const * p = " "); +docstring const trim(docstring const & a, char const * p = " "); -/** Trims characters off the end of a string. +/** Trims characters off the end and beginning of a string. \code - rtrim("abccc", "c") == "ab". + trim("ccabccc", "c") == "ab". \endcode */ -string const rtrim(string const & a, char const * p = " "); +std::string const trim(std::string const & a, char const * p = " "); -/** Trims characters off the beginning of a string. +/** Trims characters off the end of a string, removing any character + in p. \code - ltrim("ababcdef", "ab") = "cdef" + rtrim("abcde", "dec") == "ab". \endcode */ -string const ltrim(string const & a, char const * p = " "); +std::string const rtrim(std::string const & a, char const * p = " "); +docstring const rtrim(docstring const & a, char const * p = " "); -/** Splits the string by the first delim. - Splits the string by the first appearance of delim. - The leading string up to delim is returned in piece (not including - delim), while the original string is cut from after the delimiter. - Example: +/** Trims characters off the beginning of a string. \code - s1= ""; s2= "a;bc".split(s1, ';') -> s1 == "a"; s2 == "bc"; + ("abbabcdef", "ab") = "cdef" \endcode */ -string const split(string const & a, string & piece, char delim); +std::string const ltrim(std::string const & a, char const * p = " "); +docstring const ltrim(docstring const & a, char const * p = " "); + +/** Splits the string given in the first argument at the first occurence + of the third argument, delim. + What precedes delim is returned in the second argument, piece; this + will be the whole of the string if no delimiter is found. + The return value is what follows delim, if anything. So the return + value is the null string if no delimiter is found. + 'a' and 'piece' must be different variables. + Examples: + \code + s1= "a;bc"; s2= "" + ret = split(s1, s2, ';') -> ret = "bc", s2 == "a" + \endcode + */ +std::string const split(std::string const & a, std::string & piece, char delim); +docstring const split(docstring const & a, docstring & piece, char_type delim); /// Same as split but does not return a piece -string const split(string const & a, char delim); +std::string const split(std::string const & a, char delim); /// Same as split but uses the last delim. -string const rsplit(string const & a, string & piece, char delim); +std::string const rsplit(std::string const & a, std::string & piece, char delim); +docstring const rsplit(docstring const & a, docstring & piece, char_type delim); +docstring const rsplit(docstring const & a, char_type delim); -/// Escapes non ASCII chars -string const escape(string const & lab); +/// Escapes non ASCII chars and other problematic characters that cause +/// problems in latex labels. +docstring const escape(docstring const & lab); + +/// Truncates a string with an ellipsis at the end. Leaves str unchanged and +/// returns false if it is shorter than len. Otherwise resizes str to len, with +/// U+2026 HORIZONTAL ELLIPSIS at the end, and returns true. +/// +/// Warning (Unicode): The cases where we want to truncate the text and it does +/// not end up converted into a QString for UI display must be really +/// rare. Whenever possible, we should prefer calling QFontMetrics::elidedText() +/// instead, which takes into account the actual length on the screen and the +/// layout direction (RTL or LTR). Or a similar function taking into account the +/// font metrics from the buffer view, which still has to be defined. Or set up +/// the widgets such that Qt elides the string automatically with the exact +/// needed width. Recall that not only graphemes vary greatly in width, but also +/// can be made of several code points. See: +/// +/// +/// What is acceptable is when we know that the string is probably going to be +/// elided by Qt anyway, and len is chosen such that our own ellipsis will only +/// be displayed in worst-case scenarios. +/// +/// FIXME: apply those principles in the current code. +/// +bool truncateWithEllipsis(docstring & str, size_t const len); + +/// Word-wraps the provided docstring, returning a line-broken string +/// of width no wider than width, with the string broken at spaces. +/// If the string cannot be broken appropriately, it returns something +/// with "..." at the end, again no wider than width. +/// We assume here that str does not contain newlines. +/// If indent is positive, then the first line is indented that many +/// spaces. If it is negative, then successive lines are indented, as +/// if the first line were "outdented". +/// +/// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is +/// subject to the same warning and FIXME as above. +/// +docstring wrap(docstring const & str, int const indent = 0, + size_t const width = 80); + +/// Like the preceding, except it is intended to operate on strings +/// that may contain embedded newlines. +/// \param numlines Don't return more than numlines lines. If numlines +/// is 0, we return everything. +/// +/// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is +/// subject to the same warning and FIXME as above. +/// +docstring wrapParas(docstring const & str, int const indent = 0, + size_t const width = 80, size_t const maxlines = 10); /// gives a vector of stringparts which have the delimiter delim -std::vector const getVectorFromString(string const & str, - string const & delim = ","); - -// the same vice versa -string const getStringFromVector(std::vector const & vec, - string const & delim = ","); - -// wrapper around boost::format using one argument %1$s -string bformat(string const & fmt, string const & arg1); -// arguments %1$s and %2$s -string bformat(string const & fmt, string const & arg1, string const & arg2); -// arguments %1$d and %2$d -string bformat(string const & fmt, int arg1, int arg2); -// arguments %1$s and %2$s and %3$s -string bformat(string const & fmt, string const & arg1, string const & arg2, - string const & arg3); -// arguments %1$s and %2$s and %3$s and %4$s -string bformat(string const & fmt, string const & arg1, string const & arg2, - string const & arg3, string const & arg4); +/// If \p keepempty is true, empty strings will be pushed to the vector as well +std::vector const getVectorFromString(std::string const & str, + std::string const & delim = std::string(","), + bool keepempty = false); +std::vector const getVectorFromString(docstring const & str, + docstring const & delim = from_ascii(","), bool keepempty = false); + +/// the same vice versa +std::string const getStringFromVector(std::vector const & vec, + std::string const & delim = std::string(",")); +docstring const getStringFromVector(std::vector const & vec, + docstring const & delim = from_ascii(",")); + +/// Search \p search_token in \p str and return the position if it is +/// found, else -1. The last item in \p str must be "". +int findToken(char const * const str[], std::string const & search_token); + + +/// Format a floating point number with at least 6 siginificant digits, but +/// without scientific notation. +/// Scientific notation would be invalid in some contexts, such as lengths for +/// LaTeX. Simply using std::ostream with std::fixed would produce results +/// like "1000000.000000", and precision control would not be that easy either. +std::string formatFPNumber(double); + + +template +docstring bformat(docstring const & fmt, Arg1); + +template +docstring bformat(docstring const & fmt, Arg1, Arg2); + +template +docstring bformat(docstring const & fmt, Arg1, Arg2, Arg3); + +template +docstring bformat(docstring const & fmt, Arg1, Arg2, Arg3, Arg4); + + +template<> docstring bformat(docstring const & fmt, int arg1); +template<> docstring bformat(docstring const & fmt, long arg1); +#ifdef LYX_USE_LONG_LONG +template<> docstring bformat(docstring const & fmt, long long arg1); +#endif +template<> docstring bformat(docstring const & fmt, unsigned int arg1); +template<> docstring bformat(docstring const & fmt, docstring arg1); +template<> docstring bformat(docstring const & fmt, char * arg1); +template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2); +template<> docstring bformat(docstring const & fmt, docstring arg1, int arg2); +template<> docstring bformat(docstring const & fmt, char const * arg1, docstring arg2); +template<> docstring bformat(docstring const & fmt, int arg1, int arg2); +template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2, docstring arg3); +template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2, docstring arg3, docstring arg4); + } // namespace support } // namespace lyx