X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2Fsupport%2Flstrings.h;h=ac310c59c43dcc598e6a6016c447e91a0bc274d2;hb=57b69a5efddf9f3c148007322f00dad6c253a2ed;hp=837658a13d3e18c29f237ea9609ae83c09c503b3;hpb=2faa06b829b08ec2321590dccf24cfff262ea470;p=lyx.git

diff --git a/src/support/lstrings.h b/src/support/lstrings.h
index 837658a13d..ac310c59c4 100644
--- a/src/support/lstrings.h
+++ b/src/support/lstrings.h
@@ -4,7 +4,7 @@
  * This file is part of LyX, the document processor.
  * Licence details can be found in the file COPYING.
  *
- * \author Lars Gullik Bjønnes
+ * \author Lars Gullik Bjønnes
  * \author Jean-Marc Lasgouttes
  *
  * Full author contact details are available in file CREDITS.
@@ -16,107 +16,143 @@
 #ifndef LSTRINGS_H
 #define LSTRINGS_H
 
+#include "support/docstring.h"
+
 #include <vector>
 
-#include "LString.h"
 
 namespace lyx {
 namespace support {
 
-///
-int compare_no_case(string const & s, string const & s2);
+/// Compare \p s and \p s2, ignoring the case.
+/// Does not depend on the locale.
+int compare_no_case(docstring const & s, docstring const & s2);
 
-///
-int compare_ascii_no_case(string const & s, string const & s2);
+/// Compare \p s and \p s2 using the collating rules of the current locale.
+int compare_locale(docstring const & s, docstring const & s2);
 
-///
-int compare_no_case(string const & s, string const & s2, unsigned int len);
+/// Compare \p s and \p s2, ignoring the case of ASCII characters only.
+int compare_ascii_no_case(std::string const & s, std::string const & s2);
 
-///
-inline
-int compare(char const * a, char const * b)
-{
-#ifndef CXX_GLOBAL_CSTD
-	return std::strcmp(a, b);
-#else
-	return strcmp(a, b);
-#endif
-}
+/// Compare \p s and \p s2, ignoring the case of ASCII characters only.
+int compare_ascii_no_case(docstring const & s, docstring const & s2);
 
 ///
-inline
-int compare(char const * a, char const * b, unsigned int len)
-{
-#ifndef CXX_GLOBAL_CSTD
-	return std::strncmp(a, b, len);
-#else
-	return strncmp(a, b, len);
-#endif
-}
+bool isStrInt(std::string const & str);
+
+/// does the string represent an unsigned integer value?
+bool isStrUnsignedInt(std::string const & str);
 
 ///
-bool isStrInt(string const & str);
+bool isStrDbl(std::string const & str);
 
-/// does the string represent an unsigned integer value ?
-bool isStrUnsignedInt(string const & str);
+/// does the string contain a digit?
+bool hasDigitASCII(docstring const & str);
 
-///
-int strToInt(string const & str);
+bool isHexChar(char_type);
 
-/// convert string to an unsigned integer
-unsigned int strToUnsignedInt(string const & str);
+bool isHex(docstring const & str);
 
-///
-bool isStrDbl(string const & str);
+int hexToInt(docstring const & str);
 
-///
-double strToDbl(string const & str);
+/// is \p str pure ascii?
+bool isAscii(docstring const & str);
 
-///
+/// is \p str pure ascii?
+bool isAscii(std::string const & str);
+
+/**
+ * Changes the case of \p c to lowercase.
+ * Don't use this for non-ASCII characters, since it depends on the locale.
+ * This overloaded function is only implemented because the char_type variant
+ * would be used otherwise, and we assert in this function that \p c is in
+ * the ASCII range.
+ */
 char lowercase(char c);
 
-///
+/**
+ * Changes the case of \p c to uppercase.
+ * Don't use this for non-ASCII characters, since it depends on the locale.
+ * This overloaded function is only implemented because the char_type variant
+ * would be used otherwise, and we assert in this function that \p c is in
+ * the ASCII range.
+ */
 char uppercase(char c);
 
+/// Changes the case of \p c to lowercase.
+/// Does not depend on the locale.
+char_type lowercase(char_type c);
+
+/// Changes the case of \p c to uppercase.
+/// Does not depend on the locale.
+char_type uppercase(char_type c);
+
+/// Checks if the supplied character is lower-case
+bool isLowerCase(char_type ch);
+
+/// Checks if the supplied character is upper-case
+bool isUpperCase(char_type ch);
+
 /// same as lowercase(), but ignores locale
-string const ascii_lowercase(string const &);
+std::string const ascii_lowercase(std::string const &);
+docstring const ascii_lowercase(docstring const &);
 
-///
-string const lowercase(string const &);
+/// Changes the case of \p s to lowercase.
+/// Does not depend on the locale.
+docstring const lowercase(docstring const & s);
+// Currently unused, but the code is there if needed.
+// std::string const lowercase(std::string const & s);
 
-///
-string const uppercase(string const &);
+/// Changes the case of \p s to uppercase.
+/// Does not depend on the locale.
+docstring const uppercase(docstring const & s);
+
+/// Returns the superscript of \p c or \p c if no superscript exists.
+/// Does not depend on the locale.
+char_type superscript(char_type c);
+
+/// Returns the subscript of \p c or \p c if no subscript exists.
+/// Does not depend on the locale.
+char_type subscript(char_type c);
 
-/// Does the string start with this prefix?
-bool prefixIs(string const &, string const &);
+/// Does str start with c?
+bool prefixIs(docstring const & str, char_type c);
+
+/// Does str start with pre?
+bool prefixIs(std::string const & str, std::string const & pre);
+bool prefixIs(docstring const & str, docstring const & pre);
 
 /// Does the string end with this char?
-bool suffixIs(string const &, char);
+bool suffixIs(std::string const &, char);
+bool suffixIs(docstring const &, char_type);
 
 /// Does the string end with this suffix?
-bool suffixIs(string const &, string const &);
-
-///
-bool contains(string const & a, string const & b);
+bool suffixIs(std::string const &, std::string const &);
+bool suffixIs(docstring const &, docstring const &);
 
-///
-bool contains(string const & a, char b);
+/// Is b contained in a?
+inline bool contains(std::string const & a, std::string const & b)
+{
+	return a.find(b) != std::string::npos; // any result other than npos means b was found in a
+}
 
-/// This should probably we rewritten to be more general.
-class contains_functor {
-public:
-	typedef string first_argument_type;
-	typedef string second_argument_type;
-	typedef bool result_type;
+inline bool contains(docstring const & a, docstring const & b)
+{
+	return a.find(b) != docstring::npos; // docstring overload: any result other than npos means b was found in a
+}
 
-	bool operator()(string const & haystack, string const & needle) const {
-		return contains(haystack, needle);
-	}
-};
+inline bool contains(std::string const & a, char b)
+{
+	return a.find(b) != std::string::npos; // any result other than npos means the character b occurs in a
+}
 
+inline bool contains(docstring const & a, char_type b)
+{
+	return a.find(b) != docstring::npos; // docstring overload: any result other than npos means the character b occurs in a
+}
 
 ///
-bool containsOnly(string const &, string const &);
+bool containsOnly(std::string const &, std::string const &);
 
 /** Extracts a token from this string at the nth delim.
     Doesn't modify the original string. Similar to strtok.
@@ -126,8 +162,9 @@ bool containsOnly(string const &, string const &);
     token("a;bc;d", ';', 2) == "d";
     \endcode
 */
-string const token(string const & a, char delim, int n);
+std::string const token(std::string const & a, char delim, int n);
 
+docstring const token(docstring const & a, char_type delim, int n);
 
 /** Search a token in this string using the delim.
     Doesn't modify the original string. Returns -1 in case of
@@ -138,82 +175,201 @@ string const token(string const & a, char delim, int n);
     tokenPos("a;bc;d", ';', "d") == 2;
     \endcode
 */
-int tokenPos(string const & a, char delim, string const & tok);
+int tokenPos(std::string const & a, char delim, std::string const & tok);
+int tokenPos(docstring const & a, char_type delim, docstring const & tok);
 
 
-/** Compares a string and a (simple) regular expression
-  The only element allowed is "*" for any string of characters
-  */
-bool regexMatch(string const & a, string const & pattern);
+/// Substitute all \a oldchar with \a newchar
+std::string const subst(std::string const & a, char oldchar, char newchar);
 
 /// Substitute all \a oldchar with \a newchar
-string const subst(string const & a, char oldchar, char newchar);
+docstring const subst(docstring const & a, char_type oldchar, char_type newchar);
+
+/// substitutes all instances of \a oldstr with \a newstr
+std::string const subst(std::string const & a,
+		   std::string const & oldstr, std::string const & newstr);
 
 /// substitutes all instances of \a oldstr with \a newstr
-string const subst(string const & a,
-		   string const & oldstr, string const & newstr);
+docstring const subst(docstring const & a,
+		docstring const & oldstr, docstring const & newstr);
+
+/// Count all occurrences of char \a chr inside \a str
+int count_char(std::string const & str, char chr);
+
+/// Count all occurrences of char \a chr inside \a str
+int count_char(docstring const & str, docstring::value_type chr);
+
+/** Count all occurrences of binary chars inside \a str.
+    It is assumed that \a str is utf-8 encoded and that a binary char
+    belongs to the unicode class names Zl, Zp, Cc, Cf, Cs, Co, or Cn
+    (excluding white space characters such as '\t', '\n', '\v', '\f', '\r').
+    See http://www.unicode.org/Public/6.2.0/ucd/UnicodeData.txt
+*/
+int count_bin_chars(std::string const & str);
 
 /** Trims characters off the end and beginning of a string.
     \code
     trim("ccabccc", "c") == "ab".
     \endcode
 */
-string const trim(string const & a, char const * p = " ");
+docstring const trim(docstring const & a, char const * p = " ");
 
-/** Trims characters off the end of a string.
+/** Trims characters off the end and beginning of a string.
     \code
-    rtrim("abccc", "c") == "ab".
+    trim("ccabccc", "c") == "ab".
     \endcode
 */
-string const rtrim(string const & a, char const * p = " ");
+std::string const trim(std::string const & a, char const * p = " ");
 
-/** Trims characters off the beginning of a string.
+/** Trims characters off the end of a string, removing any character
+    in p.
     \code
-   ltrim("ababcdef", "ab") = "cdef"
+    rtrim("abcde", "dec") == "ab".
     \endcode
 */
-string const ltrim(string const & a, char const * p = " ");
+std::string const rtrim(std::string const & a, char const * p = " ");
+docstring const rtrim(docstring const & a, char const * p = " ");
 
-/** Splits the string by the first delim.
-    Splits the string by the first appearance of delim.
-    The leading string up to delim is returned in piece (not including
-    delim), while the original string is cut from after the delimiter.
-    Example:
+/** Trims characters off the beginning of a string.
     \code
-    s1= ""; s2= "a;bc".split(s1, ';') -> s1 == "a"; s2 == "bc";
+   ltrim("abbabcdef", "ab") == "cdef"
     \endcode
 */
-string const split(string const & a, string & piece, char delim);
+std::string const ltrim(std::string const & a, char const * p = " ");
+docstring const ltrim(docstring const & a, char const * p = " ");
+
+/** Splits the string given in the first argument at the first occurrence
+    of the third argument, delim.
+    What precedes delim is returned in the second argument, piece; this
+    will be the whole of the string if no delimiter is found.
+    The return value is what follows delim, if anything. So the return
+    value is the null string if no delimiter is found.
+    'a' and 'piece' must be different variables.
+    Examples:
+    \code
+    s1= "a;bc"; s2= ""
+    ret = split(s1, s2, ';') -> ret == "bc", s2 == "a"
+    \endcode
+ */
+std::string const split(std::string const & a, std::string & piece, char delim);
+docstring const split(docstring const & a, docstring & piece, char_type delim);
 
 /// Same as split but does not return a piece
-string const split(string const & a, char delim);
+std::string const split(std::string const & a, char delim);
 
 /// Same as split but uses the last delim.
-string const rsplit(string const & a, string & piece, char delim);
+std::string const rsplit(std::string const & a, std::string & piece, char delim);
+docstring const rsplit(docstring const & a, docstring & piece, char_type delim);
+docstring const rsplit(docstring const & a, char_type delim);
 
-/// Escapes non ASCII chars
-string const escape(string const & lab);
+/// Escapes non ASCII chars and other problematic characters that cause
+/// problems in latex labels.
+docstring const escape(docstring const & lab);
+
+/// Truncates a string with an ellipsis at the end.  Leaves str unchanged and
+/// returns false if it is shorter than len. Otherwise resizes str to len, with
+/// U+2026 HORIZONTAL ELLIPSIS at the end, and returns true.
+///
+/// Warning (Unicode): The cases where we want to truncate the text and it does
+/// not end up converted into a QString for UI display must be really
+/// rare. Whenever possible, we should prefer calling QFontMetrics::elidedText()
+/// instead, which takes into account the actual length on the screen and the
+/// layout direction (RTL or LTR). Or a similar function taking into account the
+/// font metrics from the buffer view, which still has to be defined. Or set up
+/// the widgets such that Qt elides the string automatically with the exact
+/// needed width. Recall that not only graphemes vary greatly in width, but also
+/// can be made of several code points. See:
+/// <http://utf8everywhere.org/#myth.strlen>
+///
+/// What is acceptable is when we know that the string is probably going to be
+/// elided by Qt anyway, and len is chosen such that our own ellipsis will only
+/// be displayed in worst-case scenarios.
+///
+/// FIXME: apply those principles in the current code.
+/// 
+bool truncateWithEllipsis(docstring & str, size_t const len);
+
+/// Word-wraps the provided docstring, returning a line-broken string
+/// of width no wider than width, with the string broken at spaces. 
+/// If the string cannot be broken appropriately, it returns something 
+/// with "..." at the end, again no wider than width.
+/// We assume here that str does not contain newlines.
+/// If indent is positive, then the first line is indented that many 
+/// spaces. If it is negative, then successive lines are indented, as
+/// if the first line were "outdented".
+///
+/// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is
+/// subject to the same warning and FIXME as above.
+///
+docstring wrap(docstring const & str, int const indent = 0,
+               size_t const width = 80);
+
+/// Like the preceding, except it is intended to operate on strings
+/// that may contain embedded newlines.
+/// \param maxlines Don't return more than maxlines lines. If maxlines
+///    is 0, we return everything.
+///
+/// Warning (Unicode): uses truncateWithEllipsis() internally. Therefore it is
+/// subject to the same warning and FIXME as above.
+///
+docstring wrapParas(docstring const & str, int const indent = 0,
+                    size_t const width = 80, size_t const maxlines = 10);
 
 /// gives a vector of stringparts which have the delimiter delim
-std::vector<string> const getVectorFromString(string const & str,
-					      string const & delim = ",");
-
-// the same vice versa
-string const getStringFromVector(std::vector<string> const & vec,
-				 string const & delim = ",");
-
-// wrapper around boost::format using one argument %1$s
-string bformat(string const & fmt, string const & arg1);
-// arguments %1$s and %2$s
-string bformat(string const & fmt, string const & arg1, string const & arg2);
-// arguments %1$d and %2$d
-string bformat(string const & fmt, int arg1, int arg2);
-// arguments %1$s and %2$s and %3$s
-string bformat(string const & fmt, string const & arg1, string const & arg2,
-               string const & arg3);
-// arguments %1$s and %2$s and %3$s and %4$s
-string bformat(string const & fmt, string const & arg1, string const & arg2,
-               string const & arg3, string const & arg4);
+/// If \p keepempty is true, empty strings will be pushed to the vector as well
+std::vector<std::string> const getVectorFromString(std::string const & str,
+					      std::string const & delim = std::string(","),
+					      bool keepempty = false);
+std::vector<docstring> const getVectorFromString(docstring const & str,
+		docstring const & delim = from_ascii(","), bool keepempty = false);
+
+/// the same vice versa
+std::string const getStringFromVector(std::vector<std::string> const & vec,
+				 std::string const & delim = std::string(","));
+docstring const getStringFromVector(std::vector<docstring> const & vec,
+				 docstring const & delim = from_ascii(","));
+
+/// Search \p search_token in \p str and return the position if it is
+/// found, else -1. The last item in \p str must be "".
+int findToken(char const * const str[], std::string const & search_token);
+
+
+/// Format a floating point number with at least 6 significant digits, but
+/// without scientific notation.
+/// Scientific notation would be invalid in some contexts, such as lengths for
+/// LaTeX. Simply using std::ostream with std::fixed would produce results
+/// like "1000000.000000", and precision control would not be that easy either.
+std::string formatFPNumber(double);
+
+
+template <class Arg1>
+docstring bformat(docstring const & fmt, Arg1);
+
+template <class Arg1, class Arg2>
+docstring bformat(docstring const & fmt, Arg1, Arg2);
+
+template <class Arg1, class Arg2, class Arg3>
+docstring bformat(docstring const & fmt, Arg1, Arg2, Arg3);
+
+template <class Arg1, class Arg2, class Arg3, class Arg4>
+docstring bformat(docstring const & fmt, Arg1, Arg2, Arg3, Arg4);
+
+
+template<> docstring bformat(docstring const & fmt, int arg1);
+template<> docstring bformat(docstring const & fmt, long arg1);
+#ifdef LYX_USE_LONG_LONG
+template<> docstring bformat(docstring const & fmt, long long arg1);
+#endif
+template<> docstring bformat(docstring const & fmt, unsigned int arg1);
+template<> docstring bformat(docstring const & fmt, docstring arg1);
+template<> docstring bformat(docstring const & fmt, char * arg1);
+template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2);
+template<> docstring bformat(docstring const & fmt, docstring arg1, int arg2);
+template<> docstring bformat(docstring const & fmt, char const * arg1, docstring arg2);
+template<> docstring bformat(docstring const & fmt, int arg1, int arg2);
+template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2, docstring arg3);
+template<> docstring bformat(docstring const & fmt, docstring arg1, docstring arg2, docstring arg3, docstring arg4);
+
 
 } // namespace support
 } // namespace lyx