Handle multiple spaces at row break

[features.git] / src / support / lstrings.cpp
diff --git a/src/support/lstrings.cpp b/src/support/lstrings.cpp

index 8508e4ef1303e06e7e34d3e560b387f7ee142194..d5b6dea588f10e40d378576f0be08270d444f29e 100644 (file)
--- a/src/support/lstrings.cpp
+++ b/src/support/lstrings.cpp
@@ -16,6 +16,7 @@
  
  #include "support/convert.h"
  #include "support/debug.h"
+#include "support/lyxlib.h"
  #include "support/qstring_helpers.h"
  
  #include "support/lassert.h"
@@ -25,6 +26,8 @@
  #include <cstdio>
  #include <cstring>
  #include <algorithm>
+#include <cmath>
+#include <iomanip>
+#include <sstream>
  #include <typeinfo>
  
  using namespace std;
@@ -76,7 +79,7 @@ inline QChar const ucs4_to_qchar(char_type const ucs4)
  
  /// Maximum valid UCS4 code point
  char_type const ucs4_max = 0x10ffff;
-} // anon namespace
+} // namespace
  
  
  bool isLetterChar(char_type c)
@@ -143,7 +146,7 @@ bool isSpace(char_type c)
  {
         if (!is_utf16(c)) {
                 // assume that no non-utf16 character is a space
-               // c outside the UCS4 range is catched as well
+               // c outside the UCS4 range is caught as well
                 return false;
         }
         QChar const qc = ucs4_to_qchar(c);
@@ -155,17 +158,44 @@ bool isNumber(char_type c)
  {
         if (!is_utf16(c))
                 // assume that no non-utf16 character is a numeral
-               // c outside the UCS4 range is catched as well
+               // c outside the UCS4 range is caught as well
                 return false;
         return ucs4_to_qchar(c).isNumber();
  }
  
  
+/// Tell whether \p c has the bidirectional class "common number
+/// separator" (QChar::DirCS).
+bool isCommonNumberSeparator(char_type c)
+{
+       // Anything is_utf16() rejects is assumed not to be a separator;
+       // this also catches values outside the UCS4 range.
+       bool const representable = is_utf16(c);
+       return representable
+               && ucs4_to_qchar(c).direction() == QChar::DirCS;
+}
+
+
+/// Tell whether \p c has the bidirectional class "European number
+/// terminator" (QChar::DirET).
+bool isEuropeanNumberTerminator(char_type c)
+{
+       if (is_utf16(c))
+               return ucs4_to_qchar(c).direction() == QChar::DirET;
+       // Anything is_utf16() rejects is assumed not to be a terminator;
+       // this also catches values outside the UCS4 range.
+       return false;
+}
+
+
  bool isDigitASCII(char_type c)
  {
         return '0' <= c && c <= '9';
  }
  
+/// Tell whether \p c is a number character according to QChar::isNumber().
+bool isNumberChar(char_type c)
+{
+       // ucs4_to_qchar() yields a single QChar, i.e. one 16 bit code unit,
+       // so only hand it values accepted by is_utf16(), like the other
+       // character classifiers in this file do. A plain range check against
+       // ucs4_max would let code points above 0xffff through.
+       // Values outside the UCS4 range are caught by this test as well.
+       if (!is_utf16(c))
+               return false;
+       return ucs4_to_qchar(c).isNumber();
+}
  
  bool isAlnumASCII(char_type c)
  {
@@ -179,6 +209,18 @@ bool isASCII(char_type c)
  }
  
  
+/// Tell whether \p c belongs to the Unicode category "Punctuation, Open"
+/// (QChar::Punctuation_Open).
+bool isOpenPunctuation(char_type c)
+{
+       // Anything is_utf16() rejects is assumed not to be opening
+       // punctuation; this also catches values outside the UCS4 range.
+       return is_utf16(c)
+               && ucs4_to_qchar(c).category() == QChar::Punctuation_Open;
+}
+
+
  namespace support {
  
  int compare_no_case(docstring const & s, docstring const & s2)
@@ -205,24 +247,7 @@ int compare_no_case(docstring const & s, docstring const & s2)
  
  int compare_locale(docstring const & s, docstring const & s2)
  {
-       // FIXME We have a report that this does not work on windows (bug 9030)
-       try
-       {
-               string const l = to_local8bit(s);
-               string const r = to_local8bit(s2);
-               return strcoll(l.c_str(), r.c_str());
-       }
-       catch (bad_cast & e)
-       {
-               // fall back to builtin sorting
-               LYXERR0("Could not compare using the current locale: "
-                       << e.what() << ", using fallback.");
-               if (s < s2)
-                       return -1;
-               if (s > s2)
-                       return 1;
-               return 0;
-       }
+       return QString::localeAwareCompare(toqstr(s), toqstr(s2));
  }
  
  
@@ -235,7 +260,7 @@ Char ascii_tolower(Char c) {
         return c;
  }
  
-}
+} // namespace
  
  
  int compare_ascii_no_case(string const & s, string const & s2)
@@ -388,13 +413,13 @@ bool isHexChar(char_type c)
  
  bool isHex(docstring const & str)
  {
-       int index = 0;
+       size_t index = 0;
  
         if (str.length() > 2 && str[0] == '0' &&
             (str[1] == 'x' || str[1] == 'X'))
                 index = 2;
  
-       int const len = str.length();
+       size_t const len = str.length();
  
         for (; index < len; ++index) {
                 if (!isHexChar(str[index]))
@@ -404,10 +429,10 @@ bool isHex(docstring const & str)
  }
  
  
-int hexToInt(docstring const & str)
+/// Parse \p str as a hexadecimal number using sscanf("%x").
+/// \return the parsed value, or 0 if nothing could be converted.
+unsigned int hexToInt(docstring const & str)
  {
         string s = to_ascii(str);
-       int h;
+       // sscanf leaves its output argument untouched when no conversion
+       // takes place, so start from a defined value instead of returning
+       // an indeterminate one.
+       unsigned int h = 0;
         sscanf(s.c_str(), "%x", &h);
         return h;
  }
@@ -415,8 +440,8 @@ int hexToInt(docstring const & str)
  
  bool isAscii(docstring const & str)
  {
-       int const len = str.length();
-       for (int i = 0; i < len; ++i)
+       size_t const len = str.length();
+       for (size_t i = 0; i < len; ++i)
                 if (str[i] >= 0x80)
                         return false;
         return true;
@@ -425,8 +450,8 @@ bool isAscii(docstring const & str)
  
  bool isAscii(string const & str)
  {
-       int const len = str.length();
-       for (int i = 0; i < len; ++i)
+       size_t const len = str.length();
+       for (size_t i = 0; i < len; ++i)
                 if (static_cast<unsigned char>(str[i]) >= 0x80)
                         return false;
         return true;
@@ -496,7 +521,7 @@ template<typename Char> struct local_ascii_lowercase {
         Char operator()(Char c) const { return ascii_tolower(c); }
  };
  
-} // end of anon namespace
+} // namespace
  
  
  docstring const lowercase(docstring const & a)
@@ -525,6 +550,14 @@ docstring const uppercase(docstring const & a)
  }
  
  
+/// Return a copy of \p s whose first character has been passed through
+/// uppercase(); the remaining characters are left as they are.
+/// An empty string is returned unchanged.
+docstring capitalize(docstring const & s)
+{
+       // Nothing to capitalize, and ret[0] of an empty string must not be
+       // written to.
+       if (s.empty())
+               return s;
+       docstring ret = s;
+       char_type const first = uppercase(ret[0]);
+       ret[0] = first;
+       return ret;
+}
+
+
  string const ascii_lowercase(string const & a)
  {
         string tmp(a);
@@ -731,6 +764,12 @@ bool containsOnly(string const & s, string const & cset)
  }
  
  
+/// Tell whether every character of \p s also occurs in \p cset.
+/// An empty \p s trivially satisfies this.
+bool containsOnly(docstring const & s, string const & cset)
+{
+       docstring const allowed = from_ascii(cset);
+       size_t const offender = s.find_first_not_of(allowed);
+       return offender == docstring::npos;
+}
+
+
  // ale970405+lasgoutt-970425
  // rewritten to use new string (Lgb)
  string const token(string const & a, char delim, int n)
@@ -866,7 +905,7 @@ String const subst_string(String const & a,
         size_t const olen = oldstr.length();
         while ((i = lstr.find(oldstr, i)) != string::npos) {
                 lstr.replace(i, olen, newstr);
-               i += newstr.length(); // We need to be sure that we dont
+               i += newstr.length(); // We need to be sure that we don't
                 // use the same i over and over again.
         }
         return lstr;
@@ -882,13 +921,13 @@ docstring const subst_string(docstring const & a,
         size_t const olen = oldstr.length();
         while ((i = lstr.find(oldstr, i)) != string::npos) {
                 lstr.replace(i, olen, newstr);
-               i += newstr.length(); // We need to be sure that we dont
+               i += newstr.length(); // We need to be sure that we don't
                 // use the same i over and over again.
         }
         return lstr;
  }
  
-}
+} // namespace
  
  
  string const subst(string const & a, char oldchar, char newchar)
@@ -930,7 +969,7 @@ int count_char(string const & str, char chr)
  }
  
  
-/// Count all occurences of char \a chr inside \a str
+/// Count all occurrences of char \a chr inside \a str
  int count_char(docstring const & str, docstring::value_type chr)
  {
         int count = 0;
@@ -943,6 +982,21 @@ int count_char(docstring const & str, docstring::value_type chr)
  }
  
  
+/// Count the words in \p d, a word being a maximal run of characters for
+/// which isSpace() is false. Leading, trailing and repeated white space
+/// therefore never produces empty words, and a string consisting only of
+/// white space (or an empty one) contains 0 words.
+int wordCount(docstring const & d)
+{
+       int words = 0;
+       // true while the previous character belonged to a word
+       bool in_word = false;
+       for (char_type const c : d) {
+               bool const space = isSpace(c);
+               // a new word starts at every space -> non-space transition
+               if (!space && !in_word)
+                       ++words;
+               in_word = !space;
+       }
+       return words;
+}
+
+
+
  int count_bin_chars(string const & str)
  {
         QString const qstr = toqstr(str).simplified();
@@ -1071,18 +1125,20 @@ String const doSplit(String const & a, String & piece, Char delim)
         size_t i = a.find(delim);
         if (i == a.length() - 1) {
                 piece = a.substr(0, i);
-       } else if (i != String::npos) {
-               piece = a.substr(0, i);
-               tmp = a.substr(i + 1);
         } else if (i == 0) {
                 piece.erase();
                 tmp = a.substr(i + 1);
+       } else if (i != String::npos) {
+               piece = a.substr(0, i);
+               tmp = a.substr(i + 1);
         } else {
                 piece = a;
         }
         return tmp;
  }
  
+
+// FIXME: why is this specialization needed?
  template<typename Char> inline
  docstring const doSplit(docstring const & a, docstring & piece, Char delim)
  {
@@ -1090,19 +1146,19 @@ docstring const doSplit(docstring const & a, docstring & piece, Char delim)
         size_t i = a.find(delim);
         if (i == a.length() - 1) {
                 piece = a.substr(0, i);
-       } else if (i != docstring::npos) {
-               piece = a.substr(0, i);
-               tmp = a.substr(i + 1);
         } else if (i == 0) {
                 piece.erase();
                 tmp = a.substr(i + 1);
+       } else if (i != docstring::npos) {
+               piece = a.substr(0, i);
+               tmp = a.substr(i + 1);
         } else {
                 piece = a;
         }
         return tmp;
  }
  
-} // anon
+} // namespace
  
  
  string const split(string const & a, string & piece, char delim)
@@ -1171,10 +1227,10 @@ docstring const escape(docstring const & lab)
         char_type hexdigit[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
                                    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
         docstring enc;
-       for (size_t i = 0; i < lab.length(); ++i) {
-               char_type c = lab[i];
+       for (char_type const c : lab) {
                 if (c >= 128 || c == '=' || c == '%' || c == '#' || c == '$'
-                   || c == '}' || c == '{' || c == ']' || c == '[' || c == '&') {
+                   || c == '}' || c == '{' || c == ']' || c == '[' || c == '&'
+                   || c == '\\') {
                         // Although char_type is a 32 bit type we know that
                         // UCS4 occupies only 21 bits, so we don't need to
                         // encode bigger values. Test for 2^24 because we
@@ -1196,6 +1252,35 @@ docstring const escape(docstring const & lab)
  }
  
  
+/// Wrap \p arg in braces if it contains the character \p l or \p r.
+/// \p arg itself is modified; the returned value is a copy of the result.
+docstring const protectArgument(docstring & arg, char const l,
+                         char const r)
+{
+       bool const has_delimiter = contains(arg, l) || contains(arg, r);
+       if (has_delimiter) {
+               // protect brackets
+               arg = '{' + arg + '}';
+       }
+       return arg;
+}
+
+
+/// Shorten \p str to at most \p len characters, marking the cut with a
+/// HORIZONTAL ELLIPSIS (U+2026).
+/// \param mid if true, drop the middle of the string and keep len / 2
+///        characters from each end (the ellipsis replaces the last kept
+///        character of the head); otherwise cut the tail and put the
+///        ellipsis in the last remaining position.
+/// \return false if \p str already fits and was left untouched, true if
+///         it has been truncated.
+bool truncateWithEllipsis(docstring & str, size_t const len, bool const mid)
+{
+       if (str.size() <= len)
+               return false;
+       // Middle truncation needs at least one character on each side. With
+       // len == 1, len / 2 is 0 and str[hlen - 1] would index out of
+       // bounds, so that case is handled by the tail branch instead.
+       if (mid && len > 1) {
+               size_t const hlen = len / 2;
+               docstring suffix = str.substr(str.size() - hlen);
+               str.resize(hlen);
+               str[hlen - 1] = 0x2026;// HORIZONTAL ELLIPSIS
+               str += suffix;
+       } else {
+               str.resize(len);
+               // with len == 0 there is no room left for the ellipsis
+               if (len > 0)
+                       str[len - 1] = 0x2026;// HORIZONTAL ELLIPSIS
+       }
+       return true;
+}
+
+
  namespace {
  
  // this doesn't check whether str is empty, so do that first.
@@ -1219,7 +1304,7 @@ vector<docstring> wrapToVec(docstring const & str, int ind,
                 size_t const i = s.find_last_of(' ', width - 1);
                 if (i == docstring::npos || i <= size_t(ind)) {
                         // no space found
-                       s = s.substr(0, width - 3) + "...";
+                       truncateWithEllipsis(s, width);
                         break;
                 }
                 retval.push_back(s.substr(0, i));
@@ -1231,7 +1316,7 @@ vector<docstring> wrapToVec(docstring const & str, int ind,
         return retval;
  }
  
-}
+} // namespace
  
  
  docstring wrap(docstring const & str, int const ind, size_t const width)
@@ -1248,7 +1333,6 @@ docstring wrap(docstring const & str, int const ind, size_t const width)
  docstring wrapParas(docstring const & str, int const indent,
                     size_t const width, size_t const maxlines)
  {
-       docstring const dots = from_ascii("...");
         if (str.empty())
                 return docstring();
  
@@ -1267,15 +1351,15 @@ docstring wrapParas(docstring const & str, int const indent,
                         tmp.resize(maxlines - curlines);
                         docstring last = tmp.back();
                         size_t const lsize = last.size();
-                       if (lsize > width - 3) {
-                               size_t const i = last.find_last_of(' ', width - 3);
+                       if (lsize > width - 1) {
+                               size_t const i = last.find_last_of(' ', width - 1);
                                 if (i == docstring::npos || i <= size_t(indent))
                                         // no space found
-                                       last = last.substr(0, lsize - 3) + dots;
+                                       truncateWithEllipsis(last, lsize);
                                 else
-                                       last = last.substr(0, i) + dots;
+                                       truncateWithEllipsis(last, i);
                         } else
-                               last += dots;
+                               last.push_back(0x2026);//HORIZONTAL ELLIPSIS
                         tmp.pop_back();
                         tmp.push_back(last);
                 }
@@ -1290,7 +1374,8 @@ docstring wrapParas(docstring const & str, int const indent,
  namespace {
  
  template<typename String> vector<String> const
-getVectorFromStringT(String const & str, String const & delim, bool keepempty)
+getVectorFromStringT(String const & str, String const & delim,
+                     bool keepempty, bool trimit)
  {
  // Lars would like this code to go, but for now his replacement (below)
  // doesn't fullfil the same function. I have, therefore, reactivated the
@@ -1299,14 +1384,15 @@ getVectorFromStringT(String const & str, String const & delim, bool keepempty)
         vector<String> vec;
         if (str.empty())
                 return vec;
-       String keys = rtrim(str);
+       String keys = trimit ? rtrim(str) : str;
         while (true) {
                 size_t const idx = keys.find(delim);
                 if (idx == String::npos) {
-                       vec.push_back(ltrim(keys));
+                       vec.push_back(trimit ? ltrim(keys) : keys);
                         break;
                 }
-               String const key = trim(keys.substr(0, idx));
+               String const key = trimit ?
+                       trim(keys.substr(0, idx)) : keys.substr(0, idx);
                 if (!key.empty() || keepempty)
                         vec.push_back(key);
                 size_t const start = idx + delim.size();
@@ -1340,22 +1426,20 @@ template<typename String> const String
         return str;
  }
  
-} // namespace anon
+} // namespace
  
  
  vector<string> const getVectorFromString(string const & str,
-                                        string const & delim,
-                                        bool keepempty)
+        string const & delim, bool keepempty, bool trimit)
  {
-       return getVectorFromStringT<string>(str, delim, keepempty);
+       return getVectorFromStringT<string>(str, delim, keepempty, trimit);
  }
  
  
  vector<docstring> const getVectorFromString(docstring const & str,
-                                           docstring const & delim,
-                                           bool keepempty)
+        docstring const & delim, bool keepempty, bool trimit)
  {
-       return getVectorFromStringT<docstring>(str, delim, keepempty);
+       return getVectorFromStringT<docstring>(str, delim, keepempty, trimit);
  }
  
  
@@ -1385,7 +1469,53 @@ int findToken(char const * const str[], string const & search_token)
  }
  
  
-template<>
+/// Format \p x in fixed notation, without trailing zeros after the decimal
+/// point and without a trailing decimal point.
+std::string formatFPNumber(double x)
+{
+       // Need manual tweaking, QString::number(x, 'f', 16) does not work either
+       ostringstream os;
+       os << std::fixed;
+       // Prevent outputs of 23.4200000000000017 but output small numbers
+       // with at least 6 significant digits.
+       // Zero has no logarithm, and for NaN or infinity log10() does not
+       // yield a finite value that could be rounded to an int, so those
+       // values are printed with precision 0.
+       int const precision = (x == 0.0 || !std::isfinite(x))
+               ? 0 : max(6 - iround(log10(fabs(x))), 0);
+       os << std::setprecision(precision) << x;
+       string result = os.str();
+       // Only strip zeros that follow a decimal point, never integer digits.
+       if (result.find('.') != string::npos) {
+               result = rtrim(result, "0");
+               if (result[result.length()-1] == '.')
+                       result = rtrim(result, ".");
+       }
+       return result;
+}
+
+
+/// Percent-encode the UTF-8 representation of \p in with
+/// QByteArray::toPercentEncoding(), passing the UTF-8 representation of
+/// \p ex as the set of characters to exclude from encoding.
+docstring to_percent_encoding(docstring const & in, docstring const & ex)
+{
+       string const utf8_in = to_utf8(in);
+       string const utf8_ex = to_utf8(ex);
+       QByteArray const excludes(utf8_ex.c_str());
+       QByteArray const encoded =
+               QByteArray(utf8_in.c_str()).toPercentEncoding(excludes);
+       return from_utf8(string(encoded.data()));
+}
+
+
+/// Decode the percent-encoded string \p in with
+/// QByteArray::fromPercentEncoding(). The result is built from the decoded
+/// bytes as a C string, i.e. it ends at the first NUL byte.
+string from_percent_encoding(string const & in)
+{
+       QByteArray const decoded = QByteArray::fromPercentEncoding(in.c_str());
+       return string(decoded.data());
+}
+
+
+/// Count the characters of \p str that are expanded by inter-word spacing.
+int countExpanders(docstring const & str)
+{
+       // A character counts if isSpace() holds for it and its code point is
+       // above 0x0d. The latter excludes characters 09-0D, which are treated
+       // specially. (Rule derived by the original author from testing with
+       // the notepad found in qt's examples and from reading the source
+       // code.)
+       auto const expands = [](char_type const c) {
+               return c > 0x0d && isSpace(c);
+       };
+       return static_cast<int>(
+               std::count_if(str.begin(), str.end(), expands));
+}
+
+
  docstring bformat(docstring const & fmt, int arg1)
  {
         LATTEST(contains(fmt, from_ascii("%1$d")));
@@ -1394,7 +1524,6 @@ docstring bformat(docstring const & fmt, int arg1)
  }
  
  
-template<>
  docstring bformat(docstring const & fmt, long arg1)
  {
         LATTEST(contains(fmt, from_ascii("%1$d")));
@@ -1403,7 +1532,16 @@ docstring bformat(docstring const & fmt, long arg1)
  }
  
  
-template<>
+#ifdef HAVE_LONG_LONG_INT
+/// Replace the placeholder "%1$d" in \p fmt by \p arg1 converted to a
+/// docstring, then turn every "%%" of the result into a single "%".
+docstring bformat(docstring const & fmt, long long arg1)
+{
+       // the format string is expected to contain the placeholder
+       LATTEST(contains(fmt, from_ascii("%1$d")));
+       docstring const number = convert<docstring>(arg1);
+       docstring const filled = subst(fmt, from_ascii("%1$d"), number);
+       return subst(filled, from_ascii("%%"), from_ascii("%"));
+}
+#endif
+
+
  docstring bformat(docstring const & fmt, unsigned int arg1)
  {
         LATTEST(contains(fmt, from_ascii("%1$d")));
@@ -1412,8 +1550,7 @@ docstring bformat(docstring const & fmt, unsigned int arg1)
  }
  
  
-template<>
-docstring bformat(docstring const & fmt, docstring arg1)
+docstring bformat(docstring const & fmt, docstring const & arg1)
  {
         LATTEST(contains(fmt, from_ascii("%1$s")));
         docstring const str = subst(fmt, from_ascii("%1$s"), arg1);
@@ -1421,7 +1558,6 @@ docstring bformat(docstring const & fmt, docstring arg1)
  }
  
  
-template<>
  docstring bformat(docstring const & fmt, char * arg1)
  {
         LATTEST(contains(fmt, from_ascii("%1$s")));
@@ -1430,8 +1566,7 @@ docstring bformat(docstring const & fmt, char * arg1)
  }
  
  
-template<>
-docstring bformat(docstring const & fmt, docstring arg1, docstring arg2)
+docstring bformat(docstring const & fmt, docstring const & arg1, docstring const & arg2)
  {
         LATTEST(contains(fmt, from_ascii("%1$s")));
         LATTEST(contains(fmt, from_ascii("%2$s")));
@@ -1441,8 +1576,7 @@ docstring bformat(docstring const & fmt, docstring arg1, docstring arg2)
  }
  
  
-template<>
-docstring bformat(docstring const & fmt, docstring arg1, int arg2)
+docstring bformat(docstring const & fmt, docstring const & arg1, int arg2)
  {
         LATTEST(contains(fmt, from_ascii("%1$s")));
         LATTEST(contains(fmt, from_ascii("%2$d")));
@@ -1452,18 +1586,12 @@ docstring bformat(docstring const & fmt, docstring arg1, int arg2)
  }
  
  
-template<>
-docstring bformat(docstring const & fmt, char const * arg1, docstring arg2)
+docstring bformat(docstring const & fmt, char const * arg1, docstring const & arg2)
  {
-       LATTEST(contains(fmt, from_ascii("%1$s")));
-       LATTEST(contains(fmt, from_ascii("%2$s")));
-       docstring str = subst(fmt, from_ascii("%1$s"), from_ascii(arg1));
-       str = subst(fmt, from_ascii("%2$s"), arg2);
-       return subst(str, from_ascii("%%"), from_ascii("%"));
+       return bformat(fmt, from_ascii(arg1), arg2);
  }
  
  
-template<>
  docstring bformat(docstring const & fmt, int arg1, int arg2)
  {
         LATTEST(contains(fmt, from_ascii("%1$d")));
@@ -1474,8 +1602,7 @@ docstring bformat(docstring const & fmt, int arg1, int arg2)
  }
  
  
-template<>
-docstring bformat(docstring const & fmt, docstring arg1, docstring arg2, docstring arg3)
+docstring bformat(docstring const & fmt, docstring const & arg1, docstring const & arg2, docstring const & arg3)
  {
         LATTEST(contains(fmt, from_ascii("%1$s")));
         LATTEST(contains(fmt, from_ascii("%2$s")));
@@ -1487,18 +1614,34 @@ docstring bformat(docstring const & fmt, docstring arg1, docstring arg2, docstri
  }
  
  
-template<>
  docstring bformat(docstring const & fmt,
-              docstring arg1, docstring arg2, docstring arg3, docstring arg4)
+              docstring const & arg1, docstring const & arg2, docstring const & arg3, docstring const & arg4)
+{
+       LATTEST(contains(fmt, from_ascii("%1$s")));
+       LATTEST(contains(fmt, from_ascii("%2$s")));
+       LATTEST(contains(fmt, from_ascii("%3$s")));
+       LATTEST(contains(fmt, from_ascii("%4$s")));
+       docstring str = subst(fmt, from_ascii("%1$s"), arg1);
+       str = subst(str, from_ascii("%2$s"), arg2);
+       str = subst(str, from_ascii("%3$s"), arg3);
+       str = subst(str, from_ascii("%4$s"), arg4);
+       return subst(str, from_ascii("%%"), from_ascii("%"));
+}
+
+docstring bformat(docstring const & fmt, docstring const & arg1,
+                                 docstring const & arg2, docstring const & arg3,
+                                 docstring const & arg4, docstring const & arg5)
  {
         LATTEST(contains(fmt, from_ascii("%1$s")));
         LATTEST(contains(fmt, from_ascii("%2$s")));
         LATTEST(contains(fmt, from_ascii("%3$s")));
         LATTEST(contains(fmt, from_ascii("%4$s")));
+       LATTEST(contains(fmt, from_ascii("%5$s")));
         docstring str = subst(fmt, from_ascii("%1$s"), arg1);
         str = subst(str, from_ascii("%2$s"), arg2);
         str = subst(str, from_ascii("%3$s"), arg3);
         str = subst(str, from_ascii("%4$s"), arg4);
+       str = subst(str, from_ascii("%5$s"), arg5);
         return subst(str, from_ascii("%%"), from_ascii("%"));
  }