Improve LaTeX format detection

[lyx.git] / src / support / lstrings.cpp
diff --git a/src/support/lstrings.cpp b/src/support/lstrings.cpp

index 9e5301fc004991e406b365e7ff4cce69f62e1943..339318989f188d646b9e2866a3bbe400ff6f85c9 100644 (file)
--- a/src/support/lstrings.cpp
+++ b/src/support/lstrings.cpp
@@ -15,15 +15,11 @@
  #include "support/lstrings.h"
  
  #include "support/convert.h"
-#include "support/gettext.h"
  #include "support/qstring_helpers.h"
-#include "support/textutils.h"
  
-#include <boost/tokenizer.hpp>
  #include "support/lassert.h"
  
  #include <QString>
-#include <QVector>
  
  #include <cstdio>
  #include <algorithm>
@@ -116,6 +112,11 @@ bool isPrintable(char_type c)
                 // assume that all non-utf16 characters are printable
                 return true;
         }
+       // Not yet recognized by QChar::isPrint()
+       // See https://bugreports.qt-project.org/browse/QTBUG-12144
+       // LATIN CAPITAL LETTER SHARP S
+       else if (c == 0x1e9e)
+               return true;
         return ucs4_to_qchar(c).isPrint();
  }
  
@@ -147,13 +148,13 @@ bool isSpace(char_type c)
  }
  
  
-bool isDigit(char_type c)
+bool isNumber(char_type c)
  {
         if (!is_utf16(c))
-               // assume that no non-utf16 character is a digit
+               // assume that no non-utf16 character is a numeral
                 // c outside the UCS4 range is caught as well
                 return false;
-       return ucs4_to_qchar(c).isDigit();
+       return ucs4_to_qchar(c).isNumber();
  }
  
  
@@ -165,8 +166,13 @@ bool isDigitASCII(char_type c)
  
  bool isAlnumASCII(char_type c)
  {
-       return ('0' <= c && c <= '9')
-               || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
+       return isAlphaASCII(c) || isDigitASCII(c);
+}
+
+
+/// \return true if \a c is a 7-bit ASCII code point (0x00..0x7f).
+/// The test masks off the low seven bits instead of comparing with
+/// "< 0x80": a plain comparison would accept negative values if
+/// char_type is a signed type (NOTE(review): its definition is not
+/// visible here -- confirm), e.g. when a negative plain char is passed.
+/// For unsigned char_type the result is identical to "c < 0x80".
+bool isASCII(char_type c)
+{
+       return (c & ~char_type(0x7f)) == 0;
+}
  
  
@@ -266,7 +272,7 @@ bool isStrInt(string const & str)
  
         string::const_iterator end = tmpstr.end();
         for (; cit != end; ++cit)
-               if (!isdigit((*cit)))
+               if (!isDigitASCII(*cit))
                         return false;
  
         return true;
@@ -286,7 +292,7 @@ bool isStrUnsignedInt(string const & str)
         string::const_iterator cit = tmpstr.begin();
         string::const_iterator end = tmpstr.end();
         for (; cit != end; ++cit)
-               if (!isdigit((*cit)))
+               if (!isDigitASCII(*cit))
                         return false;
  
         return true;
@@ -310,7 +316,7 @@ bool isStrDbl(string const & str)
                 ++cit;
         string::const_iterator end = tmpstr.end();
         for (; cit != end; ++cit) {
-               if (!isdigit(*cit) && *cit != '.')
+               if (!isDigitASCII(*cit) && *cit != '.')
                         return false;
                 if ('.' == (*cit)) {
                         if (found_dot)
@@ -322,24 +328,18 @@ bool isStrDbl(string const & str)
  }
  
  
-bool hasDigit(docstring const & str)
+bool hasDigitASCII(docstring const & str)
  {
-       if (str.empty())
-               return false;
-
         docstring::const_iterator cit = str.begin();
         docstring::const_iterator const end = str.end();
-       for (; cit != end; ++cit) {
-               if (*cit == ' ')
-                       continue;
-               if (isdigit((*cit)))
+       for (; cit != end; ++cit)
+               if (isDigitASCII(*cit))
                         return true;
-       }
         return false;
  }
  
  
-static bool isHexChar(char_type c)
+bool isHexChar(char_type c)
  {
         return c == '0' ||
                 c == '1' ||
@@ -409,14 +409,14 @@ bool isAscii(string const & str)
  
  char lowercase(char c)
  {
-       LASSERT(static_cast<unsigned char>(c) < 0x80, /**/);
+       LASSERT(isASCII(c), /**/);
         return char(tolower(c));
  }
  
  
  char uppercase(char c)
  {
-       LASSERT(static_cast<unsigned char>(c) < 0x80, /**/);
+       LASSERT(isASCII(c), /**/);
         return char(toupper(c));
  }
  
@@ -882,6 +882,18 @@ docstring const subst(docstring const & a,
  }
  
  
+/// Count all occurrences of char \a chr inside \a str.
+/// \return the number of matching characters (0 for an empty string).
+int count_char(string const & str, char chr)
+{
+       // std::count (from <algorithm>) replaces the hand-written iterator
+       // loop; its difference_type result is narrowed to the declared
+       // int return type.
+       return static_cast<int>(std::count(str.begin(), str.end(), chr));
+}
+
+
  /// Count all occurrences of char \a chr inside \a str
  int count_char(docstring const & str, docstring::value_type chr)
  {
@@ -1069,6 +1081,20 @@ string const rsplit(string const & a, string & piece, char delim)
  }
  
  
+/// Split \a a at the last occurrence of \a delim.
+/// If the delimiter is found, \a piece receives everything before it and
+/// the text after it is returned. If it is not found, \a piece is emptied
+/// and an empty string is returned.
+docstring const rsplit(docstring const & a, docstring & piece, char_type delim)
+{
+       size_t const i = a.rfind(delim);
+       if (i == docstring::npos) {
+               // delimiter was not found
+               piece.erase();
+               return docstring();
+       }
+       // delimiter was found: read the tail before writing to piece, so
+       // the call stays valid when piece refers to the same string as a
+       // (otherwise substr(i + 1) would run on the already truncated
+       // string and throw std::out_of_range)
+       docstring tail = a.substr(i + 1);
+       piece = a.substr(0, i);
+       return tail;
+}
+
+
  docstring const rsplit(docstring const & a, char_type delim)
  {
         docstring tmp;
@@ -1086,7 +1112,8 @@ docstring const escape(docstring const & lab)
         docstring enc;
         for (size_t i = 0; i < lab.length(); ++i) {
                 char_type c = lab[i];
-               if (c >= 128 || c == '=' || c == '%') {
+               if (c >= 128 || c == '=' || c == '%' || c == '#' || c == '$'
+                   || c == '}' || c == '{' || c == ']' || c == '[' || c == '&') {
                         // Although char_type is a 32 bit type we know that
                         // UCS4 occupies only 21 bits, so we don't need to
                         // encode bigger values. Test for 2^24 because we
@@ -1160,6 +1187,7 @@ docstring wrap(docstring const & str, int const ind, size_t const width)
  docstring wrapParas(docstring const & str, int const indent,
                     size_t const width, size_t const maxlines)
  {
+       docstring const dots = from_ascii("...");
         if (str.empty())
                 return docstring();
  
@@ -1174,9 +1202,21 @@ docstring wrapParas(docstring const & str, int const indent,
                 if (nlines == 0)
                         continue;
                 size_t const curlines = retval.size();
-               if (maxlines > 0 && curlines + nlines >= maxlines) {
-                       tmp.resize(maxlines - curlines - 1);
-                       tmp.push_back(from_ascii("..."));
+               if (maxlines > 0 && curlines + nlines > maxlines) {
+                       tmp.resize(maxlines - curlines);
+                       docstring last = tmp.back();
+                       size_t const lsize = last.size();
+                       if (lsize > width - 3) {
+                               size_t const i = last.find_last_of(' ', width - 3);
+                               if (i == docstring::npos || i <= size_t(indent))
+                                       // no usable space: none found, or only within the indent
+                                       last = last.substr(0, lsize - 3) + dots;
+                               else
+                                       last = last.substr(0, i) + dots;
+                       } else
+                               last += dots;
+                       tmp.pop_back();
+                       tmp.push_back(last);
                 }
                 retval.insert(retval.end(), tmp.begin(), tmp.end());
                 if (maxlines > 0 && retval.size() >= maxlines)
@@ -1284,12 +1324,6 @@ int findToken(char const * const str[], string const & search_token)
  }
  
  
-string const languageTestString()
-{
-       return N_("[[Replace with the code of your language]]");
-}
-
-
  template<>
  docstring bformat(docstring const & fmt, int arg1)
  {