Handle multiple spaces at row break

[features.git] / src / frontends / qt / GuiFontMetrics.cpp
diff --git a/src/frontends/qt/GuiFontMetrics.cpp b/src/frontends/qt/GuiFontMetrics.cpp

index 77e04a1ce2ebb9c79cb31430ab7acf350a50da2d..3ee27024eb90132dd79fa3232cac52485489013c 100644 (file)
--- a/src/frontends/qt/GuiFontMetrics.cpp
+++ b/src/frontends/qt/GuiFontMetrics.cpp
@@ -18,9 +18,10 @@
  #include "Dimension.h"
  
  #include "support/convert.h"
+#include "support/debug.h"
  #include "support/lassert.h"
  #include "support/lyxlib.h"
-#include "support/debug.h"
+#include "support/textutils.h"
  
  #define DISABLE_PMPROF
  #include "support/pmprof.h"
@@ -28,7 +29,12 @@
  #include <QByteArray>
  #include <QRawFont>
  #include <QtEndian>
+
+#if QT_VERSION >= 0x050100
  #include <QtMath>
+#else
+#define qDegreesToRadians(degree) (degree * (M_PI / 180))
+#endif
  
  using namespace std;
  using namespace lyx::support;
@@ -44,6 +50,11 @@ using namespace lyx::support;
  
  #ifdef BIDI_USE_OVERRIDE
  # define BIDI_OFFSET 1
+/* Unicode override characters enforce drawing direction
+ * Source: http://www.iamcal.com/understanding-bidirectional-text/
+ * Right-to-left override is 0x202e and left-to-right override is 0x202d.
+ */
+QChar const bidi_override[2] = {0x202d, 0x202e};
  #else
  # define BIDI_OFFSET 0
  #endif
@@ -52,6 +63,15 @@ using namespace lyx::support;
  #  error "Define at least one of BIDI_USE_OVERRIDE or BIDI_USE_FLAG"
  #endif
  
+
+#if QT_VERSION < 0x050000
+// Fallback hash for double values, compiled only when QT_VERSION is
+// below 5.0: the raw bytes of the value are wrapped in a QByteArray
+// and handed to the QByteArray overload of qHash.
+// NOTE(review): values that compare equal but have different byte
+// representations (e.g. 0.0 and -0.0) may hash differently; presumably
+// harmless for the cache keys hashed in this file -- confirm.
+// NOTE(review): Qt documents its own qHash(double) as introduced in
+// Qt 5.3; confirm whether Qt 5.0-5.2 builds need this fallback too.
+inline uint qHash(double key)
+{
+       return qHash(QByteArray(reinterpret_cast<char const *>(&key), sizeof(key)));
+}
+#endif
+
+
  namespace std {
  
  /*
@@ -71,25 +91,27 @@ namespace lyx {
  namespace frontend {
  
  
-/*
- * Limit (strwidth|breakat)_cache_ size to 512kB of string data.
- * Limit qtextlayout_cache_ size to 500 elements (we do not know the
- * size of the QTextLayout objects anyway).
- * Note that all these numbers are arbitrary.
- * Also, setting size to 0 is tantamount to disabling the cache.
- */
-int cache_metrics_width_size = 1 << 19;
-int cache_metrics_breakat_size = 1 << 19;
+namespace {
+// Maximal size/cost for various caches. See QCache documentation to
+// see what cost means.
+
+// Limit strwidth_cache_ total cost to 1MB of string data.
+int const strwidth_cache_max_cost = 1024 * 1024;
+// Limit breakstr_cache_ total cost to 10MB of string data.
+// This is useful for documents with very large insets.
+int const breakstr_cache_max_cost = 10 * 1024 * 1024;
  // Qt 5.x already has its own caching of QTextLayout objects
  // but it does not seem to work well on MacOS X.
  #if (QT_VERSION < 0x050000) || defined(Q_OS_MAC)
-int cache_metrics_qtextlayout_size = 500;
+// Limit qtextlayout_cache_ size to 500 elements (we do not know the
+// size of the QTextLayout objects anyway).
+int const qtextlayout_cache_max_size = 500;
  #else
-int cache_metrics_qtextlayout_size = 0;
+// Disable the cache
+int const qtextlayout_cache_max_size = 0;
  #endif
  
  
-namespace {
  /**
   * Convert a UCS4 character into a QChar.
   * This is a hack (it does only make sense for the common part of the UCS4
@@ -113,9 +135,9 @@ inline QChar const ucs4_to_qchar(char_type const ucs4)
  
  GuiFontMetrics::GuiFontMetrics(QFont const & font)
         : font_(font), metrics_(font, 0),
-         strwidth_cache_(cache_metrics_width_size),
-         breakat_cache_(cache_metrics_breakat_size),
-         qtextlayout_cache_(cache_metrics_qtextlayout_size)
+         strwidth_cache_(strwidth_cache_max_cost),
+         breakstr_cache_(breakstr_cache_max_cost),
+         qtextlayout_cache_(qtextlayout_cache_max_size)
  {
         // Determine italic slope
         double const defaultSlope = tan(qDegreesToRadians(19.0));
@@ -148,6 +170,7 @@ int GuiFontMetrics::maxDescent() const
  {
         // We add 1 as the value returned by QT is different than X
         // See http://doc.trolltech.com/2.3/qfontmetrics.html#200b74
+       // FIXME: check this
         return metrics_.descent() + 1;
  }
  
@@ -247,8 +270,8 @@ int GuiFontMetrics::rbearing(char_type c) const
  int GuiFontMetrics::width(docstring const & s) const
  {
         PROFILE_THIS_BLOCK(width);
-       if (strwidth_cache_.contains(s))
-               return strwidth_cache_[s];
+       if (int * wid_p = strwidth_cache_.object_ptr(s))
+               return *wid_p;
         PROFILE_CACHE_MISS(width);
         /* Several problems have to be taken into account:
          * * QFontMetrics::width returns a wrong value with Qt5 with
@@ -322,14 +345,108 @@ int GuiFontMetrics::signedWidth(docstring const & s) const
  }
  
  
+/// Hash a text layout cache key: the hash of the string is combined
+/// (xor) with the hash of a single number built from the direction
+/// and the word spacing.
+uint qHash(TextLayoutKey const & key)
+{
+       // -1 for left-to-right, +1 for right-to-left
+       auto const direction_sign = 2 * key.rtl - 1;
+       // The direction is carried by the sign of the word spacing
+       double signed_spacing = direction_sign * key.ws;
+       auto const string_hash = std::qHash(key.s);
+       auto const params_hash = ::qHash(signed_spacing);
+       return string_hash ^ params_hash;
+}
+
+
+namespace {
+
+// This holds a translation table between the original string and the
+// QString that we can use with QTextLayout.
+struct TextLayoutHelper
+{
+       /// Create the helper
+       /// \c s is the original string
+       /// \c isrtl is true if the string is right-to-left
+       /// \c naked is true to disable the insertion of zero width annotations
+       TextLayoutHelper(docstring const & s, bool isrtl, bool naked = false);
+
+       /// translate QString index to docstring index
+       docstring::size_type qpos2pos(int qpos) const
+       {
+               return lower_bound(pos2qpos_.begin(), pos2qpos_.end(), qpos) - pos2qpos_.begin();
+       }
+
+       /// Translate docstring index to QString index
+       int pos2qpos(docstring::size_type pos) const { return pos2qpos_[pos]; }
+
+       // The original string
+       docstring docstr;
+       // The mirror string
+       QString qstr;
+       // is string right-to-left?
+       bool rtl;
+
+private:
+       // This vector contains the QString pos for each string position
+       vector<int> pos2qpos_;
+};
+
+
+TextLayoutHelper::TextLayoutHelper(docstring const & s, bool isrtl, bool naked)
+       : docstr(s), rtl(isrtl)
+{
+       // Reserve memory for performance purpose
+       pos2qpos_.reserve(s.size());
+       qstr.reserve(2 * s.size());
+
+       /* Qt will not break at a leading or trailing space, and we need
+        * that sometimes, see http://www.lyx.org/trac/ticket/9921.
+        *
+        * To work around the problem, we enclose the string between
+        * word joiner characters so that the QTextLayout algorithm will
+        * agree to break the text at these extremal spaces.
+        */
+       // Unicode character WORD JOINER
+       QChar const word_joiner(0x2060);
+       if (!naked)
+               qstr += word_joiner;
+
+#ifdef BIDI_USE_OVERRIDE
+       if (!naked)
+               qstr += bidi_override[rtl];
+#endif
+
+       // Now translate the string character-by-character.
+       bool was_space = false;
+       for (char_type const c : s) {
+               // insert a word joiner character between consecutive spaces
+               bool const is_space = isSpace(c);
+               if (!naked && is_space && was_space)
+                       qstr += word_joiner;
+               was_space = is_space;
+               // Remember the QString index at this point
+               pos2qpos_.push_back(qstr.size());
+               // Performance: UTF-16 characters are easier
+               if (is_utf16(c))
+                       qstr += ucs4_to_qchar(c);
+               else
+                       qstr += toqstr(c);
+       }
+
+       // Final word joiner (see above)
+       if (!naked)
+               qstr += word_joiner;
+
+       // Add virtual position at the end of the string
+       pos2qpos_.push_back(qstr.size());
+
+       //QString dump = qstr;
+       //LYXERR0("TLH: " << dump.replace(word_joiner, "|").toStdString());
+}
+
+}
+
  shared_ptr<QTextLayout const>
  GuiFontMetrics::getTextLayout(docstring const & s, bool const rtl,
                                double const wordspacing) const
  {
         PROFILE_THIS_BLOCK(getTextLayout);
-       docstring const s_cache =
-               s + (rtl ? "r" : "l") + convert<docstring>(wordspacing);
-       if (auto ptl = qtextlayout_cache_[s_cache])
+       TextLayoutKey key{s, rtl, wordspacing};
+       if (auto ptl = qtextlayout_cache_[key])
                 return ptl;
         PROFILE_CACHE_MISS(getTextLayout);
         auto const ptl = make_shared<QTextLayout>();
@@ -346,15 +463,7 @@ GuiFontMetrics::getTextLayout(docstring const & s, bool const rtl,
  #endif
  
  #ifdef BIDI_USE_OVERRIDE
-       /* Use unicode override characters to enforce drawing direction
-        * Source: http://www.iamcal.com/understanding-bidirectional-text/
-        */
-       if (rtl)
-               // Right-to-left override: forces to draw text right-to-left
-               ptl->setText(QChar(0x202E) + toqstr(s));
-       else
-               // Left-to-right override: forces to draw text left-to-right
-               ptl->setText(QChar(0x202D) + toqstr(s));
+       ptl->setText(bidi_override[rtl] + toqstr(s));
  #else
         ptl->setText(toqstr(s));
  #endif
@@ -362,7 +471,7 @@ GuiFontMetrics::getTextLayout(docstring const & s, bool const rtl,
         ptl->beginLayout();
         ptl->createLine();
         ptl->endLayout();
-       qtextlayout_cache_.insert(s_cache, ptl);
+       qtextlayout_cache_.insert(key, ptl);
         return ptl;
  }
  
@@ -444,125 +553,110 @@ int GuiFontMetrics::x2pos(docstring const & s, int & x, bool const rtl,
  }
  
  
-int GuiFontMetrics::countExpanders(docstring const & str) const
+FontMetrics::Breaks
+GuiFontMetrics::breakString_helper(docstring const & s, int first_wid, int wid,
+                                   bool rtl, bool force) const
  {
-       // Numbers of characters that are expanded by inter-word spacing.  These
-       // characters are spaces, except for characters 09-0D which are treated
-       // specially.  (From a combination of testing with the notepad found in qt's
-       // examples, and reading the source code.)  In addition, consecutive spaces
-       // only count as one expander.
-       bool wasspace = false;
-       int nexp = 0;
-       for (char_type c : str)
-               if (c > 0x0d && QChar(c).isSpace()) {
-                       if (!wasspace) {
-                               ++nexp;
-                               wasspace = true;
-                       }
-               } else
-                       wasspace = false;
-       return nexp;
-}
-
+       TextLayoutHelper const tlh(s, rtl);
  
-pair<int, int>
-GuiFontMetrics::breakAt_helper(docstring const & s, int const x,
-                               bool const rtl, bool const force) const
-{
         QTextLayout tl;
-       /* Qt will not break at a leading or trailing space, and we need
-        * that sometimes, see http://www.lyx.org/trac/ticket/9921.
-        *
-        * To work around the problem, we enclose the string between
-        * zero-width characters so that the QTextLayout algorithm will
-        * agree to break the text at these extremal spaces.
-        */
-       // Unicode character ZERO WIDTH NO-BREAK SPACE
-       QChar const zerow_nbsp(0xfeff);
-       QString qs = zerow_nbsp + toqstr(s) + zerow_nbsp;
  #ifdef BIDI_USE_FLAG
         /* Use undocumented flag to enforce drawing direction
          * FIXME: This does not work with Qt 5.11 (ticket #11284).
          */
         tl.setFlags(rtl ? Qt::TextForceRightToLeft : Qt::TextForceLeftToRight);
  #endif
-
-#ifdef BIDI_USE_OVERRIDE
-       /* Use unicode override characters to enforce drawing direction
-        * Source: http://www.iamcal.com/understanding-bidirectional-text/
-        */
-       if (rtl)
-               // Right-to-left override: forces to draw text right-to-left
-               qs = QChar(0x202E) + qs;
-       else
-               // Left-to-right override: forces to draw text left-to-right
-               qs =  QChar(0x202D) + qs;
-#endif
-       int const offset = 1 + BIDI_OFFSET;
-
-       tl.setText(qs);
+       tl.setText(tlh.qstr);
         tl.setFont(font_);
         QTextOption to;
+       /*
+        * Some Asian languages split lines anywhere (no notion of
+        * word). It seems that QTextLayout is not aware of this fact.
+        * See for reference:
+        *    https://en.wikipedia.org/wiki/Line_breaking_rules_in_East_Asian_languages
+        *
+        * FIXME: Something shall be done about characters which are
+        * not allowed at the beginning or end of line.
+        */
         to.setWrapMode(force ? QTextOption::WrapAtWordBoundaryOrAnywhere
                              : QTextOption::WordWrap);
         tl.setTextOption(to);
+
+       bool first = true;
         tl.beginLayout();
-       QTextLine line = tl.createLine();
-       line.setLineWidth(x);
-       tl.createLine();
+       while(true) {
+               QTextLine line = tl.createLine();
+               if (!line.isValid())
+                       break;
+               line.setLineWidth(first ? first_wid : wid);
+               first = false;
+       }
         tl.endLayout();
-       int const line_wid = iround(line.horizontalAdvance());
-       if ((force && line.textLength() == offset) || line_wid > x)
-               return {-1, -1};
-       /* Since QString is UTF-16 and docstring is UCS-4, the offsets may
-        * not be the same when there are high-plan unicode characters
-        * (bug #10443).
-        */
-       // The variable `offset' is here to account for the extra leading characters.
-       // The ending character zerow_nbsp has to be ignored if the line is complete.
-       int const qlen = line.textLength() - offset - (line.textLength() == qs.length());
-#if QT_VERSION < 0x040801 || QT_VERSION >= 0x050100
-       int len = qstring_to_ucs4(qs.mid(offset, qlen)).length();
+
+       Breaks breaks;
+       int pos = 0;
+       for (int i = 0 ; i < tl.lineCount() ; ++i) {
+               QTextLine const & line = tl.lineAt(i);
+               int const line_epos = line.textStart() + line.textLength();
+               int const epos = tlh.qpos2pos(line_epos);
+#if QT_VERSION >= 0x050000
+               // This does not take trailing spaces into account, except for the last line.
+               int const wid = iround(line.naturalTextWidth());
+               // If the line is not the last one, trailing space is always omitted.
+               int nspc_wid = wid;
+               // For the last line, compute the width without trailing space
+               if (i + 1 == tl.lineCount() && !s.empty() && isSpace(s.back())
+                   && line.textStart() <= tlh.pos2qpos(s.size() - 1))
+                       nspc_wid = iround(line.cursorToX(tlh.pos2qpos(s.size() - 1)));
  #else
-       /* Due to QTBUG-25536 in 4.8.1 <= Qt < 5.1.0, the string returned
-        * by QString::toUcs4 (used by qstring_to_ucs4) may have wrong
-        * length. We work around the problem by trying all docstring
-        * positions until the right one is found. This is slow only if
-        * there are many high-plane Unicode characters. It might be
-        * worthwhile to implement a dichotomy search if this shows up
-        * under a profiler.
-        */
-       int len = min(qlen, static_cast<int>(s.length()));
-       while (len >= 0 && toqstr(s.substr(0, len)).length() != qlen)
-               --len;
-       LASSERT(len > 0 || qlen == 0, /**/);
+               // With some monospace fonts, the value of horizontalAdvance()
+               // can be wrong with Qt4. One hypothesis is that the invisible
+               // characters that we use are given a non-null width.
+               // FIXME: this is slower than it could be but we'll get rid of Qt4 anyway
+               docstring ss = s.substr(pos, epos - pos);
+               int const wid = width(ss);
+               if (!ss.empty() && isSpace(ss.back()))
+                       ss.pop_back();
+               int const nspc_wid = i + 1 < tl.lineCount() ? width(ss) : wid;
+#endif
+               breaks.emplace_back(epos - pos, wid, nspc_wid);
+               pos = epos;
+#if 0
+               // FIXME: should it be kept in some form?
+               if ((force && line.textLength() == brkStrOffset) || line_wid > x)
+                       return {-1, line_wid};
  #endif
-       return {len, line_wid};
+       }
+
+       return breaks;
  }
  
  
-bool GuiFontMetrics::breakAt(docstring & s, int & x, bool const rtl, bool const force) const
+uint qHash(BreakStringKey const & key)
  {
-       PROFILE_THIS_BLOCK(breakAt);
-       if (s.empty())
-               return false;
+       // assume widths are less than 10000. This fits in 32 bits.
+       uint params = key.force + 2 * key.rtl + 4 * key.first_wid + 10000 * key.wid;
+       return std::qHash(key.s) ^ ::qHash(params);
+}
+
  
-       docstring const s_cache =
-               s + convert<docstring>(x) + (rtl ? "r" : "l") + (force ? "f" : "w");
-       pair<int, int> pp;
+FontMetrics::Breaks GuiFontMetrics::breakString(docstring const & s, int first_wid, int wid,
+                                                bool rtl, bool force) const
+{
+       PROFILE_THIS_BLOCK(breakString);
+       if (s.empty())
+               return Breaks();
  
-       if (breakat_cache_.contains(s_cache))
-               pp = breakat_cache_[s_cache];
+       BreakStringKey key{s, first_wid, wid, rtl, force};
+       Breaks brks;
+       if (auto * brks_ptr = breakstr_cache_.object_ptr(key))
+               brks = *brks_ptr;
         else {
-               PROFILE_CACHE_MISS(breakAt);
-               pp = breakAt_helper(s, x, rtl, force);
-               breakat_cache_.insert(s_cache, pp, s_cache.size() * sizeof(char_type));
+               PROFILE_CACHE_MISS(breakString);
+               brks = breakString_helper(s, first_wid, wid, rtl, force);
+               breakstr_cache_.insert(key, brks, sizeof(key) + s.size() * sizeof(char_type));
         }
-       if (pp.first == -1)
-               return false;
-       s = s.substr(0, pp.first);
-       x = pp.second;
-       return true;
+       return brks;
  }
  
  
@@ -622,10 +716,17 @@ int GuiFontMetrics::width(char_type c) const
         if (value != outOfLimitMetric)
                 return value;
  
+#if QT_VERSION >= 0x050b00
+       if (is_utf16(c))
+               value = metrics_.horizontalAdvance(ucs4_to_qchar(c));
+       else
+               value = metrics_.horizontalAdvance(toqstr(docstring(1, c)));
+#else
         if (is_utf16(c))
                 value = metrics_.width(ucs4_to_qchar(c));
         else
                 value = metrics_.width(toqstr(docstring(1, c)));
+#endif
  
         width_cache_.insert(c, value);