From ea3cdb15529d013bcac231c5b1f25628f5067b42 Mon Sep 17 00:00:00 2001 From: Jean-Marc Lasgouttes Date: Wed, 2 Nov 2016 17:42:56 +0100 Subject: [PATCH] QString only uses UTF-16, not UCS-4 This means that, when a string contains high-plane Unicode characters, the length of a docstring and the corresponding QString will be different: Qt will encode these characters using several 16-bit characters. We additionally have to take into account QTBUG-25536, which implies that sometimes qstring_to_ucs4(toqstr(s)) != s. It is not clear whether this bug can be a problem in other places. Fixes bug #10443. (cherry picked from commit 5d85a42bf0a182585b800a80817a6e1208ef7ec3) --- src/frontends/qt4/GuiFontMetrics.cpp | 65 ++++++++++++++++++++++++---- status.22x | 3 ++ 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/src/frontends/qt4/GuiFontMetrics.cpp b/src/frontends/qt4/GuiFontMetrics.cpp index 74ed392033..1d472b5ceb 100644 --- a/src/frontends/qt4/GuiFontMetrics.cpp +++ b/src/frontends/qt4/GuiFontMetrics.cpp @@ -206,7 +206,12 @@ int GuiFontMetrics::pos2x(docstring const & s, int const pos, bool const rtl, double const wordspacing) const { QTextLayout const & tl = getTextLayout(s, font_, rtl, wordspacing); - return static_cast<int>(tl.lineForTextPosition(pos).cursorToX(pos)); + /* Since QString is UTF-16 and docstring is UCS-4, the offsets may + * not be the same when there are high-plane Unicode characters + * (bug #10443). + */ + int const qpos = toqstr(s.substr(0, pos)).length(); + return static_cast<int>(tl.lineForTextPosition(qpos).cursorToX(qpos)); } @@ -214,10 +219,30 @@ int GuiFontMetrics::x2pos(docstring const & s, int & x, bool const rtl, double const wordspacing) const { QTextLayout const & tl = getTextLayout(s, font_, rtl, wordspacing); - int pos = tl.lineForTextPosition(0).xToCursor(x); + int const qpos = tl.lineForTextPosition(0).xToCursor(x); // correct x value to the actual cursor position. 
- x = static_cast<int>(tl.lineForTextPosition(0).cursorToX(pos)); + x = static_cast<int>(tl.lineForTextPosition(0).cursorToX(qpos)); + /* Since QString is UTF-16 and docstring is UCS-4, the offsets may + * not be the same when there are high-plane Unicode characters + * (bug #10443). + */ +#if QT_VERSION < 0x040801 || QT_VERSION >= 0x050100 + return qstring_to_ucs4(tl.text().left(qpos)).length(); +#else + /* Due to QTBUG-25536 in 4.8.1 <= Qt < 5.1.0, the string returned + * by QString::toUcs4 (used by qstring_to_ucs4) may have the wrong + * length. We work around the problem by trying all docstring + * positions until the right one is found. This is slow only if + * there are many high-plane Unicode characters. It might be + * worthwhile to implement a binary search if this shows up + * under a profiler. + */ + int pos = min(qpos, static_cast<int>(s.length())); + while (pos >= 0 && toqstr(s.substr(0, pos)).length() != qpos) + --pos; + LASSERT(pos > 0 || qpos == 0, /**/); return pos; +#endif } @@ -235,17 +260,17 @@ bool GuiFontMetrics::breakAt(docstring & s, int & x, bool const rtl, bool const */ // Unicode character ZERO WIDTH NO-BREAK SPACE QChar const zerow_nbsp(0xfeff); - QString str = zerow_nbsp + toqstr(s) + zerow_nbsp; + QString qs = zerow_nbsp + toqstr(s) + zerow_nbsp; #if 1 /* Use unicode override characters to enforce drawing direction * Source: http://www.iamcal.com/understanding-bidirectional-text/ */ if (rtl) // Right-to-left override: forces to draw text right-to-left - str = QChar(0x202E) + str; + qs = QChar(0x202E) + qs; else // Left-to-right override: forces to draw text left-to-right - str = QChar(0x202D) + str; + qs = QChar(0x202D) + qs; int const offset = 2; #else // Alternative version that breaks with Qt5 and arabic text (#10436) @@ -254,7 +279,7 @@ bool GuiFontMetrics::breakAt(docstring & s, int & x, bool const rtl, bool const int const offset = 1; #endif - tl.setText(str); + tl.setText(qs); tl.setFont(font_); QTextOption to; to.setWrapMode(force ? 
QTextOption::WrapAnywhere : QTextOption::WordWrap); @@ -267,8 +292,30 @@ bool GuiFontMetrics::breakAt(docstring & s, int & x, bool const rtl, bool const if ((force && line.textLength() == offset) || int(line.naturalTextWidth()) > x) return false; x = int(line.naturalTextWidth()); - // The offset is here to account for the extra leading characters. - s = s.substr(0, line.textLength() - offset); + /* Since QString is UTF-16 and docstring is UCS-4, the offsets may + * not be the same when there are high-plane Unicode characters + * (bug #10443). + */ + // The variable `offset' is here to account for the extra leading characters. + // The ending character zerow_nbsp has to be ignored if the line is complete. + int const qlen = line.textLength() - offset - (line.textLength() == qs.length()); +#if QT_VERSION < 0x040801 || QT_VERSION >= 0x050100 + s = qstring_to_ucs4(qs.mid(offset, qlen)); +#else + /* Due to QTBUG-25536 in 4.8.1 <= Qt < 5.1.0, the string returned + * by QString::toUcs4 (used by qstring_to_ucs4) may have the wrong + * length. We work around the problem by trying all docstring + * positions until the right one is found. This is slow only if + * there are many high-plane Unicode characters. It might be + * worthwhile to implement a binary search if this shows up + * under a profiler. + */ + int len = min(qlen, static_cast<int>(s.length())); + while (len >= 0 && toqstr(s.substr(0, len)).length() != qlen) + --len; + LASSERT(len > 0 || qlen == 0, /**/); + s = s.substr(0, len); +#endif return true; } diff --git a/status.22x b/status.22x index 49b351d26d..2719c6a126 100644 --- a/status.22x +++ b/status.22x @@ -118,6 +118,9 @@ What's new - Fix selection painting in right-to-left texts (bug 10424). +- Fix cursor movement when the document contains high-plane Unicode + characters (bug 10443). + - Allow using colors supported by xcolor inside mathed (bug 10417). - Change description of "frame of button" color to "button frame" (bug 10135). -- 2.39.5