From d6337a248847aa4c72f8a813d92d349d1035c79f Mon Sep 17 00:00:00 2001 From: Jean-Marc Lasgouttes Date: Fri, 13 Jun 2014 16:33:58 +0200 Subject: [PATCH] Remove obsolete code to handle Hebrew and Arabic characters This is handled by Qt now. Note that a small optimization (do not draw text that is to the left of WorkArea) is removed because it cannot be guaranteed to be exact anymore. It was probably not very useful anyway, and would become useless once the RowPainter is rewritten to use Row information. Update 00README_STR_METRICS_BRANCH. --- 00README_STR_METRICS_BRANCH | 26 +++-- src/Encoding.cpp | 221 ------------------------------------ src/Encoding.h | 21 ---- src/Paragraph.cpp | 40 ------- src/Paragraph.h | 2 - src/ParagraphMetrics.cpp | 24 +--- src/rowpainter.cpp | 10 +- 7 files changed, 27 insertions(+), 317 deletions(-) diff --git a/00README_STR_METRICS_BRANCH b/00README_STR_METRICS_BRANCH index 2dc3cadd11..66686237c3 100644 --- a/00README_STR_METRICS_BRANCH +++ b/00README_STR_METRICS_BRANCH @@ -16,6 +16,7 @@ Currently everything is supposed to work for both LTR and RTL text. The bugs fixed and caused by this branch are tracked at ticket #9003: http://www.lyx.org/trac/ticket/9003 + What is done: * Make TextMetrics methods operate on Row objects: breakRow and @@ -38,10 +39,7 @@ What is done: * Implement proper string metrics computation (with cache). Remove useless workarounds which disable kerning and ligatures. -* Draw also RtL text string-wise. This both speeds-up drawing and - prepares for code removal, since we now rely on Qt to do things we - use to do by ourselves (see isArabic* and isHebrew* code in - Encodings.cpp). +* Draw also RtL text string-wise. This speeds-up drawing. * Do not cut strings at separators in RowPainter when text is not justified. This speeds-up painting by reducing the number of strings @@ -51,16 +49,19 @@ What is done: ligature/kerning breaking in latin text, and bad rendering problems in Arabic. 
+* Remove homebrew Arabic and Hebrew support from Encoding.cpp. We now + rely on Qt to handle complex scripts. -Next steps: -* Fix bugs uncovered by testing. +Next steps: * Get rid of LyXRC::force_paint_single_char, which is not used anymore in the source. * Maybe get rid of LyXRC::rtl_support, which does not have a real use case. +* Fix bugs uncovered by testing. + * Profile and see how performance can be improved. @@ -70,9 +71,14 @@ Steps for later (aka out of the scope of this branch): in principle, but the code is intricate and needs some careful analysis. The first thing that needs to be done is to break row elements with the same criteria. Currently breakRow does not - consider on-the-fly spell-checking and selection changes, but it is - not clear to me that it is required. Moreover, this thing would only - work if we are sure that the Row object is up-to-date when drawing - happens. This depends on the update machinery. + consider on-the-fly spell-checking, but it is not clear to me that + it is required. Moreover, this thing would only work if we are sure + that the Row object is up-to-date when drawing happens. This depends + on the update machinery. This would allow to get rid of the Bidi.cpp code. + +* Change Row object to operate only on integers and not doubles. Then, + rewrite the computation of spacing in justified paragraphs so that + strings can be drawn without cutting at separators. This will improve + performance. 
diff --git a/src/Encoding.cpp b/src/Encoding.cpp index a6f6a60ea3..e4bbbc044a 100644 --- a/src/Encoding.cpp +++ b/src/Encoding.cpp @@ -42,190 +42,6 @@ Encodings::MathSymbolSet Encodings::mathsym; namespace { -char_type arabic_table[172][4] = { - {0xfe80, 0xfe80, 0xfe80, 0xfe80}, // 0x0621 = hamza - {0xfe81, 0xfe82, 0xfe81, 0xfe82}, // 0x0622 = ligature madda on alef - {0xfe83, 0xfe84, 0xfe83, 0xfe84}, // 0x0623 = ligature hamza on alef - {0xfe85, 0xfe86, 0xfe85, 0xfe86}, // 0x0624 = ligature hamza on waw - {0xfe87, 0xfe88, 0xfe87, 0xfe88}, // 0x0625 = ligature hamza under alef - {0xfe89, 0xfe8a, 0xfe8b, 0xfe8c}, // 0x0626 = ligature hamza on ya - {0xfe8d, 0xfe8e, 0xfe8d, 0xfe8e}, // 0x0627 = alef - {0xfe8f, 0xfe90, 0xfe91, 0xfe92}, // 0x0628 = baa - {0xfe93, 0xfe94, 0xfe93, 0xfe94}, // 0x0629 = taa marbuta - {0xfe95, 0xfe96, 0xfe97, 0xfe98}, // 0x062a = taa - {0xfe99, 0xfe9a, 0xfe9b, 0xfe9c}, // 0x062b = thaa - {0xfe9d, 0xfe9e, 0xfe9f, 0xfea0}, // 0x062c = jeem - {0xfea1, 0xfea2, 0xfea3, 0xfea4}, // 0x062d = haa - {0xfea5, 0xfea6, 0xfea7, 0xfea8}, // 0x062e = khaa - {0xfea9, 0xfeaa, 0xfea9, 0xfeaa}, // 0x062f = dal - - {0xfeab, 0xfeac, 0xfeab, 0xfeac}, // 0x0630 = thal - {0xfead, 0xfeae, 0xfead, 0xfeae}, // 0x0631 = ra - {0xfeaf, 0xfeb0, 0xfeaf, 0xfeb0}, // 0x0632 = zain - {0xfeb1, 0xfeb2, 0xfeb3, 0xfeb4}, // 0x0633 = seen - {0xfeb5, 0xfeb6, 0xfeb7, 0xfeb8}, // 0x0634 = sheen - {0xfeb9, 0xfeba, 0xfebb, 0xfebc}, // 0x0635 = sad - {0xfebd, 0xfebe, 0xfebf, 0xfec0}, // 0x0636 = dad - {0xfec1, 0xfec2, 0xfec3, 0xfec4}, // 0x0637 = tah - {0xfec5, 0xfec6, 0xfec7, 0xfec8}, // 0x0638 = zah - {0xfec9, 0xfeca, 0xfecb, 0xfecc}, // 0x0639 = ain - {0xfecd, 0xfece, 0xfecf, 0xfed0}, // 0x063a = ghain - {0, 0, 0, 0}, // 0x063b - {0, 0, 0, 0}, // 0x063c - {0, 0, 0, 0}, // 0x063d - {0, 0, 0, 0}, // 0x063e - {0, 0, 0, 0}, // 0x063f - - {0, 0, 0, 0}, // 0x0640 - {0xfed1, 0xfed2, 0xfed3, 0xfed4}, // 0x0641 = fa - {0xfed5, 0xfed6, 0xfed7, 0xfed8}, // 0x0642 = qaf - {0xfed9, 0xfeda, 
0xfedb, 0xfedc}, // 0x0643 = kaf - {0xfedd, 0xfede, 0xfedf, 0xfee0}, // 0x0644 = lam - {0xfee1, 0xfee2, 0xfee3, 0xfee4}, // 0x0645 = meem - {0xfee5, 0xfee6, 0xfee7, 0xfee8}, // 0x0646 = noon - {0xfee9, 0xfeea, 0xfeeb, 0xfeec}, // 0x0647 = ha - {0xfeed, 0xfeee, 0xfeed, 0xfeee}, // 0x0648 = waw - {0xfeef, 0xfef0, 0xfeef, 0xfef0}, // 0x0649 = alef maksura - {0xfef1, 0xfef2, 0xfef3, 0xfef4}, // 0x064a = ya - {0x065b, 0x065b, 0x065b, 0x065b}, // 0x064b = fathatan - {0x065c, 0x065c, 0x065c, 0x065c}, // 0x064c = dammatan - {0x064d, 0x064d, 0x064d, 0x064d}, // 0x064d = kasratan - {0x064e, 0x064e, 0x064e, 0x064e}, // 0x064e = fatha - {0x064f, 0x064f, 0x064f, 0x064f}, // 0x064f = damma - - {0x0650, 0x0650, 0x0650, 0x0650}, // 0x0650 = kasra - {0x0651, 0x0651, 0x0651, 0x0651}, // 0x0651 = shadda - {0x0652, 0x0652, 0x0652, 0x0652}, // 0x0652 = sukun - - {0, 0, 0, 0}, // 0x0653 - {0, 0, 0, 0}, // 0x0654 - {0, 0, 0, 0}, // 0x0655 - {0, 0, 0, 0}, // 0x0656 - {0, 0, 0, 0}, // 0x0657 - {0, 0, 0, 0}, // 0x0658 - {0, 0, 0, 0}, // 0x0659 - {0, 0, 0, 0}, // 0x065a - {0, 0, 0, 0}, // 0x065b - {0, 0, 0, 0}, // 0x065c - {0, 0, 0, 0}, // 0x065d - {0, 0, 0, 0}, // 0x065e - {0, 0, 0, 0}, // 0x065f - {0, 0, 0, 0}, // 0x0660 - {0, 0, 0, 0}, // 0x0661 - {0, 0, 0, 0}, // 0x0662 - {0, 0, 0, 0}, // 0x0663 - {0, 0, 0, 0}, // 0x0664 - {0, 0, 0, 0}, // 0x0665 - {0, 0, 0, 0}, // 0x0666 - {0, 0, 0, 0}, // 0x0667 - {0, 0, 0, 0}, // 0x0668 - {0, 0, 0, 0}, // 0x0669 - {0, 0, 0, 0}, // 0x066a - {0, 0, 0, 0}, // 0x066b - {0, 0, 0, 0}, // 0x066c - {0, 0, 0, 0}, // 0x066d - {0, 0, 0, 0}, // 0x066e - {0, 0, 0, 0}, // 0x066f - {0, 0, 0, 0}, // 0x0670 - {0, 0, 0, 0}, // 0x0671 - {0, 0, 0, 0}, // 0x0672 - {0, 0, 0, 0}, // 0x0673 - {0, 0, 0, 0}, // 0x0674 - {0, 0, 0, 0}, // 0x0675 - {0, 0, 0, 0}, // 0x0676 - {0, 0, 0, 0}, // 0x0677 - {0, 0, 0, 0}, // 0x0678 - {0, 0, 0, 0}, // 0x0679 - {0, 0, 0, 0}, // 0x067a - {0, 0, 0, 0}, // 0x067b - {0, 0, 0, 0}, // 0x067c - {0, 0, 0, 0}, // 0x067d - {0xfb56, 0xfb57, 0xfb58, 
0xfb59}, // 0x067e = peh - {0, 0, 0, 0}, // 0x067f - {0, 0, 0, 0}, // 0x0680 - {0, 0, 0, 0}, // 0x0681 - {0, 0, 0, 0}, // 0x0682 - {0, 0, 0, 0}, // 0x0683 - {0, 0, 0, 0}, // 0x0684 - {0, 0, 0, 0}, // 0x0685 - {0xfb7a, 0xfb7b, 0xfb7c, 0xfb7d}, // 0x0686 = tcheh - {0, 0, 0, 0}, // 0x0687 - {0, 0, 0, 0}, // 0x0688 - {0, 0, 0, 0}, // 0x0689 - {0, 0, 0, 0}, // 0x068a - {0, 0, 0, 0}, // 0x068b - {0, 0, 0, 0}, // 0x068c - {0, 0, 0, 0}, // 0x068d - {0, 0, 0, 0}, // 0x068e - {0, 0, 0, 0}, // 0x068f - {0, 0, 0, 0}, // 0x0690 - {0, 0, 0, 0}, // 0x0691 - {0, 0, 0, 0}, // 0x0692 - {0, 0, 0, 0}, // 0x0693 - {0, 0, 0, 0}, // 0x0694 - {0, 0, 0, 0}, // 0x0695 - {0, 0, 0, 0}, // 0x0696 - {0, 0, 0, 0}, // 0x0697 - {0xfb8a, 0xfb8b, 0xfb8a, 0xfb8b}, // 0x0698 = jeh - {0, 0, 0, 0}, // 0x0699 - {0, 0, 0, 0}, // 0x069a - {0, 0, 0, 0}, // 0x069b - {0, 0, 0, 0}, // 0x069c - {0, 0, 0, 0}, // 0x069d - {0, 0, 0, 0}, // 0x069e - {0, 0, 0, 0}, // 0x069f - {0, 0, 0, 0}, // 0x06a0 - {0, 0, 0, 0}, // 0x06a1 - {0, 0, 0, 0}, // 0x06a2 - {0, 0, 0, 0}, // 0x06a3 - {0, 0, 0, 0}, // 0x06a4 - {0, 0, 0, 0}, // 0x06a5 - {0, 0, 0, 0}, // 0x06a6 - {0, 0, 0, 0}, // 0x06a7 - {0, 0, 0, 0}, // 0x06a8 - {0xfb8e, 0xfb8f, 0xfb90, 0xfb91}, // 0x06a9 = farsi kaf - {0, 0, 0, 0}, // 0x06aa - {0, 0, 0, 0}, // 0x06ab - {0, 0, 0, 0}, // 0x06ac - {0, 0, 0, 0}, // 0x06ad - {0, 0, 0, 0}, // 0x06ae - {0xfb92, 0xfb93, 0xfb94, 0xfb95}, // 0x06af = gaf - {0, 0, 0, 0}, // 0x06b0 - {0, 0, 0, 0}, // 0x06b1 - {0, 0, 0, 0}, // 0x06b2 - {0, 0, 0, 0}, // 0x06b3 - {0, 0, 0, 0}, // 0x06b4 - {0, 0, 0, 0}, // 0x06b5 - {0, 0, 0, 0}, // 0x06b6 - {0, 0, 0, 0}, // 0x06b7 - {0, 0, 0, 0}, // 0x06b8 - {0, 0, 0, 0}, // 0x06b9 - {0, 0, 0, 0}, // 0x06ba - {0, 0, 0, 0}, // 0x06bb - {0, 0, 0, 0}, // 0x06bc - {0, 0, 0, 0}, // 0x06bd - {0, 0, 0, 0}, // 0x06be - {0, 0, 0, 0}, // 0x06bf - {0, 0, 0, 0}, // 0x06c0 - {0, 0, 0, 0}, // 0x06c1 - {0, 0, 0, 0}, // 0x06c2 - {0, 0, 0, 0}, // 0x06c3 - {0, 0, 0, 0}, // 0x06c4 - {0, 0, 0, 0}, // 0x06c5 - {0, 0, 0, 0}, 
// 0x06c6 - {0, 0, 0, 0}, // 0x06c7 - {0, 0, 0, 0}, // 0x06c8 - {0, 0, 0, 0}, // 0x06c9 - {0, 0, 0, 0}, // 0x06ca - {0, 0, 0, 0}, // 0x06cb - {0xfbfc, 0xfbfd, 0xfbfe, 0xfbff} // 0x06cc = farsi yeh -}; - - -char_type const arabic_start = 0x0621; -char_type const arabic_end = 0x06cc; - - typedef map CharInfoMap; CharInfoMap unicodesymbols; @@ -711,37 +527,6 @@ docstring Encodings::fromLaTeXCommand(docstring const & cmd, int cmdtype, } -bool Encodings::isHebrewComposeChar(char_type c) -{ - return c <= 0x05c2 && c >= 0x05b0 && c != 0x05be && c != 0x05c0; -} - - -// Special Arabic letters are ones that do not get connected from left -// they are hamza, alef_madda, alef_hamza, waw_hamza, alef_hamza_under, -// alef, tah_marbota, dal, thal, rah, zai, wow, alef_maksoura - -bool Encodings::isArabicSpecialChar(char_type c) -{ - return (c >= 0x0621 && c <= 0x0625) || (c >= 0x0630 && c <= 0x0632) - || c == 0x0627 || c == 0x0629 || c == 0x062f || c == 0x0648 - || c == 0x0649 || c == 0x0698; -} - - -bool Encodings::isArabicComposeChar(char_type c) -{ - return c >= 0x064b && c <= 0x0652; -} - - -bool Encodings::isArabicChar(char_type c) -{ - return c >= arabic_start && c <= arabic_end - && arabic_table[c-arabic_start][0]; -} - - CharInfo const & Encodings::unicodeCharInfo(char_type c) { static CharInfo empty; @@ -750,12 +535,6 @@ CharInfo const & Encodings::unicodeCharInfo(char_type c) } -char_type Encodings::transformChar(char_type c, Encodings::LetterForm form) -{ - return isArabicChar(c) ? 
arabic_table[c-arabic_start][form] : c; -} - - bool Encodings::isCombiningChar(char_type c) { CharInfoMap::const_iterator const it = unicodesymbols.find(c); diff --git a/src/Encoding.h b/src/Encoding.h index ed9c27e465..7021596ef3 100644 --- a/src/Encoding.h +++ b/src/Encoding.h @@ -255,29 +255,8 @@ public: /// const_iterator end() const { return encodinglist.end(); } - /// - enum LetterForm { - /// - FORM_ISOLATED, - /// - FORM_FINAL, - /// - FORM_INITIAL, - /// - FORM_MEDIAL - }; - /// - static bool isHebrewComposeChar(char_type c); - /// - static bool isArabicComposeChar(char_type c); - /// - static bool isArabicSpecialChar(char_type c); - /// - static bool isArabicChar(char_type c); /// Accessor for the unicode information table. static CharInfo const & unicodeCharInfo(char_type c); - /// - static char_type transformChar(char_type c, LetterForm form); /// Is this a combining char? static bool isCombiningChar(char_type c); /// Return the TIPA shortcut diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp index c33712d416..266c238f38 100644 --- a/src/Paragraph.cpp +++ b/src/Paragraph.cpp @@ -3496,46 +3496,6 @@ bool Paragraph::allowEmpty() const } -char_type Paragraph::transformChar(char_type c, pos_type pos) const -{ - if (!Encodings::isArabicChar(c)) - return c; - - char_type prev_char = ' '; - char_type next_char = ' '; - - for (pos_type i = pos - 1; i >= 0; --i) { - char_type const par_char = d->text_[i]; - if (!Encodings::isArabicComposeChar(par_char)) { - prev_char = par_char; - break; - } - } - - for (pos_type i = pos + 1, end = size(); i < end; ++i) { - char_type const par_char = d->text_[i]; - if (!Encodings::isArabicComposeChar(par_char)) { - next_char = par_char; - break; - } - } - - if (Encodings::isArabicChar(next_char)) { - if (Encodings::isArabicChar(prev_char) && - !Encodings::isArabicSpecialChar(prev_char)) - return Encodings::transformChar(c, Encodings::FORM_MEDIAL); - else - return Encodings::transformChar(c, Encodings::FORM_INITIAL); - } else { - 
if (Encodings::isArabicChar(prev_char) && - !Encodings::isArabicSpecialChar(prev_char)) - return Encodings::transformChar(c, Encodings::FORM_FINAL); - else - return Encodings::transformChar(c, Encodings::FORM_ISOLATED); - } -} - - bool Paragraph::brokenBiblio() const { // there is a problem if there is no bibitem at position 0 or diff --git a/src/Paragraph.h b/src/Paragraph.h index 77f1260fed..ef99c9ffa4 100644 --- a/src/Paragraph.h +++ b/src/Paragraph.h @@ -431,8 +431,6 @@ public: /// return true if we allow this par to stay empty bool allowEmpty() const; /// - char_type transformChar(char_type c, pos_type pos) const; - /// ParagraphParameters & params(); /// ParagraphParameters const & params() const; diff --git a/src/ParagraphMetrics.cpp b/src/ParagraphMetrics.cpp index 93b0461161..97170bb96a 100644 --- a/src/ParagraphMetrics.cpp +++ b/src/ParagraphMetrics.cpp @@ -215,27 +215,15 @@ int ParagraphMetrics::singleWidth(pos_type pos, Font const & font) const if (Inset const * inset = par_->getInset(pos)) return insetDimension(inset).wid; - char_type c = par_->getChar(pos); + char_type const c = par_->getChar(pos); if (c == '\t') return 4 * theFontMetrics(font).width(' '); - - if (!isPrintable(c)) - return theFontMetrics(font).width(c); - - Language const * language = font.language(); - if (language->rightToLeft()) { - if (language->lang() == "arabic_arabtex" || - language->lang() == "arabic_arabi" || - language->lang() == "farsi") { - if (Encodings::isArabicComposeChar(c)) - return 0; - c = par_->transformChar(c, pos); - } else if (language->lang() == "hebrew" && - Encodings::isHebrewComposeChar(c)) { - return 0; - } - } + + // Note that this function is only called in + // RowPainter::paintText, and only used for characters that do + // not require handling of compose chars or ligatures. It can + // therefore be kept simple. 
return theFontMetrics(font).width(c); } diff --git a/src/rowpainter.cpp b/src/rowpainter.cpp index b6e7f24f95..ee4cd18c8a 100644 --- a/src/rowpainter.cpp +++ b/src/rowpainter.cpp @@ -774,13 +774,11 @@ void RowPainter::paintText() font_span.last = inlineCompletionVPos; } + // Note that this value will only be used in + // situations where no ligature or composition of + // characters is needed (see comments in uses of width_pos). const int width_pos = pm_.singleWidth(pos, font); - if (x_ + width_pos < 0) { - x_ += width_pos; - ++vpos; - continue; - } Change const & change = par_.lookupChange(pos); if (change.changed() && !change_running.changed()) { change_running = change; @@ -815,6 +813,7 @@ void RowPainter::paintText() int const lwidth = theFontMetrics(labelFont()) .width(layout.labelsep); + // width_pos is either the width of a space or an inset x_ += row_.label_hfill + lwidth - width_pos; } @@ -825,6 +824,7 @@ void RowPainter::paintText() if (par_.isSeparator(pos)) { Font const orig_font = text_metrics_.displayFont(pit_, pos); double const orig_x = x_; + // width_pos is the width of a space double separator_width = width_pos; if (pos >= body_pos) separator_width += row_.separator; -- 2.39.5