Fix parentheses with Hebrew

author Juergen Spitzmueller <spitz@lyx.org>

Tue, 30 Oct 2018 11:33:35 +0000 (12:33 +0100)

committer Jean-Marc Lasgouttes <lasgouttes@lyx.org>

Thu, 18 Jun 2020 12:39:50 +0000 (14:39 +0200)
author Juergen Spitzmueller <spitz@lyx.org>
Tue, 30 Oct 2018 11:33:35 +0000 (12:33 +0100)
committer Jean-Marc Lasgouttes <lasgouttes@lyx.org>
Thu, 18 Jun 2020 12:39:50 +0000 (14:39 +0200)
diff --git a/development/FORMAT b/development/FORMAT

index 251e00ec09bed5f829b3ce424493f17d9be08f16..b87d395bc38f82daedf80335b2e9aa6de985a411 100644 (file)
--- a/development/FORMAT
+++ b/development/FORMAT
@@ -8,6 +8,12 @@ changes happened in particular if possible. A good example would be
  -----------------------
  
  
+2018-10-29  Guy Rutenberg <guyrutenberg@gmail.com>
+       * format incremented to 566: Fix direction of Hebrew parentheses in the LyX file.
+
+2018-10-18  Kornel Benko <kornel@lyx.org>
+       * format incremented to 565: Added Adobe Source Pro fonts.
+
  2018-09-20  Jürgen Spitzmüller <spitz@lyx.org>
         * format incremented to 564: New info-inset lyxinfo subtype layoutformat. This returns
            the current layout format.
diff --git a/lib/lyx2lyx/lyx2lyx_tools.py b/lib/lyx2lyx/lyx2lyx_tools.py

index cb1996ecb58af99018351c6bdd20fb98bacfb7d5..51412e5b31378aed4d27438b93db03c52dfdeca8 100644 (file)
--- a/lib/lyx2lyx/lyx2lyx_tools.py
+++ b/lib/lyx2lyx/lyx2lyx_tools.py
@@ -83,10 +83,13 @@ insert_document_option(document, option):
  
  remove_document_option(document, option):
    Remove _option_ as a document option.
+
+get_language_for_line(document, i):
+  Return the language setting for line number i.
  '''
  
  import re
-from parser_tools import find_token, find_end_of_inset
+from parser_tools import find_token, find_end_of_inset, get_containing_layout
  from unicode_symbols import unicode_reps
  
  # This will accept either a list of lines or a single line.
@@ -604,3 +607,15 @@ def is_document_option(document, option):
          return False
  
      return True
+
+
+def get_language_for_line(document, i):
+    " Return the language for line number i"
+    layout = get_containing_layout(document.body, i)
+    if not layout:
+        return document.language
+    start_of_par = layout[3]
+    for line in document.body[i:start_of_par:-1]:
+        if line.startswith('\\lang '):
+            return line[len('\\lang '):]
+    return document.language
diff --git a/lib/lyx2lyx/lyx_2_4.py b/lib/lyx2lyx/lyx_2_4.py

index 365ba6fc020240a04fbc336ce338d3d8184532df..b6b626c31696497eb028647a1fede0aa9ade0123 100644 (file)
--- a/lib/lyx2lyx/lyx_2_4.py
+++ b/lib/lyx2lyx/lyx_2_4.py
@@ -36,7 +36,7 @@ from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_la
  #    is_in_inset, set_bool_value
  #    find_tokens, find_token_exact, check_token
  
-from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble)
+from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble, get_language_for_line)
  #  revert_font_attrs, insert_to_preamble, latex_length
  #  get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
  #  revert_flex_inset, hex2ratio, str2bool
@@ -1381,6 +1381,22 @@ def revert_lformatinfo(document):
          i = i + 1
  
  
+def convert_hebrew_parentheses(document):
+    " Don't reverse parentheses in Hebrew text"
+    for i, line in enumerate(document.body):
+        if line.startswith('\\\\'):
+            # not a text line, skip
+            continue
+        if get_language_for_line(document, i) == 'hebrew':
+            document.body[i] = line.replace('(','\x00').replace(')','(').replace('\x00',')')
+
+
+def revert_hebrew_parentheses(document):
+    " Store parentheses in Hebrew text reversed"
+    # This only exists to keep the convert/revert naming convention
+    convert_hebrew_parentheses(document)
+
+
  ##
  # Conversion hub
  #
@@ -1408,9 +1424,11 @@ convert = [
             [563, []],
             [564, []],
             [565, [convert_AdobeFonts]], # Handle adobe fonts in GUI
+           [566, [convert_hebrew_parentheses]],
            ]
  
  revert =  [
+           [565, [revert_hebrew_parentheses]],
             [564, [revert_AdobeFonts]],
             [563, [revert_lformatinfo]],
             [562, [revert_listpargs]],
diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp

index 96f95d721ca857815a8ed6472ecf609e3e942a0e..5bad896dfdf4e13f0a09bc9d1597e52290332e16 100644 (file)
--- a/src/Paragraph.cpp
+++ b/src/Paragraph.cpp
@@ -1998,47 +1998,44 @@ char_type Paragraph::getUChar(BufferParams const & bparams,
  {
         char_type c = d->text_[pos];
  
-       // Return unchanged character in LTR languages.
-       if (!getFontSettings(bparams, pos).isRightToLeft())
+       // Return unchanged character in LTR languages
+       // or if we use polyglossia/bidi.
+       if (rp.use_polyglossia || !getFontSettings(bparams, pos).isRightToLeft())
                 return c;
  
-       // FIXME This is a complete mess due to all the language-specific
-       // special cases. We need to unify this eventually, but this
-       // requires a file format change and some thought.
-       // We also need to unify the input of parentheses in different RTL
-       // languages. Currently, some have their own methods (Arabic:
-       // 18599/lyxsvn, Hebrew: e5f42f67d/lyxgit), some don't (Urdu, Syriac).
-       // Also note that the representation in the LyX file is probably wrong
-       // (see FIXME in TextMetrics::breakRow).
-       // Most likely, we should simply rely on Qt's unicode handling here.
-       string const & lang = getFontSettings(bparams, pos).language()->lang();
+       // Without polyglossia/bidi, we need to account for some special cases.
+       // FIXME This needs to be audited!
+       // Check if:
+       // * The input is as expected for all delimiters
+       //   => checked for Hebrew!
+       // * The output matches the display in the LyX workarea
+       //   => checked for Hebrew!
+       // * The special cases below are really necessary
+       //   => checked for Hebrew!
+       // * In arabic_arabi, brackets are transformed to Arabic
+       //   Ornate Parentheses. Is this really wanted?
  
-       // With polyglossia, brackets and stuff need not be reversed in RTL scripts
-       // FIXME: The special casing for Hebrew parens is due to the special
-       // handling on input (for Hebrew in e5f42f67d/lyxgit); see #8251.
+       string const & lang = getFontSettings(bparams, pos).language()->lang();
         char_type uc = c;
-       if (rp.use_polyglossia) {
-               switch (c) {
-               case '(':
-                       if (lang == "hebrew")
-                               uc = ')';
-                       break;
-               case ')':
-                       if (lang == "hebrew")
-                               uc = '(';
-                       break;
-               }
-               return uc;
-       }
  
-       // In the following languages, brackets don't need to be reversed.
-       // Furthermore, in arabic_arabi, they are transformed to Arabic
-       // Ornate Parentheses (dunno if this is really wanted)
+       // 1. In the following languages, parentheses need to be reversed.
+       bool const reverseparens = lang == "hebrew";
+
+       // 2. In the following languages, brackets don't need to be reversed.
         bool const reversebrackets = lang != "arabic_arabtex"
                         && lang != "arabic_arabi"
-                       && lang != "farsi"; 
+                       && lang != "farsi";
  
+       // Now swap delimiters if needed.
         switch (c) {
+       case '(':
+               if (reverseparens)
+                       uc = ')';
+               break;
+       case ')':
+               if (reverseparens)
+                       uc = '(';
+               break;
         case '[':
                 if (reversebrackets)
                         uc = ']';
diff --git a/src/TextMetrics.cpp b/src/TextMetrics.cpp

index 2b0903c750d23f1398d462db0ddb8e8956300eb5..a10633842189cae9284d969b4ccdc7b54455bce9 100644 (file)
--- a/src/TextMetrics.cpp
+++ b/src/TextMetrics.cpp
@@ -892,17 +892,8 @@ bool TextMetrics::breakRow(Row & row, int const right_margin) const
                         // ¶ U+00B6 PILCROW SIGN
                         char_type const screen_char = (c == 0x2028) ? 0x2936 : 0x00B6;
                         row.add(i, screen_char, *fi, par.lookupChange(i));
-               } else {
-                       // FIXME: please someone fix the Hebrew/Arabic parenthesis mess!
-                       // see also Paragraph::getUChar.
-                       if (fi->language()->lang() == "hebrew") {
-                               if (c == '(')
-                                       c = ')';
-                               else if (c == ')')
-                                       c = '(';
-                       }
+               } else
                         row.add(i, c, *fi, par.lookupChange(i));
-               }
  
                 // add inline completion width
                 // draw logically behind the previous character
diff --git a/src/version.h b/src/version.h

index 59781fd32c13dd7fe78d2f66ce40501310f1aced..09d0ddc487819c8d8bed754278d5f2686759e6cd 100644 (file)
--- a/src/version.h
+++ b/src/version.h
@@ -32,8 +32,8 @@ extern char const * const lyx_version_info;
  
  // Do not remove the comment below, so we get merge conflict in
  // independent branches. Instead add your own.
-#define LYX_FORMAT_LYX 565 // Kornel: Adobe fonts added
-#define LYX_FORMAT_TEX2LYX 565
+#define LYX_FORMAT_LYX 566 // guyru: Fix parentheses in Hebrew
+#define LYX_FORMAT_TEX2LYX 566
  
  #if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX
  #ifndef _MSC_VER
author	Juergen Spitzmueller <spitz@lyx.org>
	Tue, 30 Oct 2018 11:33:35 +0000 (12:33 +0100)
committer	Jean-Marc Lasgouttes <lasgouttes@lyx.org>
	Thu, 18 Jun 2020 12:39:50 +0000 (14:39 +0200)
development/FORMAT		patch \| blob \| history
lib/lyx2lyx/lyx2lyx_tools.py		patch \| blob \| history
lib/lyx2lyx/lyx_2_4.py		patch \| blob \| history
src/Paragraph.cpp		patch \| blob \| history
src/TextMetrics.cpp		patch \| blob \| history
src/version.h		patch \| blob \| history