From: Günter Milde <milde@lyx.org>
Date: Sun, 21 Jan 2018 18:55:27 +0000 (+0100)
Subject: Fix preamble-code removal in lyx2lyx. Do some optimizations.
X-Git-Tag: lyx-2.4.0dev-acb2ca7b~3988
X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=8e825de4b27f02ed2f24cb73ef84c2d6d2260278;p=features.git

Fix preamble-code removal in lyx2lyx. Do some optimizations.

Fix a failure of revert_dashes() found by the lyx2lyx ctests.
The fix uses an efficient function to find a given sequence of lines
in a list of lines.

Some optimizations using Python idioms instead of C-like code.
---

diff --git a/lib/lyx2lyx/lyx_2_2.py b/lib/lyx2lyx/lyx_2_2.py
index 2fb00c9a6c..a4c899079e 100644
--- a/lib/lyx2lyx/lyx_2_2.py
+++ b/lib/lyx2lyx/lyx_2_2.py
@@ -29,14 +29,14 @@ import sys, os
 #  find_token_backwards, is_in_inset, get_value, get_quoted_value, \
 #  del_token, check_token, get_option_value
 
-from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert, lyx2latex, \
-  lyx2verbatim, length_in_bp, convert_info_insets
-#  insert_to_preamble, latex_length, revert_flex_inset, \
-#  revert_font_attrs, hex2ratio, str2bool
+from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
+    lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets)
+#   insert_to_preamble, latex_length, revert_flex_inset,
+#   revert_font_attrs, hex2ratio, str2bool
 
-from parser_tools import find_token, find_token_backwards, find_re, \
-     find_end_of_inset, find_end_of_layout, find_nonempty_line, \
-     get_containing_layout, get_value, check_token
+from parser_tools import (find_end_of_inset, find_end_of_layout,
+    find_nonempty_line, find_re, find_slice, find_token, find_token_backwards,
+    get_containing_layout, get_value, check_token)
 
 ####################################################################
 # Private helper functions
@@ -706,24 +706,20 @@ def revert_dashes(document):
     Remove preamble code from 2.3->2.2 conversion.
     """
     # Remove preamble code from 2.3->2.2 conversion:
-    for i, line in enumerate(document.preamble):
-        if (line == '% Added by lyx2lyx' and
-            document.preamble[i+1] == r'\renewcommand{\textendash}{--}' and
-            document.preamble[i+2] == r'\renewcommand{\textemdash}{---}'):
-            del document.preamble[i:i+3]
-            break
+    dash_renew_lines = find_slice(document.preamble,
+                                  ['% Added by lyx2lyx',
+                                   r'\renewcommand{\textendash}{--}',
+                                   r'\renewcommand{\textemdash}{---}'])
+    del(document.preamble[dash_renew_lines])
     # Prevent ligation of hyphens:
     i = 0
     while i < len(document.body)-1:
         # increment i, skip some insets (cf. convert_dashes)
-        i = _dashes_next_line(document, i) 
+        i = _dashes_next_line(document, i)
         line = document.body[i]
-        while "--" in line:
+        if "--" in line:
             line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
-        parts = line.split('\n')
-        if len(parts) > 1:
-            document.body[i:i+1] = parts
-            i += len(parts)-1
+            document.body[i:i+1] = line.split('\n')
     # Convert \twohyphens and \threehyphens:
     i = 0
     while i < len(document.body):
diff --git a/lib/lyx2lyx/lyx_2_3.py b/lib/lyx2lyx/lyx_2_3.py
index a39aaadd09..fc44a11521 100644
--- a/lib/lyx2lyx/lyx_2_3.py
+++ b/lib/lyx2lyx/lyx_2_3.py
@@ -24,9 +24,10 @@ import sys, os
 
 # Uncomment only what you need to import, please.
 
-from parser_tools import find_end_of, find_token_backwards, find_end_of_layout, \
-    find_token, find_end_of_inset, get_value,  get_bool_value, \
-    get_containing_layout, get_quoted_value, del_token, find_re
+from parser_tools import del_token, find_end_of, find_end_of_layout, \
+    find_end_of_inset, find_re, find_slice, find_token, \
+    find_token_backwards, get_containing_layout, \
+    get_bool_value, get_value, get_quoted_value
 #  find_tokens, find_token_exact, is_in_inset, \
 #  check_token, get_option_value
 
@@ -1843,17 +1844,18 @@ def revert_chapterbib(document):
 def convert_dashligatures(document):
     "Set 'use_dash_ligatures' according to content."
     use_dash_ligatures = None
-    # eventually remove preamble code from 2.3->2.2 conversion:
-    for i, line in enumerate(document.preamble):
-        if i > 1 and line == r'\renewcommand{\textemdash}{---}':
-            if (document.preamble[i-1] == r'\renewcommand{\textendash}{--}'
-                and document.preamble[i-2] == '% Added by lyx2lyx'):
-                del document.preamble[i-2:i+1]
-                use_dash_ligatures = True
+    # Remove preamble code from 2.3->2.2 conversion, if present:
+    dash_renew_lines = find_slice(document.preamble,
+                                  ['% Added by lyx2lyx',
+                                   r'\renewcommand{\textendash}{--}',
+                                   r'\renewcommand{\textemdash}{---}'])
+    del(document.preamble[dash_renew_lines])
+    use_dash_ligatures = bool(dash_renew_lines.stop)
+
     if use_dash_ligatures is None:
         # Look for dashes:
         # (Documents by LyX 2.1 or older have "\twohyphens\n" or "\threehyphens\n"
-        # as interim representation for dash ligatures in 2.2.)
+        # as interim representation for dash ligatures)
         has_literal_dashes = False
         has_ligature_dashes = False
         j = 0
@@ -1882,9 +1884,8 @@ def convert_dashligatures(document):
                          flags=re.UNICODE):
                 has_literal_dashes = True
             # ligature dash followed by word or no-break space on next line:
-            if re.search(u"(\\\\twohyphens|\\\\threehyphens)", line,
-                            flags=re.UNICODE) and re.match(u"[\w\u00A0]",
-                            document.body[i+1], flags=re.UNICODE):
+            if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
+                re.match(u"[\w\u00A0]", document.body[i+1], flags=re.UNICODE)):
                 has_ligature_dashes = True
         if has_literal_dashes and has_ligature_dashes:
             # TODO: insert a warning note in the document?
@@ -1920,11 +1921,10 @@ def revert_dashligatures(document):
         if (i < j) or line.startswith("\\labelwidthstring"):
             new_body.append(line)
             continue
-        words = line.split()
-        if (len(words) > 1 and words[0] == "\\begin_inset"
-            and (words[1] in ["CommandInset", "ERT", "External", "Formula",
-                              "FormulaMacro", "Graphics", "IPA", "listings"]
-                 or ' '.join(words[1:]) == "Flex Code")):
+        if (line.startswith("\\begin_inset ") and
+            line[13:].split()[0] in ["CommandInset", "ERT", "External",
+                "Formula", "FormulaMacro", "Graphics", "IPA", "listings"]
+            or line == "\\begin_inset Flex Code"):
             j = find_end_of_inset(document.body, i)
             if j == -1:
                 document.warning("Malformed LyX document: Can't find end of "
diff --git a/lib/lyx2lyx/parser_tools.py b/lib/lyx2lyx/parser_tools.py
index caa8ecca4a..44ac5d9045 100644
--- a/lib/lyx2lyx/parser_tools.py
+++ b/lib/lyx2lyx/parser_tools.py
@@ -18,7 +18,7 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 
-'''
+"""
 This module offers several free functions to help parse lines.
 More documentaton is below, but here is a quick guide to what
 they do. Optional arguments are marked by brackets.
@@ -152,18 +152,68 @@ is_nonempty_line(line):
 count_pars_in_inset(lines, i):
   Counts the paragraphs inside an inset.
 
-'''
+"""
 
 import re
 
+# Fast search in lists
+def find_slice(l, sl, start = 0, stop = None):
+    """Return position of first occurrence of sequence `sl` in list `l`
+    as a `slice` object.
+
+    >>> find_slice([1, 2, 3, 1, 1, 2], (1, 2))
+    slice(0, 2, None)
+
+    The return value can be used to delete or substitute the sub-list:
+
+    >>> l = [1, 0, 1, 1, 1, 2]
+    >>> s = find_slice(l, [0, 1, 1])
+    >>> del(l[s]); l
+    [1, 1, 2]
+    >>> s = find_slice(l, (1, 2))
+    >>> l[s] = [3]; l
+    [1, 3]
+
+    The `start` argument works like the one of list.index():
+
+    >>> find_slice([1, 2, 3, 1, 1 ,2], (1, 2), start = 1)
+    slice(4, 6, None)
+
+    Use the `stop` attribute of the returned `slice` to test for success:
+
+    >>> s1 = find_slice([2, 3, 1], (3, 1))
+    >>> s2 = find_slice([2, 3, 1], (2, 1))
+    >>> if s1.stop and not s2.stop:
+    ...     print("wow")
+    wow
+    """
+    stop = stop or len(l)
+    N = len(sl) # length of sub-list
+    try:
+        while True:
+            for j, value in enumerate(sl):
+                i = l.index(value, start, stop)
+                if j and i != start:
+                    start = i-j
+                    break
+                start = i +1
+            else:
+                return slice(i+1-N, i+1)
+    except ValueError: # sub list `sl` not found
+        return slice(0, 0)
+
+
 # Utilities for one line
 def check_token(line, token):
     """ check_token(line, token) -> bool
 
     Return True if token is present in line and is the first element
-    else returns False."""
+    else returns False.
+
+    Deprecated. Use line.startswith(token).
+    """
 
-    return line[:len(token)] == token
+    return line.startswith(token)
 
 
 def is_nonempty_line(line):
@@ -171,7 +221,7 @@ def is_nonempty_line(line):
 
     Return False if line is either empty or it has only whitespaces,
     else return True."""
-    return line != " "*len(line)
+    return bool(line.strip())
 
 
 # Utilities for a list of lines