From: Günter Milde Date: Sun, 21 Jan 2018 18:55:27 +0000 (+0100) Subject: Fix preamble-code removal in lyx2lyx. Do some optimizations. X-Git-Tag: lyx-2.4.0dev-acb2ca7b~3988 X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=8e825de4b27f02ed2f24cb73ef84c2d6d2260278;p=features.git Fix preamble-code removal in lyx2lyx. Do some optimizations. Fix failure of revert_dashes() found by lyx2lyx ctests using an efficient function to find a given sequence of lines in a list of lines. Some optimizations using Python idioms instead of C-like code. --- diff --git a/lib/lyx2lyx/lyx_2_2.py b/lib/lyx2lyx/lyx_2_2.py index 2fb00c9a6c..a4c899079e 100644 --- a/lib/lyx2lyx/lyx_2_2.py +++ b/lib/lyx2lyx/lyx_2_2.py @@ -29,14 +29,14 @@ import sys, os # find_token_backwards, is_in_inset, get_value, get_quoted_value, \ # del_token, check_token, get_option_value -from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert, lyx2latex, \ - lyx2verbatim, length_in_bp, convert_info_insets -# insert_to_preamble, latex_length, revert_flex_inset, \ -# revert_font_attrs, hex2ratio, str2bool +from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert, + lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets) +# insert_to_preamble, latex_length, revert_flex_inset, +# revert_font_attrs, hex2ratio, str2bool -from parser_tools import find_token, find_token_backwards, find_re, \ - find_end_of_inset, find_end_of_layout, find_nonempty_line, \ - get_containing_layout, get_value, check_token +from parser_tools import (find_end_of_inset, find_end_of_layout, + find_nonempty_line, find_re, find_slice, find_token, find_token_backwards, + get_containing_layout, get_value, check_token) #################################################################### # Private helper functions @@ -706,24 +706,20 @@ def revert_dashes(document): Remove preamble code from 2.3->2.2 conversion. 
""" # Remove preamble code from 2.3->2.2 conversion: - for i, line in enumerate(document.preamble): - if (line == '% Added by lyx2lyx' and - document.preamble[i+1] == r'\renewcommand{\textendash}{--}' and - document.preamble[i+2] == r'\renewcommand{\textemdash}{---}'): - del document.preamble[i:i+3] - break + dash_renew_lines = find_slice(document.preamble, + ['% Added by lyx2lyx', + r'\renewcommand{\textendash}{--}', + r'\renewcommand{\textemdash}{---}']) + del(document.preamble[dash_renew_lines]) # Prevent ligation of hyphens: i = 0 while i < len(document.body)-1: # increment i, skip some insets (cf. convert_dashes) - i = _dashes_next_line(document, i) + i = _dashes_next_line(document, i) line = document.body[i] - while "--" in line: + if "--" in line: line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-") - parts = line.split('\n') - if len(parts) > 1: - document.body[i:i+1] = parts - i += len(parts)-1 + document.body[i:i+1] = line.split('\n') # Convert \twohyphens and \threehyphens: i = 0 while i < len(document.body): diff --git a/lib/lyx2lyx/lyx_2_3.py b/lib/lyx2lyx/lyx_2_3.py index a39aaadd09..fc44a11521 100644 --- a/lib/lyx2lyx/lyx_2_3.py +++ b/lib/lyx2lyx/lyx_2_3.py @@ -24,9 +24,10 @@ import sys, os # Uncomment only what you need to import, please. -from parser_tools import find_end_of, find_token_backwards, find_end_of_layout, \ - find_token, find_end_of_inset, get_value, get_bool_value, \ - get_containing_layout, get_quoted_value, del_token, find_re +from parser_tools import del_token, find_end_of, find_end_of_layout, \ + find_end_of_inset, find_re, find_slice, find_token, \ + find_token_backwards, get_containing_layout, \ + get_bool_value, get_value, get_quoted_value # find_tokens, find_token_exact, is_in_inset, \ # check_token, get_option_value @@ -1843,17 +1844,18 @@ def revert_chapterbib(document): def convert_dashligatures(document): "Set 'use_dash_ligatures' according to content." 
use_dash_ligatures = None - # eventually remove preamble code from 2.3->2.2 conversion: - for i, line in enumerate(document.preamble): - if i > 1 and line == r'\renewcommand{\textemdash}{---}': - if (document.preamble[i-1] == r'\renewcommand{\textendash}{--}' - and document.preamble[i-2] == '% Added by lyx2lyx'): - del document.preamble[i-2:i+1] - use_dash_ligatures = True + # Eventually remove preamble code from 2.3->2.2 conversion: + dash_renew_lines = find_slice(document.preamble, + ['% Added by lyx2lyx', + r'\renewcommand{\textendash}{--}', + r'\renewcommand{\textemdash}{---}']) + del(document.preamble[dash_renew_lines]) + use_dash_ligatures = bool(dash_renew_lines.stop) + if use_dash_ligatures is None: # Look for dashes: # (Documents by LyX 2.1 or older have "\twohyphens\n" or "\threehyphens\n" - # as interim representation for dash ligatures in 2.2.) + # as interim representation for dash ligatures) has_literal_dashes = False has_ligature_dashes = False j = 0 @@ -1882,9 +1884,8 @@ def convert_dashligatures(document): flags=re.UNICODE): has_literal_dashes = True # ligature dash followed by word or no-break space on next line: - if re.search(u"(\\\\twohyphens|\\\\threehyphens)", line, - flags=re.UNICODE) and re.match(u"[\w\u00A0]", - document.body[i+1], flags=re.UNICODE): + if (re.search(r"(\\twohyphens|\\threehyphens)", line) and + re.match(u"[\w\u00A0]", document.body[i+1], flags=re.UNICODE)): has_ligature_dashes = True if has_literal_dashes and has_ligature_dashes: # TODO: insert a warning note in the document? 
@@ -1920,11 +1921,10 @@ def revert_dashligatures(document): if (i < j) or line.startswith("\\labelwidthstring"): new_body.append(line) continue - words = line.split() - if (len(words) > 1 and words[0] == "\\begin_inset" - and (words[1] in ["CommandInset", "ERT", "External", "Formula", - "FormulaMacro", "Graphics", "IPA", "listings"] - or ' '.join(words[1:]) == "Flex Code")): + if (line.startswith("\\begin_inset ") and + line[13:].split()[0] in ["CommandInset", "ERT", "External", + "Formula", "FormulaMacro", "Graphics", "IPA", "listings"] + or line == "\\begin_inset Flex Code"): j = find_end_of_inset(document.body, i) if j == -1: document.warning("Malformed LyX document: Can't find end of " diff --git a/lib/lyx2lyx/parser_tools.py b/lib/lyx2lyx/parser_tools.py index caa8ecca4a..44ac5d9045 100644 --- a/lib/lyx2lyx/parser_tools.py +++ b/lib/lyx2lyx/parser_tools.py @@ -18,7 +18,7 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -''' +""" This module offers several free functions to help parse lines. More documentaton is below, but here is a quick guide to what they do. Optional arguments are marked by brackets. @@ -152,18 +152,68 @@ is_nonempty_line(line): count_pars_in_inset(lines, i): Counts the paragraphs inside an inset. -''' +""" import re +# Fast search in lists +def find_slice(l, sl, start = 0, stop = None): + """Return position of first occurrence of sequence `sl` in list `l` + as a `slice` object.
+ + >>> find_slice([1, 2, 3, 1, 1, 2], (1, 2)) + slice(0, 2, None) + + The return value can be used to delete or substitute the sub-list: + + >>> l = [1, 0, 1, 1, 1, 2] + >>> s = find_slice(l, [0, 1, 1]) + >>> del(l[s]); l + [1, 1, 2] + >>> s = find_slice(l, (1, 2)) + >>> l[s] = [3]; l + [1, 3] + + The start argument works similarly to list.index() + + >>> find_slice([1, 2, 3, 1, 1 ,2], (1, 2), start = 1) + slice(4, 6, None) + + Use the `stop` attribute of the returned `slice` to test for success: + + >>> s1 = find_slice([2, 3, 1], (3, 1)) + >>> s2 = find_slice([2, 3, 1], (2, 1)) + >>> if s1.stop and not s2.stop: + ... print "wow" + wow + """ + stop = stop or len(l) + N = len(sl) # length of sub-list + try: + while True: + for j, value in enumerate(sl): + i = l.index(value, start, stop) + if j and i != start: + start = i-j + break + start = i +1 + else: + return slice(i+1-N, i+1) + except ValueError: # sub list `sl` not found + return slice(0, 0) + + # Utilities for one line def check_token(line, token): """ check_token(line, token) -> bool Return True if token is present in line and is the first element - else returns False.""" + else returns False. + + Deprecated. Use line.startswith(token). + """ - return line[:len(token)] == token + return line.startswith(token) def is_nonempty_line(line): @@ -171,7 +221,7 @@ def is_nonempty_line(line): Return False if line is either empty or it has only whitespaces, else return True.""" - return line != " "*len(line) + return bool(line.strip()) # Utilities for a list of lines