git.lyx.org Git - lyx.git/commitdiff
Fix lyx2lyx dash conversion and make it faster.
author: Günter Milde <milde@lyx.org>
Wed, 24 Jan 2018 16:38:19 +0000 (17:38 +0100)
committer: Günter Milde <milde@lyx.org>
Wed, 24 Jan 2018 16:38:19 +0000 (17:38 +0100)
lib/lyx2lyx/lyx_2_1.py
lib/lyx2lyx/lyx_2_2.py
lib/lyx2lyx/lyx_2_3.py

index 84b057af2a3a0c109492ffb229002a457d10a0dd..c8500c779e51632109ec343a8b88eb889d52de63 100644 (file)
@@ -1557,10 +1557,11 @@ def convert_latexargs(document):
                     "theorems-chap-bytype", "theorems-chap", "theorems-named", "theorems-sec-bytype",
                     "theorems-sec", "theorems-starred", "theorems-std", "todonotes"]
     # Modules we need to take care of
-    caveat_modules = ["initials"]
+    caveat_modules = ["initials"] # TODO: , "graphicboxes", "bicaption"]
     # information about the relevant styles in caveat_modules (number of opt and req args)
     # use this if we get more caveat_modules. For now, use hard coding (see below).
     # initials = [{'Layout' : 'Initial', 'opt' : 1, 'req' : 1}]
+    # graphicboxes = { ... }
 
     # Is this a known safe layout?
     safe_layout = document.textclass in safe_layouts
index b1b3a728ef7c7ef2c7190cc5b7cdcd2718a42add..664dd6c3f7a4be214d01b231d0aacb4b53551bd8 100644 (file)
@@ -37,7 +37,7 @@ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
 from parser_tools import (check_token, del_complete_lines,
     find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
     find_token, find_token_backwards, get_containing_layout,
-    get_value, is_in_inset)
+    get_containing_inset, get_value, is_in_inset)
 
 
 ####################################################################
@@ -622,41 +622,40 @@ def convert_dashes(document):
     while i+1 < len(lines):
         i += 1
         line = lines[i]
-        words = line.split()
-        if (len(words) > 1 and words[0] == "\\begin_inset"
-            and (words[1] in ["CommandInset", "ERT", "External", "Formula",
-                              "FormulaMacro", "Graphics", "IPA", "listings"]
-                 or line.endswith("Flex Code"))):
-            # must not replace anything in insets that store LaTeX contents in .lyx files
-            # (math and command insets without overridden read() and write() methods
-            # filtering out IPA makes Text::readParToken() more simple
-            # skip ERT as well since it is not needed there
-            # Flex Code is logical markup, typically rendered as typewriter
-            j = find_end_of_inset(lines, i)
-            if j == -1:
-                document.warning("Malformed LyX document: Can't find end of " +
-                                 words[1] + " inset at line " + str(i))
-            else:
-                i = j
-            continue
-        if lines[i] == "\\begin_layout LyX-Code":
-            j = find_end_of_layout(lines, i)
-            if j == -1:
-                document.warning("Malformed LyX document: "
-                    "Can't find end of %s layout at line %d" % (words[1],i))
-            else:
-                i = j
+        if "--" not in line:
             continue
+        # skip label width string (bug 10243):
         if line.startswith("\\labelwidthstring"):
-            # skip label width string (bug 10243)
             continue
-
-        if "--" in line:
-            # We can have an arbitrary number of consecutive hyphens.
-            # Replace as LaTeX does: First try emdash, then endash
-            line = line.replace("---", "\\threehyphens\n")
-            line = line.replace("--", "\\twohyphens\n")
-            lines[i:i+1] = line.splitlines()
+        # Do not touch hyphens in some insets:
+        try:
+            value, start, end = get_containing_inset(lines, i)
+        except TypeError:
+            # False means no (or malformed) containing inset
+            value, start, end = "no inset", -1, -1
+        # We must not replace anything in insets that store LaTeX contents in .lyx files
+        # (math and command insets without overridden read() and write() methods).
+        # Filtering out IPA and ERT makes Text::readParToken() simpler.
+        # Flex Code is logical markup, typically rendered as typewriter.
+        if (value.split()[0] in ["CommandInset", "ERT", "External", "Formula",
+                                 "FormulaMacro", "Graphics", "IPA", "listings"]
+            or value in ["Flex Code", "Flex URL"]):
+            i = end
+            continue
+        try:
+            layout, start, end, j = get_containing_layout(lines, i)
+        except TypeError: # no (or malformed) containing layout
+            document.warning("Malformed LyX document: "
+                             "Can't find layout at line %d" % i)
+            continue
+        if layout == "LyX-Code":
+            i = end
+            continue
+        # We can have an arbitrary number of consecutive hyphens.
+        # Replace as LaTeX does: First try emdash, then endash
+        line = line.replace("---", "\\threehyphens\n")
+        line = line.replace("--", "\\twohyphens\n")
+        lines[i:i+1] = line.splitlines()
 
     # remove ligature breaks between dashes
     i = 1
@@ -672,40 +671,40 @@ def convert_dashes(document):
 
 def revert_dashes(document):
     """
+    Remove preamble code from 2.3->2.2 conversion.
     Prevent ligatures of existing --- and --.
     Revert \\twohyphens and \\threehyphens to -- and ---.
-    Remove preamble code from 2.3->2.2 conversion.
     """
     del_complete_lines(document.preamble,
                        ['% Added by lyx2lyx',
                         r'\renewcommand{\textendash}{--}',
                         r'\renewcommand{\textemdash}{---}'])
+
     # Insert ligature breaks to prevent ligation of hyphens to dashes:
     lines = document.body
     i = 0
     while i+1 < len(lines):
         i += 1
         line = lines[i]
+        if "--" not in line:
+            continue
         # skip label width string (bug 10243):
         if line.startswith("\\labelwidthstring"):
             continue
         # do not touch hyphens in some insets (cf. convert_dashes):
-        if line.startswith("\\begin_inset"):
-            try:
-                if line.split()[1] in ["CommandInset", "ERT", "External",
-                                       "Formula", "FormulaMacro", "Graphics",
-                                       "IPA", "listings"]:
-                    j = find_end_of_inset(lines, i)
-                    if j == -1:
-                        document.warning("Malformed LyX document: Can't find "
-                                    "end of %s inset at line %d." % (itype, i))
-                        continue
-                    i = j
-            except IndexError:
-                continue
-        if "--" in line:
-            line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
-            document.body[i:i+1] = line.split('\n')
+        try:
+            value, start, end = get_containing_inset(lines, i)
+        except TypeError:
+            # False means no (or malformed) containing inset
+            value, start, end = "no inset", -1, -1
+        if (value.split()[0] in ["CommandInset", "ERT", "External", "Formula",
+                                 "FormulaMacro", "Graphics", "IPA", "listings"]
+            or value == "Flex URL"):
+            i = end
+            continue
+        line = line.replace("--", "-\\SpecialChar \\textcompwordmark{}\n-")
+        document.body[i:i+1] = line.split('\n')
+
     # Revert \twohyphens and \threehyphens:
     i = 1
     while i < len(lines):
index 230909dfb8539565a5334145a7888926ff489c23..310de3889990a776224db5ef9abd023c07996af1 100644 (file)
@@ -26,7 +26,7 @@ import sys, os
 
 from parser_tools import (del_token, del_value, del_complete_lines,
     find_complete_lines, find_end_of, find_end_of_layout, find_end_of_inset,
-    find_re, find_token, find_token_backwards,
+    find_re, find_token, find_token_backwards, get_containing_inset,
     get_containing_layout, get_bool_value, get_value, get_quoted_value)
 #  find_tokens, find_token_exact, is_in_inset,
 #  check_token, get_option_value
@@ -1853,46 +1853,54 @@ def convert_dashligatures(document):
     if use_dash_ligatures is None:
         # Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
         # or "\threehyphens\n" as interim representation for -- an ---.)
-        has_literal_dashes = False
-        has_ligature_dashes = False
-        j = 0
-        for i, line in enumerate(document.body):
-            # Skip some document parts where dashes are not converted
-            if (i < j) or line.startswith("\\labelwidthstring"):
+        lines = document.body
+        has_literal_dashes = has_ligature_dashes = False
+        i = j = 0
+        while i+1 < len(lines):
+            i += 1
+            line = lines[i]
+            # skip lines without any dashes:
+            if not re.search(u"[\u2013\u2014]|\\twohyphens|\\threehyphens", line):
                 continue
-            if line.startswith("\\begin_inset"):
-                try:
-                    it = line.split()[1]
-                except IndexError:
-                    continue
-                if (it in ["CommandInset", "ERT", "External", "Formula",
-                           "FormulaMacro", "Graphics", "IPA", "listings"]
-                    or line.endswith("Flex Code")):
-                    j = find_end_of_inset(document.body, i)
-                    if j == -1:
-                        document.warning("Malformed LyX document: Can't "
-                            "find end of %s inset at line %d." % (itype, i))
-                        continue
-            if line == "\\begin_layout LyX-Code":
-                j = find_end_of_layout(document.body, i)
-                if j == -1:
-                    document.warning("Malformed LyX document: "
-                       "Can't find end of %s layout at line %d" % (words[1],i))
+            # skip label width string (see bug 10243):
+            if line.startswith("\\labelwidthstring"):
                 continue
+            # do not touch hyphens in some insets (cf. lyx_2_2.convert_dashes):
+            try:
+                value, start, end = get_containing_inset(lines, i)
+            except TypeError: # no containing inset
+                value, start, end = "no inset", -1, -1
+            if (value.split()[0] in
+                ["CommandInset", "ERT", "External", "Formula",
+                 "FormulaMacro", "Graphics", "IPA", "listings"]
+                or value == "Flex Code"):
+                i = end
+                continue
+            try:
+                layout, start, end, j = get_containing_layout(lines, i)
+            except TypeError: # no (or malformed) containing layout
+                document.warning("Malformed LyX document: "
+                                "Can't find layout at line %d" % i)
+                continue
+            if layout == "LyX-Code":
+                i = end
+                continue
+
             # literal dash followed by a word or no-break space:
             if re.search(u"[\u2013\u2014]([\w\u00A0]|$)", line,
                          flags=re.UNICODE):
                 has_literal_dashes = True
             # ligature dash followed by word or no-break space on next line:
             if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
-                re.match(u"[\w\u00A0]", document.body[i+1], flags=re.UNICODE)):
+                re.match(u"[\w\u00A0]", lines[i+1], flags=re.UNICODE)):
                 has_ligature_dashes = True
-        if has_literal_dashes and has_ligature_dashes:
-            # TODO: insert a warning note in the document?
-            document.warning('This document contained both literal and '
-                '"ligature" dashes.\n Line breaks may have changed. '
-                'See UserGuide chapter 3.9.1 for details.')
-        elif has_literal_dashes:
+            if has_literal_dashes and has_ligature_dashes:
+                # TODO: insert a warning note in the document?
+                document.warning('This document contained both literal and '
+                                 '"ligature" dashes.\n Line breaks may have changed. '
+                                 'See UserGuide chapter 3.9.1 for details.')
+                break
+        if has_literal_dashes:
             use_dash_ligatures = False
         elif has_ligature_dashes:
             use_dash_ligatures = True