lyx2lyx fixes and cleanup.

author Günter Milde <milde@lyx.org>

Fri, 9 Feb 2018 15:49:23 +0000 (16:49 +0100)

committer Günter Milde <milde@lyx.org>

Fri, 9 Feb 2018 15:49:23 +0000 (16:49 +0100)
author Günter Milde <milde@lyx.org>
Fri, 9 Feb 2018 15:49:23 +0000 (16:49 +0100)
committer Günter Milde <milde@lyx.org>
Fri, 9 Feb 2018 15:49:23 +0000 (16:49 +0100)
diff --git a/autotests/export/lyx2lyx/lyx_2_3_test2.lyx b/autotests/export/lyx2lyx/lyx_2_3_test2.lyx

index 1d19f2fb33d527574190e4d4c920db5917ccc5be..2740775a5ac03e9e3d6fdf433615de9c75fcc91e 100644 (file)
--- a/autotests/export/lyx2lyx/lyx_2_3_test2.lyx
+++ b/autotests/export/lyx2lyx/lyx_2_3_test2.lyx
@@ -12,7 +12,7 @@ logicalmkup
  \maintain_unincluded_children false
  \language bosnian
  \language_package default
-\inputencoding auto
+\inputencoding utf8
  \fontencoding global
  \font_roman "cochineal" "DejaVu Serif"
  \font_sans "lmss" "default"
@@ -295,5 +295,53 @@ y=x^{2}
  
  \end_layout
  
+\begin_layout Description
+Quote
+\begin_inset space ~
+\end_inset
+
+insets: Plain quote insets 
+\begin_inset Quotes qld
+\end_inset
+
+<file-
+\begin_inset Quotes qls
+\end_inset
+
+name
+\begin_inset Quotes qrs
+\end_inset
+
+>
+\begin_inset Quotes qrd
+\end_inset
+
+ vs.
+ literal quotes "<file-'name'>".
+\end_layout
+
+\begin_deeper
+\begin_layout Verbatim
+
+Quote insets in Verbatim: 
+\begin_inset Quotes cld
+\end_inset
+
+foo
+\begin_inset Quotes frd
+\end_inset
+
+ and 
+\begin_inset Quotes pls
+\end_inset
+
+bar
+\begin_inset Quotes prs
+\end_inset
+
+
+\end_layout
+
+\end_deeper
  \end_body
  \end_document
diff --git a/lib/lyx2lyx/lyx_2_2.py b/lib/lyx2lyx/lyx_2_2.py

index 342bb06119fcb1805390746cfca349a08345f857..cb1731304edfb736155a95baf35654715154e046 100644 (file)
--- a/lib/lyx2lyx/lyx_2_2.py
+++ b/lib/lyx2lyx/lyx_2_2.py
@@ -36,7 +36,7 @@ from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, get_ert,
  
  from parser_tools import (check_token, del_complete_lines,
      find_end_of_inset, find_end_of_layout, find_nonempty_line, find_re,
-    find_token, find_token_backwards, get_containing_layout,
+    find_substring, find_token, find_token_backwards, get_containing_layout,
      get_containing_inset, get_quoted_value, get_value, is_in_inset,
      get_bool_value, set_bool_value)
  
@@ -618,19 +618,18 @@ def convert_dashes(document):
      if document.backend != "latex":
          return
  
-    lines = document.body
      i = 0
-    while i+1 < len(lines):
-        i += 1
-        line = lines[i]
-        if "--" not in line:
-            continue
+    while True:
+        i = find_substring(document.body, "--", i+1)
+        if i == -1:
+            break
+        line = document.body[i]
          # skip label width string (bug 10243):
          if line.startswith("\\labelwidthstring"):
              continue
          # Do not touch hyphens in some insets:
          try:
-            value, start, end = get_containing_inset(lines, i)
+            value, start, end = get_containing_inset(document.body, i)
          except TypeError:
              # False means no (or malformed) containing inset
              value, start, end = "no inset", -1, -1
@@ -644,7 +643,7 @@ def convert_dashes(document):
              i = end
              continue
          try:
-            layout, start, end, j = get_containing_layout(lines, i)
+            layout, start, end, j = get_containing_layout(document.body, i)
          except TypeError: # no (or malformed) containing layout
              document.warning("Malformed LyX document: "
                               "Can't find layout at line %d" % i)
@@ -656,18 +655,18 @@ def convert_dashes(document):
          # Replace as LaTeX does: First try emdash, then endash
          line = line.replace("---", "\\threehyphens\n")
          line = line.replace("--", "\\twohyphens\n")
-        lines[i:i+1] = line.splitlines()
+        document.body[i:i+1] = line.split('\n')
  
      # remove ligature breaks between dashes
-    i = 1
-    while i < len(lines):
-        line = lines[i]
-        if (line.endswith(r"-\SpecialChar \textcompwordmark{}") and
-            lines[i+1].startswith("-")):
-            lines[i] = line.replace(r"\SpecialChar \textcompwordmark{}",
-                                    lines.pop(i+1))
-        else:
-            i += 1
+    i = 0
+    while True:
+        i = find_substring(document.body, 
+                           r"-\SpecialChar \textcompwordmark{}", i+1)
+        if i == -1:
+            break
+        if document.body[i+1].startswith("-"):
+            document.body[i] = document.body[i].replace(
+                r"\SpecialChar \textcompwordmark{}", document.body.pop(i+1))
  
  
  def revert_dashes(document):
@@ -682,19 +681,18 @@ def revert_dashes(document):
                          r'\renewcommand{\textemdash}{---}'])
  
      # Insert ligature breaks to prevent ligation of hyphens to dashes:
-    lines = document.body
      i = 0
-    while i+1 < len(lines):
-        i += 1
-        line = lines[i]
-        if "--" not in line:
-            continue
+    while True:
+        i = find_substring(document.body, "--", i+1)
+        if i == -1:
+            break
+        line = document.body[i]
          # skip label width string (bug 10243):
          if line.startswith("\\labelwidthstring"):
              continue
          # do not touch hyphens in some insets (cf. convert_dashes):
          try:
-            value, start, end = get_containing_inset(lines, i)
+            value, start, end = get_containing_inset(document.body, i)
          except TypeError:
              # False means no (or malformed) containing inset
              value, start, end = "no inset", -1, -1
@@ -708,14 +706,14 @@ def revert_dashes(document):
  
      # Revert \twohyphens and \threehyphens:
      i = 1
-    while i < len(lines):
-        line = lines[i]
+    while i < len(document.body):
+        line = document.body[i]
          if not line.endswith("hyphens"):
              i +=1
          elif line.endswith("\\twohyphens") or line.endswith("\\threehyphens"):
              line = line.replace("\\twohyphens", "--")
              line = line.replace("\\threehyphens", "---")
-            lines[i] = line + lines.pop(i+1)
+            document.body[i] = line + document.body.pop(i+1)
          else:
              i += 1
  
diff --git a/lib/lyx2lyx/lyx_2_3.py b/lib/lyx2lyx/lyx_2_3.py

index 7f4ceef6bd632cd19b958d886fa2d20920a5df17..815ac2b5084f5a77e56fcc4b40c3c0baeb573fa6 100644 (file)
--- a/lib/lyx2lyx/lyx_2_3.py
+++ b/lib/lyx2lyx/lyx_2_3.py
@@ -132,7 +132,6 @@ def revert_ibranches(document):
              continue
          if inverted:
              branch = document.body[i][20:].strip()
-            #document.warning(branch)
              if not branch in antibranches:
                  antibranch = "Anti-" + branch
                  while antibranch in antibranches:
@@ -140,7 +139,6 @@ def revert_ibranches(document):
                  antibranches[branch] = antibranch
              else:
                  antibranch = antibranches[branch]
-            #document.warning(antibranch)
              document.body[i] = "\\begin_inset Branch " + antibranch
  
      # now we need to add the new branches to the header
@@ -420,6 +418,7 @@ def revert_quotes(document):
          if len(words) > 1 and words[0] == "\\begin_inset" and \
             ( words[1] in ["ERT", "listings"] or ( len(words) > 2 and words[2] in ["URL", "Chunk", "Sweave", "S/R"]) ):
              j = find_end_of_inset(document.body, i)
+
              if j == -1:
                  document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
                  i += 1
@@ -434,10 +433,10 @@ def revert_quotes(document):
                      document.warning("Malformed LyX document: Can't find end of Quote inset at line " + str(k))
                      i = k
                      continue
-                replace = "\""
+                replace = '"'
                  if document.body[k].endswith("s"):
                      replace = "'"
-                document.body[k:l+1] = [replace]
+                document.body[k:l+2] = [replace]
          else:
              i += 1
              continue
@@ -467,7 +466,7 @@ def revert_quotes(document):
                  replace = "\""
                  if document.body[k].endswith("s"):
                      replace = "'"
-                document.body[k:l+1] = [replace]
+                document.body[k:l+2] = [replace]
          else:
              i += 1
              continue
@@ -498,7 +497,7 @@ def revert_quotes(document):
              replace = "\""
              if document.body[k].endswith("s"):
                  replace = "'"
-            document.body[k:l+1] = [replace]
+            document.body[k:l+2] = [replace]
          i = l
  
  
@@ -602,7 +601,7 @@ def revert_plainquote(document):
          replace = "\""
          if document.body[k].endswith("s"):
              replace = "'"
-        document.body[k:l+1] = [replace]
+        document.body[k:l+2] = [replace]
          i = l
  
  
@@ -1799,13 +1798,13 @@ def convert_dashligatures(document):
                                  ['% Added by lyx2lyx',
                                   r'\renewcommand{\textendash}{--}',
                                   r'\renewcommand{\textemdash}{---}']) or None
-
+    
      if use_dash_ligatures is None:
          # Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
          # or "\threehyphens\n" as interim representation for -- an ---.)
          lines = document.body
          has_literal_dashes = has_ligature_dashes = False
-        dash_pattern = re.compile(u"[\u2013\u2014]|\\twohyphens|\\threehyphens")
+        dash_pattern = re.compile(u".*[\u2013\u2014]|\\twohyphens|\\threehyphens")
          i = j = 0
          while True:
              # skip lines without dashes:
@@ -1837,13 +1836,13 @@ def convert_dashligatures(document):
                  i = end
                  continue
  
-            # literal dash followed by a word or no-break space:
-            if re.search(u"[\u2013\u2014]([\w\u00A0]|$)",
+            # literal dash followed by a non-white-character or no-break space:
+            if re.search(u"[\u2013\u2014]([\S\u00A0\u202F\u2060]|$)",
                           line, flags=re.UNICODE):
                  has_literal_dashes = True
-            # ligature dash followed by word or no-break space on next line:
+            # ligature dash followed by non-white-char or no-break space on next line:
              if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
-                re.match(u"[\w\u00A0]", lines[i+1], flags=re.UNICODE)):
+                re.match(u"[\S\u00A0\u202F\u2060]", lines[i+1], flags=re.UNICODE)):
                  has_ligature_dashes = True
              if has_literal_dashes and has_ligature_dashes:
                  # TODO: insert a warning note in the document?
@@ -1866,40 +1865,46 @@ def convert_dashligatures(document):
  
  def revert_dashligatures(document):
      """Remove font ligature settings for en- and em-dashes.
-    Revert conversion of \twodashes or \threedashes to literal dashes."""
+    Revert conversion of \twohyphens or \threehyphens to literal dashes.
+    """
      use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures")
      if use_dash_ligatures != "true" or document.backend != "latex":
          return
-    j = 0
-    new_body = []
-    for i, line in enumerate(document.body):
-        # Skip some document parts where dashes are not converted
-        if (i < j) or line.startswith("\\labelwidthstring"):
-            new_body.append(line)
+    i = 0
+    dash_pattern = re.compile(u".*[\u2013\u2014]")
+    while True:
+        # skip lines without dashes:
+        i = find_re(document.body, dash_pattern, i+1)
+        if i == -1:
+            break
+        line = document.body[i]
+        # skip label width string (see bug 10243):
+        if line.startswith("\\labelwidthstring"):
              continue
-        if (line.startswith("\\begin_inset ") and
-            line[13:].split()[0] in ["CommandInset", "ERT", "External",
-                "Formula", "FormulaMacro", "Graphics", "IPA", "listings"]
-            or line == "\\begin_inset Flex Code"):
-            j = find_end_of_inset(document.body, i)
-            if j == -1:
-                document.warning("Malformed LyX document: Can't find end of "
-                                 + words[1] + " inset at line " + str(i))
-            new_body.append(line)
+        # do not touch hyphens in some insets (cf. lyx_2_2.convert_dashes):
+        try:
+            inset_type, start, end = get_containing_inset(document.body, i)
+        except TypeError: # no containing inset
+            inset_type, start, end = "no inset", -1, -1
+        if (inset_type.split()[0] in
+            ["CommandInset", "ERT", "External", "Formula",
+                "FormulaMacro", "Graphics", "IPA", "listings"]
+            or inset_type == "Flex Code"):
+            i = end
              continue
-        if line == "\\begin_layout LyX-Code":
-            j = find_end_of_layout(document.body, i)
-            if j == -1:
-                document.warning("Malformed LyX document: "
-                    "Can't find end of %s layout at line %d" % (words[1],i))
-            new_body.append(line)
+        try:
+            layoutname, start, end, j = get_containing_layout(document.body, i)
+        except TypeError: # no (or malformed) containing layout
+            document.warning("Malformed LyX document: "
+                            "Can't find layout at body line %d" % i)
+            continue
+        if layoutname == "LyX-Code":
+            i = end
              continue
          # TODO: skip replacement in typewriter fonts
          line = line.replace(u'\u2013', '\\twohyphens\n')
          line = line.replace(u'\u2014', '\\threehyphens\n')
-        lines = line.split('\n')
-        new_body.extend(line.split('\n'))
-    document.body = new_body
+        document.body[i:i+1] = line.split('\n')
      # redefine the dash LICRs to use ligature dashes:
      add_to_preamble(document, [r'\renewcommand{\textendash}{--}',
                                 r'\renewcommand{\textemdash}{---}'])
author	Günter Milde <milde@lyx.org>
	Fri, 9 Feb 2018 15:49:23 +0000 (16:49 +0100)
committer	Günter Milde <milde@lyx.org>
	Fri, 9 Feb 2018 15:49:23 +0000 (16:49 +0100)
autotests/export/lyx2lyx/lyx_2_3_test2.lyx		patch \| blob \| history
lib/lyx2lyx/lyx_2_2.py		patch \| blob \| history
lib/lyx2lyx/lyx_2_3.py		patch \| blob \| history