New lyx2lyx tools.

[lyx.git] / lib / lyx2lyx / lyx_2_3.py
diff --git a/lib/lyx2lyx/lyx_2_3.py b/lib/lyx2lyx/lyx_2_3.py

index 3b8b024ff23adff1b45aa801d636c58f18015d84..625db086044139e7afcc4c7e762333e24271b5b7 100644 (file)
--- a/lib/lyx2lyx/lyx_2_3.py
+++ b/lib/lyx2lyx/lyx_2_3.py
@@ -24,9 +24,10 @@ import sys, os
  
  # Uncomment only what you need to import, please.
  
-from parser_tools import find_end_of, find_token_backwards, find_end_of_layout, \
-    find_token, find_end_of_inset, get_value,  get_bool_value, \
-    get_containing_layout, get_quoted_value, del_token, find_re
+from parser_tools import del_token, del_value, del_complete_lines, \
+    find_end_of, find_end_of_layout, find_end_of_inset, find_re, \
+    find_token, find_token_backwards, get_containing_layout, \
+    get_bool_value, get_value, get_quoted_value
  #  find_tokens, find_token_exact, is_in_inset, \
  #  check_token, get_option_value
  
@@ -1146,11 +1147,12 @@ def revert_noprefix(document):
              i += 1
              continue
          k = find_token(document.body, "LatexCommand labelonly", i, j)
-        if k == -1:
-            i = j
-            continue
-        noprefix = get_bool_value(document.body, "noprefix", i, j)
+        noprefix = False
+        if k != -1:
+            noprefix = get_bool_value(document.body, "noprefix", i, j)
          if not noprefix:
+            # either it was not a labelonly command, or else noprefix was not set.
+            # in that case, we just delete the option.
              del_token(document.body, "noprefix", i, j)
              i = j
              continue
@@ -1301,7 +1303,7 @@ def revert_biblatex(document):
                        "Citealt*", "Citealp*", "Citeauthor*", "fullcite", "footcite",\
                        "footcitet", "footcitep", "footcitealt", "footcitealp",\
                        "footciteauthor", "footciteyear", "footciteyearpar",\
-                     "citefield", "citetitle", "cite*" ]
+                      "citefield", "citetitle", "cite*" ]
  
      i = 0
      while (True):
@@ -1550,9 +1552,6 @@ command_insets = ["bibitem", "citation", "href", "index_print", "nomenclature"]
  def convert_literalparam(document):
      " Add param literal "
  
-    # These already had some sort of latexify method
-    latexified_insets = ["href", "index_print", "nomenclature"]
-
      for inset in command_insets:
          i = 0
          while True:
@@ -1566,7 +1565,8 @@ def convert_literalparam(document):
                  continue
              while i < j and document.body[i].strip() != '':
                  i += 1
-            if inset in latexified_insets:
+            # href is already fully latexified. Here we can switch off literal.
+            if inset == "href":
                  document.body.insert(i, "literal \"false\"")
              else:
                  document.body.insert(i, "literal \"true\"")
@@ -1842,103 +1842,105 @@ def revert_chapterbib(document):
  
  
  def convert_dashligatures(document):
-    " Remove a zero-length space (U+200B) after en- and em-dashes. "
-
-    i = find_token(document.header, "\\use_microtype", 0)
-    if i != -1:
-        if document.initial_format > 474 and document.initial_format < 509:
-            # This was created by LyX 2.2
-            document.header[i+1:i+1] = ["\\use_dash_ligatures false"]
-        else:
-            # This was created by LyX 2.1 or earlier
-            document.header[i+1:i+1] = ["\\use_dash_ligatures true"]
-
-    i = 0
-    while i < len(document.body):
-        words = document.body[i].split()
-        # Skip some document parts where dashes are not converted
-        if len(words) > 1 and words[0] == "\\begin_inset" and \
-           words[1] in ["CommandInset", "ERT", "External", "Formula", \
-                        "FormulaMacro", "Graphics", "IPA", "listings"]:
-            j = find_end_of_inset(document.body, i)
-            if j == -1:
-                document.warning("Malformed LyX document: Can't find end of " \
-                                 + words[1] + " inset at line " + str(i))
-                i += 1
-            else:
-                i = j
-            continue
-        if len(words) > 0 and words[0] in ["\\leftindent", \
-                "\\paragraph_spacing", "\\align", "\\labelwidthstring"]:
-            i += 1
-            continue
-
-        start = 0
-        while True:
-            j = document.body[i].find(u"\u2013", start) # en-dash
-            k = document.body[i].find(u"\u2014", start) # em-dash
-            if j == -1 and k == -1:
-                break
-            if j == -1 or (k != -1 and k < j):
-                j = k
-            after = document.body[i][j+1:]
-            if after.startswith(u"\u200B"):
-                document.body[i] = document.body[i][:j+1] + after[1:]
-            else:
-                if len(after) == 0 and document.body[i+1].startswith(u"\u200B"):
-                    document.body[i+1] = document.body[i+1][1:]
-                    break
-            start = j+1
-        i += 1
-
+    "Set 'use_dash_ligatures' according to content."
+    # Look for and remove dashligatures workaround from 2.3->2.2 reversion,
+    # set use_dash_ligatures to True if found, to None else.
+    use_dash_ligatures = del_complete_lines(document.preamble,
+                                ['% Added by lyx2lyx',
+                                 r'\renewcommand{\textendash}{--}',
+                                 r'\renewcommand{\textemdash}{---}']) or None
+
+    if use_dash_ligatures is None:
+        # Look for dashes (Documents by LyX 2.1 or older have "\twohyphens\n"
+        # or "\threehyphens\n" as interim representation for -- an ---.)
+        has_literal_dashes = False
+        has_ligature_dashes = False
+        j = 0
+        for i, line in enumerate(document.body):
+            # Skip some document parts where dashes are not converted
+            if (i < j) or line.startswith("\\labelwidthstring"):
+                continue
+            if line.startswith("\\begin_inset"):
+                try:
+                    it = line.split()[1]
+                except IndexError:
+                    continue
+                if (it in ["CommandInset", "ERT", "External", "Formula",
+                           "FormulaMacro", "Graphics", "IPA", "listings"]
+                    or line.endswith("Flex Code")):
+                    j = find_end_of_inset(document.body, i)
+                    if j == -1:
+                        document.warning("Malformed LyX document: Can't "
+                            "find end of %s inset at line %d." % (itype, i))
+                        continue
+            if line == "\\begin_layout LyX-Code":
+                j = find_end_of_layout(document.body, i)
+                if j == -1:
+                    document.warning("Malformed LyX document: "
+                       "Can't find end of %s layout at line %d" % (words[1],i))
+                continue
+            # literal dash followed by a word or no-break space:
+            if re.search(u"[\u2013\u2014]([\w\u00A0]|$)", line,
+                         flags=re.UNICODE):
+                has_literal_dashes = True
+            # ligature dash followed by word or no-break space on next line:
+            if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
+                re.match(u"[\w\u00A0]", document.body[i+1], flags=re.UNICODE)):
+                has_ligature_dashes = True
+        if has_literal_dashes and has_ligature_dashes:
+            # TODO: insert a warning note in the document?
+            document.warning('This document contained both literal and '
+                '"ligature" dashes.\n Line breaks may have changed. '
+                'See UserGuide chapter 3.9.1 for details.')
+        elif has_literal_dashes:
+            use_dash_ligatures = False
+        elif has_ligature_dashes:
+            use_dash_ligatures = True
+    # insert the setting if there is a preferred value
+    if use_dash_ligatures is not None:
+        i = find_token(document.header, "\\graphics")
+        document.header.insert(i, "\\use_dash_ligatures %s"
+                               % str(use_dash_ligatures).lower())
  
  def revert_dashligatures(document):
-    " Remove font ligature settings for en- and em-dashes. "
-    i = find_token(document.header, "\\use_dash_ligatures", 0)
-    if i == -1:
-        return
-    use_dash_ligatures = get_bool_value(document.header, "\\use_dash_ligatures", i)
-    del document.header[i]
-    use_non_tex_fonts = False
-    i = find_token(document.header, "\\use_non_tex_fonts", 0)
-    if i != -1:
-        use_non_tex_fonts = get_bool_value(document.header, "\\use_non_tex_fonts", i)
-    if not use_dash_ligatures or use_non_tex_fonts:
+    """Remove font ligature settings for en- and em-dashes.
+    Revert conversion of \twodashes or \threedashes to literal dashes."""
+    use_dash_ligatures = del_value(document.header, "\\use_dash_ligatures")
+    if use_dash_ligatures != "true" or document.backend != "latex":
          return
-
-    # Add a zero-length space (U+200B) after en- and em-dashes
-    i = 0
-    while i < len(document.body):
-        words = document.body[i].split()
+    j = 0
+    new_body = []
+    for i, line in enumerate(document.body):
          # Skip some document parts where dashes are not converted
-        if len(words) > 1 and words[0] == "\\begin_inset" and \
-           words[1] in ["CommandInset", "ERT", "External", "Formula", \
-                        "FormulaMacro", "Graphics", "IPA", "listings"]:
+        if (i < j) or line.startswith("\\labelwidthstring"):
+            new_body.append(line)
+            continue
+        if (line.startswith("\\begin_inset ") and
+            line[13:].split()[0] in ["CommandInset", "ERT", "External",
+                "Formula", "FormulaMacro", "Graphics", "IPA", "listings"]
+            or line == "\\begin_inset Flex Code"):
              j = find_end_of_inset(document.body, i)
              if j == -1:
-                document.warning("Malformed LyX document: Can't find end of " \
+                document.warning("Malformed LyX document: Can't find end of "
                                   + words[1] + " inset at line " + str(i))
-                i += 1
-            else:
-                i = j
+            new_body.append(line)
              continue
-        if len(words) > 0 and words[0] in ["\\leftindent", \
-                "\\paragraph_spacing", "\\align", "\\labelwidthstring"]:
-            i += 1
+        if line == "\\begin_layout LyX-Code":
+            j = find_end_of_layout(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: "
+                    "Can't find end of %s layout at line %d" % (words[1],i))
+            new_body.append(line)
              continue
-
-        start = 0
-        while True:
-            j = document.body[i].find(u"\u2013", start) # en-dash
-            k = document.body[i].find(u"\u2014", start) # em-dash
-            if j == -1 and k == -1:
-                break
-            if j == -1 or (k != -1 and k < j):
-                j = k
-            after = document.body[i][j+1:]
-            document.body[i] = document.body[i][:j+1] + u"\u200B" + after
-            start = j+1
-        i += 1
+        # TODO: skip replacement in typewriter fonts
+        line = line.replace(u'\u2013', '\\twohyphens\n')
+        line = line.replace(u'\u2014', '\\threehyphens\n')
+        lines = line.split('\n')
+        new_body.extend(line.split('\n'))
+    document.body = new_body
+    # redefine the dash LICRs to use ligature dashes:
+    add_to_preamble(document, [r'\renewcommand{\textendash}{--}',
+                               r'\renewcommand{\textemdash}{---}'])
  
  
  def revert_noto(document):
@@ -1970,6 +1972,284 @@ def revert_xout(document):
          '\\usepackage{ulem}'])
  
  
+def convert_mathindent(document):
+    """Add the \\is_math_indent tag.
+
+    The tag is written as "\\is_math_indent 1" when the "\\options" header
+    line lists the class option "fleqn" (which is then removed from the
+    options, dropping the whole line if it was the only option), and as
+    "\\is_math_indent 0" otherwise.  The tag is inserted in front of the
+    "\\quotes_style" header line.
+    """
+    has_fleqn = False
+    # check if the document uses the class option "fleqn"
+    i = find_token(document.header, "\\options", 0)
+    if i != -1:
+        options = document.header[i][len("\\options"):].split(",")
+        # compare whole options so that names merely containing "fleqn"
+        # are left alone
+        remaining = [opt for opt in options if opt.strip() != "fleqn"]
+        has_fleqn = len(remaining) != len(options)
+        if has_fleqn:
+            # delete the found option
+            if "".join(remaining).strip():
+                document.header[i] = "\\options " + ",".join(remaining).strip()
+            else:
+                # then we have fleqn as the only option
+                del document.header[i]
+    # look up the insertion point only now: deleting the options line
+    # shifts the header lines that follow it
+    k = find_token(document.header, "\\quotes_style", 0)
+    document.header.insert(k, "\\is_math_indent %d" % has_fleqn)
+
+
+def revert_mathindent(document):
+    """Define mathindent if set in the document.
+
+    A non-default "\\math_indentation" value is written to the preamble as
+    "\\setlength{\\mathindent}{...}" and the header line is removed.  The
+    "\\is_math_indent" tag is removed as well; if it was set to 1, the
+    class option "fleqn" is added to the "\\options" line (a new one is
+    inserted in front of "\\use_default_options" if there is none).
+    """
+    # first output the length
+    regexp = re.compile(r'(\\math_indentation)')
+    i = find_re(document.header, regexp, 0)
+    if i != -1:
+        words = get_value(document.header, "\\math_indentation" , i).split()
+        # an empty value is handled like "default"
+        if words and words[0] != "default":
+            add_to_preamble(document, ["\\setlength{\\mathindent}{" + words[0] + '}'])
+        del document.header[i]
+    # now remove the tag; without a tag there is nothing left to revert
+    # (and no header line must be deleted)
+    regexp = re.compile(r'(\\is_math_indent)')
+    i = find_re(document.header, regexp, 0)
+    if i == -1:
+        return
+    use_fleqn = re.search(r'\\is_math_indent 1', document.header[i]) is not None
+    # delete the tag before touching the options so that no index has to
+    # be corrected for an inserted line
+    del document.header[i]
+    if not use_fleqn:
+        return
+    # now set the document class option
+    k = find_token(document.header, "\\options", 0)
+    if k != -1:
+        document.header[k] = document.header[k].replace("\\options", "\\options fleqn,")
+    else:
+        l = find_token(document.header, "\\use_default_options", 0)
+        document.header.insert(l, "\\options fleqn")
+
+
+def revert_baselineskip(document):
+    """Revert baselineskips to TeX code.
+
+    Every body line containing "baselineskip%" is examined.  If it is the
+    first line of a VSpace inset, or the "\\length" line directly after
+    the first line of an hspace inset, the whole inset is replaced by an
+    ERT holding "\\vspace{<x>\\baselineskip}" respectively
+    "\\hspace{<x>\\baselineskip}" (starred if the inset's first line
+    contains a '*').  Processing stops with a warning if the end of such
+    an inset cannot be found.
+    """
+    # compile once, the pattern does not change between iterations
+    regexp = re.compile(r'^.*baselineskip%.*$')
+    i = 0
+    while True:
+        i = find_re(document.body, regexp, i)
+        if i == -1:
+            return
+        line = document.body[i]
+        if line.startswith("\\begin_inset VSpace"):
+            # output VSpace inset as TeX code
+            first, command, name = i, "\\vspace", "VSpace"
+            beg = line.rfind("VSpace ") + 7
+            # check if it is the starred version
+            starred = '*' in line
+        elif i > 0 and document.body[i-1].startswith("\\begin_inset space \\hspace"):
+            # output space inset as TeX code
+            first, command, name = i - 1, "\\hspace", "space"
+            # 7 leaves the blank after "\length" in place; float() ignores it
+            beg = line.rfind("\\length ") + 7
+            # check if it is the starred version
+            starred = '*' in document.body[i-1]
+        else:
+            # baselineskip in some other context, leave it alone
+            i = i + 1
+            continue
+        # first read out the value
+        end = line.rfind("baselineskip%")
+        # we store the value in percent, thus divide by 100
+        baselineskip = str(float(line[beg:end]) / 100)
+        star = '*' if starred else ''
+        # now output TeX code
+        endInset = find_end_of_inset(document.body, i)
+        if endInset == -1:
+            document.warning("Malformed LyX document: Missing '\\end_inset' of " + name + " inset.")
+            return
+        document.body[first: endInset + 1] = put_cmd_in_ert(command + star + '{' + baselineskip + "\\baselineskip}")
+        i = i + 1
+
+
+def revert_rotfloat(document):
+    """Revert placement options for rotated floats.
+
+    For every "sideways true" body line that has a line containing
+    "placement" two lines above it, the begin and end of the surrounding
+    float inset are replaced by ERTs with the matching
+    "\\begin{sideways...}[placement]" / "\\end{sideways...}" commands and
+    the rotfloat package is requested in the preamble.
+    """
+    pos = 0
+    while True:
+        pos = find_token(document.body, "sideways true", pos)
+        if pos == -1:
+            return
+        placement_re = re.compile(r'^.*placement.*$')
+        hit = find_re(document.body, placement_re, pos-2)
+        if hit == -1:
+            return
+        if hit != pos-2:
+            pos = pos + 1
+            continue
+        # we found a sideways float with placement options
+        # at first store the placement
+        placement_line = document.body[pos-2]
+        placement = placement_line[placement_line.rfind(" ")+1:]
+        # check if the option 'H' is used
+        if placement.find("H") != -1:
+            add_to_preamble(document, ["\\usepackage{float}"])
+        # now check if it is a starred type
+        star = ''
+        if document.body[pos-1].find("wide true") != -1:
+            star = '*'
+        # store the float type
+        type_line = document.body[pos-3]
+        fType = type_line[type_line.rfind(" ")+1:]
+        # now output TeX code
+        endInset = find_end_of_inset(document.body, pos-3)
+        if endInset == -1:
+            document.warning("Malformed LyX document: Missing '\\end_inset' of Float inset.")
+            return
+        # replace the end first so that the begin indices stay valid
+        document.body[endInset-2: endInset+1] = put_cmd_in_ert("\\end{sideways" + fType + star + '}')
+        document.body[pos-3: pos+2] = put_cmd_in_ert("\\begin{sideways" + fType + star + "}[" + placement + ']')
+        add_to_preamble(document, ["\\usepackage{rotfloat}"])
+
+        pos = pos + 1
+
+
+def convert_allowbreak(document):
+    """Zero widths Space-inset -> \\SpecialChar allowbreak.
+
+    Every hspace inset of length 0dd in the body (three lines followed by
+    an empty line) is replaced by a single "\\SpecialChar allowbreak"
+    line.
+    """
+    # backslashes are doubled throughout: "\h" and "\S" are no valid
+    # string escapes
+    body = "\n".join(document.body)
+    body = body.replace("\\begin_inset space \\hspace{}\n"
+                        "\\length 0dd\n"
+                        "\\end_inset\n\n",
+                        "\\SpecialChar allowbreak\n")
+    document.body = body.split("\n")
+
+
+def revert_allowbreak(document):
+    """\\SpecialChar allowbreak -> Zero widths Space-inset.
+
+    Every "\\SpecialChar allowbreak" line in the body is replaced by an
+    empty line, an hspace inset of length 0dd and another empty line.
+    """
+    # backslashes are doubled throughout: "\h" and "\S" are no valid
+    # string escapes
+    body = "\n".join(document.body)
+    body = body.replace("\\SpecialChar allowbreak\n",
+                        "\n\\begin_inset space \\hspace{}\n"
+                        "\\length 0dd\n"
+                        "\\end_inset\n\n")
+    document.body = body.split("\n")
+
+
+def convert_mathnumberpos(document):
+    """Add the \\math_number_before tag.
+
+    The tag is written as "\\math_number_before 1" when the "\\options"
+    header line lists the class option "leqno" (which is then removed from
+    the options, dropping the whole line if it was the only option), and
+    as "\\math_number_before 0" otherwise.  The tag is inserted in front
+    of the "\\quotes_style" header line.
+    """
+    has_leqno = False
+    # check if the document uses the class option "leqno"
+    m = find_token(document.header, "\\options", 0)
+    if m != -1:
+        options = document.header[m][len("\\options"):].split(",")
+        # compare whole options so that names merely containing "leqno"
+        # are left alone
+        remaining = [opt for opt in options if opt.strip() != "leqno"]
+        has_leqno = len(remaining) != len(options)
+        if has_leqno:
+            # delete the found option
+            if "".join(remaining).strip():
+                document.header[m] = "\\options " + ",".join(remaining).strip()
+            else:
+                # then we have leqno as the only option
+                del document.header[m]
+    # look up the insertion point only now: deleting the options line
+    # shifts the header lines that follow it
+    k = find_token(document.header, "\\quotes_style", 0)
+    document.header.insert(k, "\\math_number_before %d" % has_leqno)
+
+
+def revert_mathnumberpos(document):
+    """Add the document class option leqno.
+
+    The "\\math_number_before" tag is removed from the header.  If it was
+    set to 1, the class option "leqno" is added to the "\\options" line
+    (a new one is inserted in front of "\\use_default_options" if there is
+    none).
+    """
+    regexp = re.compile(r'(\\math_number_before)')
+    i = find_re(document.header, regexp, 0)
+    if i == -1:
+        # no tag, nothing to revert (and no header line must be deleted)
+        return
+    number_before = re.search(r'\\math_number_before 1', document.header[i]) is not None
+    # delete the tag before touching the options so that no index has to
+    # be corrected for an inserted line
+    del document.header[i]
+    if not number_before:
+        return
+    k = find_token(document.header, "\\options", 0)
+    if k != -1:
+        document.header[k] = document.header[k].replace("\\options", "\\options leqno,")
+    else:
+        l = find_token(document.header, "\\use_default_options", 0)
+        document.header.insert(l, "\\options leqno")
+
+
+def convert_mathnumberingname(document):
+    """Rename the \\math_number_before tag to \\math_numbering_side.
+
+    "\\math_number_before 1" becomes "\\math_numbering_side left" and
+    "\\math_number_before 0" becomes "\\math_numbering_side default".  If
+    the "\\options" header line lists the class option "reqno", the tag is
+    set to "right" instead and the option is removed (dropping the whole
+    line if it was the only option).
+    """
+    regexp = re.compile(r'(\\math_number_before 1)')
+    i = find_re(document.header, regexp, 0)
+    if i != -1:
+        document.header[i] = "\\math_numbering_side left"
+    regexp = re.compile(r'(\\math_number_before 0)')
+    i = find_re(document.header, regexp, 0)
+    if i != -1:
+        document.header[i] = "\\math_numbering_side default"
+    # check if the document uses the class option "reqno"
+    k = find_token(document.header, "\\math_numbering_side", 0)
+    m = find_token(document.header, "\\options", 0)
+    if k == -1 or m == -1:
+        # without the tag there is no line to record "right" in; keep a
+        # possible reqno class option untouched in that case
+        return
+    options = document.header[m][len("\\options"):].split(",")
+    # compare whole options so that names merely containing "reqno"
+    # are left alone
+    remaining = [opt for opt in options if opt.strip() != "reqno"]
+    if len(remaining) == len(options):
+        return
+    document.header[k] = "\\math_numbering_side right"
+    # delete the found option
+    if "".join(remaining).strip():
+        document.header[m] = "\\options " + ",".join(remaining).strip()
+    else:
+        # then we have reqno as the only option
+        del document.header[m]
+
+
+def revert_mathnumberingname(document):
+    """Rename the \\math_numbering_side tag back to \\math_number_before.
+
+    "left" maps to "\\math_number_before 1", "default" and "right" map to
+    "\\math_number_before 0"; for "right" the class option "reqno" is
+    added to the "\\options" line (a new one is inserted in front of
+    "\\use_default_options" if there is none).
+    """
+    header = document.header
+    # "left" is a plain rename
+    pos = find_re(header, re.compile(r'(\\math_numbering_side left)'), 0)
+    if pos != -1:
+        header[pos] = "\\math_number_before 1"
+    # "right" is emulated with the class option reqno
+    pos = find_re(header, re.compile(r'(\\math_numbering_side right)'), 0)
+    if pos != -1:
+        header[pos] = "\\math_number_before 0"
+        opts = find_token(header, "\\options", 0)
+        if opts == -1:
+            before = find_token(header, "\\use_default_options", 0)
+            header.insert(before, "\\options reqno")
+        else:
+            header[opts] = header[opts].replace("\\options", "\\options reqno,")
+    # "default" is a plain rename as well
+    pos = find_re(header, re.compile(r'(\\math_numbering_side default)'), 0)
+    if pos != -1:
+        header[pos] = "\\math_number_before 0"
+
+
+def convert_minted(document):
+    """Add the \\use_minted tag (value 0) in front of the last header line."""
+    document.header[-1:-1] = ["\\use_minted 0"]
+
+
+def revert_minted(document):
+    """Remove the \\use_minted tag from the header, if there is one."""
+    pos = find_token(document.header, "\\use_minted", 0)
+    if pos == -1:
+        return
+    del document.header[pos]
+
+
  ##
  # Conversion hub
  #
@@ -2004,10 +2284,24 @@ convert = [
             [534, []],
             [535, [convert_dashligatures]],
             [536, []],
-           [537, []]
+           [537, []],
+           [538, [convert_mathindent]],
+           [539, []],
+           [540, []],
+           [541, [convert_allowbreak]],
+           [542, [convert_mathnumberpos]],
+           [543, [convert_mathnumberingname]],
+           [544, [convert_minted]]
            ]
  
  revert =  [
+           [543, [revert_minted]],
+           [542, [revert_mathnumberingname]],
+           [541, [revert_mathnumberpos]],
+           [540, [revert_allowbreak]],
+           [539, [revert_rotfloat]],
+           [538, [revert_baselineskip]],
+           [537, [revert_mathindent]],
             [536, [revert_xout]],
             [535, [revert_noto]],
             [534, [revert_dashligatures]],