Revert "Try to be more pedantic wrt blank lines in lyx2lyx output"

[features.git] / lib / lyx2lyx / lyx_2_4.py
diff --git a/lib/lyx2lyx/lyx_2_4.py b/lib/lyx2lyx/lyx_2_4.py

index ec6622cb6f854008725f20a566bffe2e1115c9a5..73826a5f59373a3e9bfc1b12b73ed82fcbd25ee6 100644 (file)
--- a/lib/lyx2lyx/lyx_2_4.py
+++ b/lib/lyx2lyx/lyx_2_4.py
@@ -26,14 +26,14 @@ from datetime import (datetime, date, time)
  
  # Uncomment only what you need to import, please.
  
-from parser_tools import (count_pars_in_inset, del_token, find_end_of_inset,
-    find_end_of_layout, find_token, find_token_backwards, find_token_exact,
-    find_re, get_bool_value,
-    get_containing_layout, get_option_value, get_value, get_quoted_value)
-#    del_value, del_complete_lines,
-#    find_complete_lines, find_end_of,
+from parser_tools import (count_pars_in_inset, del_complete_lines, del_token,
+     find_end_of, find_end_of_inset, find_end_of_layout, find_token,
+     find_token_backwards, find_token_exact, find_re, get_bool_value,
+     get_containing_inset, get_containing_layout, get_option_value, get_value,
+     get_quoted_value)
+#    del_value, 
+#    find_complete_lines,
  #    find_re, find_substring,
-#    get_containing_inset,
  #    is_in_inset, set_bool_value
  #    find_tokens, check_token
  
@@ -752,7 +752,7 @@ def revert_floatalignment(document):
          i += 1
  
  def revert_tuftecite(document):
-    """Revert \cite commands in tufte classes"""
+    r"""Revert \cite commands in tufte classes"""
  
      tufte = ["tufte-book", "tufte-handout"]
      if document.textclass not in tufte:
@@ -1223,7 +1223,7 @@ def revert_dateinfo(document):
              if len(datecomps) > 1:
                  argv = datecomps[0]
                  isodate = datecomps[1]
-                m = re.search('(\d\d\d\d)-(\d\d)-(\d\d)', isodate)
+                m = re.search(r'(\d\d\d\d)-(\d\d)-(\d\d)', isodate)
                  if m:
                      dte = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
  # FIXME if we had the path to the original document (not the one in the tmp dir),
@@ -1403,11 +1403,11 @@ def revert_timeinfo(document):
              if len(timecomps) > 1:
                  argv = timecomps[0]
                  isotime = timecomps[1]
-                m = re.search('(\d\d):(\d\d):(\d\d)', isotime)
+                m = re.search(r'(\d\d):(\d\d):(\d\d)', isotime)
                  if m:
                      tme = time(int(m.group(1)), int(m.group(2)), int(m.group(3)))
                  else:
-                    m = re.search('(\d\d):(\d\d)', isotime)
+                    m = re.search(r'(\d\d):(\d\d)', isotime)
                      if m:
                          tme = time(int(m.group(1)), int(m.group(2)))
  # FIXME if we had the path to the original document (not the one in the tmp dir),
@@ -1875,7 +1875,7 @@ def revert_new_languages(document):
                       "korean":         ("", "korean"),
                      }
      if document.language in new_languages:
-        used_languages = set((document.language, ))
+        used_languages = {document.language}
      else:
          used_languages = set()
      i = 0
@@ -4100,7 +4100,7 @@ def revert_branch_darkcols(document):
             break
          k = find_token(document.header, "\\color", i, j)
          if k != -1:
-            m = re.search('\\\\color (\S+) (\S+)', document.header[k])
+            m = re.search('\\\\color (\\S+) (\\S+)', document.header[k])
              if m:
                  document.header[k] = "\\color " + m.group(1)
          i += 1
@@ -4187,8 +4187,53 @@ def revert_vcolumns2(document):
                              flt = find_token(document.body, "\\begin_layout", begcell, endcell)
                              elt = find_token_backwards(document.body, "\\end_layout", endcell)
                              if flt != -1 and elt != -1:
-                                document.body[elt:elt+1] = put_cmd_in_ert("\\end{cellvarwidth}")
-                                document.body[flt+1:flt+1] = put_cmd_in_ert("\\begin{cellvarwidth}" + alarg)
+                                extralines = []
+                                # we need to reset character layouts if necessary
+                                el = find_token(document.body, '\\emph on', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\emph default")
+                                el = find_token(document.body, '\\noun on', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\noun default")
+                                el = find_token(document.body, '\\series', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\series default")
+                                el = find_token(document.body, '\\family', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\family default")
+                                el = find_token(document.body, '\\shape', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\shape default")
+                                el = find_token(document.body, '\\color', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\color inherit")
+                                el = find_token(document.body, '\\size', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\size default")
+                                el = find_token(document.body, '\\bar under', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\bar default")
+                                el = find_token(document.body, '\\uuline on', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\uuline default")
+                                el = find_token(document.body, '\\uwave on', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\uwave default")
+                                el = find_token(document.body, '\\strikeout on', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\strikeout default")
+                                document.body[elt:elt+1] = extralines + put_cmd_in_ert("\\end{cellvarwidth}") + [r"\end_layout"]
+                                parlang = -1
+                                for q in range(flt, elt):
+                                    if document.body[q] != "" and document.body[q][0] != "\\":
+                                        break
+                                    if document.body[q][:5] == "\\lang":
+                                        parlang = q
+                                        break
+                                if parlang != -1:
+                                    document.body[parlang+1:parlang+1] = put_cmd_in_ert("\\begin{cellvarwidth}" + alarg)
+                                else:
+                                    document.body[flt+1:flt+1] = put_cmd_in_ert("\\begin{cellvarwidth}" + alarg)
                                  needcellvarwidth = True
                                  needvarwidth = True
                          # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
@@ -4238,6 +4283,273 @@ def revert_vcolumns2(document):
              add_to_preamble(document, ["\\usepackage{varwidth}"])
  
  
+def _replace_cell_erts(document, begcell, token, bs_offset, replacement):
+    """Replace the ERT insets of one table cell that hold a given command.
+
+    document    -- the document whose body is edited in place
+    begcell     -- body index of the cell's "<cell" line
+    token       -- line prefix to search for inside the cell
+    bs_offset   -- offset (relative to the hit) of the line that must be a
+                   backslash line for the hit to count as a LaTeX command
+    replacement -- list of body lines that replaces the whole ERT inset
+
+    The search stops at the first hit that does not qualify.
+    """
+    while True:
+        # Replacing an inset moves the cell end, so look it up again each time.
+        endcell = find_token(document.body, "</cell>", begcell)
+        hit = find_token(document.body, token, begcell, endcell)
+        if hit == -1 or document.body[hit + bs_offset] != "\\backslash":
+            return
+        ertins = get_containing_inset(document.body, hit)
+        if not ertins or ertins[0] != "ERT":
+            # Not inside ERT (e.g. literal text): nothing would be replaced,
+            # so searching again would find the same line forever.
+            return
+        document.body[ertins[1] : ertins[2] + 1] = replacement
+
+
+def convert_vcolumns2(document):
+    """Convert varwidth ERT to native
+
+    For every cell of every Tabular inset that contains a cellvarwidth
+    environment written as ERT, remove the ERT insets holding the begin/end
+    commands and turn ERT newlines, linebreaks and (in multirow cells)
+    endgraf commands inside that cell into native constructs.  The preamble
+    lines that define cellvarwidth and load varwidth are removed in any case,
+    even if parsing the body raises.
+    """
+    i = 0
+    try:
+        while True:
+            i = find_token(document.body, "\\begin_inset Tabular", i+1)
+            if i == -1:
+                return
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Could not find end of tabular.")
+                continue
+
+            # Parse cells; the row and column counts are the 2nd and 3rd
+            # quoted values of the line following the inset start.
+            features = document.body[i+1].split('"')
+            nrows = int(features[3])
+            ncols = int(features[5])
+            m = i + 1
+            for _ in range(nrows * ncols):
+                m = find_token(document.body, "<cell", m)
+                if m == -1:
+                    document.warning("Malformed LyX document: Could not find all cells of tabular.")
+                    break
+                multirow = get_option_value(document.body[m], 'multirow') != ""
+                begcell = m
+                endcell = find_token(document.body, "</cell>", begcell)
+                vcand = False
+                cvw = find_token(document.body, "begin{cellvarwidth}", begcell, endcell)
+                if cvw != -1 and document.body[cvw - 1] == "\\backslash":
+                    # get_containing_inset may return a false value
+                    ertins = get_containing_inset(document.body, cvw)
+                    vcand = bool(ertins) and ertins[0] == "ERT"
+                if vcand:
+                    # Remove ERTs with cellvarwidth env.  The end command goes
+                    # first so that the position of the begin command, which
+                    # is searched for afterwards, is not affected.
+                    ecvw = find_token(document.body, "end{cellvarwidth}", begcell, endcell)
+                    if ecvw != -1 and document.body[ecvw - 1] == "\\backslash":
+                        eertins = get_containing_inset(document.body, ecvw)
+                        if eertins and eertins[0] == "ERT":
+                            del document.body[eertins[1] : eertins[2] + 1]
+
+                    cvw = find_token(document.body, "begin{cellvarwidth}", begcell, endcell)
+                    ertins = get_containing_inset(document.body, cvw)
+                    if ertins and ertins[0] == "ERT":
+                        del document.body[ertins[1] : ertins[2] + 1]
+
+                    # Convert ERT newlines (as cellvarwidth detection relies on that):
+                    # a backslash line followed, two lines later, by another one
+                    _replace_cell_erts(document, begcell, "\\backslash", 2,
+                                       ["\\begin_inset Newline newline", "", "\\end_inset"])
+
+                    # Same for linebreaks
+                    _replace_cell_erts(document, begcell, "linebreak", -1,
+                                       ["\\begin_inset Newline linebreak", "", "\\end_inset"])
+
+                    # And \\endgraf, which becomes a paragraph break.  This must
+                    # not leave the cell loop: doing so would skip the
+                    # increment of m and the remaining cells of the table.
+                    if multirow:
+                        _replace_cell_erts(document, begcell, "endgraf{}", -1,
+                                           ["\\end_layout", "", "\\begin_layout Plain Layout"])
+                m += 1
+
+            i += 1
+
+    finally:
+        del_complete_lines(document.preamble,
+                                ['% Added by lyx2lyx',
+                                 '%% Variable width box for table cells',
+                                 r'\newenvironment{cellvarwidth}[1][t]',
+                                 r'    {\begin{varwidth}[#1]{\linewidth}}',
+                                 r'    {\@finalstrut\@arstrutbox\end{varwidth}}'])
+        del_complete_lines(document.preamble,
+                                ['% Added by lyx2lyx',
+                                 r'\usepackage{varwidth}'])
+
+
+# Local layout lines defining a Frontispiece style as a copy of Titlehead.
+# revert_koma_frontispiece appends them to the document's local layout and
+# convert_koma_frontispiece removes them again.
+frontispiece_def = [
+    r'### Inserted by lyx2lyx (frontispiece layout) ###',
+    r'Style Frontispiece',
+    r'  CopyStyle             Titlehead',
+    r'  LatexName             frontispiece',
+    r'End',
+]
+
+
+def convert_koma_frontispiece(document):
+    """Remove local KOMA frontispiece definition
+
+    Only documents whose text class name starts with "scr" are touched.
+    The definition removed is frontispiece_def, i.e. exactly what
+    revert_koma_frontispiece appends.
+    """
+    if document.textclass[:3] != "scr":
+        return
+
+    # Removing the local layout is all there is to do.  No module is added:
+    # the definition is a plain style copied from Titlehead and has nothing
+    # to do with the "ruby" module, and the matching revert routine does not
+    # remove any module either.
+    document.del_local_layout(frontispiece_def)
+
+
+def revert_koma_frontispiece(document):
+    """Give KOMA documents that use Frontispiece a local definition of it"""
+    is_koma = document.textclass[:3] == "scr"
+    # The body is only searched for KOMA classes (short-circuit evaluation).
+    if is_koma and find_token(document.body, "\\begin_layout Frontispiece", 0) != -1:
+        document.append_local_layout(frontispiece_def)
+
+
+def revert_spellchecker_ignore(document):
+    """Delete all spellchecker_ignore lines from the document header"""
+    token = "\\spellchecker_ignore"
+    pos = find_token(document.header, token)
+    while pos != -1:
+        del document.header[pos]
+        # search again from the top: the header has just shrunk
+        pos = find_token(document.header, token)
+
+
+def revert_docbook_mathml_prefix(document):
+    """Delete all docbook_mathml_prefix lines from the document header"""
+    token = "\\docbook_mathml_prefix"
+    pos = find_token(document.header, token)
+    while pos != -1:
+        del document.header[pos]
+        # search again from the top: the header has just shrunk
+        pos = find_token(document.header, token)
+
+
+def revert_document_metadata(document):
+    """Delete every begin_metadata ... end_metadata block from the header"""
+    start = find_token(document.header, "\\begin_metadata", 0)
+    while start != -1:
+        end = find_end_of(document.header, start, "\\begin_metadata", "\\end_metadata")
+        if end == -1:
+            # this should not happen
+            return
+        del document.header[start : end + 1]
+        # whatever followed the block now sits at index start
+        start = find_token(document.header, "\\begin_metadata", start)
+
+
+def revert_index_macros(document):
+    """Revert inset index macros
+
+    For each Index inset in the body: read and delete the "range" and
+    "pageformat" parameter lines, collect and delete the IndexMacro
+    subinsets (seealso, see, up to two subentry, sortkey) and re-insert
+    their content into the inset's plain layout, separated by ERT holding
+    the characters "@", "!" and "|".
+    """
+
+    i = 0
+    while True:
+        # trailing blank needed here to exclude IndexMacro insets
+        i = find_token(document.body, '\\begin_inset Index ', i+1)
+        if i == -1:
+            break
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Malformed LyX document: Can't find end of index inset at line %d" % i)
+            continue
+        pl = find_token(document.body, '\\begin_layout Plain Layout', i, j)
+        if pl == -1:
+            document.warning("Malformed LyX document: Can't find plain layout in index inset at line %d" % i)
+            continue
+        # find, store and remove inset params
+        # (they are searched for between the inset start and its plain layout)
+        pr = find_token(document.body, 'range', i, pl)
+        prval = get_quoted_value(document.body, "range", pr)
+        pagerange = ""
+        if prval == "start":
+            pagerange = "("
+        elif prval == "end":
+            pagerange = ")"
+        pf = find_token(document.body, 'pageformat', i, pl)
+        pageformat = get_quoted_value(document.body, "pageformat", pf)
+        # NOTE(review): this assumes both lines were found and that "range"
+        # comes before "pageformat" -- confirm against the file format
+        del document.body[pr:pf+1]
+        # Now re-find (potentially moved) inset end again, and search for subinsets
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Malformed LyX document: Can't find end of index inset at line %d" % i)
+            continue
+        # We search for all possible subentries in turn, store their
+        # content and delete them
+        see = []
+        seealso = []
+        subentry = []
+        subentry2 = []
+        sortkey = []
+        # Two subentries are allowed, thus the duplication
+        imacros = ["seealso", "see", "subentry", "subentry", "sortkey"]
+        for imacro in imacros:
+            iim = find_token(document.body, "\\begin_inset IndexMacro %s" % imacro, i, j)
+            if iim == -1:
+                continue
+            iime = find_end_of_inset(document.body, iim)
+            if iime == -1:
+                document.warning("Malformed LyX document: Can't find end of index macro inset at line %d" % i)
+                continue
+            iimpl = find_token(document.body, '\\begin_layout Plain Layout', iim, iime)
+            if iimpl == -1:
+                document.warning("Malformed LyX document: Can't find plain layout in index macro inset at line %d" % i)
+                continue
+            iimple = find_end_of_layout(document.body, iimpl)
+            if iimple == -1:
+                document.warning("Malformed LyX document: Can't find end of index macro inset plain layout at line %d" % i)
+                continue
+            # icont starts with the \begin_layout line itself; the branches
+            # below drop it with [1:], except for the sortkey
+            icont = document.body[iimpl:iimple]
+            if imacro == "seealso":
+                seealso = icont[1:]
+            elif imacro == "see":
+                see = icont[1:]
+            elif imacro == "subentry":
+                # subentries might have their own sortkey!
+                xiim = find_token(document.body, "\\begin_inset IndexMacro sortkey", iimpl, iimple)
+                if xiim != -1:
+                    xiime = find_end_of_inset(document.body, xiim)
+                    if xiime == -1:
+                        document.warning("Malformed LyX document: Can't find end of index macro inset at line %d" % i)
+                    else:
+                        xiimpl = find_token(document.body, '\\begin_layout Plain Layout', xiim, xiime)
+                        if xiimpl == -1:
+                            document.warning("Malformed LyX document: Can't find plain layout in index macro inset at line %d" % i)
+                        else:
+                            xiimple = find_end_of_layout(document.body, xiimpl)
+                            if xiimple == -1:
+                                document.warning("Malformed LyX document: Can't find end of index macro inset plain layout at line %d" % i)
+                            else:
+                                # the sortkey
+                                xicont = document.body[xiimpl+1:xiimple]
+                                # everything before ................... or after
+                                xxicont = document.body[iimpl+1:xiim] + document.body[xiime+1:iimple]
+                                # construct the latex sequence
+                                icont = xicont + put_cmd_in_ert("@") + xxicont[1:]
+                # the first subentry found fills subentry, the second subentry2
+                if len(subentry) > 0:
+                    subentry2 = icont[1:]
+                else:
+                    subentry = icont[1:]
+            elif imacro == "sortkey":
+                # kept with its \begin_layout line, which is needed when the
+                # sortkey replaces the layout start further down
+                sortkey = icont
+            # Everything stored. Delete subinset.
+            del document.body[iim:iime+1]
+            # Again re-find (potentially moved) index inset end
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Can't find end of index inset at line %d" % i)
+                continue
+        # Now insert all stuff, starting from the inset end
+        pl = find_token(document.body, '\\begin_layout Plain Layout', i, j)
+        if pl == -1:
+            document.warning("Malformed LyX document: Can't find plain layout in index inset at line %d" % i)
+            continue
+        ple = find_end_of_layout(document.body, pl)
+        if ple == -1:
+            document.warning("Malformed LyX document: Can't find end of index macro inset plain layout at line %d" % i)
+            continue
+        # Everything below is inserted at the same index ple, so each later
+        # insertion ends up in front of the earlier ones: the resulting order
+        # is subentry, subentry2, then the "|" part.
+        if len(see) > 0:
+            document.body[ple:ple] = put_cmd_in_ert("|" + pagerange + "see{") + see + put_cmd_in_ert("}")
+        elif len(seealso) > 0:
+            document.body[ple:ple] = put_cmd_in_ert("|" + pagerange + "seealso{") + seealso + put_cmd_in_ert("}")
+        elif pageformat != "default":
+            document.body[ple:ple] = put_cmd_in_ert("|" + pagerange + pageformat)
+        if len(subentry2) > 0:
+            document.body[ple:ple] = put_cmd_in_ert("!") + subentry2
+        if len(subentry) > 0:
+            document.body[ple:ple] = put_cmd_in_ert("!") + subentry
+        if len(sortkey) > 0:
+            # document.body[pl:pl] is an empty list, so the \begin_layout line
+            # at pl is replaced by the sortkey lines followed by an ERT "@"
+            document.body[pl:pl+1] = document.body[pl:pl] + sortkey + put_cmd_in_ert("@")
+            
+
  ##
  # Conversion hub
  #
@@ -4304,10 +4616,20 @@ convert = [
             [602, [convert_branch_colors]],
             [603, []],
             [604, []],
-           [605, []]
+           [605, [convert_vcolumns2]],
+           [606, [convert_koma_frontispiece]],
+           [607, []],
+           [608, []],
+           [609, []],
+           [610, []]
            ]
  
-revert =  [[604, [revert_vcolumns2]],
+revert =  [[609, [revert_index_macros]],
+           [608, [revert_document_metadata]],
+           [607, [revert_docbook_mathml_prefix]],
+           [606, [revert_spellchecker_ignore]],
+           [605, [revert_koma_frontispiece]],
+           [604, [revert_vcolumns2]],
             [603, [revert_branch_darkcols]],
             [602, [revert_darkmode_graphics]],
             [601, [revert_branch_colors]],