ePub: share a bit of code for parsing arguments.

[lyx.git] / lib / lyx2lyx / lyx_2_4.py
diff --git a/lib/lyx2lyx/lyx_2_4.py b/lib/lyx2lyx/lyx_2_4.py

index e1a8e6abe5b07f3a9d7375ba9fadb64403b14f96..2ea050e6ee995cfbe984a2de4fe3d2500dfacbdd 100644 (file)
--- a/lib/lyx2lyx/lyx_2_4.py
+++ b/lib/lyx2lyx/lyx_2_4.py
@@ -26,14 +26,13 @@ from datetime import (datetime, date, time)
  
  # Uncomment only what you need to import, please.
  
-from parser_tools import (count_pars_in_inset, del_token, find_end_of_inset,
-    find_end_of_layout, find_token, find_token_backwards, find_token_exact,
-    find_re, get_bool_value,
+from parser_tools import (count_pars_in_inset, del_complete_lines, del_token,
+    find_end_of_inset, find_end_of_layout, find_token, find_token_backwards,
+    find_token_exact, find_re, get_bool_value, get_containing_inset,
      get_containing_layout, get_option_value, get_value, get_quoted_value)
-#    del_value, del_complete_lines,
+#    del_value, 
  #    find_complete_lines, find_end_of,
  #    find_re, find_substring,
-#    get_containing_inset,
  #    is_in_inset, set_bool_value
  #    find_tokens, check_token
  
@@ -3506,16 +3505,13 @@ def revert_totalheight(document):
                  special = m.group(1)
              mspecial = special.split(',')
              for spc in mspecial:
-                if spc[:7] == "height=":
+                if spc.startswith("height="):
                      oldheight = spc.split('=')[1]
                      ms = rxx.search(oldheight)
                      if ms:
-                        oldval = ms.group(1)
                          oldunit = ms.group(2)
-                        if oldval[1] == ".":
-                            oldval = "0" + oldval
                          if oldunit in list(relative_heights.keys()):
-                            oldval = str(float(oldval) * 100)
+                            oldval = str(float(ms.group(1)) * 100)
                              oldunit = relative_heights[oldunit]
                              oldheight = oldval + oldunit
                      mspecial.remove(spc)
@@ -3777,7 +3773,7 @@ def revert_counter_inset(document):
              ert = put_cmd_in_ert("\\setcounter{%s}{\\value{%s}}" % (cnt, savecnt))
          else:
              document.warning("Unknown counter command `%s' in inset at line %d!" % (cnt, i))
-            
+
          if ert:
              document.body[i : j + 1] = ert
          i += 1
@@ -3804,7 +3800,7 @@ def revert_ams_spaces(document):
          subst = put_cmd_in_ert(inset)
          document.body[i : end + 1] = subst
          Found = True
-      
+
      if Found == True:
          # load amsmath in the preamble if not already loaded
          i = find_token(document.header, "\\use_package amsmath 2", 0)
@@ -3815,18 +3811,18 @@ def revert_ams_spaces(document):
  
  def convert_parskip(document):
      " Move old parskip settings to preamble "
-    
+
      i = find_token(document.header, "\\paragraph_separation skip", 0)
      if i == -1:
          return
-    
+
      j = find_token(document.header, "\\defskip", 0)
      if j == -1:
          document.warning("Malformed LyX document! Missing \\defskip.")
          return
-    
+
      val = get_value(document.header, "\\defskip", j)
-    
+
      skipval = "\\medskipamount"
      if val == "smallskip" or val == "medskip" or val == "bigskip":
          skipval = "\\" + val + "amount"
@@ -3834,25 +3830,25 @@ def convert_parskip(document):
          skipval = val
  
      add_to_preamble(document, ["\\setlength{\\parskip}{" + skipval + "}", "\\setlength{\\parindent}{0pt}"])
-    
+
      document.header[i] = "\\paragraph_separation indent"
      document.header[j] = "\\paragraph_indentation default"
  
  
  def revert_parskip(document):
      " Revert new parskip settings to preamble "
-    
+
      i = find_token(document.header, "\\paragraph_separation skip", 0)
      if i == -1:
          return
-    
+
      j = find_token(document.header, "\\defskip", 0)
      if j == -1:
          document.warning("Malformed LyX document! Missing \\defskip.")
          return
-    
+
      val = get_value(document.header, "\\defskip", j)
-    
+
      skipval = ""
      if val == "smallskip" or val == "medskip" or val == "bigskip":
          skipval = "[skip=\\" + val + "amount]"
@@ -3860,9 +3856,9 @@ def revert_parskip(document):
          skipval = "[skip=\\baselineskip]"
      elif val != "halfline":
          skipval = "[skip={" + val + "}]"
-    
+
      add_to_preamble(document, ["\\usepackage" + skipval + "{parskip}"])
-    
+
      document.header[i] = "\\paragraph_separation indent"
      document.header[j] = "\\paragraph_indentation default"
  
@@ -3961,6 +3957,445 @@ def revert_docbook_table_output(document):
          del document.header[i]
  
  
+def revert_nopagebreak(document):
+    """Replace every "Newpage nopagebreak" inset in the body with ERT.
+
+    Each such inset is substituted by the lines that put_cmd_in_ert()
+    produces for the command \\nopagebreak{}. An inset whose end cannot
+    be found is reported through document.warning() and left untouched.
+    """
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Newpage nopagebreak", i)
+        if i == -1:
+            return
+        end = find_end_of_inset(document.body, i)
+        if end == -1:
+            document.warning("Malformed LyX document: Could not find end of Newpage inset.")
+            # Step past the broken inset; searching from the same line
+            # again would find it again and never terminate.
+            i += 1
+            continue
+        subst = put_cmd_in_ert("\\nopagebreak{}")
+        document.body[i : end + 1] = subst
+        i += 1
+
+
+def revert_hrquotes(document):
+    """Revert Hungarian quotation marks.
+
+    The header setting "\\quotes_style hungarian" becomes
+    "\\quotes_style polish", and the four Hungarian quote insets in the
+    body are rewritten to the replacement insets listed in the table below.
+    Lines that start with "\\begin_inset Quotes h" but match none of the
+    four known insets are left unchanged.
+    """
+    i = find_token(document.header, "\\quotes_style hungarian", 0)
+    if i != -1:
+        document.header[i] = "\\quotes_style polish"
+
+    replacements = {
+        "\\begin_inset Quotes hld": "\\begin_inset Quotes pld",
+        "\\begin_inset Quotes hrd": "\\begin_inset Quotes prd",
+        "\\begin_inset Quotes hls": "\\begin_inset Quotes ald",
+        "\\begin_inset Quotes hrs": "\\begin_inset Quotes ard",
+    }
+
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Quotes h", i)
+        if i == -1:
+            return
+        document.body[i] = replacements.get(document.body[i], document.body[i])
+        # Always advance, so an unrecognised "Quotes h..." line cannot be
+        # found over and over again.
+        i += 1
+
+
+def convert_math_refs(document):
+    """Rename \\prettyref to \\formatted inside Formula insets.
+
+    Every body line from the start of a Formula inset up to, but not
+    including, its end line is rewritten. Insets without a findable end
+    are reported with a warning and skipped.
+    """
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\\begin_inset Formula", pos)
+        if pos == -1:
+            return
+        inset_end = find_end_of_inset(document.body, pos)
+        if inset_end == -1:
+            document.warning("Can't find end of inset at line %d of body!" % pos)
+            pos += 1
+            continue
+        for n in range(pos, inset_end):
+            document.body[n] = document.body[n].replace("\\prettyref", "\\formatted")
+        # Resume the search at the end line of the inset just handled.
+        pos = max(pos, inset_end)
+        
+
+def revert_math_refs(document):
+    """Undo the math reference renaming inside Formula insets.
+
+    On every body line from the start of a Formula inset up to, but not
+    including, its end line, \\formatted is turned back into \\prettyref
+    and each \\labelonly{...} is replaced by its bare argument. Insets
+    without a findable end are reported with a warning and skipped.
+    """
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\\begin_inset Formula", pos)
+        if pos == -1:
+            return
+        inset_end = find_end_of_inset(document.body, pos)
+        if inset_end == -1:
+            document.warning("Can't find end of inset at line %d of body!" % pos)
+            pos += 1
+            continue
+        for n in range(pos, inset_end):
+            line = document.body[n].replace("\\formatted", "\\prettyref")
+            if "\\labelonly" in line:
+                line = re.sub("\\\\labelonly{([^}]+?)}", "\\1", line)
+            document.body[n] = line
+        # Resume the search at the end line of the inset just handled.
+        pos = max(pos, inset_end)
+
+
+def convert_branch_colors(document):
+    """Convert branch colors to semantic values.
+
+    Within each branch definition in the header, a "\\color #faf0e6" line
+    is replaced by "\\color background". Processing stops with a warning
+    at the first branch whose "\\end_branch" cannot be found.
+    """
+    start = 0
+    while True:
+        start = find_token(document.header, "\\branch", start)
+        if start == -1:
+            return
+        stop = find_token(document.header, "\\end_branch", start)
+        if stop == -1:
+            document.warning("Malformed LyX document. Can't find end of branch definition!")
+            return
+        # We only support the standard LyX background for now
+        color_line = find_token(document.header, "\\color #faf0e6", start, stop)
+        if color_line != -1:
+            document.header[color_line] = "\\color background"
+        start += 1
+
+
+def revert_branch_colors(document):
+    """Revert semantic branch colors.
+
+    Within each branch definition in the header, a "\\color" value that is
+    not a hexadecimal color (i.e. does not start with "#") is replaced by
+    "none". Hexadecimal colors are kept as they are. Processing stops with
+    a warning at the first branch whose "\\end_branch" cannot be found.
+    """
+    i = 0
+    while True:
+        i = find_token(document.header, "\\branch", i)
+        if i == -1:
+            break
+        j = find_token(document.header, "\\end_branch", i)
+        if j == -1:
+            document.warning("Malformed LyX document. Can't find end of branch definition!")
+            break
+        k = find_token(document.header, "\\color", i, j)
+        if k != -1:
+            bcolor = get_value(document.header, "\\color", k)
+            # The "#" of a hex color is the first character of the value.
+            # startswith() also copes with an empty or one-character value.
+            if not bcolor.startswith("#"):
+                # this will be read as background by LyX 2.3
+                document.header[k] = "\\color none"
+        i += 1
+
+
+def revert_darkmode_graphics(document):
+    """Revert darkModeSensitive InsetGraphics param.
+
+    Deletes the first line starting with a tab followed by
+    "darkModeSensitive" from each Graphics inset in the body. Insets
+    without a findable end are reported with a warning and skipped.
+    """
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\\begin_inset Graphics", pos)
+        if pos == -1:
+            return
+        inset_end = find_end_of_inset(document.body, pos)
+        if inset_end == -1:
+            document.warning("Can't find end of graphics inset at line %d!!" % (pos))
+            pos += 1
+            continue
+        param_line = find_token(document.body, "\tdarkModeSensitive", pos, inset_end)
+        if param_line != -1:
+            del document.body[param_line]
+        pos += 1
+
+
+def revert_branch_darkcols(document):
+    """Revert dark branch colors.
+
+    Within each branch definition in the header, a "\\color" line that
+    carries two whitespace-separated values is cut down to its first
+    value. Lines with a single value are left alone. Processing stops with
+    a warning at the first branch whose "\\end_branch" cannot be found.
+    """
+    i = 0
+    while True:
+        i = find_token(document.header, "\\branch", i)
+        if i == -1:
+            break
+        j = find_token(document.header, "\\end_branch", i)
+        if j == -1:
+            document.warning("Malformed LyX document. Can't find end of branch definition!")
+            break
+        k = find_token(document.header, "\\color", i, j)
+        if k != -1:
+            # Raw string: same pattern as before, but without relying on
+            # "\S" being an unrecognised (and therefore preserved) escape.
+            m = re.search(r'\\color (\S+) (\S+)', document.header[k])
+            if m:
+                document.header[k] = "\\color " + m.group(1)
+        i += 1
+
+
+def revert_vcolumns2(document):
+    """Revert varwidth columns with line breaks etc.
+
+    Walks every Tabular inset in the body. A cell without a fixed width
+    becomes a "varwidth candidate" when it contains a Newline inset, more
+    than one paragraph, or a first layout other than "Plain Layout".
+    For such a cell the function either
+
+    * adds a V column "special" argument to the column line (or to the
+      cell line for multicolumn cells) when the cell is unrotated and
+      left/top aligned and the column has no width, varwidth or special
+      setting, or
+    * wraps the cell content into a "cellvarwidth" environment via ERT
+      when the cell is rotated or not left/top aligned.
+
+    Afterwards Newline insets of candidate cells are replaced by ERT, and
+    in multirow cells paragraph breaks are replaced by an ERT endgraf
+    command. The required preamble code is added in the "finally" clause,
+    so it is also emitted when the loop is left through "return".
+    """
+    i = 0
+    needvarwidth = False
+    needarray = False
+    needcellvarwidth = False
+    try:
+        while True:
+            i = find_token(document.body, "\\begin_inset Tabular", i+1)
+            if i == -1:
+                return
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Could not find end of tabular.")
+                continue
+
+            # Collect necessary column information
+            m = i + 1
+            # Row and column counts are the 4th and 6th quote-delimited
+            # fields of the line that follows the inset start.
+            nrows = int(document.body[i+1].split('"')[3])
+            ncols = int(document.body[i+1].split('"')[5])
+            # One entry per column:
+            # [width, varwidth, alignment, valignment, special, line index]
+            col_info = []
+            for k in range(ncols):
+                m = find_token(document.body, "<column", m)
+                width = get_option_value(document.body[m], 'width')
+                varwidth = get_option_value(document.body[m], 'varwidth')
+                alignment = get_option_value(document.body[m], 'alignment')
+                valignment = get_option_value(document.body[m], 'valignment')
+                special = get_option_value(document.body[m], 'special')
+                col_info.append([width, varwidth, alignment, valignment, special, m])
+                m += 1
+
+            # Now parse cells
+            m = i + 1
+            # NOTE(review): "lines" is never read in this function.
+            lines = []
+            for row in range(nrows):
+                for col in range(ncols):
+                    m = find_token(document.body, "<cell", m)
+                    multicolumn = get_option_value(document.body[m], 'multicolumn') != ""
+                    multirow = get_option_value(document.body[m], 'multirow') != ""
+                    fixedwidth = get_option_value(document.body[m], 'width') != ""
+                    rotate = get_option_value(document.body[m], 'rotate')
+                    cellalign = get_option_value(document.body[m], 'alignment')
+                    cellvalign = get_option_value(document.body[m], 'valignment')
+                    # Check for: linebreaks, multipars, non-standard environments
+                    begcell = m
+                    endcell = find_token(document.body, "</cell>", begcell)
+                    # vcand: the cell needs varwidth treatment (never set
+                    # for cells that have a fixed width).
+                    vcand = False
+                    if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
+                        vcand = not fixedwidth
+                    elif count_pars_in_inset(document.body, begcell + 2) > 1:
+                        vcand = not fixedwidth
+                    elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
+                        vcand = not fixedwidth
+                    colalignment = col_info[col][2]
+                    colvalignment = col_info[col][3]
+                    if vcand:
+                        # Case 1: unrotated, left/top aligned -> V column
+                        if rotate == "" and ((colalignment == "left" and colvalignment == "top") or (multicolumn == True and cellalign == "left" and cellvalign == "top")):
+                            # Only when the column has no width, varwidth
+                            # or special argument of its own.
+                            if col_info[col][0] == "" and col_info[col][1] == "" and col_info[col][4] == "":
+                                needvarwidth = True
+                                col_line = col_info[col][5]
+                                needarray = True
+                                vval = "V{\\linewidth}"
+                                # Drop the closing ">" of the tag line and
+                                # re-add it after the new special option.
+                                if multicolumn:
+                                    document.body[m] = document.body[m][:-1] + " special=\"" + vval + "\">"
+                                else:
+                                    document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
+                        # Case 2: wrap the cell content into cellvarwidth
+                        else:
+                            # Optional argument of the environment, taken
+                            # from the cell (multicolumn/multirow) or the
+                            # column vertical alignment.
+                            alarg = ""
+                            if multicolumn or multirow:
+                                if cellvalign == "middle":
+                                    alarg = "[m]"
+                                elif cellvalign == "bottom":
+                                    alarg = "[b]"
+                            else:
+                                if colvalignment == "middle":
+                                    alarg = "[m]"
+                                elif colvalignment == "bottom":
+                                    alarg = "[b]"
+                            # First \begin_layout and last \end_layout of
+                            # the cell.
+                            flt = find_token(document.body, "\\begin_layout", begcell, endcell)
+                            elt = find_token_backwards(document.body, "\\end_layout", endcell)
+                            if flt != -1 and elt != -1:
+                                extralines = []
+                                # we need to reset character layouts if necessary
+                                el = find_token(document.body, '\\emph on', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\emph default")
+                                el = find_token(document.body, '\\noun on', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\noun default")
+                                el = find_token(document.body, '\\series', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\series default")
+                                el = find_token(document.body, '\\family', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\family default")
+                                el = find_token(document.body, '\\shape', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\shape default")
+                                el = find_token(document.body, '\\color', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\color inherit")
+                                el = find_token(document.body, '\\size', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\size default")
+                                el = find_token(document.body, '\\bar under', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\bar default")
+                                el = find_token(document.body, '\\uuline on', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\uuline default")
+                                el = find_token(document.body, '\\uwave on', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\uwave default")
+                                el = find_token(document.body, '\\strikeout on', flt, elt)
+                                if el != -1:
+                                    extralines.append("\\strikeout default")
+                                # The closing ERT is inserted first: it sits
+                                # below the opening one, so the index flt
+                                # used afterwards is not shifted.
+                                # NOTE(review): "\end_layout" relies on "\e"
+                                # not being a recognised escape sequence.
+                                document.body[elt:elt+1] = extralines + put_cmd_in_ert("\\end{cellvarwidth}") + ["\end_layout"]
+                                # Look for a \lang line among the lines that
+                                # precede the first text line of the
+                                # paragraph; the opening ERT goes after it.
+                                parlang = -1
+                                for q in range(flt, elt):
+                                    if document.body[q] != "" and document.body[q][0] != "\\":
+                                        break
+                                    if document.body[q][:5] == "\\lang":
+                                        parlang = q
+                                        break
+                                if parlang != -1:
+                                    document.body[parlang+1:parlang+1] = put_cmd_in_ert("\\begin{cellvarwidth}" + alarg)
+                                else:
+                                    document.body[flt+1:flt+1] = put_cmd_in_ert("\\begin{cellvarwidth}" + alarg)
+                                needcellvarwidth = True
+                                needvarwidth = True
+                        # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
+                        # with newlines, and we do not want that)
+                        while True:
+                            # The cell end is searched again on every pass
+                            # because the replacements change the body
+                            # length.
+                            endcell = find_token(document.body, "</cell>", begcell)
+                            linebreak = False
+                            nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
+                            if nl == -1:
+                                nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
+                                if nl == -1:
+                                     break
+                                linebreak = True
+                            nle = find_end_of_inset(document.body, nl)
+                            # Remove the inset's end line, then replace its
+                            # start line by the ERT.
+                            del(document.body[nle:nle+1])
+                            if linebreak:
+                                document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
+                            else:
+                                document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
+                        # Replace parbreaks in multirow with \\endgraf
+                        if multirow == True:
+                            flt = find_token(document.body, "\\begin_layout", begcell, endcell)
+                            if flt != -1:
+                                while True:
+                                    elt = find_end_of_layout(document.body, flt)
+                                    if elt == -1:
+                                        document.warning("Malformed LyX document! Missing layout end.")
+                                        break
+                                    endcell = find_token(document.body, "</cell>", begcell)
+                                    flt = find_token(document.body, "\\begin_layout", elt, endcell)
+                                    if flt == -1:
+                                        break
+                                    # Everything from the end of one layout
+                                    # to the start of the next becomes the
+                                    # ERT.
+                                    document.body[elt : flt + 1] = put_cmd_in_ert("\\endgraf{}")
+                    m += 1
+
+            i = j
+
+    finally:
+        # Runs on every exit path, including the "return" above.
+        if needarray == True:
+            add_to_preamble(document, ["\\usepackage{array}"])
+        if needcellvarwidth == True:
+            add_to_preamble(document, ["%% Variable width box for table cells",
+                                       "\\newenvironment{cellvarwidth}[1][t]",
+                                       "    {\\begin{varwidth}[#1]{\\linewidth}}",
+                                       "    {\\@finalstrut\\@arstrutbox\\end{varwidth}}"])
+        if needvarwidth == True:
+            add_to_preamble(document, ["\\usepackage{varwidth}"])
+
+
+def convert_vcolumns2(document):
+    """Convert varwidth ERT to native.
+
+    For every cell of every Tabular inset that contains an ERT with
+    "begin{cellvarwidth}", the ERT insets that open and close the
+    environment are removed. In those cells, ERT newlines and linebreaks
+    are turned into Newline insets, and, in multirow cells, ERT endgraf
+    commands are turned into paragraph breaks.
+
+    The preamble code for the cellvarwidth environment and the varwidth
+    package is removed in the "finally" clause, so this also happens when
+    the loop is left through "return".
+    """
+    i = 0
+    try:
+        while True:
+            i = find_token(document.body, "\\begin_inset Tabular", i+1)
+            if i == -1:
+                return
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Could not find end of tabular.")
+                continue
+
+            # Parse cells
+            # Row and column counts are the 4th and 6th quote-delimited
+            # fields of the line that follows the inset start.
+            nrows = int(document.body[i+1].split('"')[3])
+            ncols = int(document.body[i+1].split('"')[5])
+            m = i + 1
+            for row in range(nrows):
+                for col in range(ncols):
+                    m = find_token(document.body, "<cell", m)
+                    multirow = get_option_value(document.body[m], 'multirow') != ""
+                    begcell = m
+                    endcell = find_token(document.body, "</cell>", begcell)
+                    vcand = False
+                    cvw = find_token(document.body, "begin{cellvarwidth}", begcell, endcell)
+                    if cvw != -1 and document.body[cvw - 1] == "\\backslash":
+                        # The lookup result is checked before indexing, as
+                        # is done for the other lookups in this function.
+                        cvwins = get_containing_inset(document.body, cvw)
+                        vcand = bool(cvwins) and cvwins[0] == "ERT"
+                    if vcand:
+                        # Remove ERTs with cellvarwidth env
+                        ecvw = find_token(document.body, "end{cellvarwidth}", begcell, endcell)
+                        if ecvw != -1:
+                            if document.body[ecvw - 1] == "\\backslash":
+                                eertins = get_containing_inset(document.body, ecvw)
+                                if eertins and eertins[0] == "ERT":
+                                    del document.body[eertins[1] : eertins[2] + 1]
+
+                        cvw = find_token(document.body, "begin{cellvarwidth}", begcell, endcell)
+                        ertins = get_containing_inset(document.body, cvw)
+                        if ertins and ertins[0] == "ERT":
+                            del(document.body[ertins[1] : ertins[2] + 1])
+
+                        # Convert ERT newlines (as cellvarwidth detection relies on that)
+                        while True:
+                            endcell = find_token(document.body, "</cell>", begcell)
+                            nl = find_token(document.body, "\\backslash", begcell, endcell)
+                            if nl == -1 or document.body[nl + 2] != "\\backslash":
+                                break
+                            ertins = get_containing_inset(document.body, nl)
+                            if ertins and ertins[0] == "ERT":
+                                document.body[ertins[1] : ertins[2] + 1] = ["\\begin_inset Newline newline", "", "\\end_inset"]
+                            else:
+                                # Nothing was replaced, so the same line
+                                # would be found again: stop here.
+                                break
+
+                        # Same for linebreaks
+                        while True:
+                            endcell = find_token(document.body, "</cell>", begcell)
+                            nl = find_token(document.body, "linebreak", begcell, endcell)
+                            if nl == -1 or document.body[nl - 1] != "\\backslash":
+                                break
+                            ertins = get_containing_inset(document.body, nl)
+                            if ertins and ertins[0] == "ERT":
+                                document.body[ertins[1] : ertins[2] + 1] = ["\\begin_inset Newline linebreak", "", "\\end_inset"]
+                            else:
+                                # Nothing was replaced, so the same line
+                                # would be found again: stop here.
+                                break
+
+                        # And \\endgraf
+                        # This is a loop of its own so that "break" only
+                        # ends the endgraf handling. Inside a plain "if" it
+                        # would leave the column loop and skip the rest of
+                        # the row.
+                        while multirow:
+                            endcell = find_token(document.body, "</cell>", begcell)
+                            nl = find_token(document.body, "endgraf{}", begcell, endcell)
+                            if nl == -1 or document.body[nl - 1] != "\\backslash":
+                                break
+                            ertins = get_containing_inset(document.body, nl)
+                            if not ertins or ertins[0] != "ERT":
+                                break
+                            document.body[ertins[1] : ertins[2] + 1] = ["\\end_layout", "", "\\begin_layout Plain Layout"]
+                    m += 1
+
+            i += 1
+
+    finally:
+        # Runs on every exit path, including the "return" above.
+        del_complete_lines(document.preamble,
+                                ['% Added by lyx2lyx',
+                                 '%% Variable width box for table cells',
+                                 r'\newenvironment{cellvarwidth}[1][t]',
+                                 r'    {\begin{varwidth}[#1]{\linewidth}}',
+                                 r'    {\@finalstrut\@arstrutbox\end{varwidth}}'])
+        del_complete_lines(document.preamble,
+                                ['% Added by lyx2lyx',
+                                 r'\usepackage{varwidth}'])
+
+
+# Local layout lines defining a "Frontispiece" style. They are removed by
+# convert_koma_frontispiece() and appended by revert_koma_frontispiece().
+frontispiece_def = [
+    r'### Inserted by lyx2lyx (frontispiece layout) ###',
+    r'Style Frontispiece',
+    r'  CopyStyle             Titlehead',
+    r'  LatexName             frontispiece',
+    r'End',
+]
+
+
+def convert_koma_frontispiece(document):
+    """Remove local KOMA frontispiece definition.
+
+    Only acts on documents whose text class name begins with "scr". When
+    the local layout in frontispiece_def was present and got removed, the
+    "ruby" module is added to the document.
+    """
+    # NOTE(review): adding the "ruby" module here looks unrelated to the
+    # frontispiece layout -- confirm that this is intended.
+    if document.textclass[:3] == "scr" and document.del_local_layout(frontispiece_def):
+        document.add_module("ruby")
+
+
+def revert_koma_frontispiece(document):
+    """Add local KOMA frontispiece definition.
+
+    Only acts on documents whose text class name begins with "scr". The
+    local layout in frontispiece_def is appended when the body contains at
+    least one "Frontispiece" layout.
+    """
+    is_koma = document.textclass[:3] == "scr"
+    if is_koma and find_token(document.body, "\\begin_layout Frontispiece", 0) != -1:
+        document.append_local_layout(frontispiece_def)
+
+
  ##
  # Conversion hub
  #
@@ -4020,10 +4455,27 @@ convert = [
             [595, []],
             [596, [convert_parskip]],
             [597, [convert_libertinus_rm_fonts]],
-           [598, []]
+           [598, []],
+           [599, []],
+           [600, []],
+           [601, [convert_math_refs]],
+           [602, [convert_branch_colors]],
+           [603, []],
+           [604, []],
+           [605, [convert_vcolumns2]],
+           [606, [convert_koma_frontispiece]]
            ]
  
-revert =  [[597, [revert_docbook_table_output]],
+revert =  [[605, [revert_koma_frontispiece]],
+           [604, [revert_vcolumns2]],
+           [603, [revert_branch_darkcols]],
+           [602, [revert_darkmode_graphics]],
+           [601, [revert_branch_colors]],
+           [600, []],
+           [599, [revert_math_refs]],
+           [598, [revert_hrquotes]],
+           [598, [revert_nopagebreak]],
+           [597, [revert_docbook_table_output]],
             [596, [revert_libertinus_rm_fonts,revert_libertinus_sftt_fonts]],
             [595, [revert_parskip,revert_line_vspaces]],
             [594, [revert_ams_spaces]],