Pass parameters by reference (performance)
[lyx.git] / lib / lyx2lyx / lyx_2_2.py
index 5b1979b4e6d8948b6287981c8655b252dae99b23..82f12e26cbe28e09511715f3d9e00a5bfbcf551f 100644 (file)
@@ -30,10 +30,13 @@ import sys, os
 #  find_token_backwards, is_in_inset, get_value, get_quoted_value, \
 #  del_token, check_token, get_option_value
   
-#from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
-#  put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
+from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert#, \
+#  insert_to_preamble, lyx2latex, latex_length, revert_flex_inset, \
 #  revert_font_attrs, hex2ratio, str2bool
 
+from parser_tools import find_token, find_token_backwards, find_re, \
+     find_end_of_inset, find_end_of_layout, find_nonempty_line, \
+     get_containing_layout, get_value, check_token
 
 ###############################################################################
 ###
@@ -41,16 +44,635 @@ import sys, os
 ###
 ###############################################################################
 
+def convert_separator(document):
+    """
+    Convert layout separators to separator insets and add (LaTeX) paragraph
+    breaks in order to mimic previous LaTeX export.
+    """
+
+    parins = ["\\begin_inset Separator parbreak", "\\end_inset", ""]
+    parlay = ["\\begin_layout Standard", "\\begin_inset Separator parbreak",
+              "\\end_inset", "", "\\end_layout", ""]
+    sty_dict = {
+        "family" : "default",
+        "series" : "default",
+        "shape"  : "default",
+        "size"   : "default",
+        "bar"    : "default",
+        "color"  : "inherit"
+        }
+
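+    # First pass: end the paragraph preceding a \begin_deeper with a parbreak
+    # separator inset, so the paragraph break the old LaTeX export produced
+    # there is preserved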
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_deeper", i)
+        if i == -1:
+            break
+
+        j = find_token_backwards(document.body, "\\end_layout", i-1)
+        if j != -1:
+            # reset any text style before inserting the inset
+            lay = get_containing_layout(document.body, j-1)
+            if lay != False:
+                content = "\n".join(document.body[lay[1]:lay[2]])
+                for val in sty_dict.keys():
+                    if content.find("\\%s" % val) != -1:
+                        document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])]
+                        i = i + 1
+                        j = j + 1
+            document.body[j:j] = parins
+            i = i + len(parins) + 1
+        else:
+            i = i + 1
+
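+    # Second pass: when an \align paragraph follows a plain Standard
+    # paragraph (one without its own \align or VSpace), end that paragraph
+    # with a parbreak separator inset as well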
+    i = 0
+    while True:
+        i = find_token(document.body, "\\align", i)
+        if i == -1:
+            break
+
+        lay = get_containing_layout(document.body, i)
+        if lay != False and lay[0] == "Plain Layout":
+            i = i + 1
+            continue
+
+        j = find_token_backwards(document.body, "\\end_layout", i-1)
+        if j != -1:
+            lay = get_containing_layout(document.body, j-1)
+            if lay != False and lay[0] == "Standard" \
+               and find_token(document.body, "\\align", lay[1], lay[2]) == -1 \
+               and find_token(document.body, "\\begin_inset VSpace", lay[1], lay[2]) == -1:
+                # reset any text style before inserting the inset
+                content = "\n".join(document.body[lay[1]:lay[2]])
+                for val in sty_dict.keys():
+                    if content.find("\\%s" % val) != -1:
+                        document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])]
+                        i = i + 1
+                        j = j + 1
+                document.body[j:j] = parins
+                i = i + len(parins) + 1
+            else:
+                i = i + 1
+        else:
+            i = i + 1
+
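+    # Third pass: turn explicit separator paragraphs (--Separator-- or
+    # EndOfSlide layouts, possibly surrounded by dashes) into a Standard
+    # paragraph containing a parbreak separator inset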
+    regexp = re.compile(r'^\\begin_layout (?:(-*)|(\s*))(Separator|EndOfSlide)(?:(-*)|(\s*))$', re.IGNORECASE)
+
+    i = 0
+    while True:
+        i = find_re(document.body, regexp, i)
+        if i == -1:
+            return
+
+        j = find_end_of_layout(document.body, i)
+        if j == -1:
+            document.warning("Malformed LyX document: Missing `\\end_layout'.")
+            return
+
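+        # keep any content the user placed inside the separator paragraph;
+        # it is re-inserted into the replacement Standard paragraph below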
+        lay = get_containing_layout(document.body, j-1)
+        if lay != False:
+            lines = document.body[lay[3]:lay[2]]
+        else:
+            lines = []
+
+        document.body[i:j+1] = parlay
+        if len(lines) > 0:
+            document.body[i+1:i+1] = lines
+
+        i = i + len(parlay) + len(lines) + 1
+
+
+def revert_separator(document):
+    " Revert separator insets to layout separators "
+
+    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
+    if document.textclass in beamer_classes:
+        beglaysep = "\\begin_layout Separator"
+    else:
+        beglaysep = "\\begin_layout --Separator--"
+
+    parsep = [beglaysep, "", "\\end_layout", ""]
+    comert = ["\\begin_inset ERT", "status collapsed", "",
+              "\\begin_layout Plain Layout", "%", "\\end_layout",
+              "", "\\end_inset", ""]
+    empert = ["\\begin_inset ERT", "status collapsed", "",
+              "\\begin_layout Plain Layout", " ", "\\end_layout",
+              "", "\\end_inset", ""]
+
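+    # A "parbreak" separator becomes a separator layout (wrapped in
+    # \begin_deeper where needed to keep the nesting), while a "plain"
+    # separator becomes an ERT inset containing a space if text follows in
+    # the same paragraph and "%" otherwise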
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Separator", i)
+        if i == -1:
+            return
+
+        lay = get_containing_layout(document.body, i)
+        if lay == False:
+            document.warning("Malformed LyX document: Can't convert separator inset at line " + str(i))
+            i = i + 1
+            continue
+
+        layoutname = lay[0]
+        beg = lay[1]
+        end = lay[2]
+        kind = get_value(document.body, "\\begin_inset Separator", i, i+1, "plain").split()[1]
+        before = document.body[beg+1:i]
+        something_before = len(before) > 0 and len("".join(before)) > 0
+        j = find_end_of_inset(document.body, i)
+        after = document.body[j+1:end]
+        something_after = len(after) > 0 and len("".join(after)) > 0
+        if kind == "plain":
+            beg = beg + len(before) + 1
+        elif something_before:
+            document.body[i:i] = ["\\end_layout", ""]
+            i = i + 2
+            j = j + 2
+            beg = i
+            end = end + 2
+
+        if kind == "plain":
+            if something_after:
+                document.body[beg:j+1] = empert
+                i = i + len(empert)
+            else:
+                document.body[beg:j+1] = comert
+                i = i + len(comert)
+        else:
+            if something_after:
+                if layoutname == "Standard":
+                    if not something_before:
+                        document.body[beg:j+1] = parsep
+                        i = i + len(parsep)
+                        document.body[i:i] = ["", "\\begin_layout Standard"]
+                        i = i + 2
+                    else:
+                        document.body[beg:j+1] = ["\\begin_layout Standard"]
+                        i = i + 1
+                else:
+                    document.body[beg:j+1] = ["\\begin_deeper"]
+                    i = i + 1
+                    end = end + 1 - (j + 1 - beg)
+                    if not something_before:
+                        document.body[i:i] = parsep
+                        i = i + len(parsep)
+                        end = end + len(parsep)
+                    document.body[i:i] = ["\\begin_layout Standard"]
+                    document.body[end+2:end+2] = ["", "\\end_deeper", ""]
+                    i = i + 4
+            else:
+                next_par_is_aligned = False
+                k = find_nonempty_line(document.body, end+1)
+                if k != -1 and check_token(document.body[k], "\\begin_layout"):
+                    lay = get_containing_layout(document.body, k)
+                    next_par_is_aligned = lay != False and \
+                            find_token(document.body, "\\align", lay[1], lay[2]) != -1
+                if k != -1 and not next_par_is_aligned \
+                        and not check_token(document.body[k], "\\end_deeper") \
+                        and not check_token(document.body[k], "\\begin_deeper"):
+                    if layoutname == "Standard":
+                        document.body[beg:j+1] = [beglaysep]
+                        i = i + 1
+                    else:
+                        document.body[beg:j+1] = ["\\begin_deeper", beglaysep]
+                        end = end + 2 - (j + 1 - beg)
+                        document.body[end+1:end+1] = ["", "\\end_deeper", ""]
+                        i = i + 3
+                else:
+                    if something_before:
+                        del document.body[i:end+1]
+                    else:
+                        del document.body[i:end-1]
+
+        i = i + 1
+
+
+def revert_smash(document):
+    " Set amsmath to on if smash commands are used "
+
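+    # \smash with an optional [t]/[b] argument and \notag are provided by
+    # amsmath; plain \smash alone would not need the package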
+    commands = ["smash[t]", "smash[b]", "notag"]
+    i = find_token(document.header, "\\use_package amsmath", 0)
+    if i == -1:
+        document.warning("Malformed LyX document: Can't find \\use_package amsmath.")
+        return
+    value = get_value(document.header, "\\use_package amsmath", i).split()[1]
+    if value != "1":
+        # nothing to do if package is not auto but on or off
+        return
+    j = 0
+    while True:
+        j = find_token(document.body, '\\begin_inset Formula', j)
+        if j == -1:
+            return
+        k = find_end_of_inset(document.body, j)
+        if k == -1:
+            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(j))
+            j += 1
+            continue
+        code = "\n".join(document.body[j:k])
+        for c in commands:
+            if code.find("\\%s" % c) != -1:
+                # set amsmath to on, since it is loaded by the newer format
+                document.header[i] = "\\use_package amsmath 2"
+                return
+        j = k
+
+
+def revert_swissgerman(document):
+    " Set language german-ch-old to german "
+    i = 0
+    if document.language == "german-ch-old":
+        document.language = "german"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language german"
+    j = 0
+    while True:
+        j = find_token(document.body, "\\lang german-ch-old", j)
+        if j == -1:
+            return
+        document.body[j] = document.body[j].replace("\\lang german-ch-old", "\\lang german")
+        j = j + 1
+
+
+def revert_use_package(document, pkg, commands, oldauto):
+    # oldauto defines how the version we are reverting to behaves:
+    # if it is true, the old version uses the package automatically.
+    # if it is false, the old version never uses the package.
+    regexp = re.compile(r'(\\use_package\s+%s)' % pkg)
+    i = find_re(document.header, regexp, 0)
+    value = "1" # default is auto
+    if i != -1:
+        value = get_value(document.header, "\\use_package" , i).split()[1]
+        del document.header[i]
+    if value == "2": # on
+        add_to_preamble(document, ["\\usepackage{" + pkg + "}"])
+    elif value == "1" and not oldauto: # auto
+        i = 0
+        while True:
+            i = find_token(document.body, '\\begin_inset Formula', i)
+            if i == -1:
+                return
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
+                i += 1
+                continue
+            code = "\n".join(document.body[i:j])
+            for c in commands:
+                if code.find("\\%s" % c) != -1:
+                    add_to_preamble(document, ["\\usepackage{" + pkg + "}"])
+                    return
+            i = j
+
+
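+# extensible arrows and harpoons that only mathtools defines (amsmath itself
+# provides just \xrightarrow and \xleftarrow)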
+mathtools_commands = ["xhookrightarrow", "xhookleftarrow", "xRightarrow", \
+                "xrightharpoondown", "xrightharpoonup", "xrightleftharpoons", \
+                "xLeftarrow", "xleftharpoondown", "xleftharpoonup", \
+                "xleftrightarrow", "xLeftrightarrow", "xleftrightharpoons", \
+                "xmapsto"]
+
+def revert_xarrow(document):
+    "remove use_package mathtools"
+    revert_use_package(document, "mathtools", mathtools_commands, False)
+
+
+def revert_beamer_lemma(document):
+    " Reverts beamer lemma layout to ERT "
+
+    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
+    if document.textclass not in beamer_classes:
+        return
+
+    consecutive = False
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_layout Lemma", i)
+        if i == -1:
+            return
+        j = find_end_of_layout(document.body, i)
+        if j == -1:
+            document.warning("Malformed LyX document: Can't find end of Lemma layout")
+            i += 1
+            continue
+        arg1 = find_token(document.body, "\\begin_inset Argument 1", i, j)
+        endarg1 = find_end_of_inset(document.body, arg1)
+        arg2 = find_token(document.body, "\\begin_inset Argument 2", i, j)
+        endarg2 = find_end_of_inset(document.body, arg2)
+        subst1 = []
+        subst2 = []
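+        # rebuild the arguments in ERT: Argument 1 becomes "<...>",
+        # Argument 2 becomes "[...]"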
+        if arg1 != -1:
+            beginPlain1 = find_token(document.body, "\\begin_layout Plain Layout", arg1, endarg1)
+            if beginPlain1 == -1:
+                document.warning("Malformed LyX document: Can't find arg1 plain Layout")
+                i += 1
+                continue
+            endPlain1 = find_end_of_inset(document.body, beginPlain1)
+            content1 = document.body[beginPlain1 + 1 : endPlain1 - 2]
+            subst1 = put_cmd_in_ert("<") + content1 + put_cmd_in_ert(">")
+        if arg2 != -1:
+            beginPlain2 = find_token(document.body, "\\begin_layout Plain Layout", arg2, endarg2)
+            if beginPlain2 == -1:
+                document.warning("Malformed LyX document: Can't find arg2 plain Layout")
+                i += 1
+                continue
+            endPlain2 = find_end_of_inset(document.body, beginPlain2)
+            content2 = document.body[beginPlain2 + 1 : endPlain2 - 2]
+            subst2 = put_cmd_in_ert("[") + content2 + put_cmd_in_ert("]")
+
+        # remove Arg insets
+        if arg1 < arg2:
+            del document.body[arg2 : endarg2 + 1]
+            if arg1 != -1:
+                del document.body[arg1 : endarg1 + 1]
+        if arg2 < arg1:
+            del document.body[arg1 : endarg1 + 1]
+            if arg2 != -1:
+                del document.body[arg2 : endarg2 + 1]
+
+        # index of end layout has probably changed
+        j = find_end_of_layout(document.body, i)
+        if j == -1:
+            document.warning("Malformed LyX document: Can't find end of Lemma layout")
+            i += 1
+            continue
+
+        begcmd = []
+
+        # if this is not a consecutive env, add start command
+        if not consecutive:
+            begcmd = put_cmd_in_ert("\\begin{lemma}")
+
+        # is the next paragraph another Lemma?
+        consecutive = document.body[j + 2] == "\\begin_layout Lemma"
+
+        # if this is not followed by a consecutive env, add end command
+        if not consecutive:
+            document.body[j : j + 1] = put_cmd_in_ert("\\end{lemma}") + ["\\end_layout"]
+
+        document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd + subst1 + subst2
+
+        i = j
+
+
+
+def revert_question_env(document):
+    """
+    Reverts question and question* environments of
+    theorems-ams-extended-bytype module to ERT
+    """
+
+    # Do we use theorems-ams-extended-bytype module?
+    have_mod = False
+    mods = document.get_module_list()
+    for mod in mods:
+        if mod == "theorems-ams-extended-bytype":
+            have_mod = True
+            break
+
+    if not have_mod:
+        return
+
+    consecutive = False
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_layout Question", i)
+        if i == -1:
+            return
+
+        starred = document.body[i] == "\\begin_layout Question*"
+
+        j = find_end_of_layout(document.body, i)
+        if j == -1:
+            document.warning("Malformed LyX document: Can't find end of Question layout")
+            i += 1
+            continue
+
+        # if this is not a consecutive env, add start command
+        begcmd = []
+        if not consecutive:
+            if starred:
+                begcmd = put_cmd_in_ert("\\begin{question*}")
+            else:
+                begcmd = put_cmd_in_ert("\\begin{question}")
+
+        # is the next paragraph another theorem of the same type?
+        consecutive = False
+        if starred:
+            consecutive = document.body[j + 2] == "\\begin_layout Question*"
+        else:
+            consecutive = document.body[j + 2] == "\\begin_layout Question"
+
+        # if this is not followed by a consecutive env, add end command
+        if not consecutive:
+            if starred:
+                document.body[j : j + 1] = put_cmd_in_ert("\\end{question*}") + ["\\end_layout"]
+            else:
+                document.body[j : j + 1] = put_cmd_in_ert("\\end{question}") + ["\\end_layout"]
+
+        document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd
+
+        add_to_preamble(document, "\\providecommand{\\questionname}{Question}")
+
+        if starred:
+            add_to_preamble(document, "\\theoremstyle{plain}\n" \
+                                      "\\newtheorem*{question*}{\\protect\\questionname}")
+        else:
+            add_to_preamble(document, "\\theoremstyle{plain}\n" \
+                                      "\\newtheorem{question}{\\protect\\questionname}")
+
+        i = j
+
+
+def convert_dashes(document):
+    "convert -- and --- to \\twohyphens and \\threehyphens"
+
+    if document.backend != "latex":
+        return
+
+    i = 0
+    while i < len(document.body):
+        words = document.body[i].split()
+        if len(words) > 1 and words[0] == "\\begin_inset" and \
+           words[1] in ["CommandInset", "ERT", "External", "Formula", "Graphics", "IPA", "listings"]:
+            # must not replace anything in insets that store LaTeX contents in .lyx files
+            # (math and command insets without overridden read() and write() methods)
+            # filtering out IPA makes Text::readParToken() simpler
+            # skip ERT as well since the replacement is not needed there
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
+                i += 1
+            else:
+                i = j
+            continue
+        while True:
+            j = document.body[i].find("--")
+            if j == -1:
+                break
+            front = document.body[i][:j]
+            back = document.body[i][j+2:]
+            # We can have an arbitrary number of consecutive hyphens.
+            # These must be split into the corresponding number of two and three hyphens
+            # We must match what LaTeX does: First try emdash, then endash, then single hyphen
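+            # e.g. "-----" becomes "\threehyphens" with "--" pushed onto the
+            # next line, which the next iteration then turns into "\twohyphens"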
+            if back.find("-") == 0:
+                back = back[1:]
+                if len(back) > 0:
+                    document.body.insert(i+1, back)
+                document.body[i] = front + "\\threehyphens"
+            else:
+                if len(back) > 0:
+                    document.body.insert(i+1, back)
+                document.body[i] = front + "\\twohyphens"
+        i += 1
+
+
+def revert_dashes(document):
+    "convert \\twohyphens and \\threehyphens to -- and ---"
+
+    i = 0
+    while i < len(document.body):
+        words = document.body[i].split()
+        if len(words) > 1 and words[0] == "\\begin_inset" and \
+           words[1] in ["CommandInset", "ERT", "External", "Formula", "Graphics", "IPA", "listings"]:
+            # see convert_dashes
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
+                i += 1
+            else:
+                i = j
+            continue
+        replaced = False
+        if document.body[i].find("\\twohyphens") >= 0:
+            document.body[i] = document.body[i].replace("\\twohyphens", "--")
+            replaced = True
+        if document.body[i].find("\\threehyphens") >= 0:
+            document.body[i] = document.body[i].replace("\\threehyphens", "---")
+            replaced = True
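+        # if the line was split by convert_dashes, merge the continuation
+        # line back in (as long as the result stays reasonably short)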
+        if replaced and i+1 < len(document.body) and \
+           (document.body[i+1].find("\\") != 0 or \
+            document.body[i+1].find("\\twohyphens") == 0 or
+            document.body[i+1].find("\\threehyphens") == 0) and \
+           len(document.body[i]) + len(document.body[i+1]) <= 80:
+            document.body[i] = document.body[i] + document.body[i+1]
+            document.body[i+1:i+2] = []
+        else:
+            i += 1
+
+
+# order is important for the last three!
+phrases = ["LyX", "LaTeX2e", "LaTeX", "TeX"]
+
+def is_part_of_converted_phrase(line, j, phrase):
+    "is phrase part of an already converted phrase?"
+    for p in phrases:
+        converted = "\\SpecialCharNoPassThru \\" + p
+        pos = j + len(phrase) - len(converted)
+        if pos >= 0:
+            if line[pos:pos+len(converted)] == converted:
+                return True
+    return False
+
+
+def convert_phrases(document):
+    "convert special phrases from plain text to \\SpecialCharNoPassThru"
+
+    if document.backend != "latex":
+        return
+
+    for phrase in phrases:
+        i = 0
+        while i < len(document.body):
+            words = document.body[i].split()
+            if len(words) > 1 and words[0] == "\\begin_inset" and \
+               words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
+                # must not replace anything in insets that store LaTeX contents in .lyx files
+                # (math and command insets without overridden read() and write() methods)
+                j = find_end_of_inset(document.body, i)
+                if j == -1:
+                    document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
+                    i += 1
+                else:
+                    i = j
+                continue
+            if document.body[i].find("\\") == 0:
+                i += 1
+                continue
+            j = document.body[i].find(phrase)
+            if j == -1:
+                i += 1
+                continue
+            if not is_part_of_converted_phrase(document.body[i], j, phrase):
+                front = document.body[i][:j]
+                back = document.body[i][j+len(phrase):]
+                if len(back) > 0:
+                    document.body.insert(i+1, back)
+                # We cannot use SpecialChar since we do not know whether we are outside passThru 
+                document.body[i] = front + "\\SpecialCharNoPassThru \\" + phrase
+            i += 1
+
+
+def revert_phrases(document):
+    "convert special phrases to plain text"
+
+    i = 0
+    while i < len(document.body):
+        words = document.body[i].split()
+        if len(words) > 1 and words[0] == "\\begin_inset" and \
+           words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
+            # see convert_phrases
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
+                i += 1
+            else:
+                i = j
+            continue
+        replaced = False
+        for phrase in phrases:
+            # we can replace SpecialChar since LyX ensures that it cannot be inserted into passThru parts
+            if document.body[i].find("\\SpecialChar \\" + phrase) >= 0:
+                document.body[i] = document.body[i].replace("\\SpecialChar \\" + phrase, phrase)
+                replaced = True
+            if document.body[i].find("\\SpecialCharNoPassThru \\" + phrase) >= 0:
+                document.body[i] = document.body[i].replace("\\SpecialCharNoPassThru \\" + phrase, phrase)
+                replaced = True
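+        # merge back the continuation line that convert_phrases may have
+        # split off, provided the result stays reasonably short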
+        if replaced and i+1 < len(document.body) and \
+           (document.body[i+1].find("\\") != 0 or \
+            document.body[i+1].find("\\SpecialChar") == 0) and \
+           len(document.body[i]) + len(document.body[i+1]) <= 80:
+            document.body[i] = document.body[i] + document.body[i+1]
+            document.body[i+1:i+2] = []
+            i -= 1
+        i += 1
+
 
 ##
 # Conversion hub
 #
 
-supported_versions = ["2.2.0","2.2"]
-convert = [#[475, []]
+supported_versions = ["2.2.0", "2.2"]
+convert = [
+           [475, [convert_separator]],
+           # nothing to do for 476: We consider it a bug that older versions
+           # did not load amsmath automatically for these commands, and do not
+           # want to hardcode amsmath off.
+           [476, []],
+           [477, []],
+           [478, []],
+           [479, []],
+           [480, []],
+           [481, [convert_dashes]],
+           [482, [convert_phrases]]
           ]
 
-revert =  [#[474, []]
+revert =  [
+           [481, [revert_phrases]],
+           [480, [revert_dashes]],
+           [479, [revert_question_env]],
+           [478, [revert_beamer_lemma]],
+           [477, [revert_xarrow]],
+           [476, [revert_swissgerman]],
+           [475, [revert_smash]],
+           [474, [revert_separator]]
           ]