Try to fix bug #9587 correctly.

[lyx.git] / lib / lyx2lyx / lyx_2_2.py
diff --git a/lib/lyx2lyx/lyx_2_2.py b/lib/lyx2lyx/lyx_2_2.py

index eeea01e436d3612ae6468009031a8f1434cf1fe3..cd69744ea9fb985d87d52d5b0d8bfdd703f5df2b 100644 (file)
--- a/lib/lyx2lyx/lyx_2_2.py
+++ b/lib/lyx2lyx/lyx_2_2.py
@@ -29,9 +29,9 @@ import sys, os
  #  find_token_exact, find_end_of_inset, find_end_of_layout, \
  #  find_token_backwards, is_in_inset, get_value, get_quoted_value, \
  #  del_token, check_token, get_option_value
-  
-from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert#, \
-#  insert_to_preamble, lyx2latex, latex_length, revert_flex_inset, \
+
+from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, lyx2latex#, \
+#  insert_to_preamble, latex_length, revert_flex_inset, \
  #  revert_font_attrs, hex2ratio, str2bool
  
  from parser_tools import find_token, find_token_backwards, find_re, \
@@ -74,7 +74,7 @@ def convert_separator(document):
              lay = get_containing_layout(document.body, j-1)
              if lay != False:
                  content = "\n".join(document.body[lay[1]:lay[2]])
-                for val in sty_dict.keys():
+                for val in list(sty_dict.keys()):
                      if content.find("\\%s" % val) != -1:
                          document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])]
                          i = i + 1
@@ -103,7 +103,7 @@ def convert_separator(document):
                 and find_token(document.body, "\\begin_inset VSpace", lay[1], lay[2]) == -1:
                  # reset any text style before inserting the inset
                  content = "\n".join(document.body[lay[1]:lay[2]])
-                for val in sty_dict.keys():
+                for val in list(sty_dict.keys()):
                      if content.find("\\%s" % val) != -1:
                          document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])]
                          i = i + 1
@@ -336,7 +336,7 @@ def revert_xarrow(document):
  
  def revert_beamer_lemma(document):
      " Reverts beamer lemma layout to ERT "
-    
+
      beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
      if document.textclass not in beamer_classes:
          return
@@ -480,7 +480,1026 @@ def revert_question_env(document):
  
          i = j
  
-  
+
+def convert_dashes(document):
+    "convert -- and --- to \\twohyphens and \\threehyphens"
+
+    if document.backend != "latex":
+        return
+
+    # Insets whose contents are stored as LaTeX in the .lyx file (math and
+    # command insets without overridden read()/write() methods) must not be
+    # touched. IPA is filtered to keep Text::readParToken() simple, and ERT
+    # is skipped because the conversion is not needed there.
+    protected = ["CommandInset", "ERT", "External", "Formula", "Graphics", "IPA", "listings"]
+
+    body = document.body
+    pos = 0
+    while pos < len(body):
+        tokens = body[pos].split()
+        if len(tokens) > 1 and tokens[0] == "\\begin_inset" and tokens[1] in protected:
+            end = find_end_of_inset(body, pos)
+            if end == -1:
+                document.warning("Malformed LyX document: Can't find end of " + tokens[1] + " inset at line " + str(pos))
+                pos += 1
+            else:
+                pos = end
+            continue
+
+        line = body[pos]
+        hit = line.find("--")
+        if hit != -1:
+            head = line[:hit]
+            tail = line[hit + 2:]
+            # A run of hyphens is consumed the way LaTeX does it: try the
+            # em-dash (three hyphens) first, then the en-dash (two hyphens).
+            if tail.startswith("-"):
+                tail = tail[1:]
+                macro = "\\threehyphens"
+            else:
+                macro = "\\twohyphens"
+            # The remainder goes onto its own line; it is examined on the
+            # next pass, so longer runs are split step by step.
+            if len(tail) > 0:
+                body.insert(pos + 1, tail)
+            body[pos] = head + macro
+        pos += 1
+
+
+def revert_dashes(document):
+    "convert \\twohyphens and \\threehyphens to -- and ---"
+
+    # see convert_dashes for why these insets are left alone
+    protected = ["CommandInset", "ERT", "External", "Formula", "Graphics", "IPA", "listings"]
+
+    body = document.body
+    pos = 0
+    while pos < len(body):
+        tokens = body[pos].split()
+        if len(tokens) > 1 and tokens[0] == "\\begin_inset" and tokens[1] in protected:
+            end = find_end_of_inset(body, pos)
+            if end == -1:
+                document.warning("Malformed LyX document: Can't find end of " + tokens[1] + " inset at line " + str(pos))
+                pos += 1
+            else:
+                pos = end
+            continue
+
+        before = body[pos]
+        after = before.replace("\\twohyphens", "--").replace("\\threehyphens", "---")
+        body[pos] = after
+        changed = after != before
+
+        can_join = False
+        if changed and pos + 1 < len(body):
+            follower = body[pos + 1]
+            # Only plain text, or a line that itself starts with a dash
+            # macro, may be glued back onto the current line.
+            joinable = (not follower.startswith("\\")
+                        or follower.startswith("\\twohyphens")
+                        or follower.startswith("\\threehyphens"))
+            can_join = joinable and len(after) + len(follower) <= 80
+
+        if can_join:
+            # Stay on this line: the appended text may contain more macros.
+            body[pos] = after + body[pos + 1]
+            del body[pos + 1]
+        else:
+            pos += 1
+
+
+# order is important for the last three!
+phrases = ["LyX", "LaTeX2e", "LaTeX", "TeX"]
+
+def is_part_of_converted_phrase(line, j, phrase):
+    "is phrase part of an already converted phrase?"
+    # The occurrence of `phrase` at index j ends here; a converted phrase
+    # that contains it must end at exactly the same position.
+    stop = j + len(phrase)
+    for known in phrases:
+        marker = "\\SpecialCharNoPassThru \\" + known
+        start = stop - len(marker)
+        if start >= 0 and line[start:stop] == marker:
+            return True
+    return False
+
+
+def convert_phrases(document):
+    """Convert special phrases from plain text to \\SpecialCharNoPassThru.
+
+    Does nothing unless the document backend is "latex". For each entry of
+    the module-level `phrases` list the body is scanned once; a line that
+    contains the phrase is cut after it, so the converted phrase always ends
+    its line and the remainder continues on a newly inserted line.
+    """
+
+    if document.backend != "latex":
+        return
+
+    for phrase in phrases:
+        i = 0
+        while i < len(document.body):
+            words = document.body[i].split()
+            if len(words) > 1 and words[0] == "\\begin_inset" and \
+               words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
+                # must not replace anything in insets that store LaTeX contents in .lyx files
+                # (math and command insets without overridden read() and write() methods)
+                j = find_end_of_inset(document.body, i)
+                if j == -1:
+                    # report the inset type actually found, not always "Formula"
+                    document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
+                    i += 1
+                else:
+                    i = j
+                continue
+            # lines starting with a backslash are LyX commands, not text
+            if document.body[i].find("\\") == 0:
+                i += 1
+                continue
+            j = document.body[i].find(phrase)
+            if j == -1:
+                i += 1
+                continue
+            # e.g. do not convert the "TeX" inside an already converted "LaTeX"
+            if not is_part_of_converted_phrase(document.body[i], j, phrase):
+                front = document.body[i][:j]
+                back = document.body[i][j+len(phrase):]
+                if len(back) > 0:
+                    document.body.insert(i+1, back)
+                # We cannot use SpecialChar since we do not know whether we are outside passThru
+                document.body[i] = front + "\\SpecialCharNoPassThru \\" + phrase
+            i += 1
+
+
+def revert_phrases(document):
+    """Convert special phrases back to plain text.
+
+    Every "\\SpecialChar \\<phrase>" and "\\SpecialCharNoPassThru \\<phrase>"
+    for the entries of the module-level `phrases` list is replaced by the
+    bare phrase. A line that was changed is then joined with its successor
+    when the successor is plain text (or starts with another \\SpecialChar)
+    and the joined line is at most 80 characters long.
+    """
+
+    i = 0
+    while i < len(document.body):
+        words = document.body[i].split()
+        if len(words) > 1 and words[0] == "\\begin_inset" and \
+           words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
+            # see convert_phrases
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                # report the inset type actually found, not always "Formula"
+                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
+                i += 1
+            else:
+                i = j
+            continue
+        replaced = False
+        for phrase in phrases:
+            # we can replace SpecialChar since LyX ensures that it cannot be inserted into passThru parts
+            if document.body[i].find("\\SpecialChar \\" + phrase) >= 0:
+                document.body[i] = document.body[i].replace("\\SpecialChar \\" + phrase, phrase)
+                replaced = True
+            if document.body[i].find("\\SpecialCharNoPassThru \\" + phrase) >= 0:
+                document.body[i] = document.body[i].replace("\\SpecialCharNoPassThru \\" + phrase, phrase)
+                replaced = True
+        if replaced and i+1 < len(document.body) and \
+           (document.body[i+1].find("\\") != 0 or \
+            document.body[i+1].find("\\SpecialChar") == 0) and \
+           len(document.body[i]) + len(document.body[i+1]) <= 80:
+            document.body[i] = document.body[i] + document.body[i+1]
+            document.body[i+1:i+2] = []
+            # stay on this line: the appended text may hold further phrases
+            i -= 1
+        i += 1
+
+
+def convert_specialchar_internal(document, forward):
+    """Rename the arguments of \\SpecialChar and \\SpecialCharNoPassThru.
+
+    With `forward` true the old LaTeX-like argument (e.g. "\\ldots{}") is
+    replaced by the new symbolic name (e.g. "ldots"); with `forward` false
+    the replacement runs in the opposite direction. Insets that store LaTeX
+    verbatim are skipped (see convert_phrases).
+    """
+    # (old syntax, new syntax). This is a sequence rather than a dict so that
+    # the iteration order is really the written one, and so that no
+    # Python-2-only dict.iteritems() is needed.
+    specialchars = (
+        ("\\-", "softhyphen"),
+        ("\\textcompwordmark{}", "ligaturebreak"),
+        ("\\@.", "endofsentence"),
+        ("\\ldots{}", "ldots"),
+        ("\\menuseparator", "menuseparator"),
+        ("\\slash{}", "breakableslash"),
+        ("\\nobreakdash-", "nobreakdash"),
+        ("\\LyX", "LyX"),
+        ("\\TeX", "TeX"),
+        ("\\LaTeX2e", "LaTeX2e"),
+        ("\\LaTeX", "LaTeX"),  # must be after LaTeX2e
+    )
+
+    i = 0
+    while i < len(document.body):
+        words = document.body[i].split()
+        if len(words) > 1 and words[0] == "\\begin_inset" and \
+           words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
+            # see convert_phrases
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                # report the inset type actually found, not always "Formula"
+                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
+                i += 1
+            else:
+                i = j
+            continue
+        for key, value in specialchars:
+            if forward:
+                document.body[i] = document.body[i].replace("\\SpecialChar " + key, "\\SpecialChar " + value)
+                document.body[i] = document.body[i].replace("\\SpecialCharNoPassThru " + key, "\\SpecialCharNoPassThru " + value)
+            else:
+                document.body[i] = document.body[i].replace("\\SpecialChar " + value, "\\SpecialChar " + key)
+                document.body[i] = document.body[i].replace("\\SpecialCharNoPassThru " + value, "\\SpecialCharNoPassThru " + key)
+        i += 1
+
+
+def convert_specialchar(document):
+    "convert special characters to new syntax"
+    # forward direction: old LaTeX-like arguments -> new symbolic names
+    convert_specialchar_internal(document, forward=True)
+
+
+def revert_specialchar(document):
+    "convert special characters to old syntax"
+    # backward direction: new symbolic names -> old LaTeX-like arguments
+    convert_specialchar_internal(document, forward=False)
+
+
+def revert_georgian(document):
+    "Set the document language to English but assure Georgian output"
+
+    if document.language != "georgian":
+        return
+
+    document.language = "english"
+    header = document.header
+
+    lang_pos = find_token(header, "\\language georgian", 0)
+    if lang_pos != -1:
+        header[lang_pos] = "\\language english"
+
+    package_pos = find_token(header, "\\language_package default", 0)
+    if package_pos != -1:
+        header[package_pos] = "\\language_package babel"
+
+    # Pass "georgian" as a class option: prepend it to an existing \options
+    # line, or create one right after \use_default_options.
+    options_pos = find_token(header, "\\options", 0)
+    if options_pos == -1:
+        anchor = find_token(header, "\\use_default_options", 0)
+        header.insert(anchor + 1, "\\options georgian")
+    else:
+        header[options_pos] = header[options_pos].replace("\\options", "\\options georgian,")
+
+
+def revert_sigplan_doi(document):
+    " Reverts sigplanconf DOI layout to ERT "
+
+    if document.textclass != "sigplanconf":
+        return
+
+    body = document.body
+    start = 0
+    while True:
+        start = find_token(body, "\\begin_layout DOI", start)
+        if start == -1:
+            return
+        stop = find_end_of_layout(body, start)
+        if stop == -1:
+            document.warning("Malformed LyX document: Can't find end of DOI layout")
+            start += 1
+            continue
+
+        # Move the DOI into the preamble as \doi{...} and drop the paragraph.
+        doi = lyx2latex(document, body[start:stop + 1])
+        add_to_preamble(document, ["\\doi{" + doi + "}"])
+        del body[start:stop + 1]
+        # the paragraph is gone, so searching resumes at the same index
+
+
+def revert_ex_itemargs(document):
+    """Revert \\item arguments of the example environments to TeX code.
+
+    Only acts when the "linguistics" module is in use. Each
+    "Argument item:" inset inside a "Numbered Examples (consecutive)" or
+    "Subexample" paragraph is removed, and its content, wrapped in ERT
+    "[" ... "]", is inserted at the start of the paragraph.
+    """
+
+    # Do we use the linguistics module?
+    if "linguistics" not in document.get_module_list():
+        return
+
+    i = 0
+    example_layouts = ["Numbered Examples (consecutive)", "Subexample"]
+    while True:
+        i = find_token(document.body, "\\begin_inset Argument item:", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            # Without this guard the deletion below would remove nothing and
+            # the argument content would be duplicated into the paragraph.
+            document.warning("Malformed LyX document: Can't find end of Argument inset")
+            i += 1
+            continue
+        # Find containing paragraph layout
+        parent = get_containing_layout(document.body, i)
+        if parent == False:
+            document.warning("Malformed LyX document: Can't find parent paragraph layout")
+            i += 1
+            continue
+        parbeg = parent[3]
+        layoutname = parent[0]
+        if layoutname in example_layouts:
+            beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
+            endPlain = find_end_of_layout(document.body, beginPlain)
+            content = document.body[beginPlain + 1 : endPlain]
+            # remove the inset first, then insert at parbeg
+            del document.body[i:j+1]
+            subst = put_cmd_in_ert("[") + content + put_cmd_in_ert("]")
+            document.body[parbeg : parbeg] = subst
+        i += 1
+
+
+def revert_forest(document):
+    " Reverts the forest environment (Linguistics module) to TeX-code "
+
+    # Nothing to do unless the linguistics module is used.
+    uses_linguistics = False
+    for module in document.get_module_list():
+        if module == "linguistics":
+            uses_linguistics = True
+    if not uses_linguistics:
+        return
+
+    body = document.body
+    start = 0
+    while True:
+        start = find_token(body, "\\begin_inset Flex Structure Tree", start)
+        if start == -1:
+            return
+        stop = find_end_of_inset(body, start)
+        if stop == -1:
+            document.warning("Malformed LyX document: Can't find end of Structure Tree inset")
+            start += 1
+            continue
+
+        plain_begin = find_token(body, "\\begin_layout Plain Layout", start)
+        plain_end = find_end_of_layout(body, plain_begin)
+        tree = lyx2latex(document, body[plain_begin : plain_end])
+
+        add_to_preamble(document, ["\\usepackage{forest}"])
+
+        # Replace the whole inset by an ERT holding \begin{forest} ... \end{forest}.
+        ert = ["\\begin_inset ERT", "status collapsed", "",
+               "\\begin_layout Plain Layout", "", "\\backslash",
+               "begin{forest}", "\\end_layout", "", "\\begin_layout Plain Layout",
+               tree, "\\end_layout", "", "\\begin_layout Plain Layout",
+               "\\backslash", "end{forest}", "", "\\end_layout", "", "\\end_inset"]
+        body[start:stop + 1] = ert
+        # the Flex inset is gone, so searching resumes at the same index
+
+
+def revert_glossgroup(document):
+    """Revert the GroupGlossedWords inset (Linguistics module) to TeX code.
+
+    Only acts when the "linguistics" module is in use. Each
+    "Flex GroupGlossedWords" inset is replaced by its LaTeX content wrapped
+    in a pair of braces.
+    """
+
+    # Do we use the linguistics module?
+    if "linguistics" not in document.get_module_list():
+        return
+
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Flex GroupGlossedWords", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Malformed LyX document: Can't find end of GroupGlossedWords inset")
+            i += 1
+            continue
+
+        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
+        endPlain = find_end_of_layout(document.body, beginPlain)
+        content = lyx2latex(document, document.body[beginPlain : endPlain])
+        # (a leftover debugging warning that echoed `content` was removed here)
+
+        document.body[i:j + 1] = ["{", "", content, "", "}"]
+        # no need to reset i
+
+
+def revert_newgloss(document):
+    """Revert the new Glosse insets (Linguistics module) to the old format.
+
+    Only acts when the "linguistics" module is in use. For every
+    "Flex Glosse" and "Flex Tri-Glosse" inset, an "Argument 1" inset (if
+    present) is turned into a trailing "\\backslash glt <content>" paragraph,
+    and the first Plain Layout of the inset is replaced by its LaTeX form.
+    """
+
+    # Do we use the linguistics module?
+    have_mod = False
+    mods = document.get_module_list()
+    for mod in mods:
+        if mod == "linguistics":
+            have_mod = True
+            continue
+
+    if not have_mod:
+        return
+
+    glosses = ("\\begin_inset Flex Glosse", "\\begin_inset Flex Tri-Glosse")
+    for glosse in glosses:
+        i = 0
+        while True:
+            i = find_token(document.body, glosse, i)
+            if i == -1:
+                break
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Can't find end of Glosse inset")
+                i += 1
+                continue
+
+            # Look for the optional argument inside this inset only.
+            arg = find_token(document.body, "\\begin_inset Argument 1", i, j)
+            # NOTE(review): this is evaluated even when arg == -1; the result
+            # is only used inside the `arg != -1` branch below.
+            endarg = find_end_of_inset(document.body, arg)
+            argcontent = ""
+            if arg != -1:
+                argbeginPlain = find_token(document.body, "\\begin_layout Plain Layout", arg, endarg)
+                if argbeginPlain == -1:
+                    document.warning("Malformed LyX document: Can't find arg plain Layout")
+                    i += 1
+                    continue
+                # NOTE(review): find_end_of_inset is called on a layout line
+                # here; together with the "- 2" below this presumably trims
+                # the closing lines of the argument -- confirm against the
+                # helper's behaviour.
+                argendPlain = find_end_of_inset(document.body, argbeginPlain)
+                argcontent = lyx2latex(document, document.body[argbeginPlain : argendPlain - 2])
+
+                # Append the translation line as a new paragraph just before
+                # the end of the Glosse inset.
+                document.body[j:j] = ["", "\\begin_layout Plain Layout","\\backslash", "glt ",
+                    argcontent, "\\end_layout"]
+
+                # remove Arg insets and paragraph, if it only contains this inset
+                if document.body[arg - 1] == "\\begin_layout Plain Layout" and find_end_of_layout(document.body, arg - 1) == endarg + 3:
+                    del document.body[arg - 1 : endarg + 4]
+                else:
+                    del document.body[arg : endarg + 1]
+
+            # Replace the content of the first Plain Layout found from i on
+            # by its LaTeX translation.
+            beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
+            endPlain = find_end_of_layout(document.body, beginPlain)
+            content = lyx2latex(document, document.body[beginPlain : endPlain])
+
+            document.body[beginPlain + 1:endPlain] = [content]
+            # continue searching after the rewritten paragraph start
+            i = beginPlain + 1
+
+
+def convert_newgloss(document):
+    """Convert Glosse insets (Linguistics module) to the new format.
+
+    Only acts when the "linguistics" module is in use. Inside every
+    "Flex Glosse" and "Flex Tri-Glosse" inset each Plain Layout is rewritten:
+    a paragraph holding "\\backslash" / "glt ..." becomes an "Argument 1"
+    inset with the translation in ERT; any other paragraph has its content
+    wrapped in an ERT inset.
+    """
+
+    # Do we use the linguistics module?
+    if "linguistics" not in document.get_module_list():
+        return
+
+    glosses = ("\\begin_inset Flex Glosse", "\\begin_inset Flex Tri-Glosse")
+    for glosse in glosses:
+        i = 0
+        while True:
+            i = find_token(document.body, glosse, i)
+            if i == -1:
+                break
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Can't find end of Glosse inset")
+                i += 1
+                continue
+
+            k = i
+            # Where the outer search resumes; always past the current inset
+            # start, even when no paragraph could be processed.
+            resume = i + 1
+            while True:
+                beginPlain = find_token(document.body, "\\begin_layout Plain Layout", k, j)
+                if beginPlain == -1:
+                    break
+                endPlain = find_end_of_layout(document.body, beginPlain)
+                if endPlain == -1:
+                    document.warning("Malformed LyX document: Can't find end of Glosse layout")
+                    # give up on this inset; retrying the same paragraph
+                    # would loop forever
+                    break
+
+                glt = find_token(document.body, "\\backslash", beginPlain, endPlain)
+                if glt != -1 and document.body[glt + 1].startswith("glt"):
+                    # drop exactly the "glt" prefix (lstrip("glt") would strip
+                    # any leading run of the characters g, l and t)
+                    document.body[glt + 1] = document.body[glt + 1][len("glt"):].lstrip()
+                    argcontent = document.body[glt + 1 : endPlain]
+                    document.body[beginPlain + 1 : endPlain] = ["\\begin_inset Argument 1", "status open", "",
+                        "\\begin_layout Plain Layout", "\\begin_inset ERT", "status open", "",
+                        "\\begin_layout Plain Layout", ""] + argcontent + ["\\end_layout", "", "\\end_inset", "",
+                        "\\end_layout", "", "\\end_inset"]
+                else:
+                    content = document.body[beginPlain + 1 : endPlain]
+                    document.body[beginPlain + 1 : endPlain] = ["\\begin_inset ERT", "status open", "",
+                        "\\begin_layout Plain Layout"] + content + ["\\end_layout", "", "\\end_inset"]
+
+                # the paragraph grew, so recompute its end and the inset end
+                endPlain = find_end_of_layout(document.body, beginPlain)
+                if endPlain == -1:
+                    document.warning("Malformed LyX document: Can't find end of Glosse layout")
+                    break
+                k = endPlain
+                resume = endPlain + 1
+                j = find_end_of_inset(document.body, i)
+
+            i = resume
+
+
+def convert_BoxFeatures(document):
+    " adds new box features "
+
+    # Default values, inserted right after every "height_special" line.
+    defaults = ['thickness "0.4pt"', 'separation "3pt"', 'shadowsize "4pt"']
+    pos = find_token(document.body, "height_special", 0)
+    while pos != -1:
+        document.body[pos + 1:pos + 1] = defaults
+        # skip the matched line plus the three lines just inserted
+        pos = find_token(document.body, "height_special", pos + 4)
+
+
+def revert_BoxFeatures(document):
+    """Output the new box features as TeX code.
+
+    For every "height_special" line the three following lines are read as
+    thickness, separation and shadowsize, then deleted. When any value
+    differs from its default, ERT is inserted around the box that sets
+    \\fboxrule, \\fboxsep and/or \\shadowsize accordingly.
+    """
+
+    i = 0
+    defaultSep = "3pt"
+    defaultThick = "0.4pt"
+    defaultShadow = "4pt"
+    while True:
+        i = find_token(document.body, "height_special", i)
+        if i == -1:
+            return
+        # read out the values
+        # (each is the text between the first and the last double quote)
+        beg = document.body[i+1].find('"');
+        end = document.body[i+1].rfind('"');
+        thickness = document.body[i+1][beg+1:end];
+        beg = document.body[i+2].find('"');
+        end = document.body[i+2].rfind('"');
+        separation = document.body[i+2][beg+1:end];
+        beg = document.body[i+3].find('"');
+        end = document.body[i+3].rfind('"');
+        shadowsize = document.body[i+3][beg+1:end];
+        # delete the specification
+        del document.body[i+1:i+4]
+        # output ERT
+        # first output the closing brace
+        # NOTE(review): the offsets +10 / -10 / -5 below assume a fixed
+        # number of lines around "height_special" in a Box inset and a fixed
+        # length of the list returned by put_cmd_in_ert -- confirm.
+        if shadowsize != defaultShadow or separation != defaultSep or thickness != defaultThick:
+            document.body[i + 10 : i + 10] = put_cmd_in_ert("}")
+        # now output the lengths
+        if shadowsize != defaultShadow or separation != defaultSep or thickness != defaultThick:
+            document.body[i - 10 : i - 10] = put_cmd_in_ert("{")
+        # Each branch below overwrites one line of the ERT just inserted in
+        # front of the box; later branches replace the result of earlier
+        # ones, so the last matching combination wins.
+        if thickness != defaultThick:
+            document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness]
+        if separation != defaultSep and thickness == defaultThick:
+            document.body[i - 5 : i - 4] = ["{\\backslash fboxsep " + separation]
+        if separation != defaultSep and thickness != defaultThick:
+            document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness + "\\backslash fboxsep " + separation]
+        if shadowsize != defaultShadow and separation == defaultSep and thickness == defaultThick:
+            document.body[i - 5 : i - 4] = ["{\\backslash shadowsize " + shadowsize]
+        if shadowsize != defaultShadow and separation != defaultSep and thickness == defaultThick:
+            document.body[i - 5 : i - 4] = ["{\\backslash fboxsep " + separation + "\\backslash shadowsize " + shadowsize]
+        if shadowsize != defaultShadow and separation == defaultSep and thickness != defaultThick:
+            document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness + "\\backslash shadowsize " + shadowsize]
+        if shadowsize != defaultShadow and separation != defaultSep and thickness != defaultThick:
+            document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness + "\\backslash fboxsep " + separation + "\\backslash shadowsize " + shadowsize]
+        # advance the search position past this box
+        i = i + 11
+
+
+def convert_origin(document):
+    """Insert the \\origin tag into the header.
+
+    The tag is placed directly before the "\\textclass" line. Its value is
+    "stdin" when document.dir is empty, otherwise document.dir with
+    backslashes turned into slashes and a trailing slash appended.
+    """
+
+    i = find_token(document.header, "\\textclass ", 0)
+    if i == -1:
+        document.warning("Malformed LyX document: No \\textclass!!")
+        return
+    if document.dir == "":
+        origin = "stdin"
+    else:
+        origin = document.dir.replace('\\', '/') + '/'
+        # Decode only byte strings: this does what unicode(origin, enc) did
+        # for Python 2 str values, without relying on the Python-2-only
+        # `unicode` builtin and without failing on already-decoded text.
+        if os.name != 'nt' and isinstance(origin, bytes):
+            origin = origin.decode(sys.getfilesystemencoding())
+    document.header[i:i] = ["\\origin " + origin]
+
+
+def revert_origin(document):
+    " Remove the origin tag "
+
+    pos = find_token(document.header, "\\origin ", 0)
+    if pos != -1:
+        del document.header[pos]
+        return
+    document.warning("Malformed LyX document: No \\origin!!")
+
+
+color_names = ["brown", "darkgray", "gray", \
+               "lightgray", "lime", "olive", "orange", \
+               "pink", "purple", "teal", "violet"]
+
+def revert_textcolor(document):
+    """Revert the new \\textcolor colors to TeX code.
+
+    Every "\\color <name>" line whose name is in `color_names` is replaced
+    by ERT "\\textcolor{<name>}{", and a closing ERT "}" is inserted before
+    the next "\\color" line or the next "\\end_layout", whichever comes
+    first. The xcolor package is requested in the preamble, once, and only
+    when such a color is actually found.
+    """
+
+    i = 0
+    xcolor = False
+    while True:
+        i = find_token(document.body, "\\color ", i)
+        if i == -1:
+            return
+        for color in color_names:
+            if document.body[i] == "\\color " + color:
+                # register that xcolor must be loaded in the preamble
+                if not xcolor:
+                    xcolor = True
+                    # \@ifundefined takes three arguments (name, true, false);
+                    # the backslash of \usepackage must be escaped
+                    add_to_preamble(document, ["\\@ifundefined{rangeHsb}{\\usepackage{xcolor}}{}"])
+                # find the next \\color and/or the next \\end_layout
+                j = find_token(document.body, "\\color", i + 1)
+                k = find_token(document.body, "\\end_layout", i + 1)
+                if j == -1 and k != -1:
+                    j = k + 1
+                # output TeX code
+                # first output the closing brace
+                if k < j:
+                    document.body[k:k] = put_cmd_in_ert("}")
+                else:
+                    document.body[j:j] = put_cmd_in_ert("}")
+                # now output the \textcolor command
+                document.body[i:i + 1] = put_cmd_in_ert("\\textcolor{" + color + "}{")
+        i = i + 1
+
+
+def convert_colorbox(document):
+    " adds color settings for boxes "
+
+    # Default colors, inserted right after every "shadowsize" line.
+    defaults = ['framecolor "black"', 'backgroundcolor "none"']
+    pos = find_token(document.body, "shadowsize", 0)
+    while pos != -1:
+        document.body[pos + 1:pos + 1] = defaults
+        # skip the matched line plus the two lines just inserted
+        pos = find_token(document.body, "shadowsize", pos + 3)
+
+
+def revert_colorbox(document):
+    """Output color settings for boxes as TeX code.
+
+    For every Box inset the "framecolor" line and the line after it
+    (background color) are read and removed. When either differs from its
+    default ("black" / "none"), the inset is wrapped in ERT
+    "\\fcolorbox{frame}{back}{" or "\\colorbox{back}{" ... "}". A
+    "Box Boxed" inset with a non-default frame color becomes
+    "Box Frameless".
+    """
+
+    binset = 0
+    defaultframecolor = "black"
+    defaultbackcolor = "none"
+    while True:
+        binset = find_token(document.body, "\\begin_inset Box", binset)
+        if binset == -1:
+            return
+
+        einset = find_end_of_inset(document.body, binset)
+        if einset == -1:
+            document.warning("Malformed LyX document: Can't find end of box inset!")
+            binset += 1
+            continue
+
+        blay = find_token(document.body, "\\begin_layout", binset, einset)
+        if blay == -1:
+            document.warning("Malformed LyX document: Can't find start of layout!")
+            binset = einset
+            continue
+
+        # doing it this way, we make sure only to find a framecolor option
+        frame = find_token(document.body, "framecolor", binset, blay)
+        if frame == -1:
+            binset = einset
+            continue
+
+        beg = document.body[frame].find('"')
+        end = document.body[frame].rfind('"')
+        framecolor = document.body[frame][beg+1:end]
+
+        # this should be on the next line
+        bgcolor = frame + 1
+        beg = document.body[bgcolor].find('"')
+        end = document.body[bgcolor].rfind('"')
+        backcolor = document.body[bgcolor][beg+1:end]
+
+        # delete those bits
+        del document.body[frame:frame+2]
+        # adjust end of inset
+        einset -= 2
+
+        if document.body[binset] == "\\begin_inset Box Boxed" and \
+           framecolor != defaultframecolor:
+            document.body[binset] = "\\begin_inset Box Frameless"
+
+        # output TeX code
+        if framecolor != defaultframecolor or backcolor != defaultbackcolor:
+            # first output the closing brace (behind the inset, so that the
+            # indices in front of it stay valid)
+            document.body[einset + 1 : einset + 1] = put_cmd_in_ert("}")
+            # put_cmd_in_ert is given the plain LaTeX command, as in the
+            # other callers in this file, not a pre-escaped "\backslash"
+            if framecolor != defaultframecolor:
+                document.body[binset:binset] = put_cmd_in_ert("\\fcolorbox{" + framecolor + "}{" + backcolor + "}{")
+            else:
+                document.body[binset:binset] = put_cmd_in_ert("\\colorbox{" + backcolor + "}{")
+
+        binset = einset
+
+
+def revert_mathmulticol(document):
+    " Convert formulas to ERT if they contain multicolumns "
+
+    body = document.body
+    pos = 0
+    while True:
+        pos = find_token(body, '\\begin_inset Formula', pos)
+        if pos == -1:
+            return
+        stop = find_end_of_inset(body, pos)
+        if stop == -1:
+            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(pos))
+            pos += 1
+            continue
+
+        # The formula source without the inset header.
+        formula = body[pos:stop]
+        formula[0] = formula[0].replace('\\begin_inset Formula', '').lstrip()
+        code = "\n".join(formula)
+
+        # Look for a \multicolumn that is not the degenerate \multicolumn{1};
+        # the latter needs no conversion, it works in old LyX versions as
+        # "math ERT".
+        needs_ert = False
+        scan = 0
+        while True:
+            hit = code.find("\\multicolumn", scan)
+            if hit == -1:
+                break
+            if code.find("\\multicolumn{1}", scan) != hit:
+                needs_ert = True
+                break
+            scan = hit + 12
+
+        if needs_ert:
+            body[pos:stop + 1] = put_cmd_in_ert(code)
+            pos = find_end_of_inset(body, pos)
+        else:
+            pos = stop
+
+
+def revert_jss(document):
+    """Revert JSS-specific insets and layouts to ERT and preamble code.
+
+    Nothing is done unless the document class is "jss".  Otherwise:
+
+    1. the Flex insets Pkg, Proglang, Code, E-mail and URL are replaced by
+       ERT for the opening of the LaTeX command and for a closing brace,
+    2. every In_Preamble layout (Title, Author, ...) is converted with
+       lyx2latex(), added to the preamble and deleted from the body,
+    3. the Code Chunk inset and the Code Input, Code Output and Code
+       layouts are enclosed in ERT for the matching environment.
+
+    A missing In_Preamble layout no longer stops the reversion: the
+    remaining layouts and the code layouts are still processed.
+    """
+
+    if document.textclass != "jss":
+        return
+
+    # at first revert the inset layouts because they can be part of the
+    # In_Preamble layouts
+    # h, m, j, k, n are the search positions for the five inset types;
+    # a value of -1 means that no further inset of that type exists
+    # NOTE(review): the slice offsets used below (end - 2 : end + 1 and
+    # start : start + 4) assume a fixed number of lines at the beginning
+    # and at the end of a Flex inset -- confirm against the file format
+    h = 0
+    m = 0
+    j = 0
+    k = 0
+    n = 0
+    while m != -1 or j != -1 or h != -1 or k != -1 or n != -1:
+        # \pkg
+        if h != -1:
+            h = find_token(document.body, "\\begin_inset Flex Pkg", h)
+        if h != -1:
+            endh = find_end_of_inset(document.body, h)
+            # replace the end first so that h stays valid
+            document.body[endh - 2 : endh + 1] = put_cmd_in_ert("}")
+            document.body[h : h + 4] = put_cmd_in_ert("\\pkg{")
+            h = h + 5
+        # \proglang
+        if m != -1:
+            m = find_token(document.body, "\\begin_inset Flex Proglang", m)
+        if m != -1:
+            endm = find_end_of_inset(document.body, m)
+            document.body[endm - 2 : endm + 1] = put_cmd_in_ert("}")
+            document.body[m : m + 4] = put_cmd_in_ert("\\proglang{")
+            m = m + 5
+        # \code
+        if j != -1:
+            j = find_token(document.body, "\\begin_inset Flex Code", j)
+        if j != -1:
+            # assure that we are not in a Code Chunk inset
+            # ("Code" ends with "e", "Code Chunk" does not)
+            if document.body[j][-1] == "e":
+                endj = find_end_of_inset(document.body, j)
+                document.body[endj - 2 : endj + 1] = put_cmd_in_ert("}")
+                document.body[j : j + 4] = put_cmd_in_ert("\\code{")
+                j = j + 5
+            else:
+                j = j + 1
+        # \email
+        if k != -1:
+            k = find_token(document.body, "\\begin_inset Flex E-mail", k)
+        if k != -1:
+            endk = find_end_of_inset(document.body, k)
+            document.body[endk - 2 : endk + 1] = put_cmd_in_ert("}")
+            document.body[k : k + 4] = put_cmd_in_ert("\\email{")
+            k = k + 5
+        # \url
+        if n != -1:
+            n = find_token(document.body, "\\begin_inset Flex URL", n)
+        if n != -1:
+            endn = find_end_of_inset(document.body, n)
+            document.body[endn - 2 : endn + 1] = put_cmd_in_ert("}")
+            document.body[n : n + 4] = put_cmd_in_ert("\\url{")
+            n = n + 5
+
+    # now revert the In_Preamble layouts
+    # (layout name in the LyX file, LaTeX command without the backslash);
+    # a tuple is used to keep the order in which the commands are added
+    preamble_layouts = (
+        ("Title", "title"),
+        ("Author", "author"),
+        ("Plain Author", "Plainauthor"),
+        ("Plain Title", "Plaintitle"),
+        ("Short Title", "Shorttitle"),
+        ("Abstract", "Abstract"),
+        ("Keywords", "Keywords"),
+        ("Plain Keywords", "Plainkeywords"),
+        ("Address", "Address"),
+    )
+    for layout, command in preamble_layouts:
+        i = 0
+        while True:
+            i = find_token(document.body, "\\begin_layout " + layout, i)
+            if i == -1:
+                # this layout is not (or no longer) used, try the next one
+                break
+            j = find_end_of_layout(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Can't find end of " + layout + " layout")
+                # continue behind the broken layout; searching from the
+                # start again would find the same line forever
+                i += 1
+                continue
+            content = lyx2latex(document, document.body[i:j + 1])
+            add_to_preamble(document, ["\\" + command + "{" + content + "}"])
+            # the layout is removed, thus the next search starts at i again
+            del document.body[i:j + 1]
+
+    # finally handle the code layouts
+    # NOTE(review): the offsets + 3 and + 13 below presumably rely on the
+    # number of lines returned by put_cmd_in_ert() -- confirm
+    # NOTE(review): if find_token() matches line prefixes, the token
+    # "\begin_layout Code" also matches Code Input / Code Output layouts
+    # that are not yet converted -- verify
+    h = 0
+    m = 0
+    j = 0
+    k = 0
+    while m != -1 or j != -1 or h != -1 or k != -1:
+        # \CodeChunk
+        if h != -1:
+            h = find_token(document.body, "\\begin_inset Flex Code Chunk", h)
+        if h != -1:
+            endh = find_end_of_inset(document.body, h)
+            # the empty slice inserts the line behind the end of the inset
+            document.body[endh + 1 : endh] = ["\\end_layout"]
+            document.body[endh : endh + 1] = put_cmd_in_ert("\\end{CodeChunk}")
+            document.body[h : h + 3] = put_cmd_in_ert("\\begin{CodeChunk}")
+            document.body[h - 1 : h] = ["\\begin_layout Standard"]
+            h = h + 1
+        # \CodeInput
+        if j != -1:
+            j = find_token(document.body, "\\begin_layout Code Input", j)
+        if j != -1:
+            endj = find_end_of_layout(document.body, j)
+            document.body[endj : endj + 1] = ["\\end_layout", "", "\\begin_layout Standard"]
+            document.body[endj + 3 : endj + 4] = put_cmd_in_ert("\\end{CodeInput}")
+            document.body[endj + 13 : endj + 13] = ["\\end_layout", "", "\\begin_layout Standard"]
+            document.body[j + 1 : j] = ["\\end_layout", "", "\\begin_layout Standard"]
+            document.body[j : j + 1] = put_cmd_in_ert("\\begin{CodeInput}")
+            j = j + 1
+        # \CodeOutput
+        if k != -1:
+            k = find_token(document.body, "\\begin_layout Code Output", k)
+        if k != -1:
+            endk = find_end_of_layout(document.body, k)
+            document.body[endk : endk + 1] = ["\\end_layout", "", "\\begin_layout Standard"]
+            document.body[endk + 3 : endk + 4] = put_cmd_in_ert("\\end{CodeOutput}")
+            document.body[endk + 13 : endk + 13] = ["\\end_layout", "", "\\begin_layout Standard"]
+            document.body[k + 1 : k] = ["\\end_layout", "", "\\begin_layout Standard"]
+            document.body[k : k + 1] = put_cmd_in_ert("\\begin{CodeOutput}")
+            k = k + 1
+        # \Code
+        if m != -1:
+            m = find_token(document.body, "\\begin_layout Code", m)
+        if m != -1:
+            endm = find_end_of_layout(document.body, m)
+            document.body[endm : endm + 1] = ["\\end_layout", "", "\\begin_layout Standard"]
+            document.body[endm + 3 : endm + 4] = put_cmd_in_ert("\\end{Code}")
+            document.body[endm + 13 : endm + 13] = ["\\end_layout", "", "\\begin_layout Standard"]
+            document.body[m + 1 : m] = ["\\end_layout", "", "\\begin_layout Standard"]
+            document.body[m : m + 1] = put_cmd_in_ert("\\begin{Code}")
+            m = m + 1
+
+
+def convert_subref(document):
+    """Convert the prefix "sub:" of labels and references to "subsec:".
+
+    The "name" parameter of all label insets and the "reference"
+    parameter of all ref insets is rewritten in place in document.body.
+    Insets whose end cannot be found are reported with a warning and
+    skipped.
+    """
+
+    # 1) label insets
+    rx = re.compile(r'^name \"sub:(.+)$')
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset CommandInset label", i)
+        if i == -1:
+            break
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Malformed LyX document: Can't find end of Label inset at line " + str(i))
+            i += 1
+            continue
+
+        for p in range(i, j):
+            m = rx.match(document.body[p])
+            if m:
+                label = m.group(1)
+                document.body[p] = "name \"subsec:" + label
+                # only the first match per inset is handled, as it is
+                # done for the ref insets below
+                break
+        i += 1
+
+    # 2) xref insets
+    rx = re.compile(r'^reference \"sub:(.+)$')
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Malformed LyX document: Can't find end of Ref inset at line " + str(i))
+            i += 1
+            continue
+
+        for p in range(i, j):
+            m = rx.match(document.body[p])
+            if m:
+                label = m.group(1)
+                document.body[p] = "reference \"subsec:" + label
+                break
+        i += 1
+
+
+
+def revert_subref(document):
+    """Revert the prefix "subsec:" of labels and references to "sub:".
+
+    The "name" parameter of all label insets and then the "reference"
+    parameter of all ref insets is rewritten in place in document.body.
+    Insets whose end cannot be found are reported with a warning and
+    skipped.
+    """
+
+    # one entry per inset type: the token that starts the inset, the
+    # pattern of the parameter line, the beginning of the new parameter
+    # line and the beginning of the warning for a malformed inset
+    jobs = (
+        # 1) label insets
+        ("\\begin_inset CommandInset label",
+         re.compile(r'^name \"subsec:(.+)$'),
+         "name \"sub:",
+         "Malformed LyX document: Can't find end of Label inset at line "),
+        # 2) xref insets
+        ("\\begin_inset CommandInset ref",
+         re.compile(r'^reference \"subsec:(.+)$'),
+         "reference \"sub:",
+         "Malformed LyX document: Can't find end of Ref inset at line "),
+    )
+
+    for token, pattern, replacement, complaint in jobs:
+        start = 0
+        while True:
+            start = find_token(document.body, token, start)
+            if start == -1:
+                # no further inset of this type
+                break
+            stop = find_end_of_inset(document.body, start)
+            if stop != -1:
+                # rewrite the first matching parameter line of the inset
+                for pos in range(start, stop):
+                    found = pattern.match(document.body[pos])
+                    if found:
+                        document.body[pos] = replacement + found.group(1)
+                        break
+            else:
+                document.warning(complaint + str(start))
+            start += 1
+
+
  ##
  # Conversion hub
  #
@@ -495,10 +1514,40 @@ convert = [
             [477, []],
             [478, []],
             [479, []],
-           [480, []]
+           [480, []],
+           [481, [convert_dashes]],
+           [482, [convert_phrases]],
+           [483, [convert_specialchar]],
+           [484, []],
+           [485, []],
+           [486, []],
+           [487, []],
+           [488, [convert_newgloss]],
+           [489, [convert_BoxFeatures]],
+           [490, [convert_origin]],
+           [491, []],
+           [492, [convert_colorbox]],
+           [493, []],
+           [494, []],
+           [495, [convert_subref]]
            ]
  
  revert =  [
+           [494, [revert_subref]],
+           [493, [revert_jss]],
+           [492, [revert_mathmulticol]],
+           [491, [revert_colorbox]],
+           [490, [revert_textcolor]],
+           [489, [revert_origin]],
+           [488, [revert_BoxFeatures]],
+           [487, [revert_newgloss, revert_glossgroup]],
+           [486, [revert_forest]],
+           [485, [revert_ex_itemargs]],
+           [484, [revert_sigplan_doi]],
+           [483, [revert_georgian]],
+           [482, [revert_specialchar]],
+           [481, [revert_phrases]],
+           [480, [revert_dashes]],
             [479, [revert_question_env]],
             [478, [revert_beamer_lemma]],
             [477, [revert_xarrow]],