Don't use widest label for numerical citations.

[lyx.git] / lib / lyx2lyx / lyx_1_6.py
diff --git a/lib/lyx2lyx/lyx_1_6.py b/lib/lyx2lyx/lyx_1_6.py

index 24abefb90718f4d8381502dea9e7b6b4b4807cb2..2d0964c3afeab7f80a4da853715b9c4feea507ea 100644 (file)
--- a/lib/lyx2lyx/lyx_1_6.py
+++ b/lib/lyx2lyx/lyx_1_6.py
@@ -1,6 +1,6 @@
  # This file is part of lyx2lyx
  # -*- coding: utf-8 -*-
  # This file is part of lyx2lyx
  # -*- coding: utf-8 -*-
-# Copyright (C) 2007 José Matos <jamatos@lyx.org>
+# Copyright (C) 2007-2008 The LyX Team <lyx-devel@lists.lyx.org>
  #
  # This program is free software; you can redistribute it and/or
  # modify it under the terms of the GNU General Public License
  #
  # This program is free software; you can redistribute it and/or
  # modify it under the terms of the GNU General Public License
@@ -14,7 +14,7 @@
  #
  # You should have received a copy of the GNU General Public License
  # along with this program; if not, write to the Free Software
  #
  # You should have received a copy of the GNU General Public License
  # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  
  """ Convert files to the file format generated by lyx 1.6"""
  
  
  """ Convert files to the file format generated by lyx 1.6"""
  
@@ -22,18 +22,645 @@ import re
  import unicodedata
  import sys, os
  
  import unicodedata
  import sys, os
  
-from parser_tools import find_token, find_end_of, find_tokens
+from parser_tools import find_token, find_end_of, find_tokens, get_value
  
  ####################################################################
  # Private helper functions
  
  
  ####################################################################
  # Private helper functions
  
+
+def get_value_string(lines, token, start, end = 0, trim = False, default = ""):
+    """ get_value_string(lines, token, start[[, end], trim, default]) -> string
+
+    Return tokens after token as string, in lines, where
+    token is the first element. When trim is used, the first and last character
+    of the string is trimmed."""
+
+    val = get_value(lines, token, start, end, "")
+    if not val:
+      return default
+    if trim:
+      return val[1:-1]
+    return val
+
+
  def find_end_of_inset(lines, i):
      " Find end of inset, where lines[i] is included."
      return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  
  def find_end_of_inset(lines, i):
      " Find end of inset, where lines[i] is included."
      return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  
+# WARNING!
+# DO NOT do this:
+#   document.body[i] = wrap_into_ert(...)
+# wrap_into_ert may return a multiline string, which should NOT appear
+# in document.body. Instead, do something like this:
+#   subst = wrap_into_ert(...)
+#   subst = subst.split('\n')
+#   document.body[i:i+1] = subst
+#   i += len(subst) - 1
+# where the last statement resets the counter to accord with the added
+# lines.
+def wrap_into_ert(string, src, dst):
+    '''Within string, replace occurrences of src with dst, wrapped into ERT
+       E.g.: wrap_into_ert('sch\"on', "\\", "\\backslash") is:
+       sch<ERT>\\backslash</ERT>"on'''
+    return string.replace(src, '\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n'
+      + dst + '\n\\end_layout\n\\end_inset\n')
+
+def put_cmd_in_ert(string):
+    for rep in unicode_reps:
+        string = string.replace(rep[1], rep[0].replace('\\\\', '\\'))
+    string = string.replace('\\', "\\backslash\n")
+    string = "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n" \
+      + string + "\n\\end_layout\n\\end_inset"
+    return string
+
+def add_to_preamble(document, text):
+    """ Add text to the preamble if it is not already there.
+    Only the first line is checked!"""
+
+    if find_token(document.preamble, text[0], 0) != -1:
+        return
+
+    document.preamble.extend(text)
+
+def insert_to_preamble(index, document, text):
+    """ Insert text to the preamble at a given line"""
+
+    document.preamble.insert(index, text)
+
+# Convert a LyX length into a LaTeX length
+def convert_len(len):
+    units = {"text%":"\\backslash\ntextwidth", "col%":"\\backslash\ncolumnwidth",
+             "page%":"\\backslash\npagewidth", "line%":"\\backslash\nlinewidth",
+             "theight%":"\\backslash\ntextheight", "pheight%":"\\backslash\npageheight"}
+
+    # Convert LyX units to LaTeX units
+    for unit in units.keys():
+        if len.find(unit) != -1:
+            len = '%f' % (len2value(len) / 100)
+            len = len.strip('0') + units[unit]
+            break
+
+    return len
+
+# Return the value of len without the unit in numerical form.
+def len2value(len):
+    result = re.search('([+-]?[0-9.]+)', len)
+    if result:
+        return float(result.group(1))
+    # No number means 1.0
+    return 1.0
+
+# Unfortunately, this doesn't really work, since Standard isn't always default.
+# But it's as good as we can do right now.
+def find_default_layout(document, start, end):
+    l = find_token(document.body, "\\begin_layout Standard", start, end)
+    if l == -1:
+        l = find_token(document.body, "\\begin_layout PlainLayout", start, end)
+    if l == -1:
+        l = find_token(document.body, "\\begin_layout Plain Layout", start, end)
+    return l
+
+def get_option(document, m, option, default):
+    l = document.body[m].find(option)
+    val = default
+    if l != -1:
+        val = document.body[m][l:].split('"')[1]
+    return val
+
+def remove_option(document, m, option):
+    l = document.body[m].find(option)
+    if l != -1:
+        val = document.body[m][l:].split('"')[1]
+        document.body[m] = document.body[m][:l-1] + document.body[m][l+len(option + '="' + val + '"'):]
+    return l
+
+def set_option(document, m, option, value):
+    l = document.body[m].find(option)
+    if l != -1:
+        oldval = document.body[m][l:].split('"')[1]
+        l = l + len(option + '="')
+        document.body[m] = document.body[m][:l] + value + document.body[m][l+len(oldval):]
+    else:
+        document.body[m] = document.body[m][:-1] + ' ' + option + '="' + value + '">'
+    return l
+
+
+def read_unicodesymbols():
+    " Read the unicodesymbols list of unicode characters and corresponding commands."
+    # Returns a list of [command, character] pairs. For commands that match
+    # the pattern below, two further spellings of the same command are added.
+    #
+    # The file is looked up relative to the directory of the running script.
+    # NOTE(review): str.strip('lyx2lyx') removes any of the characters
+    # l, y, x, 2 from both ends of the path, not the literal suffix
+    # 'lyx2lyx'; presumably the parent directory is meant -- confirm.
+    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
+    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
+    spec_chars = []
+    # Two backslashes, followed by some non-word character, and then a character
+    # in brackets. The idea is to check for constructs like: \"{u}, which is how
+    # they are written in the unicodesymbols file; but they can also be written
+    # as: \"u or even \" u.
+    r = re.compile(r'\\\\(\W)\{(\w)\}')
+    for line in fp.readlines():
+        # Skip comment lines and blank lines.
+        if line[0] != '#' and line.strip() != "":
+            line=line.replace(' "',' ') # remove all quotation marks with spaces before
+            line=line.replace('" ',' ') # remove all quotation marks with spaces after
+            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
+            try:
+                # First field: code point, second field: command; the rest
+                # of the line is not used.
+                [ucs4,command,dead] = line.split(None,2)
+                # Only entries whose command starts with a backslash are kept.
+                if command[0:1] != "\\":
+                    continue
+                # NOTE(review): eval() runs the first field as a Python
+                # expression; this is only acceptable for a trusted file.
+                spec_chars.append([command, unichr(eval(ucs4))])
+            except:
+                # Any failure (e.g. a line with fewer than three fields)
+                # silently drops the line.
+                continue
+            m = r.match(command)
+            if m != None:
+                command = "\\\\"
+                # If the character is a double-quote, then we need to escape it, too,
+                # since it is done that way in the LyX file.
+                if m.group(1) == "\"":
+                    command += "\\"
+                # command: accent directly followed by the letter;
+                # commandbl: the same with a blank in between.
+                commandbl = command
+                command += m.group(1) + m.group(2)
+                commandbl += m.group(1) + ' ' + m.group(2)
+                spec_chars.append([command, unichr(eval(ucs4))])
+                spec_chars.append([commandbl, unichr(eval(ucs4))])
+    fp.close()
+    return spec_chars
+
+
+def extract_argument(line):
+    'Extracts a LaTeX argument from the start of line. Returns (arg, rest).'
+
+    if not line:
+        return (None, "")
+
+    bracere = re.compile("(\s*)(.*)")
+    n = bracere.match(line)
+    whitespace = n.group(1)
+    stuff = n.group(2)
+    brace = stuff[:1]
+    if brace != "[" and brace != "{":
+        return (None, line)
+
+    # find closing brace
+    remain = stuff[1:]
+    pos = 0
+    num = 1
+    term = "}"
+    if brace == "[":
+        term = "]"
+    skip = False
+    for c in remain:
+        if skip:
+            skip = False
+        elif c == "\\":
+            skip = True
+        elif c == brace:
+            num += 1
+        elif c == term:
+            num -= 1
+        if c == 0:
+            break
+        pos += 1
+    if num != 0:
+        # We never found the matching brace
+        # So, to be on the safe side, let's just return everything
+        # which will then get wrapped as ERT
+        return (line, "")
+    return (line[:pos + 1], line[pos + 1:])
+
+
+def latex2ert(line, isindex):
+    '''Converts LaTeX commands into ERT. line may well be a multi-line
+       string when it is returned.'''
+    # isindex: when true, '|' characters in the remaining text are put into
+    # ERT as well (see the end of the function).
+    if not line:
+        return line
+
+    retval = ""
+    ## FIXME Escaped \ ??
+    # This regex looks for a LaTeX command---i.e., something of the form
+    # "\alPhaStuFF", or "\X", where X is any character---where the command
+    # may also be preceded by an additional backslash, which is how it would
+    # appear (e.g.) in an InsetIndex.
+    labelre = re.compile(r'(.*?)\\?(\\(?:[a-zA-Z]+|.))(.*)')
+
+    m = labelre.match(line)
+    while m != None:
+        # group(1): text in front of the command, copied unchanged;
+        # group(2): the command itself; group(3): everything after it.
+        retval += m.group(1)
+        cmd = m.group(2)
+        end = m.group(3)
+
+        # Attach every argument that directly follows the command to it.
+        while True:
+            (arg, rest) = extract_argument(end)
+            if arg == None:
+                break
+            cmd += arg
+            end = rest
+        # If we wanted to put labels into an InsetLabel, for example, then we
+        # would just need to test here for cmd == "label" and then take some
+        # appropriate action, i.e., to use arg to get the content and then
+        # wrap it appropriately.
+        cmd = put_cmd_in_ert(cmd)
+        retval += "\n" + cmd + "\n"
+        # Continue the search in what is left after command and arguments.
+        line = end
+        m = labelre.match(line)
+    # put all remaining braces in ERT
+    line = wrap_into_ert(line, '}', '}')
+    line = wrap_into_ert(line, '{', '{')
+    if isindex:
+        # active character that is not available in all font encodings
+        line = wrap_into_ert(line, '|', '|')
+    retval += line
+    return retval
+
+
+# List of [command, unicode character] pairs, built once when this module
+# is executed; used for the conversions between LaTeX and LyX text below.
+unicode_reps = read_unicodesymbols()
+
+# Bug 5022:
+# We should probably run latex2ert first and then deal only with the
+# material that does NOT end up inside ERT. That routine could be modified
+# so that it returns a list of lines; we could then skip the ERT bits and
+# only deal with the other bits.
+def latex2lyx(data, isindex):
+    '''Takes a string, possibly multi-line, and returns the result of
+    converting LaTeX constructs into LyX constructs. Returns a list of
+    lines, suitable for insertion into document.body.
+    The bool isindex specifies whether we are in an index macro (which
+    has some specific active characters that need to be ERTed).'''
+
+    if not data:
+        return [""]
+    retval = []
+
+    # Convert LaTeX to Unicode
+    # Commands of this sort need to be checked to make sure they are
+    # followed by a non-alpha character, lest we replace too much.
+    hardone = re.compile(r'^\\\\[a-zA-Z]+$')
+
+    # Each rep is a [command, character] pair.
+    for rep in unicode_reps:
+        if hardone.match(rep[0]):
+            pos = 0
+            while True:
+                pos = data.find(rep[0], pos)
+                if pos == -1:
+                    break
+                nextpos = pos + len(rep[0])
+                if nextpos < len(data) and data[nextpos].isalpha():
+                    # not the end of that command
+                    pos = nextpos
+                    continue
+                data = data[:pos] + rep[1] + data[nextpos:]
+                # NOTE(review): nextpos was computed for the string before
+                # the replacement; if rep[1] is shorter than rep[0] the
+                # search resumes beyond the end of the inserted text and
+                # may miss an occurrence right behind it -- confirm.
+                pos = nextpos
+        else:
+            # Anything else is replaced wherever it occurs.
+            data = data.replace(rep[0], rep[1])
+
+    # Generic
+    # \" -> ":
+    data = wrap_into_ert(data, r'\"', '"')
+    # \\ -> \:
+    data = data.replace('\\\\', '\\')
+
+    # Math:
+    # group(1): text before the first $...$, group(2): the formula including
+    # both dollar signs, group(3): the rest of the line.
+    mathre = re.compile('^(.*?)(\$.*?\$)(.*)')
+    lines = data.split('\n')
+    for line in lines:
+        #document.warning("LINE: " + line)
+        #document.warning(str(i) + ":" + document.body[i])
+        #document.warning("LAST: " + document.body[-1])
+        g = line
+        m = mathre.match(g)
+        while m != None:
+            s = m.group(1)
+            f = m.group(2).replace('\\\\', '\\')
+            g = m.group(3)
+            if s:
+                # this is non-math!
+                s = latex2ert(s, isindex)
+                subst = s.split('\n')
+                retval += subst
+            # The formula becomes a Formula inset of its own.
+            retval.append("\\begin_inset Formula " + f)
+            retval.append("\\end_inset")
+            m = mathre.match(g)
+        # Handle whatever is left, which is just text
+        g = latex2ert(g, isindex)
+        subst = g.split('\n')
+        retval += subst
+    return retval
+
+
+def lyxline2latex(document, line, inert):
+    'Convert some LyX stuff into corresponding LaTeX stuff line-wise, as best we can.'
+    if line.startswith("\\begin_inset Formula"):
+        line = line[20:]
+    elif line.startswith("\\begin_inset Quotes"):
+        # For now, we do a very basic reversion. Someone who understands
+        # quotes is welcome to fix it up.
+        qtype = line[20:].strip()
+        # lang = qtype[0]
+        side = qtype[1]
+        dbls = qtype[2]
+        if side == "l":
+            if dbls == "d":
+                line = "``"
+            else:
+                line = "`"
+        else:
+            if dbls == "d":
+                line = "''"
+            else:
+                line = "'"
+    elif line.isspace() or \
+          line.startswith("\\begin_layout") or \
+          line.startswith("\\end_layout") or \
+          line.startswith("\\begin_inset") or \
+          line.startswith("\\end_inset") or \
+          line.startswith("\\lang") or \
+          line.strip() == "status collapsed" or \
+          line.strip() == "status open":
+        #skip all that stuff
+        return ""
+
+    # this needs to be added to the preamble because of cases like
+    # \textmu, \textbackslash, etc.
+    add_to_preamble(document, ['% added by lyx2lyx for converted entries',
+                               '\\@ifundefined{textmu}',
+                               ' {\\usepackage{textcomp}}{}'])
+    # a lossless reversion is not possible
+    # try at least to handle some common insets and settings
+    if inert:
+        line = line.replace(r'\backslash', '\\')
+    else:
+        line = line.replace('&', '\\&{}')
+        line = line.replace('#', '\\#{}')
+        line = line.replace('^', '\\^{}')
+        line = line.replace('%', '\\%{}')
+        line = line.replace('_', '\\_{}')
+        line = line.replace('$', '\\${}')
+
+        # Do the LyX text --> LaTeX conversion
+        for rep in unicode_reps:
+            line = line.replace(rep[1], rep[0].replace('\\\\', '\\') + "{}")
+            line = line.replace(r'\backslash', r'\textbackslash{}')
+            line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
+            line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
+            line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
+            line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
+            line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
+            line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
+            line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
+            line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
+            line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
+    return line
+
+
+def lyx2latex(document, lines):
+    'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
+    # clean up multiline stuff
+    content = ""
+    ert_end = 0
+
+    for curline in range(len(lines)):
+        line = lines[curline]
+        if line.startswith("\\begin_inset ERT"):
+            # We don't want to replace things inside ERT, so figure out
+            # where the end of the inset is.
+            ert_end = find_end_of_inset(lines, curline + 1)
+            continue
+        inert = ert_end >= curline
+        content += lyxline2latex(document, lines[curline], inert)
+      
+    return content
+
  
  ####################################################################
  
  
  ####################################################################
  
+def convert_ltcaption(document):
+    # Mark caption rows of tabulars. A row whose first cell contains a
+    # Caption inset (or what looks like an ERT caption) gets caption="true";
+    # its first cell becomes multicolumn="1" with all four lines switched
+    # off and every further cell of that row becomes multicolumn="2".
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Tabular", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            document.warning("Malformed LyX document: Could not find end of tabular.")
+            i += 1
+            continue
+
+        # The line after \begin_inset Tabular carries the table dimensions:
+        # the second quoted value is taken as the number of rows, the third
+        # as the number of columns.
+        nrows = int(document.body[i+1].split('"')[3])
+        ncols = int(document.body[i+1].split('"')[5])
+
+        m = i + 1
+        for k in range(nrows):
+            m = find_token(document.body, "<row", m)
+            # r remembers the <row ...> line so it can be flagged later.
+            r = m
+            caption = 'false'
+            # NOTE: this inner loop reuses k; harmless here because the
+            # outer loop does not read k.
+            for k in range(ncols):
+                m = find_token(document.body, "<cell", m)
+                if (k == 0):
+                    mend = find_token(document.body, "</cell>", m + 1)
+                    # first look for caption insets
+                    mcap = find_token(document.body, "\\begin_inset Caption", m + 1, mend)
+                    # then look for ERT captions
+                    if mcap == -1:
+                        mcap = find_token(document.body, "caption", m + 1, mend)
+                        if mcap > -1:
+                            # presumably an ERT "\caption": require a
+                            # \backslash line directly in front of it
+                            mcap = find_token(document.body, "\\backslash", mcap - 1, mcap)
+                    if mcap > -1:
+                        caption = 'true'
+                if caption == 'true':
+                    if (k == 0):
+                        set_option(document, r, 'caption', 'true')
+                        set_option(document, m, 'multicolumn', '1')
+                        set_option(document, m, 'bottomline', 'false')
+                        set_option(document, m, 'topline', 'false')
+                        set_option(document, m, 'rightline', 'false')
+                        set_option(document, m, 'leftline', 'false')
+                        #j = find_end_of_inset(document.body, j + 1)
+                    else:
+                        set_option(document, m, 'multicolumn', '2')
+                m = m + 1
+            m = m + 1
+
+        # Continue the search behind this tabular.
+        i = j + 1
+
+
+#FIXME Use of wrap_into_ert can confuse lyx2lyx
+def revert_ltcaption(document):
+    # Undo the caption marking of tabular rows: the caption option is
+    # removed from the row, multicolumn is removed from all of its cells and
+    # an ERT inset is appended behind the Caption inset of the first cell.
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Tabular", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            document.warning("Malformed LyX document: Could not find end of tabular.")
+            i += 1
+            continue
+
+        m = i + 1
+        # Second and third quoted value on the line after
+        # \begin_inset Tabular: number of rows and number of columns.
+        nrows = int(document.body[i+1].split('"')[3])
+        ncols = int(document.body[i+1].split('"')[5])
+
+        for k in range(nrows):
+            m = find_token(document.body, "<row", m)
+            caption = get_option(document, m, 'caption', 'false')
+            if caption == 'true':
+                remove_option(document, m, 'caption')
+                # NOTE: the inner loop reuses k; the outer loop does not
+                # read it.
+                for k in range(ncols):
+                    m = find_token(document.body, "<cell", m)
+                    remove_option(document, m, 'multicolumn')
+                    if k == 0:
+                        m = find_token(document.body, "\\begin_inset Caption", m)
+                        if m == -1:
+                            # NOTE(review): this leaves the whole function,
+                            # so later tabulars are not processed -- confirm
+                            # that this is intended.
+                            return
+                        m = find_end_of_inset(document.body, m + 1)
+                        # With empty string and src, wrap_into_ert returns
+                        # just the ERT inset. The result is a multi-line
+                        # string stored in a single body entry (see the
+                        # FIXME in front of this function).
+                        document.body[m] += wrap_into_ert("","","\\backslash\n\\backslash\n%")
+                    m = m + 1
+            m = m + 1
+        i = j + 1
+
+
+def convert_tablines(document):
+    # Move the line settings of tabulars from the <column> and <row> tags
+    # (leftline/rightline resp. topline/bottomline) to the individual
+    # <cell> tags.
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Tabular", i)
+        if i == -1:
+            # LyX 1.3 inserted an extra space between \begin_inset
+            # and Tabular so let us try if this is the case and fix it.
+            # NOTE(review): i is -1 here, so the search below is started
+            # at index -1 -- confirm that find_token copes with that.
+            i = find_token(document.body, "\\begin_inset  Tabular", i)
+            if i == -1:
+                return
+            else:
+                document.body[i] = "\\begin_inset Tabular"
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            document.warning("Malformed LyX document: Could not find end of tabular.")
+            i += 1
+            continue
+
+        m = i + 1
+        # Second and third quoted value on the line after
+        # \begin_inset Tabular: number of rows and number of columns.
+        nrows = int(document.body[i+1].split('"')[3])
+        ncols = int(document.body[i+1].split('"')[5])
+
+        # Collect [leftline, rightline] per column and strip both options
+        # from the <column> tags.
+        col_info = []
+        for k in range(ncols):
+            m = find_token(document.body, "<column", m)
+            left = get_option(document, m, 'leftline', 'false')
+            right = get_option(document, m, 'rightline', 'false')
+            col_info.append([left, right])
+            remove_option(document, m, 'leftline')
+            remove_option(document, m, 'rightline')
+            m = m + 1
+
+        # Collect [topline, bottomline] per row and strip both options from
+        # the <row> tags.
+        row_info = []
+        for k in range(nrows):
+            m = find_token(document.body, "<row", m)
+            top = get_option(document, m, 'topline', 'false')
+            bottom = get_option(document, m, 'bottomline', 'false')
+            row_info.append([top, bottom])
+            remove_option(document, m, 'topline')
+            remove_option(document, m, 'bottomline')
+            m = m + 1
+
+        # Record the multicolumn state of every cell, row by row
+        # ('0' when the option is absent).
+        m = i + 1
+        mc_info = []
+        for k in range(nrows*ncols):
+            m = find_token(document.body, "<cell", m)
+            mc_info.append(get_option(document, m, 'multicolumn', '0'))
+            m = m + 1
+        # Second pass over the cells: write the collected settings.
+        m = i + 1
+        for l in range(nrows):
+            for k in range(ncols):
+                m = find_token(document.body, '<cell', m)
+                if mc_info[l*ncols + k] == '0':
+                    # Ordinary cell: inherits the lines of its row and column.
+                    r = set_option(document, m, 'topline', row_info[l][0])
+                    r = set_option(document, m, 'bottomline', row_info[l][1])
+                    r = set_option(document, m, 'leftline', col_info[k][0])
+                    r = set_option(document, m, 'rightline', col_info[k][1])
+                elif mc_info[l*ncols + k] == '1':
+                    # Cell with multicolumn="1": skip the cells marked '2'
+                    # that follow it in this row.
+                    s = k + 1
+                    while s < ncols and mc_info[l*ncols + s] == '2':
+                        s = s + 1
+                    if s < ncols and mc_info[l*ncols + s] != '1':
+                        r = set_option(document, m, 'rightline', col_info[k][1])
+                    if k > 0 and mc_info[l*ncols + k - 1] == '0':
+                        r = set_option(document, m, 'leftline', col_info[k][0])
+                m = m + 1
+        i = j + 1
+
+
+def revert_tablines(document):
+    # Derive line settings for the <column> and <row> tags of tabulars from
+    # the topline/bottomline/leftline/rightline options of the cells.
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Tabular", i)
+        if i == -1:
+            return
+        # NOTE(review): the other tabular routines in this file pass i + 1
+        # here -- confirm which start index is intended.
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Malformed LyX document: Could not find end of tabular.")
+            i += 1
+            continue
+
+        m = i + 1
+        # Second and third quoted value on the line after
+        # \begin_inset Tabular: number of rows and number of columns.
+        nrows = int(document.body[i+1].split('"')[3])
+        ncols = int(document.body[i+1].split('"')[5])
+
+        # lines[row*ncols + col] = [top, bottom, left, right] of that cell.
+        lines = []
+        for k in range(nrows*ncols):
+            m = find_token(document.body, "<cell", m)
+            top = get_option(document, m, 'topline', 'false')
+            bottom = get_option(document, m, 'bottomline', 'false')
+            left = get_option(document, m, 'leftline', 'false')
+            right = get_option(document, m, 'rightline', 'false')
+            lines.append([top, bottom, left, right])
+            m = m + 1
+
+        # we will want to ignore longtable captions
+        m = i + 1
+        caption_info = []
+        for k in range(nrows):
+            m = find_token(document.body, "<row", m)
+            caption = get_option(document, m, 'caption', 'false')
+            # NOTE(review): every entry is a one-element list, yet the code
+            # below compares caption_info[...] with the string 'false'. Such
+            # a comparison is never true, so the caption tests currently
+            # have no effect -- confirm the intended behaviour before
+            # changing this.
+            caption_info.append([caption])
+            m = m + 1
+
+        m = i + 1
+        col_info = []
+        for k in range(ncols):
+            m = find_token(document.body, "<column", m)
+            # The column receives the value of the cell at which the scan
+            # over its rows stops (the last row if it never stops early).
+            left = 'true'
+            for l in range(nrows):
+                left = lines[l*ncols + k][2]
+                if left == 'false' and caption_info[l] == 'false':
+                    break
+            set_option(document, m, 'leftline', left)
+            right = 'true'
+            for l in range(nrows):
+                right = lines[l*ncols + k][3]
+                if right == 'false' and caption_info[l] == 'false':
+                    break
+            set_option(document, m, 'rightline', right)
+            m = m + 1
+
+        # m is not reset here: the <row> tags are searched from behind the
+        # last <column> tag.
+        row_info = []
+        for k in range(nrows):
+            m = find_token(document.body, "<row", m)
+            # A row gets a line only if none of its cells says 'false'.
+            top = 'true'
+            for l in range(ncols):
+                top = lines[k*ncols + l][0]
+                if top == 'false':
+                    break
+            if caption_info[k] == 'false':
+                top = 'false'
+            set_option(document, m, 'topline', top)
+            bottom = 'true'
+            for l in range(ncols):
+                bottom = lines[k*ncols + l][1]
+                if bottom == 'false':
+                    break
+            if caption_info[k] == 'false':
+                bottom = 'false'
+            set_option(document, m, 'bottomline', bottom)
+            m = m + 1
+
+        i = j + 1
+
+
  def fix_wrong_tables(document):
      i = 0
      while True:
  def fix_wrong_tables(document):
      i = 0
      while True:
@@ -43,6 +670,7 @@ def fix_wrong_tables(document):
          j = find_end_of_inset(document.body, i + 1)
          if j == -1:
              document.warning("Malformed LyX document: Could not find end of tabular.")
          j = find_end_of_inset(document.body, i + 1)
          if j == -1:
              document.warning("Malformed LyX document: Could not find end of tabular.")
+            i += 1
              continue
  
          m = i + 1
              continue
  
          m = i + 1
@@ -102,7 +730,7 @@ def revert_long_charstyle_names(document):
          i = find_token(document.body, "\\begin_inset CharStyle", i)
          if i == -1:
              return
          i = find_token(document.body, "\\begin_inset CharStyle", i)
          if i == -1:
              return
-        document.body[i] = document.body[i].replace("CharStyle CharStyle:", "CharStyle")
+        document.body[i] = document.body[i].replace("CharStyle CharStyle:", "CharStyle ")
          i += 1
  
  
          i += 1
  
  
@@ -124,7 +752,7 @@ def axe_show_label(document):
                      document.warning("Malformed LyX document: show_label neither false nor true.")
          else:
              document.warning("Malformed LyX document: show_label missing in CharStyle.")
                      document.warning("Malformed LyX document: show_label neither false nor true.")
          else:
              document.warning("Malformed LyX document: show_label missing in CharStyle.")
-            
+
          i += 1
  
  
          i += 1
  
  
@@ -165,7 +793,7 @@ def convert_flex(document):
          document.body[i] = document.body[i].replace('\\begin_inset CharStyle', '\\begin_inset Flex')
  
  def revert_flex(document):
          document.body[i] = document.body[i].replace('\\begin_inset CharStyle', '\\begin_inset Flex')
  
  def revert_flex(document):
-    "Convert Flex to CharStyle"
+    "Revert Flex to CharStyle"
      i = 0
      while True:
          i = find_token(document.body, "\\begin_inset Flex", i)
      i = 0
      while True:
          i = find_token(document.body, "\\begin_inset Flex", i)
@@ -174,67 +802,165 @@ def revert_flex(document):
          document.body[i] = document.body[i].replace('\\begin_inset Flex', '\\begin_inset CharStyle')
  
  
          document.body[i] = document.body[i].replace('\\begin_inset Flex', '\\begin_inset CharStyle')
  
  
-def remove_manifest(document):
-    "Remove the manifest section"
-    document.manifest = None
-
-
-#  Discard PDF options for hyperref
  def revert_pdf_options(document):
          "Revert PDF options for hyperref."
  def revert_pdf_options(document):
          "Revert PDF options for hyperref."
+        # store the PDF options and delete the entries from the Lyx file
          i = 0
          i = 0
+        hyperref = False
+        title = ""
+        author = ""
+        subject = ""
+        keywords = ""
+        bookmarks = ""
+        bookmarksnumbered = ""
+        bookmarksopen = ""
+        bookmarksopenlevel = ""
+        breaklinks = ""
+        pdfborder = ""
+        colorlinks = ""
+        backref = ""
+        pagebackref = ""
+        pagemode = ""
+        otheroptions = ""
          i = find_token(document.header, "\\use_hyperref", i)
          if i != -1:
          i = find_token(document.header, "\\use_hyperref", i)
          if i != -1:
+            hyperref = get_value(document.header, "\\use_hyperref", i) == 'true'
              del document.header[i]
          i = find_token(document.header, "\\pdf_store_options", i)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_title", 0)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_store_options", i)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_title", 0)
          if i != -1:
+            title = get_value_string(document.header, '\\pdf_title', 0, 0, True)
+            title = ' pdftitle={' + title + '}'
              del document.header[i]
          i = find_token(document.header, "\\pdf_author", 0)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_author", 0)
          if i != -1:
+            author = get_value_string(document.header, '\\pdf_author', 0, 0, True)
+            if title == "":
+                author = ' pdfauthor={' + author + '}'
+            else:
+                author = ',\n pdfauthor={' + author + '}'
              del document.header[i]
          i = find_token(document.header, "\\pdf_subject", 0)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_subject", 0)
          if i != -1:
+            subject = get_value_string(document.header, '\\pdf_subject', 0, 0, True)
+            if title == "" and author == "":
+                subject = ' pdfsubject={' + subject + '}'
+            else:
+                subject = ',\n pdfsubject={' + subject + '}'
              del document.header[i]
          i = find_token(document.header, "\\pdf_keywords", 0)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_keywords", 0)
          if i != -1:
+            keywords = get_value_string(document.header, '\\pdf_keywords', 0, 0, True)
+            if title == "" and author == "" and subject == "":
+                keywords = ' pdfkeywords={' + keywords + '}'
+            else:
+                keywords = ',\n pdfkeywords={' + keywords + '}'
              del document.header[i]
          i = find_token(document.header, "\\pdf_bookmarks", 0)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_bookmarks", 0)
          if i != -1:
+            bookmarks = get_value_string(document.header, '\\pdf_bookmarks', 0)
+            bookmarks = ',\n bookmarks=' + bookmarks
              del document.header[i]
          i = find_token(document.header, "\\pdf_bookmarksnumbered", i)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_bookmarksnumbered", i)
          if i != -1:
+            bookmarksnumbered = get_value_string(document.header, '\\pdf_bookmarksnumbered', 0)
+            bookmarksnumbered = ',\n bookmarksnumbered=' + bookmarksnumbered
              del document.header[i]
          i = find_token(document.header, "\\pdf_bookmarksopen", i)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_bookmarksopen", i)
          if i != -1:
+            bookmarksopen = get_value_string(document.header, '\\pdf_bookmarksopen', 0)
+            bookmarksopen = ',\n bookmarksopen=' + bookmarksopen
              del document.header[i]
          i = find_token(document.header, "\\pdf_bookmarksopenlevel", i)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_bookmarksopenlevel", i)
          if i != -1:
+            bookmarksopenlevel = get_value_string(document.header, '\\pdf_bookmarksopenlevel', 0, 0, True)
+            bookmarksopenlevel = ',\n bookmarksopenlevel=' + bookmarksopenlevel
              del document.header[i]
          i = find_token(document.header, "\\pdf_breaklinks", i)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_breaklinks", i)
          if i != -1:
+            breaklinks = get_value_string(document.header, '\\pdf_breaklinks', 0)
+            breaklinks = ',\n breaklinks=' + breaklinks
              del document.header[i]
          i = find_token(document.header, "\\pdf_pdfborder", i)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_pdfborder", i)
          if i != -1:
+            pdfborder = get_value_string(document.header, '\\pdf_pdfborder', 0)
+            if pdfborder == 'true':
+                pdfborder = ',\n pdfborder={0 0 0}'
+            else:
+                pdfborder = ',\n pdfborder={0 0 1}'
              del document.header[i]
          i = find_token(document.header, "\\pdf_colorlinks", i)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_colorlinks", i)
          if i != -1:
+            colorlinks = get_value_string(document.header, '\\pdf_colorlinks', 0)
+            colorlinks = ',\n colorlinks=' + colorlinks
              del document.header[i]
          i = find_token(document.header, "\\pdf_backref", i)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_backref", i)
          if i != -1:
+            backref = get_value_string(document.header, '\\pdf_backref', 0)
+            backref = ',\n backref=' + backref
              del document.header[i]
          i = find_token(document.header, "\\pdf_pagebackref", i)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_pagebackref", i)
          if i != -1:
+            pagebackref = get_value_string(document.header, '\\pdf_pagebackref', 0)
+            pagebackref = ',\n pagebackref=' + pagebackref
              del document.header[i]
          i = find_token(document.header, "\\pdf_pagemode", 0)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_pagemode", 0)
          if i != -1:
+            pagemode = get_value_string(document.header, '\\pdf_pagemode', 0)
+            pagemode = ',\n pdfpagemode=' + pagemode
              del document.header[i]
          i = find_token(document.header, "\\pdf_quoted_options", 0)
          if i != -1:
              del document.header[i]
          i = find_token(document.header, "\\pdf_quoted_options", 0)
          if i != -1:
+            otheroptions = get_value_string(document.header, '\\pdf_quoted_options', 0, 0, True)
+            if title == "" and author == "" and subject == "" and keywords == "":
+                otheroptions = ' ' + otheroptions
+            else:
+                otheroptions = ',\n ' + otheroptions
              del document.header[i]
  
              del document.header[i]
  
+        # write to the preamble when hyperref was used
+        if hyperref == True:
+            # preamble write preparations
+            # bookmark numbers are only output when they are turned on
+            if bookmarksopen == ',\n bookmarksopen=true':
+                bookmarksopen = bookmarksopen + bookmarksopenlevel
+            if bookmarks == ',\n bookmarks=true':
+                bookmarks = bookmarks + bookmarksnumbered + bookmarksopen
+            else:
+                bookmarks = bookmarks
+            # hypersetup is only output when there are things to be set up
+            setupstart = '\\hypersetup{%\n'
+            setupend = ' }\n'
+            if otheroptions == "" and title == "" and  author == ""\
+               and  subject == "" and keywords == "":
+                setupstart = ""
+                setupend = ""
+            # write the preamble
+            # babel must be loaded before hyperref and hyperref the first part
+            # of the preamble, like in LyX 1.6
+            insert_to_preamble(0, document,
+                                 '% Commands inserted by lyx2lyx for PDF properties\n'
+                                 + '\\usepackage{babel}\n'
+                                 + '\\usepackage[unicode=true'
+                                 + bookmarks
+                                 + breaklinks
+                                 + pdfborder
+                                 + backref
+                                 + pagebackref
+                                 + colorlinks
+                                 + pagemode
+                                 + ']\n'
+                                 + ' {hyperref}\n'
+                                 + setupstart
+                                 + title
+                                 + author
+                                 + subject
+                                 + keywords
+                                 + otheroptions
+                                 + setupend)
+
  
  def remove_inzip_options(document):
      "Remove inzipName and embed options from the Graphics inset"
  
  def remove_inzip_options(document):
      "Remove inzipName and embed options from the Graphics inset"
@@ -247,6 +973,8 @@ def remove_inzip_options(document):
          if j == -1:
              # should not happen
              document.warning("Malformed LyX document: Could not find end of graphics inset.")
          if j == -1:
              # should not happen
              document.warning("Malformed LyX document: Could not find end of graphics inset.")
+            i += 1
+            continue
          # If there's a inzip param, just remove that
          k = find_token(document.body, "\tinzipName", i + 1, j)
          if k != -1:
          # If there's a inzip param, just remove that
          k = find_token(document.body, "\tinzipName", i + 1, j)
          if k != -1:
@@ -259,8 +987,8 @@ def remove_inzip_options(document):
  def convert_inset_command(document):
      """
          Convert:
  def convert_inset_command(document):
      """
          Convert:
-            \begin_inset LatexCommand cmd 
-        to 
+            \begin_inset LatexCommand cmd
+        to
              \begin_inset CommandInset InsetType
              LatexCommand cmd
      """
              \begin_inset CommandInset InsetType
              LatexCommand cmd
      """
@@ -298,8 +1026,8 @@ def revert_inset_command(document):
          Convert:
              \begin_inset CommandInset InsetType
              LatexCommand cmd
          Convert:
              \begin_inset CommandInset InsetType
              LatexCommand cmd
-        to 
-            \begin_inset LatexCommand cmd 
+        to
+            \begin_inset LatexCommand cmd
          Some insets may end up being converted to insets earlier versions of LyX
          will not be able to recognize. Not sure what to do about that.
      """
          Some insets may end up being converted to insets earlier versions of LyX
          will not be able to recognize. Not sure what to do about that.
      """
@@ -313,6 +1041,7 @@ def revert_inset_command(document):
          m = r.match(nextline)
          if not m:
              document.warning("Malformed LyX document: Missing LatexCommand in " + document.body[i] + ".")
          m = r.match(nextline)
          if not m:
              document.warning("Malformed LyX document: Missing LatexCommand in " + document.body[i] + ".")
+            i += 1
              continue
          cmdName = m.group(1)
          insertion = ["\\begin_inset LatexCommand " + cmdName]
              continue
          cmdName = m.group(1)
          insertion = ["\\begin_inset LatexCommand " + cmdName]
@@ -342,53 +1071,85 @@ def revert_wrapfig_options(document):
      "Revert optional options for wrap floats (wrapfig)."
      i = 0
      while True:
      "Revert optional options for wrap floats (wrapfig)."
      i = 0
      while True:
-        i = find_token(document.body, "lines", i)
+        i = find_token(document.body, "\\begin_inset Wrap figure", i)
          if i == -1:
              return
          if i == -1:
              return
-        j = find_token(document.body, "overhang", i+1)
-        if j != i + 2 and j != -1:
-            document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float.")
+        j = find_end_of_inset(document.body, i)
          if j == -1:
          if j == -1:
-            return
-        del document.body[i]
-        del document.body[j-1]
-        i = i + 1
+            document.warning("Can't find end of Wrap inset at line " + str(i))
+            i += 1
+            continue
+        k = find_default_layout(document, i, j)
+        if k == -1:
+            document.warning("Can't find default layout for Wrap figure!")
+            i = j
+            continue
+        # Options should be between i and k now
+        l = find_token(document.body, "lines", i, k)
+        if l == -1:
+            document.warning("Can't find lines option for Wrap figure!")
+            i = k
+            continue
+        m = find_token(document.body, "overhang", i + 1, k)
+        if m == -1:
+            document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float!")
+            i = k
+            continue
+        # Do these in reverse order
+        del document.body[m]
+        del document.body[l]
+        i = k
  
  
  def convert_latexcommand_index(document):
      "Convert from LatexCommand form to collapsable form."
  
  
  def convert_latexcommand_index(document):
      "Convert from LatexCommand form to collapsable form."
-    i = 0 
+    i = 0
+    r1 = re.compile('name "(.*)"')
      while True:
          i = find_token(document.body, "\\begin_inset CommandInset index", i)
          if i == -1:
              return
          if document.body[i + 1] != "LatexCommand index": # Might also be index_print
      while True:
          i = find_token(document.body, "\\begin_inset CommandInset index", i)
          if i == -1:
              return
          if document.body[i + 1] != "LatexCommand index": # Might also be index_print
-            return
-        fullcommand = document.body[i + 2]
-        document.body[i] = "\\begin_inset Index"
-        document.body[i + 1] = "status collapsed"
-        document.body[i + 2] = "\\begin_layout standard"
-        document.body.insert(i + 3, fullcommand[6:].strip('"'))
-        document.body.insert(i + 4, "\\end_layout")
-        i = i + 5
+            i += 1
+            continue
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            document.warning("Unable to find end of index inset at line " + str(i) + "!")
+            i += 2
+            continue
+        m = r1.match(document.body[i + 2])
+        if m == None:
+            document.warning("Unable to match: " + document.body[i+2])
+            # this can happen with empty index insets!
+            linelist = [""]
+        else:
+            fullcontent = m.group(1)
+            linelist = latex2lyx(fullcontent, True)
+        #document.warning(fullcontent)
+
+        linelist = ["\\begin_inset Index", "status collapsed", "\\begin_layout Standard", ""] + \
+                   linelist + ["\\end_layout"]
+        document.body[i : j] = linelist
+        i += len(linelist) - (j - i)
  
  
  def revert_latexcommand_index(document):
  
  
  def revert_latexcommand_index(document):
-    "Revert from collapsable form toLatexCommand form."
+    "Revert from collapsable form to LatexCommand form."
      i = 0
      while True:
          i = find_token(document.body, "\\begin_inset Index", i)
          if i == -1:
      i = 0
      while True:
          i = find_token(document.body, "\\begin_inset Index", i)
          if i == -1:
-            return
-        j = find_end_of_inset(document.body, i)
-        del document.body[j - 1]
-        del document.body[j - 2] # \end_layout
-        document.body[i] =  "\\begin_inset CommandInset index"
-        document.body[i + 1] =  "LatexCommand index"
-        document.body[i + 3] = "name " + '"' + document.body[i + 3] + '"'
-        document.body.insert(i + 4, "")
-        del document.body[i + 2] # \begin_layout standard
-        i = i + 5
+          return
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+          return
+
+        content = lyx2latex(document, document.body[i:j])
+        # escape quotes
+        content = content.replace('"', r'\"')
+        document.body[i:j] = ["\\begin_inset CommandInset index", "LatexCommand index",
+            "name " + '"' + content + '"', ""]
+        i += 5
  
  
  def revert_wraptable(document):
  
  
  def revert_wraptable(document):
@@ -420,6 +1181,24 @@ def revert_vietnamese(document):
          j = j + 1
  
  
          j = j + 1
  
  
+def convert_japanese_cjk(document):
+    "Set language japanese to japanese-cjk"
+    # Document language: update both the cached attribute and the header line.
+    if document.language == "japanese":
+        document.language = "japanese-cjk"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language japanese-cjk"
+    # Language switches inside the body.
+    line = find_token(document.body, "\\lang japanese", 0)
+    while line != -1:
+        document.body[line] = document.body[line].replace("\\lang japanese", "\\lang japanese-cjk")
+        line = find_token(document.body, "\\lang japanese", line + 1)
+
+
  def revert_japanese(document):
      "Set language japanese-plain to japanese"
      # Set document language from japanese-plain to japanese
  def revert_japanese(document):
      "Set language japanese-plain to japanese"
      # Set document language from japanese-plain to japanese
@@ -438,6 +1217,24 @@ def revert_japanese(document):
          j = j + 1
  
  
          j = j + 1
  
  
+def revert_japanese_cjk(document):
+    "Set language japanese-cjk to japanese"
+    # Document language: update both the cached attribute and the header line.
+    if document.language == "japanese-cjk":
+        document.language = "japanese"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language japanese"
+    # Language switches inside the body.
+    line = find_token(document.body, "\\lang japanese-cjk", 0)
+    while line != -1:
+        document.body[line] = document.body[line].replace("\\lang japanese-cjk", "\\lang japanese")
+        line = find_token(document.body, "\\lang japanese-cjk", line + 1)
+
+
  def revert_japanese_encoding(document):
      "Set input encoding form EUC-JP-plain to EUC-JP etc."
      # Set input encoding form EUC-JP-plain to EUC-JP etc.
  def revert_japanese_encoding(document):
      "Set input encoding form EUC-JP-plain to EUC-JP etc."
      # Set input encoding form EUC-JP-plain to EUC-JP etc.
@@ -451,7 +1248,7 @@ def revert_japanese_encoding(document):
          document.header[j] = "\\inputencoding JIS"
      k = 0
      k = find_token(document.header, "\\inputencoding SJIS-plain", 0)
          document.header[j] = "\\inputencoding JIS"
      k = 0
      k = find_token(document.header, "\\inputencoding SJIS-plain", 0)
-    if k != -1: # convert to UTF8 since there is currently no SJIS encoding 
+    if k != -1: # convert to UTF8 since there is currently no SJIS encoding
          document.header[k] = "\\inputencoding UTF8"
  
  
          document.header[k] = "\\inputencoding UTF8"
  
  
@@ -466,11 +1263,22 @@ def revert_inset_info(document):
          if j == -1:
              # should not happen
              document.warning("Malformed LyX document: Could not find end of Info inset.")
          if j == -1:
              # should not happen
              document.warning("Malformed LyX document: Could not find end of Info inset.")
+            i += 1
+            continue
          type = 'unknown'
          arg = ''
          for k in range(i, j+1):
              if document.body[k].startswith("arg"):
          type = 'unknown'
          arg = ''
          for k in range(i, j+1):
              if document.body[k].startswith("arg"):
-                arg = document.body[k][3:].strip().strip('"')
+                arg = document.body[k][3:].strip()
+                # remove embracing quotation marks
+                if arg[0] == '"':
+                    arg = arg[1:]
+                if arg[len(arg) - 1] == '"':
+                    arg = arg[:len(arg) - 1]
+                # \" to straight quote
+                arg = arg.replace(r'\"', '"')
+                # \ to \backslash
+                arg = arg.replace(r'\\', "\\backslash\n")
              if document.body[k].startswith("type"):
                  type = document.body[k][4:].strip().strip('"')
          # I think there is a newline after \\end_inset, which should be removed.
              if document.body[k].startswith("type"):
                  type = document.body[k][4:].strip().strip('"')
          # I think there is a newline after \\end_inset, which should be removed.
@@ -480,6 +1288,1877 @@ def revert_inset_info(document):
              document.body[i : (j + 1)] = [type + ':' + arg]
  
  
              document.body[i : (j + 1)] = [type + ':' + arg]
  
  
+def convert_pdf_options(document):
+    "Set the pdfusetitle tag, delete pdf_store_options, unquote bookmarksopenlevel"
+    # Add \pdf_pdfusetitle right after \use_hyperref when its value is "1".
+    has_hr = get_value(document.header, "\\use_hyperref", 0, default = "0")
+    if has_hr == "1":
+        k = find_token(document.header, "\\use_hyperref", 0)
+        document.header.insert(k + 1, "\\pdf_pdfusetitle true")
+    k = find_token(document.header, "\\pdf_store_options", 0)
+    if k != -1:
+        del document.header[k]
+    # Search the whole header: k is -1 when \pdf_store_options is absent and
+    # must not be reused as a start position.
+    i = find_token(document.header, "\\pdf_bookmarksopenlevel", 0)
+    if i == -1:
+        return
+    # Strip the quotes around the level value.
+    document.header[i] = document.header[i].replace('"', '')
+
+
+def revert_pdf_options_2(document):
+    "Remove the pdfusetitle tag and put quotes around the bookmarksopenlevel value"
+    # Both tokens are looked up from the top of the header, so a missing
+    # \use_hyperref line can no longer turn into a start position of -1.
+    i = find_token(document.header, "\\pdf_pdfusetitle", 0)
+    if i != -1:
+        del document.header[i]
+    i = find_token(document.header, "\\pdf_bookmarksopenlevel", 0)
+    if i == -1:
+        return
+    values = document.header[i].split()
+    if len(values) < 2:
+        # Nothing to quote; leave the line alone instead of raising IndexError.
+        document.warning("Malformed LyX document: \\pdf_bookmarksopenlevel has no value.")
+        return
+    values[1] = ' "' + values[1] + '"'
+    document.header[i] = ''.join(values)
+
+
+def convert_htmlurl(document):
+    'Convert "htmlurl" to "href" insets for docbook'
+    # Documents with any other backend are left untouched.
+    if document.backend != "docbook":
+        return
+    pos = find_token(document.body, "\\begin_inset CommandInset url", 0)
+    while pos != -1:
+        # Rewrite the inset header and the LatexCommand line right after it.
+        document.body[pos] = "\\begin_inset CommandInset href"
+        document.body[pos + 1] = "LatexCommand href"
+        pos = find_token(document.body, "\\begin_inset CommandInset url", pos + 1)
+
+
+def convert_url(document):
+    'Convert url insets to url charstyles'
+    # docbook documents are skipped here.
+    if document.backend == "docbook":
+      return
+    i = 0
+    while True:
+      i = find_token(document.body, "\\begin_inset CommandInset url", i)
+      if i == -1:
+        break
+      n = find_token(document.body, "name", i)
+      # Only a "name" line exactly two lines below the inset start is used.
+      if n == i + 2:
+        # Insert the URL name, followed by a space, as a plain text line
+        # before the inset. For a line like 'name "bla"' the slice skips
+        # the six leading characters (name ") and the closing quote.
+        name = document.body[n][6:-1]
+        newname = [name + " "]
+        document.body[i:i] = newname
+        # The inset start moved down by the inserted line.
+        i = i + 1
+      # NOTE(review): this search is not bounded by the end of the inset,
+      # so it can pick up a "target" line further down the body.
+      j = find_token(document.body, "target", i)
+      if j == -1:
+        document.warning("Malformed LyX document: Can't find target for url inset")
+        i += 1
+        continue
+      # Skip the eight leading characters and the last one; presumably the
+      # line looks like 'target "..."' -- confirm against the file format.
+      target = document.body[j][8:-1]
+      k = find_token(document.body, "\\end_inset", j)
+      if k == -1:
+        document.warning("Malformed LyX document: Can't find end of url inset")
+        i = j
+        continue
+      newstuff = ["\\begin_inset Flex URL",
+        "status collapsed", "",
+        "\\begin_layout Standard",
+        "",
+        target,
+        "\\end_layout",
+        ""]
+      # Lines i..k-1 are replaced; the \end_inset at k is kept and now
+      # closes the new Flex inset.
+      document.body[i:k] = newstuff
+      i = i + len(newstuff)
+
+def convert_ams_classes(document):
+    "Move the AMS text classes to amsart/amsbook plus theorem modules"
+    tc = document.textclass
+    if tc not in ("amsart", "amsart-plain", "amsart-seq", "amsbook"):
+        return
+    if tc == "amsart-plain":
+        document.textclass = "amsart"
+        document.set_textclass()
+        document.add_module("Theorems (Starred)")
+        return
+    if tc == "amsart-seq":
+        document.textclass = "amsart"
+        document.set_textclass()
+    document.add_module("Theorems (AMS)")
+
+    # If any environment from the extended theorems module is used in this
+    # document, add that module as well.
+    extended = ["Criterion", "Algorithm", "Axiom", "Condition", "Note",
+                "Notation", "Summary", "Acknowledgement", "Conclusion", "Fact",
+                "Assumption"]
+
+    # Captures the layout name, ignoring a trailing star and whitespace.
+    pattern = re.compile(r'^\\begin_layout (.*?)\*?\s*$')
+    pos = find_token(document.body, "\\begin_layout", 0)
+    while pos != -1:
+        found = pattern.match(document.body[pos])
+        # No match means an empty layout; just move on.
+        if found is not None and found.group(1) in extended:
+            document.add_module("Theorems (AMS-Extended)")
+            return
+        pos = find_token(document.body, "\\begin_layout", pos + 1)
+
+def revert_href(document):
+    'Reverts hyperlink insets (href) to url insets (url)'
+    # The inset header and the line after it are swapped for the url form.
+    url_header = ["\\begin_inset CommandInset url", "LatexCommand url"]
+    start = find_token(document.body, "\\begin_inset CommandInset href", 0)
+    while start != -1:
+        document.body[start : start + 2] = url_header
+        start = find_token(document.body, "\\begin_inset CommandInset href", start + 2)
+
+def revert_url(document):
+    'Reverts Flex URL insets to old-style URL insets'
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\\begin_inset Flex URL", pos)
+        if pos == -1:
+            return
+        inset_end = find_end_of_inset(document.body, pos)
+        if inset_end == -1:
+            # Without the end of the inset the conversion stops altogether.
+            document.warning("Can't find end of inset in revert_url!")
+            return
+        layout_start = find_default_layout(document, pos, inset_end)
+        if layout_start == -1:
+            document.warning("Can't find default layout in revert_url!")
+            pos = inset_end
+            continue
+        layout_end = find_end_of(document.body, layout_start, "\\begin_layout", "\\end_layout")
+        if layout_end == -1 or layout_end >= inset_end:
+            document.warning("Can't find end of default layout in revert_url!")
+            pos = inset_end
+            continue
+        # The URL text is whatever lies between the layout's begin and end.
+        url = " ".join(document.body[layout_start + 1 : layout_end]).strip()
+        old_inset = ["\\begin_inset LatexCommand url",
+                     "target \"" + url + "\"",
+                     "",
+                     "\\end_inset"]
+        document.body[pos : inset_end + 1] = old_inset
+        pos += len(old_inset)
+
+
+def convert_include(document):
+    'Converts include insets to new format.'
+    # Groups: command name, file name, optional bracketed parameters.
+    pattern = re.compile(r'\\begin_inset Include\s+\\([^{]+){([^}]*)}(?:\[(.*)\])?')
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\\begin_inset Include", pos)
+        if pos == -1:
+            return
+        header = document.body[pos]
+        preview = document.body[pos + 1]
+        parsed = pattern.match(header)
+        if parsed is None:
+            document.warning("Unable to match line " + str(pos) + " of body!")
+            pos += 1
+            continue
+        cmd, fn, opt = parsed.groups()
+        new_inset = ["\\begin_inset CommandInset include",
+                     "LatexCommand " + cmd,
+                     preview,
+                     "filename \"" + fn + "\""]
+        step = 2
+        if opt:
+            new_inset.append("lstparams " + '"' + opt + '"')
+            step = 3
+        # The old header and preview lines make way for the new inset lines.
+        document.body[pos : pos + 2] = new_inset
+        pos += step
+
+
+def revert_include(document):
+    'Reverts include insets to old format.'
+    i = 0
+    r0 = re.compile('preview.*')
+    r1 = re.compile('LatexCommand (.+)')
+    r2 = re.compile('filename "(.+)"')
+    r3 = re.compile('lstparams "(.*)"')
+    while True:
+        i = find_token(document.body, "\\begin_inset CommandInset include", i)
+        if i == -1:
+            return
+        # nextline walks over the parameter lines that follow the header.
+        nextline = i + 1
+        m = r1.match(document.body[nextline])
+        if m is None:
+            document.warning("Malformed LyX document: No LatexCommand line for `" +
+                document.body[i] + "' on line " + str(i) + ".")
+            i += 1
+            continue
+        cmd = m.group(1)
+        nextline += 1
+        # The preview line is optional.
+        if r0.match(document.body[nextline]):
+            previewline = document.body[nextline]
+            nextline += 1
+        else:
+            previewline = ""
+        m = r2.match(document.body[nextline])
+        if m is None:
+            document.warning("Malformed LyX document: No filename line for `" +
+                document.body[i] + "' on line " + str(i) + ".")
+            i += 2
+            continue
+        fn = m.group(1)
+        nextline += 1
+        options = ""
+        # Only lstinputlisting is checked for an lstparams line.
+        if cmd == "lstinputlisting":
+            m = r3.match(document.body[nextline])
+            if m is not None:
+                options = m.group(1)
+                nextline += 1
+        newline = "\\begin_inset Include \\" + cmd + "{" + fn + "}"
+        if options:
+            newline += ("[" + options + "]")
+        insertion = [newline]
+        if previewline != "":
+            insertion.append(previewline)
+        # Everything consumed so far is replaced by the old-style lines.
+        document.body[i : nextline] = insertion
+        i += 2
+
+
+def revert_albanian(document):
+    "Set language Albanian to English"
+    # Document language: update both the cached attribute and the header line.
+    if document.language == "albanian":
+        document.language = "english"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language english"
+    # Language switches inside the body.
+    line = find_token(document.body, "\\lang albanian", 0)
+    while line != -1:
+        document.body[line] = document.body[line].replace("\\lang albanian", "\\lang english")
+        line = find_token(document.body, "\\lang albanian", line + 1)
+
+
+def revert_lowersorbian(document):
+    "Set language lower Sorbian to English"
+    # Document language: update both the cached attribute and the header line.
+    if document.language == "lowersorbian":
+        document.language = "english"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language english"
+    # Language switches inside the body.
+    line = find_token(document.body, "\\lang lowersorbian", 0)
+    while line != -1:
+        document.body[line] = document.body[line].replace("\\lang lowersorbian", "\\lang english")
+        line = find_token(document.body, "\\lang lowersorbian", line + 1)
+
+
+def revert_uppersorbian(document):
+    "Set language uppersorbian to usorbian as this was used in LyX 1.5"
+    # Document language: update both the cached attribute and the header line.
+    if document.language == "uppersorbian":
+        document.language = "usorbian"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language usorbian"
+    # Language switches inside the body.
+    line = find_token(document.body, "\\lang uppersorbian", 0)
+    while line != -1:
+        document.body[line] = document.body[line].replace("\\lang uppersorbian", "\\lang usorbian")
+        line = find_token(document.body, "\\lang uppersorbian", line + 1)
+
+
+def convert_usorbian(document):
+    "Rename the language usorbian to uppersorbian in header and body"
+    old_switch = "\\lang usorbian"
+    new_switch = "\\lang uppersorbian"
+    # Document language: update the cached value and the header line.
+    if document.language == "usorbian":
+        document.language = "uppersorbian"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language uppersorbian"
+    # Local language switches reported by find_token in the body.
+    pos = find_token(document.body, old_switch, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old_switch, new_switch)
+        pos = find_token(document.body, old_switch, pos + 1)
+
+
+def convert_macro_global(document):
+    "Delete the ERT inset holding \\global that sits in front of a math macro"
+    # Math macros are nowadays already defined \global, so an additional
+    # \global would make the document uncompilable, see
+    # http://www.lyx.org/trac/ticket/5371
+    # The 13 lines in front of the macro inset are expected to look like this:
+    # \begin_inset ERT
+    # status collapsed
+    #
+    # \begin_layout Plain Layout
+    #
+    #
+    # \backslash
+    # global
+    # \end_layout
+    #
+    # \end_inset
+    #
+    #
+    # \begin_inset FormulaMacro
+    # \renewcommand{\foo}{123}
+    # \end_inset
+    macro_token = "\\begin_inset FormulaMacro"
+    pos = find_token(document.body, macro_token, 0)
+    while pos != -1:
+        # With fewer than 13 preceding lines there is no room for the ERT.
+        # Only the "global" line, six lines above the macro, is tested.
+        if pos > 12 and document.body[pos - 6] == "global":
+            del document.body[pos - 13 : pos]
+            # the macro inset moved up to pos - 13; continue right behind it
+            pos -= 12
+        else:
+            pos += 1
+        pos = find_token(document.body, macro_token, pos)
+
+
+def revert_macro_optional_params(document):
+    "Convert macro definitions with optional parameters into ERTs"
+    # Stub to convert macro definitions with one or more optional parameters
+    # into uninterpreted ERT insets
+    # NOTE: not implemented -- the body consists of the docstring only, so
+    # calling this function leaves the document untouched.
+
+
+def revert_hyperlinktype(document):
+    'Delete the "type" line that directly follows a "target" line'
+    target = find_token(document.body, "target", 0)
+    while target != -1:
+        kind = find_token(document.body, "type", target)
+        if kind == -1:
+            # no "type" line from here to the end of the body: nothing left to do
+            return
+        if kind == target + 1:
+            del document.body[kind]
+        target = find_token(document.body, "target", target + 1)
+
+
+def revert_pagebreak(document):
+    'Replace every \\pagebreak line by an ERT inset'
+    # The whole inset is stored as a single body entry with embedded newlines.
+    ert = ('\\begin_inset ERT\nstatus collapsed\n\n'
+           '\\begin_layout Standard\n\n\n\\backslash\n'
+           'pagebreak{}\n\\end_layout\n\n\\end_inset\n\n')
+    pos = find_token(document.body, "\\pagebreak", 0)
+    while pos != -1:
+        document.body[pos] = ert
+        pos = find_token(document.body, "\\pagebreak", pos + 1)
+
+
+def revert_linebreak(document):
+    'Replace every \\linebreak line by an ERT inset'
+    # The whole inset is stored as a single body entry with embedded newlines.
+    ert = ('\\begin_inset ERT\nstatus collapsed\n\n'
+           '\\begin_layout Standard\n\n\n\\backslash\n'
+           'linebreak{}\n\\end_layout\n\n\\end_inset\n\n')
+    pos = find_token(document.body, "\\linebreak", 0)
+    while pos != -1:
+        document.body[pos] = ert
+        pos = find_token(document.body, "\\linebreak", pos + 1)
+
+
+def revert_latin(document):
+    "Replace the language Latin by English in header and body"
+    old_switch = "\\lang latin"
+    new_switch = "\\lang english"
+    # Document language: update the cached value and the header line.
+    if document.language == "latin":
+        document.language = "english"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language english"
+    # Local language switches reported by find_token in the body.
+    pos = find_token(document.body, old_switch, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old_switch, new_switch)
+        pos = find_token(document.body, old_switch, pos + 1)
+
+
+def revert_samin(document):
+    "Replace the language North Sami by English in header and body"
+    old_switch = "\\lang samin"
+    new_switch = "\\lang english"
+    # Document language: update the cached value and the header line.
+    if document.language == "samin":
+        document.language = "english"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language english"
+    # Local language switches reported by find_token in the body.
+    pos = find_token(document.body, old_switch, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old_switch, new_switch)
+        pos = find_token(document.body, old_switch, pos + 1)
+
+
+def convert_serbocroatian(document):
+    "Rename Serbocroatian to Croatian, which is what it really was in LyX 1.5"
+    old_switch = "\\lang serbocroatian"
+    new_switch = "\\lang croatian"
+    # Document language: update the cached value and the header line.
+    if document.language == "serbocroatian":
+        document.language = "croatian"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language croatian"
+    # Local language switches reported by find_token in the body.
+    pos = find_token(document.body, old_switch, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old_switch, new_switch)
+        pos = find_token(document.body, old_switch, pos + 1)
+
+
+def convert_framed_notes(document):
+    "Turn framed and shaded notes into boxes with default box parameters. "
+    note_tokens = ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"]
+    # Parameter lines written right behind the new Box header line.
+    box_params = ['position "t"',
+                  'hor_pos "c"',
+                  'has_inner_box 0',
+                  'inner_pos "t"',
+                  'use_parbox 0',
+                  'width "100col%"',
+                  'special "none"',
+                  'height "1in"',
+                  'height_special "totalheight"']
+    pos = find_tokens(document.body, note_tokens, 0)
+    while pos != -1:
+        header = document.body[pos].replace("\\begin_inset Note", "\\begin_inset Box")
+        document.body[pos:pos + 1] = [header] + box_params
+        # resume at the last inserted parameter line
+        pos = find_tokens(document.body, note_tokens, pos + 9)
+
+
+def convert_module_names(document):
+    """Replace the module names listed in the map below by their identifiers.
+
+    The module list is read with document.get_module_list() and written back
+    with document.set_module_list().  A module that is not in the map is
+    reported through document.warning() and kept unchanged.  Nothing is
+    written back when the module list is empty.
+    """
+    modulemap = {
+        'Braille': 'braille',
+        'Endnote': 'endnotes',
+        'Foot to End': 'foottoend',
+        'Hanging': 'hanging',
+        'Linguistics': 'linguistics',
+        'Logical Markup': 'logicalmkup',
+        'Theorems (AMS-Extended)': 'theorems-ams-extended',
+        'Theorems (AMS)': 'theorems-ams',
+        'Theorems (Order By Chapter)': 'theorems-chap',
+        'Theorems (Order By Section)': 'theorems-sec',
+        'Theorems (Starred)': 'theorems-starred',
+        'Theorems': 'theorems-std',
+    }
+    modlist = document.get_module_list()
+    if len(modlist) == 0:
+        return
+    newmodlist = []
+    for mod in modlist:
+        # "in" replaces the Python-2-only dict.has_key(); revert_module_names
+        # already tests membership this way.
+        if mod in modulemap:
+            newmodlist.append(modulemap[mod])
+        else:
+            document.warning("Can't find module %s in the module map!" % mod)
+            newmodlist.append(mod)
+    document.set_module_list(newmodlist)
+
+
+def revert_module_names(document):
+    "Map module identifiers back to the descriptive names listed below"
+    modulemap = {
+        'braille': 'Braille',
+        'endnotes': 'Endnote',
+        'foottoend': 'Foot to End',
+        'hanging': 'Hanging',
+        'linguistics': 'Linguistics',
+        'logicalmkup': 'Logical Markup',
+        'theorems-ams-extended': 'Theorems (AMS-Extended)',
+        'theorems-ams': 'Theorems (AMS)',
+        'theorems-chap': 'Theorems (Order By Chapter)',
+        'theorems-sec': 'Theorems (Order By Section)',
+        'theorems-starred': 'Theorems (Starred)',
+        'theorems-std': 'Theorems',
+    }
+    current = document.get_module_list()
+    if len(current) == 0:
+        return
+    reverted = []
+    for name in current:
+        try:
+            reverted.append(modulemap[name])
+        except KeyError:
+            # unknown modules are reported and passed through unchanged
+            document.warning("Can't find module %s in the module map!" % name)
+            reverted.append(name)
+    document.set_module_list(reverted)
+
+
+def revert_colsep(document):
+    "Move the \\columnsep header setting into a geometry call in the preamble"
+    pos = find_token(document.header, "\\columnsep", 0)
+    if pos == -1:
+        return
+    found = re.match(r'\\columnsep (.*)', document.header[pos])
+    if not found:
+        document.warning("Malformed column separation line!")
+        return
+    colsep = found.group(1)
+    del document.header[pos]
+    # it seems to be safe to add the package even if it is already used
+    add_to_preamble(document,
+                    ["\\usepackage{geometry}",
+                     "\\geometry{columnsep=" + colsep + "}"])
+
+
+def revert_framed_notes(document):
+    "Revert framed boxes to notes. "
+    # Boxes without inner box and without parbox simply become notes again;
+    # all other boxes are kept and get a shaded note wrapped inside them.
+    i = 0
+    while 1:
+        i = find_tokens(document.body, ["\\begin_inset Box Framed", "\\begin_inset Box Shaded"], i)
+
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            # should not happen
+            document.warning("Malformed LyX document: Could not find end of Box inset.")
+            i += 1
+            continue
+        # the "status" line ends the box parameter block and is reused below
+        k = find_token(document.body, "status", i + 1, j)
+        if k == -1:
+            document.warning("Malformed LyX document: Missing `status' tag in Box inset.")
+            i = j
+            continue
+        status = document.body[k]
+        l = find_default_layout(document, i + 1, j)
+        if l == -1:
+            document.warning("Malformed LyX document: Missing `\\begin_layout' in Box inset.")
+            i = j
+            continue
+        # first \end_layout inside the inset
+        m = find_token(document.body, "\\end_layout", i + 1, j)
+        if m == -1:
+            document.warning("Malformed LyX document: Missing `\\end_layout' in Box inset.")
+            i = j
+            continue
+        # both parameters are only searched between the header and "status"
+        ibox = find_token(document.body, "has_inner_box 1", i + 1, k)
+        pbox = find_token(document.body, "use_parbox 1", i + 1, k)
+        if ibox == -1 and pbox == -1:
+            # plain box: rename the inset and drop the box parameters
+            document.body[i] = document.body[i].replace("\\begin_inset Box", "\\begin_inset Note")
+            del document.body[i+1:k]
+        else:
+            # only "Box Shaded" headers are rewritten here; a "Box Framed"
+            # header is left as it is
+            document.body[i] = document.body[i].replace("\\begin_inset Box Shaded", "\\begin_inset Box Frameless")
+            subst1 = [document.body[l],
+                      "\\begin_inset Note Shaded",
+                      status,
+                      '\\begin_layout Standard']
+            document.body[l:l + 1] = subst1
+            # NOTE(review): m was computed before the three lines were
+            # inserted at l and is not adjusted here; if l < m (presumably
+            # the normal case) m no longer points at the \end_layout line
+            # -- confirm.
+            subst2 = [document.body[m], "\\end_layout", "\\end_inset"]
+            document.body[m:m + 1] = subst2
+        i = i + 1
+
+
+def revert_slash(document):
+    'Replace \\SpecialChar \\slash{} by an ERT inset'
+    slash = re.compile(r'(.*)\\SpecialChar \\slash{}(.*)')
+    ert = ['\\begin_inset ERT',
+           'status collapsed', '',
+           '\\begin_layout Standard',
+           '', '', '\\backslash',
+           'slash{}',
+           '\\end_layout', '',
+           '\\end_inset', '']
+    pos = 0
+    while pos < len(document.body):
+        found = slash.match(document.body[pos])
+        if found is None:
+            pos += 1
+            continue
+        # text before and after the special char ends up on lines of its own
+        pieces = [found.group(1)] + ert + [found.group(2)]
+        document.body[pos:pos + 1] = pieces
+        pos += len(pieces)
+
+
+def revert_nobreakdash(document):
+    'Replace \\SpecialChar \\nobreakdash- by an ERT inset and set \\use_amsmath 2'
+    dash = re.compile(r'(.*)\\SpecialChar \\nobreakdash-(.*)')
+    ert = ['\\begin_inset ERT',
+           'status collapsed', '',
+           '\\begin_layout Standard', '', '',
+           '\\backslash',
+           'nobreakdash-',
+           '\\end_layout', '',
+           '\\end_inset', '']
+    pos = 0
+    while pos < len(document.body):
+        found = dash.match(document.body[pos])
+        if found is None:
+            pos += 1
+            continue
+        # text before and after the special char ends up on lines of its own
+        pieces = [found.group(1)] + ert + [found.group(2)]
+        document.body[pos:pos + 1] = pieces
+        pos += len(pieces)
+        # the header setting is rewritten for every converted dash
+        amsmath = find_token(document.header, "\\use_amsmath", 0)
+        if amsmath == -1:
+            document.warning("Malformed LyX document: Missing '\\use_amsmath'.")
+            pos += 1
+        else:
+            document.header[amsmath] = "\\use_amsmath 2"
+
+
+# Returns the net number of lines added to body (negative if lines were removed)
+def revert_nocite_key(body, start, end):
+    'key "..." -> \\nocite{...}; all other lines in body[start:end] are dropped'
+    key_line = re.compile(r'^key "(.*)"')
+    pos = start
+    stop = end
+    while pos < stop:
+        found = key_line.match(body[pos])
+        if found is None:
+            # not a key line: remove it; pos now refers to the following line
+            del body[pos]
+            stop -= 1
+        else:
+            # one key line becomes two lines of ERT content
+            body[pos:pos + 1] = ["\\backslash", "nocite{" + found.group(1) + "}"]
+            stop += 1
+            pos += 2
+    return stop - end
+
+
+def revert_nocite(document):
+    "Turn citation insets with LatexCommand nocite into ERT insets"
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\\begin_inset CommandInset citation", pos)
+        if pos == -1:
+            return
+        if document.body[pos + 1] != "LatexCommand nocite":
+            # some other citation command: leave the inset alone
+            pos += 1
+            continue
+        inset_end = find_end_of_inset(document.body, pos)
+        if inset_end == -1:
+            # this should not happen
+            document.warning("End of CommandInset citation not found in revert_nocite!")
+            return
+        # The two inset header lines are replaced by four ERT header lines,
+        # so everything behind them moves down by two.
+        document.body[pos:pos + 2] = ["\\begin_inset ERT",
+                                      "status collapsed",
+                                      "",
+                                      "\\begin_layout Standard"]
+        params_start = pos + 4
+        inset_end += 2
+        # rewrite the parameter lines and follow the resulting shift
+        inset_end += revert_nocite_key(document.body, params_start, inset_end)
+        document.body[inset_end:inset_end] = ["\\end_layout", ""]
+        pos = inset_end + 1
+
+
+def revert_btprintall(document):
+    """Revert the (non-bibtopic) btPrintAll option to ERT \\nocite{*}.
+
+    Does nothing (apart from a warning) when the header has no \\use_bibtopic
+    line, and nothing at all unless that setting is "false".  Otherwise the
+    'btprint "btPrintAll"' line is removed from every bibtex inset and an ERT
+    inset holding \\nocite{*} is inserted in front of that inset.
+    """
+    i = find_token(document.header, '\\use_bibtopic', 0)
+    if i == -1:
+        document.warning("Malformed lyx document: Missing '\\use_bibtopic'.")
+        return
+    if get_value(document.header, '\\use_bibtopic', 0) != "false":
+        return
+    i = 0
+    while i < len(document.body):
+        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            # this should not happen
+            document.warning("End of CommandInset bibtex not found in revert_btprintall!")
+            j = len(document.body)
+        addedlines = 0
+        for k in range(i, j):
+            if document.body[k] == 'btprint "btPrintAll"':
+                del document.body[k]
+                subst = ["\\begin_inset ERT",
+                         "status collapsed", "",
+                         "\\begin_layout Standard", "",
+                         "\\backslash",
+                         "nocite{*}",
+                         "\\end_layout",
+                         "\\end_inset"]
+                document.body[i:i] = subst
+                # One line removed, len(subst) lines inserted.  The original
+                # assigned this to a misspelled name ("addlines"), so the
+                # offset below was never applied.
+                addedlines += len(subst) - 1
+                # At most one matching line is expected per inset, and the
+                # indices of range(i, j) are stale once the body has changed.
+                break
+        # continue behind the (shifted) end of this inset
+        i = j + addedlines
+
+
+def revert_bahasam(document):
+    "Replace the language Bahasa Malaysia by Bahasa Indonesia"
+    old_switch = "\\lang bahasam"
+    new_switch = "\\lang bahasa"
+    # Document language: update the cached value and the header line.
+    if document.language == "bahasam":
+        document.language = "bahasa"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language bahasa"
+    # Local language switches reported by find_token in the body.
+    pos = find_token(document.body, old_switch, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old_switch, new_switch)
+        pos = find_token(document.body, old_switch, pos + 1)
+
+
+def revert_interlingua(document):
+    "Replace the language Interlingua by English in header and body"
+    old_switch = "\\lang interlingua"
+    new_switch = "\\lang english"
+    # Document language: update the cached value and the header line.
+    if document.language == "interlingua":
+        document.language = "english"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language english"
+    # Local language switches reported by find_token in the body.
+    pos = find_token(document.body, old_switch, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old_switch, new_switch)
+        pos = find_token(document.body, old_switch, pos + 1)
+
+
+def revert_serbianlatin(document):
+    "Replace the language Serbian-Latin by Croatian in header and body"
+    old_switch = "\\lang serbian-latin"
+    new_switch = "\\lang croatian"
+    # Document language: update the cached value and the header line.
+    if document.language == "serbian-latin":
+        document.language = "croatian"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language croatian"
+    # Local language switches reported by find_token in the body.
+    pos = find_token(document.body, old_switch, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old_switch, new_switch)
+        pos = find_token(document.body, old_switch, pos + 1)
+
+
+def revert_rotfloat(document):
+    " Revert sideways custom floats. "
+    # Float insets other than figure and table whose 'sideways' value is not
+    # "false" are replaced by two ERT insets holding
+    # \begin{sideways<type>} and \end{sideways<type>}.
+    i = 0
+    while 1:
+        # whitespace intended (exclude \\begin_inset FloatList)
+        i = find_token(document.body, "\\begin_inset Float ", i)
+        if i == -1:
+            return
+        line = document.body[i]
+        r = re.compile(r'\\begin_inset Float (.*)$')
+        m = r.match(line)
+        if m == None:
+            document.warning("Unable to match line " + str(i) + " of body!")
+            i += 1
+            continue
+        floattype = m.group(1)
+        # figure and table floats are not handled by this function
+        if floattype == "figure" or floattype == "table":
+            i += 1
+            continue
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Malformed lyx document: Missing '\\end_inset' in revert_rotfloat.")
+            i += 1
+            continue
+        addedLines = 0
+        if get_value(document.body, 'sideways', i, j) == "false":
+            i += 1
+            continue
+        l = find_default_layout(document, i + 1, j)
+        if l == -1:
+            document.warning("Malformed LyX document: Missing `\\begin_layout' in Float inset.")
+            i = j
+            continue
+        # The end of the inset is replaced first, so that the indices i and l
+        # in front of it stay valid for the following steps.
+        subst = ['\\begin_layout Standard',
+                  '\\begin_inset ERT',
+                  'status collapsed', '',
+                  '\\begin_layout Standard', '', '',
+                  '\\backslash', '',
+                  'end{sideways' + floattype + '}',
+                  '\\end_layout', '', '\\end_inset']
+        document.body[j : j+1] = subst
+        addedLines = len(subst) - 1
+        # drop the float parameter lines between the header and the layout
+        del document.body[i+1 : l]
+        # NOTE(review): the slice above removes l - (i+1) lines, but only
+        # (l-1) - (i+1) are subtracted here -- confirm which is intended.
+        addedLines -= (l-1) - (i+1)
+        subst = ['\\begin_inset ERT', 'status collapsed', '',
+                  '\\begin_layout Standard', '', '', '\\backslash',
+                  'begin{sideways' + floattype + '}',
+                  '\\end_layout', '', '\\end_inset', '',
+                  '\\end_layout', '']
+        document.body[i : i+1] = subst
+        addedLines += len(subst) - 1
+        # A preamble definition is only known for the algorithm float.
+        if floattype == "algorithm":
+            add_to_preamble(document,
+                            ['% Commands inserted by lyx2lyx for sideways algorithm float',
+                              '\\usepackage{rotfloat}',
+                              '\\floatstyle{ruled}',
+                              '\\newfloat{algorithm}{tbp}{loa}',
+                              '\\floatname{algorithm}{Algorithm}'])
+        else:
+            document.warning("Cannot create preamble definition for custom float" + floattype + ".")
+        i += addedLines + 1
+
+
+def revert_widesideways(document):
+    " Revert wide sideways floats. "
+    # Figure and table floats for which neither 'sideways' nor 'wide' is
+    # "false" are replaced by two ERT insets holding
+    # \begin{sideways<type>*} and \end{sideways<type>*}.
+    i = 0
+    while 1:
+        # whitespace intended (exclude \\begin_inset FloatList)
+        i = find_token(document.body, '\\begin_inset Float ', i)
+        if i == -1:
+            return
+        line = document.body[i]
+        r = re.compile(r'\\begin_inset Float (.*)$')
+        m = r.match(line)
+        if m == None:
+            document.warning("Unable to match line " + str(i) + " of body!")
+            i += 1
+            continue
+        floattype = m.group(1)
+        # only figure and table floats are handled by this function
+        if floattype != "figure" and floattype != "table":
+            i += 1
+            continue
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Malformed lyx document: Missing '\\end_inset' in revert_widesideways.")
+            i += 1
+            continue
+        if get_value(document.body, 'sideways', i, j) == "false" or \
+           get_value(document.body, 'wide', i, j) == "false":
+             i += 1
+             continue
+        l = find_default_layout(document, i + 1, j)
+        if l == -1:
+            document.warning("Malformed LyX document: Missing `\\begin_layout' in Float inset.")
+            i = j
+            continue
+        # The end of the inset is replaced first, so that the indices i and l
+        # in front of it stay valid for the following steps.
+        subst = ['\\begin_layout Standard', '\\begin_inset ERT',
+                  'status collapsed', '',
+                  '\\begin_layout Standard', '', '', '\\backslash',
+                  'end{sideways' + floattype + '*}',
+                  '\\end_layout', '', '\\end_inset']
+        document.body[j : j+1] = subst
+        addedLines = len(subst) - 1
+        # drop the float parameter lines; the line directly in front of the
+        # layout (index l-1) is kept
+        del document.body[i+1:l-1]
+        addedLines -= (l-1) - (i+1)
+        subst = ['\\begin_inset ERT', 'status collapsed', '',
+                 '\\begin_layout Standard', '', '', '\\backslash',
+                 'begin{sideways' + floattype + '*}', '\\end_layout', '',
+                 '\\end_inset', '', '\\end_layout', '']
+        document.body[i : i+1] = subst
+        addedLines += len(subst) - 1
+        add_to_preamble(document, ['\\usepackage{rotfloat}\n'])
+        i += addedLines + 1
+
+
+def revert_inset_embedding(document, type):
+    ' Delete the embed line from all insets of the given type'
+    inset_token = "\\begin_inset %s" % type
+    pos = find_token(document.body, inset_token, 0)
+    while pos != -1:
+        inset_end = find_end_of_inset(document.body, pos)
+        if inset_end == -1:
+            document.warning("Malformed lyx document: Missing '\\end_inset' in revert_inset_embedding.")
+        else:
+            # the tag is looked up with a leading tab first, then without
+            embed = find_token(document.body, "\tembed", pos, inset_end)
+            if embed == -1:
+                embed = find_token(document.body, "embed", pos, inset_end)
+            if embed != -1:
+                del document.body[embed]
+        pos = find_token(document.body, inset_token, pos + 1)
+
+
+def revert_external_embedding(document):
+    ' Remove embed tag from external inset '
+    # thin wrapper: all the work is done by revert_inset_embedding()
+    revert_inset_embedding(document, 'External')
+
+
+def convert_subfig(document):
+    " Convert subfigures to subfloats. "
+    # Every Graphics inset carrying a subcaption line is wrapped in a figure
+    # float whose caption is built from the subcaptionText value.
+    i = 0
+    while 1:
+        addedLines = 0
+        i = find_token(document.body, '\\begin_inset Graphics', i)
+        if i == -1:
+            return
+        endInset = find_end_of_inset(document.body, i)
+        if endInset == -1:
+            document.warning("Malformed lyx document: Missing '\\end_inset' in convert_subfig.")
+            i += 1
+            continue
+        k = find_token(document.body, '\tsubcaption', i, endInset)
+        if k == -1:
+            # not a subfigure
+            i = endInset
+            continue
+        l = find_token(document.body, '\tsubcaptionText', i, endInset)
+        if l == -1:
+            caption = ""
+        else:
+            # drop the 16 leading characters (tab, keyword and one separator
+            # character) and the surrounding double quotes
+            caption = document.body[l][16:].strip('"')
+            del document.body[l]
+            addedLines -= 1
+        # NOTE(review): k is used after line l has been deleted; this is only
+        # correct if k < l, i.e. presumably the subcaption line precedes the
+        # subcaptionText line -- confirm.
+        del document.body[k]
+        addedLines -= 1
+        # opening part of the float, inserted in front of the Graphics inset
+        subst = ['\\begin_inset Float figure', 'wide false', 'sideways false',
+                 'status open', '', '\\begin_layout Plain Layout', '\\begin_inset Caption',
+                 '', '\\begin_layout Plain Layout'] + latex2lyx(caption, False) + \
+                 [ '\\end_layout', '', '\\end_inset', '',
+                 '\\end_layout', '', '\\begin_layout Plain Layout']
+        document.body[i : i] = subst
+        addedLines += len(subst)
+        endInset += addedLines
+        # closing part of the float, inserted at the shifted end position
+        subst = ['', '\\end_inset', '', '\\end_layout']
+        document.body[endInset : endInset] = subst
+        addedLines += len(subst)
+        i += addedLines + 1
+
+
+def revert_subfig(document):
+    " Revert subfloats. "
+    # For every Float or Wrap inset, each Float inset nested inside it is
+    # replaced by ERT insets holding \subfloat[short caption][caption]{% ... }.
+    # Caption text, label and alignment of the subfloat are moved into the ERT
+    # code, and \usepackage{subfig} is added to the preamble.
+    i = 0
+    while 1:
+        # whitespace intended (exclude \\begin_inset FloatList)
+        i = find_tokens(document.body, ['\\begin_inset Float ', '\\begin_inset Wrap'], i)
+        if i == -1:
+            return
+        j = 0
+        addedLines = 0
+        # inner loop: handle one embedded float per iteration until none is left
+        while j != -1:
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed lyx document: Missing '\\end_inset' (float) at line " + str(i + len(document.header)) + ".\n\t" + document.body[i])
+                # document.warning(document.body[i-1] + "\n" + document.body[i+1])
+                i += 1
+                continue # this will get us back to the outer loop, since j == -1
+            # look for embedded float (= subfloat)
+            # whitespace intended (exclude \\begin_inset FloatList)
+            k = find_token(document.body, '\\begin_inset Float ', i + 1, j)
+            if k == -1:
+                break
+            # is the subfloat aligned?
+            al = find_token(document.body, '\\align ', k - 1, j)
+            alignment_beg = ""
+            alignment_end = ""
+            if al != -1:
+                if get_value(document.body, '\\align', al) == "center":
+                    alignment_beg = "\\backslash\nbegin{centering}"
+                    alignment_end = "\\backslash\npar\\backslash\nend{centering}"
+                elif get_value(document.body, '\\align', al) == "left":
+                    alignment_beg = "\\backslash\nbegin{raggedright}"
+                    alignment_end = "\\backslash\npar\\backslash\nend{raggedright}"
+                elif get_value(document.body, '\\align', al) == "right":
+                    alignment_beg = "\\backslash\nbegin{raggedleft}"
+                    alignment_end = "\\backslash\npar\\backslash\nend{raggedleft}"
+            l = find_end_of_inset(document.body, k)
+            if l == -1:
+                document.warning("Malformed lyx document: Missing '\\end_inset' (embedded float).")
+                i += 1
+                j = -1
+                continue # escape to the outer loop
+            # NOTE(review): the result is not checked for -1 before it is
+            # used in the deletion further down -- confirm.
+            m = find_default_layout(document, k + 1, l)
+            # caption?
+            cap = find_token(document.body, '\\begin_inset Caption', k + 1, l)
+            caption = ''
+            shortcap = ''
+            capend = cap
+            if cap != -1:
+                capend = find_end_of_inset(document.body, cap)
+                if capend == -1:
+                    document.warning("Malformed lyx document: Missing '\\end_inset' (caption).")
+                    return
+                # label?
+                label = ''
+                lbl = find_token(document.body, '\\begin_inset CommandInset label', cap, capend)
+                if lbl != -1:
+                    lblend = find_end_of_inset(document.body, lbl + 1)
+                    if lblend == -1:
+                        document.warning("Malformed lyx document: Missing '\\end_inset' (label).")
+                        return
+                    for line in document.body[lbl:lblend + 1]:
+                        if line.startswith('name '):
+                            label = line.split()[1].strip('"')
+                            break
+                else:
+                    # no label: empty range, so that nothing is skipped below
+                    lbl = capend
+                    lblend = capend
+                    label = ''
+                # opt arg?
+                opt = find_token(document.body, '\\begin_inset OptArg', cap, capend)
+                if opt != -1:
+                    optend = find_end_of_inset(document.body, opt)
+                    if optend == -1:
+                        document.warning("Malformed LyX document: Missing '\\end_inset' (OptArg).")
+                        return
+                    optc = find_default_layout(document, opt, optend)
+                    if optc == -1:
+                        document.warning("Malformed LyX document: Missing `\\begin_layout' in Float inset.")
+                        return
+                    optcend = find_end_of(document.body, optc, "\\begin_layout", "\\end_layout")
+                    # the short caption is the text of all lines that do not
+                    # start with a backslash
+                    for line in document.body[optc:optcend]:
+                        if not line.startswith('\\'):
+                            shortcap += line.strip()
+                else:
+                    # no optional argument: empty range, see above
+                    opt = capend
+                    optend = capend
+                # Build the caption from the caption lines.  Lines are skipped
+                # by content: any line equal to a line of the label or OptArg
+                # range is left out, wherever it occurs.
+                for line in document.body[cap:capend]:
+                    if line in document.body[lbl:lblend]:
+                        continue
+                    elif line in document.body[opt:optend]:
+                        continue
+                    else:
+                        inert = True
+                        caption += lyxline2latex(document, line, inert)
+                if len(label) > 0:
+                    caption += "\n\\backslash\nlabel{" + label + "}"
+            # closing ERT ("}" plus end of the alignment environment); it
+            # replaces the \end_inset line of the subfloat
+            subst = '\\begin_layout PlainLayout\n\\begin_inset ERT\nstatus collapsed\n\n' \
+                      '\\begin_layout PlainLayout\n\n}' + alignment_end + \
+                      '\n\\end_layout\n\n\\end_inset\n\n' \
+                      '\\end_layout\n\n\\begin_layout PlainLayout\n'
+            subst = subst.split('\n')
+            document.body[l : l+1] = subst
+            addedLines = len(subst) - 1
+            # this is before l and so is unchanged by the multiline insertion
+            if cap != capend:
+                del document.body[cap:capend+1]
+                addedLines -= (capend + 1 - cap)
+            # drop the float parameter lines of the subfloat
+            del document.body[k+1:m-1]
+            addedLines -= (m - 1 - (k + 1))
+            # opening ERT; it replaces the \begin_inset line of the subfloat
+            insertion = '\\begin_inset ERT\nstatus collapsed\n\n' \
+                        '\\begin_layout PlainLayout\n\n' + alignment_beg + '\n\\backslash\n' \
+                        'subfloat'
+            if len(shortcap) > 0:
+                insertion = insertion + "[" + shortcap + "]"
+            if len(caption) > 0:
+                insertion = insertion + "[" + caption + "]"
+            insertion = insertion + '{%\n\\end_layout\n\n\\end_inset\n\n\\end_layout\n'
+            insertion = insertion.split('\n')
+            document.body[k : k + 1] = insertion
+            addedLines += len(insertion) - 1
+            # the alignment now lives in the ERT code, so drop the \align line
+            al = find_token(document.body, '\\align ', k - 1, j + addedLines)
+            if al != -1:
+                del document.body[al]
+                addedLines -= 1
+            add_to_preamble(document, ['\\usepackage{subfig}\n'])
+        i += addedLines + 1
+
+
+def revert_wrapplacement(document):
+    " Lower-case the placement option of wrap figure floats (wrapfig). "
+    placement_re = re.compile("placement (o|i|l|r|O|I|L|R)")
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\\begin_inset Wrap figure", pos)
+        if pos == -1:
+            return
+        inset_end = find_end_of_inset(document.body, pos)
+        line = find_token(document.body, "placement", pos + 1, inset_end)
+        if line == -1:
+            document.warning("Malformed LyX document: Couldn't find placement parameter of wrap float.")
+            pos += 1
+            continue
+        found = placement_re.match(document.body[line])
+        if found is None:
+            document.warning("Malformed LyX document: Placement option isn't O|I|R|L!")
+        else:
+            document.body[line] = "placement " + found.group(1).lower()
+        # go on searching from the placement line
+        pos = line
+
+
+def remove_extra_embedded_files(document):
+    " Delete the first \\extra_embedded_files line from the header, if any "
+    pos = find_token(document.header, '\\extra_embedded_files', 0)
+    if pos != -1:
+        del document.header[pos]
+
+
+def convert_spaceinset(document):
+    " Turn '\\InsetSpace foo' into a Space inset on lines of its own "
+    space = re.compile(r'(.*)\\InsetSpace (.*)')
+    pos = 0
+    while pos < len(document.body):
+        found = space.match(document.body[pos])
+        if found is None:
+            pos += 1
+            continue
+        # the text in front of the space keeps a line of its own
+        pieces = [found.group(1),
+                  "\\begin_inset Space " + found.group(2),
+                  "\\end_inset"]
+        document.body[pos:pos + 1] = pieces
+        pos += len(pieces)
+
+
+def revert_spaceinset(document):
+    " Turn Space insets back into '\\InsetSpace foo' lines "
+    space_token = "\\begin_inset Space"
+    pos = find_token(document.body, space_token, 0)
+    while pos != -1:
+        inset_end = find_end_of_inset(document.body, pos)
+        if inset_end == -1:
+            document.warning("Malformed LyX document: Could not find end of space inset.")
+            pos += 1
+        else:
+            # rewrite the header line and drop the matching \end_inset line
+            document.body[pos] = document.body[pos].replace('\\begin_inset Space', '\\InsetSpace')
+            del document.body[inset_end]
+        pos = find_token(document.body, space_token, pos)
+
+
+def convert_hfill(document):
+    """ Convert hfill to a Space inset.
+
+    On every line reported by find_token each '\\hfill' is replaced by a
+    line break followed by the two lines of a Space inset.
+    """
+    inset = '\n\\begin_inset Space \\hfill{}\n\\end_inset'
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\\hfill", pos)
+        if pos == -1:
+            return
+        # The replacement spans several lines; each of them becomes its
+        # own entry of the body.
+        new_lines = document.body[pos].replace('\\hfill', inset).split('\n')
+        document.body[pos : pos+1] = new_lines
+        pos += len(new_lines)
+
+
+def revert_hfills(document):
+    """ Revert the fill variants of InsetSpace.
+
+    hfill becomes the inline command again, whereas dotfill and hrulefill
+    are rewritten as ERT insets.
+    """
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\\InsetSpace", pos)
+        if pos == -1:
+            return
+        line = document.body[pos]
+        if '\\hfill' in line:
+            document.body[pos] = line.replace('\\InsetSpace \\hfill{}', '\\hfill')
+            pos += 1
+            continue
+        # dotfill is tested before hrulefill; the first hit wins.
+        for fill in ('dotfill', 'hrulefill'):
+            if '\\' + fill not in line:
+                continue
+            ert = ('\\begin_inset ERT\nstatus collapsed\n\n'
+                   '\\begin_layout Standard\n\n\n\\backslash\n'
+                   + fill + '{}\n\\end_layout\n\n\\end_inset\n\n')
+            new_lines = line.replace('\\InsetSpace \\' + fill + '{}', ert).split('\n')
+            document.body[pos : pos+1] = new_lines
+            pos += len(new_lines)
+            break
+        else:
+            # Some other kind of space: leave it alone.
+            pos += 1
+
+def revert_hspace(document):
+    """ Revert \\InsetSpace \\hspace{} and \\InsetSpace \\hspace*{} to ERT.
+
+    The length is taken from the '\\length' line expected directly below the
+    inset line. That line is removed and its value is put between the braces
+    of the hspace command written into the ERT inset. An inset without such
+    a line is reported and skipped.
+    """
+    i = 0
+    while True:
+        i = find_token(document.body, "\\InsetSpace \\hspace", i)
+        if i == -1:
+            return
+        # Only line i+1 is inspected. An open-ended search could return the
+        # length of some later inset, and the deletion below would then
+        # remove whatever unrelated line happens to follow this one.
+        end = min(i + 2, len(document.body))
+        length = get_value(document.body, '\\length', i + 1, end)
+        if length == '':
+            document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
+            i += 1
+            continue
+        del document.body[i+1]
+        line = document.body[i]
+        # The starred command is tried first, then the plain one.
+        for command in ('hspace*', 'hspace'):
+            if '\\' + command + '{}' not in line:
+                continue
+            ert = ('\\begin_inset ERT\nstatus collapsed\n\n'
+                   '\\begin_layout Standard\n\n\n\\backslash\n'
+                   + command + '{' + length + '}\n\\end_layout\n\n\\end_inset\n\n')
+            subst = line.replace('\\InsetSpace \\' + command + '{}', ert).split('\n')
+            document.body[i : i+1] = subst
+            # Continue at the last inserted line, i.e. whatever followed
+            # the command on the original line.
+            i += len(subst) - 1
+            break
+        else:
+            i += 1
+
+
+def revert_protected_hfill(document):
+    """ Revert \\begin_inset Space \\hspace*{\\fill} to ERT. """
+    token = '\\begin_inset Space \\hspace*{\\fill}'
+    ert = ('\\begin_inset ERT\nstatus collapsed\n\n'
+           '\\begin_layout Standard\n\n\n\\backslash\n'
+           'hspace*{\n\\backslash\nfill}\n\\end_layout\n\n\\end_inset\n\n')
+    pos = 0
+    while True:
+        pos = find_token(document.body, token, pos)
+        if pos == -1:
+            return
+        end = find_end_of_inset(document.body, pos)
+        if end == -1:
+            document.warning("Malformed LyX document: Could not find end of space inset.")
+            pos += 1
+            continue
+        # The ERT text carries its own closing line, so the one of the
+        # Space inset has to go.
+        del document.body[end]
+        ert_lines = document.body[pos].replace(token, ert).split('\n')
+        document.body[pos : pos+1] = ert_lines
+        pos += len(ert_lines)
+
+
+def revert_leftarrowfill(document):
+    """ Revert \\begin_inset Space \\leftarrowfill{} to ERT. """
+    token = '\\begin_inset Space \\leftarrowfill{}'
+    ert = ('\\begin_inset ERT\nstatus collapsed\n\n'
+           '\\begin_layout Standard\n\n\n\\backslash\n'
+           'leftarrowfill{}\n\\end_layout\n\n\\end_inset\n\n')
+    pos = 0
+    while True:
+        pos = find_token(document.body, token, pos)
+        if pos == -1:
+            return
+        end = find_end_of_inset(document.body, pos)
+        if end == -1:
+            document.warning("Malformed LyX document: Could not find end of space inset.")
+            pos += 1
+            continue
+        # Remove the closing line of the Space inset before the opening
+        # line is expanded into the ERT inset.
+        del document.body[end]
+        ert_lines = document.body[pos].replace(token, ert).split('\n')
+        document.body[pos : pos+1] = ert_lines
+        pos += len(ert_lines)
+
+
+def revert_rightarrowfill(document):
+    """ Revert \\begin_inset Space \\rightarrowfill{} to ERT. """
+    token = '\\begin_inset Space \\rightarrowfill{}'
+    ert = ('\\begin_inset ERT\nstatus collapsed\n\n'
+           '\\begin_layout Standard\n\n\n\\backslash\n'
+           'rightarrowfill{}\n\\end_layout\n\n\\end_inset\n\n')
+    pos = 0
+    while True:
+        pos = find_token(document.body, token, pos)
+        if pos == -1:
+            return
+        end = find_end_of_inset(document.body, pos)
+        if end == -1:
+            document.warning("Malformed LyX document: Could not find end of space inset.")
+            pos += 1
+            continue
+        # Remove the closing line of the Space inset before the opening
+        # line is expanded into the ERT inset.
+        del document.body[end]
+        ert_lines = document.body[pos].replace(token, ert).split('\n')
+        document.body[pos : pos+1] = ert_lines
+        pos += len(ert_lines)
+
+
+def revert_upbracefill(document):
+    """ Revert \\begin_inset Space \\upbracefill{} to ERT. """
+    token = '\\begin_inset Space \\upbracefill{}'
+    ert = ('\\begin_inset ERT\nstatus collapsed\n\n'
+           '\\begin_layout Standard\n\n\n\\backslash\n'
+           'upbracefill{}\n\\end_layout\n\n\\end_inset\n\n')
+    pos = 0
+    while True:
+        pos = find_token(document.body, token, pos)
+        if pos == -1:
+            return
+        end = find_end_of_inset(document.body, pos)
+        if end == -1:
+            document.warning("Malformed LyX document: Could not find end of space inset.")
+            pos += 1
+            continue
+        # Remove the closing line of the Space inset before the opening
+        # line is expanded into the ERT inset.
+        del document.body[end]
+        ert_lines = document.body[pos].replace(token, ert).split('\n')
+        document.body[pos : pos+1] = ert_lines
+        pos += len(ert_lines)
+
+
+def revert_downbracefill(document):
+    """ Revert \\begin_inset Space \\downbracefill{} to ERT. """
+    token = '\\begin_inset Space \\downbracefill{}'
+    ert = ('\\begin_inset ERT\nstatus collapsed\n\n'
+           '\\begin_layout Standard\n\n\n\\backslash\n'
+           'downbracefill{}\n\\end_layout\n\n\\end_inset\n\n')
+    pos = 0
+    while True:
+        pos = find_token(document.body, token, pos)
+        if pos == -1:
+            return
+        end = find_end_of_inset(document.body, pos)
+        if end == -1:
+            document.warning("Malformed LyX document: Could not find end of space inset.")
+            pos += 1
+            continue
+        # Remove the closing line of the Space inset before the opening
+        # line is expanded into the ERT inset.
+        del document.body[end]
+        ert_lines = document.body[pos].replace(token, ert).split('\n')
+        document.body[pos : pos+1] = ert_lines
+        pos += len(ert_lines)
+
+
+def revert_local_layout(document):
+    """ Revert local layout headers.
+
+    Every block from \\begin_local_layout up to and including the matching
+    \\end_local_layout is removed from the header.
+    """
+    begin = 0
+    while True:
+        begin = find_token(document.header, "\\begin_local_layout", begin)
+        if begin == -1:
+            return
+        end = find_end_of(document.header, begin, "\\begin_local_layout", "\\end_local_layout")
+        if end == -1:
+            # this should not happen
+            return
+        del document.header[begin : end + 1]
+
+
+def convert_pagebreaks(document):
+    """ Convert inline Newpage insets to new format.
+
+    The commands newpage, pagebreak, clearpage and cleardoublepage are
+    handled one after the other, each in a full pass over the body.
+    """
+    for kind in ('newpage', 'pagebreak', 'clearpage', 'cleardoublepage'):
+        pos = 0
+        while True:
+            pos = find_token(document.body, '\\' + kind, pos)
+            if pos == -1:
+                break
+            # The complete line gives way to the two lines of the inset.
+            document.body[pos:pos+1] = ['\\begin_inset Newpage ' + kind,
+                                        '\\end_inset']
+
+
+def revert_pagebreaks(document):
+    """ Revert \\begin_inset Newpage to previous inline format. """
+    kinds = ('newpage', 'pagebreak', 'clearpage', 'cleardoublepage')
+    start = 0
+    while True:
+        start = find_token(document.body, '\\begin_inset Newpage', start)
+        if start == -1:
+            return
+        end = find_end_of_inset(document.body, start)
+        if end == -1:
+            document.warning("Malformed LyX document: Could not find end of Newpage inset.")
+            start += 1
+            continue
+        del document.body[end]
+        line = document.body[start]
+        # Each known inset opener is turned into its inline command.
+        for kind in kinds:
+            line = line.replace('\\begin_inset Newpage ' + kind, '\\' + kind)
+        document.body[start] = line
+
+
+def convert_linebreaks(document):
+    """ Convert inline Newline insets to new format.
+
+    newline is handled first, linebreak second, each in a full pass over
+    the body.
+    """
+    for kind in ('newline', 'linebreak'):
+        pos = 0
+        while True:
+            pos = find_token(document.body, '\\' + kind, pos)
+            if pos == -1:
+                break
+            # The complete line gives way to the two lines of the inset.
+            document.body[pos:pos+1] = ['\\begin_inset Newline ' + kind,
+                                        '\\end_inset']
+
+
+def revert_linebreaks(document):
+    """ Revert \\begin_inset Newline to previous inline format. """
+    start = 0
+    while True:
+        start = find_token(document.body, '\\begin_inset Newline', start)
+        if start == -1:
+            return
+        end = find_end_of_inset(document.body, start)
+        if end == -1:
+            document.warning("Malformed LyX document: Could not find end of Newline inset.")
+            start += 1
+            continue
+        del document.body[end]
+        line = document.body[start]
+        # Each known inset opener is turned into its inline command.
+        for kind in ('newline', 'linebreak'):
+            line = line.replace('\\begin_inset Newline ' + kind, '\\' + kind)
+        document.body[start] = line
+
+
+def convert_japanese_plain(document):
+    """ Set language japanese-plain to japanese.
+
+    Covers the document language, its header line and the language
+    switches found in the body.
+    """
+    old_lang = "\\lang japanese-plain"
+    if document.language == "japanese-plain":
+        document.language = "japanese"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language japanese"
+    body_pos = find_token(document.body, old_lang, 0)
+    while body_pos != -1:
+        document.body[body_pos] = document.body[body_pos].replace(old_lang, "\\lang japanese")
+        body_pos = find_token(document.body, old_lang, body_pos + 1)
+
+
+def revert_pdfpages(document):
+    """ Revert pdfpages external inset to ERT.
+
+    Every External inset whose template is PDFPages is replaced by an ERT
+    inset holding an includepdf command, and the pdfpages package is added
+    to the preamble. The optional argument of the command is assembled, in
+    this order, from the extra PDFLaTeX options, rotateAngle, width, height,
+    scale and keepAspectRatio, as far as the inset defines them.
+    """
+    # Compiled once; the same pattern is applied to every inset.
+    extra_re = re.compile(r'\textra PDFLaTeX \"(.*)\"$')
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset External", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Malformed lyx document: Missing '\\end_inset' in revert_pdfpages.")
+            i = i + 1
+            continue
+        if get_value(document.body, 'template', i, j) != "PDFPages":
+            i = i + 1
+            continue
+        filename = get_value(document.body, 'filename', i, j)
+        # When several lines match, the last one determines the extra options.
+        extra = ''
+        for k in range(i, j):
+            m = extra_re.match(document.body[k])
+            if m:
+                extra = m.group(1)
+        angle = get_value(document.body, 'rotateAngle', i, j)
+        width = get_value(document.body, 'width', i, j)
+        height = get_value(document.body, 'height', i, j)
+        scale = get_value(document.body, 'scale', i, j)
+        keepAspectRatio = find_token(document.body, "\tkeepAspectRatio", i, j)
+        parts = []
+        if extra != '':
+            parts.append(extra)
+        if angle != '':
+            parts.append("angle=" + angle)
+        if width != '':
+            parts.append("width=" + convert_len(width))
+        if height != '':
+            parts.append("height=" + convert_len(height))
+        if scale != '':
+            parts.append("scale=" + scale)
+        # find_token yields a line index or -1. The former comparison with ''
+        # was always true, so the option was emitted for every inset.
+        if keepAspectRatio != -1:
+            parts.append("keepaspectratio")
+        options = ''
+        if parts:
+            options = '[' + ','.join(parts) + ']'
+        # The whole inset, closing line included, gives way to the ERT inset.
+        document.body[i:j+1] = ['\\begin_inset ERT',
+                                'status collapsed',
+                                '',
+                                '\\begin_layout Standard',
+                                '',
+                                '\\backslash',
+                                'includepdf' + options + '{' + filename + '}',
+                                '\\end_layout',
+                                '',
+                                '\\end_inset']
+        add_to_preamble(document, ['\\usepackage{pdfpages}\n'])
+        i = i + 1
+
+
+def revert_mexican(document):
+    """ Set language Spanish(Mexico) to Spanish.
+
+    Covers the document language, its header line and the language
+    switches found in the body.
+    """
+    old_lang = "\\lang spanish-mexico"
+    if document.language == "spanish-mexico":
+        document.language = "spanish"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language spanish"
+    body_pos = find_token(document.body, old_lang, 0)
+    while body_pos != -1:
+        document.body[body_pos] = document.body[body_pos].replace(old_lang, "\\lang spanish")
+        body_pos = find_token(document.body, old_lang, body_pos + 1)
+
+
+def remove_embedding(document):
+    """ Remove embed tag from all insets.
+
+    revert_inset_embedding is run for Graphics, External, include and
+    bibtex insets, in that order.
+    """
+    inset_types = ('Graphics', 'External',
+                   'CommandInset include', 'CommandInset bibtex')
+    for inset_type in inset_types:
+        revert_inset_embedding(document, inset_type)
+
+
+def revert_master(document):
+    """ Remove master param (first matching header line only). """
+    master = find_token(document.header, "\\master", 0)
+    if master == -1:
+        return
+    document.header.pop(master)
+
+
+def revert_graphics_group(document):
+    """ Revert group information from graphics insets.
+
+    The first groupId parameter line of every Graphics inset is deleted.
+    """
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Graphics", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Malformed lyx document: Missing '\\end_inset' in revert_graphics_group.")
+            i = i + 1
+            continue
+        # The parameter is recognised by its first word, so the lookup works
+        # for any indentation. Other functions of this file search inset
+        # parameters with a leading tab, whereas a fixed run of spaces was
+        # required here -- presumably a tab was meant; confirm against the
+        # file format.
+        for k in range(i + 1, j):
+            if document.body[k].split()[:1] == ['groupId']:
+                del document.body[k]
+                break
+        i = i + 1
+
+
+def update_apa_styles(document):
+    """ Replace obsolete styles.
+
+    Only documents of text class "apa" are touched. A layout line whose
+    name is listed below is rewritten with the name of its successor.
+    """
+    if document.textclass != "apa":
+        return
+
+    prefix = "\\begin_layout "
+    successors = {
+        "Acknowledgments": "Acknowledgements",
+        "Section*":        "Section",
+        "Subsection*":     "Subsection",
+        "Subsubsection*":  "Subsubsection",
+        "Paragraph*":      "Paragraph",
+        "Subparagraph*":   "Subparagraph",
+    }
+    pos = find_token(document.body, "\\begin_layout", 0)
+    while pos != -1:
+        # Everything behind the prefix is taken as the layout name.
+        name = document.body[pos][len(prefix):]
+        if name in successors:
+            document.body[pos] = prefix + successors[name]
+        pos = find_token(document.body, "\\begin_layout", pos + 1)
+
+
+def convert_paper_sizes(document):
+    """ Exchange the paper size options legalpaper and executivepaper.
+
+    An executivepaper header line becomes legalpaper; only when there is
+    none, a legalpaper line becomes executivepaper.
+    """
+    # routine is needed to fix http://www.lyx.org/trac/ticket/4868
+    executive = find_token(document.header, "\\papersize executivepaper", 0)
+    if executive != -1:
+        document.header[executive] = "\\papersize legalpaper"
+    else:
+        legal = find_token(document.header, "\\papersize legalpaper", 0)
+        if legal != -1:
+            document.header[legal] = "\\papersize executivepaper"
+
+
+def revert_paper_sizes(document):
+    """ Exchange the paper size options legalpaper and executivepaper.
+
+    An executivepaper header line becomes legalpaper; only when there is
+    none, a legalpaper line becomes executivepaper.
+    """
+    executive = find_token(document.header, "\\papersize executivepaper", 0)
+    if executive != -1:
+        document.header[executive] = "\\papersize legalpaper"
+    else:
+        legal = find_token(document.header, "\\papersize legalpaper", 0)
+        if legal != -1:
+            document.header[legal] = "\\papersize executivepaper"
+
+
+def convert_InsetSpace(document):
+    """ Convert '\\begin_inset Space foo' to '\\begin_inset space foo'. """
+    old = '\\begin_inset Space'
+    new = '\\begin_inset space'
+    pos = find_token(document.body, old, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old, new)
+        # Searching again from the same line is fine: it was just rewritten.
+        pos = find_token(document.body, old, pos)
+
+
+def revert_InsetSpace(document):
+    """ Revert '\\begin_inset space foo' to '\\begin_inset Space foo'. """
+    old = '\\begin_inset space'
+    new = '\\begin_inset Space'
+    pos = find_token(document.body, old, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old, new)
+        # Searching again from the same line is fine: it was just rewritten.
+        pos = find_token(document.body, old, pos)
+
+
+def convert_display_enum(document):
+    """ Convert 'display foo' to 'display false/true'.
+
+    'none' maps to 'false'; default, monochrome, grayscale, color and
+    preview all map to 'true'. Other values are kept.
+    """
+    new_value = {
+        "none":       "false",
+        "default":    "true",
+        "monochrome": "true",
+        "grayscale":  "true",
+        "color":      "true",
+        "preview":    "true",
+    }
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\tdisplay", pos)
+        if pos == -1:
+            return
+        val = get_value(document.body, 'display', pos)
+        if val in new_value:
+            document.body[pos] = document.body[pos].replace(val, new_value[val])
+        pos += 1
+
+
+def revert_display_enum(document):
+    """ Revert 'display false/true' to 'display none/default'. """
+    old_value = {"false": "none", "true": "default"}
+    pos = 0
+    while True:
+        pos = find_token(document.body, "\tdisplay", pos)
+        if pos == -1:
+            return
+        val = get_value(document.body, 'display', pos)
+        if val in old_value:
+            document.body[pos] = document.body[pos].replace(val, old_value[val])
+        pos += 1
+
+
+def remove_fontsCJK(document):
+    """ Remove font_cjk param (first matching header line only). """
+    font_line = find_token(document.header, "\\font_cjk", 0)
+    if font_line == -1:
+        return
+    document.header.pop(font_line)
+
+
+def convert_plain_layout(document):
+    """ Convert 'PlainLayout' to 'Plain Layout'. """
+    old = '\\begin_layout PlainLayout'
+    new = '\\begin_layout Plain Layout'
+    pos = find_token(document.body, old, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old, new)
+        pos = find_token(document.body, old, pos + 1)
+
+
+def revert_plain_layout(document):
+    """ Revert 'Plain Layout' to 'PlainLayout'. """
+    old = '\\begin_layout Plain Layout'
+    new = '\\begin_layout PlainLayout'
+    pos = find_token(document.body, old, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old, new)
+        pos = find_token(document.body, old, pos + 1)
+
+
+def revert_plainlayout(document):
+    """ Revert 'PlainLayout' to 'Standard'. """
+    old = '\\begin_layout PlainLayout'
+    new = '\\begin_layout Standard'
+    pos = find_token(document.body, old, 0)
+    while pos != -1:
+        # This will be incorrect for some document classes, since Standard is not always
+        # the default. But (a) it is probably the best we can do and (b) it will actually
+        # work, in fact, since an unknown layout will be converted to default.
+        document.body[pos] = document.body[pos].replace(old, new)
+        pos = find_token(document.body, old, pos + 1)
+
+
+def revert_polytonicgreek(document):
+    """ Set language polytonic Greek to Greek.
+
+    Covers the document language, its header line and the language
+    switches found in the body.
+    """
+    old_lang = "\\lang polutonikogreek"
+    if document.language == "polutonikogreek":
+        document.language = "greek"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language greek"
+    body_pos = find_token(document.body, old_lang, 0)
+    while body_pos != -1:
+        document.body[body_pos] = document.body[body_pos].replace(old_lang, "\\lang greek")
+        body_pos = find_token(document.body, old_lang, body_pos + 1)
+
+
+def revert_removed_modules(document):
+    """ Delete every remove_modules block from the header.
+
+    A block reaches from \\begin_remove_modules up to and including the
+    matching \\end_remove_modules.
+    """
+    begin = 0
+    while True:
+        begin = find_token(document.header, "\\begin_remove_modules", begin)
+        if begin == -1:
+            return
+        end = find_end_of(document.header, begin, "\\begin_remove_modules", "\\end_remove_modules")
+        if end == -1:
+            # this should not happen
+            return
+        del document.header[begin : end + 1]
+
+
+def add_plain_layout(document):
+    """ Give every \\begin_layout line without a layout name the name 'Plain Layout'. """
+    pos = find_token(document.body, "\\begin_layout", 0)
+    while pos != -1:
+        words = document.body[pos].split()
+        # A single word means that nothing follows the command itself.
+        if len(words) == 1:
+            document.body[pos] = "\\begin_layout Plain Layout"
+        pos = find_token(document.body, "\\begin_layout", pos + 1)
+
+
+def revert_tabulators(document):
+    """ Revert tabulators to 4 spaces.
+
+    On every line reported by find_token for a tabulator, all tabulators
+    of that line are replaced.
+    """
+    pos = find_token(document.body, "\t", 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace("\t", "    ")
+        pos = find_token(document.body, "\t", pos + 1)
+
+
+def revert_tabsize(document):
+    """ Revert the tabsize parameter of listings.
+
+    A lstparams line holding nothing but "tabsize=4" is deleted. On other
+    lstparams lines everything from ',tabsize=' onwards is cut off and the
+    closing quote is put back; lines without that option stay as they are.
+    """
+    i = 0
+    j = 0
+    while True:
+        # either it is the only parameter
+        i = find_token(document.body, 'lstparams "tabsize=4"', i)
+        if i != -1:
+            del document.body[i]
+        # or the last one
+        j = find_token(document.body, "lstparams", j)
+        if j == -1:
+            return
+        pos = document.body[j].find(",tabsize=")
+        # find() yields -1 when the option is absent. Slicing with -1 would
+        # chop the last character, which is only harmless as long as that
+        # character happens to be the closing quote.
+        if pos != -1:
+            document.body[j] = document.body[j][:pos] + '"'
+        i += 1
+        j += 1
+
+
+def revert_mongolian(document):
+    """ Set language Mongolian to English.
+
+    Covers the document language, its header line and the language
+    switches found in the body.
+    """
+    old_lang = "\\lang mongolian"
+    if document.language == "mongolian":
+        document.language = "english"
+        header_pos = find_token(document.header, "\\language", 0)
+        if header_pos != -1:
+            document.header[header_pos] = "\\language english"
+    body_pos = find_token(document.body, old_lang, 0)
+    while body_pos != -1:
+        document.body[body_pos] = document.body[body_pos].replace(old_lang, "\\lang english")
+        body_pos = find_token(document.body, old_lang, body_pos + 1)
+
+
+def revert_default_options(document):
+    """ Remove param use_default_options (first matching header line only). """
+    option_line = find_token(document.header, "\\use_default_options", 0)
+    if option_line == -1:
+        return
+    document.header.pop(option_line)
+
+
+def convert_default_options(document):
+    """ Add param use_default_options and set it to false.
+
+    The new line is put in front of the \\textclass line; without that line
+    a warning is issued and the header stays unchanged.
+    """
+    textclass_line = find_token(document.header, "\\textclass", 0)
+    if textclass_line != -1:
+        document.header[textclass_line:textclass_line] = ['\\use_default_options false']
+        return
+    document.warning("Malformed LyX document: Missing `\\textclass'.")
+
+
+def revert_backref_options(document):
+    """ Revert option pdf_backref=page to pagebackref. """
+    backref_line = find_token(document.header, "\\pdf_backref page", 0)
+    if backref_line == -1:
+        return
+    document.header[backref_line] = "\\pdf_pagebackref true"
+
+
+def convert_backref_options(document):
+    """ We have changed the option pagebackref to backref=true.
+
+    'pdf_pagebackref true' becomes 'pdf_backref page' and
+    'pdf_pagebackref false' is dropped. A 'pdf_backref true' line is
+    deleted in the first case and turned into 'pdf_backref section' in
+    the second one.
+    """
+    header = document.header
+    page_true = find_token(header, "\\pdf_pagebackref true", 0)
+    had_page_true = page_true != -1
+    if had_page_true:
+        header[page_true] = "\\pdf_backref page"
+    page_false = find_token(header, "\\pdf_pagebackref false", 0)
+    had_page_false = page_false != -1
+    if had_page_false:
+        del header[page_false]
+    # backref=true was not a valid option, we meant backref=section
+    backref = find_token(header, "\\pdf_backref true", 0)
+    if backref == -1:
+        return
+    if had_page_true:
+        del header[backref]
+    elif had_page_false:
+        header[backref] = "\\pdf_backref section"
+
+
+def convert_charstyle_element(document):
+    """ Convert CharStyle to Element for docbook backend. """
+    if document.backend != "docbook":
+        return
+    old = '\\begin_inset Flex CharStyle:'
+    new = '\\begin_inset Flex Element:'
+    pos = find_token(document.body, old, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old, new)
+        # Searching again from the same line is fine: it was just rewritten.
+        pos = find_token(document.body, old, pos)
+
+def revert_charstyle_element(document):
+    """ Convert Element to CharStyle for docbook backend. """
+    if document.backend != "docbook":
+        return
+    old = '\\begin_inset Flex Element:'
+    new = '\\begin_inset Flex CharStyle:'
+    pos = find_token(document.body, old, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old, new)
+        # Searching again from the same line is fine: it was just rewritten.
+        pos = find_token(document.body, old, pos)
+
  ##
  # Conversion hub
  #
  ##
  # Conversion hub
  #
@@ -493,26 +3172,130 @@ convert = [[277, [fix_wrong_tables]],
             [282, []],
             [283, [convert_flex]],
             [284, []],
             [282, []],
             [283, [convert_flex]],
             [284, []],
-           [285, []], # an empty manifest is automatically added
+           [285, []],
             [286, []],
             [287, [convert_wrapfig_options]],
             [288, [convert_inset_command]],
             [289, [convert_latexcommand_index]],
             [290, []],
             [291, []],
             [286, []],
             [287, [convert_wrapfig_options]],
             [288, [convert_inset_command]],
             [289, [convert_latexcommand_index]],
             [290, []],
             [291, []],
-           [292, []],
-           [293, []]
+           [292, [convert_japanese_cjk]],
+           [293, []],
+           [294, [convert_pdf_options]],
+           [295, [convert_htmlurl, convert_url]],
+           [296, [convert_include]],
+           [297, [convert_usorbian]],
+           [298, [convert_macro_global]],
+           [299, []],
+           [300, []],
+           [301, []],
+           [302, []],
+           [303, [convert_serbocroatian]],
+           [304, [convert_framed_notes]],
+           [305, []],
+           [306, []],
+           [307, []],
+           [308, []],
+           [309, []],
+           [310, []],
+           [311, [convert_ams_classes]],
+           [312, []],
+           [313, [convert_module_names]],
+           [314, []],
+           [315, []],
+           [316, [convert_subfig]],
+           [317, []],
+           [318, []],
+           [319, [convert_spaceinset, convert_hfill]],
+           [320, []],
+           [321, [convert_tablines]],
+           [322, [convert_plain_layout]],
+           [323, [convert_pagebreaks]],
+           [324, [convert_linebreaks]],
+           [325, [convert_japanese_plain]],
+           [326, []],
+           [327, []],
+           [328, [remove_embedding, remove_extra_embedded_files, remove_inzip_options]],
+           [329, []],
+           [330, []],
+           [331, [convert_ltcaption]],
+           [332, []],
+           [333, [update_apa_styles]],
+           [334, [convert_paper_sizes]],
+           [335, [convert_InsetSpace]],
+           [336, []],
+           [337, [convert_display_enum]],
+           [338, []],
+           [339, []],
+           [340, [add_plain_layout]],
+           [341, []],
+           [342, []],
+           [343, [convert_default_options]],
+           [344, [convert_backref_options]],
+           [345, [convert_charstyle_element]]
            ]
  
            ]
  
-revert =  [[292, [revert_inset_info]],
-           [291, [revert_japanese, revert_japanese_encoding]],
+revert =  [[344, [revert_charstyle_element]],
+           [343, [revert_backref_options]],
+           [342, [revert_default_options]],
+           [341, [revert_mongolian]],
+           [340, [revert_tabulators, revert_tabsize]],
+           [339, []],
+           [338, [revert_removed_modules]],
+           [337, [revert_polytonicgreek]],
+           [336, [revert_display_enum]],
+           [335, [remove_fontsCJK]],
+           [334, [revert_InsetSpace]],
+           [333, [revert_paper_sizes]],
+           [332, []],
+           [331, [revert_graphics_group]],
+           [330, [revert_ltcaption]],
+           [329, [revert_leftarrowfill, revert_rightarrowfill, revert_upbracefill, revert_downbracefill]],
+           [328, [revert_master]],
+           [327, []],
+           [326, [revert_mexican]],
+           [325, [revert_pdfpages]],
+           [324, []],
+           [323, [revert_linebreaks]],
+           [322, [revert_pagebreaks]],
+           [321, [revert_local_layout, revert_plain_layout]],
+           [320, [revert_tablines]],
+           [319, [revert_protected_hfill]],
+           [318, [revert_spaceinset, revert_hfills, revert_hspace]],
+           [317, [remove_extra_embedded_files]],
+           [316, [revert_wrapplacement]],
+           [315, [revert_subfig]],
+           [314, [revert_colsep, revert_plainlayout]],
+           [313, []],
+           [312, [revert_module_names]],
+           [311, [revert_rotfloat, revert_widesideways]],
+           [310, [revert_external_embedding]],
+           [309, [revert_btprintall]],
+           [308, [revert_nocite]],
+           [307, [revert_serbianlatin]],
+           [306, [revert_slash, revert_nobreakdash]],
+           [305, [revert_interlingua]],
+           [304, [revert_bahasam]],
+           [303, [revert_framed_notes]],
+           [302, []],
+           [301, [revert_latin, revert_samin]],
+           [300, [revert_linebreak]],
+           [299, [revert_pagebreak]],
+           [298, [revert_hyperlinktype]],
+           [297, [revert_macro_optional_params]],
+           [296, [revert_albanian, revert_lowersorbian, revert_uppersorbian]],
+           [295, [revert_include]],
+           [294, [revert_href, revert_url]],
+           [293, [revert_pdf_options_2]],
+           [292, [revert_inset_info]],
+           [291, [revert_japanese, revert_japanese_encoding, revert_japanese_cjk]],
             [290, [revert_vietnamese]],
             [289, [revert_wraptable]],
             [288, [revert_latexcommand_index]],
             [287, [revert_inset_command]],
             [286, [revert_wrapfig_options]],
             [285, [revert_pdf_options]],
             [290, [revert_vietnamese]],
             [289, [revert_wraptable]],
             [288, [revert_latexcommand_index]],
             [287, [revert_inset_command]],
             [286, [revert_wrapfig_options]],
             [285, [revert_pdf_options]],
-           [284, [remove_manifest, remove_inzip_options]],
+           [284, [remove_inzip_options]],
             [283, []],
             [282, [revert_flex]],
             [281, []],
             [283, []],
             [282, [revert_flex]],
             [281, []],