Fix conversion of nested box insets

[lyx.git] / lib / lyx2lyx / lyx_2_0.py
diff --git a/lib/lyx2lyx/lyx_2_0.py b/lib/lyx2lyx/lyx_2_0.py

index 8b81520b0d0ed23e01ee5d3172bddffd7b084669..1771e185f45cde2dfb555aacb9dbf18ef4a58694 100644 (file)
--- a/lib/lyx2lyx/lyx_2_0.py
+++ b/lib/lyx2lyx/lyx_2_0.py
@@ -23,85 +23,26 @@ import re, string
  import unicodedata
  import sys, os
  
-from parser_tools import find_token, find_end_of, find_tokens, get_value, get_value_string
+from parser_tools import find_token, find_end_of, find_tokens, \
+  find_token_exact, find_end_of_inset, find_end_of_layout, \
+  find_token_backwards, is_in_inset, get_value, get_quoted_value, \
+  del_token, check_token, get_option_value
+  
+from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
+  put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
+  revert_font_attrs, hex2ratio, str2bool
  
  ####################################################################
  # Private helper functions
  
-def remove_option(document, m, option):
-    l = document.body[m].find(option)
-    if l != -1:
-        val = document.body[m][l:].split('"')[1]
-        document.body[m] = document.body[m][:l - 1] + document.body[m][l+len(option + '="' + val + '"'):]
-    return l
-
-def find_end_of_inset(lines, i):
-    " Find end of inset, where lines[i] is included."
-    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
-
-
-# Note that text can be either a list of lines or a single line.
-def add_to_preamble(document, text):
-    """ Add text to the preamble if it is not already there.
-    Only the first line is checked!"""
-
-    if not type(text) is list:
-      # split on \n just in case
-      # it'll give us the one element list we want
-      # if there's no \n, too
-      text = text.split('\n')
-
-    if find_token(document.preamble, text[0], 0) != -1:
-        return
-
-    document.preamble.extend(text)
-
-
-def insert_to_preamble(index, document, text):
-    """ Insert text to the preamble at a given line"""
-
-    document.preamble.insert(index, text)
-
-
-def read_unicodesymbols():
-    " Read the unicodesymbols list of unicode characters and corresponding commands."
-    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
-    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
-    spec_chars = []
-    # Two backslashes, followed by some non-word character, and then a character
-    # in brackets. The idea is to check for constructs like: \"{u}, which is how
-    # they are written in the unicodesymbols file; but they can also be written
-    # as: \"u or even \" u.
-    r = re.compile(r'\\\\(\W)\{(\w)\}')
-    for line in fp.readlines():
-        if line[0] != '#' and line.strip() != "":
-            line=line.replace(' "',' ') # remove all quotation marks with spaces before
-            line=line.replace('" ',' ') # remove all quotation marks with spaces after
-            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
-            try:
-                [ucs4,command,dead] = line.split(None,2)
-                if command[0:1] != "\\":
-                    continue
-                spec_chars.append([command, unichr(eval(ucs4))])
-            except:
-                continue
-            m = r.match(command)
-            if m != None:
-                command = "\\\\"
-                # If the character is a double-quote, then we need to escape it, too,
-                # since it is done that way in the LyX file.
-                if m.group(1) == "\"":
-                    command += "\\"
-                commandbl = command
-                command += m.group(1) + m.group(2)
-                commandbl += m.group(1) + ' ' + m.group(2)
-                spec_chars.append([command, unichr(eval(ucs4))])
-                spec_chars.append([commandbl, unichr(eval(ucs4))])
-    fp.close()
-    return spec_chars
-
-
-unicode_reps = read_unicodesymbols()
+def remove_option(lines, m, option):
+    ''' Remove option="value" from lines[m] in place. Returns True if option was found and removed, False if lines[m] does not contain it. '''
+    l = lines[m].find(option)  # index of the option name within the line, -1 if absent
+    if l == -1:
+        return False
+    val = lines[m][l:].split('"')[1]  # text between the first pair of double quotes at or after the option name
+    lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]  # also drops the one character before the option; NOTE(review): if l == 0 the slice [:-1] keeps almost the whole line -- confirm an option never starts the line
+    return True
  
  
  # DO NOT USE THIS ROUTINE ANY MORE. Better yet, replace the uses that
@@ -115,303 +56,6 @@ def old_put_cmd_in_ert(string):
      return string
  
  
-# This routine wraps some content in an ERT inset. 
-#
-# NOTE: The function accepts either a single string or a LIST of strings as
-# argument. But it returns a LIST of strings, split on \n, so that it does 
-# not have embedded newlines.
-# 
-# This is how lyx2lyx represents a LyX document: as a list of strings, 
-# each representing a line of a LyX file. Embedded newlines confuse 
-# lyx2lyx very much.
-#
-# A call to this routine will often go something like this:
-#   i = find_token('\\begin_inset FunkyInset', ...)
-#   ...
-#   j = find_end_of_inset(document.body, i)
-#   content = ...extract content from insets
-#   # that could be as simple as: 
-#   # content = lyx2latex(document[i:j + 1])
-#   ert = put_cmd_in_ert(content)
-#   document.body[i:j] = ert
-# Now, before we continue, we need to reset i appropriately. Normally,
-# this would be: 
-#   i += len(ert)
-# That puts us right after the ERT we just inserted.
-#
-def put_cmd_in_ert(arg):
-    ret = ["\\begin_inset ERT", "status collapsed", "\\begin_layout Plain Layout", ""]
-    # Despite the warnings just given, it will be faster for us to work
-    # with a single string internally. That way, we only go through the
-    # unicode_reps loop once.
-    if type(arg) is list:
-      s = "\n".join(arg)
-    else:
-      s = arg
-    for rep in unicode_reps:
-      s = s.replace(rep[1], rep[0].replace('\\\\', '\\'))
-    s = s.replace('\\', "\\backslash\n")
-    ret += s.splitlines()
-    ret += ["\\end_layout", "\\end_inset"]
-    return ret
-
-            
-def lyx2latex(document, lines):
-    'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
-    # clean up multiline stuff
-    content = ""
-    ert_end = 0
-    note_end = 0
-    hspace = ""
-
-    for curline in range(len(lines)):
-      line = lines[curline]
-      if line.startswith("\\begin_inset Note Note"):
-          # We want to skip LyX notes, so remember where the inset ends
-          note_end = find_end_of_inset(lines, curline + 1)
-          continue
-      elif note_end >= curline:
-          # Skip LyX notes
-          continue
-      elif line.startswith("\\begin_inset ERT"):
-          # We don't want to replace things inside ERT, so figure out
-          # where the end of the inset is.
-          ert_end = find_end_of_inset(lines, curline + 1)
-          continue
-      elif line.startswith("\\begin_inset Formula"):
-          line = line[20:]
-      elif line.startswith("\\begin_inset Quotes"):
-          # For now, we do a very basic reversion. Someone who understands
-          # quotes is welcome to fix it up.
-          qtype = line[20:].strip()
-          # lang = qtype[0]
-          side = qtype[1]
-          dbls = qtype[2]
-          if side == "l":
-              if dbls == "d":
-                  line = "``"
-              else:
-                  line = "`"
-          else:
-              if dbls == "d":
-                  line = "''"
-              else:
-                  line = "'"
-      elif line.startswith("\\begin_inset space"):
-          line = line[18:].strip()
-          if line.startswith("\\hspace"):
-              # Account for both \hspace and \hspace*
-              hspace = line[:-2]
-              continue
-          elif line == "\\space{}":
-              line = "\\ "
-          elif line == "\\thinspace{}":
-              line = "\\,"
-      elif hspace != "":
-          # The LyX length is in line[8:], after the \length keyword
-          # latex_length returns "bool,length"
-          length = latex_length(line[8:]).split(",")[1]
-          line = hspace + "{" + length + "}"
-          hspace = ""
-      elif line.isspace() or \
-            line.startswith("\\begin_layout") or \
-            line.startswith("\\end_layout") or \
-            line.startswith("\\begin_inset") or \
-            line.startswith("\\end_inset") or \
-            line.startswith("\\lang") or \
-            line.strip() == "status collapsed" or \
-            line.strip() == "status open":
-          #skip all that stuff
-          continue
-
-      # this needs to be added to the preamble because of cases like
-      # \textmu, \textbackslash, etc.
-      add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
-                                 '\\@ifundefined{textmu}',
-                                 ' {\\usepackage{textcomp}}{}'])
-      # a lossless reversion is not possible
-      # try at least to handle some common insets and settings
-      if ert_end >= curline:
-          line = line.replace(r'\backslash', '\\')
-      else:
-          # No need to add "{}" after single-nonletter macros
-          line = line.replace('&', '\\&')
-          line = line.replace('#', '\\#')
-          line = line.replace('^', '\\textasciicircum{}')
-          line = line.replace('%', '\\%')
-          line = line.replace('_', '\\_')
-          line = line.replace('$', '\\$')
-
-          # Do the LyX text --> LaTeX conversion
-          for rep in unicode_reps:
-            line = line.replace(rep[1], rep[0] + "{}")
-          line = line.replace(r'\backslash', r'\textbackslash{}')
-          line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
-          line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
-          line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
-          line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
-          line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
-          line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
-          line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
-          line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
-          line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
-      content += line
-    return content
-
-
-def latex_length(string):
-    'Convert lengths to their LaTeX representation.'
-    i = 0
-    percent = False
-    # the string has the form
-    # ValueUnit+ValueUnit-ValueUnit or
-    # ValueUnit+-ValueUnit
-    # the + and - (glue lengths) are optional
-    # the + always precedes the -
-
-    # Convert relative lengths to LaTeX units
-    units = {"text%":"\\textwidth", "col%":"\\columnwidth",
-             "page%":"\\paperwidth", "line%":"\\linewidth",
-             "theight%":"\\textheight", "pheight%":"\\paperheight"}
-    for unit in units.keys():
-        i = string.find(unit)
-        if i != -1:
-            percent = True
-            minus = string.rfind("-", 1, i)
-            plus = string.rfind("+", 0, i)
-            latex_unit = units[unit]
-            if plus == -1 and minus == -1:
-                value = string[:i]
-                value = str(float(value)/100)
-                end = string[i + len(unit):]
-                string = value + latex_unit + end
-            if plus > minus:
-                value = string[plus + 1:i]
-                value = str(float(value)/100)
-                begin = string[:plus + 1]
-                end = string[i+len(unit):]
-                string = begin + value + latex_unit + end
-            if plus < minus:
-                value = string[minus + 1:i]
-                value = str(float(value)/100)
-                begin = string[:minus + 1]
-                string = begin + value + latex_unit
-
-    # replace + and -, but only if the - is not the first character
-    string = string[0] + string[1:].replace("+", " plus ").replace("-", " minus ")
-    # handle the case where "+-1mm" was used, because LaTeX only understands
-    # "plus 1mm minus 1mm"
-    if string.find("plus  minus"):
-        lastvaluepos = string.rfind(" ")
-        lastvalue = string[lastvaluepos:]
-        string = string.replace("  ", lastvalue + " ")
-    if percent ==  False:
-        return "False," + string
-    else:
-        return "True," + string
-
-
-def revert_flex_inset(document, name, LaTeXname, position):
-  " Convert flex insets to TeX code "
-  i = position
-  while True:
-    i = find_token(document.body, '\\begin_inset Flex ' + name, i)
-    if i == -1:
-      return
-    z = find_end_of_inset(document.body, i)
-    if z == -1:
-      document.warning("Malformed LyX document: Can't find end of Flex " + name + " inset.")
-      return
-    # remove the \end_inset
-    document.body[z - 2:z + 1] = put_cmd_in_ert("}")
-    # we need to reset character layouts if necessary
-    j = find_token(document.body, '\\emph on', i, z)
-    k = find_token(document.body, '\\noun on', i, z)
-    l = find_token(document.body, '\\series', i, z)
-    m = find_token(document.body, '\\family', i, z)
-    n = find_token(document.body, '\\shape', i, z)
-    o = find_token(document.body, '\\color', i, z)
-    p = find_token(document.body, '\\size', i, z)
-    q = find_token(document.body, '\\bar under', i, z)
-    r = find_token(document.body, '\\uuline on', i, z)
-    s = find_token(document.body, '\\uwave on', i, z)
-    t = find_token(document.body, '\\strikeout on', i, z)
-    if j != -1:
-      document.body.insert(z - 2, "\\emph default")
-    if k != -1:
-      document.body.insert(z - 2, "\\noun default")
-    if l != -1:
-      document.body.insert(z - 2, "\\series default")
-    if m != -1:
-      document.body.insert(z - 2, "\\family default")
-    if n != -1:
-      document.body.insert(z - 2, "\\shape default")
-    if o != -1:
-      document.body.insert(z - 2, "\\color inherit")
-    if p != -1:
-      document.body.insert(z - 2, "\\size default")
-    if q != -1:
-      document.body.insert(z - 2, "\\bar default")
-    if r != -1:
-      document.body.insert(z - 2, "\\uuline default")
-    if s != -1:
-      document.body.insert(z - 2, "\\uwave default")
-    if t != -1:
-      document.body.insert(z - 2, "\\strikeout default")
-    document.body[i:i + 4] = put_cmd_in_ert(LaTeXname + "{")
-    i += 1
-
-
-def revert_font_attrs(document, name, LaTeXname):
-  " Reverts font changes to TeX code "
-  i = 0
-  changed = False
-  while True:
-    i = find_token(document.body, name + ' on', i)
-    if i == -1:
-      return changed
-    j = find_token(document.body, name + ' default', i)
-    k = find_token(document.body, name + ' on', i + 1)
-    # if there is no default set, the style ends with the layout
-    # assure hereby that we found the correct layout end
-    if j != -1 and (j < k or k == -1):
-      document.body[j:j + 1] = put_cmd_in_ert("}")
-    else:
-      j = find_token(document.body, '\\end_layout', i)
-      document.body[j:j] = put_cmd_in_ert("}")
-    document.body[i:i + 1] = put_cmd_in_ert(LaTeXname + "{")
-    changed = True
-    i += 1
-
-
-def revert_layout_command(document, name, LaTeXname, position):
-  " Reverts a command from a layout to TeX code "
-  i = position
-  while True:
-    i = find_token(document.body, '\\begin_layout ' + name, i)
-    if i == -1:
-      return
-    k = -1
-    # find the next layout
-    j = i + 1
-    while k == -1:
-      j = find_token(document.body, '\\begin_layout', j)
-      l = len(document.body)
-      # if nothing was found it was the last layout of the document
-      if j == -1:
-        document.body[l - 4:l - 4] = put_cmd_in_ert("}")
-        k = 0
-      # exclude plain layout because this can be TeX code or another inset
-      elif document.body[j] != '\\begin_layout Plain Layout':
-        document.body[j - 2:j - 2] = put_cmd_in_ert("}")
-        k = 0
-      else:
-        j += 1
-    document.body[i] = '\\begin_layout Standard'
-    document.body[i + 1:i + 1] = put_cmd_in_ert(LaTeXname + "{")
-    i += 1
-
-
  ###############################################################################
  ###
  ### Conversion and reversion routines
@@ -445,12 +89,12 @@ def revert_tabularvalign(document):
        end = find_end_of_inset(document.body, i)
        if end == -1:
            document.warning("Can't find end of inset at line " + str(i))
-          i = j
+          i += 1
            continue
        fline = find_token(document.body, "<features", i, end)
        if fline == -1:
            document.warning("Can't find features for inset at line " + str(i))
-          i = end
+          i += 1
            continue
        p = document.body[fline].find("islongtable")
        if p != -1:
@@ -460,7 +104,7 @@ def revert_tabularvalign(document):
                # This seems wrong: It removes everything after 
                # tabularvalignment, too.
                document.body[fline] = document.body[fline][:q - 1] + '>'
-          i = end
+          i += 1
            continue
  
         # no longtable
@@ -501,7 +145,9 @@ def revert_tabularvalign(document):
            '',
            '\\begin_layout Plain Layout']
        document.body[i:i] = subst # this just inserts the array at i
-      i = end + len(subst) # adjust i to save a few cycles
+      # since there could be a tabular inside a tabular, we cannot
+      # jump to end
+      i += len(subst)
  
  
  def revert_phantom_types(document, ptype, cmd):
@@ -521,7 +167,7 @@ def revert_phantom_types(document, ptype, cmd):
            document.warning("Can't find layout for inset at line " + str(i))
            i = end
            continue
-      bend = find_token(document.body, "\\end_layout", blay, end)
+      bend = find_end_of_layout(document.body, blay)
        if bend == -1:
            document.warning("Malformed LyX document: Could not find end of Phantom inset's layout.")
            i = end
@@ -549,109 +195,117 @@ def revert_vphantom(document):
  
  def revert_xetex(document):
      " Reverts documents that use XeTeX "
+
      i = find_token(document.header, '\\use_xetex', 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\use_xetex.")
          return
-    if get_value(document.header, "\\use_xetex", i) == 'false':
+    if not str2bool(get_value(document.header, "\\use_xetex", i)):
          del document.header[i]
          return
      del document.header[i]
+
      # 1.) set doc encoding to utf8-plain
      i = find_token(document.header, "\\inputencoding", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\inputencoding.")
-    document.header[i] = "\\inputencoding utf8-plain"
+    else:
+        document.header[i] = "\\inputencoding utf8-plain"
+
      # 2.) check font settings
-    l = find_token(document.header, "\\font_roman", 0)
-    if l == -1:
-        document.warning("Malformed LyX document: Missing \\font_roman.")
-    line = document.header[l]
-    l = re.compile(r'\\font_roman (.*)$')
-    m = l.match(line)
-    roman = m.group(1)
-    l = find_token(document.header, "\\font_sans", 0)
-    if l == -1:
-        document.warning("Malformed LyX document: Missing \\font_sans.")
-    line = document.header[l]
-    l = re.compile(r'\\font_sans (.*)$')
-    m = l.match(line)
-    sans = m.group(1)
-    l = find_token(document.header, "\\font_typewriter", 0)
-    if l == -1:
-        document.warning("Malformed LyX document: Missing \\font_typewriter.")
-    line = document.header[l]
-    l = re.compile(r'\\font_typewriter (.*)$')
-    m = l.match(line)
-    typewriter = m.group(1)
-    osf = get_value(document.header, '\\font_osf', 0) == "true"
-    sf_scale = float(get_value(document.header, '\\font_sf_scale', 0))
-    tt_scale = float(get_value(document.header, '\\font_tt_scale', 0))
-    # 3.) set preamble stuff
-    pretext = '%% This document must be processed with xelatex!\n'
-    pretext += '\\usepackage{fontspec}\n'
-    if roman != "default":
-        pretext += '\\setmainfont[Mapping=tex-text]{' + roman + '}\n'
-    if sans != "default":
-        pretext += '\\setsansfont['
-        if sf_scale != 100:
-            pretext += 'Scale=' + str(sf_scale / 100) + ','
-        pretext += 'Mapping=tex-text]{' + sans + '}\n'
-    if typewriter != "default":
-        pretext += '\\setmonofont'
-        if tt_scale != 100:
-            pretext += '[Scale=' + str(tt_scale / 100) + ']'
-        pretext += '{' + typewriter + '}\n'
-    if osf:
-        pretext += '\\defaultfontfeatures{Numbers=OldStyle}\n'
-    pretext += '\usepackage{xunicode}\n'
-    pretext += '\usepackage{xltxtra}\n'
-    insert_to_preamble(0, document, pretext)
-    # 4.) reset font settings
+    # defaults
+    roman = sans = typew = default
+    osf = False
+    sf_scale = tt_scale = 100.0
+    
      i = find_token(document.header, "\\font_roman", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_roman.")
-    document.header[i] = "\\font_roman default"
+    else:
+        roman = get_value(document.header, "\\font_roman", i)
+        document.header[i] = "\\font_roman default"
+
      i = find_token(document.header, "\\font_sans", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_sans.")
-    document.header[i] = "\\font_sans default"
+    else:
+        sans = get_value(document.header, "\\font_sans", i)
+        document.header[i] = "\\font_sans default"
+    
      i = find_token(document.header, "\\font_typewriter", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_typewriter.")
-    document.header[i] = "\\font_typewriter default"
+    else:
+        typew = get_value(document.header, "\\font_typewriter", i)
+        document.header[i] = "\\font_typewriter default"
+
      i = find_token(document.header, "\\font_osf", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_osf.")
-    document.header[i] = "\\font_osf false"
+    else:
+        osf = str2bool(get_value(document.header, "\\font_osf", i))
+        document.header[i] = "\\font_osf false"
+
      i = find_token(document.header, "\\font_sc", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_sc.")
-    document.header[i] = "\\font_sc false"
+    else:
+        # we do not need this value.
+        document.header[i] = "\\font_sc false"
+    
      i = find_token(document.header, "\\font_sf_scale", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_sf_scale.")
-    document.header[i] = "\\font_sf_scale 100"
+    else:
+      val = get_value(document.header, '\\font_sf_scale', i)
+      try:
+        # float() can throw
+        sf_scale = float(val)
+      except:
+        document.warning("Invalid font_sf_scale value: " + val)
+      document.header[i] = "\\font_sf_scale 100"
+
      i = find_token(document.header, "\\font_tt_scale", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_tt_scale.")
-    document.header[i] = "\\font_tt_scale 100"
+    else:
+        val = get_value(document.header, '\\font_tt_scale', i)
+        try:
+          # float() can throw
+          tt_scale = float(val)
+        except:
+          document.warning("Invalid font_tt_scale value: " + val)
+        document.header[i] = "\\font_tt_scale 100"
+
+    # 3.) set preamble stuff
+    pretext = ['%% This document must be processed with xelatex!']
+    pretext.append('\\usepackage{fontspec}')
+    if roman != "default":
+        pretext.append('\\setmainfont[Mapping=tex-text]{' + roman + '}')
+    if sans != "default":
+        sf = '\\setsansfont['
+        if sf_scale != 100.0:
+            sf += 'Scale=' + str(sf_scale / 100.0) + ','
+        sf += 'Mapping=tex-text]{' + sans + '}'
+        pretext.append(sf)
+    if typewriter != "default":
+        tw = '\\setmonofont'
+        if tt_scale != 100.0:
+            tw += '[Scale=' + str(tt_scale / 100.0) + ']'
+        tw += '{' + typewriter + '}'
+        pretext.append(tw)
+    if osf:
+        pretext.append('\\defaultfontfeatures{Numbers=OldStyle}')
+    pretext.append('\usepackage{xunicode}')
+    pretext.append('\usepackage{xltxtra}')
+    insert_to_preamble(document, pretext)
  
  
  def revert_outputformat(document):
      " Remove default output format param "
-    i = find_token(document.header, '\\default_output_format', 0)
-    if i == -1:
+    
+    if not del_token(document.header, '\\default_output_format', 0):
          document.warning("Malformed LyX document: Missing \\default_output_format.")
-        return
-    del document.header[i]
-
-
-def hex2ratio(s):
-    val = string.atoi(s, 16)
-    if val != 0:
-      val += 1
-    return str(val / 256.0)
  
  
  def revert_backgroundcolor(document):
@@ -667,12 +321,11 @@ def revert_backgroundcolor(document):
      red   = hex2ratio(colorcode[1:3])
      green = hex2ratio(colorcode[3:5])
      blue  = hex2ratio(colorcode[5:7])
-    insert_to_preamble(0, document,
-                          '% Commands inserted by lyx2lyx to set the background color\n'
-                          + '\\@ifundefined{definecolor}{\\usepackage{color}}{}\n'
-                          + '\\definecolor{page_backgroundcolor}{rgb}{'
-                          + red + ',' + green + ',' + blue + '}\n'
-                          + '\\pagecolor{page_backgroundcolor}\n')
+    insert_to_preamble(document, \
+        ['% To set the background color',
+        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
+        '\\definecolor{page_backgroundcolor}{rgb}{' + red + ',' + green + ',' + blue + '}',
+        '\\pagecolor{page_backgroundcolor}'])
  
  
  def revert_splitindex(document):
@@ -681,12 +334,11 @@ def revert_splitindex(document):
      if i == -1:
          document.warning("Malformed LyX document: Missing \\use_indices.")
          return
-    indices = get_value(document.header, "\\use_indices", i)
-    preamble = ""
-    useindices = (indices == "true")
-    if useindices:
-         preamble += "\\usepackage{splitidx}\n"
+    useindices = str2bool(get_value(document.header, "\\use_indices", i))
      del document.header[i]
+    preamble = []
+    if useindices:
+         preamble.append("\\usepackage{splitidx}")
      
      # deal with index declarations in the preamble
      i = 0
@@ -705,10 +357,10 @@ def revert_splitindex(document):
            iname = m.group(1)
            ishortcut = get_value(document.header, '\\shortcut', i, k)
            if ishortcut != "":
-              preamble += "\\newindex[" + iname + "]{" + ishortcut + "}\n"
+              preamble.append("\\newindex[" + iname + "]{" + ishortcut + "}")
          del document.header[i:k + 1]
-    if preamble != "":
-        insert_to_preamble(0, document, preamble)
+    if preamble:
+        insert_to_preamble(document, preamble)
          
      # deal with index insets
      # these need to have the argument removed
@@ -743,7 +395,7 @@ def revert_splitindex(document):
          if i == -1:
              return
          k = find_end_of_inset(document.body, i)
-        ptype = get_value(document.body, 'type', i, k).strip('"')
+        ptype = get_quoted_value(document.body, 'type', i, k)
          if ptype == "idx":
              j = find_token(document.body, "type", i, k)
              del document.body[j]
@@ -785,8 +437,7 @@ def revert_subindex(document):
      if i == -1:
          document.warning("Malformed LyX document: Missing \\use_indices.")
          return
-    indices = get_value(document.header, "\\use_indices", i)
-    useindices = (indices == "true")
+    useindices = str2bool(get_value(document.header, "\\use_indices", i))
      i = 0
      while True:
          i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
@@ -797,7 +448,7 @@ def revert_subindex(document):
          if ctype != "printsubindex":
              i = k + 1
              continue
-        ptype = get_value(document.body, 'type', i, k).strip('"')
+        ptype = get_quoted_value(document.body, 'type', i, k)
          if not useindices:
              del document.body[i:k + 1]
          else:
@@ -812,8 +463,7 @@ def revert_printindexall(document):
      if i == -1:
          document.warning("Malformed LyX document: Missing \\use_indices.")
          return
-    indices = get_value(document.header, "\\use_indices", i)
-    useindices = (indices == "true")
+    useindices = str2bool(get_value(document.header, "\\use_indices", i))
      i = 0
      while True:
          i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
@@ -834,14 +484,14 @@ def revert_printindexall(document):
  
  def revert_strikeout(document):
    " Reverts \\strikeout font attribute "
-  changed = revert_font_attrs(document, "\\uuline", "\\uuline")
-  changed = revert_font_attrs(document, "\\uwave", "\\uwave") or changed
-  changed = revert_font_attrs(document, "\\strikeout", "\\sout")  or changed
+  changed = revert_font_attrs(document.body, "\\uuline", "\\uuline")
+  changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed
+  changed = revert_font_attrs(document.body, "\\strikeout", "\\sout")  or changed
    if changed == True:
-    insert_to_preamble(0, document,
-        '% Commands inserted by lyx2lyx for proper underlining\n'
-        + '\\PassOptionsToPackage{normalem}{ulem}\n'
-        + '\\usepackage{ulem}\n')
+    insert_to_preamble(document, \
+        ['%  for proper underlining',
+        '\\PassOptionsToPackage{normalem}{ulem}',
+        '\\usepackage{ulem}'])
  
  
  def revert_ulinelatex(document):
@@ -849,28 +499,24 @@ def revert_ulinelatex(document):
      i = find_token(document.body, '\\bar under', 0)
      if i == -1:
          return
-    insert_to_preamble(0, document,
-            '% Commands inserted by lyx2lyx for proper underlining\n'
-            + '\\PassOptionsToPackage{normalem}{ulem}\n'
-            + '\\usepackage{ulem}\n'
-            + '\\let\\cite@rig\\cite\n'
-            + '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}\n'
-            + '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}\n'
-            + '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}\n')
+    insert_to_preamble(document,\
+            ['%  for proper underlining',
+            '\\PassOptionsToPackage{normalem}{ulem}',
+            '\\usepackage{ulem}',
+            '\\let\\cite@rig\\cite',
+            '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}',
+            '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}',
+            '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}'])
  
  
  def revert_custom_processors(document):
      " Remove bibtex_command and index_command params "
-    i = find_token(document.header, '\\bibtex_command', 0)
-    if i == -1:
+    
+    if not del_token(document.header, '\\bibtex_command', 0):
          document.warning("Malformed LyX document: Missing \\bibtex_command.")
-        return
-    del document.header[i]
-    i = find_token(document.header, '\\index_command', 0)
-    if i == -1:
+    
+    if not del_token(document.header, '\\index_command', 0):
          document.warning("Malformed LyX document: Missing \\index_command.")
-        return
-    del document.header[i]
  
  
  def convert_nomencl_width(document):
@@ -892,13 +538,9 @@ def revert_nomencl_width(document):
        if i == -1:
          break
        j = find_end_of_inset(document.body, i)
-      l = find_token(document.body, "set_width", i, j)
-      if l == -1:
-            document.warning("Can't find set_width option for nomencl_print!")
-            i = j
-            continue
-      del document.body[l]
-      i = i + 1
+      if not del_token(document.body, "set_width", i, j):
+        document.warning("Can't find set_width option for nomencl_print!")
+      i = j
  
  
  def revert_nomencl_cwidth(document):
@@ -911,41 +553,52 @@ def revert_nomencl_cwidth(document):
        j = find_end_of_inset(document.body, i)
        l = find_token(document.body, "width", i, j)
        if l == -1:
-            #Can't find width option for nomencl_print
-            i = j
-            continue
-      width = get_value(document.body, "width", i, j).strip('"')
+        document.warning("Can't find width option for nomencl_print!")
+        i = j
+        continue
+      width = get_quoted_value(document.body, "width", i, j)
        del document.body[l]
-      add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-      add_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
-      i = i + 1
+      insert_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
+      i = j - 1
  
  
  def revert_applemac(document):
      " Revert applemac encoding to auto "
-    i = 0
-    if document.encoding == "applemac":
-        document.encoding = "auto"
-        i = find_token(document.header, "\\encoding", 0)
-        if i != -1:
-            document.header[i] = "\\encoding auto"
+    if document.encoding != "applemac":
+      return
+    document.encoding = "auto"
+    i = find_token(document.header, "\\encoding", 0)
+    if i != -1:
+        document.header[i] = "\\encoding auto"
  
  
  def revert_longtable_align(document):
      " Remove longtable alignment setting "
      i = 0
-    j = 0
      while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            break
-      # the alignment is 2 lines below \\begin_inset Tabular
-      j = document.body[i + 2].find("longtabularalignment")
+      end = find_end_of_inset(document.body, i)
+      if end == -1:
+          document.warning("Can't find end of inset at line " + str(i))
+          i += 1
+          continue
+      fline = find_token(document.body, "<features", i, end)
+      if fline == -1:
+          document.warning("Can't find features for inset at line " + str(i))
+          i += 1
+          continue
+      j = document.body[fline].find("longtabularalignment")
        if j == -1:
-          break
-      document.body[i + 2] = document.body[i + 2][:j - 1]
-      document.body[i + 2] = document.body[i + 2] + '>'
-      i = i + 1
+          i += 1
+          continue
+      # FIXME Is this correct? It wipes out everything after the 
+      # one we found.
+      document.body[fline] = document.body[fline][:j - 1] + '>'
+      # since there could be a tabular inside this one, we 
+      # cannot jump to end.
+      i += 1
  
  
  def revert_branch_filename(document):
@@ -960,49 +613,34 @@ def revert_branch_filename(document):
  
  def revert_paragraph_indentation(document):
      " Revert custom paragraph indentation to preamble code "
-    i = 0
-    while True:
-      i = find_token(document.header, "\\paragraph_indentation", i)
-      if i == -1:
-          break
-      # only remove the preamble line if default
-      # otherwise also write the value to the preamble
-      length = get_value(document.header, "\\paragraph_indentation", i)
-      if length == "default":
-          del document.header[i]
-          break
-      else:
-          # handle percent lengths
-          # latex_length returns "bool,length"
-          length = latex_length(length).split(",")[1]
-          add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-          add_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"])
-          del document.header[i]
-      i = i + 1
+    i = find_token(document.header, "\\paragraph_indentation", 0)
+    if i == -1:
+      return
+    length = get_value(document.header, "\\paragraph_indentation", i)
+    # we need only remove the line if indentation is default
+    if length != "default":
+      # handle percent lengths
+      length = latex_length(length)[1]
+      insert_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"])
+    del document.header[i]
  
  
  def revert_percent_skip_lengths(document):
      " Revert relative lengths for paragraph skip separation to preamble code "
-    i = 0
-    while True:
-      i = find_token(document.header, "\\defskip", i)
-      if i == -1:
-          break
-      length = get_value(document.header, "\\defskip", i)
-      # only revert when a custom length was set and when
-      # it used a percent length
-      if length not in ('smallskip', 'medskip', 'bigskip'):
-          # handle percent lengths
-          length = latex_length(length)
-          # latex_length returns "bool,length"
-          percent = length.split(",")[0]
-          length = length.split(",")[1]
-          if percent == "True":
-              add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-              add_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"])
-              # set defskip to medskip as default
-              document.header[i] = "\\defskip medskip"
-      i = i + 1
+    i = find_token(document.header, "\\defskip", 0)
+    if i == -1:
+        return
+    length = get_value(document.header, "\\defskip", i)
+    # only revert when a custom length was set and when
+    # it used a percent length
+    if length in ('smallskip', 'medskip', 'bigskip'):
+        return
+    # handle percent lengths
+    percent, length = latex_length(length)
+    if percent:
+        insert_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"])
+        # set defskip to medskip as default
+        document.header[i] = "\\defskip medskip"
  
  
  def revert_percent_vspace_lengths(document):
@@ -1014,215 +652,234 @@ def revert_percent_vspace_lengths(document):
            break
        # only revert if a custom length was set and if
        # it used a percent length
-      line = document.body[i]
        r = re.compile(r'\\begin_inset VSpace (.*)$')
-      m = r.match(line)
+      m = r.match(document.body[i])
        length = m.group(1)
-      if length not in ('defskip', 'smallskip', 'medskip', 'bigskip', 'vfill'):
-          # check if the space has a star (protected space)
-          protected = (document.body[i].rfind("*") != -1)
+      if length in ('defskip', 'smallskip', 'medskip', 'bigskip', 'vfill'):
+         i += 1
+         continue
+      # check if the space has a star (protected space)
+      protected = (document.body[i].rfind("*") != -1)
+      if protected:
+          length = length.rstrip('*')
+      # handle percent lengths
+      percent, length = latex_length(length)
+      # revert the VSpace inset to ERT
+      if percent:
            if protected:
-              length = length.rstrip('*')
-          # handle percent lengths
-          length = latex_length(length)
-          # latex_length returns "bool,length"
-          percent = length.split(",")[0]
-          length = length.split(",")[1]
-          # revert the VSpace inset to ERT
-          if percent == "True":
-              if protected:
-                  subst = [old_put_cmd_in_ert("\\vspace*{" + length + "}")]
-              else:
-                  subst = [old_put_cmd_in_ert("\\vspace{" + length + "}")]
-              document.body[i:i + 2] = subst
-      i = i + 1
+              subst = put_cmd_in_ert("\\vspace*{" + length + "}")
+          else:
+              subst = put_cmd_in_ert("\\vspace{" + length + "}")
+          document.body[i:i + 2] = subst
+      i += 1
  
  
  def revert_percent_hspace_lengths(document):
      " Revert relative HSpace lengths to ERT "
      i = 0
      while True:
-      i = find_token(document.body, "\\begin_inset space \\hspace", i)
+      i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
        if i == -1:
            break
-      protected = (document.body[i].find("\\hspace*{}") != -1)
-      # only revert if a custom length was set and if
-      # it used a percent length
-      length = get_value(document.body, '\\length', i + 1)
+      j = find_end_of_inset(document.body, i)
+      if j == -1:
+          document.warning("Can't find end of inset at line " + str(i))
+          i += 1
+          continue
+      # only revert if a custom length was set...
+      length = get_value(document.body, '\\length', i + 1, j)
        if length == '':
            document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
-          return
-      # handle percent lengths
-      length = latex_length(length)
-      # latex_length returns "bool,length"
-      percent = length.split(",")[0]
-      length = length.split(",")[1]
+          i = j
+          continue
+      protected = ""
+      if document.body[i].find("\\hspace*{}") != -1:
+          protected = "*"
+      # ...and if it used a percent length
+      percent, length = latex_length(length)
        # revert the HSpace inset to ERT
-      if percent == "True":
-          if protected:
-              subst = [old_put_cmd_in_ert("\\hspace*{" + length + "}")]
-          else:
-              subst = [old_put_cmd_in_ert("\\hspace{" + length + "}")]
-          document.body[i:i + 3] = subst
-      i = i + 2
+      if percent:
+          subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
+          document.body[i:j + 1] = subst
+      # if we did a substitution, this will still be ok
+      i = j
  
  
  def revert_hspace_glue_lengths(document):
      " Revert HSpace glue lengths to ERT "
      i = 0
      while True:
-      i = find_token(document.body, "\\begin_inset space \\hspace", i)
+      i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
        if i == -1:
            break
-      protected = (document.body[i].find("\\hspace*{}") != -1)
-      length = get_value(document.body, '\\length', i + 1)
+      j = find_end_of_inset(document.body, i)
+      if j == -1:
+          document.warning("Can't find end of inset at line " + str(i))
+          i += 1
+          continue
+      length = get_value(document.body, '\\length', i + 1, j)
        if length == '':
            document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
-          return
+          i = j
+          continue
+      protected = ""
+      if document.body[i].find("\\hspace*{}") != -1:
+          protected = "*"
        # only revert if the length contains a plus or minus at pos != 0
-      glue  = re.compile(r'.+[\+-]')
-      if glue.search(length):
+      if length.find('-',1) != -1 or length.find('+',1) != -1:
            # handle percent lengths
-          # latex_length returns "bool,length"
-          length = latex_length(length).split(",")[1]
+          length = latex_length(length)[1]
            # revert the HSpace inset to ERT
-          if protected:
-              subst = [old_put_cmd_in_ert("\\hspace*{" + length + "}")]
-          else:
-              subst = [old_put_cmd_in_ert("\\hspace{" + length + "}")]
-          document.body[i:i + 3] = subst
-      i = i + 2
+          subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
+          document.body[i:j+1] = subst
+      i = j
+
  
  def convert_author_id(document):
      " Add the author_id to the \\author definition and make sure 0 is not used"
      i = 0
-    j = 1
+    anum = 1
+    re_author = re.compile(r'(\\author) (\".*\")\s*(.*)$')
+    
      while True:
          i = find_token(document.header, "\\author", i)
          if i == -1:
              break
-        
-        r = re.compile(r'(\\author) (\".*\")\s?(.*)$')
-        m = r.match(document.header[i])
-        if m != None:
+        m = re_author.match(document.header[i])
+        if m:
              name = m.group(2)
-            
-            email = ''
-            if m.lastindex == 3:
-                email = m.group(3)
-            document.header[i] = "\\author %i %s %s" % (j, name, email)
-        j = j + 1
-        i = i + 1
+            email = m.group(3)
+            document.header[i] = "\\author %i %s %s" % (anum, name, email)
+        anum += 1
+        i += 1
          
-    k = 0
+    i = 0
      while True:
-        k = find_token(document.body, "\\change_", k)
-        if k == -1:
+        i = find_token(document.body, "\\change_", i)
+        if i == -1:
              break
-
-        change = document.body[k].split(' ');
+        change = document.body[i].split(' ');
          if len(change) == 3:
              type = change[0]
              author_id = int(change[1])
              time = change[2]
-            document.body[k] = "%s %i %s" % (type, author_id + 1, time)
-        k = k + 1
+            document.body[i] = "%s %i %s" % (type, author_id + 1, time)
+        i += 1
+
  
  def revert_author_id(document):
      " Remove the author_id from the \\author definition "
      i = 0
-    j = 0
+    anum = 0
+    rx = re.compile(r'(\\author)\s+(\d+)\s+(\".*\")\s*(.*)$')
      idmap = dict()
+
      while True:
          i = find_token(document.header, "\\author", i)
          if i == -1:
              break
-        
-        r = re.compile(r'(\\author) (\d+) (\".*\")\s?(.*)$')
-        m = r.match(document.header[i])
-        if m != None:
+        m = rx.match(document.header[i])
+        if m:
              author_id = int(m.group(2))
-            idmap[author_id] = j
+            idmap[author_id] = anum
              name = m.group(3)
-            
-            email = ''
-            if m.lastindex == 4:
-                email = m.group(4)
+            email = m.group(4)
              document.header[i] = "\\author %s %s" % (name, email)
-        i = i + 1
-        j = j + 1
+        i += 1
+        # FIXME Should this be incremented if we didn't match?
+        anum += 1
  
-    k = 0
+    i = 0
      while True:
-        k = find_token(document.body, "\\change_", k)
-        if k == -1:
+        i = find_token(document.body, "\\change_", i)
+        if i == -1:
              break
-
-        change = document.body[k].split(' ');
+        change = document.body[i].split(' ');
          if len(change) == 3:
              type = change[0]
              author_id = int(change[1])
              time = change[2]
-            document.body[k] = "%s %i %s" % (type, idmap[author_id], time)
-        k = k + 1
+            document.body[i] = "%s %i %s" % (type, idmap[author_id], time)
+        i += 1
  
  
  def revert_suppress_date(document):
      " Revert suppressing of default document date to preamble code "
-    i = 0
-    while True:
-      i = find_token(document.header, "\\suppress_date", i)
-      if i == -1:
-          break
-      # remove the preamble line and write to the preamble
-      # when suppress_date was true
-      date = get_value(document.header, "\\suppress_date", i)
-      if date == "true":
-          add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-          add_to_preamble(document, ["\\date{}"])
-      del document.header[i]
-      i = i + 1
+    i = find_token(document.header, "\\suppress_date", 0)
+    if i == -1:
+        return
+    # remove the preamble line and write to the preamble
+    # when suppress_date was true
+    date = str2bool(get_value(document.header, "\\suppress_date", i))
+    if date:
+        add_to_preamble(document, ["\\date{}"])
+    del document.header[i]
+
+
+def convert_mhchem(document):
+    "Set mhchem to off for versions older than 1.6.x"
+    if document.start < 277:
+        # LyX 1.5.x and older did never load mhchem.
+        # Therefore we must switch it off: Documents that use mhchem have
+        # a manual \usepackage anyway, and documents not using mhchem but
+        # custom macros with the same names as mhchem commands might get
+        # corrupted if mhchem is automatically loaded.
+        mhchem = 0 # off
+    else:
+        # LyX 1.6.x did always load mhchem automatically.
+        mhchem = 1 # auto
+    i = find_token(document.header, "\\use_esint", 0)
+    if i == -1:
+        # pre-1.5.x document
+        i = find_token(document.header, "\\use_amsmath", 0)
+    if i == -1:
+        document.warning("Malformed LyX document: Could not find amsmath os esint setting.")
+        return
+    document.header.insert(i + 1, "\\use_mhchem %d" % mhchem)
  
  
  def revert_mhchem(document):
      "Revert mhchem loading to preamble code"
-    i = 0
-    j = 0
-    k = 0
+
      mhchem = "off"
-    i = find_token(document.header, "\\use_mhchem 1", 0)
-    if i != -1:
+    i = find_token(document.header, "\\use_mhchem", 0)
+    if i == -1:
+        document.warning("Malformed LyX document: Could not find mhchem setting.")
          mhchem = "auto"
      else:
-        i = find_token(document.header, "\\use_mhchem 2", 0)
-        if i != -1:
+        val = get_value(document.header, "\\use_mhchem", i)
+        if val == "1":
+            mhchem = "auto"
+        elif val == "2":
              mhchem = "on"
+        del document.header[i]
+
+    if mhchem == "off":
+      # don't load case
+      return 
+
      if mhchem == "auto":
-        j = find_token(document.body, "\\cf{", 0)
-        if j != -1:
-            mhchem = "on"
-        else:
-            j = find_token(document.body, "\\ce{", 0)
-            if j != -1:
-                mhchem = "on"
+        i = 0
+        while True:
+            i = find_token(document.body, "\\begin_inset Formula", i)
+            if i == -1:
+               break
+            line = document.body[i]
+            if line.find("\\ce{") != -1 or line.find("\\cf{") != -1:
+              mhchem = "on"
+              break
+            i += 1
+
      if mhchem == "on":
-        add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-        add_to_preamble(document, ["\\PassOptionsToPackage{version=3}{mhchem}"])
-        add_to_preamble(document, ["\\usepackage{mhchem}"])
-    k = find_token(document.header, "\\use_mhchem", 0)
-    if k == -1:
-        document.warning("Malformed LyX document: Could not find mhchem setting.")
-        return
-    del document.header[k]
+        pre = ["\\PassOptionsToPackage{version=3}{mhchem}", 
+          "\\usepackage{mhchem}"]
+        insert_to_preamble(document, pre) 
  
  
  def revert_fontenc(document):
      " Remove fontencoding param "
-    i = find_token(document.header, '\\fontencoding', 0)
-    if i == -1:
+    if not del_token(document.header, '\\fontencoding', 0):
          document.warning("Malformed LyX document: Missing \\fontencoding.")
-        return
-    del document.header[i]
  
  
  def merge_gbrief(document):
@@ -1280,12 +937,8 @@ def revert_gbrief(document):
  
  def revert_html_options(document):
      " Remove html options "
-    i = find_token(document.header, '\\html_use_mathml', 0)
-    if i != -1:
-        del document.header[i]
-    i = find_token(document.header, '\\html_be_strict', 0)
-    if i != -1:
-        del document.header[i]
+    del_token(document.header, '\\html_use_mathml', 0)
+    del_token(document.header, '\\html_be_strict', 0)
  
  
  def revert_includeonly(document):
@@ -1296,58 +949,143 @@ def revert_includeonly(document):
              return
          j = find_end_of(document.header, i, "\\begin_includeonly", "\\end_includeonly")
          if j == -1:
-            # this should not happen
+            document.warning("Unable to find end of includeonly section!!")
              break
          document.header[i : j + 1] = []
  
  
  def revert_includeall(document):
      " Remove maintain_unincluded_children param "
-    i = find_token(document.header, '\\maintain_unincluded_children', 0)
-    if i != -1:
-        del document.header[i]
+    del_token(document.header, '\\maintain_unincluded_children', 0)
  
  
  def revert_multirow(document):
      " Revert multirow cells in tables to TeX-code"
-    i = 0
-    multirow = False
+
+    # first, let's find out if we need to do anything
+    # cell type 3 is multirow begin cell
+    i = find_token(document.body, '<cell multirow="3"', 0)
+    if i == -1:
+      return
+
+    add_to_preamble(document, ["\\usepackage{multirow}"])
+
+    begin_table = 0
      while True:
-      # cell type 3 is multirow begin cell
-      i = find_token(document.body, '<cell multirow="3"', i)
-      if i == -1:
-          break
-      # a multirow cell was found
-      multirow = True
-      # remove the multirow tag, set the valignment to top
-      # and remove the bottom line
-      document.body[i] = document.body[i].replace(' multirow="3" ', ' ')
-      document.body[i] = document.body[i].replace('valignment="middle"', 'valignment="top"')
-      document.body[i] = document.body[i].replace(' bottomline="true" ', ' ')
-      # write ERT to create the multirow cell
-      # use 2 rows and 2cm as default with because the multirow span
-      # and the column width is only hardly accessible
-      subst = [old_put_cmd_in_ert("\\multirow{2}{2cm}{")]
-      document.body[i + 4:i + 4] = subst
-      i = find_token(document.body, "</cell>", i)
-      if i == -1:
-           document.warning("Malformed LyX document: Could not find end of tabular cell.")
-           break
-      subst = [old_put_cmd_in_ert("}")]
-      document.body[i - 3:i - 3] = subst
-      # cell type 4 is multirow part cell
-      i = find_token(document.body, '<cell multirow="4"', i)
-      if i == -1:
-          break
-      # remove the multirow tag, set the valignment to top
-      # and remove the top line
-      document.body[i] = document.body[i].replace(' multirow="4" ', ' ')
-      document.body[i] = document.body[i].replace('valignment="middle"', 'valignment="top"')
-      document.body[i] = document.body[i].replace(' topline="true" ', ' ')
-      i = i + 1
-    if multirow == True:
-        add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-        add_to_preamble(document, ["\\usepackage{multirow}"])
+        # find begin/end of table
+        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
+        if begin_table == -1:
+            break
+        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
+        if end_table == -1:
+            document.warning("Malformed LyX document: Could not find end of table.")
+            begin_table += 1
+            continue
+        # does this table have multirow?
+        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
+        if i == -1:
+            begin_table = end_table
+            continue
+        
+        # store the number of rows and columns
+        numrows = get_option_value(document.body[begin_table], "rows")
+        numcols = get_option_value(document.body[begin_table], "columns")
+        try:
+          numrows = int(numrows)
+          numcols = int(numcols)
+        except:
+          document.warning(numrows)
+          document.warning("Unable to determine rows and columns!")
+          begin_table = end_table
+          continue
+
+        mrstarts = []
+        multirows = []
+        # collect info on rows and columns of this table.
+        begin_row = begin_table
+        for row in range(numrows):
+            begin_row = find_token(document.body, '<row>', begin_row, end_table)
+            if begin_row == -1:
+              document.warning("Can't find row " + str(row + 1))
+              break
+            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
+            if end_row == -1:
+              document.warning("Can't find end of row " + str(row + 1))
+              break
+            begin_cell = begin_row
+            multirows.append([])
+            for column in range(numcols):            
+                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
+                if begin_cell == -1:
+                  document.warning("Can't find column " + str(column + 1) + \
+                    "in row " + str(row + 1))
+                  break
+                # NOTE 
+                # this will fail if someone puts "</cell>" in a cell, but
+                # that seems fairly unlikely.
+                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
+                if end_cell == -1:
+                  document.warning("Can't find end of column " + str(column + 1) + \
+                    "in row " + str(row + 1))
+                  break
+                multirows[row].append([begin_cell, end_cell, 0])
+                if document.body[begin_cell].find('multirow="3"') != -1:
+                  multirows[row][column][2] = 3 # begin multirow
+                  mrstarts.append([row, column])
+                elif document.body[begin_cell].find('multirow="4"') != -1:
+                  multirows[row][column][2] = 4 # in multirow
+                begin_cell = end_cell
+            begin_row = end_row
+        # end of table info collection
+
+        # work from the back to avoid messing up numbering
+        mrstarts.reverse()
+        for m in mrstarts:
+            row = m[0]
+            col = m[1]
+            # get column width
+            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
+            # "0pt" means that no width is specified
+            if not col_width or col_width == "0pt":
+              col_width = "*"
+            # determine the number of cells that are part of the multirow
+            nummrs = 1
+            for r in range(row + 1, numrows):
+                if multirows[r][col][2] != 4:
+                  break
+                nummrs += 1
+                # take the opportunity to revert this line
+                lineno = multirows[r][col][0]
+                document.body[lineno] = document.body[lineno].\
+                  replace(' multirow="4" ', ' ').\
+                  replace('valignment="middle"', 'valignment="top"').\
+                  replace(' topline="true" ', ' ')
+                # remove bottom line of previous multirow-part cell
+                lineno = multirows[r-1][col][0]
+                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
+            # revert beginning cell
+            bcell = multirows[row][col][0]
+            ecell = multirows[row][col][1]
+            document.body[bcell] = document.body[bcell].\
+              replace(' multirow="3" ', ' ').\
+              replace('valignment="middle"', 'valignment="top"')
+            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
+            if blay == -1:
+              document.warning("Can't find layout for cell!")
+              continue
+            bend = find_end_of_layout(document.body, blay)
+            if bend == -1:
+              document.warning("Can't find end of layout for cell!")
+              continue
+            # do the later one first, so as not to mess up the numbering
+            # we are wrapping the whole cell in this ert
+            # so before the end of the layout...
+            document.body[bend:bend] = put_cmd_in_ert("}")
+            # ...and after the beginning
+            document.body[blay + 1:blay + 1] = \
+              put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}{")
+
+        begin_table = end_table
  
  
  def convert_math_output(document):
@@ -1359,8 +1097,8 @@ def convert_math_output(document):
      m = rgx.match(document.header[i])
      newval = "0" # MathML
      if m:
-      val = m.group(1)
-      if val != "true":
+      val = str2bool(m.group(1))
+      if not val:
          newval = "2" # Images
      else:
        document.warning("Can't match " + document.header[i])
@@ -1388,45 +1126,89 @@ def revert_math_output(document):
  def revert_inset_preview(document):
      " Dissolves the preview inset "
      i = 0
-    j = 0
-    k = 0
      while True:
        i = find_token(document.body, "\\begin_inset Preview", i)
        if i == -1:
            return
-      j = find_end_of_inset(document.body, i)
-      if j == -1:
+      iend = find_end_of_inset(document.body, i)
+      if iend == -1:
            document.warning("Malformed LyX document: Could not find end of Preview inset.")
-          return
-      #If the layout is Standard we need to remove it, otherwise there
-      #will be paragraph breaks that shouldn't be there.
-      k = find_token(document.body, "\\begin_layout Standard", i)
-      if k == i + 2:
-          del document.body[i:i + 3]
-          del document.body[j - 5:j - 2]
-          i -= 6
-      else:
+          i += 1
+          continue
+      
+      # This has several issues.
+      # We need to do something about the layouts inside InsetPreview.
+      # If we just leave the first one, then we have something like:
+      # \begin_layout Standard
+      # ...
+      # \begin_layout Standard
+      # and we get a "no \end_layout" error. So something has to be done.
+      # Ideally, we would check if it is the same as the layout we are in.
+      # If so, we just remove it; if not, we end the active one. But it is 
+      # not easy to know what layout we are in, due to depth changes, etc,
+      # and it is not clear to me how much work it is worth doing. In most
+      # cases, the layout will probably be the same.
+      # 
+      # For the same reason, we have to remove the \end_layout tag at the
+      # end of the last layout in the inset. Again, that will sometimes be
+      # wrong, but it will usually be right. To know what to do, we would
+      # again have to know what layout the inset is in.
+      
+      blay = find_token(document.body, "\\begin_layout", i, iend)
+      if blay == -1:
+          document.warning("Can't find layout for preview inset!")
+          # always do the later one first...
+          del document.body[iend]
            del document.body[i]
-          del document.body[j - 1]
-          i -= 2
+          # deletions mean we do not need to reset i
+          continue
+
+      # This is where we would check what layout we are in.
+      # The check for Standard is definitely wrong.
+      # 
+      # lay = document.body[blay].split(None, 1)[1]
+      # if lay != oldlayout:
+      #     # record a boolean to tell us what to do later....
+      #     # better to do it later, since (a) it won't mess up
+      #     # the numbering and (b) we only modify at the end.
+        
+      # we want to delete the last \\end_layout in this inset, too.
+      # note that this may not be the \\end_layout that goes with blay!!
+      bend = find_end_of_layout(document.body, blay)
+      while True:
+          tmp = find_token(document.body, "\\end_layout", bend + 1, iend)
+          if tmp == -1:
+              break
+          bend = tmp
+      if bend == blay:
+          document.warning("Unable to find last layout in preview inset!")
+          del document.body[iend]
+          del document.body[i]
+          # deletions mean we do not need to reset i
+          continue
+      # always do the later one first...
+      del document.body[iend]
+      del document.body[bend]
+      del document.body[i:blay + 1]
+      # we do not need to reset i
                  
  
  def revert_equalspacing_xymatrix(document):
      " Revert a Formula with xymatrix@! to an ERT inset "
      i = 0
-    j = 0
      has_preamble = False
      has_equal_spacing = False
+
      while True:
-      found = -1
        i = find_token(document.body, "\\begin_inset Formula", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Formula inset.")
-          break
-          
+          i += 1
+          continue
+      
        for curline in range(i,j):
            found = document.body[curline].find("\\xymatrix@!")
            if found != -1:
@@ -1438,7 +1220,7 @@ def revert_equalspacing_xymatrix(document):
            content += document.body[i + 1:j]
            subst = put_cmd_in_ert(content)
            document.body[i:j + 1] = subst
-          i += len(subst)
+          i += len(subst) - (j - i) + 1
        else:
            for curline in range(i,j):
                l = document.body[curline].find("\\xymatrix")
@@ -1446,134 +1228,96 @@ def revert_equalspacing_xymatrix(document):
                    has_preamble = True;
                    break;
            i = j + 1
+  
      if has_equal_spacing and not has_preamble:
          add_to_preamble(document, ['\\usepackage[all]{xy}'])
  
  
  def revert_notefontcolor(document):
      " Reverts greyed-out note font color to preamble code "
-    i = 0
-    colorcode = ""
-    while True:
-      i = find_token(document.header, "\\notefontcolor", i)
-      if i == -1:
-          return
-      colorcode = get_value(document.header, '\\notefontcolor', 0)
-      del document.header[i]
-      # the color code is in the form #rrggbb where every character denotes a hex number
-      # convert the string to an int
-      red = string.atoi(colorcode[1:3],16)
-      # we want the output "0.5" for the value "127" therefore increment here
-      if red != 0:
-          red = red + 1
-      redout = float(red) / 256
-      green = string.atoi(colorcode[3:5],16)
-      if green != 0:
-          green = green + 1
-      greenout = float(green) / 256
-      blue = string.atoi(colorcode[5:7],16)
-      if blue != 0:
-          blue = blue + 1
-      blueout = float(blue) / 256
-      # write the preamble
-      insert_to_preamble(0, document,
-                           '% Commands inserted by lyx2lyx to set the font color\n'
-                           '% for greyed-out notes\n'
-                           + '\\@ifundefined{definecolor}{\\usepackage{color}}{}\n'
-                           + '\\definecolor{note_fontcolor}{rgb}{'
-                           + str(redout) + ', ' + str(greenout)
-                           + ', ' + str(blueout) + '}\n'
-                           + '\\renewenvironment{lyxgreyedout}\n'
-                           + ' {\\textcolor{note_fontcolor}\\bgroup}{\\egroup}\n')
+
+    i = find_token(document.header, "\\notefontcolor", 0)
+    if i == -1:
+        return
+
+    colorcode = get_value(document.header, '\\notefontcolor', i)
+    del document.header[i]
+
+    # are there any grey notes?
+    if find_token(document.body, "\\begin_inset Note Greyedout", 0) == -1:
+        # no need to do anything else, and \renewcommand will throw 
+        # an error since lyxgreyedout will not exist.
+        return
+
+    # the color code is in the form #rrggbb where every character denotes a hex number
+    red = hex2ratio(colorcode[1:3])
+    green = hex2ratio(colorcode[3:5])
+    blue = hex2ratio(colorcode[5:7])
+    # write the preamble
+    insert_to_preamble(document,
+      [ '%  for greyed-out notes',
+        '\\@ifundefined{definecolor}{\\usepackage{color}}{}'
+        '\\definecolor{note_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue),
+        '\\renewenvironment{lyxgreyedout}',
+        ' {\\textcolor{note_fontcolor}\\bgroup}{\\egroup}'])
  
  
  def revert_turkmen(document):
      "Set language Turkmen to English" 
-    i = 0 
+    # Step 1: the document-wide language (attribute and \language header line).
      if document.language == "turkmen": 
          document.language = "english" 
          i = find_token(document.header, "\\language", 0) 
          if i != -1: 
              document.header[i] = "\\language english" 
+    # Step 2: every "\lang turkmen" line in the body, independent of step 1.
      j = 0 
      while True: 
          j = find_token(document.body, "\\lang turkmen", j) 
          if j == -1: 
              return 
          document.body[j] = document.body[j].replace("\\lang turkmen", "\\lang english") 
-        j = j + 1 
+        j += 1  # continue the search after the line just rewritten
  
  
  def revert_fontcolor(document):
      " Reverts font color to preamble code "
-    i = 0
-    colorcode = ""
-    while True:
-      i = find_token(document.header, "\\fontcolor", i)
-      if i == -1:
-          return
-      colorcode = get_value(document.header, '\\fontcolor', 0)
-      del document.header[i]
-      # don't clutter the preamble if backgroundcolor is not set
-      if colorcode == "#000000":
-          continue
-      # the color code is in the form #rrggbb where every character denotes a hex number
-      # convert the string to an int
-      red = string.atoi(colorcode[1:3],16)
-      # we want the output "0.5" for the value "127" therefore add here
-      if red != 0:
-          red = red + 1
-      redout = float(red) / 256
-      green = string.atoi(colorcode[3:5],16)
-      if green != 0:
-          green = green + 1
-      greenout = float(green) / 256
-      blue = string.atoi(colorcode[5:7],16)
-      if blue != 0:
-          blue = blue + 1
-      blueout = float(blue) / 256
-      # write the preamble
-      insert_to_preamble(0, document,
-                           '% Commands inserted by lyx2lyx to set the font color\n'
-                           + '\\@ifundefined{definecolor}{\\usepackage{color}}{}\n'
-                           + '\\definecolor{document_fontcolor}{rgb}{'
-                           + str(redout) + ', ' + str(greenout)
-                           + ', ' + str(blueout) + '}\n'
-                           + '\\color{document_fontcolor}\n')
+    i = find_token(document.header, "\\fontcolor", 0)  # only the first match is handled
+    if i == -1:
+        return
+    colorcode = get_value(document.header, '\\fontcolor', i)  # read at the line just found
+    del document.header[i]  # the header line is dropped whatever its value
+    # "#000000" is treated as "font color not set": don't clutter the preamble
+    if colorcode == "#000000":
+        return
+    # the color code is in the form #rrggbb where every character denotes a hex number
+    red = hex2ratio(colorcode[1:3])  # presumably hex byte -> ratio text; see lyx2lyx_tools
+    green = hex2ratio(colorcode[3:5])
+    blue = hex2ratio(colorcode[5:7])
+    # write the preamble, passed as a list with one entry per line
+    insert_to_preamble(document,
+      ['%  Set the font color',
+      '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
+      '\\definecolor{document_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue),
+      '\\color{document_fontcolor}'])
+
  
  def revert_shadedboxcolor(document):
      " Reverts shaded box color to preamble code "
-    i = 0
-    colorcode = ""
-    while True:
-      i = find_token(document.header, "\\boxbgcolor", i)
-      if i == -1:
-          return
-      colorcode = get_value(document.header, '\\boxbgcolor', 0)
-      del document.header[i]
-      # the color code is in the form #rrggbb where every character denotes a hex number
-      # convert the string to an int
-      red = string.atoi(colorcode[1:3],16)
-      # we want the output "0.5" for the value "127" therefore increment here
-      if red != 0:
-          red = red + 1
-      redout = float(red) / 256
-      green = string.atoi(colorcode[3:5],16)
-      if green != 0:
-          green = green + 1
-      greenout = float(green) / 256
-      blue = string.atoi(colorcode[5:7],16)
-      if blue != 0:
-          blue = blue + 1
-      blueout = float(blue) / 256
-      # write the preamble
-      insert_to_preamble(0, document,
-                           '% Commands inserted by lyx2lyx to set the color\n'
-                           '% of boxes with shaded background\n'
-                           + '\\@ifundefined{definecolor}{\\usepackage{color}}{}\n'
-                           + '\\definecolor{shadecolor}{rgb}{'
-                           + str(redout) + ', ' + str(greenout)
-                           + ', ' + str(blueout) + '}\n')
+    i = find_token(document.header, "\\boxbgcolor", 0)  # only the first match is handled
+    if i == -1:
+        return
+    colorcode = get_value(document.header, '\\boxbgcolor', i)  # read at the line just found
+    del document.header[i]  # the header setting is removed from the document
+    # the color code is in the form #rrggbb (two hex digits per channel)
+    red = hex2ratio(colorcode[1:3])  # presumably hex byte -> ratio text; see lyx2lyx_tools
+    green = hex2ratio(colorcode[3:5])
+    blue = hex2ratio(colorcode[5:7])
+    # write the preamble, passed as a list with one entry per line
+    insert_to_preamble(document,
+      ['%  Set the color of boxes with shaded background',
+      '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
+      "\\definecolor{shadecolor}{rgb}{%s,%s,%s}" % (red, green, blue)])
  
  
  def revert_lyx_version(document):
@@ -1592,20 +1336,17 @@ def revert_lyx_version(document):
              return
          j = find_end_of_inset(document.body, i + 1)
          if j == -1:
-            # should not happen
              document.warning("Malformed LyX document: Could not find end of Info inset.")
+            i += 1
+            continue
+
          # We expect:
          # \begin_inset Info
          # type  "lyxinfo"
          # arg   "version"
          # \end_inset
-        # but we shall try to be forgiving.
-        arg = typ = ""
-        for k in range(i, j):
-            if document.body[k].startswith("arg"):
-                arg = document.body[k][3:].strip().strip('"')
-            if document.body[k].startswith("type"):
-                typ = document.body[k][4:].strip().strip('"')
+        typ = get_quoted_value(document.body, "type", i, j)
+        arg = get_quoted_value(document.body, "arg", i, j)
          if arg != "version" or typ != "lyxinfo":
              i = j + 1
              continue
@@ -1624,19 +1365,12 @@ def revert_lyx_version(document):
  
  def revert_math_scale(document):
    " Remove math scaling and LaTeX options "
-  i = find_token(document.header, '\\html_math_img_scale', 0)
-  if i != -1:
-    del document.header[i]
-  i = find_token(document.header, '\\html_latex_start', 0)
-  if i != -1:
-    del document.header[i]
-  i = find_token(document.header, '\\html_latex_end', 0)
-  if i != -1:
-    del document.header[i]
+  del_token(document.header, '\\html_math_img_scale', 0)  # del_token stands in for the find_token/del pair above
+  del_token(document.header, '\\html_latex_start', 0)  # NOTE(review): presumably a no-op when the token is absent -- confirm in parser_tools
+  del_token(document.header, '\\html_latex_end', 0)
  
  
  def revert_pagesizes(document):
-  i = 0
    " Revert page sizes to default "
    i = find_token(document.header, '\\papersize', 0)
    if i != -1:
@@ -1650,7 +1384,6 @@ def revert_pagesizes(document):
  
  
  def revert_DIN_C_pagesizes(document):
-  i = 0
    " Revert DIN C page sizes to default "
    i = find_token(document.header, '\\papersize', 0)
    if i != -1:
@@ -1669,7 +1402,7 @@ def convert_html_quotes(document):
      line = document.header[i]
      l = re.compile(r'\\html_latex_start\s+"(.*)"')
      m = l.match(line)
-    if m != None:
+    if m:
        document.header[i] = "\\html_latex_start " + m.group(1)
        
    i = find_token(document.header, '\\html_latex_end', 0)
@@ -1677,7 +1410,7 @@ def convert_html_quotes(document):
      line = document.header[i]
      l = re.compile(r'\\html_latex_end\s+"(.*)"')
      m = l.match(line)
-    if m != None:
+    if m:
        document.header[i] = "\\html_latex_end " + m.group(1)
        
  
@@ -1689,126 +1422,58 @@ def revert_html_quotes(document):
      line = document.header[i]
      l = re.compile(r'\\html_latex_start\s+(.*)')
      m = l.match(line)
-    document.header[i] = "\\html_latex_start \"" + m.group(1) + "\""
+    if not m:
+        document.warning("Weird html_latex_start line: " + line)
+        del document.header[i]
+    else:
+        document.header[i] = "\\html_latex_start \"" + m.group(1) + "\""
        
    i = find_token(document.header, '\\html_latex_end', 0)
    if i != -1:
      line = document.header[i]
      l = re.compile(r'\\html_latex_end\s+(.*)')
      m = l.match(line)
-    document.header[i] = "\\html_latex_end \"" + m.group(1) + "\""
+    if not m:
+        document.warning("Weird html_latex_end line: " + line)
+        del document.header[i]
+    else:
+        document.header[i] = "\\html_latex_end \"" + m.group(1) + "\""
  
  
  def revert_output_sync(document):
    " Remove forward search options "
-  i = find_token(document.header, '\\output_sync_macro', 0)
-  if i != -1:
-    del document.header[i]
-  i = find_token(document.header, '\\output_sync', 0)
-  if i != -1:
-    del document.header[i]
-
-
-def convert_beamer_args(document):
-  " Convert ERT arguments in Beamer to InsetArguments "
-
-  if document.textclass != "beamer" and document.textclass != "article-beamer":
-    return
-  
-  layouts = ("Block", "ExampleBlock", "AlertBlock")
-  for layout in layouts:
-    blay = 0
-    while True:
-      blay = find_token(document.body, '\\begin_layout ' + layout, blay)
-      if blay == -1:
-        break
-      elay = find_end_of(document.body, blay, '\\begin_layout', '\\end_layout')
-      if elay == -1:
-        document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
-        blay += 1
-        continue
-      bert = find_token(document.body, '\\begin_inset ERT', blay)
-      if bert == -1:
-        document.warning("Malformed Beamer LyX document: Can't find argument of " + layout + " layout.")
-        blay = elay + 1
-        continue
-      eert = find_end_of_inset(document.body, bert)
-      if eert == -1:
-        document.warning("Malformed LyX document: Can't find end of ERT.")
-        blay = elay + 1
-        continue
-      
-      # So the ERT inset begins at line k and goes to line l. We now wrap it in 
-      # an argument inset.
-      # Do the end first, so as not to mess up the variables.
-      document.body[eert + 1:eert + 1] = ['', '\\end_layout', '', '\\end_inset', '']
-      document.body[bert:bert] = ['\\begin_inset OptArg', 'status open', '', 
-          '\\begin_layout Plain Layout']
-      blay = elay + 9
-
-
-def revert_beamer_args(document):
-  " Revert Beamer arguments to ERT "
-  
-  if document.textclass != "beamer" and document.textclass != "article-beamer":
-    return
-    
-  layouts = ("Block", "ExampleBlock", "AlertBlock")
-  for layout in layouts:
-    blay = 0
-    while True:
-      blay = find_token(document.body, '\\begin_layout ' + layout, blay)
-      if blay == -1:
-        break
-      elay = find_end_of(document.body, blay, '\\begin_layout', '\\end_layout')
-      if elay == -1:
-        document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
-        blay += 1
-        continue
-      bopt = find_token(document.body, '\\begin_inset OptArg', blay)
-      if bopt == -1:
-        # it is legal not to have one of these
-        blay = elay + 1
-        continue
-      eopt = find_end_of_inset(document.body, bopt)
-      if eopt == -1:
-        document.warning("Malformed LyX document: Can't find end of argument.")
-        blay = elay + 1
-        continue
-      bplay = find_token(document.body, '\\begin_layout Plain Layout', blay)
-      if bplay == -1:
-        document.warning("Malformed LyX document: Can't find plain layout.")
-        blay = elay + 1
-        continue
-      eplay = find_end_of(document.body, bplay, '\\begin_layout', '\\end_layout')
-      if eplay == -1:
-        document.warning("Malformed LyX document: Can't find end of plain layout.")
-        blay = elay + 1
-        continue
-      # So the content of the argument inset goes from bplay + 1 to eplay - 1
-      bcont = bplay + 1
-      if bcont >= eplay:
-        # Hmm.
-        document.warning(str(bcont) + " " + str(eplay))
-        blay = blay + 1
-        continue
-      # we convert the content of the argument into pure LaTeX...
-      content = lyx2latex(document, document.body[bcont:eplay])
-      strlist = put_cmd_in_ert(["{" + content + "}"])
-      
-      # now replace the optional argument with the ERT
-      document.body[bopt:eopt + 1] = strlist
-      blay = blay + 1
+  del_token(document.header, '\\output_sync_macro', 0)  # same order as before: the longer token first
+  del_token(document.header, '\\output_sync', 0)  # NOTE(review): this token is a prefix of the one above -- confirm del_token's matching rule
  
  
  def revert_align_decimal(document):
-  l = 0
+  i = 0
    while True:
-    l = document.body[l].find('alignment=decimal')
-    if l == -1:
-        break
-    remove_option(document, l, 'decimal_point')
-    document.body[l].replace('decimal', 'center')
+    i = find_token(document.body, "\\begin_inset Tabular", i)
+    if i == -1:
+      return
+    j = find_end_of_inset(document.body, i)
+    if j == -1:
+      document.warning("Unable to find end of Tabular inset at line " + str(i))
+      i += 1
+      continue
+    cell = find_token(document.body, "<cell", i, j)
+    if cell == -1:
+      document.warning("Can't find any cells in Tabular inset at line " + str(i))
+      i = j
+      continue
+    k = i + 1
+    while True:
+      k = find_token(document.body, "<column", k, cell)
+      if k == -1:
+        return
+      if document.body[k].find('alignment="decimal"') == -1:
+        k += 1
+        continue
+      remove_option(document.body, k, 'decimal_point')
+      document.body[k] = \
+        document.body[k].replace('alignment="decimal"', 'alignment="center"')
+      k += 1
  
  
  def convert_optarg(document):
@@ -1837,75 +1502,84 @@ def revert_makebox(document):
    " Convert \\makebox to TeX code "
    i = 0
    while 1:
-    # only revert frameless boxes without an inner box
-    i = find_token(document.body, '\\begin_inset Box Frameless', i)
+    i = find_token(document.body, '\\begin_inset Box', i)
      if i == -1:
-      # remove the option use_makebox
-      revert_use_makebox(document)
-      return
+      break
      z = find_end_of_inset(document.body, i)
      if z == -1:
        document.warning("Malformed LyX document: Can't find end of box inset.")
-      return
-    j = find_token(document.body, 'use_makebox 1', i)
-    # assure we found the makebox of the current box
-    if j < z and j != -1:
-      y = find_token(document.body, "\\begin_layout", i)
-      if y > z or y == -1:
-        document.warning("Malformed LyX document: Can't find layout in box.")
-        return
-      # remove the \end_layout \end_inset pair
-      document.body[z - 2:z + 1] = put_cmd_in_ert("}")
-      # determine the alignment
-      k = find_token(document.body, 'hor_pos', j - 4)
-      align = document.body[k][9]
-      # determine the width
-      l = find_token(document.body, 'width "', j + 1)
-      length = document.body[l][7:]
-      # remove trailing '"'
-      length = length[:-1]
-      # latex_length returns "bool,length"
-      length = latex_length(length).split(",")[1]
-      subst = "\\makebox[" + length + "][" \
-        + align + "]{"
-      document.body[i:y + 1] = put_cmd_in_ert(subst)
+      i += 1
+      continue
+    blay = find_token(document.body, "\\begin_layout", i, z)
+    if blay == -1:
+      document.warning("Malformed LyX document: Can't find layout in box.")
+      i = z
+      continue
+    # by looking before the layout we make sure we're actually finding
+    # an option, not text.
+    j = find_token(document.body, 'use_makebox', i, blay)
+    if j == -1:
+        i = z
+        continue
+    # get_value yields the option's text, so it must be compared with the string "1"
+    if not check_token(document.body[i], "\\begin_inset Box Frameless") \
+      or get_value(document.body, 'use_makebox', j) != "1":
+        del document.body[j]
+        i = z
+        continue
+    bend = find_end_of_layout(document.body, blay)
+    if bend == -1 or bend > z:
+        document.warning("Malformed LyX document: Can't find end of layout in box.")
+        i = z
+        continue
+    # determine the alignment
+    align = get_quoted_value(document.body, 'hor_pos', i, blay, "c")
+    # determine the width
+    length = get_quoted_value(document.body, 'width', i, blay, "50col%")
+    length = latex_length(length)[1]
+    # remove the \end_layout \end_inset pair
+    document.body[bend:z + 1] = put_cmd_in_ert("}")
+    subst = "\\makebox[" + length + "][" \
+      + align + "]{"
+    document.body[i:blay + 1] = put_cmd_in_ert(subst)
      i += 1
  
  
-def revert_use_makebox(document):
-  " Deletes use_makebox option of boxes "
-  h = 0
-  while 1:
-    # remove the option use_makebox
-    h = find_token(document.body, 'use_makebox', 0)
-    if h == -1:
-      return
-    del document.body[h]
-    h += 1
-
-
  def convert_use_makebox(document):
    " Adds use_makebox option for boxes "
    i = 0
    while 1:
-    # remove the option use_makebox
      i = find_token(document.body, '\\begin_inset Box', i)
      if i == -1:
        return
-    k = find_token(document.body, 'use_parbox', i)
+    # all of this is to make sure we actually find the use_parbox
+    # that is an option for this box, not some text elsewhere.
+    z = find_end_of_inset(document.body, i)
+    if z == -1:
+      document.warning("Can't find end of box inset!!")
+      i += 1  # step past this line only and keep scanning
+      continue
+    blay = find_token(document.body, "\\begin_layout", i, z)
+    if blay == -1:
+      document.warning("Can't find layout in box inset!!")
+      i = z  # give up on this inset and resume at its end
+      continue
+    # so now we are looking for use_parbox before the box's layout
+    k = find_token(document.body, 'use_parbox', i, blay)
      if k == -1:
        document.warning("Malformed LyX document: Can't find use_parbox statement in box.")
-      return
+      i = z  # skip this box instead of aborting the whole conversion
+      continue
      document.body.insert(k + 1, "use_makebox 0")
-    i = k + 1
+    i = blay + 1 # not z + 1 (box insets may be nested)
  
  
  def revert_IEEEtran(document):
    " Convert IEEEtran layouts and styles to TeX code "
    if document.textclass != "IEEEtran":
      return
-  revert_flex_inset(document, "IEEE membership", "\\IEEEmembership", 0)
-  revert_flex_inset(document, "Lowercase", "\\MakeLowercase", 0)
+  revert_flex_inset(document.body, "IEEE membership", "\\IEEEmembership")
+  revert_flex_inset(document.body, "Lowercase", "\\MakeLowercase")
    layouts = ("Special Paper Notice", "After Title Text", "Publication ID",
               "Page headings", "Biography without photo")
    latexcmd = {"Special Paper Notice": "\\IEEEspecialpapernotice",
@@ -1919,7 +1593,7 @@ def revert_IEEEtran(document):
          i = find_token(document.body, '\\begin_layout ' + layout, i)
          if i == -1:
            break
-        j = find_end_of(document.body, i, '\\begin_layout', '\\end_layout')
+        j = find_end_of_layout(document.body, i)
          if j == -1:
            document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
            i += 1
@@ -1927,10 +1601,11 @@ def revert_IEEEtran(document):
          if layout in obsoletedby:
            document.body[i] = "\\begin_layout " + obsoletedby[layout]
            i = j
-        else:
-          content = lyx2latex(document, document.body[i:j + 1])
-          add_to_preamble(document, [latexcmd[layout] + "{" + content + "}"])
-          del document.body[i:j + 1]
+          continue
+        content = lyx2latex(document, document.body[i:j + 1])
+        add_to_preamble(document, [latexcmd[layout] + "{" + content + "}"])
+        del document.body[i:j + 1]
+        # no need to reset i
  
  
  def convert_prettyref(document):
@@ -1948,8 +1623,8 @@ def convert_prettyref(document):
                         document.warning("Malformed LyX document: No end of InsetRef!")
                         i += 1
                         continue
-               k = find_token(document.body, "LatexCommand prettyref", i)
-               if k != -1 and k < j:
+               k = find_token(document.body, "LatexCommand prettyref", i, j)
+               if k != -1:
                         document.body[k] = "LatexCommand formatted"
                 i = j + 1
         document.header.insert(-1, "\\use_refstyle 0")
@@ -1970,8 +1645,8 @@ def revert_refstyle(document):
                         document.warning("Malformed LyX document: No end of InsetRef")
                         i += 1
                         continue
-               k = find_token(document.body, "LatexCommand formatted", i)
-               if k != -1 and k < j:
+               k = find_token(document.body, "LatexCommand formatted", i, j)
+               if k != -1:
                         document.body[k] = "LatexCommand prettyref"
                 i = j + 1
         i = find_token(document.header, "\\use_refstyle", 0)
@@ -1996,23 +1671,16 @@ def revert_nameref(document):
        cmdloc = i
        i += 1
        # Make sure it is actually in an inset!
-      # We could just check document.lines[i-1], but that relies
-      # upon something that might easily change.
-      # We'll look back a few lines.
-      stins = cmdloc - 10
-      if stins < 0:
-        stins = 0
-      stins = find_token(document.body, "\\begin_inset CommandInset ref", stins)
-      if stins == -1 or stins > cmdloc:
-        continue
-      endins = find_end_of_inset(document.body, stins)
-      if endins == -1:
-        document.warning("Can't find end of inset at line " + stins + "!!")
-        continue
-      if endins < cmdloc:
-        continue
-      refline = find_token(document.body, "reference", stins)
-      if refline == -1 or refline > endins:
+      # A normal line could begin with "LatexCommand nameref"!
+      val = is_in_inset(document.body, cmdloc, \
+          "\\begin_inset CommandInset ref")
+      if not val:
+          continue
+      stins, endins = val
+
+      # ok, so it is in an InsetRef
+      refline = find_token(document.body, "reference", stins, endins)
+      if refline == -1:
          document.warning("Can't find reference for inset at line " + stinst + "!!")
          continue
        m = rx.match(document.body[refline])
@@ -2021,12 +1689,11 @@ def revert_nameref(document):
          continue
        foundone = True
        ref = m.group(1)
-      newcontent = ['\\begin_inset ERT', 'status collapsed', '', \
-        '\\begin_layout Plain Layout', '', '\\backslash', \
-        cmd + '{' + ref + '}', '\\end_layout', '', '\\end_inset']
+      newcontent = put_cmd_in_ert('\\' + cmd + '{' + ref + '}')
        document.body[stins:endins + 1] = newcontent
+
    if foundone:
-    add_to_preamble(document, "\usepackage{nameref}")
+    add_to_preamble(document, ["\usepackage{nameref}"])
  
  
  def remove_Nameref(document):
@@ -2042,20 +1709,9 @@ def remove_Nameref(document):
      i += 1
      
      # Make sure it is actually in an inset!
-    # We could just check document.lines[i-1], but that relies
-    # upon something that might easily change.
-    # We'll look back a few lines.
-    stins = cmdloc - 10
-    if stins < 0:
-      stins = 0
-    stins = find_token(document.body, "\\begin_inset CommandInset ref", stins)
-    if stins == -1 or stins > cmdloc:
-      continue
-    endins = find_end_of_inset(document.body, stins)
-    if endins == -1:
-      document.warning("Can't find end of inset at line " + stins + "!!")
-      continue
-    if endins < cmdloc:
+    val = is_in_inset(document.body, cmdloc, \
+        "\\begin_inset CommandInset ref")
+    if not val:
        continue
      document.body[cmdloc] = "LatexCommand nameref"
  
@@ -2063,16 +1719,10 @@ def remove_Nameref(document):
  def revert_mathrsfs(document):
      " Load mathrsfs if \mathrsfs us use in the document "
      i = 0
-    end = len(document.body) - 1
-    while True:
-      j = document.body[i].find("\\mathscr{")
-      if j != -1:
-        add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
+    for line in document.body:  # NOTE(review): the index i set above is not used by this loop
+      if line.find("\\mathscr{") != -1:  # substring match anywhere in the line
          add_to_preamble(document, ["\\usepackage{mathrsfs}"])
-        break
-      if i == end:
-        break
-      i += 1
+        return  # the first occurrence is enough; stop scanning
  
  
  def convert_flexnames(document):
@@ -2090,66 +1740,66 @@ def convert_flexnames(document):
        i += 1
  
  
-flex_insets = [
-  ["Alert", "CharStyle:Alert"],
-  ["Code", "CharStyle:Code"],
-  ["Concepts", "CharStyle:Concepts"],
-  ["E-Mail", "CharStyle:E-Mail"],
-  ["Emph", "CharStyle:Emph"],
-  ["Expression", "CharStyle:Expression"],
-  ["Initial", "CharStyle:Initial"],
-  ["Institute", "CharStyle:Institute"],
-  ["Meaning", "CharStyle:Meaning"],
-  ["Noun", "CharStyle:Noun"],
-  ["Strong", "CharStyle:Strong"],
-  ["Structure", "CharStyle:Structure"],
-  ["ArticleMode", "Custom:ArticleMode"],
-  ["Endnote", "Custom:Endnote"],
-  ["Glosse", "Custom:Glosse"],
-  ["PresentationMode", "Custom:PresentationMode"],
-  ["Tri-Glosse", "Custom:Tri-Glosse"]
-]
-
-flex_elements = [
-  ["Abbrev", "Element:Abbrev"],
-  ["CCC-Code", "Element:CCC-Code"],
-  ["Citation-number", "Element:Citation-number"],
-  ["City", "Element:City"],
-  ["Code", "Element:Code"],
-  ["CODEN", "Element:CODEN"],
-  ["Country", "Element:Country"],
-  ["Day", "Element:Day"],
-  ["Directory", "Element:Directory"],
-  ["Dscr", "Element:Dscr"],
-  ["Email", "Element:Email"],
-  ["Emph", "Element:Emph"],
-  ["Filename", "Element:Filename"],
-  ["Firstname", "Element:Firstname"],
-  ["Fname", "Element:Fname"],
-  ["GuiButton", "Element:GuiButton"],
-  ["GuiMenu", "Element:GuiMenu"],
-  ["GuiMenuItem", "Element:GuiMenuItem"],
-  ["ISSN", "Element:ISSN"],
-  ["Issue-day", "Element:Issue-day"],
-  ["Issue-months", "Element:Issue-months"],
-  ["Issue-number", "Element:Issue-number"],
-  ["KeyCap", "Element:KeyCap"],
-  ["KeyCombo", "Element:KeyCombo"],
-  ["Keyword", "Element:Keyword"],
-  ["Literal", "Element:Literal"],
-  ["MenuChoice", "Element:MenuChoice"],
-  ["Month", "Element:Month"],
-  ["Orgdiv", "Element:Orgdiv"],
-  ["Orgname", "Element:Orgname"],
-  ["Postcode", "Element:Postcode"],
-  ["SS-Code", "Element:SS-Code"],
-  ["SS-Title", "Element:SS-Title"],
-  ["State", "Element:State"],
-  ["Street", "Element:Street"],
-  ["Surname", "Element:Surname"],
-  ["Volume", "Element:Volume"],
-  ["Year", "Element:Year"]
-]
+flex_insets = {  # bare style name -> prefixed name (CharStyle:/Custom:); a dict so lookup is by key
+  "Alert" : "CharStyle:Alert",
+  "Code" : "CharStyle:Code",
+  "Concepts" : "CharStyle:Concepts",
+  "E-Mail" : "CharStyle:E-Mail",
+  "Emph" : "CharStyle:Emph",
+  "Expression" : "CharStyle:Expression",
+  "Initial" : "CharStyle:Initial",
+  "Institute" : "CharStyle:Institute",
+  "Meaning" : "CharStyle:Meaning",
+  "Noun" : "CharStyle:Noun",
+  "Strong" : "CharStyle:Strong",
+  "Structure" : "CharStyle:Structure",
+  "ArticleMode" : "Custom:ArticleMode",
+  "Endnote" : "Custom:Endnote",
+  "Glosse" : "Custom:Glosse",
+  "PresentationMode" : "Custom:PresentationMode",
+  "Tri-Glosse" : "Custom:Tri-Glosse"
+}
+
+flex_elements = {  # bare element name -> "Element:"-prefixed name; same shape as flex_insets
+  "Abbrev" : "Element:Abbrev",
+  "CCC-Code" : "Element:CCC-Code",
+  "Citation-number" : "Element:Citation-number",
+  "City" : "Element:City",
+  "Code" : "Element:Code",
+  "CODEN" : "Element:CODEN",
+  "Country" : "Element:Country",
+  "Day" : "Element:Day",
+  "Directory" : "Element:Directory",
+  "Dscr" : "Element:Dscr",
+  "Email" : "Element:Email",
+  "Emph" : "Element:Emph",
+  "Filename" : "Element:Filename",
+  "Firstname" : "Element:Firstname",
+  "Fname" : "Element:Fname",
+  "GuiButton" : "Element:GuiButton",
+  "GuiMenu" : "Element:GuiMenu",
+  "GuiMenuItem" : "Element:GuiMenuItem",
+  "ISSN" : "Element:ISSN",
+  "Issue-day" : "Element:Issue-day",
+  "Issue-months" : "Element:Issue-months",
+  "Issue-number" : "Element:Issue-number",
+  "KeyCap" : "Element:KeyCap",
+  "KeyCombo" : "Element:KeyCombo",
+  "Keyword" : "Element:Keyword",
+  "Literal" : "Element:Literal",
+  "MenuChoice" : "Element:MenuChoice",
+  "Month" : "Element:Month",
+  "Orgdiv" : "Element:Orgdiv",
+  "Orgname" : "Element:Orgname",
+  "Postcode" : "Element:Postcode",
+  "SS-Code" : "Element:SS-Code",
+  "SS-Title" : "Element:SS-Title",
+  "State" : "Element:State",
+  "Street" : "Element:Street",
+  "Surname" : "Element:Surname",
+  "Volume" : "Element:Volume",
+  "Year" : "Element:Year"
+}
  
  
  def revert_flexnames(document):
@@ -2169,100 +1819,116 @@ def revert_flexnames(document):
        document.warning("Illegal flex inset: " + document.body[i])
        i += 1
        continue
-    
      style = m.group(1)
-    for f in flexlist:
-      if f[0] == style:
-        document.body[i] = "\\begin_inset Flex " + f[1]
-        break
-
+    if style in flexlist:
+      document.body[i] = "\\begin_inset Flex " + flexlist[style]
      i += 1
  
  
  def convert_mathdots(document):
      " Load mathdots automatically "
-    while True:
+    i = find_token(document.header, "\\use_mhchem" , 0)  # preferred anchor line
+    if i == -1:  # no \use_mhchem line: fall back to \use_esint
        i = find_token(document.header, "\\use_esint" , 0)
-      if i != -1:
-        document.header.insert(i + 1, "\\use_mathdots 1")
-      break
+    if i != -1:  # nothing is inserted when neither anchor line exists
+      document.header.insert(i + 1, "\\use_mathdots 1")
  
  
  def revert_mathdots(document):
      " Load mathdots if used in the document "
-    i = 0
-    ddots = re.compile(r'\\begin_inset Formula .*\\ddots', re.DOTALL)
-    vdots = re.compile(r'\\begin_inset Formula .*\\vdots', re.DOTALL)
-    iddots = re.compile(r'\\begin_inset Formula .*\\iddots', re.DOTALL)
+
      mathdots = find_token(document.header, "\\use_mathdots" , 0)
-    no = find_token(document.header, "\\use_mathdots 0" , 0)
-    auto = find_token(document.header, "\\use_mathdots 1" , 0)
-    yes = find_token(document.header, "\\use_mathdots 2" , 0)
-    if mathdots != -1:
+    if mathdots == -1:
+      document.warning("No \\usemathdots line. Assuming auto.")
+    else:
+      val = get_value(document.header, "\\use_mathdots", mathdots)
        del document.header[mathdots]
+      try:
+        usedots = int(val)
+      except:
+        document.warning("Invalid \\use_mathdots value: " + val + ". Assuming auto.")
+        # probably usedots has not been changed, but be safe.
+        usedots = 1
+
+      if usedots == 0:
+        # do not load case
+        return
+      if usedots == 2:
+        # force load case
+        add_to_preamble(["\\usepackage{mathdots}"])
+        return
+    
+    # so we are in the auto case. we want to load mathdots if \iddots is used.
+    i = 0
      while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
          return
        j = find_end_of_inset(document.body, i)
        if j == -1:
-        document.warning("Malformed LyX document: Can't find end of Formula inset.")
-        return 
-      k = ddots.search("\n".join(document.body[i:j]))
-      l = vdots.search("\n".join(document.body[i:j]))
-      m = iddots.search("\n".join(document.body[i:j]))
-      if (yes == -1) and ((no != -1) or (not k and not l and not m) or (auto != -1 and not m)):
+        document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
          i += 1
          continue
-      # use \@ifundefined to catch also the "auto" case
-      add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-      add_to_preamble(document, ["\\@ifundefined{iddots}{\\usepackage{mathdots}}\n"])
-      return
+      code = "\n".join(document.body[i:j])
+      if code.find("\\iddots") != -1:
+        add_to_preamble(document, ["\\@ifundefined{iddots}{\\usepackage{mathdots}}"])
+        return
+      i = j
  
  
  def convert_rule(document):
-    " Convert \\lyxline to CommandInset line "
+    " Convert \\lyxline to CommandInset line. "
      i = 0
+    
+    inset = ['\\begin_inset CommandInset line',
+      'LatexCommand rule',
+      'offset "0.5ex"',
+      'width "100line%"',
+      'height "1pt"', '',
+      '\\end_inset', '', '']
+
+    # if paragraphs are indented, we may have to unindent to get the
+    # line to be full-width.
+    indent = get_value(document.header, "\\paragraph_separation", 0)
+    have_indent = (indent == "indent")
+
      while True:
        i = find_token(document.body, "\\lyxline" , i)
        if i == -1:
          return
-        
-      j = find_token(document.body, "\\color" , i - 2)
-      if j == i - 2:
-        color = document.body[j] + '\n'
-      else:
-        color = ''
-      k = find_token(document.body, "\\begin_layout Standard" , i - 4)
-      # we need to handle the case that \lyxline is in a separate paragraph and that it is colored
-      # the result is then an extra empty paragraph which we get by adding an empty ERT inset
-      if k == i - 4 and j == i - 2 and document.body[i - 1] == '':
-        layout = '\\begin_inset ERT\nstatus collapsed\n\n\\begin_layout Plain Layout\n\n\n\\end_layout\n\n\\end_inset\n' \
-          + '\\end_layout\n\n' \
-          + '\\begin_layout Standard\n'
-      elif k == i - 2 and document.body[i - 1] == '':
-        layout = ''
-      else:
-        layout = '\\end_layout\n\n' \
-          + '\\begin_layout Standard\n'
-      l = find_token(document.body, "\\begin_layout Standard" , i + 4)
-      if l == i + 4 and document.body[i + 1] == '':
-        layout2 = ''
+
+      # we need to find out if this line follows other content
+      # in its paragraph. find its layout....
+      lastlay = find_token_backwards(document.body, "\\begin_layout", i)
+      if lastlay == -1:
+        document.warning("Can't find layout for line at " + str(i))
+        # do the best we can.
+        document.body[i:i+1] = inset
+        i += len(inset)
+        continue
+
+      # ...and look for other content before it.
+      lineisfirst = True
+      for line in document.body[lastlay + 1:i]:
+        # is it empty or a paragraph option?
+        if not line or line[0] == '\\':
+          continue
+        lineisfirst = False
+        break
+
+      if lineisfirst:
+        document.body[i:i+1] = inset
+        if indent:
+          # we need to unindent, lest the line be too long
+          document.body.insert(lastlay + 1, "\\noindent")
+        i += len(inset)
        else:
-        layout2 = '\\end_layout\n' \
-          + '\n\\begin_layout Standard\n'
-      subst = layout \
-        + '\\noindent\n\n' \
-        + color \
-        + '\\begin_inset CommandInset line\n' \
-        + 'LatexCommand rule\n' \
-        + 'offset "0.5ex"\n' \
-        + 'width "100line%"\n' \
-        + 'height "1pt"\n' \
-        + '\n\\end_inset\n\n\n' \
-        + layout2
-      document.body[i] = subst
-      i += 1
+        # so our line is in the middle of a paragraph
+        # we need to add a new line, lest this line follow the
+        # other content on that line and run off the side of the page
+        document.body[i:i+1] = inset
+        document.body[i:i] = ["\\begin_inset Newline newline", "\\end_inset", ""]
+      i += len(inset)
  
  
  def revert_rule(document):
@@ -2274,41 +1940,28 @@ def revert_rule(document):
          return
        # find end of inset
        j = find_token(document.body, "\\end_inset" , i)
-      # assure we found the end_inset of the current inset
-      if j > i + 6 or j == -1:
+      if j == -1:
          document.warning("Malformed LyX document: Can't find end of line inset.")
          return
        # determine the optional offset
-      k = find_token(document.body, 'offset', i, j)
-      if k != -1:
-        offset = document.body[k][8:-1]
-      else:
-        offset = ""
+      offset = get_quoted_value(document.body, 'offset', i, j)
+      if offset:
+        offset = '[' + offset + ']'
        # determine the width
-      l = find_token(document.body, 'width', i, j)
-      if l != -1:
-        width = document.body[l][7:-1]
-      else:
-        width = "100col%"
+      width = get_quoted_value(document.body, 'width', i, j, "100col%")
+      width = latex_length(width)[1]
        # determine the height
-      m = find_token(document.body, 'height', i, j)
-      if m != -1:
-        height = document.body[m][8:-1]
-      else:
-        height = "1pt"
+      height = get_quoted_value(document.body, 'height', i, j, "1pt")
+      height = latex_length(height)[1]
        # output the \rule command
-      if offset:
-        subst = "\\rule[" + offset + "]{" + width + "}{" + height + "}"
-      else:
-        subst = "\\rule{" + width + "}{" + height + "}"
+      subst = "\\rule[" + offset + "]{" + width + "}{" + height + "}"
        document.body[i:j + 1] = put_cmd_in_ert(subst)
-      i += 1
+      i += len(subst) - (j - i)
  
  
  def revert_diagram(document):
    " Add the feyn package if \\Diagram is used in math "
    i = 0
-  re_diagram = re.compile(r'\\begin_inset Formula .*\\Diagram', re.DOTALL)
    while True:
      i = find_token(document.body, '\\begin_inset Formula', i)
      if i == -1:
@@ -2317,12 +1970,11 @@ def revert_diagram(document):
      if j == -1:
          document.warning("Malformed LyX document: Can't find end of Formula inset.")
          return 
-    m = re_diagram.search("\n".join(document.body[i:j]))
-    if not m:
-      i += 1
+    lines = "\n".join(document.body[i:j])
+    if lines.find("\\Diagram") == -1:
+      i = j
        continue
-    add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-    add_to_preamble(document, "\\usepackage{feyn}")
+    add_to_preamble(document, ["\\usepackage{feyn}"])
      # only need to do it once!
      return
  
@@ -2331,10 +1983,21 @@ def convert_bibtex_clearpage(document):
    " insert a clear(double)page bibliographystyle if bibtotoc option is used "
  
    i = find_token(document.header, '\\papersides', 0)
+  sides = 0
    if i == -1:
      document.warning("Malformed LyX document: Can't find papersides definition.")
-    return
-  sides = int(document.header[i][12])
+    document.warning("Assuming single sided.")
+    sides = 1
+  else:
+    val = get_value(document.header, "\\papersides", i)
+    try:
+      sides = int(val)
+    except:
+      pass
+    if sides != 1 and sides != 2:
+      document.warning("Invalid papersides value: " + val)
+      document.warning("Assuming single sided.")
+      sides = 1
  
    j = 0
    while True:
@@ -2349,47 +2012,389 @@ def convert_bibtex_clearpage(document):
        continue
  
      # only act if there is the option "bibtotoc"
-    m = find_token(document.body, 'options', j, k)
-    if m == -1:
+    val = get_value(document.body, 'options', j, k)
+    if not val:
        document.warning("Can't find options for bibliography inset at line " + str(j))
        j = k
        continue
      
-    optline = document.body[m]
-    idx = optline.find("bibtotoc")
-    if idx == -1:
+    if val.find("bibtotoc") == -1:
        j = k
        continue
      
      # so we want to insert a new page right before the paragraph that
-    # this bibliography thing is in. we'll look for it backwards.
-    lay = j - 1
-    while lay >= 0:
-      if document.body[lay].startswith("\\begin_layout"):
-        break
-      lay -= 1
-
-    if lay < 0:
+    # this bibliography thing is in. 
+    lay = find_token_backwards(document.body, "\\begin_layout", j)
+    if lay == -1:
        document.warning("Can't find layout containing bibliography inset at line " + str(j))
        j = k
        continue
  
-    subst1 = '\\begin_layout Standard\n' \
-      + '\\begin_inset Newpage clearpage\n' \
-      + '\\end_inset\n\n\n' \
-      + '\\end_layout\n'
-    subst2 = '\\begin_layout Standard\n' \
-      + '\\begin_inset Newpage cleardoublepage\n' \
-      + '\\end_inset\n\n\n' \
-      + '\\end_layout\n'
      if sides == 1:
-      document.body.insert(lay, subst1)
-      document.warning(subst1)
+      cmd = "clearpage"
      else:
-      document.body.insert(lay, subst2)
-      document.warning(subst2)
+      cmd = "cleardoublepage"
+    subst = ['\\begin_layout Standard',
+        '\\begin_inset Newpage ' + cmd,
+        '\\end_inset', '', '',
+        '\\end_layout', '']
+    document.body[lay:lay] = subst
+    j = k + len(subst)
+
+
+def check_passthru(document):
+  " Tell whether the document uses a literate class or the sweave/noweb module "
+  literate = ("literate-article", "literate-book", "literate-report")
+  if document.textclass in literate:
+    return True
+  # otherwise one of the loaded modules has to be sweave or noweb
+  for module in document.get_module_list():
+    if module in ("sweave", "noweb"):
+      return True
+  return False
+
+
+def convert_passthru(document):
+    " Split Chunk/Scrap layouts at newline insets: http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html "
+    if not check_passthru(document):
+      return
+
+    rx = re.compile(r"\\begin_layout \s*(\w+)")
+    for lay in ["Chunk", "Scrap"]:
+      beg = 0  # start from the top for each layout; the previous scan ended at -1
+      while True:
+        beg = find_token(document.body, "\\begin_layout " + lay, beg)
+        if beg == -1:
+          break
+        end = find_end_of_layout(document.body, beg)
+        if end == -1:
+          document.warning("Can't find end of layout at line " + str(beg))
+          beg += 1
+          continue
+        # we are now going to replace newline insets within this layout
+        # by new instances of this layout. so we have repeated layouts
+        # instead of newlines.
+        ns = beg
+        while True:
+          ns = find_token(document.body, "\\begin_inset Newline newline", ns, end)
+          if ns == -1:
+            break
+          ne = find_end_of_inset(document.body, ns)
+          if ne == -1 or ne > end:
+            document.warning("Can't find end of inset at line " + str(ns))
+            ns += 1
+            continue
+          if document.body[ne + 1] == "":
+            ne += 1
+          subst = ["\\end_layout", "", "\\begin_layout " + lay]
+          # keep end on the layout's \end_layout: it moves by the number of
+          # lines added minus the number of lines removed.
+          end += len(subst) - (ne + 1 - ns)
+          document.body[ns:ne + 1] = subst
+          # resume the search right after the layout we have just opened
+          ns += len(subst)
+        # ok, we now want to find out if the next layout is the
+        # same as this one. if so, we will insert an extra copy of it
+        didit = False
+        nxt = find_token(document.body, "\\begin_layout", end)
+        if nxt != -1:
+          m = rx.match(document.body[nxt])
+          if m:
+            nextlay = m.group(1)
+            if nextlay == lay:
+              subst = ["\\begin_layout " + lay, "", "\\end_layout", ""]
+              document.body[nxt:nxt] = subst
+              didit = True
+        beg = end + 1
+        if didit:
+          beg += 4 # for the extra layout
+    
+
+def revert_passthru(document):
+    " Merge adjacent Chunk/Scrap layouts, joined by newline insets: http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html "
+    if not check_passthru(document):
+      return
+    rx = re.compile(r"\\begin_layout \s*(\w+)")
+    for lay in ["Chunk", "Scrap"]:
+      beg = 0  # start from the top for each layout; the previous scan ended at -1
+      while True:
+        beg = find_token(document.body, "\\begin_layout " + lay, beg)
+        if beg == -1:
+          break
+        end = find_end_of_layout(document.body, beg)
+        if end == -1:
+          document.warning("Can't find end of layout at line " + str(beg))
+          beg += 1
+          continue
+
+        # we now want to find out if the next layout is the
+        # same as this one. but we will need to do this over and
+        # over again.
+        while True:
+          nxt = find_token(document.body, "\\begin_layout", end)
+          if nxt == -1:
+            break
+          m = rx.match(document.body[nxt])
+          if not m:
+            break
+          nextlay = m.group(1)
+          if nextlay != lay:
+            break
+          # so it is the same layout again. we now want to know if it is empty.
+          # but first let's check and make sure there is no content between the
+          # two layouts. i'm not sure if that can happen or not.
+          for l in range(end + 1, nxt):
+            # only blank lines are expected here; this merely warns otherwise
+            if document.body[l] != "":
+              document.warning("Found content between adjacent " + lay + " layouts!")
+              break
+          nextend = find_end_of_layout(document.body, nxt)
+          if nextend == -1:
+            document.warning("Can't find end of layout at line " + str(nxt))
+            break
+          empty = True
+          for l in range(nxt + 1, nextend):
+            # a single non-blank line means the layout has content
+            if document.body[l] != "":
+              empty = False
+              break
+          if empty:
+            # empty layouts just get removed
+            # should we check if it's before yet another such layout?
+            del document.body[nxt : nextend + 1]
+            # and we do not want to check again. we know the next layout
+            # should be another Chunk and should be left as is.
+            break
+          else:
+            # if it's not empty, then we want to insert a newline in place
+            # of the layout switch
+            subst = ["\\begin_inset Newline newline", "\\end_inset", ""]
+            document.body[end : nxt + 1] = subst
+            # and now we have to find the end of the new, larger layout
+            newend = find_end_of_layout(document.body, beg)
+            if newend == -1:
+              document.warning("Can't find end of new layout at line " + str(beg))
+              break
+            end = newend
+        beg = end + 1
+
+
+def revert_multirowOffset(document):
+    " Revert multirow cells that carry an offset (mroffset) to \\multirow TeX code in ERT "
+    # this routine is the same as the revert_multirow routine except that
+    # it checks additionally for the offset
+
+    # first, let's find out if we need to do anything
+    i = find_token(document.body, '<cell multirow="3" mroffset=', 0)
+    if i == -1:
+      return
+
+    add_to_preamble(document, ["\\usepackage{multirow}"])
+
+    rgx = re.compile(r'mroffset="[^"]+?"')  # matches a whole mroffset="..." option
+    begin_table = 0
+
+    while True:
+        # find begin/end of table
+        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
+        if begin_table == -1:
+            break
+        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
+        if end_table == -1:
+            document.warning("Malformed LyX document: Could not find end of table.")
+            begin_table += 1
+            continue
+        # does this table have multirow?
+        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
+        if i == -1:
+            begin_table = end_table
+            continue
+        
+        # store the number of rows and columns
+        numrows = get_option_value(document.body[begin_table], "rows")
+        numcols = get_option_value(document.body[begin_table], "columns")
+        try:
+          numrows = int(numrows)
+          numcols = int(numcols)
+        except:
+          document.warning(numrows)
+          document.warning("Unable to determine rows and columns!")
+          begin_table = end_table
+          continue
+
+        mrstarts = []  # [row, column] of each cell that starts an offset multirow
+        multirows = []  # per row, one [begin_cell, end_cell, kind] per column; kind is 0, 3 or 4
+        # collect info on rows and columns of this table.
+        begin_row = begin_table
+        for row in range(numrows):
+            begin_row = find_token(document.body, '<row>', begin_row, end_table)
+            if begin_row == -1:
+              document.warning("Can't find row " + str(row + 1))
+              break
+            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
+            if end_row == -1:
+              document.warning("Can't find end of row " + str(row + 1))
+              break
+            begin_cell = begin_row
+            multirows.append([])
+            for column in range(numcols):            
+                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
+                if begin_cell == -1:
+                  document.warning("Can't find column " + str(column + 1) + \
+                    "in row " + str(row + 1))
+                  break
+                # NOTE 
+                # this will fail if someone puts "</cell>" in a cell, but
+                # that seems fairly unlikely.
+                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
+                if end_cell == -1:
+                  document.warning("Can't find end of column " + str(column + 1) + \
+                    "in row " + str(row + 1))
+                  break
+                multirows[row].append([begin_cell, end_cell, 0])
+                if document.body[begin_cell].find('multirow="3" mroffset=') != -1:
+                  multirows[row][column][2] = 3 # begin multirow
+                  mrstarts.append([row, column])
+                elif document.body[begin_cell].find('multirow="4"') != -1:
+                  multirows[row][column][2] = 4 # in multirow
+                begin_cell = end_cell
+            begin_row = end_row
+        # end of table info collection
+
+        # work from the back to avoid messing up numbering
+        mrstarts.reverse()
+        for m in mrstarts:
+            row = m[0]
+            col = m[1]
+            # get column width (presumably the column lines start two lines below <lyxtabular>; verify)
+            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
+            # "0pt" means that no width is specified
+            if not col_width or col_width == "0pt":
+              col_width = "*"
+            # determine the number of cells that are part of the multirow
+            nummrs = 1
+            for r in range(row + 1, numrows):
+                if multirows[r][col][2] != 4:
+                  break
+                nummrs += 1
+                # take the opportunity to revert this line
+                lineno = multirows[r][col][0]
+                document.body[lineno] = document.body[lineno].\
+                  replace(' multirow="4" ', ' ').\
+                  replace('valignment="middle"', 'valignment="top"').\
+                  replace(' topline="true" ', ' ')
+                # remove bottom line of previous multirow-part cell
+                lineno = multirows[r-1][col][0]
+                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
+            # revert beginning cell
+            bcell = multirows[row][col][0]
+            ecell = multirows[row][col][1]
+            offset = get_option_value(document.body[bcell], "mroffset")
+            document.body[bcell] = document.body[bcell].\
+              replace(' multirow="3" ', ' ').\
+              replace('valignment="middle"', 'valignment="top"')
+            # remove mroffset option
+            document.body[bcell] = rgx.sub('', document.body[bcell])
+            
+            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
+            if blay == -1:
+              document.warning("Can't find layout for cell!")
+              continue
+            bend = find_end_of_layout(document.body, blay)
+            if bend == -1:
+              document.warning("Can't find end of layout for cell!")
+              continue
+            # do the later one first, so as not to mess up the numbering
+            # we are wrapping the whole cell in this ert
+            # so before the end of the layout...
+            document.body[bend:bend] = put_cmd_in_ert("}")
+            # ...and after the beginning
+            document.body[blay + 1:blay + 1] = \
+              put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}[" \
+                  + offset + "]{")
+
+        # on to the next table
+        begin_table = end_table
+
+
+def revert_script(document):
+    " Turn subscript/superscript insets into \\textsubscript/\\textsuperscript TeX code "
+    body = document.body
+    need_subscript = False
+    pos = 0
+    while True:
+        pos = find_token(body, '\\begin_inset script', pos)
+        if pos == -1:
+            break
+        inset_end = find_end_of_inset(body, pos)
+        if inset_end == -1:
+            document.warning("Malformed LyX document: Can't find end of script inset.")
+            pos += 1
+            continue
+        lay_beg = find_token(body, "\\begin_layout", pos, inset_end)
+        if lay_beg == -1:
+            document.warning("Malformed LyX document: Can't find layout in script inset.")
+            pos = inset_end
+            continue
+        if check_token(body[pos], "\\begin_inset script subscript"):
+            opener = '\\textsubscript{'
+            need_subscript = True
+        elif check_token(body[pos], "\\begin_inset script superscript"):
+            opener = '\\textsuperscript{'
+        else:
+            document.warning("Malformed LyX document: Unknown type of script inset.")
+            pos = inset_end
+            continue
+        lay_end = find_end_of_layout(body, lay_beg)
+        if lay_end == -1 or lay_end > inset_end:
+            document.warning("Malformed LyX document: Can't find end of layout in script inset.")
+            pos = inset_end
+            continue
+        # replace the tail first so that the indices of the head stay valid
+        body[lay_end:inset_end + 1] = put_cmd_in_ert("}")
+        body[pos:lay_beg + 1] = put_cmd_in_ert(opener)
+        pos += 1
+    # the classes listed below provide a \textsubscript command themselves:
+    # FIXME: Would be nice if we could use the information of the .layout file here
+    classes = ["memoir", "scrartcl", "scrbook", "scrlttr2", "scrreprt"]
+    if need_subscript and find_token_exact(classes, document.textclass, 0) == -1:
+        add_to_preamble(document, ['\\usepackage{subscript}'])
+
+
+def convert_use_xetex(document):
+    " convert \\use_xetex to \\use_non_tex_fonts "
+    header = document.header
+    pos = find_token(header, "\\use_xetex", 0)
+    if pos != -1:
+        # same value under the new parameter name; the value is looked
+        # up from the top of the header, like the line itself
+        setting = get_value(header, "\\use_xetex", 0)
+        header[pos] = "\\use_non_tex_fonts " + setting
+
+
+def revert_use_xetex(document):
+    " revert \\use_non_tex_fonts to \\use_xetex "
+    header = document.header
+    pos = find_token(header, "\\use_non_tex_fonts", 0)
+    if pos == -1:
+        document.warning("Malformed document. No \\use_non_tex_fonts param!")
+    else:
+        # keep the value and only rename the parameter
+        setting = get_value(header, "\\use_non_tex_fonts", 0)
+        header[pos] = "\\use_xetex " + setting
  
-    j = k
+
+def revert_labeling(document):
+    " Replace Labeling layouts by List, unless the text class is one of the koma classes "
+    koma_classes = ("scrartcl", "scrarticle-beamer", "scrbook", "scrlettr",
+        "scrlttr2", "scrreprt")
+    if document.textclass in koma_classes:
+        return
+    body = document.body
+    pos = find_token_exact(body, "\\begin_layout Labeling", 0)
+    while pos != -1:
+        body[pos] = "\\begin_layout List"
+        pos = find_token_exact(body, "\\begin_layout Labeling", pos)
  
  
  ##
@@ -2422,7 +2427,7 @@ convert = [[346, []],
             [368, []],
             [369, [convert_author_id]],
             [370, []],
-           [371, []],
+           [371, [convert_mhchem]],
             [372, []],
             [373, [merge_gbrief]],
             [374, []],
@@ -2455,10 +2460,22 @@ convert = [[346, []],
             [401, []],
             [402, [convert_bibtex_clearpage]],
             [403, [convert_flexnames]],
-           [404, [convert_prettyref]]
+           [404, [convert_prettyref]],
+           [405, []],
+           [406, [convert_passthru]],
+           [407, []],
+           [408, []],
+           [409, [convert_use_xetex]],
+           [410, []]
  ]
  
-revert =  [[403, [revert_refstyle]],
+revert =  [[409, [revert_labeling]],
+           [408, [revert_use_xetex]],
+           [407, [revert_script]],
+           [406, [revert_multirowOffset]],
+           [405, [revert_passthru]],
+           [404, []],
+           [403, [revert_refstyle]],
             [402, [revert_flexnames]],
             [401, []],
             [400, [revert_diagram]],
@@ -2470,7 +2487,7 @@ revert =  [[403, [revert_refstyle]],
             [394, [revert_DIN_C_pagesizes]],
             [393, [revert_makebox]],
             [392, [revert_argument]],
-           [391, [revert_beamer_args]],
+           [391, []],
             [390, [revert_align_decimal, revert_IEEEtran]],
             [389, [revert_output_sync]],
             [388, [revert_html_quotes]],