Length.cpp: add new unit representing \baselineskip

[lyx.git] / lib / lyx2lyx / lyx_2_0.py
diff --git a/lib/lyx2lyx/lyx_2_0.py b/lib/lyx2lyx/lyx_2_0.py

index 450a357a9f274f2d875269848f00e8eaf513796d..ad5636421bb23d16acfd4e9772b17a748bb62243 100644 (file)
--- a/lib/lyx2lyx/lyx_2_0.py
+++ b/lib/lyx2lyx/lyx_2_0.py
@@ -1,7 +1,6 @@
  # -*- coding: utf-8 -*-
  # This file is part of lyx2lyx
-# -*- coding: utf-8 -*-
-# Copyright (C) 2010 The LyX team
+# Copyright (C) 2011 The LyX team
  #
  # This program is free software; you can redistribute it and/or
  # modify it under the terms of the GNU General Public License
@@ -15,7 +14,7 @@
  #
  # You should have received a copy of the GNU General Public License
  # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  
  """ Convert files to the file format generated by lyx 2.0"""
  
@@ -24,36 +23,27 @@ import unicodedata
  import sys, os
  
  from parser_tools import find_token, find_end_of, find_tokens, \
-  find_end_of_inset, find_end_of_layout, get_value, get_value_string
-  
+  find_token_exact, find_end_of_inset, find_end_of_layout, \
+  find_token_backwards, is_in_inset, get_value, get_quoted_value, \
+  del_token, check_token, get_option_value
+
  from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
    put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
-  revert_font_attrs, revert_layout_command, hex2ratio
+  revert_font_attrs, hex2ratio, str2bool
  
  ####################################################################
  # Private helper functions
  
-def remove_option(document, m, option):
+def remove_option(lines, m, option):
      ''' removes option from line m. returns whether we did anything '''
-    l = document.body[m].find(option)
+    l = lines[m].find(option)
      if l == -1:
          return False
-    val = document.body[m][l:].split('"')[1]
-    document.body[m] = document.body[m][:l - 1] + document.body[m][l+len(option + '="' + val + '"'):]
+    val = lines[m][l:].split('"')[1]
+    lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
      return True
  
  
-# DO NOT USE THIS ROUTINE ANY MORE. Better yet, replace the uses that
-# have been made of it with uses of put_cmd_in_ert.
-def old_put_cmd_in_ert(string):
-    for rep in unicode_reps:
-        string = string.replace(rep[1], rep[0].replace('\\\\', '\\'))
-    string = string.replace('\\', "\\backslash\n")
-    string = "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Plain Layout\n" \
-      + string + "\n\\end_layout\n\\end_inset"
-    return string
-
-
  ###############################################################################
  ###
  ### Conversion and reversion routines
@@ -98,10 +88,7 @@ def revert_tabularvalign(document):
        if p != -1:
            q = document.body[fline].find("tabularvalignment")
            if q != -1:
-              # FIXME
-              # This seems wrong: It removes everything after 
-              # tabularvalignment, too.
-              document.body[fline] = document.body[fline][:q - 1] + '>'
+              document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
            i += 1
            continue
  
@@ -117,16 +104,13 @@ def revert_tabularvalign(document):
        # delete tabularvalignment
        q = document.body[fline].find("tabularvalignment")
        if q != -1:
-          # FIXME
-          # This seems wrong: It removes everything after 
-          # tabularvalignment, too.
-          document.body[fline] = document.body[fline][:q - 1] + '>'
+          document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
  
        # don't add a box when centered
        if tabularvalignment == 'c':
            i = end
            continue
-      subst = ['\\end_layout', '\\end_inset']
+      subst = ['\\end_inset', '\\end_layout']
        document.body[end:end] = subst # just inserts those lines
        subst = ['\\begin_inset Box Frameless',
            'position "' + tabularvalignment +'"',
@@ -165,13 +149,13 @@ def revert_phantom_types(document, ptype, cmd):
            document.warning("Can't find layout for inset at line " + str(i))
            i = end
            continue
-      bend = find_token(document.body, "\\end_layout", blay, end)
+      bend = find_end_of_layout(document.body, blay)
        if bend == -1:
            document.warning("Malformed LyX document: Could not find end of Phantom inset's layout.")
            i = end
            continue
        substi = ["\\begin_inset ERT", "status collapsed", "",
-                "\\begin_layout Plain Layout", "", "", "\\backslash", 
+                "\\begin_layout Plain Layout", "", "", "\\backslash",
                  cmd + "{", "\\end_layout", "", "\\end_inset"]
        substj = ["\\size default", "", "\\begin_inset ERT", "status collapsed", "",
                  "\\begin_layout Plain Layout", "", "}", "\\end_layout", "", "\\end_inset"]
@@ -183,7 +167,7 @@ def revert_phantom_types(document, ptype, cmd):
  
  def revert_phantom(document):
      revert_phantom_types(document, "Phantom", "phantom")
-    
+
  def revert_hphantom(document):
      revert_phantom_types(document, "HPhantom", "hphantom")
  
@@ -193,102 +177,117 @@ def revert_vphantom(document):
  
  def revert_xetex(document):
      " Reverts documents that use XeTeX "
+
      i = find_token(document.header, '\\use_xetex', 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\use_xetex.")
          return
-    if get_value(document.header, "\\use_xetex", i) == 'false':
+    if not str2bool(get_value(document.header, "\\use_xetex", i)):
          del document.header[i]
          return
      del document.header[i]
+
      # 1.) set doc encoding to utf8-plain
      i = find_token(document.header, "\\inputencoding", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\inputencoding.")
-    document.header[i] = "\\inputencoding utf8-plain"
+    else:
+        document.header[i] = "\\inputencoding utf8-plain"
+
      # 2.) check font settings
-    l = find_token(document.header, "\\font_roman", 0)
-    if l == -1:
-        document.warning("Malformed LyX document: Missing \\font_roman.")
-    line = document.header[l]
-    l = re.compile(r'\\font_roman (.*)$')
-    m = l.match(line)
-    roman = m.group(1)
-    l = find_token(document.header, "\\font_sans", 0)
-    if l == -1:
-        document.warning("Malformed LyX document: Missing \\font_sans.")
-    line = document.header[l]
-    l = re.compile(r'\\font_sans (.*)$')
-    m = l.match(line)
-    sans = m.group(1)
-    l = find_token(document.header, "\\font_typewriter", 0)
-    if l == -1:
-        document.warning("Malformed LyX document: Missing \\font_typewriter.")
-    line = document.header[l]
-    l = re.compile(r'\\font_typewriter (.*)$')
-    m = l.match(line)
-    typewriter = m.group(1)
-    osf = get_value(document.header, '\\font_osf', 0) == "true"
-    sf_scale = float(get_value(document.header, '\\font_sf_scale', 0))
-    tt_scale = float(get_value(document.header, '\\font_tt_scale', 0))
-    # 3.) set preamble stuff
-    pretext = '%% This document must be processed with xelatex!\n'
-    pretext += '\\usepackage{fontspec}\n'
-    if roman != "default":
-        pretext += '\\setmainfont[Mapping=tex-text]{' + roman + '}\n'
-    if sans != "default":
-        pretext += '\\setsansfont['
-        if sf_scale != 100:
-            pretext += 'Scale=' + str(sf_scale / 100) + ','
-        pretext += 'Mapping=tex-text]{' + sans + '}\n'
-    if typewriter != "default":
-        pretext += '\\setmonofont'
-        if tt_scale != 100:
-            pretext += '[Scale=' + str(tt_scale / 100) + ']'
-        pretext += '{' + typewriter + '}\n'
-    if osf:
-        pretext += '\\defaultfontfeatures{Numbers=OldStyle}\n'
-    pretext += '\usepackage{xunicode}\n'
-    pretext += '\usepackage{xltxtra}\n'
-    insert_to_preamble(0, document, pretext)
-    # 4.) reset font settings
+    # defaults
+    roman = sans = typew = "default"
+    osf = False
+    sf_scale = tt_scale = 100.0
+
      i = find_token(document.header, "\\font_roman", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_roman.")
-    document.header[i] = "\\font_roman default"
+    else:
+        roman = get_value(document.header, "\\font_roman", i)
+        document.header[i] = "\\font_roman default"
+
      i = find_token(document.header, "\\font_sans", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_sans.")
-    document.header[i] = "\\font_sans default"
+    else:
+        sans = get_value(document.header, "\\font_sans", i)
+        document.header[i] = "\\font_sans default"
+
      i = find_token(document.header, "\\font_typewriter", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_typewriter.")
-    document.header[i] = "\\font_typewriter default"
+    else:
+        typew = get_value(document.header, "\\font_typewriter", i)
+        document.header[i] = "\\font_typewriter default"
+
      i = find_token(document.header, "\\font_osf", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_osf.")
-    document.header[i] = "\\font_osf false"
+    else:
+        osf = str2bool(get_value(document.header, "\\font_osf", i))
+        document.header[i] = "\\font_osf false"
+
      i = find_token(document.header, "\\font_sc", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_sc.")
-    document.header[i] = "\\font_sc false"
+    else:
+        # we do not need this value.
+        document.header[i] = "\\font_sc false"
+
      i = find_token(document.header, "\\font_sf_scale", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_sf_scale.")
-    document.header[i] = "\\font_sf_scale 100"
+    else:
+      val = get_value(document.header, '\\font_sf_scale', i)
+      try:
+        # float() can throw
+        sf_scale = float(val)
+      except:
+        document.warning("Invalid font_sf_scale value: " + val)
+      document.header[i] = "\\font_sf_scale 100"
+
      i = find_token(document.header, "\\font_tt_scale", 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\font_tt_scale.")
-    document.header[i] = "\\font_tt_scale 100"
+    else:
+        val = get_value(document.header, '\\font_tt_scale', i)
+        try:
+          # float() can throw
+          tt_scale = float(val)
+        except:
+          document.warning("Invalid font_tt_scale value: " + val)
+        document.header[i] = "\\font_tt_scale 100"
+
+    # 3.) set preamble stuff
+    pretext = ['%% This document must be processed with xelatex!']
+    pretext.append('\\usepackage{fontspec}')
+    if roman != "default":
+        pretext.append('\\setmainfont[Mapping=tex-text]{' + roman + '}')
+    if sans != "default":
+        sf = '\\setsansfont['
+        if sf_scale != 100.0:
+            sf += 'Scale=' + str(sf_scale / 100.0) + ','
+        sf += 'Mapping=tex-text]{' + sans + '}'
+        pretext.append(sf)
+    if typew != "default":
+        tw = '\\setmonofont'
+        if tt_scale != 100.0:
+            tw += '[Scale=' + str(tt_scale / 100.0) + ']'
+        tw += '{' + typew + '}'
+        pretext.append(tw)
+    if osf:
+        pretext.append('\\defaultfontfeatures{Numbers=OldStyle}')
+    pretext.append('\\usepackage{xunicode}')
+    pretext.append('\\usepackage{xltxtra}')
+    insert_to_preamble(document, pretext)
  
  
  def revert_outputformat(document):
      " Remove default output format param "
-    i = find_token(document.header, '\\default_output_format', 0)
-    if i == -1:
+
+    if not del_token(document.header, '\\default_output_format', 0):
          document.warning("Malformed LyX document: Missing \\default_output_format.")
-        return
-    del document.header[i]
  
  
  def revert_backgroundcolor(document):
@@ -304,27 +303,47 @@ def revert_backgroundcolor(document):
      red   = hex2ratio(colorcode[1:3])
      green = hex2ratio(colorcode[3:5])
      blue  = hex2ratio(colorcode[5:7])
-    insert_to_preamble(0, document,
-                          '% Commands inserted by lyx2lyx to set the background color\n'
-                          + '\\@ifundefined{definecolor}{\\usepackage{color}}{}\n'
-                          + '\\definecolor{page_backgroundcolor}{rgb}{'
-                          + red + ',' + green + ',' + blue + '}\n'
-                          + '\\pagecolor{page_backgroundcolor}\n')
+    insert_to_preamble(document, \
+        ['% To set the background color',
+        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
+        '\\definecolor{page_backgroundcolor}{rgb}{' + red + ',' + green + ',' + blue + '}',
+        '\\pagecolor{page_backgroundcolor}'])
  
  
-def revert_splitindex(document):
-    " Reverts splitindex-aware documents "
+def add_use_indices(document):
+    " Add \\use_indices if it is missing "
      i = find_token(document.header, '\\use_indices', 0)
+    if i != -1:
+        return i
+    i = find_token(document.header, '\\use_bibtopic', 0)
+    if i == -1:
+        i = find_token(document.header, '\\cite_engine', 0)
+    if i == -1:
+        i = find_token(document.header, '\\use_mathdots', 0)
+    if i == -1:
+        i = find_token(document.header, '\\use_mhchem', 0)
+    if i == -1:
+        i = find_token(document.header, '\\use_esint', 0)
+    if i == -1:
+        i = find_token(document.header, '\\use_amsmath', 0)
      if i == -1:
          document.warning("Malformed LyX document: Missing \\use_indices.")
+        return -1
+    document.header.insert(i + 1, '\\use_indices 0')
+    return i + 1
+
+
+def revert_splitindex(document):
+    " Reverts splitindex-aware documents "
+    i = add_use_indices(document)
+    if i == -1:
          return
-    indices = get_value(document.header, "\\use_indices", i)
-    preamble = ""
-    useindices = (indices == "true")
-    if useindices:
-         preamble += "\\usepackage{splitidx}\n"
+    useindices = str2bool(get_value(document.header, "\\use_indices", i))
      del document.header[i]
-    
+    preamble = []
+    if useindices:
+        preamble.append("\\usepackage{splitidx}")
+
      # deal with index declarations in the preamble
      i = 0
      while True:
@@ -335,18 +354,18 @@ def revert_splitindex(document):
          if k == -1:
              document.warning("Malformed LyX document: Missing \\end_index.")
              return
-        if useindices:    
+        if useindices:
            line = document.header[i]
            l = re.compile(r'\\index (.*)$')
            m = l.match(line)
            iname = m.group(1)
            ishortcut = get_value(document.header, '\\shortcut', i, k)
            if ishortcut != "":
-              preamble += "\\newindex[" + iname + "]{" + ishortcut + "}\n"
+              preamble.append("\\newindex[" + iname + "]{" + ishortcut + "}")
          del document.header[i:k + 1]
-    if preamble != "":
-        insert_to_preamble(0, document, preamble)
-        
+    if preamble:
+        insert_to_preamble(document, preamble)
+
      # deal with index insets
      # these need to have the argument removed
      i = 0
@@ -372,7 +391,7 @@ def revert_splitindex(document):
              subst = put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")
              document.body[i:k + 1] = subst
          i = i + 1
-        
+
      # deal with index_print insets
      i = 0
      while True:
@@ -380,7 +399,7 @@ def revert_splitindex(document):
          if i == -1:
              return
          k = find_end_of_inset(document.body, i)
-        ptype = get_value(document.body, 'type', i, k).strip('"')
+        ptype = get_quoted_value(document.body, 'type', i, k)
          if ptype == "idx":
              j = find_token(document.body, "type", i, k)
              del document.body[j]
@@ -394,6 +413,7 @@ def revert_splitindex(document):
  
  def convert_splitindex(document):
      " Converts index and printindex insets to splitindex-aware format "
+    add_use_indices(document)
      i = 0
      while True:
          i = find_token(document.body, "\\begin_inset Index", i)
@@ -410,7 +430,7 @@ def convert_splitindex(document):
          if document.body[i + 1].find('LatexCommand printindex') == -1:
              document.warning("Malformed LyX document: Incomplete printindex inset.")
              return
-        subst = ["LatexCommand printindex", 
+        subst = ["LatexCommand printindex",
              "type \"idx\""]
          document.body[i + 1:i + 2] = subst
          i = i + 1
@@ -418,12 +438,10 @@ def convert_splitindex(document):
  
  def revert_subindex(document):
      " Reverts \\printsubindex CommandInset types "
-    i = find_token(document.header, '\\use_indices', 0)
+    i = add_use_indices(document)
      if i == -1:
-        document.warning("Malformed LyX document: Missing \\use_indices.")
          return
-    indices = get_value(document.header, "\\use_indices", i)
-    useindices = (indices == "true")
+    useindices = str2bool(get_value(document.header, "\\use_indices", i))
      i = 0
      while True:
          i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
@@ -434,7 +452,7 @@ def revert_subindex(document):
          if ctype != "printsubindex":
              i = k + 1
              continue
-        ptype = get_value(document.body, 'type', i, k).strip('"')
+        ptype = get_quoted_value(document.body, 'type', i, k)
          if not useindices:
              del document.body[i:k + 1]
          else:
@@ -445,12 +463,10 @@ def revert_subindex(document):
  
  def revert_printindexall(document):
      " Reverts \\print[sub]index* CommandInset types "
-    i = find_token(document.header, '\\use_indices', 0)
+    i = add_use_indices(document)
      if i == -1:
-        document.warning("Malformed LyX document: Missing \\use_indices.")
          return
-    indices = get_value(document.header, "\\use_indices", i)
-    useindices = (indices == "true")
+    useindices = str2bool(get_value(document.header, "\\use_indices", i))
      i = 0
      while True:
          i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
@@ -471,14 +487,14 @@ def revert_printindexall(document):
  
  def revert_strikeout(document):
    " Reverts \\strikeout font attribute "
-  changed = revert_font_attrs(document, "\\uuline", "\\uuline")
-  changed = revert_font_attrs(document, "\\uwave", "\\uwave") or changed
-  changed = revert_font_attrs(document, "\\strikeout", "\\sout")  or changed
+  changed = revert_font_attrs(document.body, "\\uuline", "\\uuline")
+  changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed
+  changed = revert_font_attrs(document.body, "\\strikeout", "\\sout")  or changed
    if changed == True:
-    insert_to_preamble(0, document,
-        '% Commands inserted by lyx2lyx for proper underlining\n'
-        + '\\PassOptionsToPackage{normalem}{ulem}\n'
-        + '\\usepackage{ulem}\n')
+    insert_to_preamble(document, \
+        ['%  for proper underlining',
+        '\\PassOptionsToPackage{normalem}{ulem}',
+        '\\usepackage{ulem}'])
  
  
  def revert_ulinelatex(document):
@@ -486,28 +502,24 @@ def revert_ulinelatex(document):
      i = find_token(document.body, '\\bar under', 0)
      if i == -1:
          return
-    insert_to_preamble(0, document,
-            '% Commands inserted by lyx2lyx for proper underlining\n'
-            + '\\PassOptionsToPackage{normalem}{ulem}\n'
-            + '\\usepackage{ulem}\n'
-            + '\\let\\cite@rig\\cite\n'
-            + '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}\n'
-            + '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}\n'
-            + '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}\n')
+    insert_to_preamble(document,\
+            ['%  for proper underlining',
+            '\\PassOptionsToPackage{normalem}{ulem}',
+            '\\usepackage{ulem}',
+            '\\let\\cite@rig\\cite',
+            '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}',
+            '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}',
+            '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}'])
  
  
  def revert_custom_processors(document):
      " Remove bibtex_command and index_command params "
-    i = find_token(document.header, '\\bibtex_command', 0)
-    if i == -1:
+
+    if not del_token(document.header, '\\bibtex_command', 0):
          document.warning("Malformed LyX document: Missing \\bibtex_command.")
-    else:
-        del document.header[i]
-    i = find_token(document.header, '\\index_command', 0)
-    if i == -1:
+
+    if not del_token(document.header, '\\index_command', 0):
          document.warning("Malformed LyX document: Missing \\index_command.")
-    else:
-        del document.header[i]
  
  
  def convert_nomencl_width(document):
@@ -529,13 +541,9 @@ def revert_nomencl_width(document):
        if i == -1:
          break
        j = find_end_of_inset(document.body, i)
-      l = find_token(document.body, "set_width", i, j)
-      if l == -1:
-            document.warning("Can't find set_width option for nomencl_print!")
-            i = j
-            continue
-      del document.body[l]
-      i = j - 1
+      if not del_token(document.body, "set_width", i, j):
+        document.warning("Can't find set_width option for nomencl_print!")
+      i = j
  
  
  def revert_nomencl_cwidth(document):
@@ -548,13 +556,11 @@ def revert_nomencl_cwidth(document):
        j = find_end_of_inset(document.body, i)
        l = find_token(document.body, "width", i, j)
        if l == -1:
-        document.warning("Can't find width option for nomencl_print!")
          i = j
          continue
-      width = get_value(document.body, "width", i, j).strip('"')
+      width = get_quoted_value(document.body, "width", i, j)
        del document.body[l]
-      add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-      add_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
+      insert_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
        i = j - 1
  
  
@@ -589,10 +595,10 @@ def revert_longtable_align(document):
        if j == -1:
            i += 1
            continue
-      # FIXME Is this correct? It wipes out everything after the 
+      # FIXME Is this correct? It wipes out everything after the
        # one we found.
        document.body[fline] = document.body[fline][:j - 1] + '>'
-      # since there could be a tabular inside this one, we 
+      # since there could be a tabular inside this one, we
        # cannot jump to end.
        i += 1
  
@@ -617,8 +623,7 @@ def revert_paragraph_indentation(document):
      if length != "default":
        # handle percent lengths
        length = latex_length(length)[1]
-      add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-      add_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"])
+      insert_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"])
      del document.header[i]
  
  
@@ -635,8 +640,7 @@ def revert_percent_skip_lengths(document):
      # handle percent lengths
      percent, length = latex_length(length)
      if percent:
-        add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-        add_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"])
+        insert_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"])
          # set defskip to medskip as default
          document.header[i] = "\\defskip medskip"
  
@@ -676,7 +680,7 @@ def revert_percent_hspace_lengths(document):
      " Revert relative HSpace lengths to ERT "
      i = 0
      while True:
-      i = find_token(document.body, "\\begin_inset space \\hspace", i)
+      i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
@@ -707,7 +711,7 @@ def revert_hspace_glue_lengths(document):
      " Revert HSpace glue lengths to ERT "
      i = 0
      while True:
-      i = find_token(document.body, "\\begin_inset space \\hspace", i)
+      i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
@@ -738,7 +742,7 @@ def convert_author_id(document):
      i = 0
      anum = 1
      re_author = re.compile(r'(\\author) (\".*\")\s*(.*)$')
-    
+
      while True:
          i = find_token(document.header, "\\author", i)
          if i == -1:
@@ -748,10 +752,9 @@ def convert_author_id(document):
              name = m.group(2)
              email = m.group(3)
              document.header[i] = "\\author %i %s %s" % (anum, name, email)
-        # FIXME Should this really be incremented if we didn't match?
          anum += 1
          i += 1
-        
+
      i = 0
      while True:
          i = find_token(document.body, "\\change_", i)
@@ -770,7 +773,7 @@ def revert_author_id(document):
      " Remove the author_id from the \\author definition "
      i = 0
      anum = 0
-    rx = re.compile(r'(\\author)\s+(\d+)\s+(\".*\")\s*(.*)$')
+    rx = re.compile(r'(\\author)\s+(-?\d+)\s+(\".*\")\s*(.*)$')
      idmap = dict()
  
      while True:
@@ -809,13 +812,34 @@ def revert_suppress_date(document):
          return
      # remove the preamble line and write to the preamble
      # when suppress_date was true
-    date = get_value(document.header, "\\suppress_date", i)
-    if date == "true":
-        add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
+    date = str2bool(get_value(document.header, "\\suppress_date", i))
+    if date:
          add_to_preamble(document, ["\\date{}"])
      del document.header[i]
  
  
+def convert_mhchem(document):
+    "Insert \\use_mhchem into the header: 0 (off) for documents older than 1.6.x, 1 (auto) otherwise"
+    if document.initial_format < 277:
+        # LyX 1.5.x and older never loaded mhchem.
+        # Therefore we must switch it off: Documents that use mhchem have
+        # a manual \usepackage anyway, and documents not using mhchem but
+        # custom macros with the same names as mhchem commands might get
+        # corrupted if mhchem is automatically loaded.
+        mhchem = 0 # off
+    else:
+        # LyX 1.6.x always loaded mhchem automatically.
+        mhchem = 1 # auto
+    i = find_token(document.header, "\\use_esint", 0)
+    if i == -1:
+        # pre-1.5.x document: no \use_esint, so anchor on \use_amsmath instead
+        i = find_token(document.header, "\\use_amsmath", 0)
+    if i == -1:
+        document.warning("Malformed LyX document: Could not find amsmath or esint setting.")
+        return
+    document.header.insert(i + 1, "\\use_mhchem %d" % mhchem)
+
+
  def revert_mhchem(document):
      "Revert mhchem loading to preamble code"
  
@@ -832,6 +856,10 @@ def revert_mhchem(document):
              mhchem = "on"
          del document.header[i]
  
+    if mhchem == "off":
+      # don't load case
+      return
+
      if mhchem == "auto":
          i = 0
          while True:
@@ -839,25 +867,21 @@ def revert_mhchem(document):
              if i == -1:
                 break
              line = document.body[i]
-            if line.find("\\ce{") != -1 or line.find("\\cf{") != 1:
+            if line.find("\\ce{") != -1 or line.find("\\cf{") != -1:
                mhchem = "on"
                break
              i += 1
  
      if mhchem == "on":
-        pre = ["% lyx2lyx mhchem commands", 
-          "\\PassOptionsToPackage{version=3}{mhchem}", 
+        pre = ["\\PassOptionsToPackage{version=3}{mhchem}",
            "\\usepackage{mhchem}"]
-        add_to_preamble(document, pre) 
+        insert_to_preamble(document, pre)
  
  
  def revert_fontenc(document):
      " Remove fontencoding param "
-    i = find_token(document.header, '\\fontencoding', 0)
-    if i == -1:
+    if not del_token(document.header, '\\fontencoding', 0):
          document.warning("Malformed LyX document: Missing \\fontencoding.")
-        return
-    del document.header[i]
  
  
  def merge_gbrief(document):
@@ -891,7 +915,7 @@ def merge_gbrief(document):
                      "Verteiler":       "cc",
                      "Gruss":           "Closing"}
      i = 0
-    while 1:
+    while True:
          i = find_token(document.body, "\\begin_layout", i)
          if i == -1:
              break
@@ -901,7 +925,7 @@ def merge_gbrief(document):
              document.body[i] = "\\begin_layout " + obsoletedby[layout]
  
          i += 1
-        
+
      document.textclass = "g-brief"
      document.set_textclass()
  
@@ -915,12 +939,8 @@ def revert_gbrief(document):
  
  def revert_html_options(document):
      " Remove html options "
-    i = find_token(document.header, '\\html_use_mathml', 0)
-    if i != -1:
-        del document.header[i]
-    i = find_token(document.header, '\\html_be_strict', 0)
-    if i != -1:
-        del document.header[i]
+    del_token(document.header, '\\html_use_mathml', 0)
+    del_token(document.header, '\\html_be_strict', 0)
  
  
  def revert_includeonly(document):
@@ -938,72 +958,135 @@ def revert_includeonly(document):
  
  def revert_includeall(document):
      " Remove maintain_unincluded_children param "
-    i = find_token(document.header, '\\maintain_unincluded_children', 0)
-    if i != -1:
-        del document.header[i]
+    del_token(document.header, '\\maintain_unincluded_children', 0)
  
  
  def revert_multirow(document):
      " Revert multirow cells in tables to TeX-code"
-    i = 0
-    multirow = False
+
+    # first, let's find out if we need to do anything
+    # cell type 3 is multirow begin cell
+    i = find_token(document.body, '<cell multirow="3"', 0)
+    if i == -1:
+      return
+
+    add_to_preamble(document, ["\\usepackage{multirow}"])
+
+    begin_table = 0
      while True:
-      # cell type 3 is multirow begin cell
-      i = find_token(document.body, '<cell multirow="3"', i)
-      if i == -1:
-          break
-      # a multirow cell was found
-      multirow = True
-      # remove the multirow tag, set the valignment to top
-      # and remove the bottom line
-      # FIXME Are we sure these always have space around them?
-      document.body[i] = document.body[i].replace(' multirow="3" ', ' ')
-      document.body[i] = document.body[i].replace('valignment="middle"', 'valignment="top"')
-      document.body[i] = document.body[i].replace(' bottomline="true" ', ' ')
-      # write ERT to create the multirow cell
-      # use 2 rows and 2cm as default with because the multirow span
-      # and the column width is only hardly accessible
-      cend = find_token(document.body, "</cell>", i)
-      if cend == -1:
-          document.warning("Malformed LyX document: Could not find end of tabular cell.")
-          i += 1
-          continue
-      blay = find_token(document.body, "\\begin_layout", i, cend)
-      if blay == -1:
-          document.warning("Can't find layout for cell!")
-          i = j
-          continue
-      bend = find_end_of_layout(document.body, blay)
-      if blay == -1:
-          document.warning("Can't find end of layout for cell!")
-          i = cend
-          continue
+        # find begin/end of table
+        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
+        if begin_table == -1:
+            break
+        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
+        if end_table == -1:
+            document.warning("Malformed LyX document: Could not find end of table.")
+            begin_table += 1
+            continue
+        # does this table have multirow?
+        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
+        if i == -1:
+            begin_table = end_table
+            continue
  
-      # do the later one first, so as not to mess up the numbering
-      # we are wrapping the whole cell in this ert
-      # so before the end of the layout...
-      document.body[bend:bend] = put_cmd_in_ert("}")
-      # ...and after the beginning
-      document.body[blay+1:blay+1] = put_cmd_in_ert("\\multirow{2}{2cm}{")
+        # store the number of rows and columns
+        numrows = get_option_value(document.body[begin_table], "rows")
+        numcols = get_option_value(document.body[begin_table], "columns")
+        try:
+          numrows = int(numrows)
+          numcols = int(numcols)
+        except (TypeError, ValueError):
+          document.warning("Unable to determine rows and columns!")
+          begin_table = end_table
+          continue
  
-      while True:
-          # cell type 4 is multirow part cell
-          k = find_token(document.body, '<cell multirow="4"', cend)
-          if k == -1:
+        mrstarts = []
+        multirows = []
+        # collect info on rows and columns of this table.
+        begin_row = begin_table
+        for row in range(numrows):
+            begin_row = find_token(document.body, '<row>', begin_row, end_table)
+            if begin_row == -1:
+              document.warning("Can't find row " + str(row + 1))
                break
-          # remove the multirow tag, set the valignment to top
-          # and remove the top line
-          # FIXME Are we sure these always have space around them?
-          document.body[k] = document.body[k].replace(' multirow="4" ', ' ')
-          document.body[k] = document.body[k].replace('valignment="middle"', 'valignment="top"')
-          document.body[k] = document.body[k].replace(' topline="true" ', ' ')
-          k += 1
-      # this will always be ok
-      i = cend
-
-    if multirow == True:
-        add_to_preamble(document, 
-          ["% lyx2lyx multirow additions ", "\\usepackage{multirow}"])
+            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
+            if end_row == -1:
+              document.warning("Can't find end of row " + str(row + 1))
+              break
+            begin_cell = begin_row
+            multirows.append([])
+            for column in range(numcols):
+                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
+                if begin_cell == -1:
+                  document.warning("Can't find column " + str(column + 1) + \
+                    "in row " + str(row + 1))
+                  break
+                # NOTE
+                # this will fail if someone puts "</cell>" in a cell, but
+                # that seems fairly unlikely.
+                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
+                if end_cell == -1:
+                  document.warning("Can't find end of column " + str(column + 1) + \
+                    "in row " + str(row + 1))
+                  break
+                multirows[row].append([begin_cell, end_cell, 0])
+                if document.body[begin_cell].find('multirow="3"') != -1:
+                  multirows[row][column][2] = 3 # begin multirow
+                  mrstarts.append([row, column])
+                elif document.body[begin_cell].find('multirow="4"') != -1:
+                  multirows[row][column][2] = 4 # in multirow
+                begin_cell = end_cell
+            begin_row = end_row
+        # end of table info collection
+
+        # work from the back to avoid messing up numbering
+        mrstarts.reverse()
+        for m in mrstarts:
+            row = m[0]
+            col = m[1]
+            # get column width
+            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
+            # "0pt" means that no width is specified
+            if not col_width or col_width == "0pt":
+              col_width = "*"
+            # determine the number of cells that are part of the multirow
+            nummrs = 1
+            for r in range(row + 1, len(multirows)):
+                if col >= len(multirows[r]) or multirows[r][col][2] != 4:
+                  break
+                nummrs += 1
+                # take the opportunity to revert this line
+                lineno = multirows[r][col][0]
+                document.body[lineno] = document.body[lineno].\
+                  replace(' multirow="4" ', ' ').\
+                  replace('valignment="middle"', 'valignment="top"').\
+                  replace(' topline="true" ', ' ')
+                # remove bottom line of previous multirow-part cell
+                lineno = multirows[r-1][col][0]
+                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
+            # revert beginning cell
+            bcell = multirows[row][col][0]
+            ecell = multirows[row][col][1]
+            document.body[bcell] = document.body[bcell].\
+              replace(' multirow="3" ', ' ').\
+              replace('valignment="middle"', 'valignment="top"')
+            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
+            if blay == -1:
+              document.warning("Can't find layout for cell!")
+              continue
+            bend = find_end_of_layout(document.body, blay)
+            if bend == -1:
+              document.warning("Can't find end of layout for cell!")
+              continue
+            # do the later one first, so as not to mess up the numbering
+            # we are wrapping the whole cell in this ert
+            # so before the end of the layout...
+            document.body[bend:bend] = put_cmd_in_ert("}")
+            # ...and after the beginning
+            document.body[blay + 1:blay + 1] = \
+              put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}{")
+
+        begin_table = end_table
  
  
  def convert_math_output(document):
@@ -1015,8 +1098,8 @@ def convert_math_output(document):
      m = rgx.match(document.header[i])
      newval = "0" # MathML
      if m:
-      val = m.group(1)
-      if val != "true":
+      val = str2bool(m.group(1))
+      if not val:
          newval = "2" # Images
      else:
        document.warning("Can't match " + document.header[i])
@@ -1038,7 +1121,7 @@ def revert_math_output(document):
      else:
          document.warning("Unable to match " + document.header[i])
      document.header[i] = "\\html_use_mathml " + newval
-                
+
  
  
  def revert_inset_preview(document):
@@ -1053,7 +1136,7 @@ def revert_inset_preview(document):
            document.warning("Malformed LyX document: Could not find end of Preview inset.")
            i += 1
            continue
-      
+
        # This has several issues.
        # We need to do something about the layouts inside InsetPreview.
        # If we just leave the first one, then we have something like:
@@ -1062,16 +1145,16 @@ def revert_inset_preview(document):
        # \begin_layout Standard
        # and we get a "no \end_layout" error. So something has to be done.
        # Ideally, we would check if it is the same as the layout we are in.
-      # If so, we just remove it; if not, we end the active one. But it is 
+      # If so, we just remove it; if not, we end the active one. But it is
        # not easy to know what layout we are in, due to depth changes, etc,
        # and it is not clear to me how much work it is worth doing. In most
        # cases, the layout will probably be the same.
-      # 
+      #
        # For the same reason, we have to remove the \end_layout tag at the
        # end of the last layout in the inset. Again, that will sometimes be
        # wrong, but it will usually be right. To know what to do, we would
        # again have to know what layout the inset is in.
-      
+
        blay = find_token(document.body, "\\begin_layout", i, iend)
        if blay == -1:
            document.warning("Can't find layout for preview inset!")
@@ -1083,13 +1166,13 @@ def revert_inset_preview(document):
  
        # This is where we would check what layout we are in.
        # The check for Standard is definitely wrong.
-      # 
+      #
        # lay = document.body[blay].split(None, 1)[1]
        # if lay != oldlayout:
        #     # record a boolean to tell us what to do later....
        #     # better to do it later, since (a) it won't mess up
        #     # the numbering and (b) we only modify at the end.
-        
+
        # we want to delete the last \\end_layout in this inset, too.
        # note that this may not be the \\end_layout that goes with blay!!
        bend = find_end_of_layout(document.body, blay)
@@ -1109,7 +1192,7 @@ def revert_inset_preview(document):
        del document.body[bend]
        del document.body[i:blay + 1]
        # we do not need to reset i
-                
+
  
  def revert_equalspacing_xymatrix(document):
      " Revert a Formula with xymatrix@! to an ERT inset "
@@ -1126,12 +1209,12 @@ def revert_equalspacing_xymatrix(document):
            document.warning("Malformed LyX document: Could not find end of Formula inset.")
            i += 1
            continue
-      
+
        for curline in range(i,j):
            found = document.body[curline].find("\\xymatrix@!")
            if found != -1:
                break
- 
+
        if found != -1:
            has_equal_spacing = True
            content = [document.body[i][21:]]
@@ -1146,9 +1229,9 @@ def revert_equalspacing_xymatrix(document):
                    has_preamble = True;
                    break;
            i = j + 1
-  
+
      if has_equal_spacing and not has_preamble:
-        add_to_preamble(document, ['% lyx2lyx xymatrix addition', '\\usepackage[all]{xy}'])
+        add_to_preamble(document, ['\\usepackage[all]{xy}'])
  
  
  def revert_notefontcolor(document):
@@ -1158,22 +1241,22 @@ def revert_notefontcolor(document):
      if i == -1:
          return
  
+    colorcode = get_value(document.header, '\\notefontcolor', i)
+    del document.header[i]
+
      # are there any grey notes?
      if find_token(document.body, "\\begin_inset Note Greyedout", 0) == -1:
-        # no need to do anything, and \renewcommand will throw an error
-        # since lyxgreyedout will not exist.
+        # no need to do anything else, and \renewcommand will throw
+        # an error since lyxgreyedout will not exist.
          return
  
-    colorcode = get_value(document.header, '\\notefontcolor', i)
-    del document.header[i]
      # the color code is in the form #rrggbb where every character denotes a hex number
      red = hex2ratio(colorcode[1:3])
      green = hex2ratio(colorcode[3:5])
      blue = hex2ratio(colorcode[5:7])
      # write the preamble
-    insert_to_preamble(0, document,
-      ['% Commands inserted by lyx2lyx to set the font color',
-        '% for greyed-out notes',
+    insert_to_preamble(document,
+      [ '%  for greyed-out notes',
          '\\@ifundefined{definecolor}{\\usepackage{color}}{}'
          '\\definecolor{note_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue),
          '\\renewenvironment{lyxgreyedout}',
@@ -1181,21 +1264,21 @@ def revert_notefontcolor(document):
  
  
  def revert_turkmen(document):
-    "Set language Turkmen to English" 
+    "Set language Turkmen to English"
  
-    if document.language == "turkmen": 
-        document.language = "english" 
-        i = find_token(document.header, "\\language", 0) 
-        if i != -1: 
-            document.header[i] = "\\language english" 
+    if document.language == "turkmen":
+        document.language = "english"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language english"
  
-    j = 0 
-    while True: 
-        j = find_token(document.body, "\\lang turkmen", j) 
-        if j == -1: 
-            return 
-        document.body[j] = document.body[j].replace("\\lang turkmen", "\\lang english") 
-        j += 1 
+    j = 0
+    while True:
+        j = find_token(document.body, "\\lang turkmen", j)
+        if j == -1:
+            return
+        document.body[j] = document.body[j].replace("\\lang turkmen", "\\lang english")
+        j += 1
  
  
  def revert_fontcolor(document):
@@ -1213,8 +1296,8 @@ def revert_fontcolor(document):
      green = hex2ratio(colorcode[3:5])
      blue = hex2ratio(colorcode[5:7])
      # write the preamble
-    insert_to_preamble(0, document,
-      ['% Commands inserted by lyx2lyx to set the font color',
+    insert_to_preamble(document,
+      ['%  Set the font color',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        '\\definecolor{document_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue),
        '\\color{document_fontcolor}'])
@@ -1232,8 +1315,8 @@ def revert_shadedboxcolor(document):
      green = hex2ratio(colorcode[3:5])
      blue = hex2ratio(colorcode[5:7])
      # write the preamble
-    insert_to_preamble(0, document,
-      ['% Commands inserted by lyx2lyx to set the color of boxes with shaded background',
+    insert_to_preamble(document,
+      ['%  Set the color of boxes with shaded background',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        "\\definecolor{shadecolor}{rgb}{%s,%s,%s}" % (red, green, blue)])
  
@@ -1248,7 +1331,7 @@ def revert_lyx_version(document):
          pass
  
      i = 0
-    while 1:
+    while True:
          i = find_token(document.body, '\\begin_inset Info', i)
          if i == -1:
              return
@@ -1263,13 +1346,8 @@ def revert_lyx_version(document):
          # type  "lyxinfo"
          # arg   "version"
          # \end_inset
-        # but we shall try to be forgiving.
-        arg = typ = ""
-        for k in range(i, j):
-            if document.body[k].startswith("arg"):
-                arg = document.body[k][3:].strip().strip('"')
-            if document.body[k].startswith("type"):
-                typ = document.body[k][4:].strip().strip('"')
+        typ = get_quoted_value(document.body, "type", i, j)
+        arg = get_quoted_value(document.body, "arg", i, j)
          if arg != "version" or typ != "lyxinfo":
              i = j + 1
              continue
@@ -1288,15 +1366,9 @@ def revert_lyx_version(document):
  
  def revert_math_scale(document):
    " Remove math scaling and LaTeX options "
-  i = find_token(document.header, '\\html_math_img_scale', 0)
-  if i != -1:
-    del document.header[i]
-  i = find_token(document.header, '\\html_latex_start', 0)
-  if i != -1:
-    del document.header[i]
-  i = find_token(document.header, '\\html_latex_end', 0)
-  if i != -1:
-    del document.header[i]
+  del_token(document.header, '\\html_math_img_scale', 0)
+  del_token(document.header, '\\html_latex_start', 0)
+  del_token(document.header, '\\html_latex_end', 0)
  
  
  def revert_pagesizes(document):
@@ -1333,7 +1405,7 @@ def convert_html_quotes(document):
      m = l.match(line)
      if m:
        document.header[i] = "\\html_latex_start " + m.group(1)
-      
+
    i = find_token(document.header, '\\html_latex_end', 0)
    if i != -1:
      line = document.header[i]
@@ -1341,11 +1413,11 @@ def convert_html_quotes(document):
      m = l.match(line)
      if m:
        document.header[i] = "\\html_latex_end " + m.group(1)
-      
+
  
  def revert_html_quotes(document):
    " Remove quotes around html_latex_start and html_latex_end "
-  
+
    i = find_token(document.header, '\\html_latex_start', 0)
    if i != -1:
      line = document.header[i]
@@ -1356,7 +1428,7 @@ def revert_html_quotes(document):
          del document.header[i]
      else:
          document.header[i] = "\\html_latex_start \"" + m.group(1) + "\""
-      
+
    i = find_token(document.header, '\\html_latex_end', 0)
    if i != -1:
      line = document.header[i]
@@ -1371,120 +1443,44 @@ def revert_html_quotes(document):
  
  def revert_output_sync(document):
    " Remove forward search options "
-  i = find_token(document.header, '\\output_sync_macro', 0)
-  if i != -1:
-    del document.header[i]
-  i = find_token(document.header, '\\output_sync', 0)
-  if i != -1:
-    del document.header[i]
-
-
-def convert_beamer_args(document):
-  " Convert ERT arguments in Beamer to InsetArguments "
-
-  if document.textclass != "beamer" and document.textclass != "article-beamer":
-    return
-  
-  layouts = ("Block", "ExampleBlock", "AlertBlock")
-  for layout in layouts:
-    blay = 0
-    while True:
-      blay = find_token(document.body, '\\begin_layout ' + layout, blay)
-      if blay == -1:
-        break
-      elay = find_end_of(document.body, blay, '\\begin_layout', '\\end_layout')
-      if elay == -1:
-        document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
-        blay += 1
-        continue
-      bert = find_token(document.body, '\\begin_inset ERT', blay)
-      if bert == -1:
-        document.warning("Malformed Beamer LyX document: Can't find argument of " + layout + " layout.")
-        blay = elay + 1
-        continue
-      eert = find_end_of_inset(document.body, bert)
-      if eert == -1:
-        document.warning("Malformed LyX document: Can't find end of ERT.")
-        blay = elay + 1
-        continue
-      
-      # So the ERT inset begins at line k and goes to line l. We now wrap it in 
-      # an argument inset.
-      # Do the end first, so as not to mess up the variables.
-      document.body[eert + 1:eert + 1] = ['', '\\end_layout', '', '\\end_inset', '']
-      document.body[bert:bert] = ['\\begin_inset OptArg', 'status open', '', 
-          '\\begin_layout Plain Layout']
-      blay = elay + 9
-
-
-def revert_beamer_args(document):
-  " Revert Beamer arguments to ERT "
-  
-  if document.textclass != "beamer" and document.textclass != "article-beamer":
-    return
-    
-  layouts = ("Block", "ExampleBlock", "AlertBlock")
-  for layout in layouts:
-    blay = 0
-    while True:
-      blay = find_token(document.body, '\\begin_layout ' + layout, blay)
-      if blay == -1:
-        break
-      elay = find_end_of(document.body, blay, '\\begin_layout', '\\end_layout')
-      if elay == -1:
-        document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
-        blay += 1
-        continue
-      bopt = find_token(document.body, '\\begin_inset OptArg', blay)
-      if bopt == -1:
-        # it is legal not to have one of these
-        blay = elay + 1
-        continue
-      eopt = find_end_of_inset(document.body, bopt)
-      if eopt == -1:
-        document.warning("Malformed LyX document: Can't find end of argument.")
-        blay = elay + 1
-        continue
-      bplay = find_token(document.body, '\\begin_layout Plain Layout', blay)
-      if bplay == -1:
-        document.warning("Malformed LyX document: Can't find plain layout.")
-        blay = elay + 1
-        continue
-      eplay = find_end_of(document.body, bplay, '\\begin_layout', '\\end_layout')
-      if eplay == -1:
-        document.warning("Malformed LyX document: Can't find end of plain layout.")
-        blay = elay + 1
-        continue
-      # So the content of the argument inset goes from bplay + 1 to eplay - 1
-      bcont = bplay + 1
-      if bcont >= eplay:
-        # Hmm.
-        document.warning(str(bcont) + " " + str(eplay))
-        blay = blay + 1
-        continue
-      # we convert the content of the argument into pure LaTeX...
-      content = lyx2latex(document, document.body[bcont:eplay])
-      strlist = put_cmd_in_ert(["{" + content + "}"])
-      
-      # now replace the optional argument with the ERT
-      document.body[bopt:eopt + 1] = strlist
-      blay = blay + 1
+  del_token(document.header, '\\output_sync_macro', 0)
+  del_token(document.header, '\\output_sync', 0)
  
  
  def revert_align_decimal(document):
-  l = 0
+  i = 0
    while True:
-    l = document.body[l].find('alignment=decimal')
-    if l == -1:
-        break
-    remove_option(document, l, 'decimal_point')
-    document.body[l].replace('decimal', 'center')
+    i = find_token(document.body, "\\begin_inset Tabular", i)
+    if i == -1:
+      return
+    j = find_end_of_inset(document.body, i)
+    if j == -1:
+      document.warning("Unable to find end of Tabular inset at line " + str(i))
+      i += 1
+      continue
+    cell = find_token(document.body, "<cell", i, j)
+    if cell == -1:
+      document.warning("Can't find any cells in Tabular inset at line " + str(i))
+      i = j
+      continue
+    k = i + 1
+    while True:
+      k = find_token(document.body, "<column", k, cell)
+      if k == -1:
+        break  # columns of this table are done; move on to the next one
+      if document.body[k].find('alignment="decimal"') != -1:
+        remove_option(document.body, k, 'decimal_point')
+        document.body[k] = \
+          document.body[k].replace('alignment="decimal"', 'alignment="center"')
+      k += 1
+    # continue after the column block so that later (and nested) tables are seen
+    i = cell
  
  
  def convert_optarg(document):
    " Convert \\begin_inset OptArg to \\begin_inset Argument "
    i = 0
-  while 1:
+  while True:
      i = find_token(document.body, '\\begin_inset OptArg', i)
      if i == -1:
        return
@@ -1495,7 +1491,7 @@ def convert_optarg(document):
  def revert_argument(document):
    " Convert \\begin_inset Argument to \\begin_inset OptArg "
    i = 0
-  while 1:
+  while True:
      i = find_token(document.body, '\\begin_inset Argument', i)
      if i == -1:
        return
@@ -1506,75 +1502,88 @@ def revert_argument(document):
  def revert_makebox(document):
    " Convert \\makebox to TeX code "
    i = 0
-  while 1:
-    # only revert frameless boxes without an inner box
-    i = find_token(document.body, '\\begin_inset Box Frameless', i)
+  while True:
+    i = find_token(document.body, '\\begin_inset Box', i)
      if i == -1:
-      # remove the option use_makebox
-      revert_use_makebox(document)
-      return
+      break
      z = find_end_of_inset(document.body, i)
      if z == -1:
        document.warning("Malformed LyX document: Can't find end of box inset.")
-      return
-    j = find_token(document.body, 'use_makebox 1', i)
-    # assure we found the makebox of the current box
-    if j < z and j != -1:
-      y = find_token(document.body, "\\begin_layout", i)
-      if y > z or y == -1:
-        document.warning("Malformed LyX document: Can't find layout in box.")
-        return
-      # remove the \end_layout \end_inset pair
-      document.body[z - 2:z + 1] = put_cmd_in_ert("}")
-      # determine the alignment
-      k = find_token(document.body, 'hor_pos', j - 4)
-      align = document.body[k][9]
-      # determine the width
-      l = find_token(document.body, 'width "', j + 1)
-      length = document.body[l][7:]
-      # remove trailing '"'
-      length = length[:-1]
-      length = latex_length(length)[1]
-      subst = "\\makebox[" + length + "][" \
-        + align + "]{"
-      document.body[i:y + 1] = put_cmd_in_ert(subst)
-    i += 1
-
+      i += 1
+      continue
+    blay = find_token(document.body, "\\begin_layout", i, z)
+    if blay == -1:
+      document.warning("Malformed LyX document: Can't find layout in box.")
+      i = z
+      continue
+    # by looking before the layout we make sure we're actually finding
+    # an option, not text.
+    j = find_token(document.body, 'use_makebox', i, blay)
+    if j == -1:
+        i = z
+        continue
  
-def revert_use_makebox(document):
-  " Deletes use_makebox option of boxes "
-  h = 0
-  while 1:
-    # remove the option use_makebox
-    h = find_token(document.body, 'use_makebox', 0)
-    if h == -1:
-      return
-    del document.body[h]
-    h += 1
+    if not check_token(document.body[i], "\\begin_inset Box Frameless") \
+      or get_value(document.body, 'use_makebox', j) != "1":
+        del document.body[j]
+        i = z
+        continue
+    bend = find_end_of_layout(document.body, blay)
+    if bend == -1 or bend > z:
+        document.warning("Malformed LyX document: Can't find end of layout in box.")
+        i = z
+        continue
+    # determine the alignment
+    align = get_quoted_value(document.body, 'hor_pos', i, blay, "c")
+    # determine the width
+    length = get_quoted_value(document.body, 'width', i, blay, "50col%")
+    length = latex_length(length)[1]
+    # remove the \end_layout \end_inset pair
+    document.body[bend:z + 1] = put_cmd_in_ert("}")
+    subst = "\\makebox[" + length + "][" \
+      + align + "]{"
+    document.body[i:blay + 1] = put_cmd_in_ert(subst)
+    i += 1
  
  
  def convert_use_makebox(document):
    " Adds use_makebox option for boxes "
    i = 0
-  while 1:
-    # remove the option use_makebox
+  while True:
      i = find_token(document.body, '\\begin_inset Box', i)
      if i == -1:
        return
-    k = find_token(document.body, 'use_parbox', i)
+    # all of this is to make sure we actually find the use_parbox
+    # that is an option for this box, not some text elsewhere.
+    z = find_end_of_inset(document.body, i)
+    if z == -1:
+      document.warning("Can't find end of box inset!!")
+      i += 1
+      continue
+    blay = find_token(document.body, "\\begin_layout", i, z)
+    if blay == -1:
+      document.warning("Can't find layout in box inset!!")
+      i = z
+      continue
+    # so now we are looking for use_parbox before the box's layout
+    k = find_token(document.body, 'use_parbox', i, blay)
      if k == -1:
        document.warning("Malformed LyX document: Can't find use_parbox statement in box.")
-      return
+      i = z
+      continue
      document.body.insert(k + 1, "use_makebox 0")
-    i = k + 1
+    i = blay + 1 # not z + 1 (box insets may be nested)
  
  
  def revert_IEEEtran(document):
    " Convert IEEEtran layouts and styles to TeX code "
+
    if document.textclass != "IEEEtran":
      return
-  revert_flex_inset(document, "IEEE membership", "\\IEEEmembership", 0)
-  revert_flex_inset(document, "Lowercase", "\\MakeLowercase", 0)
+
+  revert_flex_inset(document.body, "IEEE membership", "\\IEEEmembership")
+  revert_flex_inset(document.body, "Lowercase", "\\MakeLowercase")
+
    layouts = ("Special Paper Notice", "After Title Text", "Publication ID",
               "Page headings", "Biography without photo")
    latexcmd = {"Special Paper Notice": "\\IEEEspecialpapernotice",
@@ -1582,24 +1591,26 @@ def revert_IEEEtran(document):
                "Publication ID":       "\\IEEEpubid"}
    obsoletedby = {"Page headings":            "MarkBoth",
                   "Biography without photo":  "BiographyNoPhoto"}
+
    for layout in layouts:
      i = 0
      while True:
          i = find_token(document.body, '\\begin_layout ' + layout, i)
          if i == -1:
            break
-        j = find_end_of(document.body, i, '\\begin_layout', '\\end_layout')
+        j = find_end_of_layout(document.body, i)
          if j == -1:
            document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
            i += 1
            continue
-        if layout in obsoletedby:
+        if layout in obsoletedby:
            document.body[i] = "\\begin_layout " + obsoletedby[layout]
            i = j
-        else:
-          content = lyx2latex(document, document.body[i:j + 1])
-          add_to_preamble(document, [latexcmd[layout] + "{" + content + "}"])
-          del document.body[i:j + 1]
+          continue
+        content = lyx2latex(document, document.body[i:j + 1])
+        add_to_preamble(document, [latexcmd[layout] + "{" + content + "}"])
+        del document.body[i:j + 1]
+        # no need to reset i
  
  
  def convert_prettyref(document):
@@ -1617,13 +1628,13 @@ def convert_prettyref(document):
                         document.warning("Malformed LyX document: No end of InsetRef!")
                         i += 1
                         continue
-               k = find_token(document.body, "LatexCommand prettyref", i)
-               if k != -1 and k < j:
+               k = find_token(document.body, "LatexCommand prettyref", i, j)
+               if k != -1:
                         document.body[k] = "LatexCommand formatted"
                 i = j + 1
         document.header.insert(-1, "\\use_refstyle 0")
-               
- 
+
+
  def revert_refstyle(document):
         " Reverts neutral formatted refs to prettyref "
         re_ref = re.compile("^reference\s+\"(\w+):(\S+)\"")
@@ -1639,14 +1650,14 @@ def revert_refstyle(document):
                         document.warning("Malformed LyX document: No end of InsetRef")
                         i += 1
                         continue
-               k = find_token(document.body, "LatexCommand formatted", i)
-               if k != -1 and k < j:
+               k = find_token(document.body, "LatexCommand formatted", i, j)
+               if k != -1:
                         document.body[k] = "LatexCommand prettyref"
                 i = j + 1
         i = find_token(document.header, "\\use_refstyle", 0)
         if i != -1:
                 document.header.pop(i)
- 
+
  
  def revert_nameref(document):
    " Convert namerefs to regular references "
@@ -1656,7 +1667,7 @@ def revert_nameref(document):
    for cmd in cmds:
      i = 0
      oldcmd = "LatexCommand " + cmd
-    while 1:
+    while True:
        # It seems better to look for this, as most of the reference
        # insets won't be ones we care about.
        i = find_token(document.body, oldcmd, i)
@@ -1665,23 +1676,16 @@ def revert_nameref(document):
        cmdloc = i
        i += 1
        # Make sure it is actually in an inset!
-      # We could just check document.lines[i-1], but that relies
-      # upon something that might easily change.
-      # We'll look back a few lines.
-      stins = cmdloc - 10
-      if stins < 0:
-        stins = 0
-      stins = find_token(document.body, "\\begin_inset CommandInset ref", stins)
-      if stins == -1 or stins > cmdloc:
-        continue
-      endins = find_end_of_inset(document.body, stins)
-      if endins == -1:
-        document.warning("Can't find end of inset at line " + stins + "!!")
-        continue
-      if endins < cmdloc:
-        continue
-      refline = find_token(document.body, "reference", stins)
-      if refline == -1 or refline > endins:
+      # A normal line could begin with "LatexCommand nameref"!
+      val = is_in_inset(document.body, cmdloc, \
+          "\\begin_inset CommandInset ref")
+      if not val:
+          continue
+      stins, endins = val
+
+      # ok, so it is in an InsetRef
+      refline = find_token(document.body, "reference", stins, endins)
+      if refline == -1:
          document.warning("Can't find reference for inset at line " + stinst + "!!")
          continue
        m = rx.match(document.body[refline])
@@ -1690,18 +1694,17 @@ def revert_nameref(document):
          continue
        foundone = True
        ref = m.group(1)
-      newcontent = ['\\begin_inset ERT', 'status collapsed', '', \
-        '\\begin_layout Plain Layout', '', '\\backslash', \
-        cmd + '{' + ref + '}', '\\end_layout', '', '\\end_inset']
+      newcontent = put_cmd_in_ert('\\' + cmd + '{' + ref + '}')
        document.body[stins:endins + 1] = newcontent
+
    if foundone:
-    add_to_preamble(document, "\usepackage{nameref}")
+    add_to_preamble(document, ["\\usepackage{nameref}"])
  
  
  def remove_Nameref(document):
    " Convert Nameref commands to nameref commands "
    i = 0
-  while 1:
+  while True:
      # It seems better to look for this, as most of the reference
      # insets won't be ones we care about.
      i = find_token(document.body, "LatexCommand Nameref" , i)
@@ -1709,22 +1712,11 @@ def remove_Nameref(document):
        break
      cmdloc = i
      i += 1
-    
+
      # Make sure it is actually in an inset!
-    # We could just check document.lines[i-1], but that relies
-    # upon something that might easily change.
-    # We'll look back a few lines.
-    stins = cmdloc - 10
-    if stins < 0:
-      stins = 0
-    stins = find_token(document.body, "\\begin_inset CommandInset ref", stins)
-    if stins == -1 or stins > cmdloc:
-      continue
-    endins = find_end_of_inset(document.body, stins)
-    if endins == -1:
-      document.warning("Can't find end of inset at line " + stins + "!!")
-      continue
-    if endins < cmdloc:
+    val = is_in_inset(document.body, cmdloc, \
+        "\\begin_inset CommandInset ref")
+    if not val:
        continue
      document.body[cmdloc] = "LatexCommand nameref"
  
@@ -1732,21 +1724,15 @@ def remove_Nameref(document):
  def revert_mathrsfs(document):
      " Load mathrsfs if \mathrsfs us use in the document "
      i = 0
-    end = len(document.body) - 1
-    while True:
-      j = document.body[i].find("\\mathscr{")
-      if j != -1:
-        add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
+    for line in document.body:  # scan every body line; NOTE(review): the `i` set above is no longer used
+      if line.find("\\mathscr{") != -1:  # any use of \mathscr{ means the package is needed
          add_to_preamble(document, ["\\usepackage{mathrsfs}"])
-        break
-      if i == end:
-        break
-      i += 1
+        return  # the package only has to be loaded once, so stop at the first hit
  
  
  def convert_flexnames(document):
      "Convert \\begin_inset Flex Custom:Style to \\begin_inset Flex Style and similarly for CharStyle and Element."
-    
+
      i = 0
      rx = re.compile(r'^\\begin_inset Flex (?:Custom|CharStyle|Element):(.+)$')
      while True:
@@ -1759,66 +1745,66 @@ def convert_flexnames(document):
        i += 1
  
  
-flex_insets = [
-  ["Alert", "CharStyle:Alert"],
-  ["Code", "CharStyle:Code"],
-  ["Concepts", "CharStyle:Concepts"],
-  ["E-Mail", "CharStyle:E-Mail"],
-  ["Emph", "CharStyle:Emph"],
-  ["Expression", "CharStyle:Expression"],
-  ["Initial", "CharStyle:Initial"],
-  ["Institute", "CharStyle:Institute"],
-  ["Meaning", "CharStyle:Meaning"],
-  ["Noun", "CharStyle:Noun"],
-  ["Strong", "CharStyle:Strong"],
-  ["Structure", "CharStyle:Structure"],
-  ["ArticleMode", "Custom:ArticleMode"],
-  ["Endnote", "Custom:Endnote"],
-  ["Glosse", "Custom:Glosse"],
-  ["PresentationMode", "Custom:PresentationMode"],
-  ["Tri-Glosse", "Custom:Tri-Glosse"]
-]
-
-flex_elements = [
-  ["Abbrev", "Element:Abbrev"],
-  ["CCC-Code", "Element:CCC-Code"],
-  ["Citation-number", "Element:Citation-number"],
-  ["City", "Element:City"],
-  ["Code", "Element:Code"],
-  ["CODEN", "Element:CODEN"],
-  ["Country", "Element:Country"],
-  ["Day", "Element:Day"],
-  ["Directory", "Element:Directory"],
-  ["Dscr", "Element:Dscr"],
-  ["Email", "Element:Email"],
-  ["Emph", "Element:Emph"],
-  ["Filename", "Element:Filename"],
-  ["Firstname", "Element:Firstname"],
-  ["Fname", "Element:Fname"],
-  ["GuiButton", "Element:GuiButton"],
-  ["GuiMenu", "Element:GuiMenu"],
-  ["GuiMenuItem", "Element:GuiMenuItem"],
-  ["ISSN", "Element:ISSN"],
-  ["Issue-day", "Element:Issue-day"],
-  ["Issue-months", "Element:Issue-months"],
-  ["Issue-number", "Element:Issue-number"],
-  ["KeyCap", "Element:KeyCap"],
-  ["KeyCombo", "Element:KeyCombo"],
-  ["Keyword", "Element:Keyword"],
-  ["Literal", "Element:Literal"],
-  ["MenuChoice", "Element:MenuChoice"],
-  ["Month", "Element:Month"],
-  ["Orgdiv", "Element:Orgdiv"],
-  ["Orgname", "Element:Orgname"],
-  ["Postcode", "Element:Postcode"],
-  ["SS-Code", "Element:SS-Code"],
-  ["SS-Title", "Element:SS-Title"],
-  ["State", "Element:State"],
-  ["Street", "Element:Street"],
-  ["Surname", "Element:Surname"],
-  ["Volume", "Element:Volume"],
-  ["Year", "Element:Year"]
-]
+flex_insets = {  # new-style flex inset name -> old prefixed name; looked up by revert_flexnames
+  "Alert" : "CharStyle:Alert",
+  "Code" : "CharStyle:Code",
+  "Concepts" : "CharStyle:Concepts",
+  "E-Mail" : "CharStyle:E-Mail",
+  "Emph" : "CharStyle:Emph",
+  "Expression" : "CharStyle:Expression",
+  "Initial" : "CharStyle:Initial",
+  "Institute" : "CharStyle:Institute",
+  "Meaning" : "CharStyle:Meaning",
+  "Noun" : "CharStyle:Noun",
+  "Strong" : "CharStyle:Strong",
+  "Structure" : "CharStyle:Structure",
+  "ArticleMode" : "Custom:ArticleMode",
+  "Endnote" : "Custom:Endnote",
+  "Glosse" : "Custom:Glosse",
+  "PresentationMode" : "Custom:PresentationMode",
+  "Tri-Glosse" : "Custom:Tri-Glosse"
+}
+
+flex_elements = {  # new-style flex inset name -> old "Element:" name; the alternative table used by revert_flexnames
+  "Abbrev" : "Element:Abbrev",
+  "CCC-Code" : "Element:CCC-Code",
+  "Citation-number" : "Element:Citation-number",
+  "City" : "Element:City",
+  "Code" : "Element:Code",
+  "CODEN" : "Element:CODEN",
+  "Country" : "Element:Country",
+  "Day" : "Element:Day",
+  "Directory" : "Element:Directory",
+  "Dscr" : "Element:Dscr",
+  "Email" : "Element:Email",
+  "Emph" : "Element:Emph",
+  "Filename" : "Element:Filename",
+  "Firstname" : "Element:Firstname",
+  "Fname" : "Element:Fname",
+  "GuiButton" : "Element:GuiButton",
+  "GuiMenu" : "Element:GuiMenu",
+  "GuiMenuItem" : "Element:GuiMenuItem",
+  "ISSN" : "Element:ISSN",
+  "Issue-day" : "Element:Issue-day",
+  "Issue-months" : "Element:Issue-months",
+  "Issue-number" : "Element:Issue-number",
+  "KeyCap" : "Element:KeyCap",
+  "KeyCombo" : "Element:KeyCombo",
+  "Keyword" : "Element:Keyword",
+  "Literal" : "Element:Literal",
+  "MenuChoice" : "Element:MenuChoice",
+  "Month" : "Element:Month",
+  "Orgdiv" : "Element:Orgdiv",
+  "Orgname" : "Element:Orgname",
+  "Postcode" : "Element:Postcode",
+  "SS-Code" : "Element:SS-Code",
+  "SS-Title" : "Element:SS-Title",
+  "State" : "Element:State",
+  "Street" : "Element:Street",
+  "Surname" : "Element:Surname",
+  "Volume" : "Element:Volume",
+  "Year" : "Element:Year"
+}
  
  
  def revert_flexnames(document):
@@ -1826,7 +1812,7 @@ def revert_flexnames(document):
      flexlist = flex_insets
    else:
      flexlist = flex_elements
-  
+
    rx = re.compile(r'^\\begin_inset Flex\s+(.+)$')
    i = 0
    while True:
@@ -1838,146 +1824,156 @@ def revert_flexnames(document):
        document.warning("Illegal flex inset: " + document.body[i])
        i += 1
        continue
-    
      style = m.group(1)
-    for f in flexlist:
-      if f[0] == style:
-        document.body[i] = "\\begin_inset Flex " + f[1]
-        break
-
+    if style in flexlist:
+      document.body[i] = "\\begin_inset Flex " + flexlist[style]
      i += 1
  
  
  def convert_mathdots(document):
      " Load mathdots automatically "
-    while True:
-      i = find_token(document.header, "\\use_esint" , 0)
-      if i != -1:
-        document.header.insert(i + 1, "\\use_mathdots 1")
-      break
+    i = find_token(document.header, "\\use_mhchem" , 0)
+    if i == -1:
+        i = find_token(document.header, "\\use_esint" , 0)
+    if i == -1:
+        document.warning("Malformed LyX document: Can't find \\use_mhchem.")
+        return;
+    j = find_token(document.preamble, "\\usepackage{mathdots}", 0)
+    if j == -1:
+        document.header.insert(i + 1, "\\use_mathdots 0")
+    else:
+        document.header.insert(i + 1, "\\use_mathdots 2")
+        del document.preamble[j]
  
  
  def revert_mathdots(document):
      " Load mathdots if used in the document "
-    i = 0
-    ddots = re.compile(r'\\begin_inset Formula .*\\ddots', re.DOTALL)
-    vdots = re.compile(r'\\begin_inset Formula .*\\vdots', re.DOTALL)
-    iddots = re.compile(r'\\begin_inset Formula .*\\iddots', re.DOTALL)
+
      mathdots = find_token(document.header, "\\use_mathdots" , 0)
-    no = find_token(document.header, "\\use_mathdots 0" , 0)
-    auto = find_token(document.header, "\\use_mathdots 1" , 0)
-    yes = find_token(document.header, "\\use_mathdots 2" , 0)
-    if mathdots != -1:
+    if mathdots == -1:
+      document.warning("No \\use_mathdots line. Assuming auto.")
+    else:
+      val = get_value(document.header, "\\use_mathdots", mathdots)
        del document.header[mathdots]
+      try:
+        usedots = int(val)
+      except:
+        document.warning("Invalid \\use_mathdots value: " + val + ". Assuming auto.")
+        # probably usedots has not been changed, but be safe.
+        usedots = 1
+
+      if usedots == 0:
+        # do not load case
+        return
+      if usedots == 2:
+        # force load case
+        add_to_preamble(document, ["\\usepackage{mathdots}"])
+        return
+
+    # so we are in the auto case. we want to load mathdots if \iddots is used.
+    i = 0
      while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
          return
        j = find_end_of_inset(document.body, i)
        if j == -1:
-        document.warning("Malformed LyX document: Can't find end of Formula inset.")
-        return 
-      k = ddots.search("\n".join(document.body[i:j]))
-      l = vdots.search("\n".join(document.body[i:j]))
-      m = iddots.search("\n".join(document.body[i:j]))
-      if (yes == -1) and ((no != -1) or (not k and not l and not m) or (auto != -1 and not m)):
+        document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
          i += 1
          continue
-      # use \@ifundefined to catch also the "auto" case
-      add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-      add_to_preamble(document, ["\\@ifundefined{iddots}{\\usepackage{mathdots}}\n"])
-      return
+      code = "\n".join(document.body[i:j])
+      if code.find("\\iddots") != -1:
+        add_to_preamble(document, ["\\@ifundefined{iddots}{\\usepackage{mathdots}}"])
+        return
+      i = j
  
  
  def convert_rule(document):
-    " Convert \\lyxline to CommandInset line "
+    " Convert \\lyxline to CommandInset line. "
      i = 0
+
+    inset = ['\\begin_inset CommandInset line',
+      'LatexCommand rule',
+      'offset "0.5ex"',
+      'width "100line%"',
+      'height "1pt"', '',
+      '\\end_inset', '', '']
+
+    # if paragraphs are indented, we may have to unindent to get the
+    # line to be full-width.
+    indent = get_value(document.header, "\\paragraph_separation", 0)
+    have_indent = (indent == "indent")
+
      while True:
        i = find_token(document.body, "\\lyxline" , i)
        if i == -1:
          return
-        
-      j = find_token(document.body, "\\color" , i - 2)
-      if j == i - 2:
-        color = document.body[j] + '\n'
-      else:
-        color = ''
-      k = find_token(document.body, "\\begin_layout Standard" , i - 4)
-      # we need to handle the case that \lyxline is in a separate paragraph and that it is colored
-      # the result is then an extra empty paragraph which we get by adding an empty ERT inset
-      if k == i - 4 and j == i - 2 and document.body[i - 1] == '':
-        layout = '\\begin_inset ERT\nstatus collapsed\n\n\\begin_layout Plain Layout\n\n\n\\end_layout\n\n\\end_inset\n' \
-          + '\\end_layout\n\n' \
-          + '\\begin_layout Standard\n'
-      elif k == i - 2 and document.body[i - 1] == '':
-        layout = ''
-      else:
-        layout = '\\end_layout\n\n' \
-          + '\\begin_layout Standard\n'
-      l = find_token(document.body, "\\begin_layout Standard" , i + 4)
-      if l == i + 4 and document.body[i + 1] == '':
-        layout2 = ''
+
+      # we need to find out if this line follows other content
+      # in its paragraph. find its layout....
+      lastlay = find_token_backwards(document.body, "\\begin_layout", i)
+      if lastlay == -1:
+        document.warning("Can't find layout for line at " + str(i))
+        # do the best we can.
+        document.body[i:i+1] = inset
+        i += len(inset)
+        continue
+
+      # ...and look for other content before it.
+      lineisfirst = True
+      for line in document.body[lastlay + 1:i]:
+        # is it empty or a paragraph option?
+        if not line or line[0] == '\\':
+          continue
+        lineisfirst = False
+        break
+
+      if lineisfirst:
+        document.body[i:i+1] = inset
+        if indent:
+          # we need to unindent, lest the line be too long
+          document.body.insert(lastlay + 1, "\\noindent")
+        i += len(inset)
        else:
-        layout2 = '\\end_layout\n' \
-          + '\n\\begin_layout Standard\n'
-      subst = layout \
-        + '\\noindent\n\n' \
-        + color \
-        + '\\begin_inset CommandInset line\n' \
-        + 'LatexCommand rule\n' \
-        + 'offset "0.5ex"\n' \
-        + 'width "100line%"\n' \
-        + 'height "1pt"\n' \
-        + '\n\\end_inset\n\n\n' \
-        + layout2
-      document.body[i] = subst
-      i += 1
+        # so our line is in the middle of a paragraph
+        # we need to add a new line, lest this line follow the
+        # other content on that line and run off the side of the page
+        document.body[i:i+1] = inset
+        document.body[i:i] = ["\\begin_inset Newline newline", "\\end_inset", ""]
+      i += len(inset)
  
  
  def revert_rule(document):
      " Revert line insets to Tex code "
      i = 0
-    while 1:
+    while True:
        i = find_token(document.body, "\\begin_inset CommandInset line" , i)
        if i == -1:
          return
        # find end of inset
        j = find_token(document.body, "\\end_inset" , i)
-      # assure we found the end_inset of the current inset
-      if j > i + 6 or j == -1:
+      if j == -1:
          document.warning("Malformed LyX document: Can't find end of line inset.")
          return
        # determine the optional offset
-      k = find_token(document.body, 'offset', i, j)
-      if k != -1:
-        offset = document.body[k][8:-1]
-      else:
-        offset = ""
+      offset = get_quoted_value(document.body, 'offset', i, j)
+      if offset:
+        offset = '[' + offset + ']'
        # determine the width
-      l = find_token(document.body, 'width', i, j)
-      if l != -1:
-        width = document.body[l][7:-1]
-      else:
-        width = "100col%"
+      width = get_quoted_value(document.body, 'width', i, j, "100col%")
+      width = latex_length(width)[1]
        # determine the height
-      m = find_token(document.body, 'height', i, j)
-      if m != -1:
-        height = document.body[m][8:-1]
-      else:
-        height = "1pt"
+      height = get_quoted_value(document.body, 'height', i, j, "1pt")
+      height = latex_length(height)[1]
        # output the \rule command
-      if offset:
-        subst = "\\rule[" + offset + "]{" + width + "}{" + height + "}"
-      else:
-        subst = "\\rule{" + width + "}{" + height + "}"
+      subst = "\\rule[" + offset + "]{" + width + "}{" + height + "}"
        document.body[i:j + 1] = put_cmd_in_ert(subst)
-      i += 1
+      i += len(subst) - (j - i)
  
  
  def revert_diagram(document):
    " Add the feyn package if \\Diagram is used in math "
    i = 0
-  re_diagram = re.compile(r'\\begin_inset Formula .*\\Diagram', re.DOTALL)
    while True:
      i = find_token(document.body, '\\begin_inset Formula', i)
      if i == -1:
@@ -1985,25 +1981,42 @@ def revert_diagram(document):
      j = find_end_of_inset(document.body, i)
      if j == -1:
          document.warning("Malformed LyX document: Can't find end of Formula inset.")
-        return 
-    m = re_diagram.search("\n".join(document.body[i:j]))
-    if not m:
-      i += 1
+        return
+    lines = "\n".join(document.body[i:j])
+    if lines.find("\\Diagram") == -1:
+      i = j
        continue
-    add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
-    add_to_preamble(document, "\\usepackage{feyn}")
+    add_to_preamble(document, ["\\usepackage{feyn}"])
      # only need to do it once!
      return
  
+chapters = ("amsbook", "book", "docbook-book", "elsart", "extbook", "extreport",
+    "jbook", "jreport", "jsbook", "literate-book", "literate-report", "memoir",
+    "mwbk", "mwrep", "recipebook", "report", "scrbook", "scrreprt", "svmono",
+    "svmult", "tbook", "treport", "tufte-book")  # convert_bibtex_clearpage only acts on these text classes (presumably the ones with chapters)
  
  def convert_bibtex_clearpage(document):
    " insert a clear(double)page bibliographystyle if bibtotoc option is used "
  
+  if document.textclass not in chapters:
+    return
+
    i = find_token(document.header, '\\papersides', 0)
+  sides = 0
    if i == -1:
      document.warning("Malformed LyX document: Can't find papersides definition.")
-    return
-  sides = int(document.header[i][12])
+    document.warning("Assuming single sided.")
+    sides = 1
+  else:
+    val = get_value(document.header, "\\papersides", i)
+    try:
+      sides = int(val)
+    except:
+      pass
+    if sides != 1 and sides != 2:
+      document.warning("Invalid papersides value: " + val)
+      document.warning("Assuming single sided.")
+      sides = 1
  
    j = 0
    while True:
@@ -2018,47 +2031,440 @@ def convert_bibtex_clearpage(document):
        continue
  
      # only act if there is the option "bibtotoc"
-    m = find_token(document.body, 'options', j, k)
-    if m == -1:
+    val = get_value(document.body, 'options', j, k)
+    if not val:
        document.warning("Can't find options for bibliography inset at line " + str(j))
        j = k
        continue
-    
-    optline = document.body[m]
-    idx = optline.find("bibtotoc")
-    if idx == -1:
+
+    if val.find("bibtotoc") == -1:
        j = k
        continue
-    
-    # so we want to insert a new page right before the paragraph that
-    # this bibliography thing is in. we'll look for it backwards.
-    lay = j - 1
-    while lay >= 0:
-      if document.body[lay].startswith("\\begin_layout"):
-        break
-      lay -= 1
  
-    if lay < 0:
+    # so we want to insert a new page right before the paragraph that
+    # this bibliography thing is in.
+    lay = find_token_backwards(document.body, "\\begin_layout", j)
+    if lay == -1:
        document.warning("Can't find layout containing bibliography inset at line " + str(j))
        j = k
        continue
  
-    subst1 = '\\begin_layout Standard\n' \
-      + '\\begin_inset Newpage clearpage\n' \
-      + '\\end_inset\n\n\n' \
-      + '\\end_layout\n'
-    subst2 = '\\begin_layout Standard\n' \
-      + '\\begin_inset Newpage cleardoublepage\n' \
-      + '\\end_inset\n\n\n' \
-      + '\\end_layout\n'
      if sides == 1:
-      document.body.insert(lay, subst1)
-      document.warning(subst1)
+      cmd = "clearpage"
+    else:
+      cmd = "cleardoublepage"
+    subst = ['\\begin_layout Standard',
+        '\\begin_inset Newpage ' + cmd,
+        '\\end_inset', '', '',
+        '\\end_layout', '']
+    document.body[lay:lay] = subst
+    j = k + len(subst)
+
+
+def check_passthru(document):  # True for a literate-* text class or a document using the sweave/noweb module
+  tc = document.textclass
+  ok = (tc == "literate-article" or tc == "literate-book" or tc == "literate-report")
+  if not ok:  # not a literate class: fall back to the module list
+    mods = document.get_module_list()
+    for mod in mods:
+      if mod == "sweave" or mod == "noweb":
+        ok = True
+        break
+  return ok
+
+
+def convert_passthru(document):
+    " http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html "
+    # Replaces newline insets inside Chunk/Scrap layouts by repeated layouts.
+    # The check has to be *called*; the bare function object is always true.
+    if not check_passthru(document):
+      return
+
+    rx = re.compile("\\\\begin_layout \s*(\w+)")
+    for lay in ["Chunk", "Scrap"]:
+      # every layout type is searched from the top of the body
+      beg = 0
+      while True:
+        beg = find_token(document.body, "\\begin_layout " + lay, beg)
+        if beg == -1:
+          break
+        end = find_end_of_layout(document.body, beg)
+        if end == -1:
+          document.warning("Can't find end of layout at line " + str(beg))
+          beg += 1
+          continue
+
+        # we are now going to replace newline insets within this layout
+        # by new instances of this layout. so we have repeated layouts
+        # instead of newlines.
+
+        # if the paragraph has any customization, however, we do not want to
+        # do the replacement.
+        if document.body[beg + 1].startswith("\\"):
+          beg = end + 1
+          continue
+
+        ns = beg
+        while True:
+          ns = find_token(document.body, "\\begin_inset Newline newline", ns, end)
+          if ns == -1:
+            break
+          ne = find_end_of_inset(document.body, ns)
+          if ne == -1 or ne > end:
+            document.warning("Can't find end of inset at line " + str(ns))
+            ns += 1
+            continue
+          if document.body[ne + 1] == "":
+            ne += 1
+          subst = ["\\end_layout", "", "\\begin_layout " + lay]
+          document.body[ns:ne + 1] = subst
+          # the body changed length by (lines inserted - lines removed);
+          # shift end by that amount so it keeps pointing at the layout's
+          # \end_layout, and resume the search after the inserted lines
+          end += len(subst) - (ne + 1 - ns)
+          ns += len(subst)
+
+        # ok, we now want to find out if the next layout is the
+        # same as this one. if so, we will insert an extra copy of it
+        didit = False
+        next = find_token(document.body, "\\begin_layout", end)
+        if next != -1:
+          m = rx.match(document.body[next])
+          if m:
+            nextlay = m.group(1)
+            if nextlay == lay:
+              subst = ["\\begin_layout " + lay, "", "\\end_layout", ""]
+              document.body[next:next] = subst
+              didit = True
+        beg = end + 1
+        if didit:
+          beg += 4 # for the extra layout
+
+
+def revert_passthru(document):
+    " http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html "
+    # Merges runs of adjacent Chunk/Scrap layouts back into single layouts
+    # separated by newline insets.
+    # The check has to be *called*; the bare function object is always true.
+    if not check_passthru(document):
+      return
+    rx = re.compile("\\\\begin_layout \s*(\w+)")
+    for lay in ["Chunk", "Scrap"]:
+      # every layout type is searched from the top of the body
+      beg = 0
+      while True:
+        beg = find_token(document.body, "\\begin_layout " + lay, beg)
+        if beg == -1:
+          break
+        end = find_end_of_layout(document.body, beg)
+        if end == -1:
+          document.warning("Can't find end of layout at line " + str(beg))
+          beg += 1
+          continue
+
+        # we now want to find out if the next layout is the
+        # same as this one. but we will need to do this over and
+        # over again.
+        while True:
+          next = find_token(document.body, "\\begin_layout", end)
+          if next == -1:
+            break
+          m = rx.match(document.body[next])
+          if not m:
+            break
+          nextlay = m.group(1)
+          if nextlay != lay:
+            break
+          # so it is the same layout again. we now want to know if it is empty.
+          # but first let's check and make sure there is no content between the
+          # two layouts. i'm not sure if that can happen or not.
+          for l in range(end + 1, next):
+            if document.body[l] != "":
+              document.warning("Found content between adjacent " + lay + " layouts!")
+              break
+          nextend = find_end_of_layout(document.body, next)
+          if nextend == -1:
+            document.warning("Can't find end of layout at line " + str(next))
+            break
+          empty = True
+          for l in range(next + 1, nextend):
+            if document.body[l] != "":
+              empty = False
+              break
+          if empty:
+            # empty layouts just get removed
+            # should we check if it's before yet another such layout?
+            del document.body[next : nextend + 1]
+            # and we do not want to check again. we know the next layout
+            # should be another Chunk and should be left as is.
+            break
+          else:
+            # if it's not empty, then we want to insert a newline in place
+            # of the layout switch
+            subst = ["\\begin_inset Newline newline", "\\end_inset", ""]
+            document.body[end : next + 1] = subst
+            # and now we have to find the end of the new, larger layout
+            newend = find_end_of_layout(document.body, beg)
+            if newend == -1:
+              document.warning("Can't find end of new layout at line " + str(beg))
+              break
+            end = newend
+        beg = end + 1
+
+
+def revert_multirowOffset(document):
+    " Revert multirow cells with offset in tables to TeX-code"
+    # this routine is the same as the revert_multirow routine except that
+    # it checks additionally for the offset
+
+    # first, let's find out if we need to do anything
+    i = find_token(document.body, '<cell multirow="3" mroffset=', 0)
+    if i == -1:
+      return
+
+    add_to_preamble(document, ["\\usepackage{multirow}"])
+
+    rgx = re.compile(r'mroffset="[^"]+?"')
+    begin_table = 0
+
+    while True:
+        # find begin/end of table
+        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
+        if begin_table == -1:
+            break
+        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
+        if end_table == -1:
+            document.warning("Malformed LyX document: Could not find end of table.")
+            begin_table += 1
+            continue
+        # does this table have multirow?
+        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
+        if i == -1:
+            begin_table = end_table
+            continue
+
+        # store the number of rows and columns
+        numrows = get_option_value(document.body[begin_table], "rows")
+        numcols = get_option_value(document.body[begin_table], "columns")
+        try:
+          numrows = int(numrows)
+          numcols = int(numcols)
+        except (TypeError, ValueError):
+          # only a failed number conversion means "cannot determine"
+          document.warning("Unable to determine rows and columns!")
+          begin_table = end_table
+          continue
+
+        mrstarts = []
+        multirows = []
+        # collect info on rows and columns of this table.
+        # multirows[row][column] is [first line of cell, last line of cell, kind]
+        # where kind is 3 (multirow start with offset), 4 (inside a
+        # multirow) or 0 (anything else).
+        begin_row = begin_table
+        for row in range(numrows):
+            begin_row = find_token(document.body, '<row>', begin_row, end_table)
+            if begin_row == -1:
+              document.warning("Can't find row " + str(row + 1))
+              break
+            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
+            if end_row == -1:
+              document.warning("Can't find end of row " + str(row + 1))
+              break
+            begin_cell = begin_row
+            multirows.append([])
+            for column in range(numcols):
+                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
+                if begin_cell == -1:
+                  document.warning("Can't find column " + str(column + 1) + \
+                    " in row " + str(row + 1))
+                  break
+                # NOTE
+                # this will fail if someone puts "</cell>" in a cell, but
+                # that seems fairly unlikely.
+                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
+                if end_cell == -1:
+                  document.warning("Can't find end of column " + str(column + 1) + \
+                    " in row " + str(row + 1))
+                  break
+                multirows[row].append([begin_cell, end_cell, 0])
+                if document.body[begin_cell].find('multirow="3" mroffset=') != -1:
+                  multirows[row][column][2] = 3 # begin multirow
+                  mrstarts.append([row, column])
+                elif document.body[begin_cell].find('multirow="4"') != -1:
+                  multirows[row][column][2] = 4 # in multirow
+                begin_cell = end_cell
+            begin_row = end_row
+        # end of table info collection
+
+        # work from the back to avoid messing up numbering
+        mrstarts.reverse()
+        for m in mrstarts:
+            row = m[0]
+            col = m[1]
+            # get column width
+            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
+            # "0pt" means that no width is specified
+            if not col_width or col_width == "0pt":
+              col_width = "*"
+            # determine the number of cells that are part of the multirow
+            nummrs = 1
+            for r in range(row + 1, numrows):
+                # the collection above may have stopped early on a
+                # malformed table; do not index past what was gathered
+                if r >= len(multirows) or col >= len(multirows[r]):
+                  break
+                if multirows[r][col][2] != 4:
+                  break
+                nummrs += 1
+                # take the opportunity to revert this line
+                lineno = multirows[r][col][0]
+                document.body[lineno] = document.body[lineno].\
+                  replace(' multirow="4" ', ' ').\
+                  replace('valignment="middle"', 'valignment="top"').\
+                  replace(' topline="true" ', ' ')
+                # remove bottom line of previous multirow-part cell
+                lineno = multirows[r-1][col][0]
+                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
+            # revert beginning cell
+            bcell = multirows[row][col][0]
+            ecell = multirows[row][col][1]
+            offset = get_option_value(document.body[bcell], "mroffset")
+            document.body[bcell] = document.body[bcell].\
+              replace(' multirow="3" ', ' ').\
+              replace('valignment="middle"', 'valignment="top"')
+            # remove mroffset option
+            document.body[bcell] = rgx.sub('', document.body[bcell])
+
+            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
+            if blay == -1:
+              document.warning("Can't find layout for cell!")
+              continue
+            bend = find_end_of_layout(document.body, blay)
+            if bend == -1:
+              document.warning("Can't find end of layout for cell!")
+              continue
+            # do the later one first, so as not to mess up the numbering
+            # we are wrapping the whole cell in this ert
+            # so before the end of the layout...
+            document.body[bend:bend] = put_cmd_in_ert("}")
+            # ...and after the beginning
+            document.body[blay + 1:blay + 1] = \
+              put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}[" \
+                  + offset + "]{")
+
+        # on to the next table
+        begin_table = end_table
+
+
+def revert_script(document):
+    " Convert subscript/superscript inset to TeX code "
+    # Walks over every "\begin_inset script" line of the body. The lines from
+    # the inset start through its first \begin_layout are replaced by the
+    # output of put_cmd_in_ert() for \textsubscript{ or \textsuperscript{,
+    # and the lines from the end of that layout through the end of the inset
+    # by the output of put_cmd_in_ert("}").
+    i = 0
+    # set as soon as a subscript inset is recognised; decides below whether
+    # the subscript package is added to the preamble
+    foundsubscript = False
+    while True:
+        i = find_token(document.body, '\\begin_inset script', i)
+        if i == -1:
+            break
+        z = find_end_of_inset(document.body, i)
+        if z == -1:
+            document.warning("Malformed LyX document: Can't find end of script inset.")
+            i += 1
+            continue
+        # only look for the layout inside this inset (lines i..z)
+        blay = find_token(document.body, "\\begin_layout", i, z)
+        if blay == -1:
+            document.warning("Malformed LyX document: Can't find layout in script inset.")
+            # resume the search at the end line of this inset
+            i = z
+            continue
+
+        if check_token(document.body[i], "\\begin_inset script subscript"):
+            subst = '\\textsubscript{'
+            foundsubscript = True
+        elif check_token(document.body[i], "\\begin_inset script superscript"):
+            subst = '\\textsuperscript{'
+        else:
+            document.warning("Malformed LyX document: Unknown type of script inset.")
+            i = z
+            continue
+        bend = find_end_of_layout(document.body, blay)
+        # an end of layout beyond the inset end is treated like a missing one
+        if bend == -1 or bend > z:
+            document.warning("Malformed LyX document: Can't find end of layout in script inset.")
+            i = z
+            continue
+        # remove the \end_layout \end_inset pair
+        # (the tail is replaced first, so the indices i and blay used for the
+        # head replacement are still valid afterwards)
+        document.body[bend:z + 1] = put_cmd_in_ert("}")
+        document.body[i:blay + 1] = put_cmd_in_ert(subst)
+        i += 1
+    # these classes provide a \textsubscript command:
+    # FIXME: Would be nice if we could use the information of the .layout file here
+    classes = ["memoir", "scrartcl", "scrbook", "scrlttr2", "scrreprt"]
+    # the package is only added when a subscript inset was seen and the
+    # lookup of the text class in the list above returns -1
+    if foundsubscript and find_token_exact(classes, document.textclass, 0) == -1:
+        add_to_preamble(document, ['\\usepackage{subscript}'])
+
+
+def convert_use_xetex(document):
+    " convert \\use_xetex to \\use_non_tex_fonts "
+    i = find_token(document.header, "\\use_xetex", 0)
+    if i == -1:
+        # no \use_xetex line found: add the new parameter with value 0,
+        # inserted just before the last header line
+        document.header.insert(-1, "\\use_non_tex_fonts 0")
      else:
-      document.body.insert(lay, subst2)
-      document.warning(subst2)
+        # keep the value and overwrite the line with the new parameter name
+        val = get_value(document.header, "\\use_xetex", 0)
+        document.header[i] = "\\use_non_tex_fonts " + val
+
+
+def revert_use_xetex(document):
+    " Rename the \\use_non_tex_fonts header parameter back to \\use_xetex "
+    token = "\\use_non_tex_fonts"
+    pos = find_token(document.header, token, 0)
+    if pos != -1:
+        # the value is carried over, only the parameter name changes
+        setting = get_value(document.header, token, 0)
+        document.header[pos] = "\\use_xetex " + setting
+    else:
+        document.warning("Malformed document. No \\use_non_tex_fonts param!")
+
+
+def revert_labeling(document):
+    " Turn Labeling layouts into List layouts, except for the classes listed below "
+    koma_classes = ("scrartcl", "scrarticle-beamer", "scrbook", "scrlettr",
+        "scrlttr2", "scrreprt")
+    if document.textclass not in koma_classes:
+        wanted = "\\begin_layout Labeling"
+        pos = find_token_exact(document.body, wanted, 0)
+        while pos != -1:
+            document.body[pos] = "\\begin_layout List"
+            # the rewritten line no longer matches, so searching from the
+            # same position moves on to the next occurrence
+            pos = find_token_exact(document.body, wanted, pos)
  
-    j = k
+
+def revert_langpack(document):
+    " Delete the \\language_package line from the header "
+    pos = find_token(document.header, "\\language_package", 0)
+    if pos != -1:
+        del document.header[pos]
+    else:
+        document.warning("Malformed document. No \\language_package param!")
+
+
+def convert_langpack(document):
+    " Add \\language_package parameter "
+    # The backslash is escaped like in every other token of this file; the
+    # unescaped form "\l" is an invalid escape sequence that newer Python
+    # versions warn about. The resulting string is unchanged.
+    i = find_token(document.header, "\\language", 0)
+    if i == -1:
+        document.warning("Malformed document. No \\language defined!")
+        return
+
+    # the new parameter goes on the line right after \language
+    document.header.insert(i + 1, "\\language_package default")
+
+
+def revert_tabularwidth(document):
+    " Call remove_option for 'tabularwidth' on matching Tabular features lines "
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Tabular", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i)
+        if j == -1:
+            document.warning("Unable to find end of Tabular inset at line " + str(i))
+            i += 1
+            continue
+        # the features line is searched inside the inset, after its first line
+        features = find_token(document.body, "<features", i + 1, j)
+        if features == -1:
+            # report the line of the inset itself, like the warning above
+            document.warning("Can't find any features in Tabular inset at line " + str(i))
+            i = j
+            continue
+        # NOTE(review): the test looks for alignment="tabularwidth" while the
+        # option removed is 'tabularwidth' -- confirm this is the intended match
+        if document.body[features].find('alignment="tabularwidth"') != -1:
+            remove_option(document.body, features, 'tabularwidth')
+        # continue the search behind the start of this inset
+        i += 1
+
+def revert_html_css_as_file(document):
+    " Take \\html_css_as_file out of the header via del_token, warn if that fails "
+    removed = del_token(document.header, '\\html_css_as_file', 0)
+    if removed:
+        return
+    document.warning("Malformed LyX document: Missing \\html_css_as_file.")
  
  
  ##
@@ -2091,7 +2497,7 @@ convert = [[346, []],
             [368, []],
             [369, [convert_author_id]],
             [370, []],
-           [371, []],
+           [371, [convert_mhchem]],
             [372, []],
             [373, [merge_gbrief]],
             [374, []],
@@ -2124,10 +2530,28 @@ convert = [[346, []],
             [401, []],
             [402, [convert_bibtex_clearpage]],
             [403, [convert_flexnames]],
-           [404, [convert_prettyref]]
+           [404, [convert_prettyref]],
+           [405, []],
+           [406, [convert_passthru]],
+           [407, []],
+           [408, []],
+           [409, [convert_use_xetex]],
+           [410, []],
+           [411, [convert_langpack]],
+           [412, []],
+           [413, []]
  ]
  
-revert =  [[403, [revert_refstyle]],
+revert =  [[412, [revert_html_css_as_file]],
+           [411, [revert_tabularwidth]],
+           [410, [revert_langpack]],
+           [409, [revert_labeling]],
+           [408, [revert_use_xetex]],
+           [407, [revert_script]],
+           [406, [revert_multirowOffset]],
+           [405, [revert_passthru]],
+           [404, []],
+           [403, [revert_refstyle]],
             [402, [revert_flexnames]],
             [401, []],
             [400, [revert_diagram]],
@@ -2139,7 +2563,7 @@ revert =  [[403, [revert_refstyle]],
             [394, [revert_DIN_C_pagesizes]],
             [393, [revert_makebox]],
             [392, [revert_argument]],
-           [391, [],
+           [391, []],
             [390, [revert_align_decimal, revert_IEEEtran]],
             [389, [revert_output_sync]],
             [388, [revert_html_quotes]],