Less fingerpainting involved now, and no warnings...

[lyx.git] / lib / lyx2lyx / lyx_1_5.py
diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py

index d934d80e9abc5397512423fbafeb4a55133e46a0..05fd93fdc23688b55898a09e5aa18a2d13812188 100644 (file)
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -246,12 +246,19 @@ document.encoding must be set to the old value (format 248) in both cases.
  We do this here and not in LyX.py because it is far easier to do the
  necessary parsing in modern formats than in ancient ones.
  """
+    inset_types = ["Foot", "Note"]
      if document.cjk_encoding != '':
          return
      encoding_stack = [document.encoding]
+    insets = []
      lang_re = re.compile(r"^\\lang\s(\S+)")
+    inset_re = re.compile(r"^\\begin_inset\s(\S+)")
+    if not forward: # no need to read file unless we are reverting
+        spec_chars = read_unicodesymbols()
+
      if document.inputencoding == "auto" or document.inputencoding == "default":
-        for i in range(len(document.body)):
+        i = 0
+        while i < len(document.body):
              result = lang_re.match(document.body[i])
              if result:
                  language = result.group(1)
@@ -264,7 +271,11 @@ necessary parsing in modern formats than in ancient ones.
                      encoding_stack[-1] = lang[language][3]
              elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                  document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
-                encoding_stack.append(encoding_stack[-1])
+                if len(insets) > 0 and insets[-1] in inset_types:
+                    from lyx2lyx_lang import lang
+                    encoding_stack.append(lang[document.language][3])
+                else:
+                    encoding_stack.append(encoding_stack[-1])
              elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                  document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                  if len(encoding_stack) == 1:
@@ -272,6 +283,14 @@ necessary parsing in modern formats than in ancient ones.
                      document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                  else:
                      del encoding_stack[-1]
+            elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
+                inset_result = inset_re.match(document.body[i])
+                if inset_result:
+                    insets.append(inset_result.group(1))
+                else: 
+                    insets.append("")
+            elif find_token(document.body, "\\end_inset", i, i + 1) == i:
+                del insets[-1]
              if encoding_stack[-1] != document.encoding:
                  if forward:
                      # This line has been incorrectly interpreted as if it was
@@ -282,13 +301,19 @@ necessary parsing in modern formats than in ancient ones.
                      # with the correct encoding.
                      document.body[i] = orig.decode(encoding_stack[-1])
                  else:
-                    # Convert unicode to the 8bit string that will be written
-                    # to the file with the correct encoding.
-                    orig = document.body[i].encode(encoding_stack[-1])
-                    # Convert the 8bit string that will be written to the
-                    # file to fake unicode with the encoding that will later
-                    # be used when writing to the file.
-                    document.body[i] = orig.decode(document.encoding)
+                    try:
+                        # Convert unicode to the 8bit string that will be written
+                        # to the file with the correct encoding.
+                        orig = document.body[i].encode(encoding_stack[-1])
+                        # Convert the 8bit string that will be written to the
+                        # file to fake unicode with the encoding that will later
+                        # be used when writing to the file.
+                        document.body[i] = orig.decode(document.encoding)
+                    except:
+                        mod_line = revert_unicode_line(document, i, insets, spec_chars)
+                        document.body[i:i+1] = mod_line.split('\n')
+                        i += len(mod_line.split('\n')) - 1
+            i += 1
  
  
  def convert_utf8(document):
@@ -309,6 +334,130 @@ def revert_utf8(document):
      convert_multiencoding(document, False)
  
  
+def read_unicodesymbols():
+    " Read the unicodesymbols list of unicode characters and corresponding commands."
+    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
+    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
+    spec_chars = {}
+    for line in fp.readlines():
+        if line[0] != '#':
+            line=line.replace(' "',' ') # remove all quotation marks with spaces before
+            line=line.replace('" ',' ') # remove all quotation marks with spaces after
+            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
+            try:
+                # flag1 and flag2 are preamble and other flags
+                [ucs4,command,flag1,flag2] =line.split(None,3)
+                spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
+            except:
+                pass
+    fp.close()
+    return spec_chars
+
+
+def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
+    # Define strings to start and end ERT and math insets
+    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
+    ert_outro='\n\\end_layout\n\n\\end_inset\n'
+    math_intro='\n\\begin_inset Formula $'
+    math_outro='$\n\\end_inset'
+
+    mod_line = u''
+    if i and not is_inset_line(document, i-1):
+        last_char = document.body[i - 1][-1:]
+    else:
+        last_char = ''
+
+    line = document.body[i]
+    for character in line:
+        try:
+            # Try to write the character
+            dummy = character.encode(document.encoding)
+            mod_line += character
+            last_char = character
+        except:
+            # Try to replace with ERT/math inset
+            if spec_chars.has_key(character):
+                command = spec_chars[character][0] # the command to replace unicode
+                flag1 = spec_chars[character][1]
+                flag2 = spec_chars[character][2]
+                if flag1.find('combining') > -1 or flag2.find('combining') > -1:
+                    # We have a character that should be combined with the previous
+                    command += '{' + last_char + '}'
+                    # Remove the last character. Ignore if it is whitespace
+                    if len(last_char.rstrip()):
+                        # last_char was found and is not whitespace
+                        if mod_line:
+                            mod_line = mod_line[:-1]
+                        else: # last_char belongs to the last line
+                            document.body[i-1] = document.body[i-1][:-1]
+                    else:
+                        # The last character was replaced by a command. For now it is
+                        # ignored. This could be handled better.
+                        pass
+                if command[0:2] == '\\\\':
+                    if command[2:12]=='ensuremath':
+                        if insets and insets[-1] == "ERT":
+                            # math in ERT
+                            command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
+                            command = command.replace('}', '$\n')
+                        elif not insets or insets[-1] != "Formula":
+                            # add a math inset with the replacement character
+                            command = command.replace('\\\\ensuremath{\\', math_intro)
+                            command = command.replace('}', math_outro)
+                        else:
+                            # we are already in a math inset
+                            command = command.replace('\\\\ensuremath{\\', '')
+                            command = command.replace('}', '')
+                    else:
+                        if insets and insets[-1] == "Formula":
+                            # avoid putting an ERT in a math; instead put command as text
+                            command = command.replace('\\\\', '\mathrm{')
+                            command = command + '}'
+                        elif not insets or insets[-1] != "ERT":
+                            # add an ERT inset with the replacement character
+                            command = command.replace('\\\\', ert_intro)
+                            command = command + ert_outro
+                        else:
+                            command = command.replace('\\\\', '\n\\backslash\n')
+                    last_char = '' # indicate that the character should not be removed
+                mod_line += command
+            else:
+                # Replace with replacement string
+                mod_line += replacement_character
+    return mod_line
+
+
+def revert_unicode(document):
+    '''Transform unicode characters that can not be written using the
+document encoding to commands according to the unicodesymbols
+file. Characters that can not be replaced by commands are replaced by
+a replacement string.  Flags other than 'combining' are currently not
+implemented.'''
+    spec_chars = read_unicodesymbols()
+    insets = [] # list of active insets
+
+    # Go through the document to capture all combining characters
+    i = 0
+    while i < len(document.body):
+        line = document.body[i]
+        # Check for insets
+        if line.find('\\begin_inset') > -1:
+            insets.append(line[13:].split()[0])
+        if line.find('\\end_inset') > -1:
+            del insets[-1]
+        
+        # Try to write the line
+        try:
+            # If all goes well the line is written here
+            dummy = line.encode(document.encoding)
+            i += 1
+        except:
+            # Error, some character(s) in the line need to be replaced
+            mod_line = revert_unicode_line(document, i, insets, spec_chars)
+            document.body[i:i+1] = mod_line.split('\n')
+            i += len(mod_line.split('\n'))
+
+
  def revert_cs_label(document):
      " Remove status flag of charstyle label. "
      i = 0
@@ -444,6 +593,13 @@ def convert_commandparams(document):
              i = i + 1
              continue
  
+        j = find_token(document.body, "\\end_inset", i + 1)
+        if j == -1:
+            document.warning("Malformed document")
+        else:
+            command += "".join(document.body[i+1:j])
+            document.body[i+1:j] = []
+
          # The following parser is taken from the original InsetCommandParams::scanCommand
          name = ""
          option1 = ""
@@ -497,17 +653,17 @@ def convert_commandparams(document):
              if commandparams_info[name][0] == "":
                  document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
              else:
-                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
+                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"')))
          if option2 != "":
              if commandparams_info[name][1] == "":
                  document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
              else:
-                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
+                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"')))
          if argument != "":
              if commandparams_info[name][2] == "":
                  document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
              else:
-                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
+                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"')))
          document.body[i:i+1] = lines
          i = i + 1
  
@@ -534,13 +690,13 @@ def revert_commandparams(document):
                      preview_line = document.body[k]
                  elif (commandparams_info[name][0] != "" and
                        pname == commandparams_info[name][0]):
-                    option1 = pvalue.strip('"').replace('\\"', '"')
+                    option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
                  elif (commandparams_info[name][1] != "" and
                        pname == commandparams_info[name][1]):
-                    option2 = pvalue.strip('"').replace('\\"', '"')
+                    option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
                  elif (commandparams_info[name][2] != "" and
                        pname == commandparams_info[name][2]):
-                    argument = pvalue.strip('"').replace('\\"', '"')
+                    argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
              elif document.body[k].strip() != "":
                  document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
          if name == "bibitem":
@@ -868,7 +1024,7 @@ accent_map = {
      "=" : u'\u0304', # macron
      "u" : u'\u0306', # breve
      "." : u'\u0307', # dot above
-    "\"": u'\u0308', # diaresis
+    "\"": u'\u0308', # diaeresis
      "r" : u'\u030a', # ring above
      "H" : u'\u030b', # double acute
      "v" : u'\u030c', # caron
@@ -921,7 +1077,7 @@ def _convert_accent(accent, accented_char):
          return ''
      a = accent_map.get(type)
      if a:
-        return unicodedata.normalize("NFKC", "%s%s" % (char, a))
+        return unicodedata.normalize("NFC", "%s%s" % (char, a))
      return ''
  
  
@@ -994,6 +1150,33 @@ def convert_accent(document):
          i += 3
  
  
+def is_inset_line(document, i):
+    """ Line i of body has an inset """
+    if document.body[i][:1] == '\\':
+        return True
+    last_tokens = "".join(document.body[i].split()[-2:])
+    return last_tokens.find('\\') != -1
+
+
+# A wrapper around normalize that handles special cases (cf. bug 3313)
+def normalize(form, text):
+    # do not normalize OHM, ANGSTROM
+    keep_characters = [0x2126,0x212b]
+    result = ''
+    convert = ''
+    for i in text:
+        if ord(i) in keep_characters:
+            if len(convert) > 0:
+                result = result + unicodedata.normalize(form, convert)
+                convert = ''
+            result = result + i
+        else:
+            convert = convert + i
+    if len(convert) > 0:
+        result = result + unicodedata.normalize(form, convert)
+    return result
+
+
  def revert_accent(document):
      inverse_accent_map = {}
      for k in accent_map:
@@ -1009,35 +1192,35 @@ def revert_accent(document):
      # words before unicode normalization.
      # We do this only if the next line starts with an accent, otherwise we
      # would create things like '\begin_inset ERTstatus'.
-    numberoflines = len(document.body)
-    for i in range(numberoflines-1):
+    for i in range(len(document.body) - 1):
          if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
              continue
-        if (document.body[i+1][0] in inverse_accent_map):
+        if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
              # the last character of this line and the first of the next line
-            # form probably a surrogate pair.
+            # probably form an accented character (base character + combining accent, not a true surrogate pair); lines that look like inset/command lines are excluded (second part of the test)
              while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                  document.body[i] += document.body[i+1][0]
                  document.body[i+1] = document.body[i+1][1:]
  
      # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
      # This is needed to catch all accented characters.
-    for i in range(numberoflines):
+    for i in range(len(document.body)):
          # Unfortunately we have a mixture of unicode strings and plain strings,
          # because we never use u'xxx' for string literals, but 'xxx'.
          # Therefore we may have to try two times to normalize the data.
          try:
-            document.body[i] = unicodedata.normalize("NFKD", document.body[i])
+            document.body[i] = normalize("NFD", document.body[i])
          except TypeError:
-            document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
+            document.body[i] = normalize("NFD", unicode(document.body[i], 'utf-8'))
  
      # Replace accented characters with InsetLaTeXAccent
      # Do not convert characters that can be represented in the chosen
      # encoding.
      encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
      lang_re = re.compile(r"^\\lang\s(\S+)")
-    for i in range(len(document.body)):
  
+    i = 0
+    while i < len(document.body):
          if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
              # Track the encoding of the current line
              result = lang_re.match(document.body[i])
@@ -1068,12 +1251,9 @@ def revert_accent(document):
                  except UnicodeEncodeError:
                      # Insert the rest of the line as new line
                      if j < len(document.body[i]) - 1:
-                        document.body[i+1:i+1] = document.body[i][j+1:]
+                        document.body.insert(i+1, document.body[i][j+1:])
                      # Delete the accented character
-                    if j > 0:
-                        document.body[i] = document.body[i][:j-1]
-                    else:
-                        document.body[i] = u''
+                    document.body[i] = document.body[i][:j]
                      # Finally add the InsetLaTeXAccent
                      document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                      break
@@ -1086,22 +1266,21 @@ def revert_accent(document):
                      accented_char = inverse_accented_map[accented_char]
                  accent = document.body[i][j]
                  try:
-                    dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
+                    dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                  except UnicodeEncodeError:
                      # Insert the rest of the line as new line
                      if j < len(document.body[i]) - 1:
-                        document.body[i+1:i+1] = document.body[i][j+1:]
+                        document.body.insert(i+1, document.body[i][j+1:])
                      # Delete the accented characters
-                    if j > 1:
-                        document.body[i] = document.body[i][:j-2]
-                    else:
-                        document.body[i] = u''
+                    document.body[i] = document.body[i][:j-1]
                      # Finally add the InsetLaTeXAccent
                      document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
                      break
+        i = i + 1
+
      # Normalize to "Normal form C" (NFC, pre-composed characters) again
-    for i in range(numberoflines):
-        document.body[i] = unicodedata.normalize("NFKC", document.body[i])
+    for i in range(len(document.body)):
+        document.body[i] = normalize("NFC", document.body[i])
  
  
  def normalize_font_whitespace_259(document):
@@ -1372,17 +1551,15 @@ def convert_graphics_rotation(document):
              document.warning("Malformed LyX document: Could not find end of graphics inset.")
          # Seach for rotateAngle and width or height or scale
          # If these params are not there, nothing needs to be done.
-        # FIXME: this also inserts scaleBeforeRotation if "rotateAngle" is not there!
-        for k in range(i+1, j):
-            if (document.body[k].find("rotateAngle") and \
-                (document.body[k].find("width") or \
-                document.body[k].find("height") or \
-                document.body[k].find("scale"))):
-                        document.body.insert(j, 'scaleBeforeRotation')
+        k = find_token(document.body, "\trotateAngle", i + 1, j)
+        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
+        if (k != -1 and l != -1):
+            document.body.insert(j, 'scaleBeforeRotation')
          i = i + 1
  
  
-# FIXME: does not work at all
+#
+# remove scaleBeforeRotation graphics param
  def revert_graphics_rotation(document):
      " remove scaleBeforeRotation graphics parameter. "
      i = 0
@@ -1394,24 +1571,27 @@ def revert_graphics_rotation(document):
          if j == -1:
              # should not happen
              document.warning("Malformed LyX document: Could not find end of graphics inset.")
-        for k in range(i+1, j):
-            # If there's a scaleBeforeRotation param, just remove that
-            if document.body[k].find('scaleBeforeRotation'):
-                del document.body[k]
-                break
+        # If there's a scaleBeforeRotation param, just remove that
+        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
+        if k != -1:
+            del document.body[k]
+        else:
              # if not, and if we have rotateAngle and width or height or scale,
              # we have to put the rotateAngle value to special
-            rotateAngle = get_value(document.body, 'rotateAngle', i+1, j)
-            special = get_value(document.body, 'special', i+1, j)
-            if (document.body[k].find("width") or \
-                document.body[k].find("height") or \
-                document.body[k].find("scale") and \
-                document.body[k].find("rotateAngle")):
-                    if special == "":
-                        document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
-                    else:
-                        l = find_token(document.body, "special", i+1, j)
-                        document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
+            rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
+            special = get_value(document.body, 'special', i + 1, j)
+            if rotateAngle != "":
+                k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
+                if k == -1:
+                    break
+                if special == "":
+                    document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
+                else:
+                    l = find_token(document.body, "\tspecial", i + 1, j)
+                    document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
+                k = find_token(document.body, "\trotateAngle", i + 1, j)
+                if k != -1:
+                    del document.body[k]
          i = i + 1
  
  
@@ -1608,7 +1788,7 @@ after label
          inlinecode = ''
          # looking for the oneline code for lstinline
          inlinecode = document.body[find_end_of_layout(document.body, 
-            find_token(document.body, '\\begin_layout Standard', i + 1) +1 ) - 1]
+            find_token(document.body,  '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
          if len(caption) > 0:
              if len(params) == 0:
                  params = 'caption={%s}' % caption
@@ -1625,7 +1805,7 @@ after label
          if inline == 'true':
              document.body[i:(j+1)] = [r'\begin_inset ERT',
                                        'status %s' % status,
-                                      r'\begin_layout Standard',
+                                      r'\begin_layout %s' % document.default_layout,
                                        '', 
                                        '',
                                        r'\backslash',
@@ -1637,15 +1817,17 @@ after label
              document.body[i: j+1] =  [r'\begin_inset ERT',
                                        'status %s' % status,
                                        '',
-                                      r'\begin_layout Standard',
+                                      r'\begin_layout %s' % document.default_layout,
                                        '',
                                        '',
                                        r'\backslash',
                                        r'begin{lstlisting}%s' % params,
-                                      r'\end_layout'
+                                      r'\end_layout',
+                                      '',
+                                      r'\begin_layout %s' % document.default_layout,
                                      ] + document.body[k : j - 1] + \
                                       ['',
-                                      r'\begin_layout Standard',
+                                      r'\begin_layout %s' % document.default_layout,
                                        '',
                                        r'\backslash',
                                        'end{lstlisting}',
@@ -1696,7 +1878,7 @@ lstinputlisting{file}[opt]
          document.body[i : j + 1] = [r'\begin_inset ERT',
                                      'status open',
                                      '',
-                                    r'\begin_layout Standard',
+                                    r'\begin_layout %s' % document.default_layout,
                                      '',
                                      '',
                                      r'\backslash',
@@ -1761,6 +1943,7 @@ def convert_ext_font_sizes(document):
      else:
          del document.header[i]
  
+
  def revert_separator_layout(document):
      r'''Revert --Separator-- to a lyx note
  From
@@ -1795,11 +1978,11 @@ something
          if j == -1:
              # this should not happen
              break
-        document.body[i : j + 1] = [r'\begin_layout Standard',
+        document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
                                      r'\begin_inset Note Note',
                                      'status open',
                                      '',
-                                    r'\begin_layout Standard',
+                                    r'\begin_layout %s' % document.default_layout,
                                      'Separate Environment',
                                      r'\end_layout',
                                      '',
@@ -1809,6 +1992,7 @@ something
                                      r'\end_layout'
                                      ]
  
+
  def convert_arabic (document):
      if document.language == "arabic":
          document.language = "arabic_arabtex"
@@ -1822,7 +2006,8 @@ def convert_arabic (document):
              # change the language name
              document.body[i] = '\lang arabic_arabtex'
          i = i + 1
-       
+
+
  def revert_arabic (document):
      if document.language == "arabic_arabtex":
          document.language = "arabic"
@@ -1837,95 +2022,6 @@ def revert_arabic (document):
              document.body[i] = '\lang arabic'
          i = i + 1
  
-def revert_unicode(document):
-    '''Transform unicode symbols according to the unicode list.
-Preamble flags are not implemented.
-Combination characters are currently ignored.
-Forced output is currently not enforced'''
-    pathname = os.path.dirname(sys.argv[0])
-    fp = open(pathname.strip('lyx2lyx') + 'unicodesymbols','r')
-    spec_chars = {}
-    for line in fp.readlines():
-        if line[0] != '#':
-            line=line.replace('"','') #remove all qoutation marks
-            try:
-                # flag1 and flag2 are preamble & flags
-                # currently NOT impemented
-                [ucs4,command,flag1,flag2] =line.split(None,3)
-                spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
-            except:
-                pass
-    fp.close()
-    #Define strings to start and end ERT and math insets
-    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
-    ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
-    math_intro='\n\\begin_inset Formula $'
-    math_outro='$\n\\end_inset\n'
-    # Find unicode characters and replace them
-    in_ert = 0 # flag set to 1 if in ERT inset
-    in_math = 0 # flag set to 1 if in math inset
-    insets = [] # list of active insets
-    for i, current_line in enumerate(document.body):
-        if current_line.find('\\begin_inset') > -1:
-            # check which inset to start
-            if current_line.find('\\begin_inset ERT') > -1:
-                in_ert = 1
-                insets.append('ert')
-            elif current_line.find('\\begin_inset Formula') > -1:
-                in_math = 1
-                insets.append('math')
-            else:
-                insets.append('other')
-        if current_line.find('\\end_inset') > -1:
-            # check which inset to end
-            try:
-                cur_inset = insets.pop()
-                if cur_inset == 'ert':
-                    in_ert = 0
-                elif cur_inset == 'math':
-                    in_math = 0
-                else:
-                    pass # end of other inset
-            except:
-                pass # inset list was empty (for some reason)
-        current_line=''; # clear to have as container for modified line
-        for j in range(len(document.body[i])):
-            if spec_chars.has_key(document.body[i][j]):
-                flags = spec_chars[document.body[i][j]][1] + spec_chars[document.body[i][j]][2]
-                if flags.find('combining') > -1:
-                    command = ''
-                else:
-                    command = spec_chars[document.body[i][j]][0]; # the command to replace unicode
-                    if command[0:2] == '\\\\':
-                        if command[2:12]=='ensuremath':
-                            if in_ert == 1:
-                                # math in ERT
-                                command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
-                                command = command.replace('}', '$\n')
-                            elif in_math == 0:
-                                # add a math inset with the replacement character
-                                command = command.replace('\\\\ensuremath{\\', math_intro)
-                                command = command.replace('}', math_outro)
-                            else:
-                                # we are already in a math inset
-                                command = command.replace('\\\\ensuremath{\\', '')
-                                command = command.replace('}', '')
-                        else:
-                            if in_math == 1:
-                                # avoid putting an ERT in a math; instead put command as text
-                                command = command.replace('\\\\', '\mathrm{')
-                                command = command + '}'
-                            elif in_ert == 0:
-                                # add an ERT inset with the replacement character
-                                command = command.replace('\\\\', ert_intro)
-                                command = command + ert_outro
-                            else:
-                                command = command.replace('\\\\', '\n\\backslash\n')
-                current_line = current_line + command
-            else:
-                current_line = current_line + document.body[i][j]
-        document.body[i] = current_line
-
  
  ##
  # Conversion hub
@@ -2001,6 +2097,3 @@ revert =  [
  
  if __name__ == "__main__":
      pass
-
-
-