* src/frontends/qt4/QParagraph.cpp:

[lyx.git] / lib / lyx2lyx / lyx_1_5.py
diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py

index b559850430b87988237a0cb4af77c5e157f8862d..dbffe31abbfe550ba7cbc97bb85d3b731de53481 100644 (file)
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -21,8 +21,9 @@
  
  import re
  import unicodedata
+import sys, os
  
-from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value
+from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  from LyX import get_encoding
  
  
@@ -37,6 +38,10 @@ def find_end_of_layout(lines, i):
      " Find end of layout, where lines[i] is included."
      return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  
+def find_beginning_of_layout(lines, i):
+    "Find beginning of layout, where lines[i] is included."
+    return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
+
  # End of helper functions
  ####################################################################
  
@@ -230,6 +235,8 @@ where at least two languages have different default encodings are encoded
  in multiple encodings for file formats < 249. These files are incorrectly
  read and written (as if the whole file was in the encoding of the main
  language).
+This is not true for files written by CJK-LyX, they are always in the locale
+encoding.
  
  This function
  - converts from fake unicode values to true unicode if forward is true, and
@@ -239,6 +246,8 @@ document.encoding must be set to the old value (format 248) in both cases.
  We do this here and not in LyX.py because it is far easier to do the
  necessary parsing in modern formats than in ancient ones.
  """
+    if document.cjk_encoding != '':
+        return
      encoding_stack = [document.encoding]
      lang_re = re.compile(r"^\\lang\s(\S+)")
      if document.inputencoding == "auto" or document.inputencoding == "default":
@@ -247,18 +256,22 @@ necessary parsing in modern formats than in ancient ones.
              if result:
                  language = result.group(1)
                  if language == "default":
-                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding))
+                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                      encoding_stack[-1] = document.encoding
                  else:
                      from lyx2lyx_lang import lang
-                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]))
+                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
                      encoding_stack[-1] = lang[language][3]
              elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
-                document.warning("Adding nested encoding %s." % encoding_stack[-1])
+                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                  encoding_stack.append(encoding_stack[-1])
              elif find_token(document.body, "\\end_layout", i, i + 1) == i:
-                document.warning("Removing nested encoding %s." % encoding_stack[-1])
-                del encoding_stack[-1]
+                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
+                if len(encoding_stack) == 1:
+                    # Don't remove the document encoding from the stack
+                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
+                else:
+                    del encoding_stack[-1]
              if encoding_stack[-1] != document.encoding:
                  if forward:
                      # This line has been incorrectly interpreted as if it was
@@ -292,7 +305,7 @@ def revert_utf8(document):
      elif get_value(document.header, "\\inputencoding", i) == "utf8":
          document.header[i] = "\\inputencoding auto"
      document.inputencoding = get_value(document.header, "\\inputencoding", 0)
-    document.encoding = get_encoding(document.language, document.inputencoding, 248)
+    document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
      convert_multiencoding(document, False)
  
  
@@ -332,19 +345,24 @@ key "argument"
  
  This must be called after convert_commandparams.
  """
-    regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
      i = 0
      while 1:
          i = find_token(document.body, "\\bibitem", i)
          if i == -1:
              break
-        match = re.match(regex, document.body[i])
-        option = match.group(1)
-        argument = match.group(2)
+        j = document.body[i].find('[') + 1
+        k = document.body[i].rfind(']')
+        if j == 0: # No optional argument found
+            option = None
+        else:
+            option = document.body[i][j:k]
+        j = document.body[i].rfind('{') + 1
+        k = document.body[i].rfind('}')
+        argument = document.body[i][j:k]
          lines = ['\\begin_inset LatexCommand bibitem']
          if option != None:
-            lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
-        lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
+            lines.append('label "%s"' % option.replace('"', '\\"'))
+        lines.append('key "%s"' % argument.replace('"', '\\"'))
          lines.append('')
          lines.append('\\end_inset')
          document.body[i:i+1] = lines
@@ -724,7 +742,7 @@ def convert_lyxline(document):
          k = 0
          while i < len(document.body):
              i = find_token(document.body, "\\size " + fontsizes[n], i)
-            k = find_token(document.body, "\\lyxline",i)
+            k = find_token(document.body, "\\lyxline", i)
              # the corresponding fontsize command is always 2 lines before the \lyxline
              if (i != -1 and k == i+2):
                  document.body[i:i+1] = []
@@ -850,7 +868,7 @@ accent_map = {
      "=" : u'\u0304', # macron
      "u" : u'\u0306', # breve
      "." : u'\u0307', # dot above
-    "\"": u'\u0308', # diaresis
+    "\"": u'\u0308', # diaeresis
      "r" : u'\u030a', # ring above
      "H" : u'\u030b', # double acute
      "v" : u'\u030c', # caron
@@ -903,7 +921,7 @@ def _convert_accent(accent, accented_char):
          return ''
      a = accent_map.get(type)
      if a:
-        return unicodedata.normalize("NFKC", "%s%s" % (char, a))
+        return unicodedata.normalize("NFC", "%s%s" % (char, a))
      return ''
  
  
@@ -1009,18 +1027,18 @@ def revert_accent(document):
          # because we never use u'xxx' for string literals, but 'xxx'.
          # Therefore we may have to try two times to normalize the data.
          try:
-            document.body[i] = unicodedata.normalize("NFKD", document.body[i])
+            document.body[i] = unicodedata.normalize("NFD", document.body[i])
          except TypeError:
-            document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
+            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
  
      # Replace accented characters with InsetLaTeXAccent
      # Do not convert characters that can be represented in the chosen
      # encoding.
-    encoding_stack = [get_encoding(document.language, document.inputencoding, 248)]
+    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
      lang_re = re.compile(r"^\\lang\s(\S+)")
      for i in range(len(document.body)):
  
-        if document.inputencoding == "auto" or document.inputencoding == "default":
+        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
              # Track the encoding of the current line
              result = lang_re.match(document.body[i])
              if result:
@@ -1068,7 +1086,7 @@ def revert_accent(document):
                      accented_char = inverse_accented_map[accented_char]
                  accent = document.body[i][j]
                  try:
-                    dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
+                    dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                  except UnicodeEncodeError:
                      # Insert the rest of the line as new line
                      if j < len(document.body[i]) - 1:
@@ -1083,33 +1101,834 @@ def revert_accent(document):
                      break
      # Normalize to "Normal form C" (NFC, pre-composed characters) again
      for i in range(numberoflines):
-        document.body[i] = unicodedata.normalize("NFKC", document.body[i])
+        document.body[i] = unicodedata.normalize("NFC", document.body[i])
  
  
-def normalize_font_whitespace(document):
+def normalize_font_whitespace_259(document):
      """ Before format 259 the font changes were ignored if a
-    whitespace was the last character in the sequence, this function
+    whitespace was the first or last character in the sequence, this function
      transfers the whitespace outside."""
+       
+    char_properties = {"\\series": "default",
+                       "\\emph": "default",
+                       "\\color": "none",
+                       "\\shape": "default",
+                       "\\bar": "default",
+                       "\\family": "default"}
+    return normalize_font_whitespace(document, char_properties)
+
+def normalize_font_whitespace_274(document):
+    """ Before format 259 (sic) the font changes were ignored if a
+    whitespace was the first or last character in the sequence. This was 
+    corrected for most font properties in format 259, but the language 
+    was forgotten then. This function applies the same conversion done
+    there (namely, transfers the whitespace outside) for font language
+    changes, as well."""
+
+    char_properties = {"\\lang": "default"}
+    return normalize_font_whitespace(document, char_properties)
+
+def get_paragraph_language(document, i):
+    """ Return the language of the paragraph in which line i of the document
+    body is. If the first thing in the paragraph is a \\lang command, that
+    is the paragraph's langauge; otherwise, the paragraph's language is the 
+    document's language."""
+
+    lines = document.body
+       
+    first_nonempty_line = \
+        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)
+
+    words = lines[first_nonempty_line].split()
+
+    if len(words) > 1 and words[0] == "\\lang":
+        return words[1]
+    else:
+        return document.language
+       
+def normalize_font_whitespace(document, char_properties):
+    """ Before format 259 the font changes were ignored if a
+    whitespace was the first or last character in the sequence, this function
+    transfers the whitespace outside. Only a change in one of the properties
+    in the provided    char_properties is handled by this function."""
  
      if document.backend != "latex":
          return
  
      lines = document.body
  
-    char_properties = {"\\series": "default",
-                       "\\emph": "default",
-                       "\\color": "none",
-                       "\\shape": "default",
-                       "\\family": "default"}
+    changes = {}
  
-    for i in range(len(lines)):
+    i = 0
+    while i < len(lines):
          words = lines[i].split()
  
-        if len(words) > 1 and words[0] in char_properties.keys() \
-               and words[1] == char_properties[words[0]] \
-               and lines[i-1][-1] == " ":
-            lines[i-1] = lines[i-1][:-1]
-            lines[i+1] = " " + lines[i+1]
+        if len(words) > 0 and words[0] == "\\begin_layout":
+            # a new paragraph resets all font changes
+            changes.clear()
+            # also reset the default language to be the paragraph's language
+            if "\\lang" in char_properties.keys():
+                char_properties["\\lang"] = \
+                    get_paragraph_language(document, i + 1)
+
+        elif len(words) > 1 and words[0] in char_properties.keys():
+            # we have a font change
+            if char_properties[words[0]] == words[1]:
+                # property gets reset
+                if words[0] in changes.keys():
+                    del changes[words[0]]
+                defaultproperty = True
+            else:
+                # property gets set
+                changes[words[0]] = words[1]
+                defaultproperty = False
+
+            # We need to explicitly reset all changed properties if we find
+            # a space below, because LyX 1.4 would output the space after
+            # closing the previous change and before starting the new one,
+            # and closing a font change means to close all properties, not
+            # just the changed one.
+
+            if lines[i-1] and lines[i-1][-1] == " ":
+                lines[i-1] = lines[i-1][:-1]
+                # a space before the font change
+                added_lines = [" "]
+                for k in changes.keys():
+                    # exclude property k because that is already in lines[i]
+                    if k != words[0]:
+                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
+                for k in changes.keys():
+                    # exclude property k because that must be added below anyway
+                    if k != words[0]:
+                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
+                if defaultproperty:
+                    # Property is reset in lines[i], so add the new stuff afterwards
+                    lines[i+1:i+1] = added_lines
+                else:
+                    # Reset property for the space
+                    added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
+                    lines[i:i] = added_lines
+                i = i + len(added_lines)
+
+            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
+                # a space after the font change
+                if (lines[i+1] == " " and lines[i+2]):
+                    next_words = lines[i+2].split()
+                    if len(next_words) > 0 and next_words[0] == words[0]:
+                        # a single blank with a property different from the
+                        # previous and the next line must not be changed
+                        i = i + 2
+                        continue
+                lines[i+1] = lines[i+1][1:]
+                added_lines = [" "]
+                for k in changes.keys():
+                    # exclude property k because that is already in lines[i]
+                    if k != words[0]:
+                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
+                for k in changes.keys():
+                    # exclude property k because that must be added below anyway
+                    if k != words[0]:
+                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
+                # Reset property for the space
+                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
+                lines[i:i] = added_lines
+                i = i + len(added_lines)
+
+        i = i + 1
+
+
+def revert_utf8x(document):
+    " Set utf8x encoding to utf8. "
+    i = find_token(document.header, "\\inputencoding", 0)
+    if i == -1:
+        document.header.append("\\inputencoding auto")
+    else:
+        inputenc = get_value(document.header, "\\inputencoding", i)
+        if inputenc == "utf8x":
+            document.header[i] = "\\inputencoding utf8"
+    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+
+
+def revert_utf8plain(document):
+    " Set utf8plain encoding to utf8. "
+    i = find_token(document.header, "\\inputencoding", 0)
+    if i == -1:
+        document.header.append("\\inputencoding auto")
+    else:
+        inputenc = get_value(document.header, "\\inputencoding", i)
+        if inputenc == "utf8-plain":
+            document.header[i] = "\\inputencoding utf8"
+    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+
+
+def revert_beamer_alert(document):
+    " Revert beamer's \\alert inset back to ERT. "
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
+        if i == -1:
+            return
+        document.body[i] = "\\begin_inset ERT"
+        i = i + 1
+        while 1:
+            if (document.body[i][:13] == "\\begin_layout"):
+                # Insert the \alert command
+                document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
+                break
+            i = i + 1
+
+        i = i + 1
+
+
+def revert_beamer_structure(document):
+    " Revert beamer's \\structure inset back to ERT. "
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
+        if i == -1:
+            return
+        document.body[i] = "\\begin_inset ERT"
+        i = i + 1
+        while 1:
+            if (document.body[i][:13] == "\\begin_layout"):
+                document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
+                break
+            i = i + 1
+
+        i = i + 1
+
+
+def convert_changes(document):
+    " Switch output_changes off if tracking_changes is off. "
+    i = find_token(document.header, '\\tracking_changes', 0)
+    if i == -1:
+        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
+        return
+    j = find_token(document.header, '\\output_changes', 0)
+    if j == -1:
+        document.warning("Malformed lyx document: Missing '\\output_changes'.")
+        return
+    tracking_changes = get_value(document.header, "\\tracking_changes", i)
+    output_changes = get_value(document.header, "\\output_changes", j)
+    if tracking_changes == "false" and output_changes == "true":
+        document.header[j] = "\\output_changes false"
+
+
+def revert_ascii(document):
+    " Set ascii encoding to auto. "
+    i = find_token(document.header, "\\inputencoding", 0)
+    if i == -1:
+        document.header.append("\\inputencoding auto")
+    else:
+        inputenc = get_value(document.header, "\\inputencoding", i)
+        if inputenc == "ascii":
+            document.header[i] = "\\inputencoding auto"
+    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+
+
+def normalize_language_name(document):
+    lang = { "brazil": "brazilian",
+             "portuges": "portuguese"}
+
+    if document.language in lang:
+        document.language = lang[document.language]
+        i = find_token(document.header, "\\language", 0)
+        document.header[i] = "\\language %s" % document.language
+
+
+def revert_language_name(document):
+    lang = { "brazilian": "brazil",
+             "portuguese": "portuges"}
+
+    if document.language in lang:
+        document.language = lang[document.language]
+        i = find_token(document.header, "\\language", 0)
+        document.header[i] = "\\language %s" % document.language
+
+#
+#  \textclass cv -> \textclass simplecv
+def convert_cv_textclass(document):
+    if document.textclass == "cv":
+        document.textclass = "simplecv"
+
+
+def revert_cv_textclass(document):
+    if document.textclass == "simplecv":
+        document.textclass = "cv"
+
+
+#
+# add scaleBeforeRotation graphics param
+def convert_graphics_rotation(document):
+    " add scaleBeforeRotation graphics parameter. "
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset Graphics", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i+1)
+        if j == -1:
+            # should not happen
+            document.warning("Malformed LyX document: Could not find end of graphics inset.")
+        # Seach for rotateAngle and width or height or scale
+        # If these params are not there, nothing needs to be done.
+        k = find_token(document.body, "\trotateAngle", i + 1, j)
+        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
+        if (k != -1 and l != -1):
+            document.body.insert(j, 'scaleBeforeRotation')
+        i = i + 1
+
+
+#
+# remove scaleBeforeRotation graphics param
+def revert_graphics_rotation(document):
+    " remove scaleBeforeRotation graphics parameter. "
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset Graphics", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            # should not happen
+            document.warning("Malformed LyX document: Could not find end of graphics inset.")
+        # If there's a scaleBeforeRotation param, just remove that
+        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
+        if k != -1:
+            del document.body[k]
+        else:
+            # if not, and if we have rotateAngle and width or height or scale,
+            # we have to put the rotateAngle value to special
+            rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
+            special = get_value(document.body, 'special', i + 1, j)
+            if rotateAngle != "":
+                k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
+                if k == -1:
+                    break
+                if special == "":
+                    document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
+                else:
+                    l = find_token(document.body, "\tspecial", i + 1, j)
+                    document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
+                k = find_token(document.body, "\trotateAngle", i + 1, j)
+                if k != -1:
+                    del document.body[k]
+        i = i + 1
+
+
+
+def convert_tableborder(document):
+    # The problematic is: LyX double the table cell border as it ignores the "|" character in
+    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
+    i = 0
+    while i < len(document.body):
+        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
+        k = document.body[i].find("|>{", 0, len(document.body[i]))
+        # the two tokens have to be in one line
+        if (h != -1 and k != -1):
+            # delete the "|"
+            document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
+        i = i + 1
+
+
+def revert_tableborder(document):
+    i = 0
+    while i < len(document.body):
+        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
+        k = document.body[i].find(">{", 0, len(document.body[i]))
+        # the two tokens have to be in one line
+        if (h != -1 and k != -1):
+            # add the "|"
+            document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
+        i = i + 1
+
+
+def revert_armenian(document):
+    
+    # set inputencoding from armscii8 to auto 
+    if document.inputencoding == "armscii8":
+        i = find_token(document.header, "\\inputencoding", 0)
+        if i != -1:
+            document.header[i] = "\\inputencoding auto"
+    # check if preamble exists, if not k is set to -1 
+    i = 0
+    k = -1
+    while i < len(document.preamble):
+        if k == -1:
+            k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
+        if k == -1:
+            k = document.preamble[i].find("%", 0, len(document.preamble[i]))
+        i = i + 1
+    # add the entry \usepackage{armtex} to the document preamble
+    if document.language == "armenian":
+        # set the armtex entry as the first preamble line
+        if k != -1:
+            document.preamble[0:0] = ["\\usepackage{armtex}"]
+        # create the preamble when it doesn't exist
+        else:
+            document.preamble.append('\\usepackage{armtex}')
+    # Set document language from armenian to english 
+    if document.language == "armenian":
+        document.language = "english"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language english"
+
+
+def revert_CJK(document):
+    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
+    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
+                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
+    i = find_token(document.header, "\\inputencoding", 0)
+    if i == -1:
+        document.header.append("\\inputencoding auto")
+    else:
+        inputenc = get_value(document.header, "\\inputencoding", i)
+        if inputenc in encodings:
+            document.header[i] = "\\inputencoding default"
+    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+
+    if document.language == "chinese-simplified" or \
+       document.language == "chinese-traditional" or \
+       document.language == "japanese" or document.language == "korean":
+        document.language = "english"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language english"
+
+
+def revert_preamble_listings_params(document):
+    " Revert preamble option \listings_params "
+    i = find_token(document.header, "\\listings_params", 0)
+    if i != -1:
+        document.preamble.append('\\usepackage{listings}')
+        document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
+        document.header.pop(i);
+
+
+def revert_listings_inset(document):
+    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate 
+FROM
+
+\begin_inset 
+lstparams "language=Delphi"
+inline true
+status open
+
+\begin_layout Standard
+var i = 10;
+\end_layout
+
+\end_inset
+
+TO
+
+\begin_inset ERT
+status open
+\begin_layout Standard
+
+
+\backslash
+lstinline[language=Delphi]{var i = 10;}
+\end_layout
+
+\end_inset
+
+There can be an caption inset in this inset
+
+\begin_layout Standard
+\begin_inset Caption
+
+\begin_layout Standard
+before label
+\begin_inset LatexCommand label
+name "lst:caption"
+
+\end_inset
+
+after label
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+'''
+    i = 0
+    while True:
+        i = find_token(document.body, '\\begin_inset listings', i)
+        if i == -1:
+            break
+        else:
+            if not '\\usepackage{listings}' in document.preamble:
+                document.preamble.append('\\usepackage{listings}')
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            # this should not happen
+            break
+        inline = 'false'
+        params = ''
+        status = 'open'
+        # first three lines
+        for line in range(i + 1, i + 4):
+            if document.body[line].startswith('inline'):
+                inline = document.body[line].split()[1]
+            if document.body[line].startswith('lstparams'):
+                params = document.body[line].split()[1].strip('"')
+            if document.body[line].startswith('status'):
+                status = document.body[line].split()[1].strip()
+                k = line + 1
+        # caption?
+        caption = ''
+        label = ''
+        cap = find_token(document.body, '\\begin_inset Caption', i)
+        if cap != -1:
+            cap_end = find_end_of_inset(document.body, cap + 1)
+            if cap_end == -1:
+                # this should not happen
+                break
+            # label?
+            lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
+            if lbl != -1:
+                lbl_end = find_end_of_inset(document.body, lbl + 1)
+                if lbl_end == -1:
+                    # this should not happen
+                    break
+            else:
+                lbl = cap_end
+                lbl_end = cap_end
+            for line in document.body[lbl : lbl_end + 1]:
+                if line.startswith('name '):
+                    label = line.split()[1].strip('"')
+                    break
+            for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
+                if not line.startswith('\\'):
+                    caption += line.strip()
+            k = cap_end + 1
+        inlinecode = ''
+        # looking for the oneline code for lstinline
+        inlinecode = document.body[find_end_of_layout(document.body, 
+            find_token(document.body, '\\begin_layout Standard', i + 1) +1 ) - 1]
+        if len(caption) > 0:
+            if len(params) == 0:
+                params = 'caption={%s}' % caption
+            else:
+                params += ',caption={%s}' % caption
+        if len(label) > 0:
+            if len(params) == 0:
+                params = 'label={%s}' % label
+            else:
+                params += ',label={%s}' % label
+        if len(params) > 0:
+            params = '[%s]' % params
+            params = params.replace('\\', '\\backslash\n')
+        if inline == 'true':
+            document.body[i:(j+1)] = [r'\begin_inset ERT',
+                                      'status %s' % status,
+                                      r'\begin_layout Standard',
+                                      '', 
+                                      '',
+                                      r'\backslash',
+                                      'lstinline%s{%s}' % (params, inlinecode),
+                                      r'\end_layout',
+                                      '',
+                                      r'\end_inset']
+        else:
+            document.body[i: j+1] =  [r'\begin_inset ERT',
+                                      'status %s' % status,
+                                      '',
+                                      r'\begin_layout Standard',
+                                      '',
+                                      '',
+                                      r'\backslash',
+                                      r'begin{lstlisting}%s' % params,
+                                      r'\end_layout'
+                                    ] + document.body[k : j - 1] + \
+                                     ['',
+                                      r'\begin_layout Standard',
+                                      '',
+                                      r'\backslash',
+                                      'end{lstlisting}',
+                                      r'\end_layout',
+                                      '',
+                                      r'\end_inset']
+            
+
+def revert_include_listings(document):
+    r''' Revert lstinputlisting Include option , translate
+\begin_inset Include \lstinputlisting{file}[opt]
+preview false
+
+\end_inset
+
+TO
+
+\begin_inset ERT
+status open
+
+\begin_layout Standard
+
+
+\backslash
+lstinputlisting{file}[opt]
+\end_layout
+
+\end_inset
+    '''
+
+    i = 0
+    while True:
+        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
+        if i == -1:
+            break
+        else:
+            if not '\\usepackage{listings}' in document.preamble:
+                document.preamble.append('\\usepackage{listings}')
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            # this should not happen
+            break
+        # find command line lstinputlisting{file}[options]
+        cmd, file, option = '', '', ''
+        if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
+            cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()            
+        option = option.replace('\\', '\\backslash\n')
+        document.body[i : j + 1] = [r'\begin_inset ERT',
+                                    'status open',
+                                    '',
+                                    r'\begin_layout Standard',
+                                    '',
+                                    '',
+                                    r'\backslash',
+                                    '%s%s{%s}' % (cmd, option, file),
+                                    r'\end_layout',
+                                    '',
+                                    r'\end_inset']
+
+
+def revert_ext_font_sizes(document):
+    if document.backend != "latex": return
+    if not document.textclass.startswith("ext"): return
+
+    fontsize = get_value(document.header, '\\paperfontsize', 0)
+    if fontsize not in ('10', '11', '12'): return
+    fontsize += 'pt'
+
+    i = find_token(document.header, '\\paperfontsize', 0)
+    document.header[i] = '\\paperfontsize default'
+
+    i = find_token(document.header, '\\options', 0)
+    if i == -1:
+        i = find_token(document.header, '\\textclass', 0) + 1
+        document.header[i:i] = ['\\options %s' % fontsize]
+    else:
+        document.header[i] += ',%s' % fontsize
+
+
+def convert_ext_font_sizes(document):
+    if document.backend != "latex": return
+    if not document.textclass.startswith("ext"): return
+
+    fontsize = get_value(document.header, '\\paperfontsize', 0)
+    if fontsize != 'default': return
+
+    i = find_token(document.header, '\\options', 0)
+    if i == -1: return
+
+    options = get_value(document.header, '\\options', i)
+
+    fontsizes = '10pt', '11pt', '12pt'
+    for fs in fontsizes:
+        if options.find(fs) != -1:
+            break
+    else: # this else will only be attained if the for cycle had no match
+        return
+
+    options = options.split(',')
+    for j, opt in enumerate(options):
+        if opt in fontsizes:
+            fontsize = opt[:-2]
+            del options[j]
+            break
+    else:
+        return
+
+    k = find_token(document.header, '\\paperfontsize', 0)
+    document.header[k] = '\\paperfontsize %s' % fontsize
+
+    if options:
+        document.header[i] = '\\options %s' % ','.join(options)
+    else:
+        del document.header[i]
+
+def revert_separator_layout(document):
+    r'''Revert --Separator-- to a lyx note
+From
+
+\begin_layout --Separator--
+something
+\end_layout
+
+to
+
+\begin_layout Standard
+\begin_inset Note Note
+status open
+
+\begin_layout Standard
+Separate Evironment
+\end_layout
+
+\end_inset
+something
+
+\end_layout
+
+    '''
+
+    i = 0
+    while True:
+        i = find_token(document.body, r'\begin_layout --Separator--', i)
+        if i == -1:
+            break
+        j = find_end_of_layout(document.body, i + 1)
+        if j == -1:
+            # this should not happen
+            break
+        document.body[i : j + 1] = [r'\begin_layout Standard',
+                                    r'\begin_inset Note Note',
+                                    'status open',
+                                    '',
+                                    r'\begin_layout Standard',
+                                    'Separate Environment',
+                                    r'\end_layout',
+                                    '',
+                                    r'\end_inset'] + \
+                                    document.body[ i + 1 : j] + \
+                                    ['',
+                                    r'\end_layout'
+                                    ]
+
+def convert_arabic (document):
+    if document.language == "arabic":
+        document.language = "arabic_arabtex"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language arabic_arabtex"
+    i = 0
+    while i < len(document.body):
+        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
+        if (h != -1):
+            # change the language name
+            document.body[i] = '\lang arabic_arabtex'
+        i = i + 1
+       
+def revert_arabic (document):
+    if document.language == "arabic_arabtex":
+        document.language = "arabic"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language arabic"
+    i = 0
+    while i < len(document.body):
+        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
+        if (h != -1):
+            # change the language name
+            document.body[i] = '\lang arabic'
+        i = i + 1
+
+def revert_unicode(document):
+    '''Transform unicode symbols according to the unicode list.
+Preamble flags are not implemented.
+Combination characters are currently ignored.
+Forced output is currently not enforced'''
+    pathname = os.path.dirname(sys.argv[0])
+    fp = open(pathname.strip('lyx2lyx') + 'unicodesymbols','r')
+    spec_chars = {}
+    for line in fp.readlines():
+        if line[0] != '#':
+            line=line.replace(' "',' ') # remove all quotation marks with spaces before
+            line=line.replace('" ',' ') # remove all quotation marks with spaces after
+            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
+            try:
+                # flag1 and flag2 are preamble & flags
+                # currently NOT implemented
+                [ucs4,command,flag1,flag2] =line.split(None,3)
+                spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
+            except:
+                pass
+    fp.close()
+    # Define strings to start and end ERT and math insets
+    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
+    ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
+    math_intro='\n\\begin_inset Formula $'
+    math_outro='$\n\\end_inset\n'
+    # Find unicode characters and replace them
+    in_ert = 0 # flag set to 1 if in ERT inset
+    in_math = 0 # flag set to 1 if in math inset
+    insets = [] # list of active insets
+    for i, current_line in enumerate(document.body):
+        if current_line.find('\\begin_inset') > -1:
+            # check which inset to start
+            if current_line.find('\\begin_inset ERT') > -1:
+                in_ert = 1
+                insets.append('ert')
+            elif current_line.find('\\begin_inset Formula') > -1:
+                in_math = 1
+                insets.append('math')
+            else:
+                insets.append('other')
+        if current_line.find('\\end_inset') > -1:
+            # check which inset to end
+            try:
+                cur_inset = insets.pop()
+                if cur_inset == 'ert':
+                    in_ert = 0
+                elif cur_inset == 'math':
+                    in_math = 0
+                else:
+                    pass # end of other inset
+            except:
+                pass # inset list was empty (for some reason)
+        current_line=''; # clear to have as container for modified line
+        for j in range(len(document.body[i])):
+            if spec_chars.has_key(document.body[i][j]):
+                flags = spec_chars[document.body[i][j]][1] + spec_chars[document.body[i][j]][2]
+                if flags.find('combining') > -1:
+                    command = ''
+                else:
+                    command = spec_chars[document.body[i][j]][0]; # the command to replace unicode
+                    if command[0:2] == '\\\\':
+                        if command[2:12]=='ensuremath':
+                            if in_ert == 1:
+                                # math in ERT
+                                command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
+                                command = command.replace('}', '$\n')
+                            elif in_math == 0:
+                                # add a math inset with the replacement character
+                                command = command.replace('\\\\ensuremath{\\', math_intro)
+                                command = command.replace('}', math_outro)
+                            else:
+                                # we are already in a math inset
+                                command = command.replace('\\\\ensuremath{\\', '')
+                                command = command.replace('}', '')
+                        else:
+                            if in_math == 1:
+                                # avoid putting an ERT in a math; instead put command as text
+                                command = command.replace('\\\\', '\mathrm{')
+                                command = command + '}'
+                            elif in_ert == 0:
+                                # add an ERT inset with the replacement character
+                                command = command.replace('\\\\', ert_intro)
+                                command = command + ert_outro
+                            else:
+                                command = command.replace('\\\\', '\n\\backslash\n')
+                current_line = current_line + command
+            else:
+                current_line = current_line + document.body[i][j]
+        document.body[i] = current_line
+
  
  ##
  # Conversion hub
@@ -1129,9 +1948,45 @@ convert = [[246, []],
             [256, []],
             [257, [convert_caption]],
             [258, [convert_lyxline]],
-           [259, [convert_accent, normalize_font_whitespace]]]
-
-revert =  [[258, []],
+           [259, [convert_accent, normalize_font_whitespace_259]],
+           [260, []],
+           [261, [convert_changes]],
+           [262, []],
+           [263, [normalize_language_name]],
+           [264, [convert_cv_textclass]],
+           [265, [convert_tableborder]],
+           [266, []],
+           [267, []],
+           [268, []],
+           [269, []],
+           [270, []],
+           [271, [convert_ext_font_sizes]],
+           [272, []],
+           [273, []],
+           [274, [normalize_font_whitespace_274]],
+           [275, [convert_graphics_rotation]],
+           [276, [convert_arabic]]
+          ]
+
+revert =  [
+           [275, [revert_arabic]],
+           [274, [revert_graphics_rotation]],
+           [273, []],
+           [272, [revert_separator_layout]],
+           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
+           [270, [revert_ext_font_sizes]],
+           [269, [revert_beamer_alert, revert_beamer_structure]],
+           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
+           [267, [revert_CJK]],
+           [266, [revert_utf8plain]],
+           [265, [revert_armenian]],
+           [264, [revert_tableborder]],
+           [263, [revert_cv_textclass]],
+           [262, [revert_language_name]],
+           [261, [revert_ascii]],
+           [260, []],
+           [259, [revert_utf8x]],
+           [258, []],
             [257, []],
             [256, [revert_caption]],
             [255, [revert_encodings]],
@@ -1141,7 +1996,7 @@ revert =  [[258, []],
             [251, [revert_commandparams]],
             [250, [revert_cs_label]],
             [249, []],
-           [248, [revert_accent, revert_utf8]],
+           [248, [revert_accent, revert_utf8, revert_unicode]],
             [247, [revert_booktabs]],
             [246, [revert_font_settings]],
             [245, [revert_framed]]]