]> git.lyx.org Git - lyx.git/blobdiff - lib/lyx2lyx/lyx_1_5.py
add support for TABs in Listings, fileformat change, fixes http://bugzilla.lyx.org...
[lyx.git] / lib / lyx2lyx / lyx_1_5.py
index 21320cf4a0e41c415353ab5d918b3bf738931c98..05fd93fdc23688b55898a09e5aa18a2d13812188 100644 (file)
@@ -362,7 +362,7 @@ def revert_unicode_line(document, i, insets, spec_chars, replacement_character =
     math_outro='$\n\\end_inset'
 
     mod_line = u''
-    if i and document.body[i - 1][:1] != '\\':
+    if i and not is_inset_line(document, i-1):
         last_char = document.body[i - 1][-1:]
     else:
         last_char = ''
@@ -653,17 +653,17 @@ def convert_commandparams(document):
             if commandparams_info[name][0] == "":
                 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
             else:
-                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
+                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"')))
         if option2 != "":
             if commandparams_info[name][1] == "":
                 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
             else:
-                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
+                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"')))
         if argument != "":
             if commandparams_info[name][2] == "":
                 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
             else:
-                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
+                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"')))
         document.body[i:i+1] = lines
         i = i + 1
 
@@ -690,13 +690,13 @@ def revert_commandparams(document):
                     preview_line = document.body[k]
                 elif (commandparams_info[name][0] != "" and
                       pname == commandparams_info[name][0]):
-                    option1 = pvalue.strip('"').replace('\\"', '"')
+                    option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
                 elif (commandparams_info[name][1] != "" and
                       pname == commandparams_info[name][1]):
-                    option2 = pvalue.strip('"').replace('\\"', '"')
+                    option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
                 elif (commandparams_info[name][2] != "" and
                       pname == commandparams_info[name][2]):
-                    argument = pvalue.strip('"').replace('\\"', '"')
+                    argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
             elif document.body[k].strip() != "":
                 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
         if name == "bibitem":
@@ -1150,6 +1150,33 @@ def convert_accent(document):
         i += 3
 
 
+def is_inset_line(document, i):
+    """ Line i of body has an inset """
+    if document.body[i][:1] == '\\':
+        return True
+    last_tokens = "".join(document.body[i].split()[-2:])
+    return last_tokens.find('\\') != -1
+
+
+# A wrapper around normalize that handles special cases (cf. bug 3313)
+def normalize(form, text):
+    # do not normalize OHM, ANGSTROM
+    keep_characters = [0x2126,0x212b]
+    result = ''
+    convert = ''
+    for i in text:
+        if ord(i) in keep_characters:
+            if len(convert) > 0:
+                result = result + unicodedata.normalize(form, convert)
+                convert = ''
+            result = result + i
+        else:
+            convert = convert + i
+    if len(convert) > 0:
+        result = result + unicodedata.normalize(form, convert)
+    return result
+
+
 def revert_accent(document):
     inverse_accent_map = {}
     for k in accent_map:
@@ -1168,7 +1195,7 @@ def revert_accent(document):
     for i in range(len(document.body) - 1):
         if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
             continue
-        if (document.body[i+1][0] in inverse_accent_map and document.body[i][:1] != '\\'):
+        if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
             # the last character of this line and the first of the next line
             # form probably a surrogate pair, inline insets are excluded (second part of the test)
             while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
@@ -1182,9 +1209,9 @@ def revert_accent(document):
         # because we never use u'xxx' for string literals, but 'xxx'.
         # Therefore we may have to try two times to normalize the data.
         try:
-            document.body[i] = unicodedata.normalize("NFD", document.body[i])
+            document.body[i] = normalize("NFD", document.body[i])
         except TypeError:
-            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
+            document.body[i] = normalize("NFD", unicode(document.body[i], 'utf-8'))
 
     # Replace accented characters with InsetLaTeXAccent
     # Do not convert characters that can be represented in the chosen
@@ -1226,10 +1253,7 @@ def revert_accent(document):
                     if j < len(document.body[i]) - 1:
                         document.body.insert(i+1, document.body[i][j+1:])
                     # Delete the accented character
-                    if j > 0:
-                        document.body[i] = document.body[i][:j-1]
-                    else:
-                        document.body[i] = u''
+                    document.body[i] = document.body[i][:j]
                     # Finally add the InsetLaTeXAccent
                     document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                     break
@@ -1242,16 +1266,13 @@ def revert_accent(document):
                     accented_char = inverse_accented_map[accented_char]
                 accent = document.body[i][j]
                 try:
-                    dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
+                    dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                 except UnicodeEncodeError:
                     # Insert the rest of the line as new line
                     if j < len(document.body[i]) - 1:
                         document.body.insert(i+1, document.body[i][j+1:])
                     # Delete the accented characters
-                    if j > 1:
-                        document.body[i] = document.body[i][:j-1]
-                    else:
-                        document.body[i] = u''
+                    document.body[i] = document.body[i][:j-1]
                     # Finally add the InsetLaTeXAccent
                     document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
                     break
@@ -1259,7 +1280,7 @@ def revert_accent(document):
 
     # Normalize to "Normal form C" (NFC, pre-composed characters) again
     for i in range(len(document.body)):
-        document.body[i] = unicodedata.normalize("NFC", document.body[i])
+        document.body[i] = normalize("NFC", document.body[i])
 
 
 def normalize_font_whitespace_259(document):