]> git.lyx.org Git - lyx.git/blobdiff - lib/lyx2lyx/lyx_1_5.py
* src/frontends/qt4/QParagraph.cpp:
[lyx.git] / lib / lyx2lyx / lyx_1_5.py
index 595fffe15fae5e217246869612994bd5d5357bcd..dbffe31abbfe550ba7cbc97bb85d3b731de53481 100644 (file)
@@ -21,6 +21,7 @@
 
 import re
 import unicodedata
+import sys, os
 
 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
 from LyX import get_encoding
@@ -867,7 +868,7 @@ accent_map = {
     "=" : u'\u0304', # macron
     "u" : u'\u0306', # breve
     "." : u'\u0307', # dot above
-    "\"": u'\u0308', # diaresis
+    "\"": u'\u0308', # diaeresis
     "r" : u'\u030a', # ring above
     "H" : u'\u030b', # double acute
     "v" : u'\u030c', # caron
@@ -920,7 +921,7 @@ def _convert_accent(accent, accented_char):
         return ''
     a = accent_map.get(type)
     if a:
-        return unicodedata.normalize("NFKC", "%s%s" % (char, a))
+        return unicodedata.normalize("NFC", "%s%s" % (char, a))
     return ''
 
 
@@ -1026,9 +1027,9 @@ def revert_accent(document):
         # because we never use u'xxx' for string literals, but 'xxx'.
         # Therefore we may have to try two times to normalize the data.
         try:
-            document.body[i] = unicodedata.normalize("NFKD", document.body[i])
+            document.body[i] = unicodedata.normalize("NFD", document.body[i])
         except TypeError:
-            document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
+            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
 
     # Replace accented characters with InsetLaTeXAccent
     # Do not convert characters that can be represented in the chosen
@@ -1085,7 +1086,7 @@ def revert_accent(document):
                     accented_char = inverse_accented_map[accented_char]
                 accent = document.body[i][j]
                 try:
-                    dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
+                    dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                 except UnicodeEncodeError:
                     # Insert the rest of the line as new line
                     if j < len(document.body[i]) - 1:
@@ -1100,7 +1101,7 @@ def revert_accent(document):
                     break
     # Normalize to "Normal form C" (NFC, pre-composed characters) again
     for i in range(numberoflines):
-        document.body[i] = unicodedata.normalize("NFKC", document.body[i])
+        document.body[i] = unicodedata.normalize("NFC", document.body[i])
 
 
 def normalize_font_whitespace_259(document):
@@ -1371,17 +1372,15 @@ def convert_graphics_rotation(document):
             document.warning("Malformed LyX document: Could not find end of graphics inset.")
         # Seach for rotateAngle and width or height or scale
         # If these params are not there, nothing needs to be done.
-        # FIXME: this also inserts scaleBeforeRotation if "rotateAngle" is not there!
-        for k in range(i+1, j):
-            if (document.body[k].find("rotateAngle") and \
-                (document.body[k].find("width") or \
-                document.body[k].find("height") or \
-                document.body[k].find("scale"))):
-                        document.body.insert(j, 'scaleBeforeRotation')
+        k = find_token(document.body, "\trotateAngle", i + 1, j)
+        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
+        if (k != -1 and l != -1):
+            document.body.insert(j, 'scaleBeforeRotation')
         i = i + 1
 
 
-# FIXME: does not work at all
+#
+# remove scaleBeforeRotation graphics param
 def revert_graphics_rotation(document):
     " remove scaleBeforeRotation graphics parameter. "
     i = 0
@@ -1393,24 +1392,27 @@ def revert_graphics_rotation(document):
         if j == -1:
             # should not happen
             document.warning("Malformed LyX document: Could not find end of graphics inset.")
-        for k in range(i+1, j):
-            # If there's a scaleBeforeRotation param, just remove that
-            if document.body[k].find('scaleBeforeRotation'):
-                del document.body[k]
-                break
+        # If there's a scaleBeforeRotation param, just remove that
+        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
+        if k != -1:
+            del document.body[k]
+        else:
             # if not, and if we have rotateAngle and width or height or scale,
             # we have to put the rotateAngle value to special
-            rotateAngle = get_value(document.body, 'rotateAngle', i+1, j)
-            special = get_value(document.body, 'special', i+1, j)
-            if (document.body[k].find("width") or \
-                document.body[k].find("height") or \
-                document.body[k].find("scale") and \
-                document.body[k].find("rotateAngle")):
-                    if special == "":
-                        document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
-                    else:
-                        l = find_token(document.body, "special", i+1, j)
-                        document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
+            rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
+            special = get_value(document.body, 'special', i + 1, j)
+            if rotateAngle != "":
+                k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
+                if k == -1:
+                    break
+                if special == "":
+                    document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
+                else:
+                    l = find_token(document.body, "\tspecial", i + 1, j)
+                    document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
+                k = find_token(document.body, "\trotateAngle", i + 1, j)
+                if k != -1:
+                    del document.body[k]
         i = i + 1
 
 
@@ -1808,6 +1810,126 @@ something
                                     r'\end_layout'
                                     ]
 
+def convert_arabic (document):
+    if document.language == "arabic":
+        document.language = "arabic_arabtex"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language arabic_arabtex"
+    i = 0
+    while i < len(document.body):
+        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
+        if (h != -1):
+            # change the language name
+            document.body[i] = '\lang arabic_arabtex'
+        i = i + 1
+       
+def revert_arabic (document):
+    if document.language == "arabic_arabtex":
+        document.language = "arabic"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language arabic"
+    i = 0
+    while i < len(document.body):
+        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
+        if (h != -1):
+            # change the language name
+            document.body[i] = '\lang arabic'
+        i = i + 1
+
+def revert_unicode(document):
+    '''Transform unicode symbols according to the unicode list.
+Preamble flags are not implemented.
+Combination characters are currently ignored.
+Forced output is currently not enforced'''
+    pathname = os.path.dirname(sys.argv[0])
+    fp = open(pathname.strip('lyx2lyx') + 'unicodesymbols','r')
+    spec_chars = {}
+    for line in fp.readlines():
+        if line[0] != '#':
+            line=line.replace(' "',' ') # remove all quotation marks with spaces before
+            line=line.replace('" ',' ') # remove all quotation marks with spaces after
+            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
+            try:
+                # flag1 and flag2 are preamble & flags
+                # currently NOT implemented
+                [ucs4,command,flag1,flag2] =line.split(None,3)
+                spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
+            except:
+                pass
+    fp.close()
+    # Define strings to start and end ERT and math insets
+    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
+    ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
+    math_intro='\n\\begin_inset Formula $'
+    math_outro='$\n\\end_inset\n'
+    # Find unicode characters and replace them
+    in_ert = 0 # flag set to 1 if in ERT inset
+    in_math = 0 # flag set to 1 if in math inset
+    insets = [] # list of active insets
+    for i, current_line in enumerate(document.body):
+        if current_line.find('\\begin_inset') > -1:
+            # check which inset to start
+            if current_line.find('\\begin_inset ERT') > -1:
+                in_ert = 1
+                insets.append('ert')
+            elif current_line.find('\\begin_inset Formula') > -1:
+                in_math = 1
+                insets.append('math')
+            else:
+                insets.append('other')
+        if current_line.find('\\end_inset') > -1:
+            # check which inset to end
+            try:
+                cur_inset = insets.pop()
+                if cur_inset == 'ert':
+                    in_ert = 0
+                elif cur_inset == 'math':
+                    in_math = 0
+                else:
+                    pass # end of other inset
+            except:
+                pass # inset list was empty (for some reason)
+        current_line=''; # clear to have as container for modified line
+        for j in range(len(document.body[i])):
+            if spec_chars.has_key(document.body[i][j]):
+                flags = spec_chars[document.body[i][j]][1] + spec_chars[document.body[i][j]][2]
+                if flags.find('combining') > -1:
+                    command = ''
+                else:
+                    command = spec_chars[document.body[i][j]][0]; # the command to replace unicode
+                    if command[0:2] == '\\\\':
+                        if command[2:12]=='ensuremath':
+                            if in_ert == 1:
+                                # math in ERT
+                                command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
+                                command = command.replace('}', '$\n')
+                            elif in_math == 0:
+                                # add a math inset with the replacement character
+                                command = command.replace('\\\\ensuremath{\\', math_intro)
+                                command = command.replace('}', math_outro)
+                            else:
+                                # we are already in a math inset
+                                command = command.replace('\\\\ensuremath{\\', '')
+                                command = command.replace('}', '')
+                        else:
+                            if in_math == 1:
+                                # avoid putting an ERT in a math; instead put command as text
+                                command = command.replace('\\\\', '\mathrm{')
+                                command = command + '}'
+                            elif in_ert == 0:
+                                # add an ERT inset with the replacement character
+                                command = command.replace('\\\\', ert_intro)
+                                command = command + ert_outro
+                            else:
+                                command = command.replace('\\\\', '\n\\backslash\n')
+                current_line = current_line + command
+            else:
+                current_line = current_line + document.body[i][j]
+        document.body[i] = current_line
+
+
 ##
 # Conversion hub
 #
@@ -1842,10 +1964,12 @@ convert = [[246, []],
            [272, []],
            [273, []],
            [274, [normalize_font_whitespace_274]],
-           [275, [convert_graphics_rotation]]
+           [275, [convert_graphics_rotation]],
+           [276, [convert_arabic]]
           ]
 
 revert =  [
+           [275, [revert_arabic]],
            [274, [revert_graphics_rotation]],
            [273, []],
            [272, [revert_separator_layout]],
@@ -1872,7 +1996,7 @@ revert =  [
            [251, [revert_commandparams]],
            [250, [revert_cs_label]],
            [249, []],
-           [248, [revert_accent, revert_utf8]],
+           [248, [revert_accent, revert_utf8, revert_unicode]],
            [247, [revert_booktabs]],
            [246, [revert_font_settings]],
            [245, [revert_framed]]]