* src/frontends/qt4/QParagraph.cpp:

[lyx.git] / lib / lyx2lyx / lyx_1_5.py
diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py

index 32f84cd54ef153538126de8bd5dc70952969633e..dbffe31abbfe550ba7cbc97bb85d3b731de53481 100644 (file)
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -21,6 +21,7 @@
  
  import re
  import unicodedata
+import sys, os
  
  from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  from LyX import get_encoding
@@ -867,7 +868,7 @@ accent_map = {
      "=" : u'\u0304', # macron
      "u" : u'\u0306', # breve
      "." : u'\u0307', # dot above
-    "\"": u'\u0308', # diaresis
+    "\"": u'\u0308', # diaeresis
      "r" : u'\u030a', # ring above
      "H" : u'\u030b', # double acute
      "v" : u'\u030c', # caron
@@ -920,7 +921,7 @@ def _convert_accent(accent, accented_char):
          return ''
      a = accent_map.get(type)
      if a:
-        return unicodedata.normalize("NFKC", "%s%s" % (char, a))
+        return unicodedata.normalize("NFC", "%s%s" % (char, a))
      return ''
  
  
@@ -1026,9 +1027,9 @@ def revert_accent(document):
          # because we never use u'xxx' for string literals, but 'xxx'.
          # Therefore we may have to try two times to normalize the data.
          try:
-            document.body[i] = unicodedata.normalize("NFKD", document.body[i])
+            document.body[i] = unicodedata.normalize("NFD", document.body[i])
          except TypeError:
-            document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
+            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
  
      # Replace accented characters with InsetLaTeXAccent
      # Do not convert characters that can be represented in the chosen
@@ -1085,7 +1086,7 @@ def revert_accent(document):
                      accented_char = inverse_accented_map[accented_char]
                  accent = document.body[i][j]
                  try:
-                    dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
+                    dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                  except UnicodeEncodeError:
                      # Insert the rest of the line as new line
                      if j < len(document.body[i]) - 1:
@@ -1100,7 +1101,7 @@ def revert_accent(document):
                      break
      # Normalize to "Normal form C" (NFC, pre-composed characters) again
      for i in range(numberoflines):
-        document.body[i] = unicodedata.normalize("NFKC", document.body[i])
+        document.body[i] = unicodedata.normalize("NFC", document.body[i])
  
  
  def normalize_font_whitespace_259(document):
@@ -1356,6 +1357,66 @@ def revert_cv_textclass(document):
          document.textclass = "cv"
  
  
+#
+# add scaleBeforeRotation graphics param
+def convert_graphics_rotation(document):
+    """Add the scaleBeforeRotation parameter to rotated, resized graphics.
+
+    Every Graphics inset in document.body that has a rotateAngle parameter
+    together with a width, height or scale parameter gets a
+    scaleBeforeRotation line inserted at the index that find_end_of_inset
+    reports as the end of the inset. All other graphics insets are left
+    untouched.
+    """
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Graphics", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            # should not happen; without an end marker the extent of the
+            # inset is unknown, so leave it alone and go on with the next one
+            document.warning("Malformed LyX document: Could not find end of graphics inset.")
+            i = i + 1
+            continue
+        # Search for rotateAngle and width or height or scale.
+        # If these params are not there, nothing needs to be done.
+        rotate_line = find_token(document.body, "\trotateAngle", i + 1, j)
+        size_line = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
+        if rotate_line != -1 and size_line != -1:
+            # Graphics params are looked up with a leading tab everywhere in
+            # this file (including "\tscaleBeforeRotation" on revert), so
+            # write the new param in the same form.
+            document.body.insert(j, '\tscaleBeforeRotation')
+        i = i + 1
+
+
+#
+# remove scaleBeforeRotation graphics param
+def revert_graphics_rotation(document):
+    """Remove the scaleBeforeRotation parameter from graphics insets.
+
+    For every Graphics inset in document.body:
+
+    * if it has a scaleBeforeRotation line, that line is deleted;
+    * otherwise, if it has a rotateAngle together with a width, height or
+      scale parameter, the angle is moved into the "special" parameter
+      (as "angle=<value>", put in front of any existing special value)
+      and the rotateAngle line is deleted.
+
+    Insets matching neither case are left untouched.
+    """
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Graphics", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            # should not happen; the extent of the inset is unknown, so skip
+            # it rather than search and insert relative to index -1
+            document.warning("Malformed LyX document: Could not find end of graphics inset.")
+            i = i + 1
+            continue
+        # If there's a scaleBeforeRotation param, just remove that.
+        # Accept the parameter with or without its leading tab.
+        k = find_tokens(document.body, ["\tscaleBeforeRotation", "scaleBeforeRotation"], i + 1, j)
+        if k != -1:
+            del document.body[k]
+        else:
+            # if not, and if we have rotateAngle and width or height or scale,
+            # we have to put the rotateAngle value to special
+            rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
+            special = get_value(document.body, 'special', i + 1, j)
+            size_line = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
+            # An inset without a size param needs no change. Fall through to
+            # the next inset instead of abandoning the rest of the document.
+            if rotateAngle != "" and size_line != -1:
+                if special == "":
+                    document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
+                else:
+                    l = find_token(document.body, "\tspecial", i + 1, j)
+                    document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
+                k = find_token(document.body, "\trotateAngle", i + 1, j)
+                if k != -1:
+                    del document.body[k]
+        i = i + 1
+
+
+
  def convert_tableborder(document):
      # The problematic is: LyX double the table cell border as it ignores the "|" character in
      # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
@@ -1749,6 +1810,126 @@ something
                                      r'\end_layout'
                                      ]
  
+def convert_arabic(document):
+    """Rename the language "arabic" to "arabic_arabtex".
+
+    If the document language is "arabic", document.language and the
+    language line of document.header are updated. In document.body every
+    line that switches the text language to arabic is rewritten to switch
+    to arabic_arabtex instead.
+    """
+    if document.language == "arabic":
+        document.language = "arabic_arabtex"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language arabic_arabtex"
+    for i, line in enumerate(document.body):
+        # Compare the whole command: a substring test would also hit any
+        # other language name that merely starts with "arabic" and then
+        # overwrite that line.
+        if line.split() == ["\\lang", "arabic"]:
+            # change the language name
+            document.body[i] = "\\lang arabic_arabtex"
+       
+def revert_arabic(document):
+    """Rename the language "arabic_arabtex" back to "arabic".
+
+    If the document language is "arabic_arabtex", document.language and the
+    language line of document.header are updated. In document.body every
+    line that switches the text language to arabic_arabtex is rewritten to
+    switch to arabic instead.
+    """
+    if document.language == "arabic_arabtex":
+        document.language = "arabic"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language arabic"
+    for i, line in enumerate(document.body):
+        # Compare the whole command so that only this exact language name
+        # is rewritten, not a line that merely contains it.
+        if line.split() == ["\\lang", "arabic_arabtex"]:
+            # change the language name
+            document.body[i] = "\\lang arabic"
+
+def _read_unicodesymbols():
+    """Load the unicodesymbols table used by revert_unicode.
+
+    Returns a dict that maps a unicode character to the list
+    [command, flag1, flag2] read from its line of the file. Lines starting
+    with '#' and lines that cannot be parsed are skipped.
+    """
+    # The file is looked up in the parent of the directory that holds the
+    # running script (.../lyx2lyx -> .../unicodesymbols). str.strip() takes
+    # a *set of characters*, not a suffix, so it cannot be used to cut the
+    # last path component off; it would also eat leading characters of a
+    # relative path.
+    script_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
+    fp = open(os.path.join(os.path.dirname(script_dir), 'unicodesymbols'), 'r')
+    spec_chars = {}
+    try:
+        for line in fp.readlines():
+            if line[0] == '#':
+                continue
+            line = line.replace(' "', ' ')  # remove all quotation marks with spaces before
+            line = line.replace('" ', ' ')  # remove all quotation marks with spaces after
+            line = line.replace(r'\"', '"')  # replace \" by " (for characters with diaeresis)
+            fields = line.split(None, 3)
+            if len(fields) != 4:
+                continue
+            # flag1 and flag2 are preamble & flags
+            # currently NOT implemented
+            ucs4, command, flag1, flag2 = fields
+            try:
+                # int(..., 0) reads the same integer literals eval() did,
+                # without executing arbitrary expressions from the file
+                spec_chars[unichr(int(ucs4, 0))] = [command, flag1, flag2]
+            except (ValueError, OverflowError):
+                # not a number, or not a code point unichr() can represent
+                pass
+    finally:
+        fp.close()
+    return spec_chars
+
+
+def revert_unicode(document):
+    """Transform unicode symbols according to the unicode list.
+
+    Each character of document.body that has an entry in the unicodesymbols
+    table is replaced by the command from that entry, wrapped in an ERT or
+    math inset unless the character already sits inside one.
+
+    Preamble flags are not implemented.
+    Combination characters are currently ignored (they are dropped).
+    Forced output is currently not enforced.
+    """
+    spec_chars = _read_unicodesymbols()
+    # Define strings to start and end ERT and math insets
+    ert_intro = '\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
+    ert_outro = '\n\\end_layout\n\n\\end_inset\n\n'
+    math_intro = '\n\\begin_inset Formula $'
+    math_outro = '$\n\\end_inset\n'
+    # Find unicode characters and replace them
+    in_ert = 0  # flag set to 1 if in ERT inset
+    in_math = 0  # flag set to 1 if in math inset
+    insets = []  # list of active insets
+    for i, current_line in enumerate(document.body):
+        if current_line.find('\\begin_inset') > -1:
+            # check which inset to start
+            if current_line.find('\\begin_inset ERT') > -1:
+                in_ert = 1
+                insets.append('ert')
+            elif current_line.find('\\begin_inset Formula') > -1:
+                in_math = 1
+                insets.append('math')
+            else:
+                insets.append('other')
+        if current_line.find('\\end_inset') > -1 and insets:
+            # check which inset to end; an empty list (unbalanced
+            # \end_inset) is silently tolerated
+            cur_inset = insets.pop()
+            if cur_inset == 'ert':
+                in_ert = 0
+            elif cur_inset == 'math':
+                in_math = 0
+        pieces = []  # parts of the modified line, joined at the end
+        for char in current_line:
+            if char not in spec_chars:
+                pieces.append(char)
+                continue
+            command, flag1, flag2 = spec_chars[char]
+            if (flag1 + flag2).find('combining') > -1:
+                # combining characters are dropped
+                continue
+            # The table stores a leading backslash doubled; the branches
+            # below turn it into whatever the surrounding context needs.
+            if command[0:2] == '\\\\':
+                if command[2:12] == 'ensuremath':
+                    if in_ert == 1:
+                        # math in ERT
+                        command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
+                        command = command.replace('}', '$\n')
+                    elif in_math == 0:
+                        # add a math inset with the replacement character
+                        command = command.replace('\\\\ensuremath{\\', math_intro)
+                        command = command.replace('}', math_outro)
+                    else:
+                        # we are already in a math inset
+                        command = command.replace('\\\\ensuremath{\\', '')
+                        command = command.replace('}', '')
+                else:
+                    if in_math == 1:
+                        # avoid putting an ERT in a math; instead put command as text
+                        command = command.replace('\\\\', '\\mathrm{')
+                        command = command + '}'
+                    elif in_ert == 0:
+                        # add an ERT inset with the replacement character
+                        command = command.replace('\\\\', ert_intro)
+                        command = command + ert_outro
+                    else:
+                        command = command.replace('\\\\', '\n\\backslash\n')
+            pieces.append(command)
+        document.body[i] = ''.join(pieces)
+
+
  ##
  # Conversion hub
  #
@@ -1782,10 +1963,14 @@ convert = [[246, []],
             [271, [convert_ext_font_sizes]],
             [272, []],
             [273, []],
-           [274, [normalize_font_whitespace_274]]
+           [274, [normalize_font_whitespace_274]],
+           [275, [convert_graphics_rotation]],
+           [276, [convert_arabic]]
            ]
  
  revert =  [
+           [275, [revert_arabic]],
+           [274, [revert_graphics_rotation]],
             [273, []],
             [272, [revert_separator_layout]],
             [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
@@ -1811,7 +1996,7 @@ revert =  [
             [251, [revert_commandparams]],
             [250, [revert_cs_label]],
             [249, []],
-           [248, [revert_accent, revert_utf8]],
+           [248, [revert_accent, revert_utf8, revert_unicode]],
             [247, [revert_booktabs]],
             [246, [revert_font_settings]],
             [245, [revert_framed]]]