* src/frontends/qt4/QParagraph.cpp:

[lyx.git] / lib / lyx2lyx / lyx_1_5.py
diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py

index 898b0b90e634fb45d88278c2f58dc0cfb928dfa7..dbffe31abbfe550ba7cbc97bb85d3b731de53481 100644 (file)
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -21,8 +21,9 @@
  
  import re
  import unicodedata
+import sys, os
  
-from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value
+from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
  from LyX import get_encoding
  
  
@@ -37,6 +38,10 @@ def find_end_of_layout(lines, i):
      " Find end of layout, where lines[i] is included."
      return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  
+def find_beginning_of_layout(lines, i):
+    "Find beginning of layout, where lines[i] is included."
+    return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
+
  # End of helper functions
  ####################################################################
  
@@ -863,7 +868,7 @@ accent_map = {
      "=" : u'\u0304', # macron
      "u" : u'\u0306', # breve
      "." : u'\u0307', # dot above
-    "\"": u'\u0308', # diaresis
+    "\"": u'\u0308', # diaeresis
      "r" : u'\u030a', # ring above
      "H" : u'\u030b', # double acute
      "v" : u'\u030c', # caron
@@ -916,7 +921,7 @@ def _convert_accent(accent, accented_char):
          return ''
      a = accent_map.get(type)
      if a:
-        return unicodedata.normalize("NFKC", "%s%s" % (char, a))
+        return unicodedata.normalize("NFC", "%s%s" % (char, a))
      return ''
  
  
@@ -1022,9 +1027,9 @@ def revert_accent(document):
          # because we never use u'xxx' for string literals, but 'xxx'.
          # Therefore we may have to try two times to normalize the data.
          try:
-            document.body[i] = unicodedata.normalize("NFKD", document.body[i])
+            document.body[i] = unicodedata.normalize("NFD", document.body[i])
          except TypeError:
-            document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
+            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
  
      # Replace accented characters with InsetLaTeXAccent
      # Do not convert characters that can be represented in the chosen
@@ -1081,7 +1086,7 @@ def revert_accent(document):
                      accented_char = inverse_accented_map[accented_char]
                  accent = document.body[i][j]
                  try:
-                    dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
+                    dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                  except UnicodeEncodeError:
                      # Insert the rest of the line as new line
                      if j < len(document.body[i]) - 1:
@@ -1096,25 +1101,62 @@ def revert_accent(document):
                      break
      # Normalize to "Normal form C" (NFC, pre-composed characters) again
      for i in range(numberoflines):
-        document.body[i] = unicodedata.normalize("NFKC", document.body[i])
+        document.body[i] = unicodedata.normalize("NFC", document.body[i])
  
  
-def normalize_font_whitespace(document):
+def normalize_font_whitespace_259(document):
      """ Before format 259 the font changes were ignored if a
      whitespace was the first or last character in the sequence, this function
      transfers the whitespace outside."""
-
-    if document.backend != "latex":
-        return
-
-    lines = document.body
-
+       
      char_properties = {"\\series": "default",
                         "\\emph": "default",
                         "\\color": "none",
                         "\\shape": "default",
                         "\\bar": "default",
                         "\\family": "default"}
+    return normalize_font_whitespace(document, char_properties)
+
+def normalize_font_whitespace_274(document):
+    """ Before format 259 (sic) the font changes were ignored if a
+    whitespace was the first or last character in the sequence. This was 
+    corrected for most font properties in format 259, but the language 
+    was forgotten then. This function applies the same conversion done
+    there (namely, transfers the whitespace outside) for font language
+    changes, as well."""
+
+    char_properties = {"\\lang": "default"}
+    return normalize_font_whitespace(document, char_properties)
+
+def get_paragraph_language(document, i):
+    """ Return the language of the paragraph in which line i of the document
+    body is. If the first thing in the paragraph is a \\lang command, that
+    is the paragraph's langauge; otherwise, the paragraph's language is the 
+    document's language."""
+
+    lines = document.body
+       
+    first_nonempty_line = \
+        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)
+
+    words = lines[first_nonempty_line].split()
+
+    if len(words) > 1 and words[0] == "\\lang":
+        return words[1]
+    else:
+        return document.language
+       
+def normalize_font_whitespace(document, char_properties):
+    """ Before format 259 the font changes were ignored if a
+    whitespace was the first or last character in the sequence, this function
+    transfers the whitespace outside. Only a change in one of the properties
+    in the provided    char_properties is handled by this function."""
+
+    if document.backend != "latex":
+        return
+
+    lines = document.body
+
      changes = {}
  
      i = 0
@@ -1124,6 +1166,10 @@ def normalize_font_whitespace(document):
          if len(words) > 0 and words[0] == "\\begin_layout":
              # a new paragraph resets all font changes
              changes.clear()
+            # also reset the default language to be the paragraph's language
+            if "\\lang" in char_properties.keys():
+                char_properties["\\lang"] = \
+                    get_paragraph_language(document, i + 1)
  
          elif len(words) > 1 and words[0] in char_properties.keys():
              # we have a font change
@@ -1311,6 +1357,66 @@ def revert_cv_textclass(document):
          document.textclass = "cv"
  
  
+#
+# add scaleBeforeRotation graphics param
+def convert_graphics_rotation(document):
+    " add scaleBeforeRotation graphics parameter. "
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset Graphics", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i+1)
+        if j == -1:
+            # should not happen
+            document.warning("Malformed LyX document: Could not find end of graphics inset.")
+        # Seach for rotateAngle and width or height or scale
+        # If these params are not there, nothing needs to be done.
+        k = find_token(document.body, "\trotateAngle", i + 1, j)
+        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
+        if (k != -1 and l != -1):
+            document.body.insert(j, 'scaleBeforeRotation')
+        i = i + 1
+
+
+#
+# remove scaleBeforeRotation graphics param
+def revert_graphics_rotation(document):
+    " remove scaleBeforeRotation graphics parameter. "
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset Graphics", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            # should not happen
+            document.warning("Malformed LyX document: Could not find end of graphics inset.")
+        # If there's a scaleBeforeRotation param, just remove that
+        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
+        if k != -1:
+            del document.body[k]
+        else:
+            # if not, and if we have rotateAngle and width or height or scale,
+            # we have to put the rotateAngle value to special
+            rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
+            special = get_value(document.body, 'special', i + 1, j)
+            if rotateAngle != "":
+                k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
+                if k == -1:
+                    break
+                if special == "":
+                    document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
+                else:
+                    l = find_token(document.body, "\tspecial", i + 1, j)
+                    document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
+                k = find_token(document.body, "\trotateAngle", i + 1, j)
+                if k != -1:
+                    del document.body[k]
+        i = i + 1
+
+
+
  def convert_tableborder(document):
      # The problematic is: LyX double the table cell border as it ignores the "|" character in
      # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
@@ -1656,6 +1762,173 @@ def convert_ext_font_sizes(document):
      else:
          del document.header[i]
  
+def revert_separator_layout(document):
+    r'''Revert --Separator-- to a lyx note
+From
+
+\begin_layout --Separator--
+something
+\end_layout
+
+to
+
+\begin_layout Standard
+\begin_inset Note Note
+status open
+
+\begin_layout Standard
+Separate Evironment
+\end_layout
+
+\end_inset
+something
+
+\end_layout
+
+    '''
+
+    i = 0
+    while True:
+        i = find_token(document.body, r'\begin_layout --Separator--', i)
+        if i == -1:
+            break
+        j = find_end_of_layout(document.body, i + 1)
+        if j == -1:
+            # this should not happen
+            break
+        document.body[i : j + 1] = [r'\begin_layout Standard',
+                                    r'\begin_inset Note Note',
+                                    'status open',
+                                    '',
+                                    r'\begin_layout Standard',
+                                    'Separate Environment',
+                                    r'\end_layout',
+                                    '',
+                                    r'\end_inset'] + \
+                                    document.body[ i + 1 : j] + \
+                                    ['',
+                                    r'\end_layout'
+                                    ]
+
+def convert_arabic (document):
+    if document.language == "arabic":
+        document.language = "arabic_arabtex"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language arabic_arabtex"
+    i = 0
+    while i < len(document.body):
+        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
+        if (h != -1):
+            # change the language name
+            document.body[i] = '\lang arabic_arabtex'
+        i = i + 1
+       
+def revert_arabic (document):
+    if document.language == "arabic_arabtex":
+        document.language = "arabic"
+        i = find_token(document.header, "\\language", 0)
+        if i != -1:
+            document.header[i] = "\\language arabic"
+    i = 0
+    while i < len(document.body):
+        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
+        if (h != -1):
+            # change the language name
+            document.body[i] = '\lang arabic'
+        i = i + 1
+
+def revert_unicode(document):
+    '''Transform unicode symbols according to the unicode list.
+Preamble flags are not implemented.
+Combination characters are currently ignored.
+Forced output is currently not enforced'''
+    pathname = os.path.dirname(sys.argv[0])
+    fp = open(pathname.strip('lyx2lyx') + 'unicodesymbols','r')
+    spec_chars = {}
+    for line in fp.readlines():
+        if line[0] != '#':
+            line=line.replace(' "',' ') # remove all quotation marks with spaces before
+            line=line.replace('" ',' ') # remove all quotation marks with spaces after
+            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
+            try:
+                # flag1 and flag2 are preamble & flags
+                # currently NOT implemented
+                [ucs4,command,flag1,flag2] =line.split(None,3)
+                spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
+            except:
+                pass
+    fp.close()
+    # Define strings to start and end ERT and math insets
+    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
+    ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
+    math_intro='\n\\begin_inset Formula $'
+    math_outro='$\n\\end_inset\n'
+    # Find unicode characters and replace them
+    in_ert = 0 # flag set to 1 if in ERT inset
+    in_math = 0 # flag set to 1 if in math inset
+    insets = [] # list of active insets
+    for i, current_line in enumerate(document.body):
+        if current_line.find('\\begin_inset') > -1:
+            # check which inset to start
+            if current_line.find('\\begin_inset ERT') > -1:
+                in_ert = 1
+                insets.append('ert')
+            elif current_line.find('\\begin_inset Formula') > -1:
+                in_math = 1
+                insets.append('math')
+            else:
+                insets.append('other')
+        if current_line.find('\\end_inset') > -1:
+            # check which inset to end
+            try:
+                cur_inset = insets.pop()
+                if cur_inset == 'ert':
+                    in_ert = 0
+                elif cur_inset == 'math':
+                    in_math = 0
+                else:
+                    pass # end of other inset
+            except:
+                pass # inset list was empty (for some reason)
+        current_line=''; # clear to have as container for modified line
+        for j in range(len(document.body[i])):
+            if spec_chars.has_key(document.body[i][j]):
+                flags = spec_chars[document.body[i][j]][1] + spec_chars[document.body[i][j]][2]
+                if flags.find('combining') > -1:
+                    command = ''
+                else:
+                    command = spec_chars[document.body[i][j]][0]; # the command to replace unicode
+                    if command[0:2] == '\\\\':
+                        if command[2:12]=='ensuremath':
+                            if in_ert == 1:
+                                # math in ERT
+                                command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
+                                command = command.replace('}', '$\n')
+                            elif in_math == 0:
+                                # add a math inset with the replacement character
+                                command = command.replace('\\\\ensuremath{\\', math_intro)
+                                command = command.replace('}', math_outro)
+                            else:
+                                # we are already in a math inset
+                                command = command.replace('\\\\ensuremath{\\', '')
+                                command = command.replace('}', '')
+                        else:
+                            if in_math == 1:
+                                # avoid putting an ERT in a math; instead put command as text
+                                command = command.replace('\\\\', '\mathrm{')
+                                command = command + '}'
+                            elif in_ert == 0:
+                                # add an ERT inset with the replacement character
+                                command = command.replace('\\\\', ert_intro)
+                                command = command + ert_outro
+                            else:
+                                command = command.replace('\\\\', '\n\\backslash\n')
+                current_line = current_line + command
+            else:
+                current_line = current_line + document.body[i][j]
+        document.body[i] = current_line
+
  
  ##
  # Conversion hub
@@ -1675,7 +1948,7 @@ convert = [[246, []],
             [256, []],
             [257, [convert_caption]],
             [258, [convert_lyxline]],
-           [259, [convert_accent, normalize_font_whitespace]],
+           [259, [convert_accent, normalize_font_whitespace_259]],
             [260, []],
             [261, [convert_changes]],
             [262, []],
@@ -1687,10 +1960,20 @@ convert = [[246, []],
             [268, []],
             [269, []],
             [270, []],
-           [271, [convert_ext_font_sizes]]
+           [271, [convert_ext_font_sizes]],
+           [272, []],
+           [273, []],
+           [274, [normalize_font_whitespace_274]],
+           [275, [convert_graphics_rotation]],
+           [276, [convert_arabic]]
            ]
  
  revert =  [
+           [275, [revert_arabic]],
+           [274, [revert_graphics_rotation]],
+           [273, []],
+           [272, [revert_separator_layout]],
+           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
             [270, [revert_ext_font_sizes]],
             [269, [revert_beamer_alert, revert_beamer_structure]],
             [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
@@ -1713,7 +1996,7 @@ revert =  [
             [251, [revert_commandparams]],
             [250, [revert_cs_label]],
             [249, []],
-           [248, [revert_accent, revert_utf8]],
+           [248, [revert_accent, revert_utf8, revert_unicode]],
             [247, [revert_booktabs]],
             [246, [revert_font_settings]],
             [245, [revert_framed]]]