import re
import unicodedata
+import sys, os
from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
from LyX import get_encoding
"=" : u'\u0304', # macron
"u" : u'\u0306', # breve
"." : u'\u0307', # dot above
- "\"": u'\u0308', # diaresis
+ "\"": u'\u0308', # diaeresis
"r" : u'\u030a', # ring above
"H" : u'\u030b', # double acute
"v" : u'\u030c', # caron
return ''
a = accent_map.get(type)
if a:
- return unicodedata.normalize("NFKC", "%s%s" % (char, a))
+ return unicodedata.normalize("NFC", "%s%s" % (char, a))
return ''
# because we never use u'xxx' for string literals, but 'xxx'.
# Therefore we may have to try two times to normalize the data.
try:
- document.body[i] = unicodedata.normalize("NFKD", document.body[i])
+ document.body[i] = unicodedata.normalize("NFD", document.body[i])
except TypeError:
- document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
+ document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
# Replace accented characters with InsetLaTeXAccent
# Do not convert characters that can be represented in the chosen
accented_char = inverse_accented_map[accented_char]
accent = document.body[i][j]
try:
- dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
+ dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
except UnicodeEncodeError:
# Insert the rest of the line as new line
if j < len(document.body[i]) - 1:
break
# Normalize to "Normal form C" (NFC, pre-composed characters) again
for i in range(numberoflines):
- document.body[i] = unicodedata.normalize("NFKC", document.body[i])
+ document.body[i] = unicodedata.normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
document.textclass = "cv"
#
# add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    """Add the scaleBeforeRotation parameter to graphics insets.

    Every "\\begin_inset Graphics" in document.body that has a rotateAngle
    parameter together with at least one of width, height or scale gets a
    scaleBeforeRotation line inserted just before the end of the inset.
    Insets whose end cannot be located are reported through
    document.warning() and left untouched.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Without a known end there is no safe search range or insert
            # position (an index of -1 would address the end of the body),
            # so skip this inset.
            i = i + 1
            continue
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        rotate_line = find_token(document.body, "\trotateAngle", i + 1, j)
        size_line = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if rotate_line != -1 and size_line != -1:
            # Written with the leading tab used for the other graphics
            # parameters matched above; revert_graphics_rotation looks the
            # parameter up as "\tscaleBeforeRotation".
            document.body.insert(j, '\tscaleBeforeRotation')
        i = i + 1
+
+
#
# remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    """Remove the scaleBeforeRotation parameter from graphics insets.

    For every "\\begin_inset Graphics" in document.body:

    * if a scaleBeforeRotation line is present, it is simply deleted;
    * otherwise, if the inset has a rotateAngle and at least one of width,
      height or scale, the angle is moved into the "special" parameter
      (as "angle=<value>", prepended to an existing special value) and the
      rotateAngle line is deleted.

    Insets whose end cannot be located are reported through
    document.warning() and left untouched.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # No usable search range for this inset; go on with the next one.
            i = i + 1
            continue
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del document.body[k]
            i = i + 1
            continue
        # if not, and if we have rotateAngle and width or height or scale,
        # we have to put the rotateAngle value to special
        rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
        special = get_value(document.body, 'special', i + 1, j)
        has_size = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j) != -1
        # An inset without a size parameter needs no change; only this inset
        # is skipped, the scan continues with the following insets.
        if rotateAngle != "" and has_size:
            if special == "":
                document.body.insert(j - 1, '\tspecial angle=%s' % rotateAngle)
                # The insertion moved the end of the inset down by one line;
                # keep j in step so the rotateAngle search below still
                # covers the whole inset.
                j = j + 1
            else:
                s = find_token(document.body, "\tspecial", i + 1, j)
                if s != -1:
                    keyword_len = len("\tspecial")
                    line = document.body[s]
                    # Rewrite only the first occurrence of the value after
                    # the keyword, so that neither the keyword itself nor a
                    # repeated value later on the line is touched.
                    document.body[s] = line[:keyword_len] + line[keyword_len:].replace(
                        special, 'angle=%s,%s' % (rotateAngle, special), 1)
            k = find_token(document.body, "\trotateAngle", i + 1, j)
            if k != -1:
                del document.body[k]
        i = i + 1
+
+
+
def convert_tableborder(document):
# The problem is: LyX doubles the table cell border as it ignores the "|" character in
# the cell arguments. A fix takes care of this and therefore the "|" has to be removed
r'\end_layout'
]
def convert_arabic (document):
    """Rename the language "arabic" to "arabic_arabtex".

    If the document language is "arabic", document.language and the
    "\\language" header line are updated. Independently of that, every body
    line containing a "\\lang arabic" switch is replaced by
    "\\lang arabic_arabtex".
    """
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    # Match the language name as a whole word, so that lines naming another
    # "arabic_..." language (including the already converted name) are left
    # alone instead of being overwritten.
    lang_arabic = re.compile(r"\\lang arabic(?![A-Za-z0-9_])")
    for i, line in enumerate(document.body):
        if lang_arabic.search(line):
            # change the language name
            document.body[i] = "\\lang arabic_arabtex"
+
def revert_arabic (document):
    """Rename the language "arabic_arabtex" back to "arabic".

    If the document language is "arabic_arabtex", document.language and the
    "\\language" header line are updated. Independently of that, every body
    line containing a "\\lang arabic_arabtex" switch is replaced by
    "\\lang arabic".
    """
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    # Match the language name as a whole word, so that a longer name that
    # merely starts with "arabic_arabtex" is not overwritten.
    lang_arabtex = re.compile(r"\\lang arabic_arabtex(?![A-Za-z0-9_])")
    for i, line in enumerate(document.body):
        if lang_arabtex.search(line):
            # change the language name
            document.body[i] = "\\lang arabic"
+
def _read_unicodesymbols(path):
    '''Parse the unicodesymbols file at path.

Returns a dictionary mapping a unicode character to the list
[command, flag1, flag2] taken from its line. Comment lines, lines that do
not split into four fields and lines whose code point cannot be converted
are skipped.'''
    try:
        to_char = unichr    # Python 2
    except NameError:
        to_char = chr       # Python 3
    spec_chars = {}
    fp = open(path, 'r')
    try:
        for line in fp:
            if line[0] == '#':
                continue
            line = line.replace(' "', ' ')   # remove all quotation marks with spaces before
            line = line.replace('" ', ' ')   # remove all quotation marks with spaces after
            line = line.replace(r'\"', '"')  # replace \" by " (for characters with diaeresis)
            # flag1 and flag2 are preamble & flags
            # currently NOT implemented
            fields = line.split(None, 3)
            if len(fields) != 4:
                continue
            ucs4, command, flag1, flag2 = fields
            try:
                # The code point is a numeric literal such as 0x00a0; parse
                # it as a number instead of evaluating file content as code.
                key = to_char(int(ucs4, 0))
            except (ValueError, OverflowError):
                continue
            spec_chars[key] = [command, flag1, flag2]
    finally:
        # close the file even if reading or parsing fails
        fp.close()
    return spec_chars


def _wrap_unicode_command(command, in_ert, in_math):
    '''Return command adapted to the inset it is going to be written into.

command is the replacement text from the unicodesymbols list; in_ert and
in_math tell whether the current position is inside an ERT or a math inset.
Commands that do not start with two backslashes are returned unchanged.'''
    # Strings to start and end ERT and math insets
    ert_intro = '\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
    ert_outro = '\n\\end_layout\n\n\\end_inset\n\n'
    math_intro = '\n\\begin_inset Formula $'
    math_outro = '$\n\\end_inset\n'
    if command[0:2] != '\\\\':
        return command
    if command[2:12] == 'ensuremath':
        if in_ert:
            # math in ERT
            command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
            return command.replace('}', '$\n')
        if not in_math:
            # add a math inset with the replacement character
            command = command.replace('\\\\ensuremath{\\', math_intro)
            return command.replace('}', math_outro)
        # we are already in a math inset
        command = command.replace('\\\\ensuremath{\\', '')
        return command.replace('}', '')
    if in_math:
        # avoid putting an ERT in a math; instead put command as text
        return command.replace('\\\\', '\\mathrm{') + '}'
    if not in_ert:
        # add an ERT inset with the replacement character
        return command.replace('\\\\', ert_intro) + ert_outro
    return command.replace('\\\\', '\n\\backslash\n')


def revert_unicode(document):
    '''Transform unicode symbols according to the unicode list.
Preamble flags are not implemented.
Combination characters are currently ignored.
Forced output is currently not enforced

The list is read from the file "unicodesymbols", looked up relative to the
directory of the running script (sys.argv[0]): in the parent directory if
that directory is named "lyx2lyx", otherwise in the directory itself.
Each line of document.body is rewritten in place; an IOError from opening
the list is not caught here.'''
    script_dir = os.path.dirname(sys.argv[0])
    # Drop a trailing "lyx2lyx" path component only. str.strip('lyx2lyx')
    # would remove any of those characters from both ends of the path.
    if os.path.basename(script_dir) == 'lyx2lyx':
        script_dir = os.path.dirname(script_dir)
    spec_chars = _read_unicodesymbols(os.path.join(script_dir, 'unicodesymbols'))
    # Find unicode characters and replace them
    in_ert = 0      # flag set to 1 if in ERT inset
    in_math = 0     # flag set to 1 if in math inset
    insets = []     # list of active insets
    for i, line in enumerate(document.body):
        if line.find('\\begin_inset') > -1:
            # check which inset to start
            if line.find('\\begin_inset ERT') > -1:
                in_ert = 1
                insets.append('ert')
            elif line.find('\\begin_inset Formula') > -1:
                in_math = 1
                insets.append('math')
            else:
                insets.append('other')
        # check which inset to end; an empty list (for some reason) is ignored
        if line.find('\\end_inset') > -1 and insets:
            cur_inset = insets.pop()
            if cur_inset == 'ert':
                in_ert = 0
            elif cur_inset == 'math':
                in_math = 0
        pieces = []     # parts of the modified line
        for char in line:
            entry = spec_chars.get(char)
            if entry is None:
                pieces.append(char)
                continue
            flags = entry[1] + entry[2]
            if flags.find('combining') > -1:
                # combining characters are dropped
                continue
            pieces.append(_wrap_unicode_command(entry[0], in_ert, in_math))
        document.body[i] = ''.join(pieces)
+
+
##
# Conversion hub
#
[271, [convert_ext_font_sizes]],
[272, []],
[273, []],
- [274, [normalize_font_whitespace_274]]
+ [274, [normalize_font_whitespace_274]],
+ [275, [convert_graphics_rotation]],
+ [276, [convert_arabic]]
]
revert = [
+ [275, [revert_arabic]],
+ [274, [revert_graphics_rotation]],
[273, []],
[272, [revert_separator_layout]],
[271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
[251, [revert_commandparams]],
[250, [revert_cs_label]],
[249, []],
- [248, [revert_accent, revert_utf8]],
+ [248, [revert_accent, revert_utf8, revert_unicode]],
[247, [revert_booktabs]],
[246, [revert_font_settings]],
[245, [revert_framed]]]