-def read_unicodesymbols():
- " Read the unicodesymbols list of unicode characters and corresponding commands."
- pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
- fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
- spec_chars = {}
- for line in fp.readlines():
- if line[0] != '#':
- line=line.replace(' "',' ') # remove all quotation marks with spaces before
- line=line.replace('" ',' ') # remove all quotation marks with spaces after
- line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
- try:
- # flag1 and flag2 are preamble and other flags
- [ucs4,command,flag1,flag2] =line.split(None,3)
- spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
- except:
- pass
- fp.close()
-
- return spec_chars
-
-
-def revert_unicode(document):
- '''Transform unicode characters that can not be written using the
-document encoding to commands according to the unicodesymbols
-file. Characters that can not be replaced by commands are replaced by
-an replacement string. Flags other than 'combined' are currently not
-implemented.'''
-
- replacement_character = '???'
- spec_chars = read_unicodesymbols()
-
- # Define strings to start and end ERT and math insets
- ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
- ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
- math_intro='\n\\begin_inset Formula $'
- math_outro='$\n\\end_inset\n'
- # Find unicode characters and replace them
- in_ert = False # flag set to 1 if in ERT inset
- in_math = False # flag set to 1 if in math inset
- insets = [] # list of active insets
- mod_body = u'' # to store the modified document body
-
- # Go through the file to capture all combining characters
- last_char = '' # to store the previous character
- body_string = u'' # store the document temporarily as a string
- for line in document.body:
- body_string = body_string + line +'\n'
- [body_string, apa] = body_string.rsplit('\n',1)
-
- body = body_string.split('\n')
- for line in body:
- # Check for insets
- if line.find('\\begin_inset') > -1:
- # check which inset to start
- if line.find('\\begin_inset ERT') > -1:
- in_ert = True
- insets.append('ert')
- elif line.find('\\begin_inset Formula') > -1:
- in_math = True
- insets.append('math')
- else:
- insets.append('other')
- if line.find('\\end_inset') > -1:
- # check which inset to end
- try:
- cur_inset = insets.pop()
- if cur_inset == 'ert':
- in_ert = False
- elif cur_inset == 'math':
- in_math = False
- else:
- pass # end of other inset
- except:
- pass # inset list was empty (for some reason)
-
- # Try to write the line
- try:
- # If all goes well the line is written here
- dummy = line.encode(document.encoding)
- mod_body = mod_body + line + '\n'
- last_char = line[-1]
- except:
- # Error, some character(s) in the line need to be replaced
- for character in line:
- try:
- # Try to write the character
- dummy = character.encode(document.encoding)
- mod_body = mod_body + character
- last_char = character
- except:
- # Try to replace with ERT/math inset
- if spec_chars.has_key(character):
- command = spec_chars[character][0]; # the command to replace unicode
- flag1 = spec_chars[character][1]
- flag2 = spec_chars[character][2]
- if flag1.find('combining') > -1 or flag2.find('combining') > -1:
- # We have a character that should be combined with the previous
- command = command + '{' +last_char + '}'
- # Remove the last character. Ignore if it is whitespace
- if len(last_char.rstrip()) > 0:
- # last_char was found and is not whitespace
- [mod_body, apa] = mod_body.rsplit(last_char,1)
- else:
- # The last character was replaced by a command. For now it is
- # ignored. This could be handled better.
- pass
- if command[0:2] == '\\\\':
- if command[2:12]=='ensuremath':
- if in_ert == True:
- # math in ERT
- command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
- command = command.replace('}', '$\n')
- elif in_math == False:
- # add a math inset with the replacement character
- command = command.replace('\\\\ensuremath{\\', math_intro)
- command = command.replace('}', math_outro)
- else:
- # we are already in a math inset
- command = command.replace('\\\\ensuremath{\\', '')
- command = command.replace('}', '')
- else:
- if in_math == True:
- # avoid putting an ERT in a math; instead put command as text
- command = command.replace('\\\\', '\mathrm{')
- command = command + '}'
- elif in_ert == False:
- # add an ERT inset with the replacement character
- command = command.replace('\\\\', ert_intro)
- command = command + ert_outro
- else:
- command = command.replace('\\\\', '\n\\backslash\n')
- last_char = '' # indicate that the character should not be removed
- mod_body = mod_body + command
- else:
- # Replace with replacement string
- mod_body = mod_body + replacement_character
- [mod_body, apa] = mod_body.rsplit('\n',1)
- document.body = mod_body.split('\n')
-
-