convert_multiencoding(document, False)
+def read_unicodesymbols():
+ " Read the unicodesymbols list of unicode characters and corresponding commands."
+ pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
+ fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
+ spec_chars = {}
+ for line in fp.readlines():
+ if line[0] != '#':
+ line=line.replace(' "',' ') # remove all quotation marks with spaces before
+ line=line.replace('" ',' ') # remove all quotation marks with spaces after
+ line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
+ try:
+ # flag1 and flag2 are preamble and other flags
+ [ucs4,command,flag1,flag2] =line.split(None,3)
+ spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
+ except:
+ pass
+ fp.close()
+
+ return spec_chars
+
+
+def revert_unicode(document):
+ '''Transform unicode characters that can not be written using the
+document encoding to commands according to the unicodesymbols
+file. Characters that can not be replaced by commands are replaced by
+an replacement string. Flags other than 'combined' are currently not
+implemented.'''
+
+ replacement_character = '???'
+ spec_chars = read_unicodesymbols()
+
+ # Define strings to start and end ERT and math insets
+ ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
+ ert_outro='\n\\end_layout\n\n\\end_inset\n'
+ math_intro='\n\\begin_inset Formula $'
+ math_outro='$\n\\end_inset'
+ # Find unicode characters and replace them
+ in_ert = False # flag set to 1 if in ERT inset
+ in_math = False # flag set to 1 if in math inset
+ insets = [] # list of active insets
+
+ # Go through the file to capture all combining characters
+ last_char = '' # to store the previous character
+
+ i = 0
+ while i < len(document.body):
+ line = document.body[i]
+ # Check for insets
+ if line.find('\\begin_inset') > -1:
+ # check which inset to start
+ if line.find('\\begin_inset ERT') > -1:
+ in_ert = True
+ insets.append('ert')
+ elif line.find('\\begin_inset Formula') > -1:
+ in_math = True
+ insets.append('math')
+ else:
+ insets.append('other')
+ if line.find('\\end_inset') > -1:
+ # check which inset to end
+ try:
+ cur_inset = insets.pop()
+ if cur_inset == 'ert':
+ in_ert = False
+ elif cur_inset == 'math':
+ in_math = False
+ else:
+ pass # end of other inset
+ except:
+ pass # inset list was empty (for some reason)
+
+ # Try to write the line
+ try:
+ # If all goes well the line is written here
+ dummy = line.encode(document.encoding)
+ last_char = line[-1]
+ i += 1
+ except:
+ # Error, some character(s) in the line need to be replaced
+ mod_line = u''
+ for character in line:
+ try:
+ # Try to write the character
+ dummy = character.encode(document.encoding)
+ mod_line += character
+ last_char = character
+ except:
+ # Try to replace with ERT/math inset
+ if spec_chars.has_key(character):
+ command = spec_chars[character][0] # the command to replace unicode
+ flag1 = spec_chars[character][1]
+ flag2 = spec_chars[character][2]
+ if flag1.find('combining') > -1 or flag2.find('combining') > -1:
+ # We have a character that should be combined with the previous
+ command += '{' + last_char + '}'
+ # Remove the last character. Ignore if it is whitespace
+ if len(last_char.rstrip()):
+ # last_char was found and is not whitespace
+ if mod_line:
+ mod_line = mod_line[:-1]
+ else: # last_char belongs to the last line
+ document.body[i-1] = document.body[i-1][:-1]
+ else:
+ # The last character was replaced by a command. For now it is
+ # ignored. This could be handled better.
+ pass
+ if command[0:2] == '\\\\':
+ if command[2:12]=='ensuremath':
+ if in_ert:
+ # math in ERT
+ command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
+ command = command.replace('}', '$\n')
+ elif not in_math:
+ # add a math inset with the replacement character
+ command = command.replace('\\\\ensuremath{\\', math_intro)
+ command = command.replace('}', math_outro)
+ else:
+ # we are already in a math inset
+ command = command.replace('\\\\ensuremath{\\', '')
+ command = command.replace('}', '')
+ else:
+ if in_math:
+ # avoid putting an ERT in a math; instead put command as text
+ command = command.replace('\\\\', '\mathrm{')
+ command = command + '}'
+ elif not in_ert:
+ # add an ERT inset with the replacement character
+ command = command.replace('\\\\', ert_intro)
+ command = command + ert_outro
+ else:
+ command = command.replace('\\\\', '\n\\backslash\n')
+ last_char = '' # indicate that the character should not be removed
+ mod_line += command
+ else:
+ # Replace with replacement string
+ mod_line += replacement_character
+ document.body[i:i+1] = mod_line.split('\n')
+ i += len(mod_line.split('\n'))
+
+
def revert_cs_label(document):
" Remove status flag of charstyle label. "
i = 0
i = i + 1
-def read_unicodesymbols():
- " Read the unicodesymbols list of unicode characters and corresponding commands."
- pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
- fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
- spec_chars = {}
- for line in fp.readlines():
- if line[0] != '#':
- line=line.replace(' "',' ') # remove all quotation marks with spaces before
- line=line.replace('" ',' ') # remove all quotation marks with spaces after
- line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
- try:
- # flag1 and flag2 are preamble and other flags
- [ucs4,command,flag1,flag2] =line.split(None,3)
- spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
- except:
- pass
- fp.close()
-
- return spec_chars
-
-
-def revert_unicode(document):
- '''Transform unicode characters that can not be written using the
-document encoding to commands according to the unicodesymbols
-file. Characters that can not be replaced by commands are replaced by
-an replacement string. Flags other than 'combined' are currently not
-implemented.'''
-
- replacement_character = '???'
- spec_chars = read_unicodesymbols()
-
- # Define strings to start and end ERT and math insets
- ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
- ert_outro='\n\\end_layout\n\n\\end_inset\n'
- math_intro='\n\\begin_inset Formula $'
- math_outro='$\n\\end_inset'
- # Find unicode characters and replace them
- in_ert = False # flag set to 1 if in ERT inset
- in_math = False # flag set to 1 if in math inset
- insets = [] # list of active insets
-
- # Go through the file to capture all combining characters
- last_char = '' # to store the previous character
-
- i = 0
- while i < len(document.body):
- line = document.body[i]
- # Check for insets
- if line.find('\\begin_inset') > -1:
- # check which inset to start
- if line.find('\\begin_inset ERT') > -1:
- in_ert = True
- insets.append('ert')
- elif line.find('\\begin_inset Formula') > -1:
- in_math = True
- insets.append('math')
- else:
- insets.append('other')
- if line.find('\\end_inset') > -1:
- # check which inset to end
- try:
- cur_inset = insets.pop()
- if cur_inset == 'ert':
- in_ert = False
- elif cur_inset == 'math':
- in_math = False
- else:
- pass # end of other inset
- except:
- pass # inset list was empty (for some reason)
-
- # Try to write the line
- try:
- # If all goes well the line is written here
- dummy = line.encode(document.encoding)
- last_char = line[-1]
- i += 1
- except:
- # Error, some character(s) in the line need to be replaced
- mod_line = u''
- for character in line:
- try:
- # Try to write the character
- dummy = character.encode(document.encoding)
- mod_line += character
- last_char = character
- except:
- # Try to replace with ERT/math inset
- if spec_chars.has_key(character):
- command = spec_chars[character][0] # the command to replace unicode
- flag1 = spec_chars[character][1]
- flag2 = spec_chars[character][2]
- if flag1.find('combining') > -1 or flag2.find('combining') > -1:
- # We have a character that should be combined with the previous
- command += '{' + last_char + '}'
- # Remove the last character. Ignore if it is whitespace
- if len(last_char.rstrip()):
- # last_char was found and is not whitespace
- if mod_line:
- mod_line = mod_line[:-1]
- else: # last_char belongs to the last line
- document.body[i-1] = document.body[i-1][:-1]
- else:
- # The last character was replaced by a command. For now it is
- # ignored. This could be handled better.
- pass
- if command[0:2] == '\\\\':
- if command[2:12]=='ensuremath':
- if in_ert:
- # math in ERT
- command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
- command = command.replace('}', '$\n')
- elif not in_math:
- # add a math inset with the replacement character
- command = command.replace('\\\\ensuremath{\\', math_intro)
- command = command.replace('}', math_outro)
- else:
- # we are already in a math inset
- command = command.replace('\\\\ensuremath{\\', '')
- command = command.replace('}', '')
- else:
- if in_math:
- # avoid putting an ERT in a math; instead put command as text
- command = command.replace('\\\\', '\mathrm{')
- command = command + '}'
- elif not in_ert:
- # add an ERT inset with the replacement character
- command = command.replace('\\\\', ert_intro)
- command = command + ert_outro
- else:
- command = command.replace('\\\\', '\n\\backslash\n')
- last_char = '' # indicate that the character should not be removed
- mod_line += command
- else:
- # Replace with replacement string
- mod_line += replacement_character
- document.body[i:i+1] = mod_line.split('\n')
- i += len(mod_line.split('\n'))
-
-
##
# Conversion hub
#
if __name__ == "__main__":
pass
-
-