math_outro='$\n\\end_inset'
mod_line = u''
- if i and document.body[i - 1][:1] != '\\':
+ if i and not is_inset_line(document, i-1):
last_char = document.body[i - 1][-1:]
else:
last_char = ''
if commandparams_info[name][0] == "":
document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
else:
- lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
+ lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"')))
if option2 != "":
if commandparams_info[name][1] == "":
document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
else:
- lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
+ lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"')))
if argument != "":
if commandparams_info[name][2] == "":
document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
else:
- lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
+ lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"')))
document.body[i:i+1] = lines
i = i + 1
preview_line = document.body[k]
elif (commandparams_info[name][0] != "" and
pname == commandparams_info[name][0]):
- option1 = pvalue.strip('"').replace('\\"', '"')
+ option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
elif (commandparams_info[name][1] != "" and
pname == commandparams_info[name][1]):
- option2 = pvalue.strip('"').replace('\\"', '"')
+ option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
elif (commandparams_info[name][2] != "" and
pname == commandparams_info[name][2]):
- argument = pvalue.strip('"').replace('\\"', '"')
+ argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
elif document.body[k].strip() != "":
document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
if name == "bibitem":
i += 3
def is_inset_line(document, i):
    """Tell whether line ``i`` of ``document.body`` is treated as an inset line.

    A line counts as an inset line when either

    * it begins with a backslash, or
    * a backslash occurs in one of its last two whitespace-separated tokens.

    Returns a bool.  An empty or whitespace-only line yields False.
    """
    line = document.body[i]
    # Cheap test first: the line itself starts with a backslash command.
    if line[:1] == '\\':
        return True
    # Otherwise only the tail of the line matters: glue the last two tokens
    # together and look for a backslash anywhere in them.
    tail = "".join(line.split()[-2:])
    return '\\' in tail
+
+
+# A wrapper around normalize that handles special cases (cf. bug 3313)
def normalize(form, text):
    """Apply ``unicodedata.normalize(form, ...)`` to ``text``, sparing two characters.

    The code points U+2126 and U+212B (OHM and ANGSTROM, per the original
    comment) are copied to the result unchanged.  Each run of other
    characters between them is normalized as one unit.

    ``form`` is passed straight through to ``unicodedata.normalize``.
    Any exception raised by ``unicodedata.normalize`` or ``ord`` propagates
    to the caller unchanged.
    """
    # do not normalize OHM, ANGSTROM
    protected = (0x2126, 0x212b)
    pieces = []    # finished output segments, in order
    pending = []   # characters collected since the last protected one
    for char in text:
        if ord(char) not in protected:
            pending.append(char)
            continue
        # A protected character ends the current run: normalize what has
        # been collected so far, then emit the character verbatim.
        if pending:
            pieces.append(unicodedata.normalize(form, ''.join(pending)))
            pending = []
        pieces.append(char)
    # Flush the trailing run, if any.
    if pending:
        pieces.append(unicodedata.normalize(form, ''.join(pending)))
    return ''.join(pieces)
+
+
def revert_accent(document):
inverse_accent_map = {}
for k in accent_map:
for i in range(len(document.body) - 1):
if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
continue
- if (document.body[i+1][0] in inverse_accent_map and document.body[i][:1] != '\\'):
+ if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
# the last character of this line and the first of the next line
# form probably a surrogate pair, inline insets are excluded (second part of the test)
while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
# because we never use u'xxx' for string literals, but 'xxx'.
# Therefore we may have to try two times to normalize the data.
try:
- document.body[i] = unicodedata.normalize("NFD", document.body[i])
+ document.body[i] = normalize("NFD", document.body[i])
except TypeError:
- document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
+ document.body[i] = normalize("NFD", unicode(document.body[i], 'utf-8'))
# Replace accented characters with InsetLaTeXAccent
# Do not convert characters that can be represented in the chosen
if j < len(document.body[i]) - 1:
document.body.insert(i+1, document.body[i][j+1:])
# Delete the accented character
- if j > 0:
- document.body[i] = document.body[i][:j-1]
- else:
- document.body[i] = u''
+ document.body[i] = document.body[i][:j]
# Finally add the InsetLaTeXAccent
document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
break
accented_char = inverse_accented_map[accented_char]
accent = document.body[i][j]
try:
- dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
+ dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
except UnicodeEncodeError:
# Insert the rest of the line as new line
if j < len(document.body[i]) - 1:
document.body.insert(i+1, document.body[i][j+1:])
# Delete the accented characters
- if j > 1:
- document.body[i] = document.body[i][:j-2]
- else:
- document.body[i] = u''
+ document.body[i] = document.body[i][:j-1]
# Finally add the InsetLaTeXAccent
document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
break
# Normalize to "Normal form C" (NFC, pre-composed characters) again
for i in range(len(document.body)):
- document.body[i] = unicodedata.normalize("NFC", document.body[i])
+ document.body[i] = normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):