From a2f127f8c38d10bc9b4414ea522640b64adb5b77 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Sun, 14 Jun 2015 12:44:44 +0200 Subject: [PATCH] Fix lyx2lyx unicodesymbols escaping The escaping of symbols read from unicodesymbols was inconsistent, which led to wrong replacements (bug 9559). Now the escaping is consistent: unicode_reps contains unescaped LaTeX commands, the needed escaping for LyX files is applied in put_cmd_in_ert() and lyx2latex(). --- lib/lyx2lyx/lyx2lyx_tools.py | 4 ++-- lib/lyx2lyx/lyx_1_5.py | 1 + lib/lyx2lyx/lyx_1_6.py | 1 + lib/lyx2lyx/unicode_symbols.py | 22 ++++++++++++++-------- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/lib/lyx2lyx/lyx2lyx_tools.py b/lib/lyx2lyx/lyx2lyx_tools.py index 5c1f3cb427..92febd6cc4 100644 --- a/lib/lyx2lyx/lyx2lyx_tools.py +++ b/lib/lyx2lyx/lyx2lyx_tools.py @@ -125,7 +125,7 @@ def put_cmd_in_ert(arg): else: s = arg for rep in unicode_reps: - s = s.replace(rep[1], rep[0].replace('\\\\', '\\')) + s = s.replace(rep[1], rep[0]) s = s.replace('\\', "\\backslash\n") ret += s.splitlines() ret += ["\\end_layout", "", "\\end_inset"] @@ -254,7 +254,7 @@ def lyx2latex(document, lines): # Do the LyX text --> LaTeX conversion for rep in unicode_reps: - line = line.replace(rep[1], rep[0] + "{}") + line = line.replace(rep[1], rep[0]) line = line.replace(r'\backslash', r'\textbackslash{}') line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}') line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}') diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py index 8856d0b7c6..97adec9098 100644 --- a/lib/lyx2lyx/lyx_1_5.py +++ b/lib/lyx2lyx/lyx_1_5.py @@ -343,6 +343,7 @@ def revert_utf8(document): convert_multiencoding(document, False) +# FIXME: Use the version in unicode_symbols.py which has some bug fixes def read_unicodesymbols(): " Read the unicodesymbols list of unicode characters and corresponding commands." 
pathname = os.path.abspath(os.path.dirname(sys.argv[0])) diff --git a/lib/lyx2lyx/lyx_1_6.py b/lib/lyx2lyx/lyx_1_6.py index a30e50e8fb..2f8ebd24b8 100644 --- a/lib/lyx2lyx/lyx_1_6.py +++ b/lib/lyx2lyx/lyx_1_6.py @@ -145,6 +145,7 @@ def set_option(document, m, option, value): return l +# FIXME: Use the version in unicode_symbols.py which has some bug fixes def read_unicodesymbols(): " Read the unicodesymbols list of unicode characters and corresponding commands." pathname = os.path.abspath(os.path.dirname(sys.argv[0])) diff --git a/lib/lyx2lyx/unicode_symbols.py b/lib/lyx2lyx/unicode_symbols.py index 9171c180ef..f24f3a9187 100644 --- a/lib/lyx2lyx/unicode_symbols.py +++ b/lib/lyx2lyx/unicode_symbols.py @@ -31,30 +31,36 @@ def read_unicodesymbols(): pathname = os.path.abspath(os.path.dirname(sys.argv[0])) fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')) spec_chars = [] - # Two backslashes, followed by some non-word character, and then a character + # A backslash, followed by some non-word character, and then a character # in brackets. The idea is to check for constructs like: \"{u}, which is how # they are written in the unicodesymbols file; but they can also be written # as: \"u or even \" u. - r = re.compile(r'\\\\(\W)\{(\w)\}') + # The two backslashes in the string literal are needed to specify a literal + # backslash in the regex. Without r prefix, these would be four backslashes. + r = re.compile(r'\\(\W)\{(\w)\}') for line in fp.readlines(): if line[0] != '#' and line.strip() != "": + # Note: backslashes in the string literals with r prefix are not escaped, + # so one backslash in the source file equals one backslash in memory. + # Without r prefix backslashes are escaped, so two backslashes in the + # source file equal one backslash in memory. 
line=line.replace(' "',' ') # remove all quotation marks with spaces before line=line.replace('" ',' ') # remove all quotation marks with spaces after - line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis) + line=line.replace(r'\"','"') # unescape " + line=line.replace(r'\\','\\') # unescape \ try: [ucs4,command,dead] = line.split(None,2) if command[0:1] != "\\": continue + if (line.find("notermination=text") < 0 and + line.find("notermination=both") < 0 and command[-1] != "}"): + command = command + "{}" spec_chars.append([command, unichr(eval(ucs4))]) except: continue m = r.match(command) if m != None: - command = "\\\\" - # If the character is a double-quote, then we need to escape it, too, - # since it is done that way in the LyX file. - if m.group(1) == "\"": - command += "\\" + command = "\\" commandbl = command command += m.group(1) + m.group(2) commandbl += m.group(1) + ' ' + m.group(2) -- 2.39.2