From a2f127f8c38d10bc9b4414ea522640b64adb5b77 Mon Sep 17 00:00:00 2001
From: Georg Baum <baum@lyx.org>
Date: Sun, 14 Jun 2015 12:44:44 +0200
Subject: [PATCH] Fix lyx2lyx unicodesymbols escaping

The escaping of symbols read from unicodesymbols was inconsistent, which led
to wrong replacements (bug 9559). Now the escaping is consistent: unicode_reps
contains unescaped LaTeX commands, the needed escaping for LyX files is
applied in put_cmd_in_ert() and lyx2latex().
---
 lib/lyx2lyx/lyx2lyx_tools.py   |  4 ++--
 lib/lyx2lyx/lyx_1_5.py         |  1 +
 lib/lyx2lyx/lyx_1_6.py         |  1 +
 lib/lyx2lyx/unicode_symbols.py | 22 ++++++++++++++--------
 4 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/lib/lyx2lyx/lyx2lyx_tools.py b/lib/lyx2lyx/lyx2lyx_tools.py
index 5c1f3cb427..92febd6cc4 100644
--- a/lib/lyx2lyx/lyx2lyx_tools.py
+++ b/lib/lyx2lyx/lyx2lyx_tools.py
@@ -125,7 +125,7 @@ def put_cmd_in_ert(arg):
     else:
       s = arg
     for rep in unicode_reps:
-      s = s.replace(rep[1], rep[0].replace('\\\\', '\\'))
+      s = s.replace(rep[1], rep[0])
     s = s.replace('\\', "\\backslash\n")
     ret += s.splitlines()
     ret += ["\\end_layout", "", "\\end_inset"]
@@ -254,7 +254,7 @@ def lyx2latex(document, lines):
 
           # Do the LyX text --> LaTeX conversion
           for rep in unicode_reps:
-            line = line.replace(rep[1], rep[0] + "{}")
+              line = line.replace(rep[1], rep[0])
           line = line.replace(r'\backslash', r'\textbackslash{}')
           line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
           line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py
index 8856d0b7c6..97adec9098 100644
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -343,6 +343,7 @@ def revert_utf8(document):
     convert_multiencoding(document, False)
 
 
+# FIXME: Use the version in unicode_symbols.py which has some bug fixes
 def read_unicodesymbols():
     " Read the unicodesymbols list of unicode characters and corresponding commands."
     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
diff --git a/lib/lyx2lyx/lyx_1_6.py b/lib/lyx2lyx/lyx_1_6.py
index a30e50e8fb..2f8ebd24b8 100644
--- a/lib/lyx2lyx/lyx_1_6.py
+++ b/lib/lyx2lyx/lyx_1_6.py
@@ -145,6 +145,7 @@ def set_option(document, m, option, value):
     return l
 
 
+# FIXME: Use the version in unicode_symbols.py which has some bug fixes
 def read_unicodesymbols():
     " Read the unicodesymbols list of unicode characters and corresponding commands."
     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
diff --git a/lib/lyx2lyx/unicode_symbols.py b/lib/lyx2lyx/unicode_symbols.py
index 9171c180ef..f24f3a9187 100644
--- a/lib/lyx2lyx/unicode_symbols.py
+++ b/lib/lyx2lyx/unicode_symbols.py
@@ -31,30 +31,36 @@ def read_unicodesymbols():
     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
     fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
     spec_chars = []
-    # Two backslashes, followed by some non-word character, and then a character
+    # A backslash, followed by some non-word character, and then a character
     # in brackets. The idea is to check for constructs like: \"{u}, which is how
     # they are written in the unicodesymbols file; but they can also be written
     # as: \"u or even \" u.
-    r = re.compile(r'\\\\(\W)\{(\w)\}')
+    # The two backslashes in the string literal are needed to specify a literal
+    # backslash in the regex. Without r prefix, these would be four backslashes.
+    r = re.compile(r'\\(\W)\{(\w)\}')
     for line in fp.readlines():
         if line[0] != '#' and line.strip() != "":
+            # Note: backslashes in the string literals with r prefix are not escaped,
+            #       so one backslash in the source file equals one backslash in memory.
+            #       Without r prefix backslashes are escaped, so two backslashes in the
+            #       source file equal one backslash in memory.
             line=line.replace(' "',' ') # remove all quotation marks with spaces before
             line=line.replace('" ',' ') # remove all quotation marks with spaces after
-            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
+            line=line.replace(r'\"','"') # unescape "
+            line=line.replace(r'\\','\\') # unescape \
             try:
                 [ucs4,command,dead] = line.split(None,2)
                 if command[0:1] != "\\":
                     continue
+                if (line.find("notermination=text") < 0 and
+                    line.find("notermination=both") < 0 and command[-1] != "}"):
+                    command = command + "{}"
                 spec_chars.append([command, unichr(eval(ucs4))])
             except:
                 continue
             m = r.match(command)
             if m != None:
-                command = "\\\\"
-                # If the character is a double-quote, then we need to escape it, too,
-                # since it is done that way in the LyX file.
-                if m.group(1) == "\"":
-                    command += "\\"
+                command = "\\"
                 commandbl = command
                 command += m.group(1) + m.group(2)
                 commandbl += m.group(1) + ' ' + m.group(2)
-- 
2.39.2