Fix lyx2lyx unicodesymbols escaping

author Georg Baum <baum@lyx.org>

Sun, 14 Jun 2015 10:44:44 +0000 (12:44 +0200)

committer Georg Baum <baum@lyx.org>

Sun, 14 Jun 2015 10:44:44 +0000 (12:44 +0200)
author Georg Baum <baum@lyx.org>
Sun, 14 Jun 2015 10:44:44 +0000 (12:44 +0200)
committer Georg Baum <baum@lyx.org>
Sun, 14 Jun 2015 10:44:44 +0000 (12:44 +0200)
diff --git a/lib/lyx2lyx/lyx2lyx_tools.py b/lib/lyx2lyx/lyx2lyx_tools.py

index 5c1f3cb427471ce960404dc741a68f454bb051c5..92febd6cc44d96e27f8d792312dc4e2c4100e423 100644 (file)
--- a/lib/lyx2lyx/lyx2lyx_tools.py
+++ b/lib/lyx2lyx/lyx2lyx_tools.py
@@ -125,7 +125,7 @@ def put_cmd_in_ert(arg):
      else:
        s = arg
      for rep in unicode_reps:
-      s = s.replace(rep[1], rep[0].replace('\\\\', '\\'))
+      s = s.replace(rep[1], rep[0])
      s = s.replace('\\', "\\backslash\n")
      ret += s.splitlines()
      ret += ["\\end_layout", "", "\\end_inset"]
@@ -254,7 +254,7 @@ def lyx2latex(document, lines):
  
            # Do the LyX text --> LaTeX conversion
            for rep in unicode_reps:
-            line = line.replace(rep[1], rep[0] + "{}")
+              line = line.replace(rep[1], rep[0])
            line = line.replace(r'\backslash', r'\textbackslash{}')
            line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
            line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py

index 8856d0b7c63dfff9fcdc472c9c9397ad42d06ba6..97adec9098d7034a5639dbe3c5c98b8006019898 100644 (file)
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -343,6 +343,7 @@ def revert_utf8(document):
      convert_multiencoding(document, False)
  
  
+# FIXME: Use the version in unicode_symbols.py which has some bug fixes
  def read_unicodesymbols():
      " Read the unicodesymbols list of unicode characters and corresponding commands."
      pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
diff --git a/lib/lyx2lyx/lyx_1_6.py b/lib/lyx2lyx/lyx_1_6.py

index a30e50e8fb03cfc2ec4b21aec2431b6cab6a2ad8..2f8ebd24b8a6ae9664902d786a0e2eda3a3e8432 100644 (file)
--- a/lib/lyx2lyx/lyx_1_6.py
+++ b/lib/lyx2lyx/lyx_1_6.py
@@ -145,6 +145,7 @@ def set_option(document, m, option, value):
      return l
  
  
+# FIXME: Use the version in unicode_symbols.py which has some bug fixes
  def read_unicodesymbols():
      " Read the unicodesymbols list of unicode characters and corresponding commands."
      pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
diff --git a/lib/lyx2lyx/unicode_symbols.py b/lib/lyx2lyx/unicode_symbols.py

index 9171c180ef559a8891bd8304f199daa7463f6e70..f24f3a918710390d5d744b35e272e923899cfcf6 100644 (file)
--- a/lib/lyx2lyx/unicode_symbols.py
+++ b/lib/lyx2lyx/unicode_symbols.py
@@ -31,30 +31,36 @@ def read_unicodesymbols():
      pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
      fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
      spec_chars = []
-    # Two backslashes, followed by some non-word character, and then a character
+    # A backslash, followed by some non-word character, and then a character
      # in brackets. The idea is to check for constructs like: \"{u}, which is how
      # they are written in the unicodesymbols file; but they can also be written
      # as: \"u or even \" u.
-    r = re.compile(r'\\\\(\W)\{(\w)\}')
+    # The two backslashes in the string literal are needed to specify a literal
+    # backslash in the regex. Without r prefix, these would be four backslashes.
+    r = re.compile(r'\\(\W)\{(\w)\}')
      for line in fp.readlines():
          if line[0] != '#' and line.strip() != "":
+            # Note: backslashes in the string literals with r prefix are not escaped,
+            #       so one backslash in the source file equals one backslash in memory.
+            #       Without r prefix backslashes are escaped, so two backslashes in the
+            #       source file equal one backslash in memory.
              line=line.replace(' "',' ') # remove all quotation marks with spaces before
              line=line.replace('" ',' ') # remove all quotation marks with spaces after
-            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
+            line=line.replace(r'\"','"') # unescape "
+            line=line.replace(r'\\','\\') # unescape \
              try:
                  [ucs4,command,dead] = line.split(None,2)
                  if command[0:1] != "\\":
                      continue
+                if (line.find("notermination=text") < 0 and
+                    line.find("notermination=both") < 0 and command[-1] != "}"):
+                    command = command + "{}"
                  spec_chars.append([command, unichr(eval(ucs4))])
              except:
                  continue
              m = r.match(command)
              if m != None:
-                command = "\\\\"
-                # If the character is a double-quote, then we need to escape it, too,
-                # since it is done that way in the LyX file.
-                if m.group(1) == "\"":
-                    command += "\\"
+                command = "\\"
                  commandbl = command
                  command += m.group(1) + m.group(2)
                  commandbl += m.group(1) + ' ' + m.group(2)
author	Georg Baum <baum@lyx.org>
	Sun, 14 Jun 2015 10:44:44 +0000 (12:44 +0200)
committer	Georg Baum <baum@lyx.org>
	Sun, 14 Jun 2015 10:44:44 +0000 (12:44 +0200)
lib/lyx2lyx/lyx2lyx_tools.py		patch \| blob \| history
lib/lyx2lyx/lyx_1_5.py		patch \| blob \| history
lib/lyx2lyx/lyx_1_6.py		patch \| blob \| history
lib/lyx2lyx/unicode_symbols.py		patch \| blob \| history