Try to fix bug 5006. The idea here is to wrap all LaTeX commands that are not in...

[lyx.git] / lib / lyx2lyx / lyx_1_6.py
diff --git a/lib/lyx2lyx/lyx_1_6.py b/lib/lyx2lyx/lyx_1_6.py

index 0c10ed8ff21231d3dc93846acf809d0dfdeee3db..0d3bd489664f50f5decfc9fcd6fbb4a40c94f75b 100644 (file)
--- a/lib/lyx2lyx/lyx_1_6.py
+++ b/lib/lyx2lyx/lyx_1_6.py
@@ -43,10 +43,18 @@ def find_end_of_inset(lines, i):
  # where the last statement resets the counter to accord with the added
  # lines.
  def wrap_into_ert(string, src, dst):
-    " Wrap a something into an ERT"
+    '''Within string, replace occurrences of src with dst, wrapped into ERT
+       E.g.: wrap_into_ert('sch\"on', "\\", "\\backslash") is:
+       sch<ERT>\\backslash</ERT>"on'''
      return string.replace(src, '\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n'
        + dst + '\n\\end_layout\n\\end_inset\n')
  
+def put_cmd_in_ert(string):
+    '''Return string wrapped into a collapsed ERT inset.
+       Every backslash in string is first replaced by "\\backslash" followed
+       by a newline. The result is a single multi-line string without a
+       trailing newline.'''
+    # Splitting on the backslash and re-joining replaces each occurrence.
+    escaped = "\\backslash\n".join(string.split('\\'))
+    pieces = ["\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n",
+              escaped,
+              "\n\\end_layout\n\\end_inset"]
+    return "".join(pieces)
+
  def add_to_preamble(document, text):
      """ Add text to the preamble if it is not already there.
      Only the first line is checked!"""
@@ -114,6 +122,175 @@ def set_option(document, m, option, value):
      return l
  
  
+def read_unicodesymbols():
+    '''Read the unicodesymbols list of unicode characters and corresponding
+       commands.
+
+       Returns a list of [command, character] pairs, one per usable line of
+       the file. Comment lines, blank lines, lines whose command does not
+       start with a backslash and lines that cannot be parsed are skipped.
+       A command consisting of an accent followed by a braced letter also
+       gets a second pair for the same accent written without the braces.'''
+    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
+    # NOTE(review): strip() removes any of the characters l, y, x and 2 from
+    # both ends of pathname; it does not remove the literal suffix 'lyx2lyx'.
+    # Presumably the parent directory of the script is meant -- confirm
+    # before changing, since that would alter where the file is looked up.
+    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
+    spec_chars = []
+    # Two backslashes, followed by some non-word character, and then a character
+    # in braces. The idea is to check for constructs like: \"{u}, which is how
+    # they are written in the unicodesymbols file; but they can also be written
+    # as: \"u.
+    r = re.compile(r'\\\\(\W)\{(\w)\}')
+    # try/finally, so that the file gets closed even if reading it fails
+    try:
+        for line in fp.readlines():
+            if line[0] == '#' or line.strip() == "":
+                continue
+            line = line.replace(' "', ' ') # remove all quotation marks with spaces before
+            line = line.replace('" ', ' ') # remove all quotation marks with spaces after
+            line = line.replace(r'\"', '"') # replace \" by " (for characters with diaeresis)
+            try:
+                [ucs4, command, dead] = line.split(None, 2)
+                if command[0:1] != "\\":
+                    continue
+                # NOTE(review): eval() executes whatever the first field of
+                # the line contains. This is only acceptable as long as the
+                # unicodesymbols file is trusted.
+                char = unichr(eval(ucs4))
+            except Exception:
+                # Best effort: a line we cannot parse is simply ignored.
+                continue
+            spec_chars.append([command, char])
+            m = r.match(command)
+            if m is not None:
+                command = "\\\\"
+                # If the character is a double-quote, then we need to escape it, too,
+                # since it is done that way in the LyX file.
+                if m.group(1) == "\"":
+                    command += "\\"
+                command += m.group(1) + m.group(2)
+                spec_chars.append([command, char])
+    finally:
+        fp.close()
+    return spec_chars
+
+
+def extract_argument(line):
+    '''Extracts a LaTeX argument from the start of line. Returns (arg, rest).
+
+       Leading whitespace is skipped. An argument starts with "{" or "[" and
+       ends at the matching "}" or "]". Nested delimiters of the same kind
+       are balanced, and a character preceded by a backslash is never taken
+       as a delimiter.
+
+       The return value is:
+         (None, "")    if line is empty;
+         (None, line)  if line does not start with an argument;
+         (line, "")    if the closing delimiter is never found;
+         (arg, rest)   otherwise, where arg is everything up to and including
+                       the closing delimiter (leading whitespace included),
+                       rest is what follows, and arg + rest == line.'''
+
+    if not line:
+        return (None, "")
+
+    bracere = re.compile(r"(\s*)(.*)")
+    n = bracere.match(line)
+    whitespace = n.group(1)
+    stuff = n.group(2)
+    brace = stuff[:1]
+    if brace != "[" and brace != "{":
+        return (None, line)
+
+    term = "}"
+    if brace == "[":
+        term = "]"
+
+    # find closing brace
+    remain = stuff[1:]
+    pos = 0      # index into remain of the character being examined
+    num = 1      # number of delimiters currently open
+    skip = False # True if the previous character was an unescaped backslash
+    for c in remain:
+        if skip:
+            skip = False
+        elif c == "\\":
+            skip = True
+        elif c == brace:
+            num += 1
+        elif c == term:
+            num -= 1
+        if num == 0:
+            # c is the delimiter that closes the argument
+            break
+        pos += 1
+    if num != 0:
+        # We never found the matching brace
+        # So, to be on the safe side, let's just return everything
+        # which will then get wrapped as ERT
+        return (line, "")
+    # pos is relative to remain; in line, the leading whitespace and the
+    # opening delimiter come before it. The closing delimiter is included.
+    end = len(whitespace) + 1 + pos + 1
+    return (line[:end], line[end:])
+
+
+def latex2ert(line):
+    '''Converts LaTeX commands into ERT. line may well be a multi-line
+       string when it is returned.
+
+       A command is a double backslash followed either by a run of letters
+       or by one other character. Any arguments that extract_argument finds
+       directly after it are taken along. The command is passed, with a
+       single backslash, to put_cmd_in_ert, and the result is inserted on
+       lines of its own. Text outside of commands is copied unchanged.'''
+    if not line:
+        return line
+
+    ## FIXME Escaped \ ??
+    labelre = re.compile(r'(.*?)\\(\\(?:[a-zA-Z]+|.))(.*)')
+
+    pieces = []
+    found = labelre.match(line)
+    while found is not None:
+        # text before the command, the command itself, and what follows it
+        before, command, line = found.groups()
+        pieces.append(before)
+
+        # Take along every argument that directly follows the command.
+        arg, remainder = extract_argument(line)
+        while arg is not None:
+            command += arg
+            line = remainder
+            arg, remainder = extract_argument(line)
+
+        pieces.extend(["\n", put_cmd_in_ert(command), "\n"])
+        found = labelre.match(line)
+    # Whatever is left contains no command.
+    pieces.append(line)
+    return "".join(pieces)
+
+
+def latex2lyx(data):
+    '''Takes a string, possibly multi-line, and returns the result of
+    converting LaTeX constructs into LyX constructs. Returns a list of
+    lines, suitable for insertion into document.body.
+
+    The conversion is done in three steps:
+      1. Commands listed by read_unicodesymbols() are replaced by the
+         corresponding unicode character.
+      2. Remaining backslash-doublequote sequences are wrapped into ERT.
+      3. Each line is split at $...$: the math parts become Formula insets,
+         and everything else is passed through latex2ert.'''
+
+    retval = []
+
+    # Convert LaTeX to Unicode
+    reps = read_unicodesymbols()
+    # Commands of this sort need to be checked to make sure they are
+    # followed by a non-alpha character, lest we replace too much.
+    hardone = re.compile(r'^\\\\[a-zA-Z]+$')
+
+    for rep in reps:
+        if hardone.match(rep[0]):
+            pos = 0
+            while True:
+                pos = data.find(rep[0], pos)
+                if pos == -1:
+                    break
+                nextpos = pos + len(rep[0])
+                # The character right after the command. This is the empty
+                # string, which is not alpha, at the end of data.
+                nextchar = data[nextpos : nextpos + 1]
+                if nextchar.isalpha():
+                    # not the end of that command
+                    pos = nextpos
+                    continue
+                data = data[:pos] + rep[1] + data[nextpos:]
+                # data got shorter, so go on right behind what we inserted
+                pos += len(rep[1])
+        else:
+            data = data.replace(rep[0], rep[1])
+
+    # Generic, \" -> ":
+    data = wrap_into_ert(data, r'\"', '"')
+
+    # Math:
+    mathre = re.compile(r'^(.*?)(\$.*?\$)(.*)')
+    for line in data.split('\n'):
+        g = line
+        m = mathre.match(g)
+        while m is not None:
+            s = m.group(1)
+            # backslashes are doubled in the source, but not in a formula
+            f = m.group(2).replace('\\\\', '\\')
+            g = m.group(3)
+            if s:
+                # this is non-math!
+                retval.extend(latex2ert(s).split('\n'))
+            retval.append("\\begin_inset Formula " + f)
+            retval.append("\\end_inset")
+            m = mathre.match(g)
+        # Handle whatever is left, which is just text
+        retval.extend(latex2ert(g).split('\n'))
+    return retval
+
+
  ####################################################################
  
  def convert_ltcaption(document):
@@ -778,32 +955,23 @@ def revert_wrapfig_options(document):
          i = k
  
  
-# To convert and revert indices, we need to convert between LaTeX 
-# strings and LyXText. Here we do a minimal conversion to prevent 
-# crashes and data loss. Manual patch-up may be needed.
-replacements = [
-  [r'\\\"a', u'ä'], 
-  [r'\\\"o', u'ö'], 
-  [r'\\\"u', u'ü'],
-  [r'\\\'a', u'á'],
-  [r'\\\'e', u'é'],
-  [r'\\\'i', u'í'],
-  [r'\\\'o', u'ó'],
-  [r'\\\'u', u'ú']
-]
-
  def convert_latexcommand_index(document):
      "Convert from LatexCommand form to collapsable form."
      i = 0
+    r1 = re.compile('name "(.*)"')
      while True:
          i = find_token(document.body, "\\begin_inset CommandInset index", i)
          if i == -1:
              return
          if document.body[i + 1] != "LatexCommand index": # Might also be index_print
              return
-        fullcontent = document.body[i + 2][5:]
-        fullcontent.strip()
-        fullcontent = fullcontent[1:-1]
+        m = r1.match(document.body[i + 2])
+        if m == None:
+            document.warning("Unable to match: " + document.body[i+2])
+            i += 1
+            continue
+        fullcontent = m.group(1)
+        #document.warning(fullcontent)
          document.body[i:i + 3] = ["\\begin_inset Index",
            "status collapsed",
            "\\begin_layout Standard"]
@@ -811,43 +979,12 @@ def convert_latexcommand_index(document):
          # We are now on the blank line preceding "\end_inset"
          # We will write the content here, into the inset.
  
-        # Do the LaTeX --> LyX text conversion
-        for rep in replacements:
-            fullcontent = fullcontent.replace(rep[0], rep[1])
-        # Generic, \" -> ":
-        fullcontent = wrap_into_ert(fullcontent, r'\"', '"')
-        # Math:
-        r = re.compile('^(.*?)(\$.*?\$)(.*)')
-        lines = fullcontent.split('\n')
-        for line in lines:
-            #document.warning("LINE: " + line)
-            #document.warning(str(i) + ":" + document.body[i])
-            #document.warning("LAST: " + document.body[-1])
-            g = line
-            while r.match(g):
-                m = r.match(g)
-                s = m.group(1)
-                f = m.group(2).replace('\\\\', '\\')
-                g = m.group(3)
-                if s:
-                  # this is non-math!
-                  s = wrap_into_ert(s, r'\\', '\\backslash')
-                  s = wrap_into_ert(s, '{', '{')
-                  s = wrap_into_ert(s, '}', '}')
-                  subst = s.split('\n')
-                  document.body[i:i] = subst
-                  i += len(subst)
-                document.body.insert(i + 1, "\\begin_inset Formula " + f)
-                document.body.insert(i + 2, "\\end_inset")
-                i += 2
-            # Generic, \\ -> \backslash:
-            g = wrap_into_ert(g, r'\\', '\\backslash')
-            g = wrap_into_ert(g, '{', '{')
-            g = wrap_into_ert(g, '}', '}')
-            subst = g.split('\n')
-            document.body[i+1:i+1] = subst
-            i += len(subst)
+        linelist = latex2lyx(fullcontent)
+        document.body[i+1:i+1] = linelist
+        i += len(linelist)
+
          document.body.insert(i + 1, "\\end_layout")
+        i += 1
  
  
  def revert_latexcommand_index(document):
@@ -1197,7 +1334,7 @@ def revert_include(document):
    i = 0
    r0 = re.compile('preview.*')
    r1 = re.compile('LatexCommand (.+)')
-  r2 = re.compile('filename (.+)')
+  r2 = re.compile('filename "(.+)"')
    r3 = re.compile('lstparams "(.*)"')
    while True:
      i = find_token(document.body, "\\begin_inset CommandInset include", i)
@@ -1876,8 +2013,8 @@ def convert_subfig(document):
          addedLines = -2
          subst = ['\\begin_inset Float figure', 'wide false', 'sideways false', 
                   'status open', '', '\\begin_layout Plain Layout', '\\begin_inset Caption', 
-                 '', '\\begin_layout Plain Layout',
-                 caption, '\\end_layout', '', '\\end_inset', '', 
+                 '', '\\begin_layout Plain Layout'] + latex2lyx(caption) + \
+                 [ '\\end_layout', '', '\\end_inset', '', 
                   '\\end_layout', '', '\\begin_layout Plain Layout']
          document.body[i : i] = subst
          addedLines += len(subst)
@@ -1993,8 +2130,7 @@ def revert_subfig(document):
              insertion = insertion.split('\n')
              document.body[k : k + 1] = insertion
              addedLines += len(insertion) - 1
-            add_to_preamble(document,
-                            ['\\usepackage{subfig}\n'])
+            add_to_preamble(document, ['\\usepackage{subfig}\n'])
          i += addedLines + 1