Do a little more work towards making subcaptions work.

[lyx.git] / lib / lyx2lyx / lyx_1_6.py
diff --git a/lib/lyx2lyx/lyx_1_6.py b/lib/lyx2lyx/lyx_1_6.py

index f60300b4c2538e9086c262cd2308f42dbf71eacb..9b8c2764ca72cb3c749269e677b36227927b7f86 100644 (file)
--- a/lib/lyx2lyx/lyx_1_6.py
+++ b/lib/lyx2lyx/lyx_1_6.py
@@ -114,6 +114,96 @@ def set_option(document, m, option, value):
      return l
  
  
+def read_unicodesymbols():
+    """Read the 'unicodesymbols' table and return LaTeX-to-Unicode pairs.
+
+    The table is located relative to the running script (sys.argv[0]): when
+    the script lives in a directory named 'lyx2lyx' the file is opened from
+    that directory's parent, otherwise from the script's own directory.
+
+    Returns a list of [command, character] pairs. For a command that the
+    file spells with a braced letter after an accent, a second pair with the
+    brace-less spelling is added. Comment lines, blank lines, lines without
+    a backslash and lines that cannot be parsed are skipped.
+
+    Raises IOError if the file cannot be opened.
+    """
+    script_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
+    # str.strip('lyx2lyx') would remove any of the characters l, y, x, 2 from
+    # BOTH ends of the path (mangling e.g. a leading drive letter "x:"), so
+    # drop the trailing 'lyx2lyx' path component explicitly instead.
+    if os.path.basename(script_dir) == 'lyx2lyx':
+        base_dir = os.path.dirname(script_dir)
+    else:
+        base_dir = script_dir
+    spec_chars = []
+    # Two literal backslashes, followed by some non-word character, and then
+    # a word character in braces. The idea is to check for constructs like
+    # \"{u}, which is how they are written in the unicodesymbols file; but
+    # they can also be written as \"u, so that spelling is registered too.
+    braced = re.compile(r'\\\\(\W)\{(\w)\}')
+    fp = open(os.path.join(base_dir, 'unicodesymbols'))
+    try:
+        for line in fp:
+            if line[0] == '#' or line.strip() == "" or line.find("\\") == -1:
+                continue
+            line = line.replace(' "', ' ')   # remove quotation marks with a space before
+            line = line.replace('" ', ' ')   # remove quotation marks with a space after
+            line = line.replace(r'\"', '"')  # replace \" by " (characters with diaeresis)
+            try:
+                # Exactly three fields are required; the third one is unused.
+                [ucs4, command, dead] = line.split(None, 2)
+                # int(..., 0) parses the code point like an integer literal
+                # (e.g. 0x00e4) without evaluating file content as code.
+                char = unichr(int(ucs4, 0))
+            except (ValueError, OverflowError):
+                # Too few fields, or not a usable code point: skip the line.
+                continue
+            spec_chars.append([command, char])
+            m = braced.match(command)
+            if m is not None:
+                alt = "\\\\"
+                # If the character is a double-quote, then we need to escape
+                # it, too, since it is done that way in the LyX file.
+                if m.group(1) == "\"":
+                    alt += "\\"
+                alt += m.group(1) + m.group(2)
+                spec_chars.append([alt, char])
+    finally:
+        # Close the table even when parsing raises unexpectedly.
+        fp.close()
+    return spec_chars
+
+
+def _text_to_lyx_lines(text):
+    "Wrap backslashes and braces of non-math text via wrap_into_ert; return the result as a list of lines."
+    # Generic, \\ -> \backslash. The doubled backslash is matched on purpose:
+    # the math branch of latex2lyx likewise treats '\\\\' as one backslash.
+    # NOTE(review): matching a single backslash here would presumably also hit
+    # backslashes in markup produced by earlier wrap_into_ert calls -- confirm
+    # against wrap_into_ert.
+    text = wrap_into_ert(text, r'\\', '\\backslash')
+    text = wrap_into_ert(text, '{', '{')
+    text = wrap_into_ert(text, '}', '}')
+    return text.split('\n')
+
+
+def latex2lyx(data):
+    '''Convert the LaTeX constructs in a string into LyX constructs.
+
+    data is a string, possibly multi-line. Commands listed by
+    read_unicodesymbols() are replaced by their characters, escaped double
+    quotes, backslashes and braces are passed through wrap_into_ert, and
+    $...$ spans become Formula insets.
+
+    Returns a list of lines, suitable for insertion into document.body.'''
+
+    # Shortest possible text prefix, the first $...$ span, and the remainder.
+    mathre = re.compile(r'^(.*?)(\$.*?\$)(.*)')
+    retval = []
+
+    # Convert LaTeX to Unicode
+    for command, char in read_unicodesymbols():
+        try:
+            data = data.replace(command, char)
+        except UnicodeError:
+            # There seems to be a character in the unicodesymbols file
+            # that causes problems, namely, 0x2109. Skipping the entry keeps
+            # the conversion best-effort.
+            pass
+    # Generic, \" -> ":
+    data = wrap_into_ert(data, r'\"', '"')
+    # Math:
+    for line in data.split('\n'):
+        rest = line
+        m = mathre.match(rest)
+        while m is not None:
+            text = m.group(1)
+            formula = m.group(2).replace('\\\\', '\\')
+            rest = m.group(3)
+            if text:
+                # this is non-math!
+                retval += _text_to_lyx_lines(text)
+            retval.append("\\begin_inset Formula " + formula)
+            retval.append("\\end_inset")
+            m = mathre.match(rest)
+        # Whatever is left -- the whole line when it holds no math -- is
+        # plain text and gets the same treatment as text between formulas.
+        retval += _text_to_lyx_lines(rest)
+    return retval
+
+
  ####################################################################
  
  def convert_ltcaption(document):
@@ -748,45 +838,53 @@ def revert_wrapfig_options(document):
      "Revert optional options for wrap floats (wrapfig)."
      i = 0
      while True:
-        i = find_token(document.body, "lines", i)
+        i = find_token(document.body, "\\begin_inset Wrap figure", i)
          if i == -1:
              return
-        j = find_token(document.body, "overhang", i+1)
-        if j != i + 2 and j != -1:
-            document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float.")
+        j = find_end_of_inset(document.body, i)
          if j == -1:
-            return
-        del document.body[i]
-        del document.body[j-1]
-        i = i + 1
-
+            document.warning("Can't find end of Wrap inset at line " + str(i))
+            i += 1
+            continue
+        k = find_default_layout(document, i, j)
+        if k == -1:
+            document.warning("Can't find default layout for Wrap figure!")
+            i = j
+            continue
+        # Options should be between i and k now
+        l = find_token(document.body, "lines", i, k)
+        if l == -1:
+            document.warning("Can't find lines option for Wrap figure!")
+            i = k
+            continue
+        m = find_token(document.body, "overhang", i + 1, k)
+        if m == -1:
+            document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float!")
+            i = k
+            continue
+        # Do these in reverse order
+        del document.body[m]
+        del document.body[l]
+        i = k
  
-# To convert and revert indices, we need to convert between LaTeX 
-# strings and LyXText. Here we do a minimal conversion to prevent 
-# crashes and data loss. Manual patch-up may be needed.
-replacements = [
-  [r'\\\"a', u'ä'], 
-  [r'\\\"o', u'ö'], 
-  [r'\\\"u', u'ü'],
-  [r'\\\'a', u'á'],
-  [r'\\\'e', u'é'],
-  [r'\\\'i', u'í'],
-  [r'\\\'o', u'ó'],
-  [r'\\\'u', u'ú']
-]
  
  def convert_latexcommand_index(document):
      "Convert from LatexCommand form to collapsable form."
      i = 0
+    r1 = re.compile('name "(.*)"')
      while True:
          i = find_token(document.body, "\\begin_inset CommandInset index", i)
          if i == -1:
              return
          if document.body[i + 1] != "LatexCommand index": # Might also be index_print
              return
-        fullcontent = document.body[i + 2][5:]
-        fullcontent.strip()
-        fullcontent = fullcontent[1:-1]
+        m = r1.match(document.body[i + 2])
+        if m == None:
+            document.warning("Unable to match: " + document.body[i+2])
+            i += 1
+            continue
+        fullcontent = m.group(1)
+        #document.warning(fullcontent)
          document.body[i:i + 3] = ["\\begin_inset Index",
            "status collapsed",
            "\\begin_layout Standard"]
@@ -794,43 +892,12 @@ def convert_latexcommand_index(document):
          # We are now on the blank line preceding "\end_inset"
          # We will write the content here, into the inset.
  
-        # Do the LaTeX --> LyX text conversion
-        for rep in replacements:
-            fullcontent = fullcontent.replace(rep[0], rep[1])
-        # Generic, \" -> ":
-        fullcontent = wrap_into_ert(fullcontent, r'\"', '"')
-        # Math:
-        r = re.compile('^(.*?)(\$.*?\$)(.*)')
-        lines = fullcontent.split('\n')
-        for line in lines:
-          #document.warning("LINE: " + line)
-          #document.warning(str(i) + ":" + document.body[i])
-          #document.warning("LAST: " + document.body[-1])
-          g = line
-          while r.match(g):
-            m = r.match(g)
-            s = m.group(1)
-            f = m.group(2).replace('\\\\', '\\')
-            g = m.group(3)
-            if s:
-              # this is non-math!
-              s = wrap_into_ert(s, r'\\', '\\backslash')
-              s = wrap_into_ert(s, '{', '{')
-              s = wrap_into_ert(s, '}', '}')
-              subst = s.split('\n')
-              document.body[i:i] = subst
-              i += len(subst)
-            document.body.insert(i + 1, "\\begin_inset Formula " + f)
-            document.body.insert(i + 2, "\\end_inset")
-            i += 2
-          # Generic, \\ -> \backslash:
-          g = wrap_into_ert(g, r'\\', '\\backslash')
-          g = wrap_into_ert(g, '{', '{')
-          g = wrap_into_ert(g, '}', '}')
-          subst = g.split('\n')
-          document.body[i+1:i+1] = subst
-          i += len(subst)
+        linelist = latex2lyx(fullcontent)
+        document.body[i+1:i+1] = linelist
+        i += len(linelist)
+
          document.body.insert(i + 1, "\\end_layout")
+        i += 1
  
  
  def revert_latexcommand_index(document):
@@ -1116,6 +1183,35 @@ def revert_href(document):
          ["\\begin_inset CommandInset url", "LatexCommand url"]
        i = i + 2
  
+def revert_url(document):
+    'Turn every Flex URL inset back into an old-style LatexCommand url inset.'
+    start = find_token(document.body, "\\begin_inset Flex URL", 0)
+    while start != -1:
+        inset_end = find_end_of_inset(document.body, start)
+        if inset_end == -1:
+            # Without the end of the inset nothing further can be located.
+            document.warning("Can't find end of inset in revert_url!")
+            return
+        # Unless the inset gets replaced, the search resumes at its end.
+        resume = inset_end
+        layout_start = find_default_layout(document, start, inset_end)
+        if layout_start == -1:
+            document.warning("Can't find default layout in revert_url!")
+        else:
+            layout_end = find_end_of(document.body, layout_start,
+                                     "\\begin_layout", "\\end_layout")
+            if layout_end == -1 or layout_end >= inset_end:
+                document.warning("Can't find end of default layout in revert_url!")
+            else:
+                # The inset's data sits strictly between the layout's first
+                # and last line; it is joined into one target string.
+                target = " ".join(document.body[layout_start + 1:layout_end])
+                target = target.strip()
+                replacement = ["\\begin_inset LatexCommand url",
+                               "target \"" + target + "\"",
+                               "",
+                               "\\end_inset"]
+                document.body[start:inset_end + 1] = replacement
+                resume = start + len(replacement)
+        start = find_token(document.body, "\\begin_inset Flex URL", resume)
+
  
  def convert_include(document):
    'Converts include insets to new format.'
@@ -1149,40 +1245,50 @@ def convert_include(document):
  def revert_include(document):
    'Reverts include insets to old format.'
    i = 0
+  r0 = re.compile('preview.*')
    r1 = re.compile('LatexCommand (.+)')
-  r2 = re.compile('filename (.+)')
-  r3 = re.compile('options (.*)')
+  r2 = re.compile('filename "(.+)"')
+  r3 = re.compile('lstparams "(.*)"')
    while True:
      i = find_token(document.body, "\\begin_inset CommandInset include", i)
      if i == -1:
        return
-    previewline = document.body[i + 1]
-    m = r1.match(document.body[i + 2])
+    nextline = i + 1
+    if r0.match(document.body[nextline]):
+      previewline = document.body[nextline]
+      nextline += 1
+    else:
+      previewline = ""
+    m = r1.match(document.body[nextline])
      if m == None:
        document.warning("Malformed LyX document: No LatexCommand line for `" +
          document.body[i] + "' on line " + str(i) + ".")
        i += 1
        continue
      cmd = m.group(1)
-    m = r2.match(document.body[i + 3])
+    nextline += 1
+    m = r2.match(document.body[nextline])
      if m == None:
        document.warning("Malformed LyX document: No filename line for `" + \
          document.body[i] + "' on line " + str(i) + ".")
        i += 2
        continue
      fn = m.group(1)
+    nextline += 1
      options = ""
-    numlines = 4
      if (cmd == "lstinputlisting"):
-      m = r3.match(document.body[i + 4])
+      m = r3.match(document.body[nextline])
        if m != None:
          options = m.group(1)
          numlines = 5
+        nextline += 1
      newline = "\\begin_inset Include \\" + cmd + "{" + fn + "}"
      if options:
        newline += ("[" + options + "]")
-    insertion = [newline, previewline]
-    document.body[i : i + numlines] = insertion
+    insertion = [newline]
+    if previewline != "":
+      insertion.append(previewline)
+    document.body[i : nextline] = insertion
      i += 2
  
  
@@ -1793,7 +1899,6 @@ def revert_external_embedding(document):
      revert_inset_embedding(document, 'External')
  
  
-# FIXME This code can still be cleaned up a fair bit.
  def convert_subfig(document):
      " Convert subfigures to subfloats. "
      i = 0
@@ -1808,28 +1913,28 @@ def convert_subfig(document):
              continue
          k = find_token(document.body, '\tsubcaption', i, endInset)
          if k == -1:
-            i += 1
+            i = endInset
              continue
          l = find_token(document.body, '\tsubcaptionText', i, endInset)
+        if l == -1:
+            document.warning("Malformed lyx document: Can't find subcaptionText!")
+            i = endInset
+            continue
          caption = document.body[l][16:].strip('"')
-        savestr = document.body[i]
-        laststr = document.body[endInset]
          del document.body[l]
          del document.body[k]
          addedLines = -2
-        # savestr should no longer be needed here.
          subst = ['\\begin_inset Float figure', 'wide false', 'sideways false', 
                   'status open', '', '\\begin_layout Plain Layout', '\\begin_inset Caption', 
-                 '', '\\begin_layout Plain Layout',
-                 caption, '\\end_layout', '', '\\end_inset', '', 
-                 '\\end_layout', '', '\\begin_layout Plain Layout', savestr]
-        document.body[i : i+1] = subst
-        addedLines += len(subst) - 1
+                 '', '\\begin_layout Plain Layout'] + latex2lyx(caption) + \
+                 [ '\\end_layout', '', '\\end_inset', '', 
+                 '\\end_layout', '', '\\begin_layout Plain Layout']
+        document.body[i : i] = subst
+        addedLines += len(subst)
          endInset += addedLines
-        # There should be an easier way to do this.
-        subst = ['', '\\end_inset', '', '\\end_layout', laststr]
-        document.body[endInset : endInset+1] = subst
-        addedLines += len(subst) - 1
+        subst = ['', '\\end_inset', '', '\\end_layout']
+        document.body[endInset : endInset] = subst
+        addedLines += len(subst)
          i += addedLines + 1
  
  
@@ -1938,8 +2043,7 @@ def revert_subfig(document):
              insertion = insertion.split('\n')
              document.body[k : k + 1] = insertion
              addedLines += len(insertion) - 1
-            add_to_preamble(document,
-                            ['\\usepackage{subfig}\n'])
+            add_to_preamble(document, ['\\usepackage{subfig}\n'])
          i += addedLines + 1
  
  
@@ -1947,18 +2051,21 @@ def revert_wrapplacement(document):
      " Revert placement options wrap floats (wrapfig). "
      i = 0
      while True:
-        i = find_token(document.body, "lines", i)
+        i = find_token(document.body, "\\begin_inset Wrap figure", i)
          if i == -1:
              return
-        j = find_token(document.body, "placement", i+1)
-        if j != i + 1:
+        e = find_end_of_inset(document.body, i)
+        j = find_token(document.body, "placement", i + 1, e)
+        if j == -1:
              document.warning("Malformed LyX document: Couldn't find placement parameter of wrap float.")
-            return
-        document.body[j] = document.body[j].replace("placement O", "placement o")
-        document.body[j] = document.body[j].replace("placement I", "placement i")
-        document.body[j] = document.body[j].replace("placement L", "placement l")
-        document.body[j] = document.body[j].replace("placement R", "placement r")
-        i = i + 1
+            i += 1
+            continue
+        # The placements being reverted are the uppercase O|I|L|R, so the
+        # match must ignore case; a lowercase-only pattern never matches them.
+        r = re.compile("placement (o|i|l|r)", re.IGNORECASE)
+        m = r.match(document.body[j])
+        if m == None:
+            document.warning("Malformed LyX document: Placement option isn't O|I|R|L!")
+        else:
+            # Rewrite only on a match; calling m.group() on None would raise.
+            document.body[j] = "placement " + m.group(1).lower()
+        # Resume the search for the next wrap float after this option line.
+        i = j
  
  
  def remove_extra_embedded_files(document):
@@ -2584,6 +2691,23 @@ def revert_plainlayout(document):
          i += 1
  
  
+def revert_polytonicgreek(document):
+    "Replace the language polytonic Greek by Greek in the header and the body."
+    old_lang = "\\lang polutonikogreek"
+    # Document-wide language: update the attribute and the header line.
+    if document.language == "polutonikogreek":
+        document.language = "greek"
+        header_line = find_token(document.header, "\\language", 0)
+        if header_line != -1:
+            document.header[header_line] = "\\language greek"
+    # Language switches inside the body, one matching line at a time.
+    pos = find_token(document.body, old_lang, 0)
+    while pos != -1:
+        document.body[pos] = document.body[pos].replace(old_lang, "\\lang greek")
+        pos = find_token(document.body, old_lang, pos + 1)
+
+
  ##
  # Conversion hub
  #
@@ -2650,9 +2774,11 @@ convert = [[277, [fix_wrong_tables]],
             [335, [convert_InsetSpace]],
             [336, []],
             [337, [convert_display_enum]],
+           [338, []],
            ]
  
-revert =  [[336, [revert_display_enum]],
+revert =  [[337, [revert_polytonicgreek]],
+           [336, [revert_display_enum]],
             [335, [remove_fontsCJK]],
             [334, [revert_InsetSpace]],
             [333, [revert_paper_sizes]],
@@ -2694,7 +2820,7 @@ revert =  [[336, [revert_display_enum]],
             [297, [revert_macro_optional_params]],
             [296, [revert_albanian, revert_lowersorbian, revert_uppersorbian]],
             [295, [revert_include]],
-           [294, [revert_href]],
+           [294, [revert_href, revert_url]],
             [293, [revert_pdf_options_2]],
             [292, [revert_inset_info]],
             [291, [revert_japanese, revert_japanese_encoding]],