fix a few compiler warnings

[lyx.git] / lib / lyx2lyx / lyx_2_0.py
diff --git a/lib/lyx2lyx/lyx_2_0.py b/lib/lyx2lyx/lyx_2_0.py

index 646a7c519ace617bccdba4f78ee88887c72655c2..90f4d0e1e3f6d8c18ff79532b1eb14d50102d8f1 100644 (file)
--- a/lib/lyx2lyx/lyx_2_0.py
+++ b/lib/lyx2lyx/lyx_2_0.py
@@ -19,6 +19,8 @@
  """ Convert files to the file format generated by lyx 2.0"""
  
  import re, string
+import unicodedata
+import sys, os
  
  from parser_tools import find_token, find_end_of, find_tokens, get_value, get_value_string
  
@@ -46,6 +48,133 @@ def insert_to_preamble(index, document, text):
      document.preamble.insert(index, text)
  
  
+def read_unicodesymbols():
+    """ Read the unicodesymbols list of unicode characters and corresponding commands.
+
+    The file 'unicodesymbols' is expected in the parent of the directory that
+    holds the running script (sys.argv[0]) when that directory is named
+    'lyx2lyx', and in the script directory itself otherwise.
+
+    Returns a list of [command, character] pairs.  For commands of the form
+    \\\\X{c} (X a non-word character, c a word character) two extra spellings
+    are added: one without the braces and one with a blank instead of them.
+
+    Raises IOError if the file cannot be opened.
+    """
+    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
+    # str.strip('lyx2lyx') removes any run of the characters l, y, x and 2
+    # from both ends of the path, not the directory name, so it would mangle
+    # e.g. '/opt/lyx'.  Drop the trailing path component explicitly instead.
+    if os.path.basename(pathname) == 'lyx2lyx':
+        pathname = os.path.dirname(pathname)
+    spec_chars = []
+    # Two backslashes, followed by some non-word character, and then a character
+    # in braces. The idea is to check for constructs like: \"{u}, which is how
+    # they are written in the unicodesymbols file; but they can also be written
+    # as: \"u or even \" u.
+    r = re.compile(r'\\\\(\W)\{(\w)\}')
+    fp = open(os.path.join(pathname, 'unicodesymbols'))
+    try:
+        for line in fp.readlines():
+            # skip comments and blank lines
+            if line[0] == '#' or line.strip() == "":
+                continue
+            line = line.replace(' "', ' ')   # remove all quotation marks with spaces before
+            line = line.replace('" ', ' ')   # remove all quotation marks with spaces after
+            line = line.replace(r'\"', '"')  # replace \" by " (for characters with diaeresis)
+            try:
+                # the third field only has to exist; its content is not used
+                [ucs4, command, dead] = line.split(None, 2)
+                if command[0:1] != "\\":
+                    continue
+                # NOTE(review): eval() runs whatever the first field contains;
+                # acceptable only as long as unicodesymbols is a trusted file.
+                char = unichr(eval(ucs4))
+            except Exception:
+                # Best effort: a line that cannot be parsed is skipped.
+                continue
+            spec_chars.append([command, char])
+            m = r.match(command)
+            if m is None:
+                continue
+            command = "\\\\"
+            # If the character is a double-quote, then we need to escape it, too,
+            # since it is done that way in the LyX file.
+            if m.group(1) == "\"":
+                command += "\\"
+            commandbl = command
+            command += m.group(1) + m.group(2)
+            commandbl += m.group(1) + ' ' + m.group(2)
+            spec_chars.append([command, char])
+            spec_chars.append([commandbl, char])
+    finally:
+        # close the file even if reading fails
+        fp.close()
+    return spec_chars
+
+
+# Table of [command, character] pairs read from the unicodesymbols file.
+# It is built once, when this module is loaded, and is used by
+# put_cmd_in_ert() and lyx2latex() below.
+unicode_reps = read_unicodesymbols()
+
+
+def put_cmd_in_ert(string):
+    """ Wrap a LaTeX command string in a collapsed ERT inset.
+
+    Every character listed in unicode_reps is replaced by its command (with
+    doubled backslashes collapsed to single ones); afterwards each backslash
+    is written as the LyX token \\backslash followed by a newline.
+    Returns the complete inset as one string.
+    """
+    for command, char in unicode_reps:
+        string = string.replace(char, command.replace('\\\\', '\\'))
+    escaped = string.replace('\\', "\\backslash\n")
+    header = "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n"
+    footer = "\n\\end_layout\n\\end_inset"
+    return header + escaped + footer
+
+
+def lyx2latex(document, lines):
+    """Convert some LyX stuff into corresponding LaTeX stuff, as best we can.
+
+    document -- the document being converted; it is handed to
+                add_to_preamble() for the textcomp fallback.
+    lines    -- list of LyX body lines to translate.
+
+    Returns a single string: the translated lines concatenated without any
+    separator.  Layout and inset delimiters, \\lang switches, inset status
+    lines and whitespace-only lines are dropped.  A lossless reversion is
+    not possible; only some common insets and settings are handled.
+    """
+    content = ""
+    # Index of the last line of the ERT inset being traversed.  It starts at
+    # -1 rather than 0 so that lines[0] is not mistaken for ERT content.
+    ert_end = -1
+
+    for curline, line in enumerate(lines):
+        if line.startswith("\\begin_inset ERT"):
+            # We don't want to replace things inside ERT, so figure out
+            # where the end of the inset is.
+            ert_end = find_end_of_inset(lines, curline + 1)
+            continue
+        elif line.startswith("\\begin_inset Formula"):
+            # keep only what follows the 20-character inset header
+            line = line[20:]
+        elif line.startswith("\\begin_inset Quotes"):
+            # For now, we do a very basic reversion. Someone who understands
+            # quotes is welcome to fix it up.
+            qtype = line[20:].strip()
+            # qtype[0] would be the language; it is not used here
+            side = qtype[1]
+            dbls = qtype[2]
+            if side == "l":
+                if dbls == "d":
+                    line = "``"
+                else:
+                    line = "`"
+            else:
+                if dbls == "d":
+                    line = "''"
+                else:
+                    line = "'"
+        elif line.isspace() or \
+              line.startswith("\\begin_layout") or \
+              line.startswith("\\end_layout") or \
+              line.startswith("\\begin_inset") or \
+              line.startswith("\\end_inset") or \
+              line.startswith("\\lang") or \
+              line.strip() == "status collapsed" or \
+              line.strip() == "status open":
+            # skip all that stuff
+            continue
+
+        # this needs to be added to the preamble because of cases like
+        # \textmu, \textbackslash, etc.
+        add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
+                                   '\\@ifundefined{textmu}',
+                                   ' {\\usepackage{textcomp}}{}'])
+        if ert_end >= curline:
+            # ERT content already is LaTeX: only turn LyX's \backslash token
+            # back into ONE backslash (a raw r'\\' would emit two).
+            line = line.replace(r'\backslash', '\\')
+        else:
+            # escape the characters that are special to LaTeX
+            line = line.replace('&', '\\&{}')
+            line = line.replace('#', '\\#{}')
+            line = line.replace('^', '\\^{}')
+            line = line.replace('%', '\\%{}')
+            line = line.replace('_', '\\_{}')
+            line = line.replace('$', '\\${}')
+
+            # Do the LyX text --> LaTeX conversion
+            for rep in unicode_reps:
+                line = line.replace(rep[1], rep[0] + "{}")
+            line = line.replace(r'\backslash', r'\textbackslash{}')
+            line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
+            line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
+            line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
+            # both emphasis switches map to \em
+            line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
+            line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
+            line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
+            line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
+            line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
+            line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
+        content += line
+    return content
+
+
  ####################################################################
  
  
@@ -297,7 +426,7 @@ def revert_outputformat(document):
  
  
  def revert_backgroundcolor(document):
-    " Reverts backgrund color to preamble code "
+    " Reverts background color to preamble code "
      i = 0
      colorcode = ""
      while True:
@@ -306,6 +435,9 @@ def revert_backgroundcolor(document):
            return
        colorcode = get_value(document.header, '\\backgroundcolor', 0)
        del document.header[i]
+      # don't clutter the preamble if backgroundcolor is not set
+      if colorcode == "#ffffff":
+          continue
        # the color code is in the form #rrggbb where every character denotes a hex number
        # convert the string to an int
        red = string.atoi(colorcode[1:3],16)
@@ -331,6 +463,250 @@ def revert_backgroundcolor(document):
                             + '\\pagecolor{page_backgroundcolor}\n')
  
  
+def revert_splitindex(document):
+    """ Reverts splitindex-aware documents
+
+    Works in three passes:
+    1. the \\use_indices setting and every \\index ... \\end_index definition
+       are removed from the header; if indices were in use, the matching
+       splitidx commands are put into the preamble,
+    2. typed Index insets in the body become plain Index insets, or ERT with
+       \\sindex when they belong to a non-default index,
+    3. index_print insets lose their type, are deleted, or become ERT with
+       \\printindex[...].
+    Malformed input is reported through document.warning().
+    """
+    i = find_token(document.header, '\\use_indices', 0)
+    if i == -1:
+        document.warning("Malformed LyX document: Missing \\use_indices.")
+        return
+    indices = get_value(document.header, "\\use_indices", i)
+    preamble = ""
+    if indices == "true":
+        preamble += "\\usepackage{splitidx}\n"
+    del document.header[i]
+
+    # pass 1: index definitions in the header
+    index_def = re.compile(r'\\index (.*)$')
+    i = 0
+    while True:
+        i = find_token(document.header, "\\index", i)
+        if i == -1:
+            break
+        k = find_token(document.header, "\\end_index", i)
+        if k == -1:
+            document.warning("Malformed LyX document: Missing \\end_index.")
+            return
+        m = index_def.match(document.header[i])
+        if m is None:
+            # some other header line that merely starts with \index:
+            # leave it alone instead of failing on m.group()
+            i = i + 1
+            continue
+        iname = m.group(1)
+        ishortcut = get_value(document.header, '\\shortcut', i, k)
+        if ishortcut != "" and indices == "true":
+            preamble += "\\newindex[" + iname + "]{" + ishortcut + "}\n"
+        # the next candidate, if any, now sits at position i
+        del document.header[i:k+1]
+    if preamble != "":
+        insert_to_preamble(0, document, preamble)
+
+    # pass 2: Index insets in the body
+    inset_def = re.compile(r'\\begin_inset Index (.*)$')
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset Index", i)
+        if i == -1:
+            break
+        m = inset_def.match(document.body[i])
+        if m is None:
+            # no index type on this line, so there is nothing to revert
+            i = i + 1
+            continue
+        itype = m.group(1)
+        if itype == "idx" or indices == "false":
+            document.body[i] = "\\begin_inset Index"
+        else:
+            k = find_end_of_inset(document.body, i)
+            if k == -1:
+                document.warning("Malformed LyX document: Can't find end of Index inset.")
+                return
+            content = lyx2latex(document, document.body[i:k])
+            # escape quotes
+            content = content.replace('"', r'\"')
+            subst = [put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")]
+            document.body[i:k+1] = subst
+        i = i + 1
+
+    # pass 3: index_print insets in the body
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
+        if i == -1:
+            return
+        k = find_end_of_inset(document.body, i)
+        if k == -1:
+            # With k == -1 the slice body[i:k+1] is empty, so the assignment
+            # below would insert instead of replace and the loop would
+            # never terminate.
+            document.warning("Malformed LyX document: Can't find end of index_print inset.")
+            return
+        ptype = get_value(document.body, 'type', i, k).strip('"')
+        if ptype == "idx":
+            j = find_token(document.body, "type", i, k)
+            del document.body[j]
+        elif indices == "false":
+            del document.body[i:k+1]
+            # the line that followed the inset is now at i: rescan from there
+            continue
+        else:
+            subst = [put_cmd_in_ert("\\printindex[" + ptype + "]{}")]
+            document.body[i:k+1] = subst
+        i = i + 1
+
+
+def convert_splitindex(document):
+    """ Converts index and printindex insets to splitindex-aware format
+
+    Every Index inset is tagged with the index name 'idx', and every
+    index_print inset gets a 'type "idx"' parameter after its LatexCommand
+    line.  Conversion stops with a warning at the first index_print inset
+    whose second line is not a printindex command.
+    """
+    index_token = "\\begin_inset Index"
+    pos = find_token(document.body, index_token, 0)
+    while pos != -1:
+        tagged = document.body[pos].replace(index_token, index_token + " idx")
+        document.body[pos] = tagged
+        pos = find_token(document.body, index_token, pos + 1)
+
+    print_token = "\\begin_inset CommandInset index_print"
+    pos = find_token(document.body, print_token, 0)
+    while pos != -1:
+        if 'LatexCommand printindex' not in document.body[pos + 1]:
+            document.warning("Malformed LyX document: Incomplete printindex inset.")
+            return
+        # replace the command line by the command line plus the type line
+        document.body[pos + 1:pos + 2] = ["LatexCommand printindex",
+                                          "type \"idx\""]
+        pos = find_token(document.body, print_token, pos + 1)
+
+
+def revert_subindex(document):
+    """ Reverts \\printsubindex CommandInset types
+
+    index_print insets whose LatexCommand is printsubindex are deleted when
+    the document does not use indices, and replaced by ERT containing
+    \\printsubindex[<type>]{} otherwise.  Other index_print insets are left
+    untouched.
+    """
+    i = find_token(document.header, '\\use_indices', 0)
+    if i == -1:
+        document.warning("Malformed LyX document: Missing \\use_indices.")
+        return
+    indices = get_value(document.header, "\\use_indices", i)
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
+        if i == -1:
+            return
+        k = find_end_of_inset(document.body, i)
+        if k == -1:
+            # never use -1 as a slice bound: body[i:0] is empty, so the
+            # replacement below would insert instead of replace
+            document.warning("Malformed LyX document: Can't find end of index_print inset.")
+            i = i + 1
+            continue
+        ctype = get_value(document.body, 'LatexCommand', i, k)
+        if ctype != "printsubindex":
+            i = i + 1
+            continue
+        ptype = get_value(document.body, 'type', i, k).strip('"')
+        if indices == "false":
+            del document.body[i:k+1]
+            # the line that followed the inset is now at i: rescan from there
+            continue
+        subst = [put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")]
+        document.body[i:k+1] = subst
+        i = i + 1
+
+
+def revert_printindexall(document):
+    """ Reverts \\print[sub]index* CommandInset types
+
+    index_print insets whose LatexCommand is printindex* or printsubindex*
+    are deleted when the document does not use indices, and replaced by ERT
+    containing the command otherwise.  Other index_print insets are left
+    untouched.
+    """
+    i = find_token(document.header, '\\use_indices', 0)
+    if i == -1:
+        document.warning("Malformed LyX document: Missing \\use_indices.")
+        return
+    indices = get_value(document.header, "\\use_indices", i)
+    i = 0
+    while True:
+        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
+        if i == -1:
+            return
+        k = find_end_of_inset(document.body, i)
+        if k == -1:
+            # never use -1 as a slice bound: body[i:0] is empty, so the
+            # replacement below would insert instead of replace
+            document.warning("Malformed LyX document: Can't find end of index_print inset.")
+            i = i + 1
+            continue
+        ctype = get_value(document.body, 'LatexCommand', i, k)
+        if ctype != "printindex*" and ctype != "printsubindex*":
+            i = i + 1
+            continue
+        if indices == "false":
+            del document.body[i:k+1]
+            # the line that followed the inset is now at i: rescan from there
+            continue
+        subst = [put_cmd_in_ert("\\" + ctype + "{}")]
+        document.body[i:k+1] = subst
+        i = i + 1
+
+
+def revert_strikeout(document):
+    """ Reverts \\strikeout character style
+
+    Deletes every body line that find_token() reports for '\\strikeout'.
+    """
+    pos = find_token(document.body, '\\strikeout', 0)
+    while pos != -1:
+        del document.body[pos]
+        pos = find_token(document.body, '\\strikeout', 0)
+
+
+def revert_uulinewave(document):
+    """ Reverts \\uuline, and \\uwave character styles
+
+    Deletes every body line that find_token() reports for '\\uuline', then
+    does the same for '\\uwave'.
+    """
+    for token in ('\\uuline', '\\uwave'):
+        pos = find_token(document.body, token, 0)
+        while pos != -1:
+            del document.body[pos]
+            pos = find_token(document.body, token, 0)
+
+
+def revert_ulinelatex(document):
+    """ Reverts \\uline character style
+
+    If the body contains a '\\bar under' setting, a block of ulem-based
+    LaTeX definitions is inserted at the top of the preamble; otherwise
+    nothing is changed.
+    """
+    if find_token(document.body, '\\bar under', 0) == -1:
+        return
+    pieces = [
+        '% Commands inserted by lyx2lyx for proper underlining\n',
+        '\\PassOptionsToPackage{normalem}{ulem}\n',
+        '\\usepackage{ulem}\n',
+        '\\let\\cite@rig\\cite\n',
+        '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}\n',
+        '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}\n',
+        '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}\n',
+    ]
+    insert_to_preamble(0, document, ''.join(pieces))
+
+
+def revert_custom_processors(document):
+    """ Remove bibtex_command and index_command params
+
+    The two header settings are removed in that order.  As soon as one of
+    them is missing a warning is issued and the function returns.
+    """
+    for token in ('\\bibtex_command', '\\index_command'):
+        pos = find_token(document.header, token, 0)
+        if pos == -1:
+            document.warning("Malformed LyX document: Missing " + token + ".")
+            return
+        del document.header[pos]
+
+
+def convert_nomencl_width(document):
+    """ Add set_width param to nomencl_print
+
+    A 'set_width "none"' line is inserted two lines below the opening line
+    of every nomencl_print inset.
+    """
+    token = "\\begin_inset CommandInset nomencl_print"
+    pos = find_token(document.body, token, 0)
+    while pos != -1:
+        document.body.insert(pos + 2, "set_width \"none\"")
+        pos = find_token(document.body, token, pos + 1)
+
+
+def revert_nomencl_width(document):
+    """ Remove set_width param from nomencl_print
+
+    For every nomencl_print inset the set_width line is deleted.  An inset
+    without that line triggers a warning and the scan resumes at the end of
+    the inset.
+    """
+    token = "\\begin_inset CommandInset nomencl_print"
+    start = find_token(document.body, token, 0)
+    while start != -1:
+        end = find_end_of_inset(document.body, start)
+        param = find_token(document.body, "set_width", start, end)
+        if param == -1:
+            document.warning("Can't find set_width option for nomencl_print!")
+            start = find_token(document.body, token, end)
+        else:
+            del document.body[param]
+            start = find_token(document.body, token, start + 1)
+
+
+def revert_nomencl_cwidth(document):
+    """ Remove width param from nomencl_print
+
+    For every nomencl_print inset the width line is deleted and its value is
+    passed to add_to_preamble() as a \\setlength{\\nomlabelwidth}{...}
+    command.  An inset without a width line triggers a warning and the scan
+    resumes at the end of the inset.
+    """
+    token = "\\begin_inset CommandInset nomencl_print"
+    start = find_token(document.body, token, 0)
+    while start != -1:
+        end = find_end_of_inset(document.body, start)
+        param = find_token(document.body, "width", start, end)
+        if param == -1:
+            document.warning("Can't find width option for nomencl_print!")
+            start = find_token(document.body, token, end)
+        else:
+            # read the value before the line carrying it is removed
+            width = get_value(document.body, "width", start, end).strip('"')
+            del document.body[param]
+            add_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
+            start = find_token(document.body, token, start + 1)
+
+
  ##
  # Conversion hub
  #
@@ -341,10 +717,30 @@ convert = [[346, []],
             [348, []],
             [349, []],
             [350, []],
-           [351, []]
+           [351, []],
+           [352, [convert_splitindex]],
+           [353, []],
+           [354, []],
+           [355, []],
+           [356, []],
+           [357, []],
+           [358, []],
+           [359, [convert_nomencl_width]],
+           [360, []],
+           [361, []]
            ]
  
-revert =  [[350, [revert_backgroundcolor]],
+revert =  [[360, []],
+           [359, [revert_nomencl_cwidth]],
+           [358, [revert_nomencl_width]],
+           [357, [revert_custom_processors]],
+           [356, [revert_ulinelatex]],
+           [355, [revert_uulinewave]],
+           [354, [revert_strikeout]],
+           [353, [revert_printindexall]],
+           [352, [revert_subindex]],
+           [351, [revert_splitindex]],
+           [350, [revert_backgroundcolor]],
             [349, [revert_outputformat]],
             [348, [revert_xetex]],
             [347, [revert_phantom, revert_hphantom, revert_vphantom]],