* lib/images/*.xpm:

[lyx.git] / lib / lyx2lyx / lyx_1_5.py
diff --git a/lib/lyx2lyx/lyx_1_5.py b/lib/lyx2lyx/lyx_1_5.py

index 1ce73f661001304d6c51c24a6eb5e02b80c6c7a9..17f9f2d5e35798ec1d99364e7deece128b71005f 100644 (file)
--- a/lib/lyx2lyx/lyx_1_5.py
+++ b/lib/lyx2lyx/lyx_1_5.py
@@ -1,6 +1,7 @@
  # This file is part of lyx2lyx
-# -*- coding: iso-8859-1 -*-
-# Copyright (C) 2006 José Matos <jamatos@lyx.org>
+# -*- coding: utf-8 -*-
+# Copyright (C) 2006 José Matos <jamatos@lyx.org>
+# Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
  #
  # This program is free software; you can redistribute it and/or
  # modify it under the terms of the GNU General Public License
@@ -16,20 +17,37 @@
  # along with this program; if not, write to the Free Software
  # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  
-from parser_tools import find_token_exact, find_tokens, get_value
+""" Convert files to the file format generated by lyx 1.5"""
+
+import re
+from parser_tools import find_token, find_token_exact, find_tokens, find_end_of, get_value
+from LyX import get_encoding
+
+
+####################################################################
+# Private helper functions
+
+def find_end_of_inset(lines, i):
+    " Find end of inset, where lines[i] is included."
+    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
+
+# End of helper functions
+####################################################################
+
  
  ##
  #  Notes: Framed/Shaded
  #
  
-def revert_framed(file):
+def revert_framed(document):
+    "Revert framed notes. "
      i = 0
      while 1:
-        i = find_tokens(file.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
+        i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
  
          if i == -1:
              return
-        file.body[i] = "\\begin_inset Note"
+        document.body[i] = "\\begin_inset Note"
          i = i + 1
  
  
@@ -53,20 +71,21 @@ typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
                      'newcent' : 'default', 'bookman'  : 'default',
                      'pslatex' : 'courier'}
  
-def convert_font_settings(file):
+def convert_font_settings(document):
+    " Convert font settings. "
      i = 0
-    i = find_token_exact(file.header, "\\fontscheme", i)
+    i = find_token_exact(document.header, "\\fontscheme", i)
      if i == -1:
-        file.warning("Malformed LyX file: Missing `\\fontscheme'.")
+        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
          return
-    font_scheme = get_value(file.header, "\\fontscheme", i, i + 1)
+    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
      if font_scheme == '':
-        file.warning("Malformed LyX file: Empty `\\fontscheme'.")
+        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
          font_scheme = 'default'
      if not font_scheme in roman_fonts.keys():
-        file.warning("Malformed LyX file: Unknown `\\fontscheme' `%s'." % font_scheme)
+        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
          font_scheme = 'default'
-    file.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
+    document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
                            '\\font_sans %s' % sans_fonts[font_scheme],
                            '\\font_typewriter %s' % typewriter_fonts[font_scheme],
                            '\\font_default_family default',
@@ -76,110 +95,421 @@ def convert_font_settings(file):
                            '\\font_tt_scale 100']
  
  
-def revert_font_settings(file):
+def revert_font_settings(document):
+    " Revert font settings. "
      i = 0
      insert_line = -1
      fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
      for family in 'roman', 'sans', 'typewriter':
          name = '\\font_%s' % family
-        i = find_token_exact(file.header, name, i)
+        i = find_token_exact(document.header, name, i)
          if i == -1:
-            file.warning("Malformed LyX file: Missing `%s'." % name)
+            document.warning("Malformed LyX document: Missing `%s'." % name)
              i = 0
          else:
              if (insert_line < 0):
                  insert_line = i
-            fonts[family] = get_value(file.header, name, i, i + 1)
-            del file.header[i]
-    i = find_token_exact(file.header, '\\font_default_family', i)
+            fonts[family] = get_value(document.header, name, i, i + 1)
+            del document.header[i]
+    i = find_token_exact(document.header, '\\font_default_family', i)
      if i == -1:
-        file.warning("Malformed LyX file: Missing `\\font_default_family'.")
+        document.warning("Malformed LyX document: Missing `\\font_default_family'.")
          font_default_family = 'default'
      else:
-        font_default_family = get_value(file.header, "\\font_default_family", i, i + 1)
-        del file.header[i]
-    i = find_token_exact(file.header, '\\font_sc', i)
+        font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
+        del document.header[i]
+    i = find_token_exact(document.header, '\\font_sc', i)
      if i == -1:
-        file.warning("Malformed LyX file: Missing `\\font_sc'.")
+        document.warning("Malformed LyX document: Missing `\\font_sc'.")
          font_sc = 'false'
      else:
-        font_sc = get_value(file.header, '\\font_sc', i, i + 1)
-        del file.header[i]
+        font_sc = get_value(document.header, '\\font_sc', i, i + 1)
+        del document.header[i]
      if font_sc != 'false':
-        file.warning("Conversion of '\\font_sc' not yet implemented.")
-    i = find_token_exact(file.header, '\\font_osf', i)
+        document.warning("Conversion of '\\font_sc' not yet implemented.")
+    i = find_token_exact(document.header, '\\font_osf', i)
      if i == -1:
-        file.warning("Malformed LyX file: Missing `\\font_osf'.")
+        document.warning("Malformed LyX document: Missing `\\font_osf'.")
          font_osf = 'false'
      else:
-        font_osf = get_value(file.header, '\\font_osf', i, i + 1)
-        del file.header[i]
-    i = find_token_exact(file.header, '\\font_sf_scale', i)
+        font_osf = get_value(document.header, '\\font_osf', i, i + 1)
+        del document.header[i]
+    i = find_token_exact(document.header, '\\font_sf_scale', i)
      if i == -1:
-        file.warning("Malformed LyX file: Missing `\\font_sf_scale'.")
+        document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
          font_sf_scale = '100'
      else:
-        font_sf_scale = get_value(file.header, '\\font_sf_scale', i, i + 1)
-        del file.header[i]
+        font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
+        del document.header[i]
      if font_sf_scale != '100':
-        file.warning("Conversion of '\\font_sf_scale' not yet implemented.")
-    i = find_token_exact(file.header, '\\font_tt_scale', i)
+        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
+    i = find_token_exact(document.header, '\\font_tt_scale', i)
      if i == -1:
-        file.warning("Malformed LyX file: Missing `\\font_tt_scale'.")
+        document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
          font_tt_scale = '100'
      else:
-        font_tt_scale = get_value(file.header, '\\font_tt_scale', i, i + 1)
-        del file.header[i]
+        font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
+        del document.header[i]
      if font_tt_scale != '100':
-        file.warning("Conversion of '\\font_tt_scale' not yet implemented.")
+        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
      for font_scheme in roman_fonts.keys():
          if (roman_fonts[font_scheme] == fonts['roman'] and
              sans_fonts[font_scheme] == fonts['sans'] and
              typewriter_fonts[font_scheme] == fonts['typewriter']):
-            file.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
+            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
              if font_default_family != 'default':
-                file.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
+                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
              if font_osf == 'true':
-                file.warning("Ignoring `\\font_osf = true'")
+                document.warning("Ignoring `\\font_osf = true'")
              return
      font_scheme = 'default'
-    file.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
+    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
      if fonts['roman'] == 'cmr':
-        file.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
+        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
          if font_osf == 'true':
-            file.preamble.append('\\usepackage{eco}')
+            document.preamble.append('\\usepackage{eco}')
              font_osf = 'false'
      for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
          if fonts['roman'] == font:
-            file.preamble.append('\\usepackage{%s}' % font)
+            document.preamble.append('\\usepackage{%s}' % font)
      for font in 'cmss', 'lmss', 'cmbr':
          if fonts['sans'] == font:
-            file.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
+            document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
      for font in 'berasans':
          if fonts['sans'] == font:
-            file.preamble.append('\\usepackage{%s}' % font)
+            document.preamble.append('\\usepackage{%s}' % font)
      for font in 'cmtt', 'lmtt', 'cmtl':
          if fonts['typewriter'] == font:
-            file.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
+            document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
      for font in 'courier', 'beramono', 'luximono':
          if fonts['typewriter'] == font:
-            file.preamble.append('\\usepackage{%s}' % font)
+            document.preamble.append('\\usepackage{%s}' % font)
      if font_default_family != 'default':
-        file.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
+        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
      if font_osf == 'true':
-        file.warning("Ignoring `\\font_osf = true'")
+        document.warning("Ignoring `\\font_osf = true'")
+
+
+def revert_booktabs(document):
+    " We remove the booktabs flag or everything else will become a mess. "
+    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
+    re_tspace = re.compile(r'\s+topspace="[^"]+"')
+    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
+    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset Tabular", i)
+        if i == -1:
+            return
+        j = find_end_of_inset(document.body, i + 1)
+        if j == -1:
+            document.warning("Malformed LyX document: Could not find end of tabular.")
+            continue
+        for k in range(i, j):
+            if re.search('^<features.* booktabs="true".*>$', document.body[k]):
+                document.warning("Converting 'booktabs' table to normal table.")
+                document.body[k] = document.body[k].replace(' booktabs="true"', '')
+            if re.search(re_row, document.body[k]):
+                document.warning("Removing extra row space.")
+                document.body[k] = re_tspace.sub('', document.body[k])
+                document.body[k] = re_bspace.sub('', document.body[k])
+                document.body[k] = re_ispace.sub('', document.body[k])
+        i = i + 1
+
+
+def convert_utf8(document):
+    document.encoding = "utf8"
+
+
+def revert_utf8(document):
+    i = find_token(document.header, "\\inputencoding", 0)
+    if i == -1:
+        document.header.append("\\inputencoding auto")
+    elif get_value(document.header, "\\inputencoding", i) == "utf8":
+        document.header[i] = "\\inputencoding auto"
+    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
+    document.encoding = get_encoding(document.language, document.inputencoding, 248)
+
+
+def revert_cs_label(document):
+    " Remove status flag of charstyle label. "
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset CharStyle", i)
+        if i == -1:
+            return
+        # Search for a line starting with 'show_label'
+        # If it is not there, break with a warning message
+        i = i + 1
+        while 1:
+            if (document.body[i][:10] == "show_label"):
+                del document.body[i]
+                break
+            elif (document.body[i][:13] == "\\begin_layout"):
+                document.warning("Malformed LyX document: Missing 'show_label'.")
+                break
+            i = i + 1
+
+        i = i + 1
+
+
+def convert_bibitem(document):
+    """ Convert
+\bibitem [option]{argument}
+
+to
+
+\begin_inset LatexCommand bibitem
+label "option"
+key "argument"
+
+\end_inset
+
+This must be called after convert_commandparams.
+"""
+    regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\bibitem", i)
+        if i == -1:
+            break
+        match = re.match(regex, document.body[i])
+        option = match.group(1)
+        argument = match.group(2)
+        lines = ['\\begin_inset LatexCommand bibitem']
+        if option != None:
+            lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
+        lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
+        lines.append('')
+        lines.append('\\end_inset')
+        document.body[i:i+1] = lines
+        i = i + 1
+
+
+commandparams_info = {
+    # command : [option1, option2, argument]
+    "bibitem" : ["label", "", "key"],
+    "bibtex" : ["options", "btprint", "bibfiles"],
+    "cite"        : ["after", "before", "key"],
+    "citet"       : ["after", "before", "key"],
+    "citep"       : ["after", "before", "key"],
+    "citealt"     : ["after", "before", "key"],
+    "citealp"     : ["after", "before", "key"],
+    "citeauthor"  : ["after", "before", "key"],
+    "citeyear"    : ["after", "before", "key"],
+    "citeyearpar" : ["after", "before", "key"],
+    "citet*"      : ["after", "before", "key"],
+    "citep*"      : ["after", "before", "key"],
+    "citealt*"    : ["after", "before", "key"],
+    "citealp*"    : ["after", "before", "key"],
+    "citeauthor*" : ["after", "before", "key"],
+    "Citet"       : ["after", "before", "key"],
+    "Citep"       : ["after", "before", "key"],
+    "Citealt"     : ["after", "before", "key"],
+    "Citealp"     : ["after", "before", "key"],
+    "Citeauthor"  : ["after", "before", "key"],
+    "Citet*"      : ["after", "before", "key"],
+    "Citep*"      : ["after", "before", "key"],
+    "Citealt*"    : ["after", "before", "key"],
+    "Citealp*"    : ["after", "before", "key"],
+    "Citeauthor*" : ["after", "before", "key"],
+    "citefield"   : ["after", "before", "key"],
+    "citetitle"   : ["after", "before", "key"],
+    "cite*"       : ["after", "before", "key"],
+    "hfill" : ["", "", ""],
+    "index"      : ["", "", "name"],
+    "printindex" : ["", "", "name"],
+    "label" : ["", "", "name"],
+    "eqref"     : ["name", "", "reference"],
+    "pageref"   : ["name", "", "reference"],
+    "prettyref" : ["name", "", "reference"],
+    "ref"       : ["name", "", "reference"],
+    "vpageref"  : ["name", "", "reference"],
+    "vref"      : ["name", "", "reference"],
+    "tableofcontents" : ["", "", "type"],
+    "htmlurl" : ["name", "", "target"],
+    "url"     : ["name", "", "target"]}
+
+
+def convert_commandparams(document):
+    """ Convert
+
+ \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
+ \end_inset
+
+ to
+
+ \begin_inset LatexCommand cmdname
+ name1 "opt1"
+ name2 "opt2"
+ name3 "arg"
+ \end_inset
+
+ name1, name2 and name3 can be different for each command.
+"""
+    # \begin_inset LatexCommand bibitem was not the official version (see
+    # convert_bibitem()), but could be read in, so we convert it here, too.
+
+    # FIXME: Handle things like \command[foo[bar]]{foo{bar}}
+    # we need a real parser here.
+    regex = re.compile(r'\\([^\[\{]+)(\[[^\[\{]*\])?(\[[^\[\{]*\])?(\{[^}]*\})?')
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset LatexCommand", i)
+        if i == -1:
+            break
+        command = document.body[i][26:].strip()
+        if command == "":
+            document.warning("Malformed LyX document: Missing LatexCommand name.")
+            i = i + 1
+            continue
+
+        # The following parser is taken from the original InsetCommandParams::scanCommand
+        name = ""
+        option1 = ""
+        option2 = ""
+        argument = ""
+        state = "WS"
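+        # Meant to handle things like \command[foo[bar]]{foo{bar}}. NOTE(review): '--nestdepth' / '++nestdepth' below are no-op unary expressions in Python, so the depth stays 0.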
+        nestdepth = 0
+        b = 0
+        for c in command:
+            if ((state == "CMDNAME" and c == ' ') or
+                (state == "CMDNAME" and c == '[') or
+                (state == "CMDNAME" and c == '{')):
+                state = "WS"
+            if ((state == "OPTION" and c == ']') or
+                (state == "SECOPTION" and c == ']') or
+                (state == "CONTENT" and c == '}')):
+                if nestdepth == 0:
+                    state = "WS"
+                else:
+                    --nestdepth
+            if ((state == "OPTION" and c == '[') or
+                (state == "SECOPTION" and c == '[') or
+                (state == "CONTENT" and c == '{')):
+                ++nestdepth
+            if state == "CMDNAME":
+                    name += c
+            elif state == "OPTION":
+                    option1 += c
+            elif state == "SECOPTION":
+                    option2 += c
+            elif state == "CONTENT":
+                    argument += c
+            elif state == "WS":
+                if c == '\\':
+                    state = "CMDNAME"
+                elif c == '[' and b != ']':
+                    state = "OPTION"
+                    nestdepth = 0 # Just to be sure
+                elif c == '[' and b == ']':
+                    state = "SECOPTION"
+                    nestdepth = 0 # Just to be sure
+                elif c == '{':
+                    state = "CONTENT"
+                    nestdepth = 0 # Just to be sure
+            b = c
+
+        # Now we have parsed the command, output the parameters
+        lines = ["\\begin_inset LatexCommand %s" % name]
+        if option1 != "":
+            if commandparams_info[name][0] == "":
+                document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
+            else:
+                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
+        if option2 != "":
+            if commandparams_info[name][1] == "":
+                document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
+            else:
+                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
+        if argument != "":
+            if commandparams_info[name][2] == "":
+                document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
+            else:
+                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
+        document.body[i:i+1] = lines
+        i = i + 1
+
+
+def revert_commandparams(document):
+    regex = re.compile(r'(\S+)\s+(.+)')
+    i = 0
+    while 1:
+        i = find_token(document.body, "\\begin_inset LatexCommand", i)
+        if i == -1:
+            break
+        name = document.body[i].split()[2]
+        j = find_end_of_inset(document.body, i + 1)
+        preview_line = ""
+        option1 = ""
+        option2 = ""
+        argument = ""
+        for k in range(i + 1, j):
+            match = re.match(regex, document.body[k])
+            if match:
+                pname = match.group(1)
+                pvalue = match.group(2)
+                if pname == "preview":
+                    preview_line = document.body[k]
+                elif (commandparams_info[name][0] != "" and
+                      pname == commandparams_info[name][0]):
+                    option1 = pvalue.strip('"').replace('\\"', '"')
+                elif (commandparams_info[name][1] != "" and
+                      pname == commandparams_info[name][1]):
+                    option2 = pvalue.strip('"').replace('\\"', '"')
+                elif (commandparams_info[name][2] != "" and
+                      pname == commandparams_info[name][2]):
+                    argument = pvalue.strip('"').replace('\\"', '"')
+            elif document.body[k].strip() != "":
+                document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
+        if name == "bibitem":
+            if option1 == "":
+                lines = ["\\bibitem {%s}" % argument]
+            else:
+                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
+        else:
+            if option1 == "":
+                if option2 == "":
+                    lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
+                else:
+                    lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
+            else:
+                if option2 == "":
+                    lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
+                else:
+                    lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
+        if name != "bibitem":
+            if preview_line != "":
+                lines.append(preview_line)
+            lines.append('')
+            lines.append('\\end_inset')
+        document.body[i:j+1] = lines
+        i = j + 1
  
  
  ##
  # Conversion hub
  #
  
+supported_versions = ["1.5.0","1.5"]
  convert = [[246, []],
-           [247, [convert_font_settings]]]
+           [247, [convert_font_settings]],
+           [248, []],
+           [249, [convert_utf8]],
+           [250, []],
+           [251, []],
+           [252, [convert_commandparams, convert_bibitem]]]
  
-revert  = [[246, [revert_font_settings]],
+revert =  [[251, [revert_commandparams]],
+           [250, [revert_cs_label]],
+           [249, []],
+           [248, [revert_utf8]],
+           [247, [revert_booktabs]],
+           [246, [revert_font_settings]],
             [245, [revert_framed]]]
  
+
  if __name__ == "__main__":
      pass