Amend 0f782b0d: add the required changes to lyx2lyx.

[features.git] / lib / lyx2lyx / lyx_1_2.py
diff --git a/lib/lyx2lyx/lyx_1_2.py b/lib/lyx2lyx/lyx_1_2.py

index 3ac5d9cfb92eba3c7fef187e492f2f92aaa71293..b697a918c588159ed37bb42a18eacb283ef8a9a6 100644 (file)
--- a/lib/lyx2lyx/lyx_1_2.py
+++ b/lib/lyx2lyx/lyx_1_2.py
@@ -1,7 +1,7 @@
  # This file is part of lyx2lyx
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
  # Copyright (C) 2002 Dekel Tsur <dekel@lyx.org>
-# Copyright (C) 2004 José Matos <jamatos@lyx.org>
+# Copyright (C) 2004 José Matos <jamatos@lyx.org>
  #
  # This program is free software; you can redistribute it and/or
  # modify it under the terms of the GNU General Public License
@@ -15,15 +15,88 @@
  #
  # You should have received a copy of the GNU General Public License
  # along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+""" Convert files to the file format generated by lyx 1.2"""
  
-import string
  import re
  
-from parser_tools import find_token, find_token_backwards, get_next_paragraph,\
-                         find_tokens, find_end_of_inset, find_re, \
-                         is_nonempty_line, get_paragraph, find_nonempty_line, \
-                         get_value, get_tabular_lines, check_token, get_layout
+from parser_tools import find_token, find_token_backwards, \
+                         find_tokens, find_tokens_backwards, \
+                         find_beginning_of, find_end_of, find_re, \
+                         is_nonempty_line, find_nonempty_line, \
+                         get_value, check_token
+
+####################################################################
+# Private helper functions
+
+def get_layout(line, default_layout):
+    " Return the second word of line (the layout name); default_layout if there is none."
+    words = line.split()
+    if len(words) < 2:
+        return default_layout
+    return words[1]
+
+
+def get_paragraph(lines, i, format):
+    " Return the index of the \\layout line opening the paragraph that holds line i, or -1."
+    layout_token = "\\layout"

+    while i != -1:
+        i = find_tokens_backwards(lines, ["\\end_inset", layout_token], i)
+        if i != -1 and check_token(lines[i], layout_token):
+            return i
+        if i != -1:
+            i = find_beginning_of_inset(lines, i)  # hit an \end_inset: jump to where that inset begins
+    return -1
+
+
+def get_next_paragraph(lines, i, format):
+    " Finds the first \\layout, \\end_float or \\the_end line from i on, skipping whole insets; -1 if none."
+    tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]

+    while i != -1:
+        i = find_tokens(lines, tokens, i)
+        if i == -1 or not check_token(lines[i], "\\begin_inset"):  # -1: nothing found, never index lines[-1]
+            return i
+        i = find_end_of_inset(lines, i)  # a paragraph inside an inset is not the next one: skip the inset
+    return -1
+
+
+def find_beginning_of_inset(lines, i):
+    " Find the beginning of the inset that includes lines[i]; delegates to find_beginning_of."
+    return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset")
+
+
+def find_end_of_inset(lines, i):
+    r" Finds the \end_inset matching the inset that starts at lines[i]; delegates to find_end_of."
+    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
+
+
+def find_end_of_tabular(lines, i):
+    " Finds the </lyxtabular line matching a <lyxtabular one; delegates to find_end_of."
+    return find_end_of(lines, i, "<lyxtabular", "</lyxtabular")
+
+
+def get_tabular_lines(lines, i):
+    " Returns the list of indices of the tabular's own lines (nested insets skipped); [] if it has no end."
+    result = []
+    i = i+1
+    j = find_end_of_tabular(lines, i)
+    while j != -1 and i <= j:
+        if check_token(lines[i], "\\begin_inset"):
+            k = find_end_of_inset(lines, i)  # skip the nested inset as a whole
+            if k == -1:
+                break  # unterminated inset: k+1 == 0 would restart the scan from the top, endlessly
+            i = k+1
+        else:
+            result.append(i)
+            i = i+1
+    return result
+
+# End of helper functions
+####################################################################
+
  
  floats = {
      "footnote": ["\\begin_inset Foot",
@@ -59,6 +132,7 @@ pextra_rexp = re.compile(r"\\pextra_type\s+(\S+)"+\
  
  
  def get_width(mo):
+    " Get width from a regular expression. "
      if mo.group(10):
          if mo.group(9) == "\\pextra_widthp":
              return mo.group(10)+"col%"
@@ -68,22 +142,20 @@ def get_width(mo):
          return "100col%"
  
  
-#
-# Change \begin_float .. \end_float into \begin_inset Float .. \end_inset
-#
-def remove_oldfloat(file):
-    lines = file.body
+def remove_oldfloat(document):
+    r" Change \begin_float .. \end_float into \begin_inset Float .. \end_inset"
+    lines = document.body
      i = 0
-    while 1:
+    while True:
          i = find_token(lines, "\\begin_float", i)
          if i == -1:
              break
          # There are no nested floats, so finding the end of the float is simple
          j = find_token(lines, "\\end_float", i+1)
  
-        floattype = string.split(lines[i])[1]
-        if not floats.has_key(floattype):
-            file.warning("Error! Unknown float type " + floattype)
+        floattype = lines[i].split()[1]
+        if floattype not in floats:
+            document.warning("Error! Unknown float type " + floattype)
              floattype = "fig"
  
          # skip \end_deeper tokens
@@ -91,7 +163,7 @@ def remove_oldfloat(file):
          while check_token(lines[i2], "\\end_deeper"):
              i2 = i2+1
          if i2 > i+1:
-            j2 = get_next_paragraph(lines, j + 1, file.format + 1)
+            j2 = get_next_paragraph(lines, j + 1, document.format + 1)
              lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1))
  
          new = floats[floattype]+[""]
@@ -116,7 +188,7 @@ def remove_oldfloat(file):
          # as extra '\foo default' commands are ignored.
          # In fact, it might be safer to output '\foo default' for all
          # font attributes.
-        k = get_paragraph(lines, i, file.format + 1)
+        k = get_paragraph(lines, i, document.format + 1)
          flag = 0
          for token in font_tokens:
              if find_token(lines, token, k, i) != -1:
@@ -126,7 +198,7 @@ def remove_oldfloat(file):
                      flag = 1
                      new.append("")
                  if token == "\\lang":
-                    new.append(token+" "+ file.language)
+                    new.append(token+" "+ document.language)
                  else:
                      new.append(token+" default ")
  
@@ -138,11 +210,12 @@ pextra_type2_rexp = re.compile(r".*\\pextra_type\s+[12]")
  pextra_type2_rexp2 = re.compile(r".*(\\layout|\\pextra_type\s+2)")
  pextra_widthp = re.compile(r"\\pextra_widthp")
  
-def remove_pextra(file):
-    lines = file.body
+def remove_pextra(document):
+    " Remove pextra token."
+    lines = document.body
      i = 0
      flag = 0
-    while 1:
+    while True:
          i = find_re(lines, pextra_type2_rexp, i)
          if i == -1:
              break
@@ -179,13 +252,13 @@ def remove_pextra(file):
              if hfill:
                  start = ["","\hfill",""]+start
          else:
-            start = ['\\layout %s' % file.default_layout,''] + start
+            start = ['\\layout %s' % document.default_layout,''] + start
  
          j0 = find_token_backwards(lines,"\\layout", i-1)
-        j = get_next_paragraph(lines, i, file.format + 1)
+        j = get_next_paragraph(lines, i, document.format + 1)
  
          count = 0
-        while 1:
+        while True:
              # collect more paragraphs to the minipage
              count = count+1
              if j == -1 or not check_token(lines[j], "\\layout"):
@@ -210,7 +283,8 @@ def remove_pextra(file):
  
  
  def is_empty(lines):
-    return filter(is_nonempty_line, lines) == []
+    " Are all the lines empty?"
+    return list(filter(is_nonempty_line, lines)) == []
  
  
  move_rexp =  re.compile(r"\\(family|series|shape|size|emph|numeric|bar|noun|end_deeper)")
@@ -218,20 +292,21 @@ ert_rexp = re.compile(r"\\begin_inset|\\hfill|.*\\SpecialChar")
  spchar_rexp = re.compile(r"(.*)(\\SpecialChar.*)")
  
  
-def remove_oldert(file):
+def remove_oldert(document):
+    " Remove old ERT inset."
      ert_begin = ["\\begin_inset ERT",
                   "status Collapsed",
                   "",
-                 '\\layout %s' % file.default_layout,
+                 '\\layout %s' % document.default_layout,
                   ""]
-    lines = file.body
+    lines = document.body
      i = 0
-    while 1:
+    while True:
          i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
          if i == -1:
              break
          j = i+1
-        while 1:
+        while True:
              # \end_inset is for ert inside a tabular cell. The other tokens
              # are obvious.
              j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"],
@@ -249,10 +324,10 @@ def remove_oldert(file):
          new = []
          new2 = []
          if check_token(lines[i], "\\layout LaTeX"):
-            new = ['\layout %s' % file.default_layout, "", ""]
+            new = ['\layout %s' % document.default_layout, "", ""]
  
          k = i+1
-        while 1:
+        while True:
              k2 = find_re(lines, ert_rexp, k, j)
              inset = hfill = specialchar = 0
              if k2 == -1:
@@ -283,7 +358,7 @@ def remove_oldert(file):
                      tmp.append(line)
  
              if is_empty(tmp):
-                if filter(lambda x:x != "", tmp) != []:
+                if [x for x in tmp if x != ""] != []:
                      if new == []:
                          # This is not necessary, but we want the output to be
                          # as similar as posible to the lyx format
@@ -324,24 +399,24 @@ def remove_oldert(file):
  
      # Delete remaining "\latex xxx" tokens
      i = 0
-    while 1:
+    while True:
          i = find_token(lines, "\\latex ", i)
          if i == -1:
              break
          del lines[i]
  
  
-# ERT insert are hidden feature of lyx 1.1.6. This might be removed in the future.
-def remove_oldertinset(file):
-    lines = file.body
+def remove_oldertinset(document):
+    " ERT insets are a hidden feature of lyx 1.1.6. This might be removed in the future."
+    lines = document.body
      i = 0
-    while 1:
+    while True:
          i = find_token(lines, "\\begin_inset ERT", i)
          if i == -1:
              break
          j = find_end_of_inset(lines, i)
          k = find_token(lines, "\\layout", i+1)
-        l = get_paragraph(lines, i, file.format + 1)
+        l = get_paragraph(lines, i, document.format + 1)
          if lines[k] == lines[l]: # same layout
              k = k+1
          new = lines[k:j]
@@ -349,11 +424,12 @@ def remove_oldertinset(file):
          i = i+1
  
  
-def is_ert_paragraph(file, i):
-    lines = file.body
+def is_ert_paragraph(document, i):
+    " Is this a ert paragraph? "
+    lines = document.body
      if not check_token(lines[i], "\\layout"):
          return 0
-    if not file.is_default_layout(get_layout(lines[i], file.default_layout)):
+    if not document.is_default_layout(get_layout(lines[i], document.default_layout)):
          return 0
  
      i = find_nonempty_line(lines, i+1)
@@ -365,17 +441,18 @@ def is_ert_paragraph(file, i):
      return check_token(lines[k], "\\layout")
  
  
-def combine_ert(file):
-    lines = file.body
+def combine_ert(document):
+    " Combine ERT paragraphs."
+    lines = document.body
      i = 0
-    while 1:
+    while True:
          i = find_token(lines, "\\begin_inset ERT", i)
          if i == -1:
              break
-        j = get_paragraph(lines, i, file.format + 1)
+        j = get_paragraph(lines, i, document.format + 1)
          count = 0
          text = []
-        while is_ert_paragraph(file, j):
+        while is_ert_paragraph(document, j):
  
              count = count+1
              i2 = find_token(lines, "\\layout", j+1)
@@ -395,30 +472,33 @@ def combine_ert(file):
  oldunits = ["pt", "cm", "in", "text%", "col%"]
  
  def get_length(lines, name, start, end):
+    " Get the length on the line found for name in [start, end): value plus old unit suffix, '' if absent."
      i = find_token(lines, name, start, end)
      if i == -1:
          return ""
-    x = string.split(lines[i])
+    x = lines[i].split()
      return x[2]+oldunits[int(x[1])]
  
  
  def write_attribute(x, token, value):
+    " Append a tab-indented 'token value' line to the list x, unless value is empty."
      if value != "":
          x.append("\t"+token+" "+value)
  
  
-def remove_figinset(file):
-    lines = file.body
+def remove_figinset(document):
+    " Remove figinset."
+    lines = document.body
      i = 0
-    while 1:
+    while True:
          i = find_token(lines, "\\begin_inset Figure", i)
          if i == -1:
              break
          j = find_end_of_inset(lines, i)
  
-        if ( len(string.split(lines[i])) > 2 ):
-            lyxwidth = string.split(lines[i])[3]+"pt"
-            lyxheight = string.split(lines[i])[4]+"pt"
+        if ( len(lines[i].split()) > 2 ):
+            lyxwidth = lines[i].split()[3]+"pt"
+            lyxheight = lines[i].split()[4]+"pt"
          else:
              lyxwidth = ""
              lyxheight = ""
@@ -476,26 +556,24 @@ def remove_figinset(file):
          lines[i:j+1] = new
  
  
-##
-# Convert tabular format 2 to 3
-#
  attr_re = re.compile(r' \w*="(false|0|)"')
  line_re = re.compile(r'<(features|column|row|cell)')
  
-def update_tabular(file):
+def update_tabular(document):
+    " Convert tabular format 2 to 3."
      regexp = re.compile(r'^\\begin_inset\s+Tabular')
-    lines = file.body
+    lines = document.body
      i = 0
-    while 1:
+    while True:
          i = find_re(lines, regexp, i)
          if i == -1:
              break
  
          for k in get_tabular_lines(lines, i):
              if check_token(lines[k], "<lyxtabular"):
-                lines[k] = string.replace(lines[k], 'version="2"', 'version="3"')
+                lines[k] = lines[k].replace('version="2"', 'version="3"')
              elif check_token(lines[k], "<column"):
-                lines[k] = string.replace(lines[k], 'width=""', 'width="0pt"')
+                lines[k] = lines[k].replace('width=""', 'width="0pt"')
  
              if line_re.match(lines[k]):
                  lines[k] = re.sub(attr_re, "", lines[k])
@@ -520,8 +598,8 @@ def update_tabular(file):
  false = 0
  true = 1
  
-# simple data structure to deal with long table info
  class row:
+    " Simple data structure to deal with long table info."
      def __init__(self):
          self.endhead = false                # header row
          self.endfirsthead = false        # first header row
@@ -530,6 +608,7 @@ class row:
  
  
  def haveLTFoot(row_info):
+    " Does any row in row_info have endfoot set?"
      for row_ in row_info:
          if row_.endfoot:
              return true
@@ -537,6 +616,7 @@ def haveLTFoot(row_info):
  
  
  def setHeaderFooterRows(hr, fhr, fr, lfr, rows_, row_info):
+    " Set Header/Footer rows."
      endfirsthead_empty = false
      endlastfoot_empty = false
      # set header info
@@ -603,7 +683,8 @@ def setHeaderFooterRows(hr, fhr, fr, lfr, rows_, row_info):
  
  
  def insert_attribute(lines, i, attribute):
-    last = string.find(lines[i],'>')
+    " Insert attribute, preceded by a space, just before the first '>' of lines[i]."
+    last = lines[i].find('>')
      lines[i] = lines[i][:last] + ' ' + attribute + lines[i][last:]
  
  
@@ -611,11 +692,12 @@ rows_re = re.compile(r'rows="(\d*)"')
  longtable_re = re.compile(r'islongtable="(\w)"')
  ltvalues_re = re.compile(r'endhead="(-?\d*)" endfirsthead="(-?\d*)" endfoot="(-?\d*)" endlastfoot="(-?\d*)"')
  lt_features_re = re.compile(r'(endhead="-?\d*" endfirsthead="-?\d*" endfoot="-?\d*" endlastfoot="-?\d*")')
-def update_longtables(file):
+def update_longtables(document):
+    " Update longtables to new format."
      regexp = re.compile(r'^\\begin_inset\s+Tabular')
-    body = file.body
+    body = document.body
      i = 0
-    while 1:
+    while True:
          i = find_re(body, regexp, i)
          if i == -1:
              break
@@ -643,7 +725,7 @@ def update_longtables(file):
              # remove longtable elements from features
              features = lt_features_re.search(body[i])
              if features:
-                body[i] = string.replace(body[i], features.group(1), "")
+                body[i] = body[i].replace(features.group(1), "")
              continue
  
          row_info = row() * rows
@@ -680,11 +762,11 @@ def update_longtables(file):
              i = i + 1
  
  
-# Figure insert are hidden feature of lyx 1.1.6. This might be removed in the future.
-def fix_oldfloatinset(file):
-    lines = file.body
+def fix_oldfloatinset(document):
+    " Figure insets are a hidden feature of lyx 1.1.6. This might be removed in the future."
+    lines = document.body
      i = 0
-    while 1:
+    while True:
          i = find_token(lines, "\\begin_inset Float ", i)
          if i == -1:
              break
@@ -694,10 +776,11 @@ def fix_oldfloatinset(file):
          i = i+1
  
  
-def change_listof(file):
-    lines = file.body
+def change_listof(document):
+    " Change listof insets."
+    lines = document.body
      i = 0
-    while 1:
+    while True:
          i = find_token(lines, "\\begin_inset LatexCommand \\listof", i)
          if i == -1:
              break
@@ -706,14 +789,15 @@ def change_listof(file):
          i = i+1
  
  
-def change_infoinset(file):
-    lines = file.body
+def change_infoinset(document):
+    " Change info inset."
+    lines = document.body
      i = 0
-    while 1:
+    while True:
          i = find_token(lines, "\\begin_inset Info", i)
          if i == -1:
              break
-        txt = string.lstrip(lines[i][18:])
+        txt = lines[i][18:].lstrip()
          new = ["\\begin_inset Note", "collapsed true", ""]
          j = find_token(lines, "\\end_inset", i)
          if j == -1:
@@ -724,8 +808,8 @@ def change_infoinset(file):
              note_lines = [txt]+note_lines
  
          for line in note_lines:
-            new = new + ['\layout %s' % file.default_layout, ""]
-            tmp = string.split(line, '\\')
+            new = new + ['\layout %s' % document.default_layout, ""]
+            tmp = line.split('\\')
              new = new + [tmp[0]]
              for x in tmp[1:]:
                  new = new + ["\\backslash ", x]
@@ -733,15 +817,17 @@ def change_infoinset(file):
          i = i+5
  
  
-def change_header(file):
-    lines = file.header
+def change_header(document):
+    " Add the \\use_natbib and \\use_numerical_citations settings (both 0) right after \\use_amsmath."
+    lines = document.header
      i = find_token(lines, "\\use_amsmath", 0)
      if i == -1:
          return
      lines[i+1:i+1] = ["\\use_natbib 0",
-                      "\use_numerical_citations 0"]
+                      "\\use_numerical_citations 0"]
  
  
+supported_versions = ["1.2.%d" % i for i in range(5)] + ["1.2"]
  convert = [[220, [change_header, change_listof, fix_oldfloatinset,
                    update_tabular, update_longtables, remove_pextra,
                    remove_oldfloat, remove_figinset, remove_oldertinset,