# This file is part of lyx2lyx
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf-8 -*-
# Copyright (C) 2002 Dekel Tsur <dekel@lyx.org>
-# Copyright (C) 2004 José Matos <jamatos@lyx.org>
+# Copyright (C) 2004 José Matos <jamatos@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+""" Convert files to the file format generated by lyx 1.2"""
-import string
import re
-from parser_tools import find_token, find_token_backwards, get_next_paragraph,\
- find_tokens, find_end_of_inset, find_re, \
- is_nonempty_line, get_paragraph, find_nonempty_line, \
- get_value, get_tabular_lines, check_token, get_layout
+from parser_tools import find_token, find_token_backwards, \
+ find_tokens, find_tokens_backwards, \
+ find_beginning_of, find_end_of, find_re, \
+ is_nonempty_line, find_nonempty_line, \
+ get_value, check_token
+
+####################################################################
+# Private helper functions
+
def get_layout(line, default_layout):
    """Get the layout name from a line, falling back to the default.

    The layout name is taken to be the second whitespace-separated token
    of ``line``.

    Args:
        line: the text line to inspect.
        default_layout: value returned when ``line`` has fewer than two
            tokens.

    Returns:
        The second token of ``line``, or ``default_layout``.
    """
    tokens = line.split()
    if len(tokens) > 1:
        return tokens[1]
    return default_layout
+
+
def get_paragraph(lines, i, format):
    r"""Find the paragraph that contains line i.

    Searches backwards from line ``i`` for either a ``\layout`` or an
    ``\end_inset`` token.  A ``\layout`` hit is the answer.  An
    ``\end_inset`` hit means a whole inset sits in between, so the search
    moves to that inset's beginning (``find_beginning_of_inset``) and
    continues backwards from there.

    Args:
        lines: list of document lines.
        i: index of a line inside the paragraph being looked for.
        format: not used by this function; kept for signature
            compatibility with existing callers.

    Returns:
        The index of the ``\layout`` line, or -1 when none is found.
    """
    begin_layout = "\\layout"

    while i != -1:
        i = find_tokens_backwards(lines, ["\\end_inset", begin_layout], i)
        if i == -1:
            return -1
        if check_token(lines[i], begin_layout):
            return i
        # lines[i] is an \end_inset: skip the whole inset.
        i = find_beginning_of_inset(lines, i)
    return -1
+
+
def get_next_paragraph(lines, i, format):
    r"""Find the paragraph after the paragraph that contains line i.

    Scans forward from line ``i`` for one of ``\begin_inset``,
    ``\layout``, ``\end_float`` or ``\the_end``.  A ``\begin_inset`` hit
    is not a paragraph boundary: the inset is skipped up to its end
    (``find_end_of_inset``) and the scan resumes from there.

    Args:
        lines: list of document lines.
        i: index of a line inside the current paragraph.
        format: not used by this function; kept for signature
            compatibility with existing callers.

    Returns:
        The index of the first ``\layout``, ``\end_float`` or
        ``\the_end`` line found outside an inset, or -1 when the search
        fails.
    """
    tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]

    while i != -1:
        i = find_tokens(lines, tokens, i)
        if i == -1:
            # Nothing found: do not fall through and index lines[-1].
            return -1
        if not check_token(lines[i], "\\begin_inset"):
            return i
        i = find_end_of_inset(lines, i)
    return -1
+
+
def find_beginning_of_inset(lines, i):
    r"""Find the beginning of an inset, where lines[i] is included.

    Thin wrapper that calls ``find_beginning_of`` with the
    ``\begin_inset`` / ``\end_inset`` token pair.

    Returns:
        Whatever ``find_beginning_of`` returns; callers in this module
        treat -1 as "not found".
    """
    return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset")
+
+
def find_end_of_inset(lines, i):
    r"""Find the ``\end_inset`` matching the inset at lines[i].

    Thin wrapper that calls ``find_end_of`` with the ``\begin_inset`` /
    ``\end_inset`` token pair.

    Returns:
        Whatever ``find_end_of`` returns; callers in this module treat
        -1 as "not found".
    """
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
+
+
def find_end_of_tabular(lines, i):
    """Find the matching end of a tabular.

    Thin wrapper that calls ``find_end_of`` with the ``<lyxtabular`` /
    ``</lyxtabular`` token pair.

    Returns:
        Whatever ``find_end_of`` returns; callers in this module treat
        -1 as "not found".
    """
    return find_end_of(lines, i, "<lyxtabular", "</lyxtabular")
+
+
def get_tabular_lines(lines, i):
    r"""Return a list of the tabular's own line indices.

    Starting at line ``i + 1``, collects the index of every line up to
    and including the one returned by ``find_end_of_tabular``, skipping
    every nested inset from its ``\begin_inset`` line through its
    matching ``\end_inset``.

    Args:
        lines: list of document lines.
        i: index of the line just before the tabular content.

    Returns:
        A list of integer line indices; empty when the end of the
        tabular cannot be found.
    """
    result = []
    i = i + 1
    j = find_end_of_tabular(lines, i)
    if j == -1:
        return []

    while i <= j:
        if check_token(lines[i], "\\begin_inset"):
            end = find_end_of_inset(lines, i)
            if end == -1:
                # Unterminated inset: stop here rather than restarting
                # the scan from line 0 and looping forever.
                break
            i = end + 1
        else:
            result.append(i)
            i = i + 1
    return result
+
+# End of helper functions
+####################################################################
+
floats = {
"footnote": ["\\begin_inset Foot",
def get_width(mo):
+ " Get width from a regular expression. "
if mo.group(10):
if mo.group(9) == "\\pextra_widthp":
return mo.group(10)+"col%"
return "100col%"
-#
-# Change \begin_float .. \end_float into \begin_inset Float .. \end_inset
-#
-def remove_oldfloat(file):
- lines = file.body
+def remove_oldfloat(document):
+ " Change \begin_float .. \end_float into \begin_inset Float .. \end_inset"
+ lines = document.body
i = 0
- while 1:
+ while True:
i = find_token(lines, "\\begin_float", i)
if i == -1:
break
# There are no nested floats, so finding the end of the float is simple
j = find_token(lines, "\\end_float", i+1)
- floattype = string.split(lines[i])[1]
- if not floats.has_key(floattype):
- file.warning("Error! Unknown float type " + floattype)
+ floattype = lines[i].split()[1]
+ if floattype not in floats:
+ document.warning("Error! Unknown float type " + floattype)
floattype = "fig"
# skip \end_deeper tokens
while check_token(lines[i2], "\\end_deeper"):
i2 = i2+1
if i2 > i+1:
- j2 = get_next_paragraph(lines, j + 1, file.format + 1)
+ j2 = get_next_paragraph(lines, j + 1, document.format + 1)
lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1))
new = floats[floattype]+[""]
# as extra '\foo default' commands are ignored.
# In fact, it might be safer to output '\foo default' for all
# font attributes.
- k = get_paragraph(lines, i, file.format + 1)
+ k = get_paragraph(lines, i, document.format + 1)
flag = 0
for token in font_tokens:
if find_token(lines, token, k, i) != -1:
flag = 1
new.append("")
if token == "\\lang":
- new.append(token+" "+ file.language)
+ new.append(token+" "+ document.language)
else:
new.append(token+" default ")
# Pattern: any prefix followed by either a "\layout" token or a
# "\pextra_type" setting whose value is 2.
pextra_type2_rexp2 = re.compile(r".*(\\layout|\\pextra_type\s+2)")
# Pattern: the literal "\pextra_widthp" token.
pextra_widthp = re.compile(r"\\pextra_widthp")
-def remove_pextra(file):
- lines = file.body
+def remove_pextra(document):
+ " Remove pextra token."
+ lines = document.body
i = 0
flag = 0
- while 1:
+ while True:
i = find_re(lines, pextra_type2_rexp, i)
if i == -1:
break
if hfill:
start = ["","\hfill",""]+start
else:
- start = ['\\layout %s' % file.default_layout,''] + start
+ start = ['\\layout %s' % document.default_layout,''] + start
j0 = find_token_backwards(lines,"\\layout", i-1)
- j = get_next_paragraph(lines, i, file.format + 1)
+ j = get_next_paragraph(lines, i, document.format + 1)
count = 0
- while 1:
+ while True:
# collect more paragraphs to the minipage
count = count+1
if j == -1 or not check_token(lines[j], "\\layout"):
def is_empty(lines):
    """Are all the lines empty?

    Returns:
        True when ``is_nonempty_line`` is false for every element of
        ``lines`` (including when ``lines`` itself is empty).
    """
    # Stops at the first non-empty line instead of building a list.
    return not any(is_nonempty_line(line) for line in lines)
# Pattern: a backslash followed by one of the listed keywords
# (family, series, shape, size, emph, numeric, bar, noun, end_deeper).
move_rexp = re.compile(r"\\(family|series|shape|size|emph|numeric|bar|noun|end_deeper)")
# Pattern with two groups: the text before "\SpecialChar", and the rest
# of the line starting at "\SpecialChar".
spchar_rexp = re.compile(r"(.*)(\\SpecialChar.*)")
-def remove_oldert(file):
+def remove_oldert(document):
+ " Remove old ERT inset."
ert_begin = ["\\begin_inset ERT",
"status Collapsed",
"",
- '\\layout %s' % file.default_layout,
+ '\\layout %s' % document.default_layout,
""]
- lines = file.body
+ lines = document.body
i = 0
- while 1:
+ while True:
i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
if i == -1:
break
j = i+1
- while 1:
+ while True:
# \end_inset is for ert inside a tabular cell. The other tokens
# are obvious.
j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"],
new = []
new2 = []
if check_token(lines[i], "\\layout LaTeX"):
- new = ['\layout %s' % file.default_layout, "", ""]
+ new = ['\layout %s' % document.default_layout, "", ""]
k = i+1
- while 1:
+ while True:
k2 = find_re(lines, ert_rexp, k, j)
inset = hfill = specialchar = 0
if k2 == -1:
tmp.append(line)
if is_empty(tmp):
- if filter(lambda x:x != "", tmp) != []:
+ if [x for x in tmp if x != ""] != []:
if new == []:
# This is not necessary, but we want the output to be
# as similar as possible to the lyx format
# Delete remaining "\latex xxx" tokens
i = 0
- while 1:
+ while True:
i = find_token(lines, "\\latex ", i)
if i == -1:
break
del lines[i]
-# ERT insert are hidden feature of lyx 1.1.6. This might be removed in the future.
-def remove_oldertinset(file):
- lines = file.body
+def remove_oldertinset(document):
+ " ERT insert are hidden feature of lyx 1.1.6. This might be removed in the future."
+ lines = document.body
i = 0
- while 1:
+ while True:
i = find_token(lines, "\\begin_inset ERT", i)
if i == -1:
break
j = find_end_of_inset(lines, i)
k = find_token(lines, "\\layout", i+1)
- l = get_paragraph(lines, i, file.format + 1)
+ l = get_paragraph(lines, i, document.format + 1)
if lines[k] == lines[l]: # same layout
k = k+1
new = lines[k:j]
i = i+1
-def is_ert_paragraph(file, i):
- lines = file.body
+def is_ert_paragraph(document, i):
+ " Is this a ert paragraph? "
+ lines = document.body
if not check_token(lines[i], "\\layout"):
return 0
- if not file.is_default_layout(get_layout(lines[i], file.default_layout)):
+ if not document.is_default_layout(get_layout(lines[i], document.default_layout)):
return 0
i = find_nonempty_line(lines, i+1)
return check_token(lines[k], "\\layout")
-def combine_ert(file):
- lines = file.body
+def combine_ert(document):
+ " Combine ERT paragraphs."
+ lines = document.body
i = 0
- while 1:
+ while True:
i = find_token(lines, "\\begin_inset ERT", i)
if i == -1:
break
- j = get_paragraph(lines, i, file.format + 1)
+ j = get_paragraph(lines, i, document.format + 1)
count = 0
text = []
- while is_ert_paragraph(file, j):
+ while is_ert_paragraph(document, j):
count = count+1
i2 = find_token(lines, "\\layout", j+1)
# Unit suffixes; get_length() indexes this list with the integer unit
# code it reads from the document line.
oldunits = ["pt", "cm", "in", "text%", "col%"]
def get_length(lines, name, start, end):
    """Get a length, with its unit suffix, from a token line.

    Looks for the token ``name`` with ``find_token(lines, name, start,
    end)``.  The matching line is split on whitespace: its second field
    is used as an integer index into ``oldunits`` and its third field is
    the numeric value.

    Returns:
        The value followed by the unit suffix, or "" when the token is
        not found.
    """
    i = find_token(lines, name, start, end)
    if i == -1:
        return ""
    x = lines[i].split()
    return x[2]+oldunits[int(x[1])]
def write_attribute(x, token, value):
    """Append a tab-indented "token value" line to the list x.

    Nothing is appended when ``value`` is the empty string.

    Args:
        x: list of output lines, modified in place.
        token: attribute name.
        value: attribute value as a string.
    """
    if value != "":
        x.append("\t"+token+" "+value)
-def remove_figinset(file):
- lines = file.body
+def remove_figinset(document):
+ " Remove figinset."
+ lines = document.body
i = 0
- while 1:
+ while True:
i = find_token(lines, "\\begin_inset Figure", i)
if i == -1:
break
j = find_end_of_inset(lines, i)
- if ( len(string.split(lines[i])) > 2 ):
- lyxwidth = string.split(lines[i])[3]+"pt"
- lyxheight = string.split(lines[i])[4]+"pt"
+ if ( len(lines[i].split()) > 2 ):
+ lyxwidth = lines[i].split()[3]+"pt"
+ lyxheight = lines[i].split()[4]+"pt"
else:
lyxwidth = ""
lyxheight = ""
lines[i:j+1] = new
-##
-# Convert tabular format 2 to 3
-#
# Pattern: a space, an attribute name and a quoted value that is
# "false", "0" or empty.
attr_re = re.compile(r' \w*="(false|0|)"')
# Pattern: the opening of a <features, <column, <row or <cell tag.
line_re = re.compile(r'<(features|column|row|cell)')
-def update_tabular(file):
+def update_tabular(document):
+ " Convert tabular format 2 to 3."
regexp = re.compile(r'^\\begin_inset\s+Tabular')
- lines = file.body
+ lines = document.body
i = 0
- while 1:
+ while True:
i = find_re(lines, regexp, i)
if i == -1:
break
for k in get_tabular_lines(lines, i):
if check_token(lines[k], "<lyxtabular"):
- lines[k] = string.replace(lines[k], 'version="2"', 'version="3"')
+ lines[k] = lines[k].replace('version="2"', 'version="3"')
elif check_token(lines[k], "<column"):
- lines[k] = string.replace(lines[k], 'width=""', 'width="0pt"')
+ lines[k] = lines[k].replace('width=""', 'width="0pt"')
if line_re.match(lines[k]):
lines[k] = re.sub(attr_re, "", lines[k])
# Integer stand-ins for boolean values, used by the long-table code in
# this module.
false = 0
true = 1
-# simple data structure to deal with long table info
class row:
+ " Simple data structure to deal with long table info."
def __init__(self):
self.endhead = false # header row
self.endfirsthead = false # first header row
def haveLTFoot(row_info):
+ " Does row has LTFoot?"
for row_ in row_info:
if row_.endfoot:
return true
def setHeaderFooterRows(hr, fhr, fr, lfr, rows_, row_info):
+ " Set Header/Footer rows."
endfirsthead_empty = false
endlastfoot_empty = false
# set header info
def insert_attribute(lines, i, attribute):
    """Insert attribute in lines[i], just before its first '>'.

    A single space is put in front of ``attribute``.  When the line
    contains no '>' it is left untouched, instead of having the
    attribute spliced in before its last character.

    Args:
        lines: list of lines, modified in place.
        i: index of the line to change.
        attribute: text to insert, e.g. 'name="value"'.
    """
    last = lines[i].find('>')
    if last == -1:
        return
    lines[i] = lines[i][:last] + ' ' + attribute + lines[i][last:]
# Pattern: the islongtable attribute; group 1 is the single word
# character inside the quotes.
longtable_re = re.compile(r'islongtable="(\w)"')
# Pattern: the four long-table attributes in sequence; groups 1-4 are
# the (possibly negative, possibly empty) numbers of endhead,
# endfirsthead, endfoot and endlastfoot.
ltvalues_re = re.compile(r'endhead="(-?\d*)" endfirsthead="(-?\d*)" endfoot="(-?\d*)" endlastfoot="(-?\d*)"')
# Same four attributes captured as one group, so the whole run can be
# located and removed at once.
lt_features_re = re.compile(r'(endhead="-?\d*" endfirsthead="-?\d*" endfoot="-?\d*" endlastfoot="-?\d*")')
-def update_longtables(file):
+def update_longtables(document):
+ " Update longtables to new format."
regexp = re.compile(r'^\\begin_inset\s+Tabular')
- body = file.body
+ body = document.body
i = 0
- while 1:
+ while True:
i = find_re(body, regexp, i)
if i == -1:
break
# remove longtable elements from features
features = lt_features_re.search(body[i])
if features:
- body[i] = string.replace(body[i], features.group(1), "")
+ body[i] = body[i].replace(features.group(1), "")
continue
row_info = row() * rows
i = i + 1
-# Figure insert are hidden feature of lyx 1.1.6. This might be removed in the future.
-def fix_oldfloatinset(file):
- lines = file.body
+def fix_oldfloatinset(document):
+ " Figure insert are hidden feature of lyx 1.1.6. This might be removed in the future."
+ lines = document.body
i = 0
- while 1:
+ while True:
i = find_token(lines, "\\begin_inset Float ", i)
if i == -1:
break
i = i+1
-def change_listof(file):
- lines = file.body
+def change_listof(document):
+ " Change listof insets."
+ lines = document.body
i = 0
- while 1:
+ while True:
i = find_token(lines, "\\begin_inset LatexCommand \\listof", i)
if i == -1:
break
i = i+1
-def change_infoinset(file):
- lines = file.body
+def change_infoinset(document):
+ " Change info inset."
+ lines = document.body
i = 0
- while 1:
+ while True:
i = find_token(lines, "\\begin_inset Info", i)
if i == -1:
break
- txt = string.lstrip(lines[i][18:])
+ txt = lines[i][18:].lstrip()
new = ["\\begin_inset Note", "collapsed true", ""]
j = find_token(lines, "\\end_inset", i)
if j == -1:
note_lines = [txt]+note_lines
for line in note_lines:
- new = new + ['\layout %s' % file.default_layout, ""]
- tmp = string.split(line, '\\')
+ new = new + ['\layout %s' % document.default_layout, ""]
+ tmp = line.split('\\')
new = new + [tmp[0]]
for x in tmp[1:]:
new = new + ["\\backslash ", x]
i = i+5
def change_header(document):
    r"""Update header.

    Inserts the lines ``\use_natbib 0`` and ``\use_numerical_citations 0``
    directly after the ``\use_amsmath`` line of ``document.header``.
    Does nothing when the header has no ``\use_amsmath`` line.

    Args:
        document: object whose ``header`` attribute is the list of
            header lines; the list is modified in place.
    """
    lines = document.header
    i = find_token(lines, "\\use_amsmath", 0)
    if i == -1:
        return
    # Backslashes are escaped: a bare "\u" is a malformed unicode escape.
    lines[i+1:i+1] = ["\\use_natbib 0",
                      "\\use_numerical_citations 0"]
# Version strings listed for this module: "1.2.0" through "1.2.4", plus
# the bare "1.2".
supported_versions = ["1.2.%d" % i for i in range(5)] + ["1.2"]
convert = [[220, [change_header, change_listof, fix_oldfloatinset,
update_tabular, update_longtables, remove_pextra,
remove_oldfloat, remove_figinset, remove_oldertinset,