X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2Flyx_1_2.py;h=b697a918c588159ed37bb42a18eacb283ef8a9a6;hb=e798db5739871aaa29f95de321c52f19058064c9;hp=d42540f1bf752c9731d0b985eb2023e1d13f9f85;hpb=eb0de102db22fad861d758e411f4709f26d3f3a5;p=lyx.git diff --git a/lib/lyx2lyx/lyx_1_2.py b/lib/lyx2lyx/lyx_1_2.py index d42540f1bf..b697a918c5 100644 --- a/lib/lyx2lyx/lyx_1_2.py +++ b/lib/lyx2lyx/lyx_1_2.py @@ -1,7 +1,7 @@ # This file is part of lyx2lyx -# -*- coding: iso-8859-1 -*- +# -*- coding: utf-8 -*- # Copyright (C) 2002 Dekel Tsur -# Copyright (C) 2004 José Matos +# Copyright (C) 2004 José Matos # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -15,138 +15,218 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +""" Convert files to the file format generated by lyx 1.2""" -import string import re -from parser_tools import find_token, find_token_backwards, get_next_paragraph,\ - find_tokens, find_end_of_inset, find_re, \ - is_nonempty_line, get_paragraph, find_nonempty_line, \ - get_value, get_tabular_lines, check_token +from parser_tools import find_token, find_token_backwards, \ + find_tokens, find_tokens_backwards, \ + find_beginning_of, find_end_of, find_re, \ + is_nonempty_line, find_nonempty_line, \ + get_value, check_token + +#################################################################### +# Private helper functions + +def get_layout(line, default_layout): + " Get layout, if empty return the default layout." + tokens = line.split() + if len(tokens) > 1: + return tokens[1] + return default_layout + + +def get_paragraph(lines, i, format): + " Finds the paragraph that contains line i." + begin_layout = "\\layout" + + while i != -1: + i = find_tokens_backwards(lines, ["\\end_inset", begin_layout], i) + if i == -1: return -1 + if check_token(lines[i], begin_layout): + return i + i = find_beginning_of_inset(lines, i) + return -1 + + +def get_next_paragraph(lines, i, format): + " Finds the paragraph after the paragraph that contains line i." + tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"] + + while i != -1: + i = find_tokens(lines, tokens, i) + if not check_token(lines[i], "\\begin_inset"): + return i + i = find_end_of_inset(lines, i) + return -1 + + +def find_beginning_of_inset(lines, i): + " Find beginning of inset, where lines[i] is included." + return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset") + + +def find_end_of_inset(lines, i): + " Finds the matching \end_inset" + return find_end_of(lines, i, "\\begin_inset", "\\end_inset") + + +def find_end_of_tabular(lines, i): + " Finds the matching end of tabular." + return find_end_of(lines, i, " i+1: - j2 = get_next_paragraph(lines, j+1) - lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1)) - - new = floats[floattype]+[""] - - # Check if the float is floatingfigure - k = find_re(lines, pextra_type3_rexp, i, j) - if k != -1: - mo = pextra_rexp.search(lines[k]) - width = get_width(mo) - lines[k] = re.sub(pextra_rexp, "", lines[k]) - new = ["\\begin_inset Wrap figure", - 'width "%s"' % width, - "collapsed false", - ""] - - new = new+lines[i2:j]+["\\end_inset ", ""] - - # After a float, all font attributes are reseted. - # We need to output '\foo default' for every attribute foo - # whose value is not default before the float. - # The check here is not accurate, but it doesn't matter - # as extra '\foo default' commands are ignored. - # In fact, it might be safer to output '\foo default' for all - # font attributes. - k = get_paragraph(lines, i) - flag = 0 - for token in font_tokens: - if find_token(lines, token, k, i) != -1: - if not flag: - # This is not necessary, but we want the output to be - # as similar as posible to the lyx format - flag = 1 - new.append("") - if token == "\\lang": - new.append(token+" "+ file.language) - else: - new.append(token+" default ") - - lines[i:j+1] = new - i = i+1 + while True: + i = find_token(lines, "\\begin_float", i) + if i == -1: + break + # There are no nested floats, so finding the end of the float is simple + j = find_token(lines, "\\end_float", i+1) + + floattype = lines[i].split()[1] + if floattype not in floats: + document.warning("Error! Unknown float type " + floattype) + floattype = "fig" + + # skip \end_deeper tokens + i2 = i+1 + while check_token(lines[i2], "\\end_deeper"): + i2 = i2+1 + if i2 > i+1: + j2 = get_next_paragraph(lines, j + 1, document.format + 1) + lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1)) + + new = floats[floattype]+[""] + + # Check if the float is floatingfigure + k = find_re(lines, pextra_type3_rexp, i, j) + if k != -1: + mo = pextra_rexp.search(lines[k]) + width = get_width(mo) + lines[k] = re.sub(pextra_rexp, "", lines[k]) + new = ["\\begin_inset Wrap figure", + 'width "%s"' % width, + "collapsed false", + ""] + + new = new+lines[i2:j]+["\\end_inset ", ""] + + # After a float, all font attributes are reseted. + # We need to output '\foo default' for every attribute foo + # whose value is not default before the float. + # The check here is not accurate, but it doesn't matter + # as extra '\foo default' commands are ignored. + # In fact, it might be safer to output '\foo default' for all + # font attributes. + k = get_paragraph(lines, i, document.format + 1) + flag = 0 + for token in font_tokens: + if find_token(lines, token, k, i) != -1: + if not flag: + # This is not necessary, but we want the output to be + # as similar as posible to the lyx format + flag = 1 + new.append("") + if token == "\\lang": + new.append(token+" "+ document.language) + else: + new.append(token+" default ") + + lines[i:j+1] = new + i = i+1 pextra_type2_rexp = re.compile(r".*\\pextra_type\s+[12]") pextra_type2_rexp2 = re.compile(r".*(\\layout|\\pextra_type\s+2)") +pextra_widthp = re.compile(r"\\pextra_widthp") -def remove_pextra(file): - lines = file.body +def remove_pextra(document): + " Remove pextra token." + lines = document.body i = 0 flag = 0 - while 1: - i = find_re(lines, pextra_type2_rexp, i) - if i == -1: - break + while True: + i = find_re(lines, pextra_type2_rexp, i) + if i == -1: + break + + # Sometimes the \pextra_widthp argument comes in it own + # line. If that happens insert it back in this line. + if pextra_widthp.search(lines[i+1]): + lines[i] = lines[i] + ' ' + lines[i+1] + del lines[i+1] - mo = pextra_rexp.search(lines[i]) + mo = pextra_rexp.search(lines[i]) width = get_width(mo) if mo.group(1) == "1": @@ -156,340 +236,349 @@ def remove_pextra(file): continue # handle \pextra_type 2 (minipage) - position = mo.group(3) - hfill = mo.group(5) - lines[i] = re.sub(pextra_rexp, "", lines[i]) - - start = ["\\begin_inset Minipage", - "position " + position, - "inner_position 0", - 'height "0pt"', - 'width "%s"' % width, - "collapsed false" - ] - if flag: - flag = 0 - if hfill: - start = ["","\hfill",""]+start - else: - start = ["\\layout Standard"] + start - - j0 = find_token_backwards(lines,"\\layout", i-1) - j = get_next_paragraph(lines, i) - - count = 0 - while 1: - # collect more paragraphs to the minipage - count = count+1 - if j == -1 or not check_token(lines[j], "\\layout"): - break - i = find_re(lines, pextra_type2_rexp2, j+1) - if i == -1: - break - mo = pextra_rexp.search(lines[i]) - if not mo: - break - if mo.group(7) == "1": - flag = 1 - break - lines[i] = re.sub(pextra_rexp, "", lines[i]) - j = find_tokens(lines, ["\\layout", "\\end_float"], i+1) - - mid = lines[j0:j] - end = ["\\end_inset "] - - lines[j0:j] = start+mid+end - i = i+1 + position = mo.group(3) + hfill = mo.group(5) + lines[i] = re.sub(pextra_rexp, "", lines[i]) + + start = ["\\begin_inset Minipage", + "position " + position, + "inner_position 0", + 'height "0pt"', + 'width "%s"' % width, + "collapsed false" + ] + if flag: + flag = 0 + if hfill: + start = ["","\hfill",""]+start + else: + start = ['\\layout %s' % document.default_layout,''] + start + + j0 = find_token_backwards(lines,"\\layout", i-1) + j = get_next_paragraph(lines, i, document.format + 1) + + count = 0 + while True: + # collect more paragraphs to the minipage + count = count+1 + if j == -1 or not check_token(lines[j], "\\layout"): + break + i = find_re(lines, pextra_type2_rexp2, j+1) + if i == -1: + break + mo = pextra_rexp.search(lines[i]) + if not mo: + break + if mo.group(7) == "1": + flag = 1 + break + lines[i] = re.sub(pextra_rexp, "", lines[i]) + j = find_tokens(lines, ["\\layout", "\\end_float"], i+1) + + mid = lines[j0:j] + end = ["\\end_inset "] + + lines[j0:j] = start+mid+end + i = i+1 def is_empty(lines): - return filter(is_nonempty_line, lines) == [] + " Are all the lines empty?" + return list(filter(is_nonempty_line, lines)) == [] move_rexp = re.compile(r"\\(family|series|shape|size|emph|numeric|bar|noun|end_deeper)") ert_rexp = re.compile(r"\\begin_inset|\\hfill|.*\\SpecialChar") spchar_rexp = re.compile(r"(.*)(\\SpecialChar.*)") -ert_begin = ["\\begin_inset ERT", - "status Collapsed", - "", - "\\layout Standard"] -def remove_oldert(file): - lines = file.body +def remove_oldert(document): + " Remove old ERT inset." + ert_begin = ["\\begin_inset ERT", + "status Collapsed", + "", + '\\layout %s' % document.default_layout, + ""] + lines = document.body i = 0 - while 1: - i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i) - if i == -1: - break - j = i+1 - while 1: + while True: + i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i) + if i == -1: + break + j = i+1 + while True: # \end_inset is for ert inside a tabular cell. The other tokens # are obvious. - j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"], - j) - if check_token(lines[j], "\\begin_inset"): - j = find_end_of_inset(lines, j)+1 - else: - break - - if check_token(lines[j], "\\layout"): - while j-1 >= 0 and check_token(lines[j-1], "\\begin_deeper"): - j = j-1 - - # We need to remove insets, special chars & font commands from ERT text - new = [] - new2 = [] - if check_token(lines[i], "\\layout LaTeX"): - new = ["\layout Standard", "", ""] - # We have a problem with classes in which Standard is not the default layout! - - k = i+1 - while 1: - k2 = find_re(lines, ert_rexp, k, j) - inset = hfill = specialchar = 0 - if k2 == -1: - k2 = j - elif check_token(lines[k2], "\\begin_inset"): - inset = 1 + j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"], + j) + if check_token(lines[j], "\\begin_inset"): + j = find_end_of_inset(lines, j)+1 + else: + break + + if check_token(lines[j], "\\layout"): + while j-1 >= 0 and check_token(lines[j-1], "\\begin_deeper"): + j = j-1 + + # We need to remove insets, special chars & font commands from ERT text + new = [] + new2 = [] + if check_token(lines[i], "\\layout LaTeX"): + new = ['\layout %s' % document.default_layout, "", ""] + + k = i+1 + while True: + k2 = find_re(lines, ert_rexp, k, j) + inset = hfill = specialchar = 0 + if k2 == -1: + k2 = j + elif check_token(lines[k2], "\\begin_inset"): + inset = 1 elif check_token(lines[k2], "\\hfill"): hfill = 1 del lines[k2] j = j-1 - else: - specialchar = 1 - mo = spchar_rexp.match(lines[k2]) - lines[k2] = mo.group(1) - specialchar_str = mo.group(2) - k2 = k2+1 - - tmp = [] - for line in lines[k:k2]: + else: + specialchar = 1 + mo = spchar_rexp.match(lines[k2]) + lines[k2] = mo.group(1) + specialchar_str = mo.group(2) + k2 = k2+1 + + tmp = [] + for line in lines[k:k2]: # Move some lines outside the ERT inset: - if move_rexp.match(line): - if new2 == []: - # This is not necessary, but we want the output to be - # as similar as posible to the lyx format - new2 = [""] - new2.append(line) - elif not check_token(line, "\\latex"): - tmp.append(line) - - if is_empty(tmp): - if filter(lambda x:x != "", tmp) != []: - if new == []: - # This is not necessary, but we want the output to be - # as similar as posible to the lyx format - lines[i-1] = lines[i-1]+" " - else: - new = new+[" "] - else: - new = new+ert_begin+tmp+["\\end_inset ", ""] - - if inset: - k3 = find_end_of_inset(lines, k2) - new = new+[""]+lines[k2:k3+1]+[""] # Put an empty line after \end_inset - k = k3+1 - # Skip the empty line after \end_inset - if not is_nonempty_line(lines[k]): - k = k+1 - new.append("") + if move_rexp.match(line): + if new2 == []: + # This is not necessary, but we want the output to be + # as similar as posible to the lyx format + new2 = [""] + new2.append(line) + elif not check_token(line, "\\latex"): + tmp.append(line) + + if is_empty(tmp): + if [x for x in tmp if x != ""] != []: + if new == []: + # This is not necessary, but we want the output to be + # as similar as posible to the lyx format + lines[i-1] = lines[i-1]+" " + else: + new = new+[" "] + else: + new = new+ert_begin+tmp+["\\end_inset ", ""] + + if inset: + k3 = find_end_of_inset(lines, k2) + new = new+[""]+lines[k2:k3+1]+[""] # Put an empty line after \end_inset + k = k3+1 + # Skip the empty line after \end_inset + if not is_nonempty_line(lines[k]): + k = k+1 + new.append("") elif hfill: new = new + ["\\hfill", ""] k = k2 - elif specialchar: - if new == []: - # This is not necessary, but we want the output to be - # as similar as posible to the lyx format - lines[i-1] = lines[i-1]+specialchar_str - new = [""] - else: - new = new+[specialchar_str, ""] - k = k2 - else: - break - - new = new+new2 - if not check_token(lines[j], "\\latex "): - new = new+[""]+[lines[j]] - lines[i:j+1] = new - i = i+1 + elif specialchar: + if new == []: + # This is not necessary, but we want the output to be + # as similar as posible to the lyx format + lines[i-1] = lines[i-1]+specialchar_str + new = [""] + else: + new = new+[specialchar_str, ""] + k = k2 + else: + break + + new = new+new2 + if not check_token(lines[j], "\\latex "): + new = new+[""]+[lines[j]] + lines[i:j+1] = new + i = i+1 # Delete remaining "\latex xxx" tokens i = 0 - while 1: - i = find_token(lines, "\\latex ", i) - if i == -1: - break - del lines[i] + while True: + i = find_token(lines, "\\latex ", i) + if i == -1: + break + del lines[i] -# ERT insert are hidden feature of lyx 1.1.6. This might be removed in the future. -def remove_oldertinset(file): - lines = file.body +def remove_oldertinset(document): + " ERT insert are hidden feature of lyx 1.1.6. This might be removed in the future." + lines = document.body i = 0 - while 1: - i = find_token(lines, "\\begin_inset ERT", i) - if i == -1: - break - j = find_end_of_inset(lines, i) - k = find_token(lines, "\\layout", i+1) - l = get_paragraph(lines, i) - if lines[k] == lines[l]: # same layout - k = k+1 - new = lines[k:j] - lines[i:j+1] = new - i = i+1 - - -def is_ert_paragraph(lines, i): - if not check_token(lines[i], "\\layout Standard"): + while True: + i = find_token(lines, "\\begin_inset ERT", i) + if i == -1: + break + j = find_end_of_inset(lines, i) + k = find_token(lines, "\\layout", i+1) + l = get_paragraph(lines, i, document.format + 1) + if lines[k] == lines[l]: # same layout + k = k+1 + new = lines[k:j] + lines[i:j+1] = new + i = i+1 + + +def is_ert_paragraph(document, i): + " Is this a ert paragraph? " + lines = document.body + if not check_token(lines[i], "\\layout"): + return 0 + if not document.is_default_layout(get_layout(lines[i], document.default_layout)): return 0 i = find_nonempty_line(lines, i+1) if not check_token(lines[i], "\\begin_inset ERT"): - return 0 + return 0 j = find_end_of_inset(lines, i) k = find_nonempty_line(lines, j+1) return check_token(lines[k], "\\layout") -def combine_ert(file): - lines = file.body +def combine_ert(document): + " Combine ERT paragraphs." + lines = document.body i = 0 - while 1: - i = find_token(lines, "\\begin_inset ERT", i) - if i == -1: - break - j = get_paragraph(lines, i) - count = 0 - text = [] - while is_ert_paragraph(lines, j): - - count = count+1 - i2 = find_token(lines, "\\layout", j+1) - k = find_token(lines, "\\end_inset", i2+1) - text = text+lines[i2:k] - j = find_token(lines, "\\layout", k+1) - if j == -1: - break - - if count >= 2: - j = find_token(lines, "\\layout", i+1) - lines[j:k] = text - - i = i+1 + while True: + i = find_token(lines, "\\begin_inset ERT", i) + if i == -1: + break + j = get_paragraph(lines, i, document.format + 1) + count = 0 + text = [] + while is_ert_paragraph(document, j): + + count = count+1 + i2 = find_token(lines, "\\layout", j+1) + k = find_token(lines, "\\end_inset", i2+1) + text = text+lines[i2:k] + j = find_token(lines, "\\layout", k+1) + if j == -1: + break + + if count >= 2: + j = find_token(lines, "\\layout", i+1) + lines[j:k] = text + + i = i+1 oldunits = ["pt", "cm", "in", "text%", "col%"] def get_length(lines, name, start, end): + " Get lenght." i = find_token(lines, name, start, end) if i == -1: - return "" - x = string.split(lines[i]) + return "" + x = lines[i].split() return x[2]+oldunits[int(x[1])] def write_attribute(x, token, value): + " Write attribute." if value != "": - x.append("\t"+token+" "+value) + x.append("\t"+token+" "+value) -def remove_figinset(file): - lines = file.body +def remove_figinset(document): + " Remove figinset." + lines = document.body i = 0 - while 1: - i = find_token(lines, "\\begin_inset Figure", i) - if i == -1: - break - j = find_end_of_inset(lines, i) - - if ( len(string.split(lines[i])) > 2 ): - lyxwidth = string.split(lines[i])[3]+"pt" - lyxheight = string.split(lines[i])[4]+"pt" - else: - lyxwidth = "" - lyxheight = "" - - filename = get_value(lines, "file", i+1, j) - - width = get_length(lines, "width", i+1, j) - # what does width=5 mean ? - height = get_length(lines, "height", i+1, j) - rotateAngle = get_value(lines, "angle", i+1, j) - if width == "" and height == "": - size_type = "0" - else: - size_type = "1" - - flags = get_value(lines, "flags", i+1, j) - x = int(flags)%4 - if x == 1: - display = "monochrome" - elif x == 2: - display = "gray" - else: - display = "color" - - subcaptionText = "" - subcaptionLine = find_token(lines, "subcaption", i+1, j) - if subcaptionLine != -1: + while True: + i = find_token(lines, "\\begin_inset Figure", i) + if i == -1: + break + j = find_end_of_inset(lines, i) + + if ( len(lines[i].split()) > 2 ): + lyxwidth = lines[i].split()[3]+"pt" + lyxheight = lines[i].split()[4]+"pt" + else: + lyxwidth = "" + lyxheight = "" + + filename = get_value(lines, "file", i+1, j) + + width = get_length(lines, "width", i+1, j) + # what does width=5 mean ? + height = get_length(lines, "height", i+1, j) + rotateAngle = get_value(lines, "angle", i+1, j) + if width == "" and height == "": + size_type = "0" + else: + size_type = "1" + + flags = get_value(lines, "flags", i+1, j) + x = int(flags)%4 + if x == 1: + display = "monochrome" + elif x == 2: + display = "gray" + else: + display = "color" + + subcaptionText = "" + subcaptionLine = find_token(lines, "subcaption", i+1, j) + if subcaptionLine != -1: subcaptionText = lines[subcaptionLine][11:] - if subcaptionText != "": - subcaptionText = '"'+subcaptionText+'"' - - k = find_token(lines, "subfigure", i+1,j) - if k == -1: - subcaption = 0 - else: - subcaption = 1 - - new = ["\\begin_inset Graphics FormatVersion 1"] - write_attribute(new, "filename", filename) - write_attribute(new, "display", display) - if subcaption: - new.append("\tsubcaption") - write_attribute(new, "subcaptionText", subcaptionText) - write_attribute(new, "size_type", size_type) - write_attribute(new, "width", width) - write_attribute(new, "height", height) - if rotateAngle != "": - new.append("\trotate") - write_attribute(new, "rotateAngle", rotateAngle) - write_attribute(new, "rotateOrigin", "leftBaseline") - write_attribute(new, "lyxsize_type", "1") - write_attribute(new, "lyxwidth", lyxwidth) - write_attribute(new, "lyxheight", lyxheight) - new = new + ["\\end_inset"] - lines[i:j+1] = new + if subcaptionText != "": + subcaptionText = '"'+subcaptionText+'"' + + k = find_token(lines, "subfigure", i+1,j) + if k == -1: + subcaption = 0 + else: + subcaption = 1 + + new = ["\\begin_inset Graphics FormatVersion 1"] + write_attribute(new, "filename", filename) + write_attribute(new, "display", display) + if subcaption: + new.append("\tsubcaption") + write_attribute(new, "subcaptionText", subcaptionText) + write_attribute(new, "size_type", size_type) + write_attribute(new, "width", width) + write_attribute(new, "height", height) + if rotateAngle != "": + new.append("\trotate") + write_attribute(new, "rotateAngle", rotateAngle) + write_attribute(new, "rotateOrigin", "leftBaseline") + write_attribute(new, "lyxsize_type", "1") + write_attribute(new, "lyxwidth", lyxwidth) + write_attribute(new, "lyxheight", lyxheight) + new = new + ["\\end_inset"] + lines[i:j+1] = new -## -# Convert tabular format 2 to 3 -# attr_re = re.compile(r' \w*="(false|0|)"') line_re = re.compile(r'<(features|column|row|cell)') -def update_tabular(file): - lines = file.body +def update_tabular(document): + " Convert tabular format 2 to 3." + regexp = re.compile(r'^\\begin_inset\s+Tabular') + lines = document.body i = 0 - while 1: - i = find_token(lines, '\\begin_inset Tabular', i) + while True: + i = find_re(lines, regexp, i) if i == -1: break - for k in get_tabular_lines(lines, i): - if check_token(lines[k], "') + " Insert attribute in lines[i]." + last = lines[i].find('>') lines[i] = lines[i][:last] + ' ' + attribute + lines[i][last:] @@ -600,11 +692,13 @@ rows_re = re.compile(r'rows="(\d*)"') longtable_re = re.compile(r'islongtable="(\w)"') ltvalues_re = re.compile(r'endhead="(-?\d*)" endfirsthead="(-?\d*)" endfoot="(-?\d*)" endlastfoot="(-?\d*)"') lt_features_re = re.compile(r'(endhead="-?\d*" endfirsthead="-?\d*" endfoot="-?\d*" endlastfoot="-?\d*")') -def update_longtables(file): - body = file.body +def update_longtables(document): + " Update longtables to new format." + regexp = re.compile(r'^\\begin_inset\s+Tabular') + body = document.body i = 0 - while 1: - i = find_token(body, '\\begin_inset Tabular', i) + while True: + i = find_re(body, regexp, i) if i == -1: break i = i + 1 @@ -619,7 +713,7 @@ def update_longtables(file): i = find_token(body, '