X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2Fparser_tools.py;h=78459d21d9ce4afc59b7f5bee5ac35d700fc1a73;hb=f36359c7f1abd15d0ca86667ff0adfae050832ed;hp=676710f4b758decf68141430fc210485f9b4d9e1;hpb=34b9ed8ac93b3d1e6c7ab0dda0aea88d15eeebc9;p=lyx.git diff --git a/lib/lyx2lyx/parser_tools.py b/lib/lyx2lyx/parser_tools.py index 676710f4b7..78459d21d9 100644 --- a/lib/lyx2lyx/parser_tools.py +++ b/lib/lyx2lyx/parser_tools.py @@ -1,6 +1,6 @@ # This file is part of lyx2lyx -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2002-2004 Dekel Tsur , José Matos +# -*- coding: utf-8 -*- +# Copyright (C) 2002-2004 Dekel Tsur , José Matos # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -16,377 +16,197 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -import string -import re - +" This modules offer several free functions to help parse lines." +# Utilities for one line def check_token(line, token): - if line[:len(token)] == token: - return 1 - return 0 + """ check_token(line, token) -> bool + Return True if token is present in line and is the first element + else returns False.""" -# We need to check that the char after the token is space, but I think -# we can ignore this -def find_token(lines, token, start, end = 0): - if end == 0: - end = len(lines) - m = len(token) - for i in xrange(start, end): - if lines[i][:m] == token: - return i - return -1 + return line[:len(token)] == token -def find_token2(lines, token, start, end = 0): - if end == 0: - end = len(lines) - for i in xrange(start, end): - x = string.split(lines[i]) - if len(x) > 0 and x[0] == token: - return i - return -1 +def is_nonempty_line(line): + """ is_nonempty_line(line) -> bool + Return False if line is either empty or it has only whitespaces, + else return True.""" + return line != " "*len(line) -def find_tokens(lines, tokens, start, end = 0): - if end == 0: - end = len(lines) - for i in xrange(start, end): - line = lines[i] - for token in tokens: - if line[:len(token)] == token: - return i - return -1 +# Utilities for a list of lines +def find_token(lines, token, start, end = 0, exact = False): + """ find_token(lines, token, start[[, end], exact]) -> int -def find_re(lines, rexp, start, end = 0): - if end == 0: - end = len(lines) - for i in xrange(start, end): - if rexp.match(lines[i]): - return i - return -1 + Return the lowest line where token is found, and is the first + element, in lines[start, end]. + Return -1 on failure.""" -def find_token_backwards(lines, token, start): + if end == 0: + end = len(lines) m = len(token) - for i in xrange(start, -1, -1): - line = lines[i] - if line[:m] == token: - return i - return -1 - - -def find_tokens_backwards(lines, tokens, start): - for i in xrange(start, -1, -1): - line = lines[i] - for token in tokens: - if line[:len(token)] == token: - return i + for i in xrange(start, end): + if exact: + x = lines[i].split() + y = token.split() + if len(x) < len(y): + continue + if x[:len(y)] == y: + return i + else: + if lines[i][:m] == token: + return i return -1 -def get_value(lines, token, start, end = 0): - i = find_token2(lines, token, start, end) - if i == -1: - return "" - if len(string.split(lines[i])) > 1: - return string.split(lines[i])[1] - else: - return "" +def find_token_exact(lines, token, start, end = 0): + return find_token(lines, token, start, end, True) -def del_token(lines, token, i, j): - k = find_token2(lines, token, i, j) - if k == -1: - return j - else: - del lines[k] - return j-1 - - -# Finds the paragraph that contains line i. -def get_paragraph(lines, i): - while i != -1: - i = find_tokens_backwards(lines, ["\\end_inset", "\\layout"], i) - if i == -1: return -1 - if check_token(lines[i], "\\layout"): - return i - i = find_beginning_of_inset(lines, i) - return -1 +def find_tokens(lines, tokens, start, end = 0, exact = False): + """ find_tokens(lines, tokens, start[[, end], exact]) -> int + Return the lowest line where one token in tokens is found, and is + the first element, in lines[start, end]. -# Finds the paragraph after the paragraph that contains line i. -def get_next_paragraph(lines, i): - while i != -1: - i = find_tokens(lines, ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"], i) - if not check_token(lines[i], "\\begin_inset"): - return i - i = find_end_of_inset(lines, i) - return -1 - - -def find_end_of(lines, i, start_token, end_token): - count = 1 - n = len(lines) - while i < n: - i = find_tokens(lines, [end_token, start_token], i+1) - if check_token(lines[i], start_token): - count = count+1 - else: - count = count-1 - if count == 0: - return i - return -1 - + Return -1 on failure.""" + if end == 0: + end = len(lines) -# Finds the matching \end_inset -def find_beginning_of(lines, i, start_token, end_token): - count = 1 - while i > 0: - i = find_tokens_backwards(lines, [start_token, end_token], i-1) - if check_token(lines[i], end_token): - count = count+1 - else: - count = count-1 - if count == 0: - return i + for i in xrange(start, end): + for token in tokens: + if exact: + x = lines[i].split() + y = token.split() + if len(x) < len(y): + continue + if x[:len(y)] == y: + return i + else: + if lines[i][:len(token)] == token: + return i return -1 -# Finds the matching \end_inset -def find_end_of_inset(lines, i): - return find_end_of(lines, i, "\\begin_inset", "\\end_inset") - - -# Finds the matching \end_inset -def find_beginning_of_inset(lines, i): - return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset") - +def find_tokens_exact(lines, tokens, start, end = 0): + return find_tokens(lines, tokens, start, end, True) -def find_end_of_tabular(lines, i): - return find_end_of(lines, i, " int -def get_tabular_lines(lines, i): - result = [] - i = i+1 - j = find_end_of_tabular(lines, i) - if j == -1: - return [] - - while i <= j: - if check_token(lines[i], "\\begin_inset"): - i = find_end_of_inset(lines, i)+1 - else: - result.append(i) - i = i+1 - return result + Return the lowest line where rexp, a regular expression, is found + in lines[start, end]. + Return -1 on failure.""" -def is_nonempty_line(line): - return line != " "*len(line) - - -def find_nonempty_line(lines, start, end = 0): if end == 0: - end = len(lines) + end = len(lines) for i in xrange(start, end): - if is_nonempty_line(lines[i]): - return i + if rexp.match(lines[i]): + return i return -1 -## -# Tools for file reading -# -def read_file(header, body, opt): - """Reads a file into the header and body parts""" - preamble = 0 - - while 1: - line = opt.input.readline() - if not line: - opt.error("Invalid LyX file.") - - line = line[:-1] - if check_token(line, '\\begin_preamble'): - preamble = 1 - if check_token(line, '\\end_preamble'): - preamble = 0 - - if not preamble: - line = string.strip(line) - - if not line and not preamble: - break - - header.append(line) - - while 1: - line = opt.input.readline() - if not line: - break - body.append(line[:-1]) - - -def write_file(header, body, opt): - for line in header: - opt.output.write(line+"\n") - opt.output.write("\n") - for line in body: - opt.output.write(line+"\n") - +def find_token_backwards(lines, token, start): + """ find_token_backwards(lines, token, start) -> int -## -# lyx version -# -original_version = re.compile(r"\#LyX (\S*)") + Return the highest line where token is found, and is the first + element, in lines[start, end]. -def read_version(header): - for line in header: - if line[0] != "#": - return None + Return -1 on failure.""" + m = len(token) + for i in xrange(start, -1, -1): + line = lines[i] + if line[:m] == token: + return i + return -1 - result = original_version.match(line) - if result: - return result.group(1) - return None +def find_tokens_backwards(lines, tokens, start): + """ find_tokens_backwards(lines, token, start) -> int -def set_version(lines, version): - lines[0] = "#LyX %s created this file. For more info see http://www.lyx.org/" % version - if lines[1][0] == '#': - del lines[1] + Return the highest line where token is found, and is the first + element, in lines[end, start]. -## -# file format version -# -format_re = re.compile(r"(\d)[\.,]?(\d\d)") -fileformat = re.compile(r"\\lyxformat\s*(\S*)") -lst_ft = [210, 215, 216, 217, 218, 220, 221, 223, 224, 225, 226, 227, 228, 229, - 230, 231, 232, 233] - -format_relation = [("0_10", [210], ["0.10.7","0.10"]), - ("0_12", [215], ["0.12","0.12.1","0.12"]), - ("1_0_0", [215], ["1.0.0","1.0"]), - ("1_0_1", [215], ["1.0.1","1.0.2","1.0.3","1.0.4", "1.1.2","1.1"]), - ("1_1_4", [215], ["1.1.4","1.1"]), - ("1_1_5", [216], ["1.1.5","1.1.5fix1","1.1.5fix2","1.1"]), - ("1_1_6", [217], ["1.1.6","1.1.6fix1","1.1.6fix2","1.1"]), - ("1_1_6fix3", [218], ["1.1.6fix3","1.1.6fix4","1.1"]), - ("1_2", [220], ["1.2.0","1.2.1","1.2.3","1.2.4","1.2"]), - ("1_3", [221], ["1.3.0","1.3.1","1.3.2","1.3.3","1.3.4","1.3"]), - ("1_4", [223,224,225,226,227,228,229,230,231,232,233], ["1.4.0cvs","1.4"])] - - -def lyxformat(format, opt): - result = format_re.match(format) - if result: - format = int(result.group(1) + result.group(2)) - else: - opt.error(str(format) + ": " + "Invalid LyX file.") + Return -1 on failure.""" + for i in xrange(start, -1, -1): + line = lines[i] + for token in tokens: + if line[:len(token)] == token: + return i + return -1 - if format in lst_ft: - return format - opt.error(str(format) + ": " + "Format no supported.") - return None +def get_value(lines, token, start, end = 0, default = ""): + """ get_value(lines, token, start[[, end], default]) -> list of strings + Return tokens after token for the first line, in lines, where + token is the first element.""" -def read_format(header, opt): - for line in header: - result = fileformat.match(line) - if result: - return lyxformat(result.group(1), opt) + i = find_token_exact(lines, token, start, end) + if i == -1: + return "" + if len(lines[i].split()) > 1: + return lines[i].split()[1] else: - opt.error("Invalid LyX File.") - return None + return default -def set_format(lines, number): - if int(number) <= 217: - number = float(number)/100 - i = find_token(lines, "\\lyxformat", 0) - lines[i] = "\\lyxformat %s" % number +def del_token(lines, token, start, end): + """ del_token(lines, token, start, end) -> int + Find the lower line in lines where token is the first element and + delete that line. -def get_end_format(): - return format_relation[-1:][0][1][-1:][0] + Returns the number of lines remaining.""" + k = find_token_exact(lines, token, start, end) + if k == -1: + return end + else: + del lines[k] + return end - 1 -def get_backend(textclass): - if textclass == "linuxdoc" or textclass == "manpage": - return "linuxdoc" - if textclass[:7] == "docbook": - return "docbook" - return "latex" +def find_beginning_of(lines, i, start_token, end_token): + count = 1 + while i > 0: + i = find_tokens_backwards(lines, [start_token, end_token], i-1) + if i == -1: + return -1 + if check_token(lines[i], end_token): + count = count+1 + else: + count = count-1 + if count == 0: + return i + return -1 -def chain(opt, initial_version): - """ This is where all the decisions related with the convertion are taken""" - format = opt.format - if opt.start: - if opt.start != format: - opt.warning("%s: %s %s" % ("Proposed file format and input file formats do not match:", opt.start, format)) - else: - opt.start = format - - if not opt.end: - opt.end = get_end_format() - - correct_version = 0 - - for rel in format_relation: - if initial_version in rel[2]: - if format in rel[1]: - initial_step = rel[0] - correct_version = 1 - break - - if not correct_version: - if format <= 215: - opt.warning("Version does not match file format, discarding it.") - for rel in format_relation: - if format in rel[1]: - initial_step = rel[0] - break +def find_end_of(lines, i, start_token, end_token): + count = 1 + n = len(lines) + while i < n: + i = find_tokens(lines, [end_token, start_token], i+1) + if i == -1: + return -1 + if check_token(lines[i], start_token): + count = count+1 else: - # This should not happen, really. - opt.error("Format not supported.") - - # Find the final step - for rel in format_relation: - if opt.end in rel[1]: - final_step = rel[0] - break - else: - opt.error("Format not supported.") - - # Convertion mode, back or forth - steps = [] - if (initial_step, opt.start) < (final_step, opt.end): - mode = "convert" - first_step = 1 - for step in format_relation: - if initial_step <= step[0] <= final_step: - if first_step and len(step[1]) == 1: - first_step = 0 - continue - steps.append(step[0]) - else: - mode = "revert" - relation_format = format_relation - relation_format.reverse() - last_step = None - - for step in relation_format: - if final_step <= step[0] <= initial_step: - steps.append(step[0]) - last_step = step + count = count-1 + if count == 0: + return i + return -1 - if last_step[1][-1] == opt.end: - steps.pop() - return mode, steps +def find_nonempty_line(lines, start, end = 0): + if end == 0: + end = len(lines) + for i in xrange(start, end): + if is_nonempty_line(lines[i]): + return i + return -1