X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2Fparser_tools.py;h=3b1322974e6878e058be8533b7ca8c73c60cca63;hb=9da74fe2078e24e1e7891784ecbfe33ff77e7f85;hp=8db73c615a62af449561467fb127b841454e15b4;hpb=e0227fa3968d0e55f145fbb6937476c2f9f66bd4;p=lyx.git diff --git a/lib/lyx2lyx/parser_tools.py b/lib/lyx2lyx/parser_tools.py index 8db73c615a..3b1322974e 100644 --- a/lib/lyx2lyx/parser_tools.py +++ b/lib/lyx2lyx/parser_tools.py @@ -1,6 +1,6 @@ # This file is part of lyx2lyx # -*- coding: utf-8 -*- -# Copyright (C) 2002-2010 Dekel Tsur , +# Copyright (C) 2002-2011 Dekel Tsur , # José Matos , Richard Heck # # This program is free software; you can redistribute it and/or @@ -15,9 +15,144 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -" This modules offer several free functions to help parse lines. " +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +''' +This module offers several free functions to help parse lines. +More documentation is below, but here is a quick guide to what +they do. Optional arguments are marked by brackets. + +find_token(lines, token, start[, end[, ignorews]]): + Returns the first line i, start <= i < end, on which + token is found at the beginning. Returns -1 if not + found. + If ignorews is (given and) True, then differences + in whitespace do not count, except that there must be no + extra whitespace following token itself. + +find_token_exact(lines, token, start[, end]): + As find_token, but with ignorews True. + +find_tokens(lines, tokens, start[, end[, ignorews]]): + Returns the first line i, start <= i < end, on which + one of the tokens in tokens is found at the beginning. + Returns -1 if not found. 
+ If ignorews is (given and) True, then differences + in whitespace do not count, except that there must be no + extra whitespace following token itself. + +find_tokens_exact(lines, token, start[, end]): + As find_tokens, but with ignorews True. + +find_token_backwards(lines, token, start): +find_tokens_backwards(lines, tokens, start): + As before, but look backwards. + +find_re(lines, rexp, start[, end]): + As find_token, but rexp is a regular expression object, + so it has to be passed as e.g.: re.compile(r'...'). + +get_value(lines, token, start[, end[, default]]): + Similar to find_token, but it returns what follows the + token on the found line. Example: + get_value(document.header, "\use_xetex", 0) + will find a line like: + \use_xetex true + and, in that case, return "true". (Note that whitespace + is stripped.) The final argument, default, defaults to "", + and is what is returned if we do not find anything. So you + can use that to set a default. + +get_quoted_value(lines, token, start[, end[, default]]): + Similar to get_value, but it will strip quotes off the + value, if they are present. So use this one for cases + where the value is normally quoted. + +get_option_value(line, option): + This assumes we have a line with something like: + option="value" + and returns value. Returns "" if not found. + +del_token(lines, token, start[, end]): + Like find_token, but deletes the line if it finds one. + Returns True if a line got deleted, otherwise False. + +find_beginning_of(lines, i, start_token, end_token): + Here, start_token and end_token are meant to be a matching + pair, like "\begin_layout" and "\end_layout". We look for + the start_token that pairs with the end_token that occurs + on or after line i. Returns -1 if not found. + So, in the layout case, this would find the \begin_layout + for the layout line i is in. 
+ Example: + ec = find_token(document.body, " int +def find_token(lines, token, start, end = 0, ignorews = False): + """ find_token(lines, token, start[[, end], ignorews]) -> int Return the lowest line where token is found, and is the first element, in lines[start, end]. + + If ignorews is True (default is False), then differences in + whitespace are ignored, except that there must be no extra + whitespace following token itself. Return -1 on failure.""" @@ -50,7 +189,7 @@ def find_token(lines, token, start, end = 0, exact = False): end = len(lines) m = len(token) for i in xrange(start, end): - if exact: + if ignorews: x = lines[i].split() y = token.split() if len(x) < len(y): @@ -67,19 +206,19 @@ def find_token_exact(lines, token, start, end = 0): return find_token(lines, token, start, end, True) -def find_tokens(lines, tokens, start, end = 0, exact = False): - """ find_tokens(lines, tokens, start[[, end], exact]) -> int +def find_tokens(lines, tokens, start, end = 0, ignorews = False): + """ find_tokens(lines, tokens, start[[, end], ignorews]) -> int Return the lowest line where one token in tokens is found, and is the first element, in lines[start, end]. 
Return -1 on failure.""" - if end == 0: + if end == 0 or end > len(lines): end = len(lines) for i in xrange(start, end): for token in tokens: - if exact: + if ignorews: x = lines[i].split() y = token.split() if len(x) < len(y): @@ -104,7 +243,7 @@ def find_re(lines, rexp, start, end = 0): Return -1 on failure.""" - if end == 0: + if end == 0 or end > len(lines): end = len(lines) for i in xrange(start, end): if rexp.match(lines[i]): @@ -143,55 +282,70 @@ def find_tokens_backwards(lines, tokens, start): def get_value(lines, token, start, end = 0, default = ""): - """ get_value(lines, token, start[[, end], default]) -> list of strings - - Return tokens after token for the first line, in lines, where - token is the first element.""" - - i = find_token_exact(lines, token, start, end) - if i == -1: - return default - if len(lines[i].split()) > 1: - return lines[i].split()[1] - else: - return default - - -def get_value_string(lines, token, start, end = 0, trim = False, default = ""): - """ get_value_string(lines, token, start[[, end], trim, default]) -> string + """ get_value(lines, token, start[[, end], default]) -> string - Return tokens after token as string, in lines, where - token is the first element. When trim is used, the first and last character - of the string is trimmed.""" + Find the next line that looks like: + token followed by other stuff + Returns "followed by other stuff" with leading and trailing + whitespace removed. 
+ """ i = find_token_exact(lines, token, start, end) if i == -1: return default - if len(lines[i].split()) > 1: - for k in range (0, len(lines[i])): - if lines[i][k] == ' ': - if trim ==False: - return lines[i][k+1:len(lines[i])] - else: - return lines[i][k+2:len(lines[i])-1] - else: - return default - - -def del_token(lines, token, start, end): + l = lines[i].split(None, 1) + if len(l) > 1: + return l[1].strip() + return default + + +def get_quoted_value(lines, token, start, end = 0, default = ""): + """ get_quoted_value(lines, token, start[[, end], default]) -> string + + Find the next line that looks like: + token "followed by other stuff" + Returns "followed by other stuff" with leading and trailing + whitespace and quotes removed. If there are no quotes, that is OK too. + So use get_value to preserve possible quotes, this one to remove them, + if they are there. + Note that we will NOT strip quotes from default! + """ + val = get_value(lines, token, start, end, "") + if not val: + return default + return val.strip('"') + + +def get_option_value(line, option): + rx = option + '\s*=\s*"([^"]+)"' + rx = re.compile(rx) + m = rx.search(line) + if not m: + return "" + return m.group(1) + + +def set_option_value(line, option, value): + rx = '(' + option + '\s*=\s*")[^"]+"' + rx = re.compile(rx) + m = rx.search(line) + if not m: + return line + return re.sub(rx, '\g<1>' + value + '"', line) + + +def del_token(lines, token, start, end = 0): """ del_token(lines, token, start, end) -> int - Find the lower line in lines where token is the first element and - delete that line. - - Returns the number of lines remaining.""" + Find the first line in lines where token is the first element + and delete that line. 
+ Returns True if we deleted a line, False + if we did not.""" k = find_token_exact(lines, token, start, end) if k == -1: - return end - else: - del lines[k] - return end - 1 + return False + del lines[k] + return True def find_beginning_of(lines, i, start_token, end_token): @@ -244,10 +398,20 @@ def find_end_of_layout(lines, i): return find_end_of(lines, i, "\\begin_layout", "\\end_layout") -# checks if line i is in the inset e.g., "\\begin_inset CommandInset ref" -# if so, returns starting and ending lines -# otherwise, returns (-1, -1) -def get_containing_inset(lines, i, inset): +def is_in_inset(lines, i, inset): + ''' + Checks if line i is in an inset of the given type. + If so, returns starting and ending lines. + Otherwise, returns False. + Example: + is_in_inset(document.body, i, "\\begin_inset Tabular") + returns False unless i is within a table. If it is, then + it returns the line on which the table begins and the one + on which it ends. Note that this pair will evaluate to + boolean True, so + if is_in_inset(...): + will do what you expect. + ''' defval = (-1, -1) stins = find_token_backwards(lines, inset, i) if stins == -1: @@ -257,3 +421,109 @@ def get_containing_inset(lines, i, inset): if endins < i: return defval return (stins, endins) + + +def get_containing_inset(lines, i): + ''' + Finds out what kind of inset line i is within. Returns a + list containing (i) what follows \begin_inset on the line + on which the inset begins, plus the starting and ending line. + Returns False on any kind of error or if it isn't in an inset. 
+ ''' + j = i + while True: + stins = find_token_backwards(lines, "\\begin_inset", j) + if stins == -1: + return False + endins = find_end_of_inset(lines, stins) + if endins > j: + break + j = stins - 1 + + inset = get_value(lines, "\\begin_inset", stins) + if inset == "": + # shouldn't happen + return False + return (inset, stins, endins) + + +def get_containing_layout(lines, i): + ''' + Finds out what kind of layout line i is within. Returns a + list containing (i) what follows \begin_layout on the line + on which the layout begins, plus the starting and ending line + and the start of the paragraph (after all params). + Returns False on any kind of error. + ''' + j = i + while True: + stlay = find_token_backwards(lines, "\\begin_layout", j) + if stlay == -1: + return False + endlay = find_end_of_layout(lines, stlay) + if endlay > i: + break + j = stlay - 1 + + lay = get_value(lines, "\\begin_layout", stlay) + if lay == "": + # shouldn't happen + return False + par_params = ["\\noindent", "\\indent", "\\indent-toggle", "\\leftindent", + "\\start_of_appendix", "\\paragraph_spacing single", + "\\paragraph_spacing onehalf", "\\paragraph_spacing double", + "\\paragraph_spacing other", "\\align", "\\labelwidthstring"] + stpar = stlay + while True: + stpar += 1 + if lines[stpar] not in par_params: + break + return (lay, stlay, endlay, stpar) + + +def count_pars_in_inset(lines, i): + ''' + Counts the paragraphs within this inset + ''' + ins = get_containing_inset(lines, i) + if ins == -1: + return -1 + pars = 0 + for j in range(ins[1], ins[2]): + m = re.match(r'\\begin_layout (.*)', lines[j]) + if m and get_containing_inset(lines, j)[0] == ins[0]: + pars += 1 + + return pars + + +def find_end_of_sequence(lines, i): + ''' + Returns the end of a sequence of identical layouts. 
+ ''' + lay = get_containing_layout(lines, i) + if lay == False: + return -1 + layout = lay[0] + endlay = lay[2] + i = endlay + while True: + m = re.match(r'\\begin_layout (.*)', lines[i]) + if m and m.group(1) != layout: + return endlay + elif lines[i] == "\\begin_deeper": + j = find_end_of(lines, i, "\\begin_deeper", "\\end_deeper") + if j != -1: + i = j + endlay = j + continue + if m and m.group(1) == layout: + endlay = find_end_of_layout(lines, i) + i = endlay + continue + if i == len(lines) - 1: + break + i = i + 1 + + return endlay +