X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2Fparser_tools.py;h=6d64a793ce2e3861b5a922e3930f3bd0351cc005;hb=a84a98b54561044b5902dfce39bc31082877fec1;hp=d0466df703ded682baecbe1b2bfd5c7575404021;hpb=b1c11faf7e29bf24d2315fd04a2515a2fd3c126a;p=lyx.git diff --git a/lib/lyx2lyx/parser_tools.py b/lib/lyx2lyx/parser_tools.py index d0466df703..6d64a793ce 100644 --- a/lib/lyx2lyx/parser_tools.py +++ b/lib/lyx2lyx/parser_tools.py @@ -1,6 +1,6 @@ # This file is part of lyx2lyx # -*- coding: utf-8 -*- -# Copyright (C) 2002-2010 Dekel Tsur , +# Copyright (C) 2002-2011 Dekel Tsur , # José Matos , Richard Heck # # This program is free software; you can redistribute it and/or @@ -15,7 +15,7 @@ # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ''' @@ -23,23 +23,27 @@ This modules offer several free functions to help parse lines. More documentaton is below, but here is a quick guide to what they do. Optional arguments are marked by brackets. -find_token(lines, token, start[, end[, exact]]): +find_token(lines, token, start[, end[, ignorews]]): Returns the first line i, start <= i < end, on which token is found at the beginning. Returns -1 if not - found. If exact is (given and) True, then differences - in whitespace do not count. + found. + If ignorews is (given and) True, then differences + in whitespace do not count, except that there must be no + extra whitespace following token itself. find_token_exact(lines, token, start[, end]): - Badly named. As find_token, but with ignorews True. + As find_token, but with ignorews True. find_tokens(lines, tokens, start[, end[, ignorews]]): Returns the first line i, start <= i < end, on which oen of the tokens in tokens is found at the beginning. - Returns -1 if not found. If ignorews is (given and) True, - then differences in whitespace do not count. + Returns -1 if not found. + If ignorews is (given and) True, then differences + in whitespace do not count, except that there must be no + extra whitespace following token itself. find_tokens_exact(lines, token, start[, end]): - Badly named. As find_tokens, but with ignorews True. + As find_tokens, but with ignorews True. find_token_backwards(lines, token, start): find_tokens_backwards(lines, tokens, start): @@ -65,11 +69,81 @@ get_quoted_value(lines, token, start[, end[, default]): value, if they are present. So use this one for cases where the value is normally quoted. -del_token(lines, token, start, end): +get_option_value(line, option): + This assumes we have a line with something like: + option="value" + and returns value. Returns "" if not found. + +del_token(lines, token, start[, end]): Like find_token, but deletes the line if it finds one. Returns True if a line got deleted, otherwise False. + +find_beginning_of(lines, i, start_token, end_token): + Here, start_token and end_token are meant to be a matching + pair, like "\begin_layout" and "\end_layout". We look for + the start_token that pairs with the end_token that occurs + on or after line i. Returns -1 if not found. + So, in the layout case, this would find the \begin_layout + for the layout line i is in. + Example: + ec = find_token(document.body, " bool @@ -89,14 +163,15 @@ def is_nonempty_line(line): # Utilities for a list of lines -def find_token(lines, token, start, end = 0, exact = False): - """ find_token(lines, token, start[[, end], exact]) -> int +def find_token(lines, token, start, end = 0, ignorews = False): + """ find_token(lines, token, start[[, end], ignorews]) -> int Return the lowest line where token is found, and is the first element, in lines[start, end]. - If exact is True (default is False), then differences in - whitespace are ignored. + If ignorews is True (default is False), then differences in + whitespace are ignored, except that there must be no extra + whitespace following token itself. Return -1 on failure.""" @@ -104,7 +179,7 @@ def find_token(lines, token, start, end = 0, exact = False): end = len(lines) m = len(token) for i in xrange(start, end): - if exact: + if ignorews: x = lines[i].split() y = token.split() if len(x) < len(y): @@ -121,19 +196,19 @@ def find_token_exact(lines, token, start, end = 0): return find_token(lines, token, start, end, True) -def find_tokens(lines, tokens, start, end = 0, exact = False): - """ find_tokens(lines, tokens, start[[, end], exact]) -> int +def find_tokens(lines, tokens, start, end = 0, ignorews = False): + """ find_tokens(lines, tokens, start[[, end], ignorews]) -> int Return the lowest line where one token in tokens is found, and is the first element, in lines[start, end]. Return -1 on failure.""" - if end == 0: + if end == 0 or end > len(lines): end = len(lines) for i in xrange(start, end): for token in tokens: - if exact: + if ignorews: x = lines[i].split() y = token.split() if len(x) < len(y): @@ -158,7 +233,7 @@ def find_re(lines, rexp, start, end = 0): Return -1 on failure.""" - if end == 0: + if end == 0 or end > len(lines): end = len(lines) for i in xrange(start, end): if rexp.match(lines[i]): @@ -231,7 +306,25 @@ def get_quoted_value(lines, token, start, end = 0, default = ""): return val.strip('"') -def del_token(lines, token, start, end): +def get_option_value(line, option): + rx = option + '\s*=\s*"([^"]+)"' + rx = re.compile(rx) + m = rx.search(line) + if not m: + return "" + return m.group(1) + + +def set_option_value(line, option, value): + rx = '(' + option + '\s*=\s*")[^"]+"' + rx = re.compile(rx) + m = rx.search(line) + if not m: + return line + return re.sub(rx, '\g<1>' + value + '"', line) + + +def del_token(lines, token, start, end = 0): """ del_token(lines, token, start, end) -> int Find the first line in lines where token is the first element @@ -327,12 +420,16 @@ def get_containing_inset(lines, i): on which the inset begins, plus the starting and ending line. Returns False on any kind of error or if it isn't in an inset. ''' - stins = find_token_backwards(lines, i, "\\begin_inset") - if stins == -1: - return False - endins = find_end_of_inset(lines, stins) - if endins < i: - return False + j = i + while True: + stins = find_token_backwards(lines, "\\begin_inset", j) + if stins == -1: + return False + endins = find_end_of_inset(lines, stins) + if endins > j: + break + j = stins - 1 + inset = get_value(lines, "\\begin_inset", stins) if inset == "": # shouldn't happen @@ -347,14 +444,18 @@ def get_containing_layout(lines, i): on which the layout begins, plus the starting and ending line. Returns False on any kind of error. ''' - stins = find_token_backwards(lines, i, "\\begin_layout") - if stins == -1: - return False - endins = find_end_of_layout(lines, stins) - if endins < i: - return False - lay = get_value(lines, "\\begin_layout", stins) + j = i + while True: + stlay = find_token_backwards(lines, "\\begin_layout", j) + if stlay == -1: + return False + endlay = find_end_of_layout(lines, stlay) + if endlay > i: + break + j = stlay - 1 + + lay = get_value(lines, "\\begin_layout", stlay) if lay == "": # shouldn't happen return False - return (lay, stins, endins) + return (lay, stlay, endlay)