X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=lib%2Flyx2lyx%2Fparser_tools.py;h=175db00ac3b21de3bc335204a435261f57aec90a;hb=69ed8cb89abd291b19ca2a3423d1f368a0d67f71;hp=7818ac5dd7115f9361dfaa2d3991608c1ad0efeb;hpb=8da6cdcf23d042cf431ae3cab3b2826a7b9f00c4;p=lyx.git diff --git a/lib/lyx2lyx/parser_tools.py b/lib/lyx2lyx/parser_tools.py index 7818ac5dd7..175db00ac3 100644 --- a/lib/lyx2lyx/parser_tools.py +++ b/lib/lyx2lyx/parser_tools.py @@ -23,7 +23,7 @@ This module offers several free functions to help parse lines. More documentation is below, but here is a quick guide to what they do. Optional arguments are marked by brackets. -find_token(lines, token, start[, end[, ignorews]]): +find_token(lines, token[, start[, end[, ignorews]]]): Returns the first line i, start <= i < end, on which token is found at the beginning. Returns -1 if not found. @@ -31,10 +31,10 @@ find_token(lines, token, start[, end[, ignorews]]): in whitespace do not count, except that there must be no extra whitespace following token itself. -find_token_exact(lines, token, start[, end]): +find_token_exact(lines, token[, start[, end]]): As find_token, but with ignorews set to True. -find_tokens(lines, tokens, start[, end[, ignorews]]): +find_tokens(lines, tokens[, start[, end[, ignorews]]]): Returns the first line i, start <= i < end, on which one of the tokens in tokens is found at the beginning. Returns -1 if not found. @@ -42,18 +42,21 @@ find_tokens(lines, tokens, start[, end[, ignorews]]): in whitespace do not count, except that there must be no extra whitespace following token itself. -find_tokens_exact(lines, token, start[, end]): +find_tokens_exact(lines, token[, start[, end]]): As find_tokens, but with ignorews True. find_token_backwards(lines, token, start): find_tokens_backwards(lines, tokens, start): As before, but look backwards. +find_substring(lines, sub[, start[, end]]) -> int + As find_token, but sub may be anywhere in the line. 
+ find_re(lines, rexp, start[, end]): As find_token, but rexp is a regular expression object, so it has to be passed as e.g.: re.compile(r'...'). -get_value(lines, token, start[, end[, default]): +get_value(lines, token[, start[, end[, default[, delete]]]]): Similar to find_token, but it returns what follows the token on the found line. Example: get_value(document.header, "\\use_xetex", 0) @@ -64,7 +67,7 @@ get_value(lines, token, start[, end[, default]): and is what is returned if we do not find anything. So you can use that to set a default. -get_quoted_value(lines, token, start[, end[, default]]): +get_quoted_value(lines, token[, start[, end[, default[, delete]]]]): Similar to get_value, but it will strip quotes off the value, if they are present. So use this one for cases where the value is normally quoted. @@ -74,13 +77,20 @@ get_option_value(line, option): option="value" and returns value. Returns "" if not found. -get_bool_value(lines, token, start[, end[, default]]): +get_bool_value(lines, token[, start[, end[, default[, delete]]]]): Like get_value, but returns a boolean. -del_token(lines, token, start[, end]): +set_bool_value(lines, token, value[, start[, end]]): + Find `token` in `lines[start:end]` and set to boolean value bool(`value`). + Return old value. Raise ValueError if token is not in lines. + +del_token(lines, token[, start[, end]]): Like find_token, but deletes the line if it finds one. Returns True if a line got deleted, otherwise False. + Use get_* with the optional argument "delete=True", if you want to + get and delete a token. + find_beginning_of(lines, i, start_token, end_token): Here, start_token and end_token are meant to be a matching pair, like "\\begin_layout" and "\\end_layout". We look for @@ -113,17 +123,17 @@ find_end_of_sequence(lines, i): the position of the last \end_deeper is returned, else the position of the last \end_layout. -is_in_inset(lines, i, inset): - Checks if line i is in an inset of the given type. 
+is_in_inset(lines, i, inset, default=(-1,-1)): + Check if line i is in an inset of the given type. If so, returns starting and ending lines. Otherwise, - returns False. + return default. Example: is_in_inset(document.body, i, "\\begin_inset Tabular") - returns False unless i is within a table. If it is, then + returns (-1,-1) unless i is within a table. If it is, then it returns the line on which the table begins and the one on which it ends. Note that this pair will evaluate to boolean True, so - if is_in_inset(...): + if is_in_inset(..., default=False): will do what you expect. get_containing_inset(lines, i): @@ -187,6 +197,8 @@ def find_token(lines, token, start=0, end=0, ignorews=False): whitespace are ignored, but there must be whitespace following token itself. + Use find_substring(lines, sub) to find a substring anywhere in `lines`. + Return -1 on failure.""" if end == 0 or end > len(lines): @@ -217,6 +229,7 @@ def find_tokens(lines, tokens, start=0, end=0, ignorews=False): the first element, in lines[start, end]. Return -1 on failure.""" + if end == 0 or end > len(lines): end = len(lines) @@ -239,14 +252,32 @@ def find_tokens_exact(lines, tokens, start=0, end=0): return find_tokens(lines, tokens, start, end, True) -def find_re(lines, rexp, start=0, end=0): - """ find_re(lines, rexp, start[, end]) -> int +def find_substring(lines, sub, start=0, end=0): + """ find_substring(lines, sub[, start[, end]]) -> int - Return the lowest line where rexp, a regular expression, is found - in lines[start, end]. + Return the lowest line number `i` in [start, end] where + `sub` is a substring of line[i]. 
Return -1 on failure.""" + if end == 0 or end > len(lines): + end = len(lines) + for i in range(start, end): + if sub in lines[i]: + return i + return -1 + + +def find_re(lines, rexp, start=0, end=0): + """ find_re(lines, rexp[, start[, end]]) -> int + + Return the lowest line number `i` in [start, end] where the regular + expression object `rexp` matches at the beginning of line[i]. + Return -1 on failure. + + Start your pattern with the wildcard ".*" to find a match anywhere in a + line. Use find_substring() to find a substring anywhere in the lines. + """ if end == 0 or end > len(lines): end = len(lines) for i in range(start, end): @@ -357,12 +388,15 @@ def find_across_lines(lines, sub, start=0, end=0): return -1 -def get_value(lines, token, start=0, end=0, default=""): - """ get_value(lines, token, start[[, end], default]) -> string +def get_value(lines, token, start=0, end=0, default="", delete=False): + """Find `token` in `lines` and return part of line that follows it. Find the next line that looks like: token followed by other stuff - Returns "followed by other stuff" with leading and trailing + + If `delete` is True, delete the line (if found). + + Return "followed by other stuff" with leading and trailing whitespace removed. """ i = find_token_exact(lines, token, start, end) @@ -372,12 +406,14 @@ def get_value(lines, token, start=0, end=0, default=""): # return lines.pop(i)[len(token):].strip() # or default # see test_parser_tools.py l = lines[i].split(None, 1) + if delete: + del(lines[i]) if len(l) > 1: return l[1].strip() return default -def get_quoted_value(lines, token, start=0, end=0, default=""): +def get_quoted_value(lines, token, start=0, end=0, default="", delete=False): """ get_quoted_value(lines, token, start[[, end], default]) -> string Find the next line that looks like: @@ -388,29 +424,48 @@ def get_quoted_value(lines, token, start=0, end=0, default=""): if they are there. Note that we will NOT strip quotes from default! 
""" - val = get_value(lines, token, start, end, "") + val = get_value(lines, token, start, end, "", delete) if not val: return default return val.strip('"') -def get_bool_value(lines, token, start=0, end=0, default=None): +bool_values = {"true": True, "1": True, + "false": False, "0": False} + +def get_bool_value(lines, token, start=0, end=0, default=None, delete=False): """ get_bool_value(lines, token, start[[, end], default]) -> string Find the next line that looks like: - token bool_value + `token` - Returns True if bool_value is 1 or true and - False if bool_value is 0 or false + Return True if is 1 or "true", False if + is 0 or "false", else `default`. """ + val = get_quoted_value(lines, token, start, end, default, delete) + return bool_values.get(val, default) - val = get_quoted_value(lines, token, start, end, "") - if val == "1" or val == "true": - return True - if val == "0" or val == "false": - return False - return default +def set_bool_value(lines, token, value, start=0, end=0): + """Find `token` in `lines` and set to bool(`value`). + + Return previous value. Raise `ValueError` if `token` is not in lines. + + Cf. find_token(), get_bool_value(). + """ + i = find_token(lines, token, start, end) + if i == -1: + raise ValueError + oldvalue = get_bool_value(lines, token, i, i+1) + if oldvalue is value: + return oldvalue + # set to new value + if get_quoted_value(lines, token, i, i+1) in ('0', '1'): + lines[i] = "%s %d" % (token, value) + else: + lines[i] = "%s %s" % (token, str(value).lower()) + + return oldvalue def get_option_value(line, option): @@ -530,29 +585,27 @@ def find_end_of_layout(lines, i): return find_end_of(lines, i, "\\begin_layout", "\\end_layout") -def is_in_inset(lines, i, inset): - ''' - Checks if line i is in an inset of the given type. - If so, returns starting and ending lines. - Otherwise, returns False. +def is_in_inset(lines, i, inset, default=(-1,-1)): + """ + Check if line i is in an inset of the given type. 
+ If so, return starting and ending lines, otherwise `default`. Example: is_in_inset(document.body, i, "\\begin_inset Tabular") - returns False unless i is within a table. If it is, then - it returns the line on which the table begins and the one - on which it ends. Note that this pair will evaulate to - boolean True, so - if is_in_inset(...): + returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table). + If it is, then it returns the line on which the table begins and the one + on which it ends. + Note that this pair will evaluate to boolean True, so (with the optional + default value set to False) + if is_in_inset(..., default=False): will do what you expect. - ''' - defval = (-1, -1) - stins = find_token_backwards(lines, inset, i) - if stins == -1: - return defval - endins = find_end_of_inset(lines, stins) - # note that this includes the notfound case. - if endins < i: - return defval - return (stins, endins) + """ + start = find_token_backwards(lines, inset, i) + if start == -1: + return default + end = find_end_of_inset(lines, start) + if end < i: # this includes the notfound case. + return default + return (start, end) def get_containing_inset(lines, i): @@ -584,12 +637,15 @@ def get_containing_inset(lines, i): def get_containing_layout(lines, i): ''' - Finds out what kind of layout line i is within. Returns a - list containing what follows \begin_layout on the line - on which the layout begins, plus the starting and ending line - and the start of the paragraph (after all params). I.e, returns: + Find out what kind of layout line `i` is within. + Return a tuple (layoutname, layoutstart, layoutend, startofcontent) - Returns False on any kind of error. + containing + * layout style/name, + * start line number, + * end line number, and + * number of first paragraph line (after all params). + Return `False` on any kind of error. 
''' j = i while True: @@ -604,10 +660,13 @@ def get_containing_layout(lines, i): if endlay < i: return False - lay = get_value(lines, "\\begin_layout", stlay) - if lay == "": - # shouldn't happen - return False + layoutname = get_value(lines, "\\begin_layout", stlay) + if layoutname == "": # layout style missing + # TODO: What shall we do in this case? + pass + # layoutname == "Standard" # use same fallback as the LyX parser: + # raise ValueError("Missing layout name on line %d"%stlay) # diagnosis + # return False # generic error response par_params = ["\\noindent", "\\indent", "\\indent-toggle", "\\leftindent", "\\start_of_appendix", "\\paragraph_spacing", "\\align", "\\labelwidthstring"] @@ -616,7 +675,7 @@ def get_containing_layout(lines, i): stpar += 1 if lines[stpar].split(' ', 1)[0] not in par_params: break - return (lay, stlay, endlay, stpar) + return (layoutname, stlay, endlay, stpar) def count_pars_in_inset(lines, i):