# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2011 Dekel Tsur <dekel@lyx.org>,
-# José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
+# José Matos <jamatos@lyx.org>, Richard Kimberly Heck <rikiheck@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
More documentaton is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets.
-find_token(lines, token, start[, end[, ignorews]]):
+find_token(lines, token[, start[, end[, ignorews]]]):
Returns the first line i, start <= i < end, on which
token is found at the beginning. Returns -1 if not
found.
in whitespace do not count, except that there must be no
extra whitespace following token itself.
-find_token_exact(lines, token, start[, end]):
+find_token_exact(lines, token[, start[, end]]):
As find_token, but with ignorews set to True.
-find_tokens(lines, tokens, start[, end[, ignorews]]):
+find_tokens(lines, tokens[, start[, end[, ignorews]]]):
Returns the first line i, start <= i < end, on which
one of the tokens in tokens is found at the beginning.
Returns -1 if not found.
in whitespace do not count, except that there must be no
extra whitespace following token itself.
-find_tokens_exact(lines, token, start[, end]):
+find_tokens_exact(lines, token[, start[, end]]):
As find_tokens, but with ignorews True.
find_token_backwards(lines, token, start):
find_tokens_backwards(lines, tokens, start):
As before, but look backwards.
+find_substring(lines, sub[, start[, end]]) -> int
+ As find_token, but sub may be anywhere in the line.
+
find_re(lines, rexp, start[, end]):
As find_token, but rexp is a regular expression object,
so it has to be passed as e.g.: re.compile(r'...').
-get_value(lines, token, start[, end[, default]):
+get_value(lines, token[, start[, end[, default[, delete]]]]):
Similar to find_token, but it returns what follows the
token on the found line. Example:
get_value(document.header, "\\use_xetex", 0)
is stripped.) The final argument, default, defaults to "",
and is what is returned if we do not find anything. So you
can use that to set a default.
+ If delete is True, then delete the line if found.
-get_quoted_value(lines, token, start[, end[, default]]):
+get_quoted_value(lines, token[, start[, end[, default[, delete]]]]):
Similar to get_value, but it will strip quotes off the
value, if they are present. So use this one for cases
where the value is normally quoted.
option="value"
and returns value. Returns "" if not found.
-get_bool_value(lines, token, start[, end[, default]]):
+get_bool_value(lines, token[, start[, end[, default[, delete]]]]):
Like get_value, but returns a boolean.
-del_token(lines, token, start[, end]):
+set_bool_value(lines, token, value[, start[, end]]):
+ Find `token` in `lines[start:end]` and set to boolean value bool(`value`).
+ Return old value. Raise ValueError if token is not in lines.
+
+del_token(lines, token[, start[, end]]):
Like find_token, but deletes the line if it finds one.
Returns True if a line got deleted, otherwise False.
+ Use get_* with the optional argument "delete=True", if you want to
+ get and delete a token.
+
find_beginning_of(lines, i, start_token, end_token):
Here, start_token and end_token are meant to be a matching
pair, like "\\begin_layout" and "\\end_layout". We look for
the position of the last \end_deeper is returned, else
the position of the last \end_layout.
-is_in_inset(lines, i, inset):
- Checks if line i is in an inset of the given type.
+is_in_inset(lines, i, inset, default=(-1,-1)):
+ Check if line i is in an inset of the given type.
If so, returns starting and ending lines. Otherwise,
- returns False.
+ return default.
Example:
is_in_inset(document.body, i, "\\begin_inset Tabular")
- returns False unless i is within a table. If it is, then
+ returns (-1,-1) unless i is within a table. If it is, then
it returns the line on which the table begins and the one
on which it ends. Note that this pair will evaulate to
boolean True, so
- if is_in_inset(...):
+ if is_in_inset(..., default=False):
will do what you expect.
get_containing_inset(lines, i):
whitespace are ignored, but there must be whitespace following
token itself.
+ Use find_substring(lines, sub) to find a substring anywhere in `lines`.
+
Return -1 on failure."""
if end == 0 or end > len(lines):
the first element, in lines[start, end].
Return -1 on failure."""
+
if end == 0 or end > len(lines):
end = len(lines)
return find_tokens(lines, tokens, start, end, True)
-def find_re(lines, rexp, start=0, end=0):
- """ find_re(lines, rexp, start[, end]) -> int
+def find_substring(lines, sub, start=0, end=0):
+ """ find_substring(lines, sub[, start[, end]]) -> int
- Return the lowest line where rexp, a regular expression, is found
- in lines[start, end].
+ Return the lowest line number `i` in [start, end) where
+ `sub` is a substring of lines[i].
Return -1 on failure."""
+ if end == 0 or end > len(lines):
+ end = len(lines)
+ for i in range(start, end):
+ if sub in lines[i]:
+ return i
+ return -1
+
+
+def find_re(lines, rexp, start=0, end=0):
+ """ find_re(lines, rexp[, start[, end]]) -> int
+
+ Return the lowest line number `i` in [start, end) where the regular
+ expression object `rexp` matches at the beginning of lines[i].
+ Return -1 on failure.
+
+ Start your pattern with the wildcard ".*" to find a match anywhere in a
+ line. Use find_substring() to find a substring anywhere in the lines.
+ """
if end == 0 or end > len(lines):
end = len(lines)
for i in range(start, end):
return -1
-def get_value(lines, token, start=0, end=0, default=""):
- """ get_value(lines, token, start[[, end], default]) -> string
+def get_value(lines, token, start=0, end=0, default="", delete=False):
+ """Find `token` in `lines` and return part of line that follows it.
Find the next line that looks like:
token followed by other stuff
- Returns "followed by other stuff" with leading and trailing
+
+ If `delete` is True, delete the line (if found).
+
+ Return "followed by other stuff" with leading and trailing
whitespace removed.
"""
i = find_token_exact(lines, token, start, end)
# return lines.pop(i)[len(token):].strip() # or default
# see test_parser_tools.py
l = lines[i].split(None, 1)
+ if delete:
+ del(lines[i])
if len(l) > 1:
return l[1].strip()
return default
-def get_quoted_value(lines, token, start=0, end=0, default=""):
+def get_quoted_value(lines, token, start=0, end=0, default="", delete=False):
""" get_quoted_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like:
if they are there.
Note that we will NOT strip quotes from default!
"""
- val = get_value(lines, token, start, end, "")
+ val = get_value(lines, token, start, end, "", delete)
if not val:
return default
return val.strip('"')
-def get_bool_value(lines, token, start=0, end=0, default=None):
+bool_values = {"true": True, "1": True,
+ "false": False, "0": False}
+
+def get_bool_value(lines, token, start=0, end=0, default=None, delete=False):
""" get_bool_value(lines, token, start[[, end], default]) -> string
Find the next line that looks like:
- token bool_value
+ `token` <bool_value>
- Returns True if bool_value is 1 or true and
- False if bool_value is 0 or false
+ Return True if <bool_value> is 1 or "true", False if <bool_value>
+ is 0 or "false", else `default`.
"""
+ val = get_quoted_value(lines, token, start, end, default, delete)
+ return bool_values.get(val, default)
- val = get_quoted_value(lines, token, start, end, "")
- if val == "1" or val == "true":
- return True
- if val == "0" or val == "false":
- return False
- return default
+def set_bool_value(lines, token, value, start=0, end=0):
+ """Find `token` in `lines` and set to bool(`value`).
+
+ Return previous value. Raise `ValueError` if `token` is not in lines.
+
+ Cf. find_token(), get_bool_value().
+ """
+ i = find_token(lines, token, start, end)
+ if i == -1:
+ raise ValueError
+ oldvalue = get_bool_value(lines, token, i, i+1)
+ if oldvalue is value:
+ return oldvalue
+ # set to new value
+ if get_quoted_value(lines, token, i, i+1) in ('0', '1'):
+ lines[i] = "%s %d" % (token, value)
+ else:
+ lines[i] = "%s %s" % (token, str(value).lower())
+
+ return oldvalue
def get_option_value(line, option):
return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
-def is_in_inset(lines, i, inset):
- '''
- Checks if line i is in an inset of the given type.
- If so, returns starting and ending lines.
- Otherwise, returns False.
+def is_in_inset(lines, i, inset, default=(-1,-1)):
+ """
+ Check if line i is in an inset of the given type.
+ If so, return starting and ending lines, otherwise `default`.
Example:
is_in_inset(document.body, i, "\\begin_inset Tabular")
- returns False unless i is within a table. If it is, then
- it returns the line on which the table begins and the one
- on which it ends. Note that this pair will evaulate to
- boolean True, so
- if is_in_inset(...):
+ returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table).
+ If it is, then it returns the line on which the table begins and the one
+ on which it ends.
+ Note that this pair will evaluate to boolean True, so (with the optional
+ default value set to False)
+ if is_in_inset(..., default=False):
will do what you expect.
- '''
- defval = (-1, -1)
- stins = find_token_backwards(lines, inset, i)
- if stins == -1:
- return defval
- endins = find_end_of_inset(lines, stins)
- # note that this includes the notfound case.
- if endins < i:
- return defval
- return (stins, endins)
+ """
+ start = find_token_backwards(lines, inset, i)
+ if start == -1:
+ return default
+ end = find_end_of_inset(lines, start)
+ if end < i: # this includes the notfound case.
+ return default
+ return (start, end)
def get_containing_inset(lines, i):
def get_containing_layout(lines, i):
'''
- Finds out what kind of layout line i is within. Returns a
- list containing what follows \begin_layout on the line
- on which the layout begins, plus the starting and ending line
- and the start of the paragraph (after all params). I.e, returns:
+ Find out what kind of layout line `i` is within.
+ Return a tuple
(layoutname, layoutstart, layoutend, startofcontent)
- Returns False on any kind of error.
+ containing
+ * layout style/name,
+ * start line number,
+ * end line number, and
+ * number of first paragraph line (after all params).
+ Return `False` on any kind of error.
'''
j = i
while True:
if endlay < i:
return False
- lay = get_value(lines, "\\begin_layout", stlay)
- if lay == "":
- # shouldn't happen
- return False
+ layoutname = get_value(lines, "\\begin_layout", stlay)
+ if layoutname == "": # layout style missing
+ # TODO: What shall we do in this case?
+ pass
+ # layoutname = "Standard" # use same fallback as the LyX parser:
+ # raise ValueError("Missing layout name on line %d"%stlay) # diagnosis
+ # return False # generic error response
par_params = ["\\noindent", "\\indent", "\\indent-toggle", "\\leftindent",
"\\start_of_appendix", "\\paragraph_spacing", "\\align",
"\\labelwidthstring"]
stpar += 1
if lines[stpar].split(' ', 1)[0] not in par_params:
break
- return (lay, stlay, endlay, stpar)
+ return (layoutname, stlay, endlay, stpar)
def count_pars_in_inset(lines, i):
pars = 0
for j in range(ins[1], ins[2]):
m = re.match(r'\\begin_layout (.*)', lines[j])
- if m and get_containing_inset(lines, j)[0] == ins[0]:
+ if m and get_containing_inset(lines, j)[1] == ins[1]:
pars += 1
return pars