1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2002-2010 Dekel Tsur <dekel@lyx.org>,
4 # José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
This module offers several free functions to help parse lines.
More documentation is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets.
26 find_token(lines, token, start[, end[, exact]]):
27 Returns the first line i, start <= i < end, on which
28 token is found at the beginning. Returns -1 if not
29 found. If exact is (given and) True, then differences
30 in whitespace do not count.
find_token_exact(lines, token, start[, end]):
Badly named. As find_token, but with exact True.
find_tokens(lines, tokens, start[, end[, exact]]):
Returns the first line i, start <= i < end, on which
one of the tokens in tokens is found at the beginning.
Returns -1 if not found. If exact is (given and) True,
then differences in whitespace do not count.
find_tokens_exact(lines, tokens, start[, end]):
Badly named. As find_tokens, but with exact True.
44 find_token_backwards(lines, token, start):
45 find_tokens_backwards(lines, tokens, start):
46 As before, but look backwards.
48 find_re(lines, rexp, start[, end]):
49 As find_token, but rexp is a regular expression object,
50 so it has to be passed as e.g.: re.compile(r'...').
get_value(lines, token, start[, end[, default]]):
Similar to find_token, but it returns what follows the
token on the found line. Example:
  get_value(document.header, "\\use_xetex", 0)
will find a line like:
  \\use_xetex true
and, in that case, return "true". (Note that whitespace
is stripped.) The final argument, default, defaults to "",
and is what is returned if we do not find anything. So you
can use that to set a default.
get_quoted_value(lines, token, start[, end[, default]]):
Similar to get_value, but it will strip quotes off the
value, if they are present. So use this one for cases
where the value is normally quoted.
70 # Utilities for one line
def check_token(line, token):
    """ check_token(line, token) -> bool

    Return True if line begins with token, else return False.
    Leading whitespace in line is not skipped, so an indented token
    does not match."""
    return line.startswith(token)
80 def is_nonempty_line(line):
81 """ is_nonempty_line(line) -> bool
83 Return False if line is either empty or it has only whitespaces,
85 return line != " "*len(line)
88 # Utilities for a list of lines
def find_token(lines, token, start, end = 0, exact = False):
    """ find_token(lines, token, start[[, end], exact]) -> int

    Return the lowest index i, start <= i < end, such that token is
    the first element of lines[i].

    end == 0 (the default), or an end beyond the last line, means
    "search up to the end of lines".

    If exact is True (default is False), line and token are compared
    word by word after splitting on whitespace: differences in the
    amount of whitespace are ignored and every word of token must
    match a whole word of the line. Otherwise token only has to be a
    literal prefix of the line.

    Return -1 on failure."""
    if end == 0 or end > len(lines):
        end = len(lines)

    if exact:
        # The token never changes, so split it once, outside the loop.
        words = token.split()
        nwords = len(words)
        for i in range(start, end):
            if lines[i].split()[:nwords] == words:
                return i
        return -1

    for i in range(start, end):
        if lines[i].startswith(token):
            return i
    return -1
def find_token_exact(lines, token, start, end = 0):
    """ find_token_exact(lines, token, start[, end]) -> int

    As find_token, but always called with exact set to True, i.e.
    line and token are compared word by word.

    Return -1 on failure."""
    return find_token(lines, token, start, end, True)
def find_tokens(lines, tokens, start, end = 0, exact = False):
    """ find_tokens(lines, tokens, start[[, end], exact]) -> int

    Return the lowest index i, start <= i < end, such that one of the
    tokens in tokens is the first element of lines[i].

    end == 0 (the default), or an end beyond the last line, means
    "search up to the end of lines".

    If exact is True (default is False), line and tokens are compared
    word by word after splitting on whitespace; otherwise a token only
    has to be a literal prefix of the line.

    Return -1 on failure."""
    if end == 0 or end > len(lines):
        end = len(lines)

    if exact:
        # Split every token once instead of once per line.
        wanted = [token.split() for token in tokens]
        for i in range(start, end):
            words = lines[i].split()
            for target in wanted:
                if words[:len(target)] == target:
                    return i
        return -1

    # str.startswith accepts a tuple of alternative prefixes.
    prefixes = tuple(tokens)
    for i in range(start, end):
        if lines[i].startswith(prefixes):
            return i
    return -1
def find_tokens_exact(lines, tokens, start, end = 0):
    """ find_tokens_exact(lines, tokens, start[, end]) -> int

    As find_tokens, but always called with exact set to True, i.e.
    line and tokens are compared word by word.

    Return -1 on failure."""
    return find_tokens(lines, tokens, start, end, True)
def find_re(lines, rexp, start, end = 0):
    """ find_re(lines, rexp, start[, end]) -> int

    Return the lowest index i, start <= i < end, such that rexp, a
    compiled regular expression, matches at the beginning of lines[i]
    (rexp.match is used, not rexp.search).

    end == 0 (the default), or an end beyond the last line, means
    "search up to the end of lines".

    Return -1 on failure."""
    if end == 0 or end > len(lines):
        end = len(lines)
    for i in range(start, end):
        if rexp.match(lines[i]):
            return i
    return -1
def find_token_backwards(lines, token, start):
    """ find_token_backwards(lines, token, start) -> int

    Return the highest index i, 0 <= i <= start, such that lines[i]
    begins with token. The search runs from line start (included)
    back to line 0.

    Return -1 on failure."""
    for i in range(start, -1, -1):
        if lines[i].startswith(token):
            return i
    return -1
def find_tokens_backwards(lines, tokens, start):
    """ find_tokens_backwards(lines, tokens, start) -> int

    Return the highest index i, 0 <= i <= start, such that lines[i]
    begins with one of the tokens in tokens. The search runs from
    line start (included) back to line 0.

    Return -1 on failure."""
    # str.startswith accepts a tuple of alternative prefixes.
    prefixes = tuple(tokens)
    for i in range(start, -1, -1):
        if lines[i].startswith(prefixes):
            return i
    return -1
def get_value(lines, token, start, end = 0, default = ""):
    """ get_value(lines, token, start[[, end], default]) -> string

    Find the next line that looks like:
      token followed by other stuff
    Returns "followed by other stuff" with leading and trailing
    whitespace removed.

    The line is located with find_token_exact, and the value is what
    comes after the first whitespace-delimited word of that line.
    Returns default if no such line exists or if nothing follows the
    first word.
    """
    i = find_token_exact(lines, token, start, end)
    if i == -1:
        return default
    parts = lines[i].split(None, 1)
    if len(parts) > 1:
        return parts[1].strip()
    return default
def get_quoted_value(lines, token, start, end = 0, default = ""):
    """ get_quoted_value(lines, token, start[[, end], default]) -> string

    Find the next line that looks like:
      token "followed by other stuff"
    Returns "followed by other stuff" with leading and trailing
    whitespace and quotes removed. If there are no quotes, that is OK too.
    So use get_value to preserve possible quotes, this one to remove them,
    if they are there.
    Note that we will NOT strip quotes from default!
    """
    # Ask get_value for "" on failure so that a caller-supplied default
    # is returned untouched rather than being stripped of its quotes.
    val = get_value(lines, token, start, end, "")
    if not val:
        return default
    return val.strip('"')
def del_token(lines, token, start, end):
    """ del_token(lines, token, start, end) -> int

    Find the lowest line in lines[start:end] whose first element is
    token (located with find_token_exact) and delete that line from
    lines in place.

    Returns the adjusted end index: end - 1 if a line was deleted,
    end unchanged otherwise."""
    k = find_token_exact(lines, token, start, end)
    if k == -1:
        return end
    del lines[k]
    return end - 1
def find_beginning_of(lines, i, start_token, end_token):
    """ find_beginning_of(lines, i, start_token, end_token) -> int

    Search backwards from line i - 1 for the start_token line that
    opens the construct containing line i, skipping over complete
    nested start_token/end_token pairs.

    Return -1 if no matching start_token is found."""
    # depth counts how many constructs are still open while walking
    # backwards: an end_token opens one more, a start_token closes one.
    depth = 1
    while i > 0:
        i = find_tokens_backwards(lines, [start_token, end_token], i-1)
        if i == -1:
            return -1
        if check_token(lines[i], end_token):
            depth += 1
        else:
            depth -= 1
        if depth == 0:
            return i
    return -1
def find_end_of(lines, i, start_token, end_token):
    """ find_end_of(lines, i, start_token, end_token) -> int

    Search forwards from line i + 1 for the end_token line that
    closes the construct open at line i, skipping over complete
    nested start_token/end_token pairs.

    Return -1 if no matching end_token is found."""
    # depth counts how many constructs are still open: a start_token
    # opens one more, an end_token closes one.
    depth = 1
    n = len(lines)
    while i < n:
        i = find_tokens(lines, [end_token, start_token], i+1)
        if i == -1:
            return -1
        if check_token(lines[i], start_token):
            depth += 1
        else:
            depth -= 1
        if depth == 0:
            return i
    return -1
def find_nonempty_line(lines, start, end = 0):
    """ find_nonempty_line(lines, start[, end]) -> int

    Return the lowest index i, start <= i < end, such that
    is_nonempty_line(lines[i]) is true.

    end == 0 (the default), or an end beyond the last line, means
    "search up to the end of lines".

    Return -1 on failure."""
    # Clamp an oversized end, as the other finders in this file do,
    # instead of running off the end of lines.
    if end == 0 or end > len(lines):
        end = len(lines)
    for i in range(start, end):
        if is_nonempty_line(lines[i]):
            return i
    return -1
def find_end_of_inset(lines, i):
    """ find_end_of_inset(lines, i) -> int

    Find the \\end_inset line closing the inset that is open at line
    i; the search starts at line i + 1 and skips nested insets.

    Return -1 if it is not found."""
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def find_end_of_layout(lines, i):
    """ find_end_of_layout(lines, i) -> int

    Find the \\end_layout line closing the layout that is open at
    line i; the search starts at line i + 1 and skips nested layouts.

    Return -1 if it is not found."""
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
def is_in_inset(lines, i, inset):
    '''
    Checks if line i is in an inset of the given type.
    If so, returns starting and ending lines.
    Otherwise, returns False.
    Example:
      is_in_inset(document.body, i, "\\begin_inset Tabular")
    returns False unless i is within a table. If it is, then
    it returns the line on which the table begins and the one
    on which it ends. Note that this pair will evaluate to
    boolean True, so
      if is_in_inset(...):
    will do what you expect.

    Only the nearest line at or before i that begins with inset is
    examined.
    '''
    stins = find_token_backwards(lines, inset, i)
    if stins == -1:
        return False
    endins = find_end_of_inset(lines, stins)
    # note that this includes the notfound case.
    if endins < i:
        return False
    return (stins, endins)
def get_containing_inset(lines, i):
    '''
    Finds out what kind of inset line i is within. Returns a
    tuple containing what follows \\begin_inset on the line
    on which the inset begins, plus the starting and ending line.
    Returns False on any kind of error or if it isn't in an inset.
    '''
    # Walk outwards: a \begin_inset found before line i may belong to a
    # sibling inset that has already been closed (or is unterminated, in
    # which case its end is -1), so keep looking further back until one
    # is found whose end lies at or after line i.
    j = i
    while True:
        stins = find_token_backwards(lines, "\\begin_inset", j)
        if stins == -1:
            return False
        endins = find_end_of_inset(lines, stins)
        if endins >= i:
            break
        j = stins - 1
    # Restrict the lookup to the opening line itself.
    inset = get_value(lines, "\\begin_inset", stins, stins + 1)
    if inset == "":
        # shouldn't happen
        return False
    return (inset, stins, endins)
def get_containing_layout(lines, i):
    '''
    Finds out what kind of layout line i is within. Returns a
    tuple containing what follows \\begin_layout on the line
    on which the layout begins, plus the starting and ending line.
    Returns False on any kind of error.
    '''
    # Walk outwards: a \begin_layout found before line i may belong to a
    # layout that has already been closed (or is unterminated, in which
    # case its end is -1), so keep looking further back until one is
    # found whose end lies at or after line i.
    j = i
    while True:
        stlay = find_token_backwards(lines, "\\begin_layout", j)
        if stlay == -1:
            return False
        endlay = find_end_of_layout(lines, stlay)
        if endlay >= i:
            break
        j = stlay - 1
    # Restrict the lookup to the opening line itself.
    lay = get_value(lines, "\\begin_layout", stlay, stlay + 1)
    if lay == "":
        # shouldn't happen
        return False
    return (lay, stlay, endlay)