1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2002-2010 Dekel Tsur <dekel@lyx.org>,
4 # José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 This module offers several free functions to help parse lines.
23 More documentation is below, but here is a quick guide to what
24 they do. Optional arguments are marked by brackets.
26 find_token(lines, token, start[, end[, exact]]):
27 Returns the first line i, start <= i < end, on which
28 token is found at the beginning. Returns -1 if not
29 found. If exact is (given and) True, then differences
30 in whitespace do not count.
32 find_token_exact(lines, token, start[, end]):
33 Badly named. As find_token, but with exact True.
35 find_tokens(lines, tokens, start[, end[, exact]]):
36 Returns the first line i, start <= i < end, on which
37 one of the tokens in tokens is found at the beginning.
38 Returns -1 if not found. If exact is (given and) True,
39 then differences in whitespace do not count.
41 find_tokens_exact(lines, token, start[, end]):
42 Badly named. As find_tokens, but with exact True.
44 find_token_backwards(lines, token, start):
45 find_tokens_backwards(lines, tokens, start):
46 As before, but look backwards.
48 find_re(lines, rexp, start[, end]):
49 As find_token, but rexp is a regular expression object,
50 so it has to be passed as e.g.: re.compile(r'...').
52 get_value(lines, token, start[, end[, default]]):
53 Similar to find_token, but it returns what follows the
54 token on the found line. Example:
55 get_value(document.header, "\use_xetex", 0)
56 will find a line like:
57 \use_xetex true
58 and, in that case, return "true". (Note that whitespace
59 is stripped.) The final argument, default, defaults to "",
60 and is what is returned if we do not find anything. So you
61 can use that to set a default.
63 get_quoted_value(lines, token, start[, end[, default]]):
64 Similar to get_value, but it will strip quotes off the
65 value, if they are present. So use this one for cases
66 where the value is normally quoted.
68 del_token(lines, token, start[, end]):
69 Like find_token, but deletes the line if it finds one.
70 Returns True if a line got deleted, otherwise False.
73 # Utilities for one line
def check_token(line, token):
    """ check_token(line, token) -> bool

    Return True if line begins with token (i.e. token is the first
    element of the line), else return False."""
    return line.startswith(token)
def is_nonempty_line(line):
    """ is_nonempty_line(line) -> bool

    Return False if line is either empty or consists only of space
    characters, else return True.

    Only the space character counts as blank here: a line holding a
    tab or a newline is reported as non-empty."""
    # Stripping spaces only (not all whitespace) keeps the exact meaning
    # of the comparison against " " * len(line).
    return line.strip(" ") != ""
91 # Utilities for a list of lines
def find_token(lines, token, start, end = 0, exact = False):
    """ find_token(lines, token, start[[, end], exact]) -> int

    Return the lowest index i, start <= i < end, such that lines[i]
    begins with token.

    An end of 0 (the default), or an end beyond the list, means
    "search up to the end of lines".

    If exact is True (default is False), then differences in
    whitespace are ignored: the line and the token are both split on
    whitespace and the token's words must be the first words of the
    line.

    Return -1 on failure."""
    if end == 0 or end > len(lines):
        end = len(lines)

    if exact:
        # The token does not change, so split it once, not once per line.
        wanted = token.split()
        nwords = len(wanted)
        for i in range(start, end):
            # A line with fewer words than the token yields a shorter
            # slice, which can never compare equal.
            if lines[i].split()[:nwords] == wanted:
                return i
        return -1

    for i in range(start, end):
        if lines[i].startswith(token):
            return i
    return -1
def find_token_exact(lines, token, start, end = 0):
    """ find_token_exact(lines, token, start[, end]) -> int

    As find_token, but with exact set to True, so that differences
    in whitespace between the line and the token are ignored.

    Return -1 on failure."""
    return find_token(lines, token, start, end, True)
def find_tokens(lines, tokens, start, end = 0, exact = False):
    """ find_tokens(lines, tokens, start[[, end], exact]) -> int

    Return the lowest index i, start <= i < end, such that lines[i]
    begins with one of the tokens in tokens.

    An end of 0 (the default), or an end beyond the list, means
    "search up to the end of lines".

    If exact is True (default is False), then differences in
    whitespace are ignored: lines and tokens are compared word by
    word after splitting on whitespace.

    Return -1 on failure."""
    if end == 0 or end > len(lines):
        end = len(lines)

    if exact:
        # Split every token once up front instead of once per line.
        wanted = [tok.split() for tok in tokens]
        for i in range(start, end):
            words = lines[i].split()
            for target in wanted:
                if words[:len(target)] == target:
                    return i
        return -1

    # str.startswith accepts a tuple of alternatives.
    prefixes = tuple(tokens)
    for i in range(start, end):
        if lines[i].startswith(prefixes):
            return i
    return -1
def find_tokens_exact(lines, tokens, start, end = 0):
    """ find_tokens_exact(lines, tokens, start[, end]) -> int

    As find_tokens, but with exact set to True, so that differences
    in whitespace between the line and the tokens are ignored.

    Return -1 on failure."""
    return find_tokens(lines, tokens, start, end, True)
def find_re(lines, rexp, start, end = 0):
    """ find_re(lines, rexp, start[, end]) -> int

    Return the lowest index i, start <= i < end, such that rexp, a
    compiled regular expression object, matches lines[i]. The match
    is made with rexp.match(), so it is anchored at the beginning of
    the line.

    An end of 0 (the default), or an end beyond the list, means
    "search up to the end of lines".

    Return -1 on failure."""
    if end == 0 or end > len(lines):
        end = len(lines)
    for i in range(start, end):
        if rexp.match(lines[i]):
            return i
    return -1
def find_token_backwards(lines, token, start):
    """ find_token_backwards(lines, token, start) -> int

    Search backwards from lines[start] down to lines[0] and return
    the highest index i <= start such that lines[i] begins with
    token.

    start must be a valid index into lines.

    Return -1 on failure."""
    for i in range(start, -1, -1):
        if lines[i].startswith(token):
            return i
    return -1
def find_tokens_backwards(lines, tokens, start):
    """ find_tokens_backwards(lines, tokens, start) -> int

    Search backwards from lines[start] down to lines[0] and return
    the highest index i <= start such that lines[i] begins with one
    of the tokens in tokens.

    start must be a valid index into lines.

    Return -1 on failure."""
    # str.startswith accepts a tuple of alternatives.
    prefixes = tuple(tokens)
    for i in range(start, -1, -1):
        if lines[i].startswith(prefixes):
            return i
    return -1
def get_value(lines, token, start, end = 0, default = ""):
    """ get_value(lines, token, start[[, end], default]) -> string

    Find the next line, between start and end, that looks like:
      token followed by other stuff
    and return "followed by other stuff" with leading and trailing
    whitespace removed.

    The line is located with find_token_exact, so differences in
    whitespace do not matter. The value returned is whatever follows
    the first whitespace-separated word of that line.

    Return default (which itself defaults to "") if no such line is
    found or if nothing follows the token on it."""
    i = find_token_exact(lines, token, start, end)
    if i == -1:
        return default
    # Split off the first word only; the rest of the line is the value.
    parts = lines[i].split(None, 1)
    if len(parts) > 1:
        return parts[1].strip()
    return default
def get_quoted_value(lines, token, start, end = 0, default = ""):
    """ get_quoted_value(lines, token, start[[, end], default]) -> string

    Find the next line that looks like:
      token "followed by other stuff"
    and return "followed by other stuff" with leading and trailing
    whitespace and double quotes removed. If there are no quotes,
    that is OK too. So use get_value to preserve possible quotes and
    this function to remove them.

    Return default if the line is not found or carries no value.
    Note that we will NOT strip quotes from default!"""
    # Ask get_value for "" on failure so that a missing value can be
    # told apart from the caller's default, which must stay untouched.
    val = get_value(lines, token, start, end, "")
    if not val:
        return default
    return val.strip('"')
def del_token(lines, token, start, end = 0):
    """ del_token(lines, token, start[, end]) -> bool

    Find the first line in lines, between start and end, where token
    is the first element (differences in whitespace are ignored) and
    delete that line from lines in place.

    Return True if we deleted a line, False if no line matched."""
    k = find_token_exact(lines, token, start, end)
    if k == -1:
        return False
    del lines[k]
    return True
def find_beginning_of(lines, i, start_token, end_token):
    """ find_beginning_of(lines, i, start_token, end_token) -> int

    Search backwards from line i-1 for the start_token line that
    opens the construct which is still open at line i, skipping over
    nested start_token/end_token pairs.

    Return the index of that line, or -1 if it cannot be found."""
    # Number of end tokens still waiting for their start token; the
    # construct we are looking for counts as one.
    count = 1
    while i > 0:
        i = find_tokens_backwards(lines, [start_token, end_token], i-1)
        if i == -1:
            return -1
        if check_token(lines[i], end_token):
            # A nested construct closes here; its start must be skipped.
            count += 1
        else:
            count -= 1
        if count == 0:
            return i
    return -1
def find_end_of(lines, i, start_token, end_token):
    """ find_end_of(lines, i, start_token, end_token) -> int

    Search forwards from line i+1 for the end_token line that closes
    the construct opened at line i, skipping over nested
    start_token/end_token pairs.

    Return the index of that line, or -1 if it cannot be found."""
    # Number of start tokens still waiting for their end token; the
    # construct opened at line i counts as one.
    count = 1
    n = len(lines)
    while i < n:
        i = find_tokens(lines, [end_token, start_token], i+1)
        if i == -1:
            return -1
        if check_token(lines[i], start_token):
            # A nested construct opens here; its end must be skipped.
            count += 1
        else:
            count -= 1
        if count == 0:
            return i
    return -1
def find_nonempty_line(lines, start, end = 0):
    """ find_nonempty_line(lines, start[, end]) -> int

    Return the lowest index i, start <= i < end, such that lines[i]
    is non-empty according to is_nonempty_line.

    An end of 0 (the default), or an end beyond the list, means
    "search up to the end of lines".

    Return -1 on failure."""
    # Clamp end the same way find_token does, so that an oversized end
    # cannot index past the end of the list.
    if end == 0 or end > len(lines):
        end = len(lines)
    for i in range(start, end):
        if is_nonempty_line(lines[i]):
            return i
    return -1
def find_end_of_inset(lines, i):
    """ find_end_of_inset(lines, i) -> int

    Find the end of the inset that begins at lines[i], i.e. the
    matching \\end_inset line, taking nested insets into account.

    Return -1 if it cannot be found."""
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def find_end_of_layout(lines, i):
    """ find_end_of_layout(lines, i) -> int

    Find the end of the layout that begins at lines[i], i.e. the
    matching \\end_layout line, taking nested layouts into account.

    Return -1 if it cannot be found."""
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
def is_in_inset(lines, i, inset):
    '''
    Checks if line i is in an inset of the given type.
    If so, returns starting and ending lines.
    Otherwise, returns False.
    Example:
      is_in_inset(document.body, i, "\\begin_inset Tabular")
    returns False unless i is within a table. If it is, then
    it returns the line on which the table begins and the one
    on which it ends. Note that this pair will evaluate to
    boolean True, so
      if is_in_inset(...):
    will do what you expect.
    '''
    # Nearest line at or above i that opens an inset of this type.
    stins = find_token_backwards(lines, inset, i)
    if stins == -1:
        return False
    endins = find_end_of_inset(lines, stins)
    # note that this includes the notfound case.
    if endins < i:
        return False
    return (stins, endins)
def get_containing_inset(lines, i):
    '''
    Finds out what kind of inset line i is within. Returns a
    tuple containing what follows \\begin_inset on the line
    on which the inset begins, plus the starting and ending line.
    Returns False on any kind of error or if it isn't in an inset.

    Only the nearest \\begin_inset at or above line i is considered;
    if that inset ends before line i, False is returned.
    '''
    # find_token_backwards takes (lines, token, start): the token comes
    # before the starting line.
    stins = find_token_backwards(lines, "\\begin_inset", i)
    if stins == -1:
        return False
    endins = find_end_of_inset(lines, stins)
    # This also covers the case where the end was not found (-1).
    if endins < i:
        return False
    inset = get_value(lines, "\\begin_inset", stins)
    if inset == "":
        # shouldn't happen
        return False
    return (inset, stins, endins)
def get_containing_layout(lines, i):
    '''
    Finds out what kind of layout line i is within. Returns a
    tuple containing what follows \\begin_layout on the line
    on which the layout begins, plus the starting and ending line.
    Returns False on any kind of error.

    Only the nearest \\begin_layout at or above line i is considered;
    if that layout ends before line i, False is returned.
    '''
    # find_token_backwards takes (lines, token, start): the token comes
    # before the starting line.
    stins = find_token_backwards(lines, "\\begin_layout", i)
    if stins == -1:
        return False
    endins = find_end_of_layout(lines, stins)
    # This also covers the case where the end was not found (-1).
    if endins < i:
        return False
    lay = get_value(lines, "\\begin_layout", stins)
    if lay == "":
        # shouldn't happen
        return False
    return (lay, stins, endins)