1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2002-2010 Dekel Tsur <dekel@lyx.org>,
4 # José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 This module offers several free functions to help parse lines.
23 More documentation is below, but here is a quick guide to what
24 they do. Optional arguments are marked by brackets.
26 find_token(lines, token, start[, end[, ignorews]]):
27 Returns the first line i, start <= i < end, on which
28 token is found at the beginning. Returns -1 if not
29 found. If ignorews is (given and) True, then differences
30 in whitespace do not count.
32 find_token_exact(lines, token, start[, end]):
33 Badly named. As find_token, but with ignorews True.
35 find_tokens(lines, tokens, start[, end[, ignorews]]):
36 Returns the first line i, start <= i < end, on which
37 one of the tokens in tokens is found at the beginning.
38 Returns -1 if not found. If ignorews is (given and) True,
39 then differences in whitespace do not count.
41 find_tokens_exact(lines, token, start[, end]):
42 Badly named. As find_tokens, but with ignorews True.
44 find_token_backwards(lines, token, start):
45 find_tokens_backwards(lines, tokens, start):
46 As before, but look backwards.
48 find_re(lines, rexp, start[, end]):
49 As find_token, but rexp is a regular expression object,
50 so it has to be passed as e.g.: re.compile(r'...').
52 get_value(lines, token, start[, end[, default]]):
53 Similar to find_token, but it returns what follows the
54 token on the found line. Example:
55 get_value(document.header, "\use_xetex", 0)
56 will find a line like:
58 and, in that case, return "true". (Note that whitespace
59 is stripped.) The final argument, default, defaults to "",
60 and is what is returned if we do not find anything. So you
61 can use that to set a default.
63 get_quoted_value(lines, token, start[, end[, default]]):
64 Similar to get_value, but it will strip quotes off the
65 value, if they are present. So use this one for cases
66 where the value is normally quoted.
68 del_token(lines, token, start[, end]):
69 Like find_token, but deletes the line if it finds one.
70 Returns True if a line got deleted, otherwise False.
72 find_beginning_of(lines, i, start_token, end_token):
73 Here, start_token and end_token are meant to be a matching
74 pair, like "\begin_layout" and "\end_layout". We look for
75 the start_token that pairs with the end_token that occurs
76 on or after line i. Returns -1 if not found.
77 So, in the layout case, this would find the \begin_layout
78 for the layout line i is in.
80 ec = find_token(document.body, "</cell", i)
81 bc = find_beginning_of(document.body, ec, \
83 Now, assuming no -1s, bc-ec wraps the cell for line i.
85 find_end_of(lines, i, start_token, end_token):
86 Like find_beginning_of, but looking for the matching
87 end_token. This might look like:
88 bc = find_token(document.body, "<cell", i)
89 ec = find_end_of(document.body, bc, "<cell", "</cell")
90 Now, assuming no -1s, bc-ec wrap the next cell.
92 find_end_of_inset(lines, i):
93 Specialization of find_end_of for insets.
95 find_end_of_layout(lines, i):
96 Specialization of find_end_of for layouts.
98 is_in_inset(lines, i, inset):
99 Checks if line i is in an inset of the given type.
100 If so, returns starting and ending lines. Otherwise,
103 is_in_inset(document.body, i, "\\begin_inset Tabular")
104 returns False unless i is within a table. If it is, then
105 it returns the line on which the table begins and the one
106 on which it ends. Note that this pair will evaluate to
109 will do what you expect.
111 get_containing_inset(lines, i):
112 Finds out what kind of inset line i is within. Returns a
113 list containing what follows \begin_inset on the line
114 on which the inset begins, plus the starting and ending line.
115 Returns False on any kind of error or if it isn't in an inset.
116 So get_containing_inset(document.body, i) might return:
117 ("CommandInset ref", 300, 306)
118 if i is within an InsetRef beginning on line 300 and ending
121 get_containing_layout(lines, i):
122 As get_containing_inset, but for layout.
125 find_nonempty_line(lines, start[, end]):
126 Finds the next non-empty line.
128 check_token(line, token):
129 Does line begin with token?
131 is_nonempty_line(line):
132 Does line contain something besides whitespace?
136 # Utilities for one line
def check_token(line, token):
    """check_token(line, token) -> bool

    Tell whether line begins with token.

    Returns True when the first len(token) characters of line are
    equal to token, and False otherwise."""
    prefix = line[:len(token)]
    return prefix == token
def is_nonempty_line(line):
    """is_nonempty_line(line) -> bool

    Return False if line is either empty or consists only of
    whitespace (spaces, tabs, newlines, ...); otherwise return True."""
    # strip() removes every kind of whitespace, so a tab-only line or a
    # line holding just a newline counts as empty, as documented.
    # Comparing against a run of spaces only recognised blanks.
    return bool(line.strip())
154 # Utilities for a list of lines
155 def find_token(lines, token, start, end = 0, ignorews = False):
156 """ find_token(lines, token, start[[, end], ignorews]) -> int
158 Return the lowest line where token is found, and is the first
159 element, in lines[start, end].
161 If ignorews is True (default is False), then differences in
162 whitespace are ignored.
164 Return -1 on failure."""
166 if end == 0 or end > len(lines):
169 for i in xrange(start, end):
178 if lines[i][:m] == token:
def find_token_exact(lines, token, start, end = 0):
    """find_token_exact(lines, token, start[, end]) -> int

    Shorthand for find_token() with ignorews switched on. The search
    arguments are forwarded unchanged and find_token()'s result is
    returned as is."""
    return find_token(lines, token, start, end, ignorews=True)
187 def find_tokens(lines, tokens, start, end = 0, ignorews = False):
188 """ find_tokens(lines, tokens, start[[, end], ignorews]) -> int
190 Return the lowest line where one token in tokens is found, and is
191 the first element, in lines[start, end].
193 Return -1 on failure."""
194 if end == 0 or end > len(lines):
197 for i in xrange(start, end):
207 if lines[i][:len(token)] == token:
def find_tokens_exact(lines, tokens, start, end = 0):
    """find_tokens_exact(lines, tokens, start[, end]) -> int

    Shorthand for find_tokens() with ignorews switched on. The search
    arguments are forwarded unchanged and find_tokens()'s result is
    returned as is."""
    return find_tokens(lines, tokens, start, end, ignorews=True)
216 def find_re(lines, rexp, start, end = 0):
217 """ find_re(lines, rexp, start[, end]) -> int
219 Return the lowest line where rexp, a regular expression, is found
220 in lines[start, end].
222 Return -1 on failure."""
224 if end == 0 or end > len(lines):
226 for i in xrange(start, end):
227 if rexp.match(lines[i]):
232 def find_token_backwards(lines, token, start):
233 """ find_token_backwards(lines, token, start) -> int
235 Return the highest line where token is found, and is the first
236 element, in lines[0, start].
238 Return -1 on failure."""
240 for i in xrange(start, -1, -1):
242 if line[:m] == token:
247 def find_tokens_backwards(lines, tokens, start):
248 """ find_tokens_backwards(lines, tokens, start) -> int
250 Return the highest line where one token in tokens is found, and is
251 the first element, in lines[0, start].
253 Return -1 on failure."""
254 for i in xrange(start, -1, -1):
257 if line[:len(token)] == token:
262 def get_value(lines, token, start, end = 0, default = ""):
263 """ get_value(lines, token, start[[, end], default]) -> string
265 Find the next line that looks like:
266 token followed by other stuff
267 Returns "followed by other stuff" with leading and trailing
271 i = find_token_exact(lines, token, start, end)
274 l = lines[i].split(None, 1)
280 def get_quoted_value(lines, token, start, end = 0, default = ""):
281 """ get_quoted_value(lines, token, start[[, end], default]) -> string
283 Find the next line that looks like:
284 token "followed by other stuff"
285 Returns "followed by other stuff" with leading and trailing
286 whitespace and quotes removed. If there are no quotes, that is OK too.
287 So use get_value to preserve possible quotes, this one to remove them,
289 Note that we will NOT strip quotes from default!
291 val = get_value(lines, token, start, end, "")
294 return val.strip('"')
297 def del_token(lines, token, start, end = 0):
298 """ del_token(lines, token, start[, end]) -> bool
300 Find the first line in lines where token is the first element
301 and delete that line. Returns True if we deleted a line, False
304 k = find_token_exact(lines, token, start, end)
311 def find_beginning_of(lines, i, start_token, end_token):
314 i = find_tokens_backwards(lines, [start_token, end_token], i-1)
317 if check_token(lines[i], end_token):
326 def find_end_of(lines, i, start_token, end_token):
330 i = find_tokens(lines, [end_token, start_token], i+1)
333 if check_token(lines[i], start_token):
342 def find_nonempty_line(lines, start, end = 0):
345 for i in xrange(start, end):
346 if is_nonempty_line(lines[i]):
def find_end_of_inset(lines, i):
    """Find end of inset, where lines[i] is included.

    Delegates to find_end_of() with the "\\begin_inset" /
    "\\end_inset" token pair and returns its result."""
    begin_token = "\\begin_inset"
    end_token = "\\end_inset"
    return find_end_of(lines, i, begin_token, end_token)
def find_end_of_layout(lines, i):
    """Find end of layout, where lines[i] is included.

    Delegates to find_end_of() with the "\\begin_layout" /
    "\\end_layout" token pair and returns its result."""
    begin_token = "\\begin_layout"
    end_token = "\\end_layout"
    return find_end_of(lines, i, begin_token, end_token)
361 def is_in_inset(lines, i, inset):
363 Checks if line i is in an inset of the given type.
364 If so, returns starting and ending lines.
365 Otherwise, returns False.
367 is_in_inset(document.body, i, "\\begin_inset Tabular")
368 returns False unless i is within a table. If it is, then
369 it returns the line on which the table begins and the one
370 on which it ends. Note that this pair will evaluate to
373 will do what you expect.
376 stins = find_token_backwards(lines, inset, i)
379 endins = find_end_of_inset(lines, stins)
380 # note that this includes the notfound case.
383 return (stins, endins)
def get_containing_inset(lines, i):
    r"""Find out what kind of inset line i is within.

    Returns a tuple (inset, start, end), where inset is what follows
    \begin_inset on the line on which the inset begins, and start and
    end are the lines on which the inset begins and ends.

    Returns False on any kind of error or if line i isn't in an inset.
    """
    # find_token_backwards() takes (lines, token, start): the token comes
    # before the line number. Passing them the other way round makes the
    # search use the integer as the token.
    stins = find_token_backwards(lines, "\\begin_inset", i)
    if stins == -1:
        # no \begin_inset at or before line i
        return False
    endins = find_end_of_inset(lines, stins)
    # note that this includes the notfound case (endins == -1).
    if endins < i:
        return False
    inset = get_value(lines, "\\begin_inset", stins)
    if inset == "":
        # nothing follows \begin_inset on that line; shouldn't happen
        return False
    return (inset, stins, endins)
def get_containing_layout(lines, i):
    r"""Find out what kind of layout line i is within.

    Returns a tuple (layout, start, end), where layout is what follows
    \begin_layout on the line on which the layout begins, and start and
    end are the lines on which the layout begins and ends.

    Returns False on any kind of error.
    """
    # find_token_backwards() takes (lines, token, start): the token comes
    # before the line number. Passing them the other way round makes the
    # search use the integer as the token.
    stins = find_token_backwards(lines, "\\begin_layout", i)
    if stins == -1:
        # no \begin_layout at or before line i
        return False
    endins = find_end_of_layout(lines, stins)
    # note that this includes the notfound case (endins == -1).
    if endins < i:
        return False
    lay = get_value(lines, "\\begin_layout", stins)
    if lay == "":
        # nothing follows \begin_layout on that line; shouldn't happen
        return False
    return (lay, stins, endins)