1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2002-2011 Dekel Tsur <dekel@lyx.org>,
4 # José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
This module offers several free functions to help parse lines.
More documentation is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets.
find_token(lines, token, start[, end[, ignorews]]):
  Returns the first line i, start <= i < end, on which
  token is found at the beginning. Returns -1 if not
  found.
  If ignorews is (given and) True, then differences
  in whitespace do not count, except that there must be no
  extra whitespace following token itself.
34 find_token_exact(lines, token, start[, end]):
35 As find_token, but with ignorews True.
find_tokens(lines, tokens, start[, end[, ignorews]]):
  Returns the first line i, start <= i < end, on which
  one of the tokens in tokens is found at the beginning.
  Returns -1 if not found.
  If ignorews is (given and) True, then differences
  in whitespace do not count, except that there must be no
  extra whitespace following token itself.
45 find_tokens_exact(lines, token, start[, end]):
46 As find_tokens, but with ignorews True.
48 find_token_backwards(lines, token, start):
49 find_tokens_backwards(lines, tokens, start):
50 As before, but look backwards.
52 find_re(lines, rexp, start[, end]):
53 As find_token, but rexp is a regular expression object,
54 so it has to be passed as e.g.: re.compile(r'...').
get_value(lines, token, start[, end[, default]]):
  Similar to find_token, but it returns what follows the
  token on the found line. Example:
    get_value(document.header, "\\use_xetex", 0)
  will find a line like:
    \\use_xetex true
  and, in that case, return "true". (Note that whitespace
  is stripped.) The final argument, default, defaults to "",
  and is what is returned if we do not find anything. So you
  can use that to set a default.
get_quoted_value(lines, token, start[, end[, default]]):
68 Similar to get_value, but it will strip quotes off the
69 value, if they are present. So use this one for cases
70 where the value is normally quoted.
get_option_value(line, option):
  This assumes we have a line with something like:
    option="value"
  and returns value. Returns "" if not found.
77 del_token(lines, token, start[, end]):
78 Like find_token, but deletes the line if it finds one.
79 Returns True if a line got deleted, otherwise False.
find_beginning_of(lines, i, start_token, end_token):
  Here, start_token and end_token are meant to be a matching
  pair, like "\\begin_layout" and "\\end_layout". We look for
  the start_token that pairs with the end_token that occurs
  on or after line i. Returns -1 if not found.
  So, in the layout case, this would find the \\begin_layout
  for the layout line i is in.
  Example:
    ec = find_token(document.body, "</cell", i)
    bc = find_beginning_of(document.body, ec,
                           "<cell", "</cell")
  Now, assuming no -1s, bc-ec wraps the cell for line i.
find_end_of(lines, i, start_token, end_token):
  Like find_beginning_of, but looking for the matching
  end_token. This might look like:
    bc = find_token(document.body, "<cell", i)
    ec = find_end_of(document.body, bc, "<cell", "</cell")
  Now, assuming no -1s, bc-ec wraps the next cell.
101 find_end_of_inset(lines, i):
102 Specialization of find_end_of for insets.
104 find_end_of_layout(lines, i):
105 Specialization of find_end_of for layouts.
is_in_inset(lines, i, inset):
  Checks if line i is in an inset of the given type.
  If so, returns starting and ending lines. Otherwise,
  returns False.
  Example:
    is_in_inset(document.body, i, "\\begin_inset Tabular")
  returns False unless i is within a table. If it is, then
  it returns the line on which the table begins and the one
  on which it ends. Note that this pair will evaluate to
  boolean True, so
    if is_in_inset(...):
  will do what you expect.
get_containing_inset(lines, i):
  Finds out what kind of inset line i is within. Returns a
  list containing what follows \\begin_inset on the line
  on which the inset begins, plus the starting and ending line.
  Returns False on any kind of error or if it isn't in an inset.
  So get_containing_inset(document.body, i) might return:
    ("CommandInset ref", 300, 306)
  if i is within an InsetRef beginning on line 300 and ending
  on line 306.
130 get_containing_layout(lines, i):
131 As get_containing_inset, but for layout. Additionally returns the
132 position of real paragraph start (after par params) as 4th value.
find_nonempty_line(lines, start[, end]):
136 Finds the next non-empty line.
138 check_token(line, token):
139 Does line begin with token?
141 is_nonempty_line(line):
142 Does line contain something besides whitespace?
148 # Utilities for one line
def check_token(line, token):
    """ check_token(line, token) -> bool

    Return True if line begins with token, else return False.
    An empty token matches every line."""
    return line.startswith(token)
def is_nonempty_line(line):
    """ is_nonempty_line(line) -> bool

    Return False if line is either empty or it has only whitespaces
    (spaces, tabs, line terminators, ...), else return True."""
    # strip() removes every kind of whitespace, so tab-only lines are
    # treated as empty too, as the contract above promises.
    return bool(line.strip())
166 # Utilities for a list of lines
def find_token(lines, token, start, end = 0, ignorews = False):
    """ find_token(lines, token, start[[, end], ignorews]) -> int

    Return the lowest index i, start <= i < end, such that lines[i]
    begins with token.

    An end of 0 (the default), or one beyond len(lines), means
    "search up to the end of lines".

    If ignorews is True (default is False), the line and the token
    are both split on whitespace and compared word by word, so
    differences in the amount of whitespace are ignored; the words
    of token must equal the leading words of the line.

    Return -1 on failure."""
    if end == 0 or end > len(lines):
        end = len(lines)

    if ignorews:
        # Split the token once rather than once per line.
        words = token.split()
        nwords = len(words)
        for i in range(start, end):
            # A line with fewer words than the token can never match:
            # its slice is shorter than `words`.
            if lines[i].split()[:nwords] == words:
                return i
    else:
        for i in range(start, end):
            if lines[i].startswith(token):
                return i
    return -1
def find_token_exact(lines, token, start, end = 0):
    """ find_token_exact(lines, token, start[, end]) -> int

    Same search as find_token, called with its ignorews argument
    set to True."""
    ignore_whitespace = True
    return find_token(lines, token, start, end, ignore_whitespace)
def find_tokens(lines, tokens, start, end = 0, ignorews = False):
    """ find_tokens(lines, tokens, start[[, end], ignorews]) -> int

    Return the lowest index i, start <= i < end, such that lines[i]
    begins with one of the tokens in tokens.

    An end of 0 (the default), or one beyond len(lines), means
    "search up to the end of lines".

    If ignorews is True (default is False), lines and tokens are
    split on whitespace and compared word by word, so differences in
    the amount of whitespace are ignored.

    Return -1 on failure."""
    if end == 0 or end > len(lines):
        end = len(lines)

    if ignorews:
        # Split every token once rather than once per line.
        split_tokens = [token.split() for token in tokens]
        for i in range(start, end):
            words = lines[i].split()
            for wanted in split_tokens:
                if words[:len(wanted)] == wanted:
                    return i
    else:
        # str.startswith accepts a tuple of alternative prefixes.
        prefixes = tuple(tokens)
        for i in range(start, end):
            if lines[i].startswith(prefixes):
                return i
    return -1
def find_tokens_exact(lines, tokens, start, end = 0):
    """ find_tokens_exact(lines, tokens, start[, end]) -> int

    Same search as find_tokens, called with its ignorews argument
    set to True."""
    ignore_whitespace = True
    return find_tokens(lines, tokens, start, end, ignore_whitespace)
def find_re(lines, rexp, start, end = 0):
    """ find_re(lines, rexp, start[, end]) -> int

    Return the lowest index i, start <= i < end, such that rexp, a
    compiled regular expression object, matches at the beginning of
    lines[i] (rexp.match is used, not rexp.search).

    An end of 0 (the default), or one beyond len(lines), means
    "search up to the end of lines".

    Return -1 on failure."""
    if end == 0 or end > len(lines):
        end = len(lines)
    for i in range(start, end):
        if rexp.match(lines[i]):
            return i
    return -1
def find_token_backwards(lines, token, start):
    """ find_token_backwards(lines, token, start) -> int

    Search backwards from lines[start] down to lines[0] and return
    the highest index i <= start such that lines[i] begins with
    token.

    Return -1 on failure."""
    for i in range(start, -1, -1):
        if lines[i].startswith(token):
            return i
    return -1
def find_tokens_backwards(lines, tokens, start):
    """ find_tokens_backwards(lines, tokens, start) -> int

    Search backwards from lines[start] down to lines[0] and return
    the highest index i <= start such that lines[i] begins with one
    of the tokens in tokens.

    Return -1 on failure."""
    # str.startswith accepts a tuple of alternative prefixes.
    prefixes = tuple(tokens)
    for i in range(start, -1, -1):
        if lines[i].startswith(prefixes):
            return i
    return -1
def get_value(lines, token, start, end = 0, default = ""):
    """ get_value(lines, token, start[[, end], default]) -> string

    Find the next line that looks like:
      token followed by other stuff
    Returns "followed by other stuff" with leading and trailing
    whitespace removed.

    The line is located with find_token_exact. The value is whatever
    follows the first whitespace-separated word of that line.

    Returns default if no such line is found, or if the line found
    carries nothing after its first word.
    """
    i = find_token_exact(lines, token, start, end)
    if i == -1:
        return default
    # Split off the first word only; the remainder is the value.
    parts = lines[i].split(None, 1)
    if len(parts) > 1:
        return parts[1].strip()
    return default
def get_quoted_value(lines, token, start, end = 0, default = ""):
    """ get_quoted_value(lines, token, start[[, end], default]) -> string

    Find the next line that looks like:
      token "followed by other stuff"
    Returns "followed by other stuff" with leading and trailing
    whitespace and quotes removed. If there are no quotes, that is OK too.
    So use get_value to preserve possible quotes, this one to remove them,
    if they are there.
    Note that we will NOT strip quotes from default!
    """
    # Ask get_value for "" on failure so that a missing value can be
    # told apart from default, which must be returned untouched.
    val = get_value(lines, token, start, end, "")
    if not val:
        return default
    return val.strip('"')
def get_option_value(line, option):
    """ get_option_value(line, option) -> string

    This assumes we have a line with something like:
      option="value"
    and returns value (without the quotes). Whitespace around the
    equals sign is allowed. Returns "" if not found.

    Note that option is inserted into a regular expression as is,
    and that only a non-empty quoted value is recognised."""
    # Raw string: \s must reach the regex engine, not the Python parser.
    rx = re.compile(option + r'\s*=\s*"([^"]+)"')
    m = rx.search(line)
    if not m:
        return ""
    return m.group(1)
def set_option_value(line, option, value):
    """ set_option_value(line, option, value) -> string

    This assumes we have a line with something like:
      option="oldvalue"
    and returns a copy of line in which the quoted value has been
    replaced by value. Every such occurrence in line is replaced.
    Returns line unchanged if the option is not found.

    Note that option is inserted into a regular expression as is,
    and that only a non-empty quoted value is recognised."""
    # Raw string: \s must reach the regex engine, not the Python parser.
    rx = re.compile('(' + option + r'\s*=\s*")[^"]+"')
    # Use a replacement function rather than a replacement template,
    # so that backslashes in value (frequent in LaTeX-ish content,
    # e.g. "\textwidth") are inserted literally and not treated as
    # regex escapes.
    return rx.sub(lambda m: m.group(1) + value + '"', line)
def del_token(lines, token, start, end = 0):
    """ del_token(lines, token, start[, end]) -> bool

    Find the first line in lines where token is the first element
    and delete that line. The line is located with find_token_exact,
    and lines is modified in place. Returns True if we deleted a
    line, False if we did not."""
    k = find_token_exact(lines, token, start, end)
    if k == -1:
        return False
    del lines[k]
    return True
def find_beginning_of(lines, i, start_token, end_token):
    """ find_beginning_of(lines, i, start_token, end_token) -> int

    start_token and end_token are meant to be a matching pair, like
    "\\begin_layout" and "\\end_layout". Searching backwards from
    line i-1, return the index of the start_token that opens the
    construct line i belongs to, skipping over complete nested
    start_token/end_token pairs on the way.

    Return -1 if not found."""
    # Number of constructs still to be opened; an end_token seen while
    # walking backwards means one more nested pair to skip.
    depth = 1
    while i > 0:
        i = find_tokens_backwards(lines, [start_token, end_token], i - 1)
        if i == -1:
            return -1
        if check_token(lines[i], end_token):
            depth += 1
        else:
            depth -= 1
        if depth == 0:
            return i
    return -1
def find_end_of(lines, i, start_token, end_token):
    """ find_end_of(lines, i, start_token, end_token) -> int

    start_token and end_token are meant to be a matching pair, like
    "\\begin_layout" and "\\end_layout". Searching forwards from
    line i+1, return the index of the end_token that closes the
    construct opened on line i, skipping over complete nested
    start_token/end_token pairs on the way.

    Return -1 if not found."""
    # Number of constructs still to be closed; a start_token seen on
    # the way means one more nested pair to skip.
    depth = 1
    n = len(lines)
    while i < n:
        i = find_tokens(lines, [end_token, start_token], i + 1)
        if i == -1:
            return -1
        if check_token(lines[i], start_token):
            depth += 1
        else:
            depth -= 1
        if depth == 0:
            return i
    return -1
def find_nonempty_line(lines, start, end = 0):
    """ find_nonempty_line(lines, start[, end]) -> int

    Return the lowest index i, start <= i < end, such that
    is_nonempty_line(lines[i]) is true.

    An end of 0 (the default), or one beyond len(lines), means
    "search up to the end of lines".

    Return -1 on failure."""
    # Clamp end like the other search helpers do, so that an
    # over-large end cannot index past the list.
    if end == 0 or end > len(lines):
        end = len(lines)
    for i in range(start, end):
        if is_nonempty_line(lines[i]):
            return i
    return -1
def find_end_of_inset(lines, i):
    """ find_end_of_inset(lines, i) -> int

    Find end of inset, where lines[i] is included. This is
    find_end_of applied to the "\\begin_inset" / "\\end_inset"
    token pair."""
    opener, closer = "\\begin_inset", "\\end_inset"
    return find_end_of(lines, i, opener, closer)
def find_end_of_layout(lines, i):
    """ find_end_of_layout(lines, i) -> int

    Find end of layout, where lines[i] is included. This is
    find_end_of applied to the "\\begin_layout" / "\\end_layout"
    token pair."""
    opener, closer = "\\begin_layout", "\\end_layout"
    return find_end_of(lines, i, opener, closer)
def is_in_inset(lines, i, inset):
    '''
    Checks if line i is in an inset of the given type.
    If so, returns starting and ending lines.
    Otherwise, returns False.
    Example:
      is_in_inset(document.body, i, "\\begin_inset Tabular")
    returns False unless i is within a table. If it is, then
    it returns the line on which the table begins and the one
    on which it ends. Note that this pair will evaluate to
    boolean True, so
      if is_in_inset(...):
    will do what you expect.
    '''
    # Nearest line at or before i that opens an inset of this type.
    stins = find_token_backwards(lines, inset, i)
    if stins == -1:
        return False
    endins = find_end_of_inset(lines, stins)
    # note that this includes the notfound case.
    if endins < i:
        return False
    return (stins, endins)
def get_containing_inset(lines, i):
    '''
    Finds out what kind of inset line i is within. Returns a
    tuple containing what follows \\begin_inset on the line
    on which the inset begins, plus the starting and ending line.
    Returns False on any kind of error or if it isn't in an inset.
    '''
    j = i
    while True:
        stins = find_token_backwards(lines, "\\begin_inset", j)
        if stins == -1:
            return False
        endins = find_end_of_inset(lines, stins)
        # The inset contains line i only if it ends after i. Comparing
        # with i, not with the moving cursor j, keeps us from returning
        # an enclosing inset that was already closed before line i.
        if endins > i:
            break
        # This inset ends before line i (or never ends): look for an
        # inset that begins further up.
        j = stins - 1

    inset = get_value(lines, "\\begin_inset", stins)
    if inset == "":
        # shouldn't happen
        return False
    return (inset, stins, endins)
def get_containing_layout(lines, i):
    '''
    Finds out what kind of layout line i is within. Returns a
    tuple containing what follows \\begin_layout on the line
    on which the layout begins, plus the starting and ending line
    and the start of the paragraph (after all params).
    Returns False on any kind of error.
    '''
    j = i
    while True:
        stlay = find_token_backwards(lines, "\\begin_layout", j)
        if stlay == -1:
            return False
        endlay = find_end_of_layout(lines, stlay)
        if endlay > i:
            break
        # This layout ends before line i (or never ends): look for a
        # layout that begins further up.
        j = stlay - 1

    lay = get_value(lines, "\\begin_layout", stlay)
    if lay == "":
        # shouldn't happen
        return False
    par_params = ["\\noindent", "\\indent", "\\indent-toggle", "\\leftindent",
                  "\\start_of_appendix", "\\paragraph_spacing single",
                  "\\paragraph_spacing onehalf", "\\paragraph_spacing double",
                  "\\paragraph_spacing other", "\\align", "\\labelwidthstring"]
    # Skip the paragraph parameter lines that follow \begin_layout.
    # A parameter may stand alone or be followed by a value (e.g.
    # "\align center"), so accept both forms.
    stpar = stlay
    while True:
        stpar += 1
        line = lines[stpar]
        if not any(line == p or line.startswith(p + " ") for p in par_params):
            break
    return (lay, stlay, endlay, stpar)
475 def count_pars_in_inset(lines, i):
477 Counts the paragraphs within this inset
479 ins = get_containing_inset(lines, i)
483 for j in range(ins[1], ins[2]):
484 m = re.match(r'\\begin_layout (.*)', lines[j])
485 if m and get_containing_inset(lines, j)[0] == ins[0]: