lib/lyx2lyx/parser_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2002-2010 Dekel Tsur <dekel@lyx.org>,
   4 # José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20
  21 '''
  22 This module offers several free functions to help parse lines.
  23 More documentation is below, but here is a quick guide to what
  24 they do. Optional arguments are marked by brackets.
  25
  26 find_token(lines, token, start[, end[, exact]]):
  27   Returns the first line i, start <= i < end, on which
  28   token is found at the beginning. Returns -1 if not
  29   found. If exact is (given and) True, then differences
  30   in whitespace do not count.
  31
  32 find_token_exact(lines, token, start[, end]):
  33   Badly named. As find_token, but with exact True.
  34
  35 find_tokens(lines, tokens, start[, end[, exact]]):
  36   Returns the first line i, start <= i < end, on which
  37   one of the tokens in tokens is found at the beginning.
  38   Returns -1 if not found. If exact is (given and) True,
  39   then differences in whitespace do not count.
  40
  41 find_tokens_exact(lines, token, start[, end]):
  42   Badly named. As find_tokens, but with exact True.
  43
  44 find_token_backwards(lines, token, start):
  45 find_tokens_backwards(lines, tokens, start):
  46   As before, but look backwards.
  47
  48 find_re(lines, rexp, start[, end]):
  49   As find_token, but rexp is a regular expression object,
  50   so it has to be passed as e.g.: re.compile(r'...').
  51
  52 get_value(lines, token, start[, end[, default]]):
  53   Similar to find_token, but it returns what follows the
  54   token on the found line. Example:
  55     get_value(document.header, "\\use_xetex", 0)
  56   will find a line like:
  57     \\use_xetex true
  58   and, in that case, return "true". (Note that whitespace
  59   is stripped.) The final argument, default, defaults to "",
  60   and is what is returned if we do not find anything. So you
  61   can use that to set a default.
  62
  63 get_quoted_value(lines, token, start[, end[, default]]):
  64   Similar to get_value, but it will strip quotes off the
  65   value, if they are present. So use this one for cases
  66   where the value is normally quoted.
  67
  68 del_token(lines, token, start[, end]):
  69   Like find_token, but deletes the line if it finds one.
  70   Returns True if a line got deleted, otherwise False.
  71 '''
  72
  73 # Utilities for one line
  74 def check_token(line, token):
  75     """ check_token(line, token) -> bool
  76
  77     Return True if token is present in line and is the first element
  78     else returns False."""
  79
  80     return line[:len(token)] == token
  81
  82
  83 def is_nonempty_line(line):
  84     """ is_nonempty_line(line) -> bool
  85
  86     Return False if line is either empty or it has only whitespaces,
  87     else return True."""
  88     return line != " "*len(line)
  89
  90
  91 # Utilities for a list of lines
  92 def find_token(lines, token, start, end = 0, exact = False):
  93     """ find_token(lines, token, start[[, end], exact]) -> int
  94
  95     Return the lowest line where token is found, and is the first
  96     element, in lines[start, end].
  97
  98     If exact is True (default is False), then differences in
  99     whitespace are ignored.
 100
 101     Return -1 on failure."""
 102
 103     if end == 0 or end > len(lines):
 104         end = len(lines)
 105     m = len(token)
 106     for i in xrange(start, end):
 107         if exact:
 108             x = lines[i].split()
 109             y = token.split()
 110             if len(x) < len(y):
 111                 continue
 112             if x[:len(y)] == y:
 113                 return i
 114         else:
 115             if lines[i][:m] == token:
 116                 return i
 117     return -1
 118
 119
 120 def find_token_exact(lines, token, start, end = 0):
 121     return find_token(lines, token, start, end, True)
 122
 123
 124 def find_tokens(lines, tokens, start, end = 0, exact = False):
 125     """ find_tokens(lines, tokens, start[[, end], exact]) -> int
 126
 127     Return the lowest line where one token in tokens is found, and is
 128     the first element, in lines[start, end].
 129
 130     Return -1 on failure."""
 131     if end == 0 or end > len(lines):
 132         end = len(lines)
 133
 134     for i in xrange(start, end):
 135         for token in tokens:
 136             if exact:
 137                 x = lines[i].split()
 138                 y = token.split()
 139                 if len(x) < len(y):
 140                     continue
 141                 if x[:len(y)] == y:
 142                     return i
 143             else:
 144                 if lines[i][:len(token)] == token:
 145                     return i
 146     return -1
 147
 148
 149 def find_tokens_exact(lines, tokens, start, end = 0):
 150     return find_tokens(lines, tokens, start, end, True)
 151
 152
 153 def find_re(lines, rexp, start, end = 0):
 154     """ find_token_re(lines, rexp, start[, end]) -> int
 155
 156     Return the lowest line where rexp, a regular expression, is found
 157     in lines[start, end].
 158
 159     Return -1 on failure."""
 160
 161     if end == 0 or end > len(lines):
 162         end = len(lines)
 163     for i in xrange(start, end):
 164         if rexp.match(lines[i]):
 165                 return i
 166     return -1
 167
 168
 169 def find_token_backwards(lines, token, start):
 170     """ find_token_backwards(lines, token, start) -> int
 171
 172     Return the highest line where token is found, and is the first
 173     element, in lines[start, end].
 174
 175     Return -1 on failure."""
 176     m = len(token)
 177     for i in xrange(start, -1, -1):
 178         line = lines[i]
 179         if line[:m] == token:
 180             return i
 181     return -1
 182
 183
 184 def find_tokens_backwards(lines, tokens, start):
 185     """ find_tokens_backwards(lines, token, start) -> int
 186
 187     Return the highest line where token is found, and is the first
 188     element, in lines[end, start].
 189
 190     Return -1 on failure."""
 191     for i in xrange(start, -1, -1):
 192         line = lines[i]
 193         for token in tokens:
 194             if line[:len(token)] == token:
 195                 return i
 196     return -1
 197
 198
 199 def get_value(lines, token, start, end = 0, default = ""):
 200     """ get_value(lines, token, start[[, end], default]) -> string
 201
 202     Find the next line that looks like:
 203       token followed by other stuff
 204     Returns "followed by other stuff" with leading and trailing
 205     whitespace removed.
 206     """
 207
 208     i = find_token_exact(lines, token, start, end)
 209     if i == -1:
 210         return default
 211     l = lines[i].split(None, 1)
 212     if len(l) > 1:
 213         return l[1].strip()
 214     return default
 215
 216
 217 def get_quoted_value(lines, token, start, end = 0, default = ""):
 218     """ get_quoted_value(lines, token, start[[, end], default]) -> string
 219
 220     Find the next line that looks like:
 221       token "followed by other stuff"
 222     Returns "followed by other stuff" with leading and trailing
 223     whitespace and quotes removed. If there are no quotes, that is OK too.
 224     So use get_value to preserve possible quotes, this one to remove them,
 225     if they are there.
 226     Note that we will NOT strip quotes from default!
 227     """
 228     val = get_value(lines, token, start, end, "")
 229     if not val:
 230       return default
 231     return val.strip('"')
 232
 233
 234 def del_token(lines, token, start, end = 0):
 235     """ del_token(lines, token, start, end) -> int
 236
 237     Find the first line in lines where token is the first element
 238     and delete that line. Returns True if we deleted a line, False
 239     if we did not."""
 240
 241     k = find_token_exact(lines, token, start, end)
 242     if k == -1:
 243         return False
 244     del lines[k]
 245     return True
 246
 247
 248 def find_beginning_of(lines, i, start_token, end_token):
 249     count = 1
 250     while i > 0:
 251         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
 252         if i == -1:
 253             return -1
 254         if check_token(lines[i], end_token):
 255             count = count+1
 256         else:
 257             count = count-1
 258         if count == 0:
 259             return i
 260     return -1
 261
 262
 263 def find_end_of(lines, i, start_token, end_token):
 264     count = 1
 265     n = len(lines)
 266     while i < n:
 267         i = find_tokens(lines, [end_token, start_token], i+1)
 268         if i == -1:
 269             return -1
 270         if check_token(lines[i], start_token):
 271             count = count+1
 272         else:
 273             count = count-1
 274         if count == 0:
 275             return i
 276     return -1
 277
 278
 279 def find_nonempty_line(lines, start, end = 0):
 280     if end == 0:
 281         end = len(lines)
 282     for i in xrange(start, end):
 283         if is_nonempty_line(lines[i]):
 284             return i
 285     return -1
 286
 287
 288 def find_end_of_inset(lines, i):
 289     " Find end of inset, where lines[i] is included."
 290     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
 291
 292
 293 def find_end_of_layout(lines, i):
 294     " Find end of layout, where lines[i] is included."
 295     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
 296
 297
 298 def is_in_inset(lines, i, inset):
 299     '''
 300     Checks if line i is in an inset of the given type.
 301     If so, returns starting and ending lines.
 302     Otherwise, returns False.
 303     Example:
 304       is_in_inset(document.body, i, "\\begin_inset Tabular")
 305     returns False unless i is within a table. If it is, then
 306     it returns the line on which the table begins and the one
 307     on which it ends. Note that this pair will evaulate to
 308     boolean True, so
 309       if is_in_inset(...):
 310     will do what you expect.
 311     '''
 312     defval = (-1, -1)
 313     stins = find_token_backwards(lines, inset, i)
 314     if stins == -1:
 315       return defval
 316     endins = find_end_of_inset(lines, stins)
 317     # note that this includes the notfound case.
 318     if endins < i:
 319       return defval
 320     return (stins, endins)
 321
 322
 323 def get_containing_inset(lines, i):
 324   '''
 325   Finds out what kind of inset line i is within. Returns a
 326   list containing (i) what follows \begin_inset on the the line
 327   on which the inset begins, plus the starting and ending line.
 328   Returns False on any kind of error or if it isn't in an inset.
 329   '''
 330   stins = find_token_backwards(lines, i, "\\begin_inset")
 331   if stins == -1:
 332       return False
 333   endins = find_end_of_inset(lines, stins)
 334   if endins < i:
 335       return False
 336   inset = get_value(lines, "\\begin_inset", stins)
 337   if inset == "":
 338       # shouldn't happen
 339       return False
 340   return (inset, stins, endins)
 341
 342
 343 def get_containing_layout(lines, i):
 344   '''
 345   Finds out what kind of layout line i is within. Returns a
 346   list containing (i) what follows \begin_layout on the the line
 347   on which the layout begins, plus the starting and ending line.
 348   Returns False on any kind of error.
 349   '''
 350   stins = find_token_backwards(lines, i, "\\begin_layout")
 351   if stins == -1:
 352       return False
 353   endins = find_end_of_layout(lines, stins)
 354   if endins < i:
 355       return False
 356   lay = get_value(lines, "\\begin_layout", stins)
 357   if lay == "":
 358       # shouldn't happen
 359       return False
 360   return (lay, stins, endins)