lib/lyx2lyx/parser_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2002-2010 Dekel Tsur <dekel@lyx.org>,
   4 # José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
" This module offers several free functions to help parse lines. "
  21
  22 # Utilities for one line
  23 def check_token(line, token):
  24     """ check_token(line, token) -> bool
  25
  26     Return True if token is present in line and is the first element
  27     else returns False."""
  28
  29     return line[:len(token)] == token
  30
  31
  32 def is_nonempty_line(line):
  33     """ is_nonempty_line(line) -> bool
  34
  35     Return False if line is either empty or it has only whitespaces,
  36     else return True."""
  37     return line != " "*len(line)
  38
  39
  40 # Utilities for a list of lines
  41 def find_token(lines, token, start, end = 0, exact = False):
  42     """ find_token(lines, token, start[[, end], exact]) -> int
  43
  44     Return the lowest line where token is found, and is the first
  45     element, in lines[start, end].
  46
  47     If exact is True (default is False), then differences in
  48     whitespace are ignored.
  49
  50     Return -1 on failure."""
  51
  52     if end == 0 or end > len(lines):
  53         end = len(lines)
  54     m = len(token)
  55     for i in xrange(start, end):
  56         if exact:
  57             x = lines[i].split()
  58             y = token.split()
  59             if len(x) < len(y):
  60                 continue
  61             if x[:len(y)] == y:
  62                 return i
  63         else:
  64             if lines[i][:m] == token:
  65                 return i
  66     return -1
  67
  68
  69 def find_token_exact(lines, token, start, end = 0):
  70     return find_token(lines, token, start, end, True)
  71
  72
  73 def find_tokens(lines, tokens, start, end = 0, exact = False):
  74     """ find_tokens(lines, tokens, start[[, end], exact]) -> int
  75
  76     Return the lowest line where one token in tokens is found, and is
  77     the first element, in lines[start, end].
  78
  79     Return -1 on failure."""
  80     if end == 0:
  81         end = len(lines)
  82
  83     for i in xrange(start, end):
  84         for token in tokens:
  85             if exact:
  86                 x = lines[i].split()
  87                 y = token.split()
  88                 if len(x) < len(y):
  89                     continue
  90                 if x[:len(y)] == y:
  91                     return i
  92             else:
  93                 if lines[i][:len(token)] == token:
  94                     return i
  95     return -1
  96
  97
  98 def find_tokens_exact(lines, tokens, start, end = 0):
  99     return find_tokens(lines, tokens, start, end, True)
 100
 101
 102 def find_re(lines, rexp, start, end = 0):
 103     """ find_token_re(lines, rexp, start[, end]) -> int
 104
 105     Return the lowest line where rexp, a regular expression, is found
 106     in lines[start, end].
 107
 108     Return -1 on failure."""
 109
 110     if end == 0:
 111         end = len(lines)
 112     for i in xrange(start, end):
 113         if rexp.match(lines[i]):
 114                 return i
 115     return -1
 116
 117
 118 def find_token_backwards(lines, token, start):
 119     """ find_token_backwards(lines, token, start) -> int
 120
 121     Return the highest line where token is found, and is the first
 122     element, in lines[start, end].
 123
 124     Return -1 on failure."""
 125     m = len(token)
 126     for i in xrange(start, -1, -1):
 127         line = lines[i]
 128         if line[:m] == token:
 129             return i
 130     return -1
 131
 132
 133 def find_tokens_backwards(lines, tokens, start):
 134     """ find_tokens_backwards(lines, token, start) -> int
 135
 136     Return the highest line where token is found, and is the first
 137     element, in lines[end, start].
 138
 139     Return -1 on failure."""
 140     for i in xrange(start, -1, -1):
 141         line = lines[i]
 142         for token in tokens:
 143             if line[:len(token)] == token:
 144                 return i
 145     return -1
 146
 147
 148 def get_value(lines, token, start, end = 0, default = ""):
 149     """ get_value(lines, token, start[[, end], default]) -> string
 150
 151     Find the next line that looks like:
 152       token followed by other stuff
 153     Returns "followed by other stuff" with leading and trailing
 154     whitespace removed.
 155     """
 156
 157     i = find_token_exact(lines, token, start, end)
 158     if i == -1:
 159         return default
 160     l = lines[i].split(None, 1)
 161     if len(l) > 1:
 162         return l[1].strip()
 163     return default
 164
 165
 166 def get_quoted_value(lines, token, start, end = 0, default = ""):
 167     """ get_quoted_value(lines, token, start[[, end], default]) -> string
 168
 169     Find the next line that looks like:
 170       token "followed by other stuff"
 171     Returns "followed by other stuff" with leading and trailing
 172     whitespace and quotes removed. If there are no quotes, that is OK too.
 173     So use get_value to preserve possible quotes, this one to remove them,
 174     if they are there.
 175     Note that we will NOT strip quotes from default!
 176     """
 177     val = get_value(lines, token, start, end, "")
 178     if not val:
 179       return default
 180     return val.strip('"')
 181
 182
 183 def del_token(lines, token, start, end):
 184     """ del_token(lines, token, start, end) -> int
 185
 186     Find the lower line in lines where token is the first element and
 187     delete that line.
 188
 189     Returns the number of lines remaining."""
 190
 191     k = find_token_exact(lines, token, start, end)
 192     if k == -1:
 193         return end
 194     else:
 195         del lines[k]
 196         return end - 1
 197
 198
 199 def find_beginning_of(lines, i, start_token, end_token):
 200     count = 1
 201     while i > 0:
 202         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
 203         if i == -1:
 204             return -1
 205         if check_token(lines[i], end_token):
 206             count = count+1
 207         else:
 208             count = count-1
 209         if count == 0:
 210             return i
 211     return -1
 212
 213
 214 def find_end_of(lines, i, start_token, end_token):
 215     count = 1
 216     n = len(lines)
 217     while i < n:
 218         i = find_tokens(lines, [end_token, start_token], i+1)
 219         if i == -1:
 220             return -1
 221         if check_token(lines[i], start_token):
 222             count = count+1
 223         else:
 224             count = count-1
 225         if count == 0:
 226             return i
 227     return -1
 228
 229
 230 def find_nonempty_line(lines, start, end = 0):
 231     if end == 0:
 232         end = len(lines)
 233     for i in xrange(start, end):
 234         if is_nonempty_line(lines[i]):
 235             return i
 236     return -1
 237
 238
 239 def find_end_of_inset(lines, i):
 240     " Find end of inset, where lines[i] is included."
 241     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
 242
 243
 244 def find_end_of_layout(lines, i):
 245     " Find end of layout, where lines[i] is included."
 246     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
 247
 248
 249 def is_in_inset(lines, i, inset):
 250     '''
 251     Checks if line i is in an inset of the given type.
 252     If so, returns starting and ending lines.
 253     Otherwise, returns False.
 254     Example:
 255       is_in_inset(document.body, i, "\\begin_inset Tabular")
 256     returns False unless i is within a table. If it is, then
 257     it returns the line on which the table begins and the one
 258     on which it ends. Note that this pair will evaulate to
 259     boolean True, so
 260       if is_in_inset(...):
 261     will do what you expect.
 262     '''
 263     defval = (-1, -1)
 264     stins = find_token_backwards(lines, inset, i)
 265     if stins == -1:
 266       return defval
 267     endins = find_end_of_inset(lines, stins)
 268     # note that this includes the notfound case.
 269     if endins < i:
 270       return defval
 271     return (stins, endins)
 272
 273
 274 def get_containing_inset(lines, i):
 275   '''
 276   Finds out what kind of inset line i is within. Returns a
 277   list containing (i) what follows \begin_inset on the the line
 278   on which the inset begins, plus the starting and ending line.
 279   Returns False on any kind of error or if it isn't in an inset.
 280   '''
 281   stins = find_token_backwards(lines, i, "\\begin_inset")
 282   if stins == -1:
 283       return False
 284   endins = find_end_of_inset(lines, stins)
 285   if endins < i:
 286       return False
 287   inset = get_value(lines, "\\begin_inset", stins)
 288   if inset == "":
 289       # shouldn't happen
 290       return False
 291   return (inset, stins, endins)
 292
 293
 294 def get_containing_layout(lines, i):
 295   '''
 296   Finds out what kind of layout line i is within. Returns a
 297   list containing (i) what follows \begin_layout on the the line
 298   on which the layout begins, plus the starting and ending line.
 299   Returns False on any kind of error.
 300   '''
 301   stins = find_token_backwards(lines, i, "\\begin_layout")
 302   if stins == -1:
 303       return False
 304   endins = find_end_of_layout(lines, stins)
 305   if endins < i:
 306       return False
 307   lay = get_value(lines, "\\begin_layout", stins)
 308   if lay == "":
 309       # shouldn't happen
 310       return False
 311   return (lay, stins, endins)