lib/lyx2lyx/parser_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2002-2010 Dekel Tsur <dekel@lyx.org>,
   4 # José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
" This module offers several free functions to help parse lines. "
  21
  22 # Utilities for one line
  23 def check_token(line, token):
  24     """ check_token(line, token) -> bool
  25
  26     Return True if token is present in line and is the first element
  27     else returns False."""
  28
  29     return line[:len(token)] == token
  30
  31
  32 def is_nonempty_line(line):
  33     """ is_nonempty_line(line) -> bool
  34
  35     Return False if line is either empty or it has only whitespaces,
  36     else return True."""
  37     return line != " "*len(line)
  38
  39
  40 # Utilities for a list of lines
  41 def find_token(lines, token, start, end = 0, exact = False):
  42     """ find_token(lines, token, start[[, end], exact]) -> int
  43
  44     Return the lowest line where token is found, and is the first
  45     element, in lines[start, end].
  46
  47     Return -1 on failure."""
  48
  49     if end == 0 or end > len(lines):
  50         end = len(lines)
  51     m = len(token)
  52     for i in xrange(start, end):
  53         if exact:
  54             x = lines[i].split()
  55             y = token.split()
  56             if len(x) < len(y):
  57                 continue
  58             if x[:len(y)] == y:
  59                 return i
  60         else:
  61             if lines[i][:m] == token:
  62                 return i
  63     return -1
  64
  65
  66 def find_token_exact(lines, token, start, end = 0):
  67     return find_token(lines, token, start, end, True)
  68
  69
  70 def find_tokens(lines, tokens, start, end = 0, exact = False):
  71     """ find_tokens(lines, tokens, start[[, end], exact]) -> int
  72
  73     Return the lowest line where one token in tokens is found, and is
  74     the first element, in lines[start, end].
  75
  76     Return -1 on failure."""
  77     if end == 0:
  78         end = len(lines)
  79
  80     for i in xrange(start, end):
  81         for token in tokens:
  82             if exact:
  83                 x = lines[i].split()
  84                 y = token.split()
  85                 if len(x) < len(y):
  86                     continue
  87                 if x[:len(y)] == y:
  88                     return i
  89             else:
  90                 if lines[i][:len(token)] == token:
  91                     return i
  92     return -1
  93
  94
  95 def find_tokens_exact(lines, tokens, start, end = 0):
  96     return find_tokens(lines, tokens, start, end, True)
  97
  98
  99 def find_re(lines, rexp, start, end = 0):
 100     """ find_token_re(lines, rexp, start[, end]) -> int
 101
 102     Return the lowest line where rexp, a regular expression, is found
 103     in lines[start, end].
 104
 105     Return -1 on failure."""
 106
 107     if end == 0:
 108         end = len(lines)
 109     for i in xrange(start, end):
 110         if rexp.match(lines[i]):
 111                 return i
 112     return -1
 113
 114
 115 def find_token_backwards(lines, token, start):
 116     """ find_token_backwards(lines, token, start) -> int
 117
 118     Return the highest line where token is found, and is the first
 119     element, in lines[start, end].
 120
 121     Return -1 on failure."""
 122     m = len(token)
 123     for i in xrange(start, -1, -1):
 124         line = lines[i]
 125         if line[:m] == token:
 126             return i
 127     return -1
 128
 129
 130 def find_tokens_backwards(lines, tokens, start):
 131     """ find_tokens_backwards(lines, token, start) -> int
 132
 133     Return the highest line where token is found, and is the first
 134     element, in lines[end, start].
 135
 136     Return -1 on failure."""
 137     for i in xrange(start, -1, -1):
 138         line = lines[i]
 139         for token in tokens:
 140             if line[:len(token)] == token:
 141                 return i
 142     return -1
 143
 144
 145 def get_value(lines, token, start, end = 0, default = ""):
 146     """ get_value(lines, token, start[[, end], default]) -> string
 147
 148     Find the next line that looks like:
 149       token followed by other stuff
 150     Returns "followed by other stuff" with leading and trailing
 151     whitespace removed.
 152     """
 153
 154     i = find_token_exact(lines, token, start, end)
 155     if i == -1:
 156         return default
 157     l = lines[i].split(None, 1)
 158     if len(l) > 1:
 159         return l[1].strip()
 160     return default
 161
 162
 163 def get_quoted_value(lines, token, start, end = 0, default = ""):
 164     """ get_quoted_value(lines, token, start[[, end], default]) -> string
 165
 166     Find the next line that looks like:
 167       token "followed by other stuff"
 168     Returns "followed by other stuff" with leading and trailing
 169     whitespace and quotes removed. If there are no quotes, that is OK too.
 170     So use get_value to preserve possible quotes, this one to remove them,
 171     if they are there.
 172     Note that we will NOT strip quotes from default!
 173     """
 174     val = get_value(lines, token, start, end, "")
 175     if not val:
 176       return default
 177     return val.strip('"')
 178
 179
 180 def del_token(lines, token, start, end):
 181     """ del_token(lines, token, start, end) -> int
 182
 183     Find the lower line in lines where token is the first element and
 184     delete that line.
 185
 186     Returns the number of lines remaining."""
 187
 188     k = find_token_exact(lines, token, start, end)
 189     if k == -1:
 190         return end
 191     else:
 192         del lines[k]
 193         return end - 1
 194
 195
 196 def find_beginning_of(lines, i, start_token, end_token):
 197     count = 1
 198     while i > 0:
 199         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
 200         if i == -1:
 201             return -1
 202         if check_token(lines[i], end_token):
 203             count = count+1
 204         else:
 205             count = count-1
 206         if count == 0:
 207             return i
 208     return -1
 209
 210
 211 def find_end_of(lines, i, start_token, end_token):
 212     count = 1
 213     n = len(lines)
 214     while i < n:
 215         i = find_tokens(lines, [end_token, start_token], i+1)
 216         if i == -1:
 217             return -1
 218         if check_token(lines[i], start_token):
 219             count = count+1
 220         else:
 221             count = count-1
 222         if count == 0:
 223             return i
 224     return -1
 225
 226
 227 def find_nonempty_line(lines, start, end = 0):
 228     if end == 0:
 229         end = len(lines)
 230     for i in xrange(start, end):
 231         if is_nonempty_line(lines[i]):
 232             return i
 233     return -1
 234
 235
 236 def find_end_of_inset(lines, i):
 237     " Find end of inset, where lines[i] is included."
 238     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
 239
 240
 241 def find_end_of_layout(lines, i):
 242     " Find end of layout, where lines[i] is included."
 243     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
 244
 245
 246 # checks if line i is in the given inset
 247 # if so, returns starting and ending lines
 248 # otherwise, returns (-1, -1)
 249 # Example:
 250 #  get_containing_inset(document.body, i, "\\begin_inset Tabular")
 251 # returns (-1, -1) unless i is within a table.
 252 def get_containing_inset(lines, i, inset):
 253     defval = (-1, -1)
 254     stins = find_token_backwards(lines, inset, i)
 255     if stins == -1:
 256       return defval
 257     endins = find_end_of_inset(lines, stins)
 258     # note that this includes the notfound case.
 259     if endins < i:
 260       return defval
 261     return (stins, endins)