lib/lyx2lyx/parser_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2002-2010 Dekel Tsur <dekel@lyx.org>,
   4 # José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
" This module offers several free functions to help parse lines. "
  21
  22 # Utilities for one line
  23 def check_token(line, token):
  24     """ check_token(line, token) -> bool
  25
  26     Return True if token is present in line and is the first element
  27     else returns False."""
  28
  29     return line[:len(token)] == token
  30
  31
  32 def is_nonempty_line(line):
  33     """ is_nonempty_line(line) -> bool
  34
  35     Return False if line is either empty or it has only whitespaces,
  36     else return True."""
  37     return line != " "*len(line)
  38
  39
  40 # Utilities for a list of lines
  41 def find_token(lines, token, start, end = 0, exact = False):
  42     """ find_token(lines, token, start[[, end], exact]) -> int
  43
  44     Return the lowest line where token is found, and is the first
  45     element, in lines[start, end].
  46
  47     Return -1 on failure."""
  48
  49     if end == 0 or end > len(lines):
  50         end = len(lines)
  51     m = len(token)
  52     for i in xrange(start, end):
  53         if exact:
  54             x = lines[i].split()
  55             y = token.split()
  56             if len(x) < len(y):
  57                 continue
  58             if x[:len(y)] == y:
  59                 return i
  60         else:
  61             if lines[i][:m] == token:
  62                 return i
  63     return -1
  64
  65
  66 def find_token_exact(lines, token, start, end = 0):
  67     return find_token(lines, token, start, end, True)
  68
  69
  70 def find_tokens(lines, tokens, start, end = 0, exact = False):
  71     """ find_tokens(lines, tokens, start[[, end], exact]) -> int
  72
  73     Return the lowest line where one token in tokens is found, and is
  74     the first element, in lines[start, end].
  75
  76     Return -1 on failure."""
  77     if end == 0:
  78         end = len(lines)
  79
  80     for i in xrange(start, end):
  81         for token in tokens:
  82             if exact:
  83                 x = lines[i].split()
  84                 y = token.split()
  85                 if len(x) < len(y):
  86                     continue
  87                 if x[:len(y)] == y:
  88                     return i
  89             else:
  90                 if lines[i][:len(token)] == token:
  91                     return i
  92     return -1
  93
  94
  95 def find_tokens_exact(lines, tokens, start, end = 0):
  96     return find_tokens(lines, tokens, start, end, True)
  97
  98
  99 def find_re(lines, rexp, start, end = 0):
 100     """ find_token_re(lines, rexp, start[, end]) -> int
 101
 102     Return the lowest line where rexp, a regular expression, is found
 103     in lines[start, end].
 104
 105     Return -1 on failure."""
 106
 107     if end == 0:
 108         end = len(lines)
 109     for i in xrange(start, end):
 110         if rexp.match(lines[i]):
 111                 return i
 112     return -1
 113
 114
 115 def find_token_backwards(lines, token, start):
 116     """ find_token_backwards(lines, token, start) -> int
 117
 118     Return the highest line where token is found, and is the first
 119     element, in lines[start, end].
 120
 121     Return -1 on failure."""
 122     m = len(token)
 123     for i in xrange(start, -1, -1):
 124         line = lines[i]
 125         if line[:m] == token:
 126             return i
 127     return -1
 128
 129
 130 def find_tokens_backwards(lines, tokens, start):
 131     """ find_tokens_backwards(lines, token, start) -> int
 132
 133     Return the highest line where token is found, and is the first
 134     element, in lines[end, start].
 135
 136     Return -1 on failure."""
 137     for i in xrange(start, -1, -1):
 138         line = lines[i]
 139         for token in tokens:
 140             if line[:len(token)] == token:
 141                 return i
 142     return -1
 143
 144
 145 def get_value(lines, token, start, end = 0, default = ""):
 146     """ get_value(lines, token, start[[, end], default]) -> string
 147
 148     Find the next line that looks like:
 149       token followed by other stuff
 150     Returns "followed by other stuff" with leading and trailing
 151     whitespace removed.
 152     """
 153
 154     i = find_token_exact(lines, token, start, end)
 155     if i == -1:
 156         return default
 157     l = lines[i].split(None, 1)
 158     if len(l) > 1:
 159         return l[1].strip()
 160     return default
 161
 162
 163 def del_token(lines, token, start, end):
 164     """ del_token(lines, token, start, end) -> int
 165
 166     Find the lower line in lines where token is the first element and
 167     delete that line.
 168
 169     Returns the number of lines remaining."""
 170
 171     k = find_token_exact(lines, token, start, end)
 172     if k == -1:
 173         return end
 174     else:
 175         del lines[k]
 176         return end - 1
 177
 178
 179 def find_beginning_of(lines, i, start_token, end_token):
 180     count = 1
 181     while i > 0:
 182         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
 183         if i == -1:
 184             return -1
 185         if check_token(lines[i], end_token):
 186             count = count+1
 187         else:
 188             count = count-1
 189         if count == 0:
 190             return i
 191     return -1
 192
 193
 194 def find_end_of(lines, i, start_token, end_token):
 195     count = 1
 196     n = len(lines)
 197     while i < n:
 198         i = find_tokens(lines, [end_token, start_token], i+1)
 199         if i == -1:
 200             return -1
 201         if check_token(lines[i], start_token):
 202             count = count+1
 203         else:
 204             count = count-1
 205         if count == 0:
 206             return i
 207     return -1
 208
 209
 210 def find_nonempty_line(lines, start, end = 0):
 211     if end == 0:
 212         end = len(lines)
 213     for i in xrange(start, end):
 214         if is_nonempty_line(lines[i]):
 215             return i
 216     return -1
 217
 218
 219 def find_end_of_inset(lines, i):
 220     " Find end of inset, where lines[i] is included."
 221     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
 222
 223
 224 def find_end_of_layout(lines, i):
 225     " Find end of layout, where lines[i] is included."
 226     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
 227
 228
 229 # checks if line i is in the given inset
 230 # if so, returns starting and ending lines
 231 # otherwise, returns (-1, -1)
 232 # Example:
 233 #  get_containing_inset(document.body, i, "\\begin_inset Tabular")
 234 # returns (-1, -1) unless i is within a table.
 235 def get_containing_inset(lines, i, inset):
 236     defval = (-1, -1)
 237     stins = find_token_backwards(lines, inset, i)
 238     if stins == -1:
 239       return defval
 240     endins = find_end_of_inset(lines, stins)
 241     # note that this includes the notfound case.
 242     if endins < i:
 243       return defval
 244     return (stins, endins)