lib/lyx2lyx/parser_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2002-2010 Dekel Tsur <dekel@lyx.org>,
   4 # José Matos <jamatos@lyx.org>, Richard Heck <rgheck@comcast.net>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
" This module offers several free functions to help parse lines. "
  21
  22 # Utilities for one line
  23 def check_token(line, token):
  24     """ check_token(line, token) -> bool
  25
  26     Return True if token is present in line and is the first element
  27     else returns False."""
  28
  29     return line[:len(token)] == token
  30
  31
  32 def is_nonempty_line(line):
  33     """ is_nonempty_line(line) -> bool
  34
  35     Return False if line is either empty or it has only whitespaces,
  36     else return True."""
  37     return line != " "*len(line)
  38
  39
  40 # Utilities for a list of lines
  41 def find_token(lines, token, start, end = 0, exact = False):
  42     """ find_token(lines, token, start[[, end], exact]) -> int
  43
  44     Return the lowest line where token is found, and is the first
  45     element, in lines[start, end].
  46
  47     Return -1 on failure."""
  48
  49     if end == 0:
  50         end = len(lines)
  51     m = len(token)
  52     for i in xrange(start, end):
  53         if exact:
  54             x = lines[i].split()
  55             y = token.split()
  56             if len(x) < len(y):
  57                 continue
  58             if x[:len(y)] == y:
  59                 return i
  60         else:
  61             if lines[i][:m] == token:
  62                 return i
  63     return -1
  64
  65
  66 def find_token_exact(lines, token, start, end = 0):
  67     return find_token(lines, token, start, end, True)
  68
  69
  70 def find_tokens(lines, tokens, start, end = 0, exact = False):
  71     """ find_tokens(lines, tokens, start[[, end], exact]) -> int
  72
  73     Return the lowest line where one token in tokens is found, and is
  74     the first element, in lines[start, end].
  75
  76     Return -1 on failure."""
  77     if end == 0:
  78         end = len(lines)
  79
  80     for i in xrange(start, end):
  81         for token in tokens:
  82             if exact:
  83                 x = lines[i].split()
  84                 y = token.split()
  85                 if len(x) < len(y):
  86                     continue
  87                 if x[:len(y)] == y:
  88                     return i
  89             else:
  90                 if lines[i][:len(token)] == token:
  91                     return i
  92     return -1
  93
  94
  95 def find_tokens_exact(lines, tokens, start, end = 0):
  96     return find_tokens(lines, tokens, start, end, True)
  97
  98
  99 def find_re(lines, rexp, start, end = 0):
 100     """ find_token_re(lines, rexp, start[, end]) -> int
 101
 102     Return the lowest line where rexp, a regular expression, is found
 103     in lines[start, end].
 104
 105     Return -1 on failure."""
 106
 107     if end == 0:
 108         end = len(lines)
 109     for i in xrange(start, end):
 110         if rexp.match(lines[i]):
 111                 return i
 112     return -1
 113
 114
 115 def find_token_backwards(lines, token, start):
 116     """ find_token_backwards(lines, token, start) -> int
 117
 118     Return the highest line where token is found, and is the first
 119     element, in lines[start, end].
 120
 121     Return -1 on failure."""
 122     m = len(token)
 123     for i in xrange(start, -1, -1):
 124         line = lines[i]
 125         if line[:m] == token:
 126             return i
 127     return -1
 128
 129
 130 def find_tokens_backwards(lines, tokens, start):
 131     """ find_tokens_backwards(lines, token, start) -> int
 132
 133     Return the highest line where token is found, and is the first
 134     element, in lines[end, start].
 135
 136     Return -1 on failure."""
 137     for i in xrange(start, -1, -1):
 138         line = lines[i]
 139         for token in tokens:
 140             if line[:len(token)] == token:
 141                 return i
 142     return -1
 143
 144
 145 def get_value(lines, token, start, end = 0, default = ""):
 146     """ get_value(lines, token, start[[, end], default]) -> list of strings
 147
 148     Return tokens after token for the first line, in lines, where
 149     token is the first element."""
 150
 151     i = find_token_exact(lines, token, start, end)
 152     if i == -1:
 153         return default
 154     if len(lines[i].split()) > 1:
 155         return lines[i].split()[1]
 156     else:
 157         return default
 158
 159
 160 def get_value_string(lines, token, start, end = 0, trim = False, default = ""):
 161     """ get_value_string(lines, token, start[[, end], trim, default]) -> string
 162
 163     Return tokens after token as string, in lines, where
 164     token is the first element. When trim is used, the first and last character
 165     of the string is trimmed."""
 166
 167     i = find_token_exact(lines, token, start, end)
 168     if i == -1:
 169         return default
 170     if len(lines[i].split()) > 1:
 171         for k in range (0, len(lines[i])):
 172             if lines[i][k] == ' ':
 173                 if trim ==False:
 174                     return lines[i][k+1:len(lines[i])]
 175                 else:
 176                     return lines[i][k+2:len(lines[i])-1]
 177     else:
 178         return default
 179
 180
 181 def del_token(lines, token, start, end):
 182     """ del_token(lines, token, start, end) -> int
 183
 184     Find the lower line in lines where token is the first element and
 185     delete that line.
 186
 187     Returns the number of lines remaining."""
 188
 189     k = find_token_exact(lines, token, start, end)
 190     if k == -1:
 191         return end
 192     else:
 193         del lines[k]
 194         return end - 1
 195
 196
 197 def find_beginning_of(lines, i, start_token, end_token):
 198     count = 1
 199     while i > 0:
 200         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
 201         if i == -1:
 202             return -1
 203         if check_token(lines[i], end_token):
 204             count = count+1
 205         else:
 206             count = count-1
 207         if count == 0:
 208             return i
 209     return -1
 210
 211
 212 def find_end_of(lines, i, start_token, end_token):
 213     count = 1
 214     n = len(lines)
 215     while i < n:
 216         i = find_tokens(lines, [end_token, start_token], i+1)
 217         if i == -1:
 218             return -1
 219         if check_token(lines[i], start_token):
 220             count = count+1
 221         else:
 222             count = count-1
 223         if count == 0:
 224             return i
 225     return -1
 226
 227
 228 def find_nonempty_line(lines, start, end = 0):
 229     if end == 0:
 230         end = len(lines)
 231     for i in xrange(start, end):
 232         if is_nonempty_line(lines[i]):
 233             return i
 234     return -1
 235
 236
 237 def find_end_of_inset(lines, i):
 238     " Find end of inset, where lines[i] is included."
 239     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
 240
 241
 242 def find_end_of_layout(lines, i):
 243     " Find end of layout, where lines[i] is included."
 244     return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
 245
 246
 247 # checks if line i is in the inset e.g., "\\begin_inset CommandInset ref"
 248 # if so, returns starting and ending lines
 249 # otherwise, returns (-1, -1)
 250 def get_containing_inset(lines, i, inset):
 251     defval = (-1, -1)
 252     stins = find_token_backwards(lines, inset, i)
 253     if stins == -1:
 254       return defval
 255     endins = find_end_of_inset(lines, stins)
 256     # note that this includes the notfound case.
 257     if endins < i:
 258       return defval
 259     return (stins, endins)