lib/lyx2lyx/parser_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: iso-8859-1 -*-
   3 # Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>, José Matos <jamatos@lyx.org>
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 import string
  20 import re
  21
  22 def check_token(line, token):
  23     if line[:len(token)] == token:
  24         return 1
  25     return 0
  26
  27
  28 # We need to check that the char after the token is space, but I think
  29 # we can ignore this
  30 def find_token(lines, token, start, end = 0):
  31     if end == 0:
  32         end = len(lines)
  33     m = len(token)
  34     for i in xrange(start, end):
  35         if lines[i][:m] == token:
  36             return i
  37     return -1
  38
  39
  40 def find_token_exact(lines, token, start, end = 0):
  41     if end == 0:
  42         end = len(lines)
  43     for i in xrange(start, end):
  44         x = string.split(lines[i])
  45         y = string.split(token)
  46         if len(x) < len(y):
  47             continue
  48         if x[:len(y)] == y:
  49             return i
  50     return -1
  51
  52
  53 def find_tokens(lines, tokens, start, end = 0):
  54     if end == 0:
  55         end = len(lines)
  56     for i in xrange(start, end):
  57         for token in tokens:
  58             if lines[i][:len(token)] == token:
  59                 return i
  60     return -1
  61
  62
  63 def find_tokens_exact(lines, tokens, start, end = 0):
  64     if end == 0:
  65         end = len(lines)
  66     for i in xrange(start, end):
  67         for token in tokens:
  68             x = string.split(lines[i])
  69             y = string.split(token)
  70             if len(x) < len(y):
  71                 continue
  72             if x[:len(y)] == y:
  73                 return i
  74     return -1
  75
  76
  77 def find_re(lines, rexp, start, end = 0):
  78     if end == 0:
  79         end = len(lines)
  80     for i in xrange(start, end):
  81         if rexp.match(lines[i]):
  82                 return i
  83     return -1
  84
  85
  86 def find_token_backwards(lines, token, start):
  87     m = len(token)
  88     for i in xrange(start, -1, -1):
  89         line = lines[i]
  90         if line[:m] == token:
  91             return i
  92     return -1
  93
  94
  95 def find_tokens_backwards(lines, tokens, start):
  96     for i in xrange(start, -1, -1):
  97         line = lines[i]
  98         for token in tokens:
  99             if line[:len(token)] == token:
 100                 return i
 101     return -1
 102
 103
 104 def get_value(lines, token, start, end = 0):
 105     i = find_token_exact(lines, token, start, end)
 106     if i == -1:
 107         return ""
 108     if len(string.split(lines[i])) > 1:
 109         return string.split(lines[i])[1]
 110     else:
 111         return ""
 112
 113
 114 def get_layout(line, default_layout):
 115     tokens = string.split(line)
 116     if len(tokens) > 1:
 117         return tokens[1]
 118     return default_layout
 119
 120
 121 def del_token(lines, token, i, j):
 122     k = find_token_exact(lines, token, i, j)
 123     if k == -1:
 124         return j
 125     else:
 126         del lines[k]
 127         return j-1
 128
 129
 130 # Finds the paragraph that contains line i.
 131 def get_paragraph(lines, i, format):
 132     if format < 225:
 133         begin_layout = "\\layout"
 134     else:
 135         begin_layout = "\\begin_layout"
 136     while i != -1:
 137         i = find_tokens_backwards(lines, ["\\end_inset", begin_layout], i)
 138         if i == -1: return -1
 139         if check_token(lines[i], begin_layout):
 140             return i
 141         i = find_beginning_of_inset(lines, i)
 142     return -1
 143
 144
 145 # Finds the paragraph after the paragraph that contains line i.
 146 def get_next_paragraph(lines, i, format):
 147     if format < 225:
 148         tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]
 149     elif format < 236:
 150         tokens = ["\\begin_inset", "\\begin_layout", "\\end_float", "\\end_document"]
 151     else:
 152         tokens = ["\\begin_inset", "\\begin_layout", "\\end_float", "\\end_body", "\\end_document"]
 153     while i != -1:
 154         i = find_tokens(lines, tokens, i)
 155         if not check_token(lines[i], "\\begin_inset"):
 156             return i
 157         i = find_end_of_inset(lines, i)
 158     return -1
 159
 160
 161 def find_end_of(lines, i, start_token, end_token):
 162     count = 1
 163     n = len(lines)
 164     while i < n:
 165         i = find_tokens(lines, [end_token, start_token], i+1)
 166         if check_token(lines[i], start_token):
 167             count = count+1
 168         else:
 169             count = count-1
 170         if count == 0:
 171             return i
 172     return -1
 173
 174
# Finds the start_token line matching the block that ends at line i
 176 def find_beginning_of(lines, i, start_token, end_token):
 177     count = 1
 178     while i > 0:
 179         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
 180         if check_token(lines[i], end_token):
 181             count = count+1
 182         else:
 183             count = count-1
 184         if count == 0:
 185             return i
 186     return -1
 187
 188
 189 # Finds the matching \end_inset
 190 def find_end_of_inset(lines, i):
 191     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
 192
 193
# Finds the matching \begin_inset
 195 def find_beginning_of_inset(lines, i):
 196     return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset")
 197
 198
 199 def find_end_of_tabular(lines, i):
 200     return find_end_of(lines, i, "<lyxtabular", "</lyxtabular")
 201
 202
 203 def get_tabular_lines(lines, i):
 204     result = []
 205     i = i+1
 206     j = find_end_of_tabular(lines, i)
 207     if j == -1:
 208         return []
 209
 210     while i <= j:
 211         if check_token(lines[i], "\\begin_inset"):
 212             i = find_end_of_inset(lines, i)+1
 213         else:
 214             result.append(i)
 215             i = i+1
 216     return result
 217
 218
 219 def is_nonempty_line(line):
 220     return line != " "*len(line)
 221
 222
 223 def find_nonempty_line(lines, start, end = 0):
 224     if end == 0:
 225         end = len(lines)
 226     for i in xrange(start, end):
 227         if is_nonempty_line(lines[i]):
 228             return i
 229     return -1