lib/lyx2lyx/parser_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: iso-8859-1 -*-
   3 # Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>, José Matos <jamatos@lyx.org>
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 import string
  20 import re
  21
  22
  23 def check_token(line, token):
  24     if line[:len(token)] == token:
  25         return 1
  26     return 0
  27
  28
  29 # We need to check that the char after the token is space, but I think
  30 # we can ignore this
  31 def find_token(lines, token, start, end = 0):
  32     if end == 0:
  33         end = len(lines)
  34     m = len(token)
  35     for i in xrange(start, end):
  36         if lines[i][:m] == token:
  37             return i
  38     return -1
  39
  40
  41 def find_token2(lines, token, start, end = 0):
  42     if end == 0:
  43         end = len(lines)
  44     for i in xrange(start, end):
  45         x = string.split(lines[i])
  46         if len(x) > 0 and x[0] == token:
  47             return i
  48     return -1
  49
  50
  51 def find_tokens(lines, tokens, start, end = 0):
  52     if end == 0:
  53         end = len(lines)
  54     for i in xrange(start, end):
  55         line = lines[i]
  56         for token in tokens:
  57             if line[:len(token)] == token:
  58                 return i
  59     return -1
  60
  61
  62 def find_re(lines, rexp, start, end = 0):
  63     if end == 0:
  64         end = len(lines)
  65     for i in xrange(start, end):
  66         if rexp.match(lines[i]):
  67                 return i
  68     return -1
  69
  70
  71 def find_token_backwards(lines, token, start):
  72     m = len(token)
  73     for i in xrange(start, -1, -1):
  74         line = lines[i]
  75         if line[:m] == token:
  76             return i
  77     return -1
  78
  79
  80 def find_tokens_backwards(lines, tokens, start):
  81     for i in xrange(start, -1, -1):
  82         line = lines[i]
  83         for token in tokens:
  84             if line[:len(token)] == token:
  85                 return i
  86     return -1
  87
  88
  89 def get_value(lines, token, start, end = 0):
  90     i = find_token2(lines, token, start, end)
  91     if i == -1:
  92         return ""
  93     if len(string.split(lines[i])) > 1:
  94         return string.split(lines[i])[1]
  95     else:
  96         return ""
  97
  98
  99 def del_token(lines, token, i, j):
 100     k = find_token2(lines, token, i, j)
 101     if k == -1:
 102         return j
 103     else:
 104         del lines[k]
 105         return j-1
 106
 107
 108 # Finds the paragraph that contains line i.
 109 def get_paragraph(lines, i):
 110     while i != -1:
 111         i = find_tokens_backwards(lines, ["\\end_inset", "\\layout"], i)
 112         if i == -1: return -1
 113         if check_token(lines[i], "\\layout"):
 114             return i
 115         i = find_beginning_of_inset(lines, i)
 116     return -1
 117
 118
 119 # Finds the paragraph after the paragraph that contains line i.
 120 def get_next_paragraph(lines, i):
 121     while i != -1:
 122         i = find_tokens(lines, ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"], i)
 123         if not check_token(lines[i], "\\begin_inset"):
 124             return i
 125         i = find_end_of_inset(lines, i)
 126     return -1
 127
 128
 129 def find_end_of(lines, i, start_token, end_token):
 130     count = 1
 131     n = len(lines)
 132     while i < n:
 133         i = find_tokens(lines, [end_token, start_token], i+1)
 134         if check_token(lines[i], start_token):
 135             count = count+1
 136         else:
 137             count = count-1
 138         if count == 0:
 139             return i
 140     return -1
 141
 142
 143 # Finds the matching \end_inset
 144 def find_beginning_of(lines, i, start_token, end_token):
 145     count = 1
 146     while i > 0:
 147         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
 148         if check_token(lines[i], end_token):
 149             count = count+1
 150         else:
 151             count = count-1
 152         if count == 0:
 153             return i
 154     return -1
 155
 156
 157 # Finds the matching \end_inset
 158 def find_end_of_inset(lines, i):
 159     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
 160
 161
 162 # Finds the matching \end_inset
 163 def find_beginning_of_inset(lines, i):
 164     return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset")
 165
 166
 167 def find_end_of_tabular(lines, i):
 168     return find_end_of(lines, i, "<lyxtabular", "</lyxtabular")
 169
 170
 171 def get_tabular_lines(lines, i):
 172     result = []
 173     i = i+1
 174     j = find_end_of_tabular(lines, i)
 175     if j == -1:
 176         return []
 177
 178     while i <= j:
 179         if check_token(lines[i], "\\begin_inset"):
 180             i = find_end_of_inset(lines, i)+1
 181         else:
 182             result.append(i)
 183             i = i+1
 184     return result
 185
 186
 187 def is_nonempty_line(line):
 188     return line != " "*len(line)
 189
 190
 191 def find_nonempty_line(lines, start, end = 0):
 192     if end == 0:
 193         end = len(lines)
 194     for i in xrange(start, end):
 195         if is_nonempty_line(lines[i]):
 196             return i
 197     return -1
 198
 199
 200 ##
 201 # Tools for file reading
 202 #
 203 def read_file(header, body, opt):
 204     """Reads a file into the header and body parts"""
 205     preamble = 0
 206
 207     while 1:
 208         line = opt.input.readline()
 209         if not line:
 210             opt.error("Invalid LyX file.")
 211
 212         line = line[:-1]
 213         # remove '\r' from line's end, if present
 214         if line[-1:] == '\r':
 215             line = line[:-1]
 216
 217         if check_token(line, '\\begin_preamble'):
 218             preamble = 1
 219         if check_token(line, '\\end_preamble'):
 220             preamble = 0
 221
 222         if not preamble:
 223             line = string.strip(line)
 224
 225         if not line and not preamble:
 226             break
 227
 228         header.append(line)
 229
 230     while 1:
 231         line = opt.input.readline()
 232         if not line:
 233             break
 234         # remove '\r' from line's end, if present
 235         if line[-2:-1] == '\r':
 236             body.append(line[:-2])
 237         else:
 238             body.append(line[:-1])
 239
 240
 241 def write_file(header, body, opt):
 242     for line in header:
 243         opt.output.write(line+"\n")
 244     opt.output.write("\n")
 245     for line in body:
 246         opt.output.write(line+"\n")
 247
 248
 249 ##
 250 # lyx version
 251 #
 252 original_version = re.compile(r"\#LyX (\S*)")
 253
 254 def read_version(header):
 255     for line in header:
 256         if line[0] != "#":
 257             return None
 258
 259         result = original_version.match(line)
 260         if result:
 261             return result.group(1)
 262     return None
 263
 264
 265 def set_version(lines, version):
 266     lines[0] = "#LyX %s created this file. For more info see http://www.lyx.org/" % version
 267     if lines[1][0] == '#':
 268         del lines[1]
 269
 270 ##
 271 # file format version
 272 #
 273 format_re = re.compile(r"(\d)[\.,]?(\d\d)")
 274 fileformat = re.compile(r"\\lyxformat\s*(\S*)")
 275 lst_ft = [210, 215, 216, 217, 218, 220, 221, 223, 224, 225, 226, 227, 228, 229,
 276           230, 231, 232, 233, 234, 235]
 277
 278 format_relation = [("0_10",  [210], ["0.10.7","0.10"]),
 279                    ("0_12",  [215], ["0.12","0.12.1","0.12"]),
 280                    ("1_0_0", [215], ["1.0.0","1.0"]),
 281                    ("1_0_1", [215], ["1.0.1","1.0.2","1.0.3","1.0.4", "1.1.2","1.1"]),
 282                    ("1_1_4", [215], ["1.1.4","1.1"]),
 283                    ("1_1_5", [216], ["1.1.5","1.1.5fix1","1.1.5fix2","1.1"]),
 284                    ("1_1_6", [217], ["1.1.6","1.1.6fix1","1.1.6fix2","1.1"]),
 285                    ("1_1_6fix3", [218], ["1.1.6fix3","1.1.6fix4","1.1"]),
 286                    ("1_2", [220], ["1.2.0","1.2.1","1.2.3","1.2.4","1.2"]),
 287                    ("1_3", [221], ["1.3.0","1.3.1","1.3.2","1.3.3","1.3.4","1.3"]),
 288                    ("1_4", [223,224,225,226,227,228,229,230,231,232,233,234,235], ["1.4.0cvs","1.4"])]
 289
 290
 291 def lyxformat(format, opt):
 292     result = format_re.match(format)
 293     if result:
 294         format = int(result.group(1) + result.group(2))
 295     else:
 296         opt.error(str(format) + ": " + "Invalid LyX file.")
 297
 298     if format in lst_ft:
 299         return format
 300
 301     opt.error(str(format) + ": " + "Format not supported.")
 302     return None
 303
 304
 305 def read_format(header, opt):
 306     for line in header:
 307         result = fileformat.match(line)
 308         if result:
 309             return lyxformat(result.group(1), opt)
 310     else:
 311         opt.error("Invalid LyX File.")
 312     return None
 313
 314
 315 def set_format(lines, number):
 316     if int(number) <= 217:
 317         number = float(number)/100
 318     i = find_token(lines, "\\lyxformat", 0)
 319     lines[i] = "\\lyxformat %s" % number
 320
 321
 322 def get_end_format():
 323     return format_relation[-1:][0][1][-1:][0]
 324
 325
 326 def get_backend(textclass):
 327     if textclass == "linuxdoc" or textclass == "manpage":
 328         return "linuxdoc"
 329     if textclass[:7] == "docbook":
 330         return "docbook"
 331     return "latex"
 332
 333
 334 def chain(opt, initial_version):
 335     """ This is where all the decisions related with the convertion are taken"""
 336
 337     format = opt.format
 338     if opt.start:
 339         if opt.start != format:
 340             opt.warning("%s: %s %s" % ("Proposed file format and input file formats do not match:", opt.start, format))
 341     else:
 342         opt.start = format
 343
 344     if not opt.end:
 345         opt.end = get_end_format()
 346
 347     correct_version = 0
 348
 349     for rel in format_relation:
 350         if initial_version in rel[2]:
 351             if format in rel[1]:
 352                 initial_step = rel[0]
 353                 correct_version = 1
 354                 break
 355
 356     if not correct_version:
 357         if format <= 215:
 358             opt.warning("Version does not match file format, discarding it.")
 359         for rel in format_relation:
 360             if format in rel[1]:
 361                 initial_step = rel[0]
 362                 break
 363         else:
 364             # This should not happen, really.
 365             opt.error("Format not supported.")
 366
 367     # Find the final step
 368     for rel in format_relation:
 369         if opt.end in rel[1]:
 370             final_step = rel[0]
 371             break
 372     else:
 373         opt.error("Format not supported.")
 374
 375     # Convertion mode, back or forth
 376     steps = []
 377     if (initial_step, opt.start) < (final_step, opt.end):
 378         mode = "convert"
 379         first_step = 1
 380         for step in format_relation:
 381             if  initial_step <= step[0] <= final_step:
 382                 if first_step and len(step[1]) == 1:
 383                     first_step = 0
 384                     continue
 385                 steps.append(step[0])
 386     else:
 387         mode = "revert"
 388         relation_format = format_relation
 389         relation_format.reverse()
 390         last_step = None
 391
 392         for step in relation_format:
 393             if  final_step <= step[0] <= initial_step:
 394                 steps.append(step[0])
 395                 last_step = step
 396
 397         if last_step[1][-1] == opt.end:
 398             steps.pop()
 399
 400     return mode, steps