lib/lyx2lyx/parser_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: iso-8859-1 -*-
   3 # Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>, José Matos <jamatos@lyx.org>
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 import string
  20 import re
  21
  22 def check_token(line, token):
  23     if line[:len(token)] == token:
  24         return 1
  25     return 0
  26
  27 # We need to check that the char after the token is space, but I think
  28 # we can ignore this
  29 def find_token(lines, token, start, end = 0):
  30     if end == 0:
  31         end = len(lines)
  32     m = len(token)
  33     for i in xrange(start, end):
  34         if lines[i][:m] == token:
  35             return i
  36     return -1
  37
  38 def find_token2(lines, token, start, end = 0):
  39     if end == 0:
  40         end = len(lines)
  41     for i in xrange(start, end):
  42         x = string.split(lines[i])
  43         if len(x) > 0 and x[0] == token:
  44             return i
  45     return -1
  46
  47 def find_tokens(lines, tokens, start, end = 0):
  48     if end == 0:
  49         end = len(lines)
  50     for i in xrange(start, end):
  51         line = lines[i]
  52         for token in tokens:
  53             if line[:len(token)] == token:
  54                 return i
  55     return -1
  56
  57 def find_re(lines, rexp, start, end = 0):
  58     if end == 0:
  59         end = len(lines)
  60     for i in xrange(start, end):
  61         if rexp.match(lines[i]):
  62                 return i
  63     return -1
  64
  65 def find_token_backwards(lines, token, start):
  66     m = len(token)
  67     for i in xrange(start, -1, -1):
  68         line = lines[i]
  69         if line[:m] == token:
  70             return i
  71     return -1
  72
  73 def find_tokens_backwards(lines, tokens, start):
  74     for i in xrange(start, -1, -1):
  75         line = lines[i]
  76         for token in tokens:
  77             if line[:len(token)] == token:
  78                 return i
  79     return -1
  80
  81 def get_value(lines, token, start, end = 0):
  82     i = find_token2(lines, token, start, end)
  83     if i == -1:
  84         return ""
  85     if len(string.split(lines[i])) > 1:
  86         return string.split(lines[i])[1]
  87     else:
  88         return ""
  89
  90 def del_token(lines, token, i, j):
  91     k = find_token2(lines, token, i, j)
  92     if k == -1:
  93         return j
  94     else:
  95         del lines[k]
  96         return j-1
  97
  98 # Finds the paragraph that contains line i.
  99 def get_paragraph(lines, i):
 100     while i != -1:
 101         i = find_tokens_backwards(lines, ["\\end_inset", "\\layout"], i)
 102         if i == -1: return -1
 103         if check_token(lines[i], "\\layout"):
 104             return i
 105         i = find_beginning_of_inset(lines, i)
 106     return -1
 107
 108 # Finds the paragraph after the paragraph that contains line i.
 109 def get_next_paragraph(lines, i):
 110     while i != -1:
 111         i = find_tokens(lines, ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"], i)
 112         if not check_token(lines[i], "\\begin_inset"):
 113             return i
 114         i = find_end_of_inset(lines, i)
 115     return -1
 116
 117 def find_end_of(lines, i, start_token, end_token):
 118     count = 1
 119     n = len(lines)
 120     while i < n:
 121         i = find_tokens(lines, [end_token, start_token], i+1)
 122         if check_token(lines[i], start_token):
 123             count = count+1
 124         else:
 125             count = count-1
 126         if count == 0:
 127             return i
 128     return -1
 129
 130 # Finds the matching \end_inset
 131 def find_beginning_of(lines, i, start_token, end_token):
 132     count = 1
 133     while i > 0:
 134         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
 135         if check_token(lines[i], end_token):
 136             count = count+1
 137         else:
 138             count = count-1
 139         if count == 0:
 140             return i
 141     return -1
 142
 143 # Finds the matching \end_inset
 144 def find_end_of_inset(lines, i):
 145     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
 146
 147 # Finds the matching \end_inset
 148 def find_beginning_of_inset(lines, i):
 149     return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset")
 150
 151 def find_end_of_tabular(lines, i):
 152     return find_end_of(lines, i, "<lyxtabular", "</lyxtabular")
 153
 154 def get_tabular_lines(lines, i):
 155     result = []
 156     i = i+1
 157     j = find_end_of_tabular(lines, i)
 158     if j == -1:
 159         return []
 160
 161     while i <= j:
 162         if check_token(lines[i], "\\begin_inset"):
 163             i = find_end_of_inset(lines, i)+1
 164         else:
 165             result.append(i)
 166             i = i+1
 167     return result
 168
 169 def is_nonempty_line(line):
 170     return line != " "*len(line)
 171
 172 def find_nonempty_line(lines, start, end = 0):
 173     if end == 0:
 174         end = len(lines)
 175     for i in xrange(start, end):
 176         if is_nonempty_line(lines[i]):
 177             return i
 178     return -1
 179
 180 ##
 181 # Tools for file reading
 182 #
 183 def read_file(header, body, opt):
 184     """Reads a file into the header and body parts"""
 185     preamble = 0
 186
 187     while 1:
 188         line = opt.input.readline()
 189         if not line:
 190             opt.error("Invalid LyX file.")
 191
 192         line = line[:-1]
 193         if check_token(line, '\\begin_preamble'):
 194             preamble = 1
 195         if check_token(line, '\\end_preamble'):
 196             preamble = 0
 197
 198         if not preamble:
 199             line = string.strip(line)
 200
 201         if not line and not preamble:
 202             break
 203
 204         header.append(line)
 205
 206     while 1:
 207         line = opt.input.readline()
 208         if not line:
 209             break
 210         body.append(line[:-1])
 211
 212 def write_file(header, body, opt):
 213     for line in header:
 214         opt.output.write(line+"\n")
 215     opt.output.write("\n")
 216     for line in body:
 217         opt.output.write(line+"\n")
 218
 219 ##
 220 # lyx version
 221 #
 222 original_version = re.compile(r"\#LyX (\S*)")
 223
 224 def read_version(header):
 225     for line in header:
 226         if line[0] != "#":
 227             return None
 228
 229         result = original_version.match(line)
 230         if result:
 231             return result.group(1)
 232     return None
 233
 234 def set_version(lines, version):
 235     lines[0] = "#LyX %s created this file. For more info see http://www.lyx.org/" % version
 236     if lines[1][0] == '#':
 237         del lines[1]
 238
 239 ##
 240 # file format version
 241 #
 242 format_re = re.compile(r"(\d)[\.,]?(\d\d)")
 243 fileformat = re.compile(r"\\lyxformat\s*(\S*)")
 244 lst_ft = [210, 215, 216, 217, 218, 220, 221, 223, 224, 225, 226, 227, 228, 229,
 245           230, 231, 232]
 246
 247 format_relation = [("0_10",  [210], ["0.10.7","0.10"]),
 248                    ("0_12",  [215], ["0.12","0.12.1","0.12"]),
 249                    ("1_0_0", [215], ["1.0.0","1.0"]),
 250                    ("1_0_1", [215], ["1.0.1","1.0.2","1.0.3","1.0.4", "1.1.2","1.1"]),
 251                    ("1_1_4", [215], ["1.1.4","1.1"]),
 252                    ("1_1_5", [216], ["1.1.5","1.1.5fix1","1.1.5fix2","1.1"]),
 253                    ("1_1_6", [217], ["1.1.6","1.1.6fix1","1.1.6fix2","1.1"]),
 254                    ("1_1_6fix3", [218], ["1.1.6fix3","1.1.6fix4","1.1"]),
 255                    ("1_2", [220], ["1.2.0","1.2.1","1.2.3","1.2.4","1.2"]),
 256                    ("1_3", [221], ["1.3.0","1.3.1","1.3.2","1.3.3","1.3.4","1.3"]),
 257                    ("1_4", [223,224,225,226,227,228,229,230,231,232], ["1.4.0cvs","1.4"])]
 258
 259 def lyxformat(format, opt):
 260     result = format_re.match(format)
 261     if result:
 262         format = int(result.group(1) + result.group(2))
 263     else:
 264         opt.error(str(format) + ": " + "Invalid LyX file.")
 265
 266     if format in lst_ft:
 267         return format
 268
 269     opt.error(str(format) + ": " + "Format no supported.")
 270     return None
 271
 272 def read_format(header, opt):
 273     for line in header:
 274         result = fileformat.match(line)
 275         if result:
 276             return lyxformat(result.group(1), opt)
 277     else:
 278         opt.error("Invalid LyX File.")
 279     return None
 280
 281 def set_format(lines, number):
 282     if int(number) <= 217:
 283         number = float(number)/100
 284     i = find_token(lines, "\\lyxformat", 0)
 285     lines[i] = "\\lyxformat %s" % number
 286
 287 def get_end_format():
 288     return format_relation[-1:][0][1][-1:][0]
 289
 290 def chain(opt, initial_version):
 291     """ This is where all the decisions related with the convertion are taken"""
 292
 293     format = opt.format
 294     if opt.start:
 295         if opt.start != format:
 296             opt.warning("%s: %s %s" % ("Proposed file format and input file formats do not match:", opt.start, format))
 297     else:
 298         opt.start = format
 299
 300     if not opt.end:
 301         opt.end = get_end_format()
 302
 303     correct_version = 0
 304
 305     for rel in format_relation:
 306         if initial_version in rel[2]:
 307             if format in rel[1]:
 308                 initial_step = rel[0]
 309                 correct_version = 1
 310                 break
 311
 312     if not correct_version:
 313         if format <= 215:
 314             opt.warning("Version does not match file format, discarding it.")
 315         for rel in format_relation:
 316             if format in rel[1]:
 317                 initial_step = rel[0]
 318                 break
 319         else:
 320             # This should not happen, really.
 321             opt.error("Format not supported.")
 322
 323     # Find the final step
 324     for rel in format_relation:
 325         if opt.end in rel[1]:
 326             final_step = rel[0]
 327             break
 328     else:
 329         opt.error("Format not supported.")
 330
 331     # Convertion mode, back or forth
 332     steps = []
 333     if (initial_step, opt.start) < (final_step, opt.end):
 334         mode = "convert"
 335         first_step = 1
 336         for step in format_relation:
 337             if  initial_step <= step[0] <= final_step:
 338                 if first_step and len(step[1]) == 1:
 339                     first_step = 0
 340                     continue
 341                 steps.append(step[0])
 342     else:
 343         mode = "revert"
 344         for step in format_relation:
 345             if  final_step <= step[0] <= initial_step:
 346                 steps.insert(0, step[0])
 347
 348         if step[1][-1:] == opt.end:
 349             del steps[0]
 350
 351     return mode, steps