lib/lyx2lyx/parser_tools.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: iso-8859-1 -*-
   3 # Copyright (C) 2002-2004 Dekel Tsur <dekel@lyx.org>, José Matos <jamatos@lyx.org>
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 import string
  20 import re
  21
  22
  23 def check_token(line, token):
  24     if line[:len(token)] == token:
  25         return 1
  26     return 0
  27
  28
  29 # We need to check that the char after the token is space, but I think
  30 # we can ignore this
  31 def find_token(lines, token, start, end = 0):
  32     if end == 0:
  33         end = len(lines)
  34     m = len(token)
  35     for i in xrange(start, end):
  36         if lines[i][:m] == token:
  37             return i
  38     return -1
  39
  40
  41 def find_token2(lines, token, start, end = 0):
  42     if end == 0:
  43         end = len(lines)
  44     for i in xrange(start, end):
  45         x = string.split(lines[i])
  46         if len(x) > 0 and x[0] == token:
  47             return i
  48     return -1
  49
  50
  51 def find_tokens(lines, tokens, start, end = 0):
  52     if end == 0:
  53         end = len(lines)
  54     for i in xrange(start, end):
  55         line = lines[i]
  56         for token in tokens:
  57             if line[:len(token)] == token:
  58                 return i
  59     return -1
  60
  61
  62 def find_re(lines, rexp, start, end = 0):
  63     if end == 0:
  64         end = len(lines)
  65     for i in xrange(start, end):
  66         if rexp.match(lines[i]):
  67                 return i
  68     return -1
  69
  70
  71 def find_token_backwards(lines, token, start):
  72     m = len(token)
  73     for i in xrange(start, -1, -1):
  74         line = lines[i]
  75         if line[:m] == token:
  76             return i
  77     return -1
  78
  79
  80 def find_tokens_backwards(lines, tokens, start):
  81     for i in xrange(start, -1, -1):
  82         line = lines[i]
  83         for token in tokens:
  84             if line[:len(token)] == token:
  85                 return i
  86     return -1
  87
  88
  89 def get_value(lines, token, start, end = 0):
  90     i = find_token2(lines, token, start, end)
  91     if i == -1:
  92         return ""
  93     if len(string.split(lines[i])) > 1:
  94         return string.split(lines[i])[1]
  95     else:
  96         return ""
  97
  98
  99 def del_token(lines, token, i, j):
 100     k = find_token2(lines, token, i, j)
 101     if k == -1:
 102         return j
 103     else:
 104         del lines[k]
 105         return j-1
 106
 107
 108 # Finds the paragraph that contains line i.
 109 def get_paragraph(lines, i):
 110     while i != -1:
 111         i = find_tokens_backwards(lines, ["\\end_inset", "\\layout"], i)
 112         if i == -1: return -1
 113         if check_token(lines[i], "\\layout"):
 114             return i
 115         i = find_beginning_of_inset(lines, i)
 116     return -1
 117
 118
 119 # Finds the paragraph after the paragraph that contains line i.
 120 def get_next_paragraph(lines, i):
 121     while i != -1:
 122         i = find_tokens(lines, ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"], i)
 123         if not check_token(lines[i], "\\begin_inset"):
 124             return i
 125         i = find_end_of_inset(lines, i)
 126     return -1
 127
 128
 129 def find_end_of(lines, i, start_token, end_token):
 130     count = 1
 131     n = len(lines)
 132     while i < n:
 133         i = find_tokens(lines, [end_token, start_token], i+1)
 134         if check_token(lines[i], start_token):
 135             count = count+1
 136         else:
 137             count = count-1
 138         if count == 0:
 139             return i
 140     return -1
 141
 142
 143 # Finds the matching \end_inset
 144 def find_beginning_of(lines, i, start_token, end_token):
 145     count = 1
 146     while i > 0:
 147         i = find_tokens_backwards(lines, [start_token, end_token], i-1)
 148         if check_token(lines[i], end_token):
 149             count = count+1
 150         else:
 151             count = count-1
 152         if count == 0:
 153             return i
 154     return -1
 155
 156
 157 # Finds the matching \end_inset
 158 def find_end_of_inset(lines, i):
 159     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
 160
 161
 162 # Finds the matching \end_inset
 163 def find_beginning_of_inset(lines, i):
 164     return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset")
 165
 166
 167 def find_end_of_tabular(lines, i):
 168     return find_end_of(lines, i, "<lyxtabular", "</lyxtabular")
 169
 170
 171 def get_tabular_lines(lines, i):
 172     result = []
 173     i = i+1
 174     j = find_end_of_tabular(lines, i)
 175     if j == -1:
 176         return []
 177
 178     while i <= j:
 179         if check_token(lines[i], "\\begin_inset"):
 180             i = find_end_of_inset(lines, i)+1
 181         else:
 182             result.append(i)
 183             i = i+1
 184     return result
 185
 186
 187 def is_nonempty_line(line):
 188     return line != " "*len(line)
 189
 190
 191 def find_nonempty_line(lines, start, end = 0):
 192     if end == 0:
 193         end = len(lines)
 194     for i in xrange(start, end):
 195         if is_nonempty_line(lines[i]):
 196             return i
 197     return -1
 198
 199
 200 ##
 201 # Tools for file reading
 202 #
 203 def read_file(header, body, opt):
 204     """Reads a file into the header and body parts"""
 205     preamble = 0
 206
 207     while 1:
 208         line = opt.input.readline()
 209         if not line:
 210             opt.error("Invalid LyX file.")
 211
 212         line = line[:-1]
 213         if check_token(line, '\\begin_preamble'):
 214             preamble = 1
 215         if check_token(line, '\\end_preamble'):
 216             preamble = 0
 217
 218         if not preamble:
 219             line = string.strip(line)
 220
 221         if not line and not preamble:
 222             break
 223
 224         header.append(line)
 225
 226     while 1:
 227         line = opt.input.readline()
 228         if not line:
 229             break
 230         body.append(line[:-1])
 231
 232
 233 def write_file(header, body, opt):
 234     for line in header:
 235         opt.output.write(line+"\n")
 236     opt.output.write("\n")
 237     for line in body:
 238         opt.output.write(line+"\n")
 239
 240
 241 ##
 242 # lyx version
 243 #
 244 original_version = re.compile(r"\#LyX (\S*)")
 245
 246 def read_version(header):
 247     for line in header:
 248         if line[0] != "#":
 249             return None
 250
 251         result = original_version.match(line)
 252         if result:
 253             return result.group(1)
 254     return None
 255
 256
 257 def set_version(lines, version):
 258     lines[0] = "#LyX %s created this file. For more info see http://www.lyx.org/" % version
 259     if lines[1][0] == '#':
 260         del lines[1]
 261
 262 ##
 263 # file format version
 264 #
 265 format_re = re.compile(r"(\d)[\.,]?(\d\d)")
 266 fileformat = re.compile(r"\\lyxformat\s*(\S*)")
 267 lst_ft = [210, 215, 216, 217, 218, 220, 221, 223, 224, 225, 226, 227, 228, 229,
 268           230, 231, 232, 233, 234]
 269
 270 format_relation = [("0_10",  [210], ["0.10.7","0.10"]),
 271                    ("0_12",  [215], ["0.12","0.12.1","0.12"]),
 272                    ("1_0_0", [215], ["1.0.0","1.0"]),
 273                    ("1_0_1", [215], ["1.0.1","1.0.2","1.0.3","1.0.4", "1.1.2","1.1"]),
 274                    ("1_1_4", [215], ["1.1.4","1.1"]),
 275                    ("1_1_5", [216], ["1.1.5","1.1.5fix1","1.1.5fix2","1.1"]),
 276                    ("1_1_6", [217], ["1.1.6","1.1.6fix1","1.1.6fix2","1.1"]),
 277                    ("1_1_6fix3", [218], ["1.1.6fix3","1.1.6fix4","1.1"]),
 278                    ("1_2", [220], ["1.2.0","1.2.1","1.2.3","1.2.4","1.2"]),
 279                    ("1_3", [221], ["1.3.0","1.3.1","1.3.2","1.3.3","1.3.4","1.3"]),
 280                    ("1_4", [223,224,225,226,227,228,229,230,231,232,233,234], ["1.4.0cvs","1.4"])]
 281
 282
 283 def lyxformat(format, opt):
 284     result = format_re.match(format)
 285     if result:
 286         format = int(result.group(1) + result.group(2))
 287     else:
 288         opt.error(str(format) + ": " + "Invalid LyX file.")
 289
 290     if format in lst_ft:
 291         return format
 292
 293     opt.error(str(format) + ": " + "Format not supported.")
 294     return None
 295
 296
 297 def read_format(header, opt):
 298     for line in header:
 299         result = fileformat.match(line)
 300         if result:
 301             return lyxformat(result.group(1), opt)
 302     else:
 303         opt.error("Invalid LyX File.")
 304     return None
 305
 306
 307 def set_format(lines, number):
 308     if int(number) <= 217:
 309         number = float(number)/100
 310     i = find_token(lines, "\\lyxformat", 0)
 311     lines[i] = "\\lyxformat %s" % number
 312
 313
 314 def get_end_format():
 315     return format_relation[-1:][0][1][-1:][0]
 316
 317
 318 def get_backend(textclass):
 319     if textclass == "linuxdoc" or textclass == "manpage":
 320         return "linuxdoc"
 321     if textclass[:7] == "docbook":
 322         return "docbook"
 323     return "latex"
 324
 325
 326 def chain(opt, initial_version):
 327     """ This is where all the decisions related with the convertion are taken"""
 328
 329     format = opt.format
 330     if opt.start:
 331         if opt.start != format:
 332             opt.warning("%s: %s %s" % ("Proposed file format and input file formats do not match:", opt.start, format))
 333     else:
 334         opt.start = format
 335
 336     if not opt.end:
 337         opt.end = get_end_format()
 338
 339     correct_version = 0
 340
 341     for rel in format_relation:
 342         if initial_version in rel[2]:
 343             if format in rel[1]:
 344                 initial_step = rel[0]
 345                 correct_version = 1
 346                 break
 347
 348     if not correct_version:
 349         if format <= 215:
 350             opt.warning("Version does not match file format, discarding it.")
 351         for rel in format_relation:
 352             if format in rel[1]:
 353                 initial_step = rel[0]
 354                 break
 355         else:
 356             # This should not happen, really.
 357             opt.error("Format not supported.")
 358
 359     # Find the final step
 360     for rel in format_relation:
 361         if opt.end in rel[1]:
 362             final_step = rel[0]
 363             break
 364     else:
 365         opt.error("Format not supported.")
 366
 367     # Convertion mode, back or forth
 368     steps = []
 369     if (initial_step, opt.start) < (final_step, opt.end):
 370         mode = "convert"
 371         first_step = 1
 372         for step in format_relation:
 373             if  initial_step <= step[0] <= final_step:
 374                 if first_step and len(step[1]) == 1:
 375                     first_step = 0
 376                     continue
 377                 steps.append(step[0])
 378     else:
 379         mode = "revert"
 380         relation_format = format_relation
 381         relation_format.reverse()
 382         last_step = None
 383
 384         for step in relation_format:
 385             if  final_step <= step[0] <= initial_step:
 386                 steps.append(step[0])
 387                 last_step = step
 388
 389         if last_step[1][-1] == opt.end:
 390             steps.pop()
 391
 392     return mode, steps