lib/lyx2lyx/lyx_1_2.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: iso-8859-1 -*-
   3 # Copyright (C) 2002 Dekel Tsur <dekel@lyx.org>
   4 # Copyright (C) 2004 José Matos <jamatos@lyx.org>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 import string
  21 import re
  22
  23 from parser_tools import find_token, find_token_backwards, get_next_paragraph,\
  24                          find_tokens, find_end_of_inset, find_re, \
  25                          is_nonempty_line, get_paragraph, find_nonempty_line, \
  26                          get_value, get_tabular_lines, check_token
  27
# Maps each old-style float type (the word that follows "\begin_float") to
# the header lines of the inset that remove_oldfloat() writes in its place.
floats = {
    "footnote": ["\\begin_inset Foot",
                 "collapsed true"],
    "margin":   ["\\begin_inset Marginal",
                 "collapsed true"],
    "fig":      ["\\begin_inset Float figure",
                 "wide false",
                 "collapsed false"],
    "tab":      ["\\begin_inset Float table",
                 "wide false",
                 "collapsed false"],
    "alg":      ["\\begin_inset Float algorithm",
                 "wide false",
                 "collapsed false"],
    "wide-fig": ["\\begin_inset Float figure",
                 "wide true",
                 "collapsed false"],
    "wide-tab": ["\\begin_inset Float table",
                 "wide true",
                 "collapsed false"]
}

# Font-attribute tokens that remove_oldfloat() looks for before a float and,
# when found, re-emits after the converted inset as "<token> default "
# (or with the document language in the case of "\lang").
font_tokens = ["\\family", "\\series", "\\shape", "\\size", "\\emph",
               "\\bar", "\\noun", "\\color", "\\lang", "\\latex"]
  52
  53 pextra_type3_rexp = re.compile(r".*\\pextra_type\s+3")
  54 pextra_rexp = re.compile(r"\\pextra_type\s+(\S+)"+\
  55                          r"(\s+\\pextra_alignment\s+(\S+))?"+\
  56                          r"(\s+\\pextra_hfill\s+(\S+))?"+\
  57                          r"(\s+\\pextra_start_minipage\s+(\S+))?"+\
  58                          r"(\s+(\\pextra_widthp?)\s+(\S*))?")
  59
  60
  61 def get_width(mo):
  62     if mo.group(10):
  63         if mo.group(9) == "\\pextra_widthp":
  64             return mo.group(10)+"col%"
  65         else:
  66             return mo.group(10)
  67     else:
  68         return "100col%"
  69
  70
  71 #
  72 # Change \begin_float .. \end_float into \begin_inset Float .. \end_inset
  73 #
  74 def remove_oldfloat(lines, opt):
  75     i = 0
  76     while 1:
  77         i = find_token(lines, "\\begin_float", i)
  78         if i == -1:
  79             break
  80         # There are no nested floats, so finding the end of the float is simple
  81         j = find_token(lines, "\\end_float", i+1)
  82
  83         floattype = string.split(lines[i])[1]
  84         if not floats.has_key(floattype):
  85             opt.warning("Error! Unknown float type " + floattype)
  86             floattype = "fig"
  87
  88         # skip \end_deeper tokens
  89         i2 = i+1
  90         while check_token(lines[i2], "\\end_deeper"):
  91             i2 = i2+1
  92         if i2 > i+1:
  93             j2 = get_next_paragraph(lines, j+1)
  94             lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1))
  95
  96         new = floats[floattype]+[""]
  97
  98         # Check if the float is floatingfigure
  99         k = find_re(lines, pextra_type3_rexp, i, j)
 100         if k != -1:
 101             mo = pextra_rexp.search(lines[k])
 102             width = get_width(mo)
 103             lines[k] = re.sub(pextra_rexp, "", lines[k])
 104             new = ["\\begin_inset Wrap figure",
 105                    'width "%s"' % width,
 106                    "collapsed false",
 107                    ""]
 108
 109         new = new+lines[i2:j]+["\\end_inset ", ""]
 110
 111         # After a float, all font attributes are reseted.
 112         # We need to output '\foo default' for every attribute foo
 113         # whose value is not default before the float.
 114         # The check here is not accurate, but it doesn't matter
 115         # as extra '\foo default' commands are ignored.
 116         # In fact, it might be safer to output '\foo default' for all
 117         # font attributes.
 118         k = get_paragraph(lines, i)
 119         flag = 0
 120         for token in font_tokens:
 121             if find_token(lines, token, k, i) != -1:
 122                 if not flag:
 123                     # This is not necessary, but we want the output to be
 124                     # as similar as posible to the lyx format
 125                     flag = 1
 126                     new.append("")
 127                 if token == "\\lang":
 128                     new.append(token+" "+ opt.language)
 129                 else:
 130                     new.append(token+" default ")
 131
 132         lines[i:j+1] = new
 133         i = i+1
 134
 135
# Matches a line containing "\pextra_type" followed by 1 or 2; drives the
# outer scan of remove_pextra().
pextra_type2_rexp = re.compile(r".*\\pextra_type\s+[12]")
# Matches a line containing either "\layout" or "\pextra_type 2"; used by
# remove_pextra() while it collects further paragraphs into a minipage.
pextra_type2_rexp2 = re.compile(r".*(\\layout|\\pextra_type\s+2)")
 138
 139 def remove_pextra(lines):
 140     i = 0
 141     flag = 0
 142     while 1:
 143         i = find_re(lines, pextra_type2_rexp, i)
 144         if i == -1:
 145             break
 146
 147         mo = pextra_rexp.search(lines[i])
 148         width = get_width(mo)
 149
 150         if mo.group(1) == "1":
 151             # handle \pextra_type 1 (indented paragraph)
 152             lines[i] = re.sub(pextra_rexp, "\\leftindent "+width+" ", lines[i])
 153             i = i+1
 154             continue
 155
 156         # handle \pextra_type 2 (minipage)
 157         position = mo.group(3)
 158         hfill = mo.group(5)
 159         lines[i] = re.sub(pextra_rexp, "", lines[i])
 160
 161         start = ["\\begin_inset Minipage",
 162                  "position " + position,
 163                  "inner_position 0",
 164                  'height "0pt"',
 165                  'width "%s"' % width,
 166                  "collapsed false"
 167                  ]
 168         if flag:
 169             flag = 0
 170             if hfill:
 171                 start = ["","\hfill",""]+start
 172         else:
 173             start = ["\\layout Standard"] + start
 174
 175         j0 = find_token_backwards(lines,"\\layout", i-1)
 176         j = get_next_paragraph(lines, i)
 177
 178         count = 0
 179         while 1:
 180             # collect more paragraphs to the minipage
 181             count = count+1
 182             if j == -1 or not check_token(lines[j], "\\layout"):
 183                 break
 184             i = find_re(lines, pextra_type2_rexp2, j+1)
 185             if i == -1:
 186                 break
 187             mo = pextra_rexp.search(lines[i])
 188             if not mo:
 189                 break
 190             if mo.group(7) == "1":
 191                 flag = 1
 192                 break
 193             lines[i] = re.sub(pextra_rexp, "", lines[i])
 194             j = find_tokens(lines, ["\\layout", "\\end_float"], i+1)
 195
 196         mid = lines[j0:j]
 197         end = ["\\end_inset "]
 198
 199         lines[j0:j] = start+mid+end
 200         i = i+1
 201
 202
 203 def is_empty(lines):
 204     return filter(is_nonempty_line, lines) == []
 205
 206
# Lines starting with one of these commands are moved by remove_oldert()
# out of the ERT text and re-emitted after the generated insets.
move_rexp =  re.compile(r"\\(family|series|shape|size|emph|numeric|bar|noun|end_deeper)")
# Points at which remove_oldert() splits ERT text: the start of an inset,
# an \hfill line, or any line containing \SpecialChar.
ert_rexp = re.compile(r"\\begin_inset|\\hfill|.*\\SpecialChar")
# Splits a line into the text before \SpecialChar (group 1) and the
# \SpecialChar command with everything after it (group 2).
spchar_rexp = re.compile(r"(.*)(\\SpecialChar.*)")
# Header lines that remove_oldert() writes in front of each chunk of ERT text.
ert_begin = ["\\begin_inset ERT",
             "status Collapsed",
             "",
             "\\layout Standard"]
 214
 215
 216 def remove_oldert(lines):
 217     i = 0
 218     while 1:
 219         i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
 220         if i == -1:
 221             break
 222         j = i+1
 223         while 1:
 224             # \end_inset is for ert inside a tabular cell. The other tokens
 225             # are obvious.
 226             j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"],
 227                             j)
 228             if check_token(lines[j], "\\begin_inset"):
 229                 j = find_end_of_inset(lines, j)+1
 230             else:
 231                 break
 232
 233         if check_token(lines[j], "\\layout"):
 234             while j-1 >= 0 and check_token(lines[j-1], "\\begin_deeper"):
 235                 j = j-1
 236
 237         # We need to remove insets, special chars & font commands from ERT text
 238         new = []
 239         new2 = []
 240         if check_token(lines[i], "\\layout LaTeX"):
 241             new = ["\layout Standard", "", ""]
 242             # We have a problem with classes in which Standard is not the default layout!
 243
 244         k = i+1
 245         while 1:
 246             k2 = find_re(lines, ert_rexp, k, j)
 247             inset = hfill = specialchar = 0
 248             if k2 == -1:
 249                 k2 = j
 250             elif check_token(lines[k2], "\\begin_inset"):
 251                 inset = 1
 252             elif check_token(lines[k2], "\\hfill"):
 253                 hfill = 1
 254                 del lines[k2]
 255                 j = j-1
 256             else:
 257                 specialchar = 1
 258                 mo = spchar_rexp.match(lines[k2])
 259                 lines[k2] = mo.group(1)
 260                 specialchar_str = mo.group(2)
 261                 k2 = k2+1
 262
 263             tmp = []
 264             for line in lines[k:k2]:
 265                 # Move some lines outside the ERT inset:
 266                 if move_rexp.match(line):
 267                     if new2 == []:
 268                         # This is not necessary, but we want the output to be
 269                         # as similar as posible to the lyx format
 270                         new2 = [""]
 271                     new2.append(line)
 272                 elif not check_token(line, "\\latex"):
 273                     tmp.append(line)
 274
 275             if is_empty(tmp):
 276                 if filter(lambda x:x != "", tmp) != []:
 277                     if new == []:
 278                         # This is not necessary, but we want the output to be
 279                         # as similar as posible to the lyx format
 280                         lines[i-1] = lines[i-1]+" "
 281                     else:
 282                         new = new+[" "]
 283             else:
 284                 new = new+ert_begin+tmp+["\\end_inset ", ""]
 285
 286             if inset:
 287                 k3 = find_end_of_inset(lines, k2)
 288                 new = new+[""]+lines[k2:k3+1]+[""] # Put an empty line after \end_inset
 289                 k = k3+1
 290                 # Skip the empty line after \end_inset
 291                 if not is_nonempty_line(lines[k]):
 292                     k = k+1
 293                     new.append("")
 294             elif hfill:
 295                 new = new+["\hfill", ""]
 296                 k = k2
 297             elif specialchar:
 298                 if new == []:
 299                     # This is not necessary, but we want the output to be
 300                     # as similar as posible to the lyx format
 301                     lines[i-1] = lines[i-1]+specialchar_str
 302                     new = [""]
 303                 else:
 304                     new = new+[specialchar_str, ""]
 305                 k = k2
 306             else:
 307                 break
 308
 309         new = new+new2
 310         if not check_token(lines[j], "\\latex "):
 311             new = new+[""]+[lines[j]]
 312         lines[i:j+1] = new
 313         i = i+1
 314
 315     # Delete remaining "\latex xxx" tokens
 316     i = 0
 317     while 1:
 318         i = find_token(lines, "\\latex ", i)
 319         if i == -1:
 320             break
 321         del lines[i]
 322
 323
 324 # ERT insert are hidden feature of lyx 1.1.6. This might be removed in the future.
 325 def remove_oldertinset(lines):
 326     i = 0
 327     while 1:
 328         i = find_token(lines, "\\begin_inset ERT", i)
 329         if i == -1:
 330             break
 331         j = find_end_of_inset(lines, i)
 332         k = find_token(lines, "\\layout", i+1)
 333         l = get_paragraph(lines, i)
 334         if lines[k] == lines[l]: # same layout
 335             k = k+1
 336         new = lines[k:j]
 337         lines[i:j+1] = new
 338         i = i+1
 339
 340
 341 def is_ert_paragraph(lines, i):
 342     if not check_token(lines[i], "\\layout Standard"):
 343         return 0
 344
 345     i = find_nonempty_line(lines, i+1)
 346     if not check_token(lines[i], "\\begin_inset ERT"):
 347         return 0
 348
 349     j = find_end_of_inset(lines, i)
 350     k = find_nonempty_line(lines, j+1)
 351     return check_token(lines[k], "\\layout")
 352
 353
 354 def combine_ert(lines):
 355     i = 0
 356     while 1:
 357         i = find_token(lines, "\\begin_inset ERT", i)
 358         if i == -1:
 359             break
 360         j = get_paragraph(lines, i)
 361         count = 0
 362         text = []
 363         while is_ert_paragraph(lines, j):
 364
 365             count = count+1
 366             i2 = find_token(lines, "\\layout", j+1)
 367             k = find_token(lines, "\\end_inset", i2+1)
 368             text = text+lines[i2:k]
 369             j = find_token(lines, "\\layout", k+1)
 370             if j == -1:
 371                 break
 372
 373         if count >= 2:
 374             j = find_token(lines, "\\layout", i+1)
 375             lines[j:k] = text
 376
 377         i = i+1
 378
 379
# Unit suffixes, indexed by the integer unit code that get_length() reads
# from the second field of an old length line.
oldunits = ["pt", "cm", "in", "text%", "col%"]
 381
 382 def get_length(lines, name, start, end):
 383     i = find_token(lines, name, start, end)
 384     if i == -1:
 385         return ""
 386     x = string.split(lines[i])
 387     return x[2]+oldunits[int(x[1])]
 388
 389
 390 def write_attribute(x, token, value):
 391     if value != "":
 392         x.append("\t"+token+" "+value)
 393
 394
 395 def remove_figinset(lines):
 396     i = 0
 397     while 1:
 398         i = find_token(lines, "\\begin_inset Figure", i)
 399         if i == -1:
 400             break
 401         j = find_end_of_inset(lines, i)
 402
 403         if ( len(string.split(lines[i])) > 2 ):
 404             lyxwidth = string.split(lines[i])[3]+"pt"
 405             lyxheight = string.split(lines[i])[4]+"pt"
 406         else:
 407             lyxwidth = ""
 408             lyxheight = ""
 409
 410         filename = get_value(lines, "file", i+1, j)
 411
 412         width = get_length(lines, "width", i+1, j)
 413         # what does width=5 mean ?
 414         height = get_length(lines, "height", i+1, j)
 415         rotateAngle = get_value(lines, "angle", i+1, j)
 416         if width == "" and height == "":
 417             size_type = "0"
 418         else:
 419             size_type = "1"
 420
 421         flags = get_value(lines, "flags", i+1, j)
 422         x = int(flags)%4
 423         if x == 1:
 424             display = "monochrome"
 425         elif x == 2:
 426             display = "gray"
 427         else:
 428             display = "color"
 429
 430         subcaptionText = ""
 431         subcaptionLine = find_token(lines, "subcaption", i+1, j)
 432         if subcaptionLine != -1:
 433             subcaptionText = lines[subcaptionLine][11:]
 434             if subcaptionText != "":
 435                 subcaptionText = '"'+subcaptionText+'"'
 436
 437         k = find_token(lines, "subfigure", i+1,j)
 438         if k == -1:
 439             subcaption = 0
 440         else:
 441             subcaption = 1
 442
 443         new = ["\\begin_inset Graphics FormatVersion 1"]
 444         write_attribute(new, "filename", filename)
 445         write_attribute(new, "display", display)
 446         if subcaption:
 447             new.append("\tsubcaption")
 448         write_attribute(new, "subcaptionText", subcaptionText)
 449         write_attribute(new, "size_type", size_type)
 450         write_attribute(new, "width", width)
 451         write_attribute(new, "height", height)
 452         if rotateAngle != "":
 453             new.append("\trotate")
 454             write_attribute(new, "rotateAngle", rotateAngle)
 455         write_attribute(new, "rotateOrigin", "leftBaseline")
 456         write_attribute(new, "lyxsize_type", "1")
 457         write_attribute(new, "lyxwidth", lyxwidth)
 458         write_attribute(new, "lyxheight", lyxheight)
 459         new = new + ["\end_inset"]
 460         lines[i:j+1] = new
 461
 462
# Matches an attribute (with its leading space) whose quoted value is
# "false", "0" or empty; update_tabular() deletes such attributes.
attr_re = re.compile(r' \w*="(false|0|)"')
# Matches the tabular tags on which update_tabular() applies attr_re.
line_re = re.compile(r'<(features|column|row|cell)')
 465
 466 def update_tabular(lines):
 467     i = 0
 468     while 1:
 469         i = find_token(lines, '\\begin_inset  Tabular', i)
 470         if i == -1:
 471             break
 472
 473         for k in get_tabular_lines(lines, i):
 474             if check_token(lines[k], "<lyxtabular"):
 475                 lines[k] = string.replace(lines[k], 'version="2"', 'version="3"')
 476             elif check_token(lines[k], "<column"):
 477                 lines[k] = string.replace(lines[k], 'width=""', 'width="0pt"')
 478
 479             if line_re.match(lines[k]):
 480                 lines[k] = re.sub(attr_re, "", lines[k])
 481
 482         i = i+1
 483
 484
 485 # Figure insert are hidden feature of lyx 1.1.6. This might be removed in the future.
 486 def fix_oldfloatinset(lines):
 487     i = 0
 488     while 1:
 489         i = find_token(lines, "\\begin_inset Float ", i)
 490         if i == -1:
 491             break
 492         j = find_token(lines, "collapsed", i)
 493         if j != -1:
 494             lines[j:j] = ["wide false"]
 495         i = i+1
 496
 497
 498 def change_listof(lines):
 499     i = 0
 500     while 1:
 501         i = find_token(lines, "\\begin_inset LatexCommand \\listof", i)
 502         if i == -1:
 503             break
 504         type = re.search(r"listof(\w*)", lines[i]).group(1)[:-1]
 505         lines[i] = "\\begin_inset FloatList "+type
 506         i = i+1
 507
 508
 509 def change_infoinset(lines):
 510     i = 0
 511     while 1:
 512         i = find_token(lines, "\\begin_inset Info", i)
 513         if i == -1:
 514             break
 515         txt = string.lstrip(lines[i][18:])
 516         new = ["\\begin_inset Note", "collapsed true", ""]
 517         j = find_token(lines, "\\end_inset", i)
 518         if j == -1:
 519             break
 520
 521         note_lines = lines[i+1:j]
 522         if len(txt) > 0:
 523             note_lines = [txt]+note_lines
 524
 525         for line in note_lines:
 526             new = new + ["\layout Standard", ""]
 527             tmp = string.split(line, '\\')
 528             new = new + [tmp[0]]
 529             for x in tmp[1:]:
 530                 new = new + ["\\backslash ", x]
 531         lines[i:j] = new
 532         i = i+5
 533
 534
 535 def change_preamble(lines):
 536     i = find_token(lines, "\\use_amsmath", 0)
 537     if i == -1:
 538         return
 539     lines[i+1:i+1] = ["\\use_natbib 0",
 540                       "\use_numerical_citations 0"]
 541
 542
 543 def convert(file):
 544     change_preamble(file.header)
 545     change_listof(file.body)
 546     fix_oldfloatinset(file.body)
 547     update_tabular(file.body)
 548     remove_pextra(file.body)
 549     remove_oldfloat(file.body, file)
 550     remove_figinset(file.body)
 551     remove_oldertinset(file.body)
 552     remove_oldert(file.body)
 553     combine_ert(file.body)
 554     change_infoinset(file.body)
 555     file.format = 220
 556
 557
 558 def revert(file):
 559     file.error("The convertion to an older format (%s) is not implemented." % file.format)
 560
 561
# Executing this file as a script does nothing.
if __name__ == "__main__":
    pass