lib/lyx2lyx/lyxconvert_218.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: iso-8859-1 -*-
   3 # Copyright (C) 2002 Dekel Tsur <dekel@lyx.org>
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19
  20 import sys,string,re
  21 from parser_tools import *
  22
  23 floats = {
  24     "footnote": ["\\begin_inset Foot",
  25                  "collapsed true"],
  26     "margin":   ["\\begin_inset Marginal",
  27                  "collapsed true"],
  28     "fig":      ["\\begin_inset Float figure",
  29                  "wide false",
  30                  "collapsed false"],
  31     "tab":      ["\\begin_inset Float table",
  32                  "wide false",
  33                  "collapsed false"],
  34     "alg":      ["\\begin_inset Float algorithm",
  35                  "wide false",
  36                  "collapsed false"],
  37     "wide-fig": ["\\begin_inset Float figure",
  38                  "wide true",
  39                  "collapsed false"],
  40     "wide-tab": ["\\begin_inset Float table",
  41                  "wide true",
  42                  "collapsed false"]
  43 }
  44
  45 font_tokens = ["\\family", "\\series", "\\shape", "\\size", "\\emph",
  46                "\\bar", "\\noun", "\\color", "\\lang", "\\latex"]
  47
  48 pextra_type3_rexp = re.compile(r".*\\pextra_type\s+3")
  49 pextra_rexp = re.compile(r"\\pextra_type\s+(\S+)"+\
  50                          r"(\s+\\pextra_alignment\s+(\S+))?"+\
  51                          r"(\s+\\pextra_hfill\s+(\S+))?"+\
  52                          r"(\s+\\pextra_start_minipage\s+(\S+))?"+\
  53                          r"(\s+(\\pextra_widthp?)\s+(\S*))?")
  54
  55 def get_width(mo):
  56     if mo.group(10):
  57         if mo.group(9) == "\\pextra_widthp":
  58             return mo.group(10)+"col%"
  59         else:
  60             return mo.group(10)
  61     else:
  62         return "100col%"
  63
  64 #
  65 # Change \begin_float .. \end_float into \begin_inset Float .. \end_inset
  66 #
  67
  68 def remove_oldfloat(lines, language):
  69     i = 0
  70     while 1:
  71         i = find_token(lines, "\\begin_float", i)
  72         if i == -1:
  73             break
  74         # There are no nested floats, so finding the end of the float is simple
  75         j = find_token(lines, "\\end_float", i+1)
  76
  77         floattype = string.split(lines[i])[1]
  78         if not floats.has_key(floattype):
  79             sys.stderr.write("Error! Unknown float type "+floattype+"\n")
  80             floattype = "fig"
  81
  82         # skip \end_deeper tokens
  83         i2 = i+1
  84         while check_token(lines[i2], "\\end_deeper"):
  85             i2 = i2+1
  86         if i2 > i+1:
  87             j2 = get_next_paragraph(lines, j+1)
  88             lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1))
  89
  90         new = floats[floattype]+[""]
  91
  92         # Check if the float is floatingfigure
  93         k = find_re(lines, pextra_type3_rexp, i, j)
  94         if k != -1:
  95             mo = pextra_rexp.search(lines[k])
  96             width = get_width(mo)
  97             lines[k] = re.sub(pextra_rexp, "", lines[k])
  98             new = ["\\begin_inset Wrap figure",
  99                    'width "%s"' % width,
 100                    "collapsed false",
 101                    ""]
 102
 103         new = new+lines[i2:j]+["\\end_inset ", ""]
 104
 105         # After a float, all font attributes are reseted.
 106         # We need to output '\foo default' for every attribute foo
 107         # whose value is not default before the float.
 108         # The check here is not accurate, but it doesn't matter
 109         # as extra '\foo default' commands are ignored.
 110         # In fact, it might be safer to output '\foo default' for all
 111         # font attributes.
 112         k = get_paragraph(lines, i)
 113         flag = 0
 114         for token in font_tokens:
 115             if find_token(lines, token, k, i) != -1:
 116                 if not flag:
 117                     # This is not necessary, but we want the output to be
 118                     # as similar as posible to the lyx format
 119                     flag = 1
 120                     new.append("")
 121                 if token == "\\lang":
 122                     new.append(token+" "+language)
 123                 else:
 124                     new.append(token+" default ")
 125
 126         lines[i:j+1] = new
 127         i = i+1
 128
 129 pextra_type2_rexp = re.compile(r".*\\pextra_type\s+[12]")
 130 pextra_type2_rexp2 = re.compile(r".*(\\layout|\\pextra_type\s+2)")
 131
 132 def remove_pextra(lines):
 133     i = 0
 134     flag = 0
 135     while 1:
 136         i = find_re(lines, pextra_type2_rexp, i)
 137         if i == -1:
 138             break
 139
 140         mo = pextra_rexp.search(lines[i])
 141         width = get_width(mo)
 142
 143         if mo.group(1) == "1":
 144             # handle \pextra_type 1 (indented paragraph)
 145             lines[i] = re.sub(pextra_rexp, "\\leftindent "+width+" ", lines[i])
 146             i = i+1
 147             continue
 148
 149         # handle \pextra_type 2 (minipage)
 150         position = mo.group(3)
 151         hfill = mo.group(5)
 152         lines[i] = re.sub(pextra_rexp, "", lines[i])
 153
 154         start = ["\\begin_inset Minipage",
 155                  "position " + position,
 156                  "inner_position 0",
 157                  'height "0pt"',
 158                  'width "%s"' % width,
 159                  "collapsed false"
 160                  ]
 161         if flag:
 162             flag = 0
 163             if hfill:
 164                 start = ["","\hfill",""]+start
 165         else:
 166             start = ["\\layout Standard"] + start
 167
 168         j0 = find_token_backwards(lines,"\\layout", i-1)
 169         j = get_next_paragraph(lines, i)
 170
 171         count = 0
 172         while 1:
 173             # collect more paragraphs to the minipage
 174             count = count+1
 175             if j == -1 or not check_token(lines[j], "\\layout"):
 176                 break
 177             i = find_re(lines, pextra_type2_rexp2, j+1)
 178             if i == -1:
 179                 break
 180             mo = pextra_rexp.search(lines[i])
 181             if not mo:
 182                 break
 183             if mo.group(7) == "1":
 184                 flag = 1
 185                 break
 186             lines[i] = re.sub(pextra_rexp, "", lines[i])
 187             j = find_tokens(lines, ["\\layout", "\\end_float"], i+1)
 188
 189         mid = lines[j0:j]
 190         end = ["\\end_inset "]
 191
 192         lines[j0:j] = start+mid+end
 193         i = i+1
 194
 195 def is_empty(lines):
 196     return filter(is_nonempty_line, lines) == []
 197
 198 move_rexp =  re.compile(r"\\(family|series|shape|size|emph|numeric|bar|noun|end_deeper)")
 199 ert_rexp = re.compile(r"\\begin_inset|\\hfill|.*\\SpecialChar")
 200 spchar_rexp = re.compile(r"(.*)(\\SpecialChar.*)")
 201 ert_begin = ["\\begin_inset ERT",
 202              "status Collapsed",
 203              "",
 204              "\\layout Standard"]
 205
 206 def remove_oldert(lines):
 207     i = 0
 208     while 1:
 209         i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
 210         if i == -1:
 211             break
 212         j = i+1
 213         while 1:
 214             # \end_inset is for ert inside a tabular cell. The other tokens
 215             # are obvious.
 216             j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"],
 217                             j)
 218             if check_token(lines[j], "\\begin_inset"):
 219                 j = find_end_of_inset(lines, j)+1
 220             else:
 221                 break
 222
 223         if check_token(lines[j], "\\layout"):
 224             while j-1 >= 0 and check_token(lines[j-1], "\\begin_deeper"):
 225                 j = j-1
 226
 227         # We need to remove insets, special chars & font commands from ERT text
 228         new = []
 229         new2 = []
 230         if check_token(lines[i], "\\layout LaTeX"):
 231             new = ["\layout Standard", "", ""]
 232             # We have a problem with classes in which Standard is not the default layout!
 233
 234         k = i+1
 235         while 1:
 236             k2 = find_re(lines, ert_rexp, k, j)
 237             inset = hfill = specialchar = 0
 238             if k2 == -1:
 239                 k2 = j
 240             elif check_token(lines[k2], "\\begin_inset"):
 241                 inset = 1
 242             elif check_token(lines[k2], "\\hfill"):
 243                 hfill = 1
 244                 del lines[k2]
 245                 j = j-1
 246             else:
 247                 specialchar = 1
 248                 mo = spchar_rexp.match(lines[k2])
 249                 lines[k2] = mo.group(1)
 250                 specialchar_str = mo.group(2)
 251                 k2 = k2+1
 252
 253             tmp = []
 254             for line in lines[k:k2]:
 255                 # Move some lines outside the ERT inset:
 256                 if move_rexp.match(line):
 257                     if new2 == []:
 258                         # This is not necessary, but we want the output to be
 259                         # as similar as posible to the lyx format
 260                         new2 = [""]
 261                     new2.append(line)
 262                 elif not check_token(line, "\\latex"):
 263                     tmp.append(line)
 264
 265             if is_empty(tmp):
 266                 if filter(lambda x:x != "", tmp) != []:
 267                     if new == []:
 268                         # This is not necessary, but we want the output to be
 269                         # as similar as posible to the lyx format
 270                         lines[i-1] = lines[i-1]+" "
 271                     else:
 272                         new = new+[" "]
 273             else:
 274                 new = new+ert_begin+tmp+["\\end_inset ", ""]
 275
 276             if inset:
 277                 k3 = find_end_of_inset(lines, k2)
 278                 new = new+[""]+lines[k2:k3+1]+[""] # Put an empty line after \end_inset
 279                 k = k3+1
 280                 # Skip the empty line after \end_inset
 281                 if not is_nonempty_line(lines[k]):
 282                     k = k+1
 283                     new.append("")
 284             elif hfill:
 285                 new = new+["\hfill", ""]
 286                 k = k2
 287             elif specialchar:
 288                 if new == []:
 289                     # This is not necessary, but we want the output to be
 290                     # as similar as posible to the lyx format
 291                     lines[i-1] = lines[i-1]+specialchar_str
 292                     new = [""]
 293                 else:
 294                     new = new+[specialchar_str, ""]
 295                 k = k2
 296             else:
 297                 break
 298
 299         new = new+new2
 300         if not check_token(lines[j], "\\latex "):
 301             new = new+[""]+[lines[j]]
 302         lines[i:j+1] = new
 303         i = i+1
 304
 305     # Delete remaining "\latex xxx" tokens
 306     i = 0
 307     while 1:
 308         i = find_token(lines, "\\latex ", i)
 309         if i == -1:
 310             break
 311         del lines[i]
 312
 313 # ERT insert are hidden feature of lyx 1.1.6. This might be removed in the future.
 314 def remove_oldertinset(lines):
 315     i = 0
 316     while 1:
 317         i = find_token(lines, "\\begin_inset ERT", i)
 318         if i == -1:
 319             break
 320         j = find_end_of_inset(lines, i)
 321         k = find_token(lines, "\\layout", i+1)
 322         l = get_paragraph(lines, i)
 323         if lines[k] == lines[l]: # same layout
 324             k = k+1
 325         new = lines[k:j]
 326         lines[i:j+1] = new
 327         i = i+1
 328
 329 def is_ert_paragraph(lines, i):
 330     if not check_token(lines[i], "\\layout Standard"):
 331         return 0
 332
 333     i = find_nonempty_line(lines, i+1)
 334     if not check_token(lines[i], "\\begin_inset ERT"):
 335         return 0
 336
 337     j = find_end_of_inset(lines, i)
 338     k = find_nonempty_line(lines, j+1)
 339     return check_token(lines[k], "\\layout")
 340
 341 def combine_ert(lines):
 342     i = 0
 343     while 1:
 344         i = find_token(lines, "\\begin_inset ERT", i)
 345         if i == -1:
 346             break
 347         j = get_paragraph(lines, i)
 348         count = 0
 349         text = []
 350         while is_ert_paragraph(lines, j):
 351
 352             count = count+1
 353             i2 = find_token(lines, "\\layout", j+1)
 354             k = find_token(lines, "\\end_inset", i2+1)
 355             text = text+lines[i2:k]
 356             j = find_token(lines, "\\layout", k+1)
 357             if j == -1:
 358                 break
 359
 360         if count >= 2:
 361             j = find_token(lines, "\\layout", i+1)
 362             lines[j:k] = text
 363
 364         i = i+1
 365
 366 oldunits = ["pt", "cm", "in", "text%", "col%"]
 367
 368 def get_length(lines, name, start, end):
 369     i = find_token(lines, name, start, end)
 370     if i == -1:
 371         return ""
 372     x = string.split(lines[i])
 373     return x[2]+oldunits[int(x[1])]
 374
 375 def write_attribute(x, token, value):
 376     if value != "":
 377         x.append("\t"+token+" "+value)
 378
 379 def remove_figinset(lines):
 380     i = 0
 381     while 1:
 382         i = find_token(lines, "\\begin_inset Figure", i)
 383         if i == -1:
 384             break
 385         j = find_end_of_inset(lines, i)
 386
 387         if ( len(string.split(lines[i])) > 2 ):
 388             lyxwidth = string.split(lines[i])[3]+"pt"
 389             lyxheight = string.split(lines[i])[4]+"pt"
 390         else:
 391             lyxwidth = ""
 392             lyxheight = ""
 393
 394         filename = get_value(lines, "file", i+1, j)
 395
 396         width = get_length(lines, "width", i+1, j)
 397         # what does width=5 mean ?
 398         height = get_length(lines, "height", i+1, j)
 399         rotateAngle = get_value(lines, "angle", i+1, j)
 400         if width == "" and height == "":
 401             size_type = "0"
 402         else:
 403             size_type = "1"
 404
 405         flags = get_value(lines, "flags", i+1, j)
 406         x = int(flags)%4
 407         if x == 1:
 408             display = "monochrome"
 409         elif x == 2:
 410             display = "gray"
 411         else:
 412             display = "color"
 413
 414         subcaptionText = ""
 415         subcaptionLine = find_token(lines, "subcaption", i+1, j)
 416         if subcaptionLine != -1:
 417             subcaptionText = lines[subcaptionLine][11:]
 418             if subcaptionText != "":
 419                 subcaptionText = '"'+subcaptionText+'"'
 420
 421         k = find_token(lines, "subfigure", i+1,j)
 422         if k == -1:
 423             subcaption = 0
 424         else:
 425             subcaption = 1
 426
 427         new = ["\\begin_inset Graphics FormatVersion 1"]
 428         write_attribute(new, "filename", filename)
 429         write_attribute(new, "display", display)
 430         if subcaption:
 431             new.append("\tsubcaption")
 432         write_attribute(new, "subcaptionText", subcaptionText)
 433         write_attribute(new, "size_type", size_type)
 434         write_attribute(new, "width", width)
 435         write_attribute(new, "height", height)
 436         if rotateAngle != "":
 437             new.append("\trotate")
 438             write_attribute(new, "rotateAngle", rotateAngle)
 439         write_attribute(new, "rotateOrigin", "leftBaseline")
 440         write_attribute(new, "lyxsize_type", "1")
 441         write_attribute(new, "lyxwidth", lyxwidth)
 442         write_attribute(new, "lyxheight", lyxheight)
 443         new = new + ["\end_inset"]
 444         lines[i:j+1] = new
 445
 446 attr_re = re.compile(r' \w*="(false|0|)"')
 447 line_re = re.compile(r'<(features|column|row|cell)')
 448
 449 def update_tabular(lines):
 450     i = 0
 451     while 1:
 452         i = find_token(lines, '\\begin_inset  Tabular', i)
 453         if i == -1:
 454             break
 455
 456         for k in get_tabular_lines(lines, i):
 457             if check_token(lines[k], "<lyxtabular"):
 458                 lines[k] = string.replace(lines[k], 'version="2"', 'version="3"')
 459             elif check_token(lines[k], "<column"):
 460                 lines[k] = string.replace(lines[k], 'width=""', 'width="0pt"')
 461
 462             if line_re.match(lines[k]):
 463                 lines[k] = re.sub(attr_re, "", lines[k])
 464
 465         i = i+1
 466
 467 # Figure insert are hidden feature of lyx 1.1.6. This might be removed in the future.
 468 def fix_oldfloatinset(lines):
 469     i = 0
 470     while 1:
 471         i = find_token(lines, "\\begin_inset Float ", i)
 472         if i == -1:
 473             break
 474         j = find_token(lines, "collapsed", i)
 475         if j != -1:
 476             lines[j:j] = ["wide false"]
 477         i = i+1
 478
 479 def change_listof(lines):
 480     i = 0
 481     while 1:
 482         i = find_token(lines, "\\begin_inset LatexCommand \\listof", i)
 483         if i == -1:
 484             break
 485         type = re.search(r"listof(\w*)", lines[i]).group(1)[:-1]
 486         lines[i] = "\\begin_inset FloatList "+type
 487         i = i+1
 488
 489 def change_infoinset(lines):
 490     i = 0
 491     while 1:
 492         i = find_token(lines, "\\begin_inset Info", i)
 493         if i == -1:
 494             break
 495         txt = string.lstrip(lines[i][18:])
 496         new = ["\\begin_inset Note", "collapsed true", ""]
 497         j = find_token(lines, "\\end_inset", i)
 498         if j == -1:
 499             break
 500
 501         note_lines = lines[i+1:j]
 502         if len(txt) > 0:
 503             note_lines = [txt]+note_lines
 504
 505         for line in note_lines:
 506             new = new + ["\layout Standard", ""]
 507             tmp = string.split(line, '\\')
 508             new = new + [tmp[0]]
 509             for x in tmp[1:]:
 510                 new = new + ["\\backslash ", x]
 511         lines[i:j] = new
 512         i = i+5
 513
 514 def change_preamble(lines):
 515     i = find_token(lines, "\\use_amsmath", 0)
 516     if i == -1:
 517         return
 518     lines[i+1:i+1] = ["\\use_natbib 0",
 519                       "\use_numerical_citations 0"]
 520
 521 def convert(header, body):
 522     language = get_value(header, "\\language", 0)
 523     if language == "":
 524         language = "english"
 525
 526     change_preamble(header)
 527     change_listof(body)
 528     fix_oldfloatinset(body)
 529     update_tabular(body)
 530     remove_pextra(body)
 531     remove_oldfloat(body, language)
 532     remove_figinset(body)
 533     remove_oldertinset(body)
 534     remove_oldert(body)
 535     combine_ert(body)
 536     change_infoinset(body)
 537
# Running this module as a script does nothing: the guard body is empty.
if __name__ == "__main__":
    pass