lib/lyx2lyx/lyxconvert_218.py

   1 # This file is part of lyx2lyx
   2 # Copyright (C) 2002 Dekel Tsur <dekel@lyx.org>
   3 #
   4 # This program is free software; you can redistribute it and/or
   5 # modify it under the terms of the GNU General Public License
   6 # as published by the Free Software Foundation; either version 2
   7 # of the License, or (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License
  15 # along with this program; if not, write to the Free Software
  16 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  17
  18
  19 import sys,string,re
  20 from parser_tools import *
  21
  22 floats = {
  23     "footnote": ["\\begin_inset Foot",
  24                  "collapsed true"],
  25     "margin":   ["\\begin_inset Marginal",
  26                  "collapsed true"],
  27     "fig":      ["\\begin_inset Float figure",
  28                  "wide false",
  29                  "collapsed false"],
  30     "tab":      ["\\begin_inset Float table",
  31                  "wide false",
  32                  "collapsed false"],
  33     "alg":      ["\\begin_inset Float algorithm",
  34                  "wide false",
  35                  "collapsed false"],
  36     "wide-fig": ["\\begin_inset Float figure",
  37                  "wide true",
  38                  "collapsed false"],
  39     "wide-tab": ["\\begin_inset Float table",
  40                  "wide true",
  41                  "collapsed false"]
  42 }
  43
  44 font_tokens = ["\\family", "\\series", "\\shape", "\\size", "\\emph",
  45                "\\bar", "\\noun", "\\color", "\\lang", "\\latex"]
  46
  47 pextra_type3_rexp = re.compile(r".*\\pextra_type\s+3")
  48 pextra_rexp = re.compile(r"\\pextra_type\s+(\S+)"+\
  49                          r"(\s+\\pextra_alignment\s+(\S+))?"+\
  50                          r"(\s+\\pextra_hfill\s+(\S+))?"+\
  51                          r"(\s+\\pextra_start_minipage\s+(\S+))?"+\
  52                          r"(\s+(\\pextra_widthp?)\s+(\S*))?")
  53
  54 def get_width(mo):
  55     if mo.group(10):
  56         if mo.group(9) == "\\pextra_widthp":
  57             return mo.group(10)+"col%"
  58         else:
  59             return mo.group(10)
  60     else:
  61         return "100col%"
  62
  63 #
  64 # Change \begin_float .. \end_float into \begin_inset Float .. \end_inset
  65 #
  66
  67 def remove_oldfloat(lines, language):
  68     i = 0
  69     while 1:
  70         i = find_token(lines, "\\begin_float", i)
  71         if i == -1:
  72             break
  73         # There are no nested floats, so finding the end of the float is simple
  74         j = find_token(lines, "\\end_float", i+1)
  75
  76         floattype = string.split(lines[i])[1]
  77         if not floats.has_key(floattype):
  78             sys.stderr.write("Error! Unknown float type "+floattype+"\n")
  79             floattype = "fig"
  80
  81         # skip \end_deeper tokens
  82         i2 = i+1
  83         while check_token(lines[i2], "\\end_deeper"):
  84             i2 = i2+1
  85         if i2 > i+1:
  86             j2 = get_next_paragraph(lines, j+1)
  87             lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1))
  88
  89         new = floats[floattype]+[""]
  90
  91         # Check if the float is floatingfigure
  92         k = find_re(lines, pextra_type3_rexp, i, j)
  93         if k != -1:
  94             mo = pextra_rexp.search(lines[k])
  95             width = get_width(mo)
  96             lines[k] = re.sub(pextra_rexp, "", lines[k])
  97             new = ["\\begin_inset Wrap figure",
  98                    'width "%s"' % width,
  99                    "collapsed false",
 100                    ""]
 101
 102         new = new+lines[i2:j]+["\\end_inset ", ""]
 103
 104         # After a float, all font attributes are reseted.
 105         # We need to output '\foo default' for every attribute foo
 106         # whose value is not default before the float.
 107         # The check here is not accurate, but it doesn't matter
 108         # as extra '\foo default' commands are ignored.
 109         # In fact, it might be safer to output '\foo default' for all
 110         # font attributes.
 111         k = get_paragraph(lines, i)
 112         flag = 0
 113         for token in font_tokens:
 114             if find_token(lines, token, k, i) != -1:
 115                 if not flag:
 116                     # This is not necessary, but we want the output to be
 117                     # as similar as posible to the lyx format
 118                     flag = 1
 119                     new.append("")
 120                 if token == "\\lang":
 121                     new.append(token+" "+language)
 122                 else:
 123                     new.append(token+" default ")
 124
 125         lines[i:j+1] = new
 126         i = i+1
 127
 128 pextra_type2_rexp = re.compile(r".*\\pextra_type\s+[12]")
 129 pextra_type2_rexp2 = re.compile(r".*(\\layout|\\pextra_type\s+2)")
 130
 131 def remove_pextra(lines):
 132     i = 0
 133     flag = 0
 134     while 1:
 135         i = find_re(lines, pextra_type2_rexp, i)
 136         if i == -1:
 137             break
 138
 139         mo = pextra_rexp.search(lines[i])
 140         width = get_width(mo)
 141
 142         if mo.group(1) == "1":
 143             # handle \pextra_type 1 (indented paragraph)
 144             lines[i] = re.sub(pextra_rexp, "\\leftindent "+width+" ", lines[i])
 145             i = i+1
 146             continue
 147
 148         # handle \pextra_type 2 (minipage)
 149         position = mo.group(3)
 150         hfill = mo.group(5)
 151         lines[i] = re.sub(pextra_rexp, "", lines[i])
 152
 153         start = ["\\begin_inset Minipage",
 154                  "position " + position,
 155                  "inner_position 0",
 156                  'height "0pt"',
 157                  'width "%s"' % width,
 158                  "collapsed false"
 159                  ]
 160         if flag:
 161             flag = 0
 162             if hfill:
 163                 start = ["","\hfill",""]+start
 164         else:
 165             start = ["\\layout Standard"] + start
 166
 167         j0 = find_token_backwards(lines,"\\layout", i-1)
 168         j = get_next_paragraph(lines, i)
 169
 170         count = 0
 171         while 1:
 172             # collect more paragraphs to the minipage
 173             count = count+1
 174             if j == -1 or not check_token(lines[j], "\\layout"):
 175                 break
 176             i = find_re(lines, pextra_type2_rexp2, j+1)
 177             if i == -1:
 178                 break
 179             mo = pextra_rexp.search(lines[i])
 180             if not mo:
 181                 break
 182             if mo.group(7) == "1":
 183                 flag = 1
 184                 break
 185             lines[i] = re.sub(pextra_rexp, "", lines[i])
 186             j = find_tokens(lines, ["\\layout", "\\end_float"], i+1)
 187
 188         mid = lines[j0:j]
 189         end = ["\\end_inset "]
 190
 191         lines[j0:j] = start+mid+end
 192         i = i+1
 193
 194 def is_empty(lines):
 195     return filter(is_nonempty_line, lines) == []
 196
 197 move_rexp =  re.compile(r"\\(family|series|shape|size|emph|numeric|bar|noun|end_deeper)")
 198 ert_rexp = re.compile(r"\\begin_inset|\\hfill|.*\\SpecialChar")
 199 spchar_rexp = re.compile(r"(.*)(\\SpecialChar.*)")
 200 ert_begin = ["\\begin_inset ERT",
 201              "status Collapsed",
 202              "",
 203              "\\layout Standard"]
 204
 205 def remove_oldert(lines):
 206     i = 0
 207     while 1:
 208         i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
 209         if i == -1:
 210             break
 211         j = i+1
 212         while 1:
 213             # \end_inset is for ert inside a tabular cell. The other tokens
 214             # are obvious.
 215             j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"],
 216                             j)
 217             if check_token(lines[j], "\\begin_inset"):
 218                 j = find_end_of_inset(lines, j)+1
 219             else:
 220                 break
 221
 222         if check_token(lines[j], "\\layout"):
 223             while j-1 >= 0 and check_token(lines[j-1], "\\begin_deeper"):
 224                 j = j-1
 225
 226         # We need to remove insets, special chars & font commands from ERT text
 227         new = []
 228         new2 = []
 229         if check_token(lines[i], "\\layout LaTeX"):
 230             new = ["\layout Standard", "", ""]
 231             # We have a problem with classes in which Standard is not the default layout!
 232
 233         k = i+1
 234         while 1:
 235             k2 = find_re(lines, ert_rexp, k, j)
 236             inset = hfill = specialchar = 0
 237             if k2 == -1:
 238                 k2 = j
 239             elif check_token(lines[k2], "\\begin_inset"):
 240                 inset = 1
 241             elif check_token(lines[k2], "\\hfill"):
 242                 hfill = 1
 243                 del lines[k2]
 244                 j = j-1
 245             else:
 246                 specialchar = 1
 247                 mo = spchar_rexp.match(lines[k2])
 248                 lines[k2] = mo.group(1)
 249                 specialchar_str = mo.group(2)
 250                 k2 = k2+1
 251
 252             tmp = []
 253             for line in lines[k:k2]:
 254                 # Move some lines outside the ERT inset:
 255                 if move_rexp.match(line):
 256                     if new2 == []:
 257                         # This is not necessary, but we want the output to be
 258                         # as similar as posible to the lyx format
 259                         new2 = [""]
 260                     new2.append(line)
 261                 elif not check_token(line, "\\latex"):
 262                     tmp.append(line)
 263
 264             if is_empty(tmp):
 265                 if filter(lambda x:x != "", tmp) != []:
 266                     if new == []:
 267                         # This is not necessary, but we want the output to be
 268                         # as similar as posible to the lyx format
 269                         lines[i-1] = lines[i-1]+" "
 270                     else:
 271                         new = new+[" "]
 272             else:
 273                 new = new+ert_begin+tmp+["\\end_inset ", ""]
 274
 275             if inset:
 276                 k3 = find_end_of_inset(lines, k2)
 277                 new = new+[""]+lines[k2:k3+1]+[""] # Put an empty line after \end_inset
 278                 k = k3+1
 279                 # Skip the empty line after \end_inset
 280                 if not is_nonempty_line(lines[k]):
 281                     k = k+1
 282                     new.append("")
 283             elif hfill:
 284                 new = new+["\hfill", ""]
 285                 k = k2
 286             elif specialchar:
 287                 if new == []:
 288                     # This is not necessary, but we want the output to be
 289                     # as similar as posible to the lyx format
 290                     lines[i-1] = lines[i-1]+specialchar_str
 291                     new = [""]
 292                 else:
 293                     new = new+[specialchar_str, ""]
 294                 k = k2
 295             else:
 296                 break
 297
 298         new = new+new2
 299         if not check_token(lines[j], "\\latex "):
 300             new = new+[""]+[lines[j]]
 301         lines[i:j+1] = new
 302         i = i+1
 303
 304     # Delete remaining "\latex xxx" tokens
 305     i = 0
 306     while 1:
 307         i = find_token(lines, "\\latex ", i)
 308         if i == -1:
 309             break
 310         del lines[i]
 311
# ERT insets are a hidden feature of LyX 1.1.6. This might be removed in the future.
 313 def remove_oldertinset(lines):
 314     i = 0
 315     while 1:
 316         i = find_token(lines, "\\begin_inset ERT", i)
 317         if i == -1:
 318             break
 319         j = find_end_of_inset(lines, i)
 320         k = find_token(lines, "\\layout", i+1)
 321         l = get_paragraph(lines, i)
 322         if lines[k] == lines[l]: # same layout
 323             k = k+1
 324         new = lines[k:j]
 325         lines[i:j+1] = new
 326         i = i+1
 327
 328 def is_ert_paragraph(lines, i):
 329     if not check_token(lines[i], "\\layout Standard"):
 330         return 0
 331
 332     i = find_nonempty_line(lines, i+1)
 333     if not check_token(lines[i], "\\begin_inset ERT"):
 334         return 0
 335
 336     j = find_end_of_inset(lines, i)
 337     k = find_nonempty_line(lines, j+1)
 338     return check_token(lines[k], "\\layout")
 339
 340 def combine_ert(lines):
 341     i = 0
 342     while 1:
 343         i = find_token(lines, "\\begin_inset ERT", i)
 344         if i == -1:
 345             break
 346         j = get_paragraph(lines, i)
 347         count = 0
 348         text = []
 349         while is_ert_paragraph(lines, j):
 350
 351             count = count+1
 352             i2 = find_token(lines, "\\layout", j+1)
 353             k = find_token(lines, "\\end_inset", i2+1)
 354             text = text+lines[i2:k]
 355             j = find_token(lines, "\\layout", k+1)
 356             if j == -1:
 357                 break
 358
 359         if count >= 2:
 360             j = find_token(lines, "\\layout", i+1)
 361             lines[j:k] = text
 362
 363         i = i+1
 364
 365 oldunits = ["pt", "cm", "in", "text%", "col%"]
 366
 367 def get_length(lines, name, start, end):
 368     i = find_token(lines, name, start, end)
 369     if i == -1:
 370         return ""
 371     x = string.split(lines[i])
 372     return x[2]+oldunits[int(x[1])]
 373
 374 def write_attribute(x, token, value):
 375     if value != "":
 376         x.append("\t"+token+" "+value)
 377
 378 def remove_figinset(lines):
 379     i = 0
 380     while 1:
 381         i = find_token(lines, "\\begin_inset Figure", i)
 382         if i == -1:
 383             break
 384         j = find_end_of_inset(lines, i)
 385
 386         if ( len(string.split(lines[i])) > 2 ):
 387             lyxwidth = string.split(lines[i])[3]+"pt"
 388             lyxheight = string.split(lines[i])[4]+"pt"
 389         else:
 390             lyxwidth = ""
 391             lyxheight = ""
 392
 393         filename = get_value(lines, "file", i+1, j)
 394
 395         width = get_length(lines, "width", i+1, j)
 396         # what does width=5 mean ?
 397         height = get_length(lines, "height", i+1, j)
 398         rotateAngle = get_value(lines, "angle", i+1, j)
 399         if width == "" and height == "":
 400             size_type = "0"
 401         else:
 402             size_type = "1"
 403
 404         flags = get_value(lines, "flags", i+1, j)
 405         x = int(flags)%4
 406         if x == 1:
 407             display = "monochrome"
 408         elif x == 2:
 409             display = "gray"
 410         else:
 411             display = "color"
 412
 413         subcaptionText = ""
 414         subcaptionLine = find_token(lines, "subcaption", i+1, j)
 415         if subcaptionLine != -1:
 416             subcaptionText = lines[subcaptionLine][11:]
 417             if subcaptionText != "":
 418                 subcaptionText = '"'+subcaptionText+'"'
 419
 420         k = find_token(lines, "subfigure", i+1,j)
 421         if k == -1:
 422             subcaption = 0
 423         else:
 424             subcaption = 1
 425
 426         new = ["\\begin_inset Graphics FormatVersion 1"]
 427         write_attribute(new, "filename", filename)
 428         write_attribute(new, "display", display)
 429         if subcaption:
 430             new.append("\tsubcaption")
 431         write_attribute(new, "subcaptionText", subcaptionText)
 432         write_attribute(new, "size_type", size_type)
 433         write_attribute(new, "width", width)
 434         write_attribute(new, "height", height)
 435         if rotateAngle != "":
 436             new.append("\trotate")
 437             write_attribute(new, "rotateAngle", rotateAngle)
 438         write_attribute(new, "rotateOrigin", "leftBaseline")
 439         write_attribute(new, "lyxsize_type", "1")
 440         write_attribute(new, "lyxwidth", lyxwidth)
 441         write_attribute(new, "lyxheight", lyxheight)
 442         new = new + ["\end_inset"]
 443         lines[i:j+1] = new
 444
 445 attr_re = re.compile(r' \w*="(false|0|)"')
 446 line_re = re.compile(r'<(features|column|row|cell)')
 447
 448 def update_tabular(lines):
 449     i = 0
 450     while 1:
 451         i = find_token(lines, '\\begin_inset  Tabular', i)
 452         if i == -1:
 453             break
 454
 455         for k in get_tabular_lines(lines, i):
 456             if check_token(lines[k], "<lyxtabular"):
 457                 lines[k] = string.replace(lines[k], 'version="2"', 'version="3"')
 458             elif check_token(lines[k], "<column"):
 459                 lines[k] = string.replace(lines[k], 'width=""', 'width="0pt"')
 460
 461             if line_re.match(lines[k]):
 462                 lines[k] = re.sub(attr_re, "", lines[k])
 463
 464         i = i+1
 465
# Float insets are a hidden feature of LyX 1.1.6. This might be removed in the future.
 467 def fix_oldfloatinset(lines):
 468     i = 0
 469     while 1:
 470         i = find_token(lines, "\\begin_inset Float ", i)
 471         if i == -1:
 472             break
 473         j = find_token(lines, "collapsed", i)
 474         if j != -1:
 475             lines[j:j] = ["wide false"]
 476         i = i+1
 477
 478 def change_listof(lines):
 479     i = 0
 480     while 1:
 481         i = find_token(lines, "\\begin_inset LatexCommand \\listof", i)
 482         if i == -1:
 483             break
 484         type = re.search(r"listof(\w*)", lines[i]).group(1)[:-1]
 485         lines[i] = "\\begin_inset FloatList "+type
 486         i = i+1
 487
 488 def change_infoinset(lines):
 489     i = 0
 490     while 1:
 491         i = find_token(lines, "\\begin_inset Info", i)
 492         if i == -1:
 493             break
 494         txt = string.lstrip(lines[i][18:])
 495         new = ["\\begin_inset Note", "collapsed true", ""]
 496         j = find_token(lines, "\\end_inset", i)
 497         if j == -1:
 498             break
 499
 500         note_lines = lines[i+1:j]
 501         if len(txt) > 0:
 502             note_lines = [txt]+note_lines
 503
 504         for line in note_lines:
 505             new = new + ["\layout Standard", ""]
 506             tmp = string.split(line, '\\')
 507             new = new + [tmp[0]]
 508             for x in tmp[1:]:
 509                 new = new + ["\\backslash ", x]
 510         lines[i:j] = new
 511         i = i+5
 512
 513 def change_preamble(lines):
 514     i = find_token(lines, "\\use_amsmath", 0)
 515     if i == -1:
 516         return
 517     lines[i+1:i+1] = ["\\use_natbib 0",
 518                       "\use_numerical_citations 0"]
 519
 520 def convert(header, body):
 521     language = get_value(header, "\\language", 0)
 522     if language == "":
 523         language = "english"
 524
 525     change_preamble(header)
 526     change_listof(body)
 527     fix_oldfloatinset(body)
 528     update_tabular(body)
 529     remove_pextra(body)
 530     remove_oldfloat(body, language)
 531     remove_figinset(body)
 532     remove_oldertinset(body)
 533     remove_oldert(body)
 534     combine_ert(body)
 535     change_infoinset(body)
 536
# Running this file directly does nothing; the guard body is a placeholder.
if __name__ == "__main__":
    pass