lib/lyx2lyx/lyx_1_6.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2007 José Matos <jamatos@lyx.org>
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 """ Convert files to the file format generated by lyx 1.6"""
  20
  21 import re
  22 import unicodedata
  23 import sys, os
  24
  25 from parser_tools import find_token, find_end_of, find_tokens, get_value
  26
  27 ####################################################################
  28 # Private helper functions
  29
  30 def find_end_of_inset(lines, i):
  31     " Find end of inset, where lines[i] is included."
  32     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  33
  34
  35 ####################################################################
  36
  37 def fix_wrong_tables(document):
  38     i = 0
  39     while True:
  40         i = find_token(document.body, "\\begin_inset Tabular", i)
  41         if i == -1:
  42             return
  43         j = find_end_of_inset(document.body, i + 1)
  44         if j == -1:
  45             document.warning("Malformed LyX document: Could not find end of tabular.")
  46             continue
  47
  48         m = i + 1
  49         nrows = int(document.body[i+1].split('"')[3])
  50         ncols = int(document.body[i+1].split('"')[5])
  51
  52         for l in range(nrows):
  53             prev_multicolumn = 0
  54             for k in range(ncols):
  55                 m = find_token(document.body, '<cell', m)
  56
  57                 if document.body[m].find('multicolumn') != -1:
  58                     multicol_cont = int(document.body[m].split('"')[1])
  59
  60                     if multicol_cont == 2 and (k == 0 or prev_multicolumn == 0):
  61                         document.body[m] = document.body[m][:5] + document.body[m][21:]
  62                         prev_multicolumn = 0
  63                     else:
  64                         prev_multicolumn = multicol_cont
  65                 else:
  66                     prev_multicolumn = 0
  67
  68         i = j + 1
  69
  70
  71 def close_begin_deeper(document):
  72     i = 0
  73     depth = 0
  74     while True:
  75         i = find_tokens(document.body, ["\\begin_deeper", "\\end_deeper"], i)
  76
  77         if i == -1:
  78             break
  79
  80         if document.body[i][:13] == "\\begin_deeper":
  81             depth += 1
  82         else:
  83             depth -= 1
  84
  85         i += 1
  86
  87     document.body[-2:-2] = ['\\end_deeper' for i in range(depth)]
  88
  89
  90 def long_charstyle_names(document):
  91     i = 0
  92     while True:
  93         i = find_token(document.body, "\\begin_inset CharStyle", i)
  94         if i == -1:
  95             return
  96         document.body[i] = document.body[i].replace("CharStyle ", "CharStyle CharStyle:")
  97         i += 1
  98
  99 def revert_long_charstyle_names(document):
 100     i = 0
 101     while True:
 102         i = find_token(document.body, "\\begin_inset CharStyle", i)
 103         if i == -1:
 104             return
 105         document.body[i] = document.body[i].replace("CharStyle CharStyle:", "CharStyle")
 106         i += 1
 107
 108
 109 def axe_show_label(document):
 110     i = 0
 111     while True:
 112         i = find_token(document.body, "\\begin_inset CharStyle", i)
 113         if i == -1:
 114             return
 115         if document.body[i + 1].find("show_label") != -1:
 116             if document.body[i + 1].find("true") != -1:
 117                 document.body[i + 1] = "status open"
 118                 del document.body[ i + 2]
 119             else:
 120                 if document.body[i + 1].find("false") != -1:
 121                     document.body[i + 1] = "status collapsed"
 122                     del document.body[ i + 2]
 123                 else:
 124                     document.warning("Malformed LyX document: show_label neither false nor true.")
 125         else:
 126             document.warning("Malformed LyX document: show_label missing in CharStyle.")
 127
 128         i += 1
 129
 130
 131 def revert_show_label(document):
 132     i = 0
 133     while True:
 134         i = find_token(document.body, "\\begin_inset CharStyle", i)
 135         if i == -1:
 136             return
 137         if document.body[i + 1].find("status open") != -1:
 138             document.body.insert(i + 1, "show_label true")
 139         else:
 140             if document.body[i + 1].find("status collapsed") != -1:
 141                 document.body.insert(i + 1, "show_label false")
 142             else:
 143                 document.warning("Malformed LyX document: no legal status line in CharStyle.")
 144         i += 1
 145
 146 def revert_begin_modules(document):
 147     i = 0
 148     while True:
 149         i = find_token(document.header, "\\begin_modules", i)
 150         if i == -1:
 151             return
 152         j = find_end_of(document.header, i, "\\begin_modules", "\\end_modules")
 153         if j == -1:
 154             # this should not happen
 155             break
 156         document.header[i : j + 1] = []
 157
 158 def convert_flex(document):
 159     "Convert CharStyle to Flex"
 160     i = 0
 161     while True:
 162         i = find_token(document.body, "\\begin_inset CharStyle", i)
 163         if i == -1:
 164             return
 165         document.body[i] = document.body[i].replace('\\begin_inset CharStyle', '\\begin_inset Flex')
 166
 167 def revert_flex(document):
 168     "Convert Flex to CharStyle"
 169     i = 0
 170     while True:
 171         i = find_token(document.body, "\\begin_inset Flex", i)
 172         if i == -1:
 173             return
 174         document.body[i] = document.body[i].replace('\\begin_inset Flex', '\\begin_inset CharStyle')
 175
 176
 177 def remove_manifest(document):
 178     "Remove the manifest section"
 179     document.manifest = None
 180
 181
 182 #  Discard PDF options for hyperref
 183 def revert_pdf_options(document):
 184         "Revert PDF options for hyperref."
 185         i = 0
 186         i = find_token(document.header, "\\use_hyperref", i)
 187         if i != -1:
 188             del document.header[i]
 189         i = find_token(document.header, "\\pdf_store_options", i)
 190         if i != -1:
 191             del document.header[i]
 192         i = find_token(document.header, "\\pdf_title", 0)
 193         if i != -1:
 194             del document.header[i]
 195         i = find_token(document.header, "\\pdf_author", 0)
 196         if i != -1:
 197             del document.header[i]
 198         i = find_token(document.header, "\\pdf_subject", 0)
 199         if i != -1:
 200             del document.header[i]
 201         i = find_token(document.header, "\\pdf_keywords", 0)
 202         if i != -1:
 203             del document.header[i]
 204         i = find_token(document.header, "\\pdf_bookmarks", 0)
 205         if i != -1:
 206             del document.header[i]
 207         i = find_token(document.header, "\\pdf_bookmarksnumbered", i)
 208         if i != -1:
 209             del document.header[i]
 210         i = find_token(document.header, "\\pdf_bookmarksopen", i)
 211         if i != -1:
 212             del document.header[i]
 213         i = find_token(document.header, "\\pdf_bookmarksopenlevel", i)
 214         if i != -1:
 215             del document.header[i]
 216         i = find_token(document.header, "\\pdf_breaklinks", i)
 217         if i != -1:
 218             del document.header[i]
 219         i = find_token(document.header, "\\pdf_pdfborder", i)
 220         if i != -1:
 221             del document.header[i]
 222         i = find_token(document.header, "\\pdf_colorlinks", i)
 223         if i != -1:
 224             del document.header[i]
 225         i = find_token(document.header, "\\pdf_backref", i)
 226         if i != -1:
 227             del document.header[i]
 228         i = find_token(document.header, "\\pdf_pagebackref", i)
 229         if i != -1:
 230             del document.header[i]
 231         i = find_token(document.header, "\\pdf_pagemode", 0)
 232         if i != -1:
 233             del document.header[i]
 234         i = find_token(document.header, "\\pdf_quoted_options", 0)
 235         if i != -1:
 236             del document.header[i]
 237
 238
 239 def remove_inzip_options(document):
 240     "Remove inzipName and embed options from the Graphics inset"
 241     i = 0
 242     while 1:
 243         i = find_token(document.body, "\\begin_inset Graphics", i)
 244         if i == -1:
 245             return
 246         j = find_end_of_inset(document.body, i + 1)
 247         if j == -1:
 248             # should not happen
 249             document.warning("Malformed LyX document: Could not find end of graphics inset.")
 250         # If there's a inzip param, just remove that
 251         k = find_token(document.body, "\tinzipName", i + 1, j)
 252         if k != -1:
 253             del document.body[k]
 254             # embed option must follow the inzipName option
 255             del document.body[k+1]
 256         i = i + 1
 257
 258
 259 def convert_inset_command(document):
 260     """
 261         Convert:
 262             \begin_inset LatexCommand cmd
 263         to
 264             \begin_inset CommandInset InsetType
 265             LatexCommand cmd
 266     """
 267     i = 0
 268     while 1:
 269         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 270         if i == -1:
 271             return
 272         line = document.body[i]
 273         r = re.compile(r'\\begin_inset LatexCommand (.*)$')
 274         m = r.match(line)
 275         cmdName = m.group(1)
 276         insetName = ""
 277         #this is adapted from factory.cpp
 278         if cmdName[0:4].lower() == "cite":
 279             insetName = "citation"
 280         elif cmdName == "url" or cmdName == "htmlurl":
 281             insetName = "url"
 282         elif cmdName[-3:] == "ref":
 283             insetName = "ref"
 284         elif cmdName == "tableofcontents":
 285             insetName = "toc"
 286         elif cmdName == "printnomenclature":
 287             insetName = "nomencl_print"
 288         elif cmdName == "printindex":
 289             insetName = "index_print"
 290         else:
 291             insetName = cmdName
 292         insertion = ["\\begin_inset CommandInset " + insetName, "LatexCommand " + cmdName]
 293         document.body[i : i+1] = insertion
 294
 295
 296 def revert_inset_command(document):
 297     """
 298         Convert:
 299             \begin_inset CommandInset InsetType
 300             LatexCommand cmd
 301         to
 302             \begin_inset LatexCommand cmd
 303         Some insets may end up being converted to insets earlier versions of LyX
 304         will not be able to recognize. Not sure what to do about that.
 305     """
 306     i = 0
 307     while 1:
 308         i = find_token(document.body, "\\begin_inset CommandInset", i)
 309         if i == -1:
 310             return
 311         nextline = document.body[i+1]
 312         r = re.compile(r'LatexCommand\s+(.*)$')
 313         m = r.match(nextline)
 314         if not m:
 315             document.warning("Malformed LyX document: Missing LatexCommand in " + document.body[i] + ".")
 316             continue
 317         cmdName = m.group(1)
 318         insertion = ["\\begin_inset LatexCommand " + cmdName]
 319         document.body[i : i+2] = insertion
 320
 321
 322 def convert_wrapfig_options(document):
 323     "Convert optional options for wrap floats (wrapfig)."
 324     # adds the tokens "lines", "placement", and "overhang"
 325     i = 0
 326     while True:
 327         i = find_token(document.body, "\\begin_inset Wrap figure", i)
 328         if i == -1:
 329             return
 330         document.body.insert(i + 1, "lines 0")
 331         j = find_token(document.body, "placement", i)
 332         # placement can be already set or not; if not, set it
 333         if j == i+2:
 334             document.body.insert(i + 3, "overhang 0col%")
 335         else:
 336            document.body.insert(i + 2, "placement o")
 337            document.body.insert(i + 3, "overhang 0col%")
 338         i = i + 1
 339
 340
 341 def revert_wrapfig_options(document):
 342     "Revert optional options for wrap floats (wrapfig)."
 343     i = 0
 344     while True:
 345         i = find_token(document.body, "lines", i)
 346         if i == -1:
 347             return
 348         j = find_token(document.body, "overhang", i+1)
 349         if j != i + 2 and j != -1:
 350             document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float.")
 351         if j == -1:
 352             return
 353         del document.body[i]
 354         del document.body[j-1]
 355         i = i + 1
 356
 357
 358 def convert_latexcommand_index(document):
 359     "Convert from LatexCommand form to collapsable form."
 360     i = 0
 361     while True:
 362         i = find_token(document.body, "\\begin_inset CommandInset index", i)
 363         if i == -1:
 364             return
 365         if document.body[i + 1] != "LatexCommand index": # Might also be index_print
 366             return
 367         fullcommand = document.body[i + 2]
 368         document.body[i] = "\\begin_inset Index"
 369         document.body[i + 1] = "status collapsed"
 370         document.body[i + 2] = "\\begin_layout standard"
 371         document.body.insert(i + 3, fullcommand[6:].strip('"'))
 372         document.body.insert(i + 4, "\\end_layout")
 373         i = i + 5
 374
 375
 376 def revert_latexcommand_index(document):
 377     "Revert from collapsable form toLatexCommand form."
 378     i = 0
 379     while True:
 380         i = find_token(document.body, "\\begin_inset Index", i)
 381         if i == -1:
 382             return
 383         j = find_end_of_inset(document.body, i)
 384         del document.body[j - 1]
 385         del document.body[j - 2] # \end_layout
 386         document.body[i] =  "\\begin_inset CommandInset index"
 387         document.body[i + 1] =  "LatexCommand index"
 388         document.body[i + 3] = "name " + '"' + document.body[i + 3] + '"'
 389         document.body.insert(i + 4, "")
 390         del document.body[i + 2] # \begin_layout standard
 391         i = i + 5
 392
 393
 394 def revert_wraptable(document):
 395     "Revert wrap table to wrap figure."
 396     i = 0
 397     while True:
 398         i = find_token(document.body, "\\begin_inset Wrap table", i)
 399         if i == -1:
 400             return
 401         document.body[i] = document.body[i].replace('\\begin_inset Wrap table', '\\begin_inset Wrap figure')
 402         i = i + 1
 403
 404
 405 def revert_vietnamese(document):
 406     "Set language Vietnamese to English"
 407     # Set document language from Vietnamese to English
 408     i = 0
 409     if document.language == "vietnamese":
 410         document.language = "english"
 411         i = find_token(document.header, "\\language", 0)
 412         if i != -1:
 413             document.header[i] = "\\language english"
 414     j = 0
 415     while True:
 416         j = find_token(document.body, "\\lang vietnamese", j)
 417         if j == -1:
 418             return
 419         document.body[j] = document.body[j].replace("\\lang vietnamese", "\\lang english")
 420         j = j + 1
 421
 422
 423 def revert_japanese(document):
 424     "Set language japanese-plain to japanese"
 425     # Set document language from japanese-plain to japanese
 426     i = 0
 427     if document.language == "japanese-plain":
 428         document.language = "japanese"
 429         i = find_token(document.header, "\\language", 0)
 430         if i != -1:
 431             document.header[i] = "\\language japanese"
 432     j = 0
 433     while True:
 434         j = find_token(document.body, "\\lang japanese-plain", j)
 435         if j == -1:
 436             return
 437         document.body[j] = document.body[j].replace("\\lang japanese-plain", "\\lang japanese")
 438         j = j + 1
 439
 440
 441 def revert_japanese_encoding(document):
 442     "Set input encoding form EUC-JP-plain to EUC-JP etc."
 443     # Set input encoding form EUC-JP-plain to EUC-JP etc.
 444     i = 0
 445     i = find_token(document.header, "\\inputencoding EUC-JP-plain", 0)
 446     if i != -1:
 447         document.header[i] = "\\inputencoding EUC-JP"
 448     j = 0
 449     j = find_token(document.header, "\\inputencoding JIS-plain", 0)
 450     if j != -1:
 451         document.header[j] = "\\inputencoding JIS"
 452     k = 0
 453     k = find_token(document.header, "\\inputencoding SJIS-plain", 0)
 454     if k != -1: # convert to UTF8 since there is currently no SJIS encoding
 455         document.header[k] = "\\inputencoding UTF8"
 456
 457
 458 def revert_inset_info(document):
 459     'Replace info inset with its content'
 460     i = 0
 461     while 1:
 462         i = find_token(document.body, '\\begin_inset Info', i)
 463         if i == -1:
 464             return
 465         j = find_end_of_inset(document.body, i + 1)
 466         if j == -1:
 467             # should not happen
 468             document.warning("Malformed LyX document: Could not find end of Info inset.")
 469         type = 'unknown'
 470         arg = ''
 471         for k in range(i, j+1):
 472             if document.body[k].startswith("arg"):
 473                 arg = document.body[k][3:].strip().strip('"')
 474             if document.body[k].startswith("type"):
 475                 type = document.body[k][4:].strip().strip('"')
 476         # I think there is a newline after \\end_inset, which should be removed.
 477         if document.body[j + 1].strip() == "":
 478             document.body[i : (j + 2)] = [type + ':' + arg]
 479         else:
 480             document.body[i : (j + 1)] = [type + ':' + arg]
 481
 482
 483 def convert_pdf_options(document):
 484     # Set the pdfusetitle tag, delete the pdf_store_options,
 485     # set quotes for bookmarksopenlevel"
 486     has_hr = get_value(document.header, "\\use_hyperref", 0, default = "0")
 487     if has_hr == "1":
 488         k = find_token(document.header, "\\use_hyperref", 0)
 489         document.header.insert(k + 1, "\\pdf_pdfusetitle true")
 490     k = find_token(document.header, "\\pdf_store_options", 0)
 491     if k != -1:
 492         del document.header[k]
 493     i = find_token(document.header, "\\pdf_bookmarksopenlevel", k)
 494     if i == -1: return
 495     document.header[i] = document.header[i].replace('"', '')
 496
 497
 498 def revert_pdf_options_2(document):
 499     # reset the pdfusetitle tag, set quotes for bookmarksopenlevel"
 500     k = find_token(document.header, "\\use_hyperref", 0)
 501     i = find_token(document.header, "\\pdf_pdfusetitle", k)
 502     if i != -1:
 503         del document.header[i]
 504     i = find_token(document.header, "\\pdf_bookmarksopenlevel", k)
 505     if i == -1: return
 506     values = document.header[i].split()
 507     values[1] = ' "' + values[1] + '"'
 508     document.header[i] = ''.join(values)
 509
 510
 511 def convert_htmlurl(document):
 512     'Convert "htmlurl" to "href" insets for docbook'
 513     if document.backend != "docbook":
 514       return
 515     i = 0
 516     while True:
 517       i = find_token(document.body, "\\begin_inset CommandInset url", i)
 518       if i == -1:
 519         return
 520       document.body[i] = "\\begin_inset CommandInset href"
 521       document.body[i + 1] = "LatexCommand href"
 522       i = i + 1
 523
 524 def convert_url(document):
 525     'Convert url insets to url charstyles'
 526     if document.backend == "docbook":
 527       return
 528     r = re.compile(r'target\s+"(.*)"')
 529     didone = False
 530     i = 0
 531     while True:
 532       i = find_token(document.body, "\\begin_inset CommandInset url", i)
 533       if i == -1:
 534         break
 535       j = find_token(document.body, "target", i)
 536       if j == -1:
 537         document.warning("Malformed LyX document: Can't find target for url inset")
 538         i = j
 539         continue
 540       m = r.match(document.body[j])
 541       target = m.group(1)
 542       k = find_token(document.body, "\\end_inset", j)
 543       if k == -1:
 544         document.warning("Malformed LyX document: Can't find end of url inset")
 545         i = k
 546         continue
 547       newstuff = ["\\begin_inset Flex URL",
 548         "status collapsed", "",
 549         "\\begin_layout Standard",
 550         target,
 551         "\\end_layout",
 552         ""]
 553       document.body[i:k] = newstuff
 554       didone = True
 555       i = k
 556
 557     #If we did one, we need to add URL to the modules
 558     if didone:
 559       i = find_token(document.header, "\\begin_modules", 0)
 560       if i == -1:
 561         #No modules yet included
 562         i = find_token(document.header, "\\textclass", 0)
 563         if i == -1:
 564           document.warning("Malformed LyX document: No \\textclass!!")
 565           return
 566         modinfo = ["\\begin_modules", "URL", "\\end_modules"]
 567         document.header[i + 1: i + 1] = modinfo
 568         return
 569       j = find_token(document.header, "\\end_modules", i)
 570       if j == -1:
 571         document.warning("Malformed LyX document: No \\end_modules.")
 572         return
 573       k = find_token(document.header, "URL", i)
 574       if k != -1 and k < j:
 575         return
 576       document.header.insert(i + 1, "URL")
 577
 578 def revert_href(document):
 579     'Reverts hyperlink insets (href) to url insets (url)'
 580     i = 0
 581     while True:
 582       i = find_token(document.body, "\\begin_inset CommandInset href", i)
 583       if i == -1:
 584           return
 585       document.body[i : i + 2] =
 586         ["\\begin_inset CommandInset url", "LatexCommand url"]
 587       i = i + 2
 588
 589
##
# Conversion hub
#
# The two tables below pair a number with the list of functions from this
# module that belong to it.  "convert" is ordered by ascending number
# (277..295), "revert" by descending number (294..276); an empty list
# means no function is registered for that number.
# NOTE(review): the numbers are presumably LyX file-format versions and the
# tables are presumably consumed by the lyx2lyx driver -- confirm there.
#

supported_versions = ["1.6.0","1.6"]
convert = [[277, [fix_wrong_tables]],
           [278, [close_begin_deeper]],
           [279, [long_charstyle_names]],
           [280, [axe_show_label]],
           [281, []],
           [282, []],
           [283, [convert_flex]],
           [284, []],
           [285, []], # an empty manifest is automatically added
           [286, []],
           [287, [convert_wrapfig_options]],
           [288, [convert_inset_command]],
           [289, [convert_latexcommand_index]],
           [290, []],
           [291, []],
           [292, []],
           [293, []],
           [294, [convert_pdf_options]],
           [295, [convert_htmlurl, convert_url]]
          ]

revert =  [[294, [revert_href]],
           [293, [revert_pdf_options_2]],
           [292, [revert_inset_info]],
           [291, [revert_japanese, revert_japanese_encoding]],
           [290, [revert_vietnamese]],
           [289, [revert_wraptable]],
           [288, [revert_latexcommand_index]],
           [287, [revert_inset_command]],
           [286, [revert_wrapfig_options]],
           [285, [revert_pdf_options]],
           [284, [remove_manifest, remove_inzip_options]],
           [283, []],
           [282, [revert_flex]],
           [281, []],
           [280, [revert_begin_modules]],
           [279, [revert_show_label]],
           [278, [revert_long_charstyle_names]],
           [277, []],
           [276, []]
          ]
 636
 637
# Running this module directly does nothing; it only defines the
# conversion functions and tables above.
if __name__ == "__main__":
    pass