lib/lyx2lyx/lyx_1_6.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2007 José Matos <jamatos@lyx.org>
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 """ Convert files to the file format generated by lyx 1.6"""
  20
  21 import re
  22 import unicodedata
  23 import sys, os
  24
  25 from parser_tools import find_token, find_end_of, find_tokens, get_value
  26
  27 ####################################################################
  28 # Private helper functions
  29
  30 def find_end_of_inset(lines, i):
  31     " Find end of inset, where lines[i] is included."
  32     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  33
  34 def wrap_into_ert(string, src, dst):
  35     " Wrap a something into an ERT"
  36     return string.replace(src, '\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n'
  37       + dst + '\n\\end_layout\n\\end_inset\n')
  38
  39 ####################################################################
  40
  41 def fix_wrong_tables(document):
  42     i = 0
  43     while True:
  44         i = find_token(document.body, "\\begin_inset Tabular", i)
  45         if i == -1:
  46             return
  47         j = find_end_of_inset(document.body, i + 1)
  48         if j == -1:
  49             document.warning("Malformed LyX document: Could not find end of tabular.")
  50             continue
  51
  52         m = i + 1
  53         nrows = int(document.body[i+1].split('"')[3])
  54         ncols = int(document.body[i+1].split('"')[5])
  55
  56         for l in range(nrows):
  57             prev_multicolumn = 0
  58             for k in range(ncols):
  59                 m = find_token(document.body, '<cell', m)
  60
  61                 if document.body[m].find('multicolumn') != -1:
  62                     multicol_cont = int(document.body[m].split('"')[1])
  63
  64                     if multicol_cont == 2 and (k == 0 or prev_multicolumn == 0):
  65                         document.body[m] = document.body[m][:5] + document.body[m][21:]
  66                         prev_multicolumn = 0
  67                     else:
  68                         prev_multicolumn = multicol_cont
  69                 else:
  70                     prev_multicolumn = 0
  71
  72         i = j + 1
  73
  74
  75 def close_begin_deeper(document):
  76     i = 0
  77     depth = 0
  78     while True:
  79         i = find_tokens(document.body, ["\\begin_deeper", "\\end_deeper"], i)
  80
  81         if i == -1:
  82             break
  83
  84         if document.body[i][:13] == "\\begin_deeper":
  85             depth += 1
  86         else:
  87             depth -= 1
  88
  89         i += 1
  90
  91     document.body[-2:-2] = ['\\end_deeper' for i in range(depth)]
  92
  93
  94 def long_charstyle_names(document):
  95     i = 0
  96     while True:
  97         i = find_token(document.body, "\\begin_inset CharStyle", i)
  98         if i == -1:
  99             return
 100         document.body[i] = document.body[i].replace("CharStyle ", "CharStyle CharStyle:")
 101         i += 1
 102
 103 def revert_long_charstyle_names(document):
 104     i = 0
 105     while True:
 106         i = find_token(document.body, "\\begin_inset CharStyle", i)
 107         if i == -1:
 108             return
 109         document.body[i] = document.body[i].replace("CharStyle CharStyle:", "CharStyle")
 110         i += 1
 111
 112
 113 def axe_show_label(document):
 114     i = 0
 115     while True:
 116         i = find_token(document.body, "\\begin_inset CharStyle", i)
 117         if i == -1:
 118             return
 119         if document.body[i + 1].find("show_label") != -1:
 120             if document.body[i + 1].find("true") != -1:
 121                 document.body[i + 1] = "status open"
 122                 del document.body[ i + 2]
 123             else:
 124                 if document.body[i + 1].find("false") != -1:
 125                     document.body[i + 1] = "status collapsed"
 126                     del document.body[ i + 2]
 127                 else:
 128                     document.warning("Malformed LyX document: show_label neither false nor true.")
 129         else:
 130             document.warning("Malformed LyX document: show_label missing in CharStyle.")
 131
 132         i += 1
 133
 134
 135 def revert_show_label(document):
 136     i = 0
 137     while True:
 138         i = find_token(document.body, "\\begin_inset CharStyle", i)
 139         if i == -1:
 140             return
 141         if document.body[i + 1].find("status open") != -1:
 142             document.body.insert(i + 1, "show_label true")
 143         else:
 144             if document.body[i + 1].find("status collapsed") != -1:
 145                 document.body.insert(i + 1, "show_label false")
 146             else:
 147                 document.warning("Malformed LyX document: no legal status line in CharStyle.")
 148         i += 1
 149
 150 def revert_begin_modules(document):
 151     i = 0
 152     while True:
 153         i = find_token(document.header, "\\begin_modules", i)
 154         if i == -1:
 155             return
 156         j = find_end_of(document.header, i, "\\begin_modules", "\\end_modules")
 157         if j == -1:
 158             # this should not happen
 159             break
 160         document.header[i : j + 1] = []
 161
 162 def convert_flex(document):
 163     "Convert CharStyle to Flex"
 164     i = 0
 165     while True:
 166         i = find_token(document.body, "\\begin_inset CharStyle", i)
 167         if i == -1:
 168             return
 169         document.body[i] = document.body[i].replace('\\begin_inset CharStyle', '\\begin_inset Flex')
 170
 171 def revert_flex(document):
 172     "Convert Flex to CharStyle"
 173     i = 0
 174     while True:
 175         i = find_token(document.body, "\\begin_inset Flex", i)
 176         if i == -1:
 177             return
 178         document.body[i] = document.body[i].replace('\\begin_inset Flex', '\\begin_inset CharStyle')
 179
 180
 181 #  Discard PDF options for hyperref
 182 def revert_pdf_options(document):
 183         "Revert PDF options for hyperref."
 184         i = 0
 185         i = find_token(document.header, "\\use_hyperref", i)
 186         if i != -1:
 187             del document.header[i]
 188         i = find_token(document.header, "\\pdf_store_options", i)
 189         if i != -1:
 190             del document.header[i]
 191         i = find_token(document.header, "\\pdf_title", 0)
 192         if i != -1:
 193             del document.header[i]
 194         i = find_token(document.header, "\\pdf_author", 0)
 195         if i != -1:
 196             del document.header[i]
 197         i = find_token(document.header, "\\pdf_subject", 0)
 198         if i != -1:
 199             del document.header[i]
 200         i = find_token(document.header, "\\pdf_keywords", 0)
 201         if i != -1:
 202             del document.header[i]
 203         i = find_token(document.header, "\\pdf_bookmarks", 0)
 204         if i != -1:
 205             del document.header[i]
 206         i = find_token(document.header, "\\pdf_bookmarksnumbered", i)
 207         if i != -1:
 208             del document.header[i]
 209         i = find_token(document.header, "\\pdf_bookmarksopen", i)
 210         if i != -1:
 211             del document.header[i]
 212         i = find_token(document.header, "\\pdf_bookmarksopenlevel", i)
 213         if i != -1:
 214             del document.header[i]
 215         i = find_token(document.header, "\\pdf_breaklinks", i)
 216         if i != -1:
 217             del document.header[i]
 218         i = find_token(document.header, "\\pdf_pdfborder", i)
 219         if i != -1:
 220             del document.header[i]
 221         i = find_token(document.header, "\\pdf_colorlinks", i)
 222         if i != -1:
 223             del document.header[i]
 224         i = find_token(document.header, "\\pdf_backref", i)
 225         if i != -1:
 226             del document.header[i]
 227         i = find_token(document.header, "\\pdf_pagebackref", i)
 228         if i != -1:
 229             del document.header[i]
 230         i = find_token(document.header, "\\pdf_pagemode", 0)
 231         if i != -1:
 232             del document.header[i]
 233         i = find_token(document.header, "\\pdf_quoted_options", 0)
 234         if i != -1:
 235             del document.header[i]
 236
 237
 238 def remove_inzip_options(document):
 239     "Remove inzipName and embed options from the Graphics inset"
 240     i = 0
 241     while 1:
 242         i = find_token(document.body, "\\begin_inset Graphics", i)
 243         if i == -1:
 244             return
 245         j = find_end_of_inset(document.body, i + 1)
 246         if j == -1:
 247             # should not happen
 248             document.warning("Malformed LyX document: Could not find end of graphics inset.")
 249         # If there's a inzip param, just remove that
 250         k = find_token(document.body, "\tinzipName", i + 1, j)
 251         if k != -1:
 252             del document.body[k]
 253             # embed option must follow the inzipName option
 254             del document.body[k+1]
 255         i = i + 1
 256
 257
 258 def convert_inset_command(document):
 259     """
 260         Convert:
 261             \begin_inset LatexCommand cmd
 262         to
 263             \begin_inset CommandInset InsetType
 264             LatexCommand cmd
 265     """
 266     i = 0
 267     while 1:
 268         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 269         if i == -1:
 270             return
 271         line = document.body[i]
 272         r = re.compile(r'\\begin_inset LatexCommand (.*)$')
 273         m = r.match(line)
 274         cmdName = m.group(1)
 275         insetName = ""
 276         #this is adapted from factory.cpp
 277         if cmdName[0:4].lower() == "cite":
 278             insetName = "citation"
 279         elif cmdName == "url" or cmdName == "htmlurl":
 280             insetName = "url"
 281         elif cmdName[-3:] == "ref":
 282             insetName = "ref"
 283         elif cmdName == "tableofcontents":
 284             insetName = "toc"
 285         elif cmdName == "printnomenclature":
 286             insetName = "nomencl_print"
 287         elif cmdName == "printindex":
 288             insetName = "index_print"
 289         else:
 290             insetName = cmdName
 291         insertion = ["\\begin_inset CommandInset " + insetName, "LatexCommand " + cmdName]
 292         document.body[i : i+1] = insertion
 293
 294
 295 def revert_inset_command(document):
 296     """
 297         Convert:
 298             \begin_inset CommandInset InsetType
 299             LatexCommand cmd
 300         to
 301             \begin_inset LatexCommand cmd
 302         Some insets may end up being converted to insets earlier versions of LyX
 303         will not be able to recognize. Not sure what to do about that.
 304     """
 305     i = 0
 306     while 1:
 307         i = find_token(document.body, "\\begin_inset CommandInset", i)
 308         if i == -1:
 309             return
 310         nextline = document.body[i+1]
 311         r = re.compile(r'LatexCommand\s+(.*)$')
 312         m = r.match(nextline)
 313         if not m:
 314             document.warning("Malformed LyX document: Missing LatexCommand in " + document.body[i] + ".")
 315             continue
 316         cmdName = m.group(1)
 317         insertion = ["\\begin_inset LatexCommand " + cmdName]
 318         document.body[i : i+2] = insertion
 319
 320
 321 def convert_wrapfig_options(document):
 322     "Convert optional options for wrap floats (wrapfig)."
 323     # adds the tokens "lines", "placement", and "overhang"
 324     i = 0
 325     while True:
 326         i = find_token(document.body, "\\begin_inset Wrap figure", i)
 327         if i == -1:
 328             return
 329         document.body.insert(i + 1, "lines 0")
 330         j = find_token(document.body, "placement", i)
 331         # placement can be already set or not; if not, set it
 332         if j == i+2:
 333             document.body.insert(i + 3, "overhang 0col%")
 334         else:
 335            document.body.insert(i + 2, "placement o")
 336            document.body.insert(i + 3, "overhang 0col%")
 337         i = i + 1
 338
 339
 340 def revert_wrapfig_options(document):
 341     "Revert optional options for wrap floats (wrapfig)."
 342     i = 0
 343     while True:
 344         i = find_token(document.body, "lines", i)
 345         if i == -1:
 346             return
 347         j = find_token(document.body, "overhang", i+1)
 348         if j != i + 2 and j != -1:
 349             document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float.")
 350         if j == -1:
 351             return
 352         del document.body[i]
 353         del document.body[j-1]
 354         i = i + 1
 355
 356
 357 def convert_latexcommand_index(document):
 358     "Convert from LatexCommand form to collapsable form."
 359     i = 0
 360     while True:
 361         i = find_token(document.body, "\\begin_inset CommandInset index", i)
 362         if i == -1:
 363             return
 364         if document.body[i + 1] != "LatexCommand index": # Might also be index_print
 365             return
 366         fullcontent = document.body[i + 2][6:].strip('"')
 367         document.body[i:i + 2] = ["\\begin_inset Index",
 368           "status collapsed",
 369           "\\begin_layout Standard"]
 370         # Put here the conversions needed from LaTeX string to LyXText.
 371         # Here we do a minimal conversion to prevent crashes and data loss.
 372         # Manual patch-up may be needed.
 373         # Umlauted characters (most common ones, can be extended):
 374         fullcontent = fullcontent.replace(r'\\\"a', u'ä').replace(r'\\\"o', u'ö').replace(r'\\\"u', u'ü')
 375         # Generic, \" -> ":
 376         fullcontent = wrap_into_ert(fullcontent, r'\"', '"')
 377         #fullcontent = fullcontent.replace(r'\"', '\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout standard\n"\n\\end_layout\n\\end_inset\n')
 378         # Math:
 379         r = re.compile('^(.*?)(\$.*?\$)(.*)')
 380         g = fullcontent
 381         while r.match(g):
 382           m = r.match(g)
 383           s = m.group(1)
 384           f = m.group(2).replace('\\\\', '\\')
 385           g = m.group(3)
 386           if s:
 387             # this is non-math!
 388             s = wrap_into_ert(s, r'\\', '\\backslash')
 389             s = wrap_into_ert(s, '{', '{')
 390             s = wrap_into_ert(s, '}', '}')
 391             document.body.insert(i + 3, s)
 392             i += 1
 393           document.body.insert(i + 3, "\\begin_inset Formula " + f)
 394           document.body.insert(i + 4, "\\end_inset")
 395           i += 2
 396         # Generic, \\ -> \backslash:
 397         g = wrap_into_ert(g, r'\\', '\\backslash{}')
 398         g = wrap_into_ert(g, '{', '{')
 399         g = wrap_into_ert(g, '}', '}')
 400         document.body.insert(i + 3, g)
 401         document.body[i + 4] = "\\end_layout"
 402         i = i + 5
 403
 404
 405 def revert_latexcommand_index(document):
 406     "Revert from collapsable form to LatexCommand form."
 407     i = 0
 408     while True:
 409         i = find_token(document.body, "\\begin_inset Index", i)
 410         if i == -1:
 411           return
 412         j = find_end_of_inset(document.body, i + 1)
 413         if j == -1:
 414           return
 415         del document.body[j - 1]
 416         del document.body[j - 2] # \end_layout
 417         document.body[i] =  "\\begin_inset CommandInset index"
 418         document.body[i + 1] =  "LatexCommand index"
 419         # clean up multiline stuff
 420         content = ""
 421         for k in range(i + 3, j - 2):
 422           line = document.body[k]
 423           if line.startswith("\\begin_inset ERT"):
 424             line = line[16:]
 425           if line.startswith("\\begin_inset Formula"):
 426             line = line[20:]
 427           if line.startswith("\\begin_layout Standard"):
 428             line = line[22:]
 429           if line.startswith("\\end_layout"):
 430             line = line[11:]
 431           if line.startswith("\\end_inset"):
 432             line = line[10:]
 433           if line.startswith("status collapsed"):
 434             line = line[16:]
 435           line = line.replace(u'ä', r'\\\"a').replace(u'ö', r'\\\"o').replace(u'ü', r'\\\"u')
 436           content = content + line;
 437         document.body[i + 3] = "name " + '"' + content + '"'
 438         for k in range(i + 4, j - 2):
 439           del document.body[i + 4]
 440         document.body.insert(i + 4, "")
 441         del document.body[i + 2] # \begin_layout standard
 442         i = i + 5
 443
 444
 445 def revert_wraptable(document):
 446     "Revert wrap table to wrap figure."
 447     i = 0
 448     while True:
 449         i = find_token(document.body, "\\begin_inset Wrap table", i)
 450         if i == -1:
 451             return
 452         document.body[i] = document.body[i].replace('\\begin_inset Wrap table', '\\begin_inset Wrap figure')
 453         i = i + 1
 454
 455
 456 def revert_vietnamese(document):
 457     "Set language Vietnamese to English"
 458     # Set document language from Vietnamese to English
 459     i = 0
 460     if document.language == "vietnamese":
 461         document.language = "english"
 462         i = find_token(document.header, "\\language", 0)
 463         if i != -1:
 464             document.header[i] = "\\language english"
 465     j = 0
 466     while True:
 467         j = find_token(document.body, "\\lang vietnamese", j)
 468         if j == -1:
 469             return
 470         document.body[j] = document.body[j].replace("\\lang vietnamese", "\\lang english")
 471         j = j + 1
 472
 473
 474 def revert_japanese(document):
 475     "Set language japanese-plain to japanese"
 476     # Set document language from japanese-plain to japanese
 477     i = 0
 478     if document.language == "japanese-plain":
 479         document.language = "japanese"
 480         i = find_token(document.header, "\\language", 0)
 481         if i != -1:
 482             document.header[i] = "\\language japanese"
 483     j = 0
 484     while True:
 485         j = find_token(document.body, "\\lang japanese-plain", j)
 486         if j == -1:
 487             return
 488         document.body[j] = document.body[j].replace("\\lang japanese-plain", "\\lang japanese")
 489         j = j + 1
 490
 491
 492 def revert_japanese_encoding(document):
 493     "Set input encoding form EUC-JP-plain to EUC-JP etc."
 494     # Set input encoding form EUC-JP-plain to EUC-JP etc.
 495     i = 0
 496     i = find_token(document.header, "\\inputencoding EUC-JP-plain", 0)
 497     if i != -1:
 498         document.header[i] = "\\inputencoding EUC-JP"
 499     j = 0
 500     j = find_token(document.header, "\\inputencoding JIS-plain", 0)
 501     if j != -1:
 502         document.header[j] = "\\inputencoding JIS"
 503     k = 0
 504     k = find_token(document.header, "\\inputencoding SJIS-plain", 0)
 505     if k != -1: # convert to UTF8 since there is currently no SJIS encoding
 506         document.header[k] = "\\inputencoding UTF8"
 507
 508
 509 def revert_inset_info(document):
 510     'Replace info inset with its content'
 511     i = 0
 512     while 1:
 513         i = find_token(document.body, '\\begin_inset Info', i)
 514         if i == -1:
 515             return
 516         j = find_end_of_inset(document.body, i + 1)
 517         if j == -1:
 518             # should not happen
 519             document.warning("Malformed LyX document: Could not find end of Info inset.")
 520         type = 'unknown'
 521         arg = ''
 522         for k in range(i, j+1):
 523             if document.body[k].startswith("arg"):
 524                 arg = document.body[k][3:].strip().strip('"')
 525             if document.body[k].startswith("type"):
 526                 type = document.body[k][4:].strip().strip('"')
 527         # I think there is a newline after \\end_inset, which should be removed.
 528         if document.body[j + 1].strip() == "":
 529             document.body[i : (j + 2)] = [type + ':' + arg]
 530         else:
 531             document.body[i : (j + 1)] = [type + ':' + arg]
 532
 533
 534 def convert_pdf_options(document):
 535     # Set the pdfusetitle tag, delete the pdf_store_options,
 536     # set quotes for bookmarksopenlevel"
 537     has_hr = get_value(document.header, "\\use_hyperref", 0, default = "0")
 538     if has_hr == "1":
 539         k = find_token(document.header, "\\use_hyperref", 0)
 540         document.header.insert(k + 1, "\\pdf_pdfusetitle true")
 541     k = find_token(document.header, "\\pdf_store_options", 0)
 542     if k != -1:
 543         del document.header[k]
 544     i = find_token(document.header, "\\pdf_bookmarksopenlevel", k)
 545     if i == -1: return
 546     document.header[i] = document.header[i].replace('"', '')
 547
 548
 549 def revert_pdf_options_2(document):
 550     # reset the pdfusetitle tag, set quotes for bookmarksopenlevel"
 551     k = find_token(document.header, "\\use_hyperref", 0)
 552     i = find_token(document.header, "\\pdf_pdfusetitle", k)
 553     if i != -1:
 554         del document.header[i]
 555     i = find_token(document.header, "\\pdf_bookmarksopenlevel", k)
 556     if i == -1: return
 557     values = document.header[i].split()
 558     values[1] = ' "' + values[1] + '"'
 559     document.header[i] = ''.join(values)
 560
 561
 562 def convert_htmlurl(document):
 563     'Convert "htmlurl" to "href" insets for docbook'
 564     if document.backend != "docbook":
 565       return
 566     i = 0
 567     while True:
 568       i = find_token(document.body, "\\begin_inset CommandInset url", i)
 569       if i == -1:
 570         return
 571       document.body[i] = "\\begin_inset CommandInset href"
 572       document.body[i + 1] = "LatexCommand href"
 573       i = i + 1
 574
 575
 576 def convert_url(document):
 577     'Convert url insets to url charstyles'
 578     if document.backend == "docbook":
 579       return
 580     i = 0
 581     while True:
 582       i = find_token(document.body, "\\begin_inset CommandInset url", i)
 583       if i == -1:
 584         break
 585       n = find_token(document.body, "name", i)
 586       if n == i + 2:
 587         # place the URL name in typewriter before the new URL insert
 588         # grab the name 'bla' from the e.g. the line 'name "bla"',
 589         # therefore start with the 6th character
 590         name = document.body[n][6:-1]
 591         newname = [name + " "]
 592         document.body[i:i] = newname
 593         i = i + 1
 594       j = find_token(document.body, "target", i)
 595       if j == -1:
 596         document.warning("Malformed LyX document: Can't find target for url inset")
 597         i = j
 598         continue
 599       target = document.body[j][8:-1]
 600       k = find_token(document.body, "\\end_inset", j)
 601       if k == -1:
 602         document.warning("Malformed LyX document: Can't find end of url inset")
 603         i = k
 604         continue
 605       newstuff = ["\\begin_inset Flex URL",
 606         "status collapsed", "",
 607         "\\begin_layout Standard",
 608         "",
 609         target,
 610         "\\end_layout",
 611         ""]
 612       document.body[i:k] = newstuff
 613       i = k
 614
 615 def convert_ams_classes(document):
 616   tc = document.textclass
 617   if (tc != "amsart" and tc != "amsart-plain" and
 618       tc == "amsart-seq" and tc == "amsbook"):
 619     return
 620   if tc == "amsart-plain":
 621     document.textclass = "amsart"
 622     document.set_textclass()
 623     document.add_module("Theorems (Starred)")
 624     return
 625   if tc == "amsart-seq":
 626     document.textclass = "amsart"
 627     document.set_textclass()
 628   document.add_module("Theorems (AMS)")
 629
 630   #Now we want to see if any of the environments in the extended theorems
 631   #module were used in this document. If so, we'll add that module, too.
 632   layouts = ["Criterion", "Algorithm", "Axiom", "Condition", "Note",  \
 633     "Notation", "Summary", "Acknowledgement", "Conclusion", "Fact", \
 634     "Assumption"]
 635
 636   r = re.compile(r'^\\begin_layout (.*?)\*?\s*$')
 637   i = 0
 638   while True:
 639     i = find_token(document.body, "\\begin_layout", i)
 640     if i == -1:
 641       return
 642     m = r.match(document.body[i])
 643     if m == None:
 644       document.warning("Weirdly formed \\begin_layout at line " + i + " of body!")
 645       i += 1
 646       continue
 647     m = m.group(1)
 648     if layouts.count(m) != 0:
 649       document.add_module("Theorems (AMS-Extended)")
 650       return
 651     i += 1
 652
 653 def revert_href(document):
 654     'Reverts hyperlink insets (href) to url insets (url)'
 655     i = 0
 656     while True:
 657       i = find_token(document.body, "\\begin_inset CommandInset href", i)
 658       if i == -1:
 659           return
 660       document.body[i : i + 2] = \
 661         ["\\begin_inset CommandInset url", "LatexCommand url"]
 662       i = i + 2
 663
 664
 665 def convert_include(document):
 666   'Converts include insets to new format.'
 667   i = 0
 668   r = re.compile(r'\\begin_inset Include\s+\\([^{]+){([^}]*)}(?:\[(.*)\])?')
 669   while True:
 670     i = find_token(document.body, "\\begin_inset Include", i)
 671     if i == -1:
 672       return
 673     line = document.body[i]
 674     previewline = document.body[i + 1]
 675     m = r.match(line)
 676     if m == None:
 677       document.warning("Unable to match line " + str(i) + " of body!")
 678       i += 1
 679       continue
 680     cmd = m.group(1)
 681     fn  = m.group(2)
 682     opt = m.group(3)
 683     insertion = ["\\begin_inset CommandInset include",
 684        "LatexCommand " + cmd, previewline,
 685        "filename \"" + fn + "\""]
 686     newlines = 2
 687     if opt:
 688       insertion.append("lstparams " + '"' + opt + '"')
 689       newlines += 1
 690     document.body[i : i + 2] = insertion
 691     i += newlines
 692
 693
 694 def revert_include(document):
 695   'Reverts include insets to old format.'
 696   i = 0
 697   r1 = re.compile('LatexCommand (.+)')
 698   r2 = re.compile('filename (.+)')
 699   r3 = re.compile('options (.*)')
 700   while True:
 701     i = find_token(document.body, "\\begin_inset CommandInset include", i)
 702     if i == -1:
 703       return
 704     previewline = document.body[i + 1]
 705     m = r1.match(document.body[i + 2])
 706     if m == None:
 707       document.warning("Malformed LyX document: No LatexCommand line for `" +
 708         document.body[i] + "' on line " + str(i) + ".")
 709       i += 1
 710       continue
 711     cmd = m.group(1)
 712     m = r2.match(document.body[i + 3])
 713     if m == None:
 714       document.warning("Malformed LyX document: No filename line for `" + \
 715         document.body[i] + "' on line " + str(i) + ".")
 716       i += 2
 717       continue
 718     fn = m.group(1)
 719     options = ""
 720     numlines = 4
 721     if (cmd == "lstinputlisting"):
 722       m = r3.match(document.body[i + 4])
 723       if m != None:
 724         options = m.group(1)
 725         numlines = 5
 726     newline = "\\begin_inset Include \\" + cmd + "{" + fn + "}"
 727     if options:
 728       newline += ("[" + options + "]")
 729     insertion = [newline, previewline]
 730     document.body[i : i + numlines] = insertion
 731     i += 2
 732
 733
 734 def revert_albanian(document):
 735     "Set language Albanian to English"
 736     i = 0
 737     if document.language == "albanian":
 738         document.language = "english"
 739         i = find_token(document.header, "\\language", 0)
 740         if i != -1:
 741             document.header[i] = "\\language english"
 742     j = 0
 743     while True:
 744         j = find_token(document.body, "\\lang albanian", j)
 745         if j == -1:
 746             return
 747         document.body[j] = document.body[j].replace("\\lang albanian", "\\lang english")
 748         j = j + 1
 749
 750
 751 def revert_lowersorbian(document):
 752     "Set language lower Sorbian to English"
 753     i = 0
 754     if document.language == "lowersorbian":
 755         document.language = "english"
 756         i = find_token(document.header, "\\language", 0)
 757         if i != -1:
 758             document.header[i] = "\\language english"
 759     j = 0
 760     while True:
 761         j = find_token(document.body, "\\lang lowersorbian", j)
 762         if j == -1:
 763             return
 764         document.body[j] = document.body[j].replace("\\lang lowersorbian", "\\lang english")
 765         j = j + 1
 766
 767
 768 def revert_uppersorbian(document):
 769     "Set language uppersorbian to usorbian as this was used in LyX 1.5"
 770     i = 0
 771     if document.language == "uppersorbian":
 772         document.language = "usorbian"
 773         i = find_token(document.header, "\\language", 0)
 774         if i != -1:
 775             document.header[i] = "\\language usorbian"
 776     j = 0
 777     while True:
 778         j = find_token(document.body, "\\lang uppersorbian", j)
 779         if j == -1:
 780             return
 781         document.body[j] = document.body[j].replace("\\lang uppersorbian", "\\lang usorbian")
 782         j = j + 1
 783
 784
 785 def convert_usorbian(document):
 786     "Set language usorbian to uppersorbian"
 787     i = 0
 788     if document.language == "usorbian":
 789         document.language = "uppersorbian"
 790         i = find_token(document.header, "\\language", 0)
 791         if i != -1:
 792             document.header[i] = "\\language uppersorbian"
 793     j = 0
 794     while True:
 795         j = find_token(document.body, "\\lang usorbian", j)
 796         if j == -1:
 797             return
 798         document.body[j] = document.body[j].replace("\\lang usorbian", "\\lang uppersorbian")
 799         j = j + 1
 800
 801
 802 def revert_macro_optional_params(document):
 803     "Convert macro definitions with optional parameters into ERTs"
 804     # Stub to convert macro definitions with one or more optional parameters
 805     # into uninterpreted ERT insets
 806
 807
 808 def revert_hyperlinktype(document):
 809     'Reverts hyperlink type'
 810     i = 0
 811     j = 0
 812     while True:
 813       i = find_token(document.body, "target", i)
 814       if i == -1:
 815           return
 816       j = find_token(document.body, "type", i)
 817       if j == -1:
 818           return
 819       if j == i + 1:
 820           del document.body[j]
 821       i = i + 1
 822
 823
 824 def revert_pagebreak(document):
 825     'Reverts pagebreak to ERT'
 826     i = 0
 827     while True:
 828       i = find_token(document.body, "\\pagebreak", i)
 829       if i == -1:
 830           return
 831       document.body[i] = '\\begin_inset ERT\nstatus collapsed\n\n' \
 832       '\\begin_layout Standard\n\n\n\\backslash\n' \
 833       'pagebreak{}\n\\end_layout\n\n\\end_inset\n\n'
 834       i = i + 1
 835
 836
 837 def revert_linebreak(document):
 838     'Reverts linebreak to ERT'
 839     i = 0
 840     while True:
 841       i = find_token(document.body, "\\linebreak", i)
 842       if i == -1:
 843           return
 844       document.body[i] = '\\begin_inset ERT\nstatus collapsed\n\n' \
 845       '\\begin_layout Standard\n\n\n\\backslash\n' \
 846       'linebreak{}\n\\end_layout\n\n\\end_inset\n\n'
 847       i = i + 1
 848
 849
 850 def revert_latin(document):
 851     "Set language Latin to English"
 852     i = 0
 853     if document.language == "latin":
 854         document.language = "english"
 855         i = find_token(document.header, "\\language", 0)
 856         if i != -1:
 857             document.header[i] = "\\language english"
 858     j = 0
 859     while True:
 860         j = find_token(document.body, "\\lang latin", j)
 861         if j == -1:
 862             return
 863         document.body[j] = document.body[j].replace("\\lang latin", "\\lang english")
 864         j = j + 1
 865
 866
 867 def revert_samin(document):
 868     "Set language North Sami to English"
 869     i = 0
 870     if document.language == "samin":
 871         document.language = "english"
 872         i = find_token(document.header, "\\language", 0)
 873         if i != -1:
 874             document.header[i] = "\\language english"
 875     j = 0
 876     while True:
 877         j = find_token(document.body, "\\lang samin", j)
 878         if j == -1:
 879             return
 880         document.body[j] = document.body[j].replace("\\lang samin", "\\lang english")
 881         j = j + 1
 882
 883
 884 def convert_serbocroatian(document):
 885     "Set language Serbocroatian to Croatian as this was really Croatian in LyX 1.5"
 886     i = 0
 887     if document.language == "serbocroatian":
 888         document.language = "croatian"
 889         i = find_token(document.header, "\\language", 0)
 890         if i != -1:
 891             document.header[i] = "\\language croatian"
 892     j = 0
 893     while True:
 894         j = find_token(document.body, "\\lang serbocroatian", j)
 895         if j == -1:
 896             return
 897         document.body[j] = document.body[j].replace("\\lang serbocroatian", "\\lang croatian")
 898         j = j + 1
 899
 900
 901 def convert_framed_notes(document):
 902     "Convert framed notes to boxes. "
 903     i = 0
 904     while 1:
 905         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
 906
 907         if i == -1:
 908             return
 909         document.body[i] = document.body[i].replace("\\begin_inset Note", "\\begin_inset Box")
 910         document.body.insert(i + 1, 'position "t"\nhor_pos "c"\nhas_inner_box 0\ninner_pos "t"\n' \
 911         'use_parbox 0\nwidth "100col%"\nspecial "none"\nheight "1in"\n' \
 912         'height_special "totalheight"')
 913         i = i + 1
 914
 915
 916 def revert_framed_notes(document):
 917     "Revert framed boxes to notes. "
 918     i = 0
 919     while 1:
 920         i = find_tokens(document.body, ["\\begin_inset Box Framed", "\\begin_inset Box Shaded"], i)
 921
 922         if i == -1:
 923             return
 924         j = find_end_of_inset(document.body, i + 1)
 925         if j == -1:
 926             # should not happen
 927             document.warning("Malformed LyX document: Could not find end of Box inset.")
 928         k = find_token(document.body, "status", i + 1, j)
 929         if k == -1:
 930             document.warning("Malformed LyX document: Missing `status' tag in Box inset.")
 931             return
 932         status = document.body[k]
 933         l = find_token(document.body, "\\begin_layout Standard", i + 1, j)
 934         if l == -1:
 935             document.warning("Malformed LyX document: Missing `\\begin_layout Standard' in Box inset.")
 936             return
 937         m = find_token(document.body, "\\end_layout", i + 1, j)
 938         if m == -1:
 939             document.warning("Malformed LyX document: Missing `\\end_layout' in Box inset.")
 940             return
 941         ibox = find_token(document.body, "has_inner_box 1", i + 1, k)
 942         pbox = find_token(document.body, "use_parbox 1", i + 1, k)
 943         if ibox == -1 and pbox == -1:
 944             document.body[i] = document.body[i].replace("\\begin_inset Box", "\\begin_inset Note")
 945             del document.body[i+1:k]
 946         else:
 947             document.body[i] = document.body[i].replace("\\begin_inset Box Shaded", "\\begin_inset Box Frameless")
 948             document.body.insert(l + 1, "\\begin_inset Note Shaded\n" + status + "\n\\begin_layout Standard\n")
 949             document.body.insert(m + 1, "\\end_layout\n\\end_inset")
 950         i = i + 1
 951
 952
 953 def revert_slash(document):
 954     'Revert \\SpecialChar \\slash{} to ERT'
 955     for i in range(len(document.body)):
 956         document.body[i] = document.body[i].replace('\\SpecialChar \\slash{}', \
 957         '\\begin_inset ERT\nstatus collapsed\n\n' \
 958         '\\begin_layout Standard\n\n\n\\backslash\n' \
 959         'slash{}\n\\end_layout\n\n\\end_inset\n\n')
 960
 961
 962 def revert_nobreakdash(document):
 963     'Revert \\SpecialChar \\nobreakdash- to ERT'
 964     found = 0
 965     for i in range(len(document.body)):
 966         line = document.body[i]
 967         r = re.compile(r'\\SpecialChar \\nobreakdash-')
 968         m = r.match(line)
 969         if m:
 970             found = 1
 971         document.body[i] = document.body[i].replace('\\SpecialChar \\nobreakdash-', \
 972         '\\begin_inset ERT\nstatus collapsed\n\n' \
 973         '\\begin_layout Standard\n\n\n\\backslash\n' \
 974         'nobreakdash-\n\\end_layout\n\n\\end_inset\n\n')
 975     if not found:
 976         return
 977     j = find_token(document.header, "\\use_amsmath", 0)
 978     if j == -1:
 979         document.warning("Malformed LyX document: Missing '\\use_amsmath'.")
 980         return
 981     document.header[j] = "\\use_amsmath 2"
 982
 983
 984 def revert_nocite_key(body, start, end):
 985     'key "..." -> \nocite{...}'
 986     for i in range(start, end):
 987         if (body[i][0:5] == 'key "'):
 988             body[i] = body[i].replace('key "', "\\backslash\nnocite{")
 989             body[i] = body[i].replace('"', "}")
 990         else:
 991             body[i] = ""
 992
 993
 994 def revert_nocite(document):
 995     "Revert LatexCommand nocite to ERT"
 996     i = 0
 997     while 1:
 998         i = find_token(document.body, "\\begin_inset CommandInset citation", i)
 999         if i == -1:
1000             return
1001         i = i + 1
1002         if (document.body[i] == "LatexCommand nocite"):
1003             j = find_end_of_inset(document.body, i + 1)
1004             if j == -1:
1005                 #this should not happen
1006                 document.warning("End of CommandInset citation not found in revert_nocite!")
1007                 revert_nocite_key(document.body, i + 1, len(document.body))
1008                 return
1009             revert_nocite_key(document.body, i + 1, j)
1010             document.body[i-1] = "\\begin_inset ERT"
1011             document.body[i] = "status collapsed\n\n" \
1012             "\\begin_layout Standard"
1013             document.body.insert(j, "\\end_layout\n");
1014             i = j
1015
1016
def revert_btprintall(document):
    """Revert the (non-bibtopic) btPrintAll option to an ERT nocite{*}.

    Warns and returns when the header has no "\\use_bibtopic" line.  Only
    when that header value is "false" are the bibtex CommandInsets visited:
    a line equal to 'btprint "btPrintAll"' inside such an inset is deleted
    and an ERT inset holding the nocite{*} command is inserted in front of
    the inset.
    """
    if find_token(document.header, '\\use_bibtopic', 0) == -1:
        document.warning("Malformed lyx document: Missing '\\use_bibtopic'.")
        return
    if get_value(document.header, '\\use_bibtopic', 0) != "false":
        return

    body = document.body
    pos = 0
    while pos < len(body):
        pos = find_token(body, "\\begin_inset CommandInset bibtex", pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(body, pos + 1)
        if inset_end == -1:
            #this should not happen
            document.warning("End of CommandInset bibtex not found in revert_btprintall!")
            inset_end = len(body)
        for idx in range(pos, inset_end):
            if body[idx] == 'btprint "btPrintAll"':
                # One line removed, one inserted: the list length and thus
                # inset_end stay valid for the rest of this loop.
                del body[idx]
                body.insert(pos, "\\begin_inset ERT\n"
                                 "status collapsed\n\n\\begin_layout Standard\n\n"
                                 "\\backslash\nnocite{*}\n"
                                 "\\end_layout\n\\end_inset\n")
        pos = inset_end
1042
1043
def revert_bahasam(document):
    """Replace the language "bahasam" (Bahasa Malaysia) by "bahasa".

    Updates document.language and the "\\language" header line when the
    document language is bahasam, then rewrites "\\lang bahasam" to
    "\\lang bahasa" on every body line find_token locates.
    """
    if document.language == "bahasam":
        document.language = "bahasa"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language bahasa"

    body = document.body
    pos = find_token(body, "\\lang bahasam", 0)
    while pos != -1:
        body[pos] = body[pos].replace("\\lang bahasam", "\\lang bahasa")
        pos = find_token(body, "\\lang bahasam", pos + 1)
1059
1060
def revert_interlingua(document):
    """Replace the language "interlingua" by "english".

    Updates document.language and the "\\language" header line when the
    document language is interlingua, then rewrites "\\lang interlingua" to
    "\\lang english" on every body line find_token locates.
    """
    if document.language == "interlingua":
        document.language = "english"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language english"

    body = document.body
    pos = find_token(body, "\\lang interlingua", 0)
    while pos != -1:
        body[pos] = body[pos].replace("\\lang interlingua", "\\lang english")
        pos = find_token(body, "\\lang interlingua", pos + 1)
1076
1077
def revert_serbianlatin(document):
    """Replace the language "serbian-latin" by "croatian".

    Updates document.language and the "\\language" header line when the
    document language is serbian-latin, then rewrites "\\lang serbian-latin"
    to "\\lang croatian" on every body line find_token locates.
    """
    if document.language == "serbian-latin":
        document.language = "croatian"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language croatian"

    body = document.body
    pos = find_token(body, "\\lang serbian-latin", 0)
    while pos != -1:
        body[pos] = body[pos].replace("\\lang serbian-latin", "\\lang croatian")
        pos = find_token(body, "\\lang serbian-latin", pos + 1)
1093
1094
##
# Conversion hub
#
# Module-level tables that register this file's conversion functions.
# NOTE(review): presumably these names are read by the lyx2lyx driver, which
# is outside this file - confirm there how the entries are consumed.

# Release names listed for this module.
supported_versions = ["1.6.0","1.6"]

# Forward table: one [file-format number, [functions]] entry per number,
# in ascending order from 277 to 311.  An empty list means no function of
# this module is registered for that number.
convert = [[277, [fix_wrong_tables]],
           [278, [close_begin_deeper]],
           [279, [long_charstyle_names]],
           [280, [axe_show_label]],
           [281, []],
           [282, []],
           [283, [convert_flex]],
           [284, []],
           [285, []],
           [286, []],
           [287, [convert_wrapfig_options]],
           [288, [convert_inset_command]],
           [289, [convert_latexcommand_index]],
           [290, []],
           [291, []],
           [292, []],
           [293, []],
           [294, [convert_pdf_options]],
           [295, [convert_htmlurl, convert_url]],
           [296, [convert_include]],
           [297, [convert_usorbian]],
           [298, []],
           [299, []],
           [300, []],
           [301, []],
           [302, []],
           [303, [convert_serbocroatian]],
           [304, [convert_framed_notes]],
           [305, []],
           [306, []],
           [307, []],
           [308, []],
           [309, []],
           [310, []],
           [311, [convert_ams_classes]]
          ]

# Backward table: same [file-format number, [functions]] layout, in
# descending order from 310 to 276.  Entries with several functions list
# them in the order given here.
revert =  [[310, []],
           [309, [revert_btprintall]],
           [308, [revert_nocite]],
           [307, [revert_serbianlatin]],
           [306, [revert_slash, revert_nobreakdash]],
           [305, [revert_interlingua]],
           [304, [revert_bahasam]],
           [303, [revert_framed_notes]],
           [302, []],
           [301, [revert_latin, revert_samin]],
           [300, [revert_linebreak]],
           [299, [revert_pagebreak]],
           [298, [revert_hyperlinktype]],
           [297, [revert_macro_optional_params]],
           [296, [revert_albanian, revert_lowersorbian, revert_uppersorbian]],
           [295, [revert_include]],
           [294, [revert_href]],
           [293, [revert_pdf_options_2]],
           [292, [revert_inset_info]],
           [291, [revert_japanese, revert_japanese_encoding]],
           [290, [revert_vietnamese]],
           [289, [revert_wraptable]],
           [288, [revert_latexcommand_index]],
           [287, [revert_inset_command]],
           [286, [revert_wrapfig_options]],
           [285, [revert_pdf_options]],
           [284, [remove_inzip_options]],
           [283, []],
           [282, [revert_flex]],
           [281, []],
           [280, [revert_begin_modules]],
           [279, [revert_show_label]],
           [278, [revert_long_charstyle_names]],
           [277, []],
           [276, []]
          ]


# Running this file directly does nothing; it only provides the tables above.
if __name__ == "__main__":
    pass