lib/lyx2lyx/lyx_1_6.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2007 José Matos <jamatos@lyx.org>
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 """ Convert files to the file format generated by lyx 1.6"""
  20
  21 import re
  22 import unicodedata
  23 import sys, os
  24
  25 from parser_tools import find_token, find_end_of, find_tokens, get_value
  26
  27 ####################################################################
  28 # Private helper functions
  29
  def find_end_of_inset(lines, i):
      """Find the end of an inset, searching from lines[i] (inclusive).

      Thin wrapper around find_end_of using the inset begin/end token pair.
      Callers in this module treat a result of -1 as "no end found".
      """
      opening = "\\begin_inset"
      closing = "\\end_inset"
      return find_end_of(lines, i, opening, closing)
  33
  def wrap_into_ert(string, src, dst):
      """Replace each occurrence of src in string by dst wrapped in an ERT.

      The replacement is a newline-separated, collapsed ERT inset whose
      single Standard layout contains dst.
      """
      ert_lines = [
          '',
          '\\begin_inset ERT',
          'status collapsed',
          '\\begin_layout Standard',
          dst,
          '\\end_layout',
          '\\end_inset',
          '',
      ]
      return string.replace(src, '\n'.join(ert_lines))
  38
  def add_to_preamble(document, text):
      """Append the lines of text to document.preamble unless already there.

      Only text[0] is looked up in the preamble; the remaining lines are
      not compared.
      """
      already_present = find_token(document.preamble, text[0], 0) != -1
      if not already_present:
          document.preamble.extend(text)
  47
  48 ####################################################################
  49
  def _fix_table_cells(document, start, end, nrows, ncols):
      """Strip misplaced multicolumn="2" attributes from the cells in body[start:end]."""
      m = start
      for row in range(nrows):
          prev_multicolumn = 0
          for k in range(ncols):
              m = find_token(document.body, '<cell', m, end)
              if m == -1:
                  document.warning("Malformed LyX document: tabular has fewer cells than declared.")
                  return
              cell = document.body[m]
              if cell.find('multicolumn') != -1:
                  multicol_cont = int(cell.split('"')[1])
                  # A value of 2 at the start of a row, or right after a cell
                  # that is not part of a multicolumn, has nothing to continue.
                  if multicol_cont == 2 and (k == 0 or prev_multicolumn == 0):
                      # '<cell' is 5 characters and ' multicolumn="2"' the next 16.
                      document.body[m] = cell[:5] + cell[21:]
                      prev_multicolumn = 0
                  else:
                      prev_multicolumn = multicol_cont
              else:
                  prev_multicolumn = 0
              # Move on to the next cell; without this the same line would be
              # examined for every row/column combination.
              m += 1


  def fix_wrong_tables(document):
      """Remove invalid multicolumn="2" markers from Tabular insets.

      For every Tabular inset the row and column counts are read from the
      quoted attributes on the line after the inset header, and the cells
      are then checked row by row.  A malformed inset (no end found) is
      reported through document.warning and skipped.
      """
      i = 0
      while True:
          i = find_token(document.body, "\\begin_inset Tabular", i)
          if i == -1:
              return
          j = find_end_of_inset(document.body, i + 1)
          if j == -1:
              document.warning("Malformed LyX document: Could not find end of tabular.")
              # Step past this header; retrying the same index never terminates.
              i += 1
              continue

          attributes = document.body[i + 1].split('"')
          nrows = int(attributes[3])
          ncols = int(attributes[5])
          _fix_table_cells(document, i + 1, j, nrows, ncols)

          i = j + 1
  82
  83
  def close_begin_deeper(document):
      """Balance unclosed begin_deeper lines in the body.

      Counts begin_deeper lines against end_deeper lines and inserts one
      end_deeper per surplus begin_deeper just before the last two body
      lines.  Nothing is inserted when the count is zero or negative.
      """
      tokens = ["\\begin_deeper", "\\end_deeper"]
      unbalanced = 0
      pos = find_tokens(document.body, tokens, 0)
      while pos != -1:
          if document.body[pos].startswith("\\begin_deeper"):
              unbalanced += 1
          else:
              unbalanced -= 1
          pos = find_tokens(document.body, tokens, pos + 1)

      document.body[-2:-2] = ['\\end_deeper'] * unbalanced
 101
 102
 def long_charstyle_names(document):
     """Prefix the style name of every CharStyle inset with "CharStyle:"."""
     token = "\\begin_inset CharStyle"
     body = document.body
     pos = find_token(body, token, 0)
     while pos != -1:
         body[pos] = body[pos].replace("CharStyle ", "CharStyle CharStyle:")
         pos = find_token(body, token, pos + 1)
 111
 def revert_long_charstyle_names(document):
     """Strip the "CharStyle:" prefix from the style name of CharStyle insets.

     This is the inverse of long_charstyle_names, so the space between the
     inset type and the style name has to be kept.
     """
     i = 0
     while True:
         i = find_token(document.body, "\\begin_inset CharStyle", i)
         if i == -1:
             return
         # Replacing with "CharStyle" (no trailing space) would glue the inset
         # type and the style name together.
         document.body[i] = document.body[i].replace("CharStyle CharStyle:", "CharStyle ")
         i += 1
 120
 121
 def axe_show_label(document):
     """Replace the show_label line of CharStyle insets by a status line.

     "show_label true" becomes "status open" and "show_label false" becomes
     "status collapsed"; in both cases the line that follows is deleted.
     Anything else is reported through document.warning.
     """
     body = document.body
     i = 0
     while True:
         i = find_token(body, "\\begin_inset CharStyle", i)
         if i == -1:
             return
         label_line = body[i + 1]
         if "show_label" not in label_line:
             document.warning("Malformed LyX document: show_label missing in CharStyle.")
         elif "true" in label_line:
             body[i + 1] = "status open"
             del body[i + 2]
         elif "false" in label_line:
             body[i + 1] = "status collapsed"
             del body[i + 2]
         else:
             document.warning("Malformed LyX document: show_label neither false nor true.")

         i += 1
 142
 143
 def revert_show_label(document):
     """Insert a show_label line in front of the status line of CharStyle insets.

     "status open" yields "show_label true", "status collapsed" yields
     "show_label false"; any other line after the inset header triggers a
     warning and is left alone.
     """
     body = document.body
     pos = find_token(body, "\\begin_inset CharStyle", 0)
     while pos != -1:
         status_line = body[pos + 1]
         if "status open" in status_line:
             body.insert(pos + 1, "show_label true")
         elif "status collapsed" in status_line:
             body.insert(pos + 1, "show_label false")
         else:
             document.warning("Malformed LyX document: no legal status line in CharStyle.")
         pos = find_token(body, "\\begin_inset CharStyle", pos + 1)
 158
 def revert_begin_modules(document):
     """Delete every begin_modules ... end_modules section from the header.

     Stops silently as soon as a section without an end marker is found.
     """
     header = document.header
     start = find_token(header, "\\begin_modules", 0)
     while start != -1:
         stop = find_end_of(header, start, "\\begin_modules", "\\end_modules")
         if stop == -1:
             # this should not happen
             return
         del header[start:stop + 1]
         start = find_token(header, "\\begin_modules", start)
 170
 def convert_flex(document):
     """Convert CharStyle insets to Flex insets."""
     old = '\\begin_inset CharStyle'
     new = '\\begin_inset Flex'
     body = document.body
     pos = find_token(body, old, 0)
     while pos != -1:
         # The rewritten line no longer matches, so searching again from the
         # same index moves forward.
         body[pos] = body[pos].replace(old, new)
         pos = find_token(body, old, pos)
 179
 def revert_flex(document):
     """Convert Flex insets back to CharStyle insets."""
     flex_header = '\\begin_inset Flex'
     charstyle_header = '\\begin_inset CharStyle'
     lines = document.body
     at = 0
     while True:
         at = find_token(lines, flex_header, at)
         if at == -1:
             break
         # The index is deliberately not advanced: the rewritten line no
         # longer starts with the Flex header.
         lines[at] = lines[at].replace(flex_header, charstyle_header)
 188
 189
 #  Discard PDF options for hyperref
 def revert_pdf_options(document):
     """Revert PDF options for hyperref.

     Deletes every header line that starts with one of the hyperref/PDF
     tokens listed below.  Each token is searched from the top of the header
     so the result does not depend on the order of the header lines.
     """
     tokens = (
         "\\use_hyperref",
         "\\pdf_store_options",
         "\\pdf_title",
         "\\pdf_author",
         "\\pdf_subject",
         "\\pdf_keywords",
         # find_token is relied on as a prefix match elsewhere in this file,
         # so this entry presumably also catches the longer bookmarks tokens;
         # they stay listed to keep the set of discarded options explicit.
         "\\pdf_bookmarks",
         "\\pdf_bookmarksnumbered",
         "\\pdf_bookmarksopen",
         "\\pdf_bookmarksopenlevel",
         "\\pdf_breaklinks",
         "\\pdf_pdfborder",
         "\\pdf_colorlinks",
         "\\pdf_backref",
         "\\pdf_pagebackref",
         "\\pdf_pagemode",
         "\\pdf_quoted_options",
     )
     header = document.header
     for token in tokens:
         i = find_token(header, token, 0)
         while i != -1:
             del header[i]
             # The following line moved up to index i; resume from there.
             i = find_token(header, token, i)
 245
 246
 def remove_inzip_options(document):
     """Remove inzipName and embed options from the Graphics inset.

     The embed line is only removed from insets that carry an inzipName
     line.  Insets whose end cannot be found are reported through
     document.warning and skipped.
     """
     body = document.body
     i = 0
     while True:
         i = find_token(body, "\\begin_inset Graphics", i)
         if i == -1:
             return
         j = find_end_of_inset(body, i + 1)
         if j == -1:
             # should not happen
             document.warning("Malformed LyX document: Could not find end of graphics inset.")
             i = i + 1
             continue
         # If there's a inzip param, just remove that
         k = find_token(body, "\tinzipName", i + 1, j)
         if k != -1:
             del body[k]
             # The embed option follows the inzipName option, so after the
             # deletion it sits at index k (not k + 1).  Look for it from
             # there up to the end of the inset, which moved up by one line.
             # NOTE(review): assumes the option is written as "\tembed ...".
             e = find_token(body, "\tembed", k, j - 1)
             if e != -1:
                 del body[e]
         i = i + 1
 265
 266
 def convert_inset_command(document):
     r"""
         Convert:
             \begin_inset LatexCommand cmd
         to
             \begin_inset CommandInset InsetType
             LatexCommand cmd

         InsetType is derived from cmd: cite* -> citation, url/htmlurl -> url,
         *ref -> ref, tableofcontents -> toc, printnomenclature ->
         nomencl_print, printindex -> index_print, anything else -> cmd.
         Lines without a command name are reported and left untouched.
     """
     r = re.compile(r'\\begin_inset LatexCommand (.*)$')
     # Commands whose inset type is a fixed name (adapted from factory.cpp).
     exact_names = {
         "url": "url",
         "htmlurl": "url",
         "tableofcontents": "toc",
         "printnomenclature": "nomencl_print",
         "printindex": "index_print",
     }
     i = 0
     while True:
         i = find_token(document.body, "\\begin_inset LatexCommand", i)
         if i == -1:
             return
         m = r.match(document.body[i])
         if not m:
             # The search matches the bare prefix, the regex needs a command.
             document.warning("Malformed LyX document: Missing command name in " + document.body[i] + ".")
             i += 1
             continue
         cmdName = m.group(1)
         if cmdName[0:4].lower() == "cite":
             insetName = "citation"
         elif cmdName in exact_names:
             insetName = exact_names[cmdName]
         elif cmdName[-3:] == "ref":
             insetName = "ref"
         else:
             insetName = cmdName
         insertion = ["\\begin_inset CommandInset " + insetName, "LatexCommand " + cmdName]
         # The rewritten header no longer matches the search token, so the
         # next search may start from the same index.
         document.body[i : i+1] = insertion
 302
 303
 def revert_inset_command(document):
     r"""
         Convert:
             \begin_inset CommandInset InsetType
             LatexCommand cmd
         to
             \begin_inset LatexCommand cmd
         Some insets may end up being converted to insets earlier versions of LyX
         will not be able to recognize. Not sure what to do about that.

         Insets whose second line is not a LatexCommand line are reported
         through document.warning and skipped.
     """
     r = re.compile(r'LatexCommand\s+(.*)$')
     i = 0
     while True:
         i = find_token(document.body, "\\begin_inset CommandInset", i)
         if i == -1:
             return
         # Slice instead of indexing so a header on the very last body line is
         # treated as malformed rather than raising IndexError.
         following = document.body[i + 1 : i + 2]
         m = r.match(following[0]) if following else None
         if not m:
             document.warning("Malformed LyX document: Missing LatexCommand in " + document.body[i] + ".")
             # Skip this inset; retrying the same index would never terminate.
             i += 1
             continue
         cmdName = m.group(1)
         insertion = ["\\begin_inset LatexCommand " + cmdName]
         document.body[i : i+2] = insertion
 328
 329
 def convert_wrapfig_options(document):
     """Convert optional options for wrap floats (wrapfig).

     After every "Wrap figure" inset header the lines "lines 0",
     "placement ..." and "overhang 0col%" end up in that order; an existing
     placement line is kept, otherwise "placement o" is added.
     """
     body = document.body
     i = 0
     while True:
         i = find_token(body, "\\begin_inset Wrap figure", i)
         if i == -1:
             return
         body.insert(i + 1, "lines 0")
         # placement can be already set or not; if not, set it
         following = body[i + 2:i + 3]
         has_placement = bool(following) and following[0].startswith("placement")
         if not has_placement:
             body.insert(i + 2, "placement o")
         body.insert(i + 3, "overhang 0col%")
         i += 1
 347
 348
 def revert_wrapfig_options(document):
     """Revert optional options for wrap floats (wrapfig).

     Removes the "lines" and "overhang" parameter lines found within the
     three lines that follow a wrap inset header, which is where
     convert_wrapfig_options puts them.  Only those lines are inspected, so
     ordinary text that happens to start with "lines" or "overhang" is never
     deleted.  A missing overhang parameter is reported through
     document.warning.
     """
     body = document.body
     i = 0
     while True:
         # Prefix search: covers "Wrap figure" and, presumably, "Wrap table".
         i = find_token(body, "\\begin_inset Wrap", i)
         if i == -1:
             return
         # Parameter window: the three lines after the header.
         limit = min(i + 4, len(body))
         k = find_token(body, "lines", i + 1, limit)
         if k != -1:
             del body[k]
             limit -= 1
         k = find_token(body, "overhang", i + 1, limit)
         if k == -1:
             document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float.")
         else:
             del body[k]
         i = i + 1
 364
 365
 def convert_latexcommand_index(document):
     """Convert from LatexCommand form to collapsable form.

     Each "CommandInset index" inset with a "LatexCommand index" line is
     turned into a collapsed Index inset whose Standard layout holds the
     former name string: math ($...$) becomes Formula insets, and
     backslashes, braces and escaped quotes are wrapped into ERT insets.
     Other insets matching the search prefix (e.g. index_print) are skipped.
     """
     math_re = re.compile(r'^(.*?)(\$.*?\$)(.*)')
     i = 0
     while True:
         i = find_token(document.body, "\\begin_inset CommandInset index", i)
         if i == -1:
             return
         if document.body[i + 1] != "LatexCommand index": # Might also be index_print
             # Skip only this inset; returning here would leave every later
             # index inset unconverted.
             i += 1
             continue
         # Drop the leading 'name "' and the surrounding quotes.
         fullcontent = document.body[i + 2][6:].strip('"')
         # Two lines become three, so the old name line is now at i + 3.
         document.body[i:i + 2] = ["\\begin_inset Index",
           "status collapsed",
           "\\begin_layout Standard"]
         # Put here the conversions needed from LaTeX string to LyXText.
         # Here we do a minimal conversion to prevent crashes and data loss.
         # Manual patch-up may be needed.
         # Umlauted characters (most common ones, can be extended):
         fullcontent = fullcontent.replace(r'\\\"a', u'ä').replace(r'\\\"o', u'ö').replace(r'\\\"u', u'ü')
         # Generic, \" -> ":
         fullcontent = wrap_into_ert(fullcontent, r'\"', '"')
         # Math:
         g = fullcontent
         m = math_re.match(g)
         while m:
             s = m.group(1)
             f = m.group(2).replace('\\\\', '\\')
             g = m.group(3)
             if s:
                 # this is non-math!
                 s = wrap_into_ert(s, r'\\', '\\backslash')
                 s = wrap_into_ert(s, '{', '{')
                 s = wrap_into_ert(s, '}', '}')
                 document.body.insert(i + 3, s)
                 i += 1
             document.body.insert(i + 3, "\\begin_inset Formula " + f)
             document.body.insert(i + 4, "\\end_inset")
             i += 2
             m = math_re.match(g)
         # Generic, \\ -> \backslash:
         # NOTE(review): the braces of '\\backslash{}' are themselves wrapped
         # by the two calls below -- confirm this is intended.
         g = wrap_into_ert(g, r'\\', '\\backslash{}')
         g = wrap_into_ert(g, '{', '{')
         g = wrap_into_ert(g, '}', '}')
         document.body.insert(i + 3, g)
         # The old name line, pushed down by the insertions, is overwritten.
         document.body[i + 4] = "\\end_layout"
         i = i + 5
 412
 413
 def revert_latexcommand_index(document):
     """Revert from collapsable form to LatexCommand form.

     The content lines of each Index inset are stripped of known inset and
     layout markup, umlauts are turned back into LaTeX escapes, and the
     result is stored as a single quoted name line of a "CommandInset index"
     inset.  Processing stops at the first inset whose end cannot be found.
     """
     # Markup prefixes removed from content lines, tried in this order.
     markup = (
         "\\begin_inset ERT",
         "\\begin_inset Formula",
         "\\begin_layout Standard",
         "\\end_layout",
         "\\end_inset",
         "status collapsed",
     )
     body = document.body
     i = 0
     while True:
         i = find_token(body, "\\begin_inset Index", i)
         if i == -1:
             return
         j = find_end_of_inset(body, i + 1)
         if j == -1:
             return
         del body[j - 1]
         del body[j - 2] # \end_layout
         body[i] = "\\begin_inset CommandInset index"
         body[i + 1] = "LatexCommand index"
         # clean up multiline stuff
         pieces = []
         for k in range(i + 3, j - 2):
             line = body[k]
             for prefix in markup:
                 if line.startswith(prefix):
                     line = line[len(prefix):]
             line = line.replace(u'ä', r'\\\"a').replace(u'ö', r'\\\"o').replace(u'ü', r'\\\"u')
             pieces.append(line)
         body[i + 3] = "name " + '"' + "".join(pieces) + '"'
         # Drop the remaining content lines, now folded into the name line.
         if j - 2 > i + 4:
             del body[i + 4:j - 2]
         body.insert(i + 4, "")
         del body[i + 2] # \begin_layout standard
         i += 5
 452
 453
 def revert_wraptable(document):
     """Revert wrap table to wrap figure."""
     old = '\\begin_inset Wrap table'
     new = '\\begin_inset Wrap figure'
     body = document.body
     pos = find_token(body, old, 0)
     while pos != -1:
         body[pos] = body[pos].replace(old, new)
         pos = find_token(body, old, pos + 1)
 463
 464
 def revert_vietnamese(document):
     """Set language Vietnamese to English.

     Covers the document language (attribute and header line) as well as
     every language switch to Vietnamese in the body.
     """
     if document.language == "vietnamese":
         document.language = "english"
         lang_line = find_token(document.header, "\\language", 0)
         if lang_line != -1:
             document.header[lang_line] = "\\language english"

     old, new = "\\lang vietnamese", "\\lang english"
     pos = find_token(document.body, old, 0)
     while pos != -1:
         document.body[pos] = document.body[pos].replace(old, new)
         pos = find_token(document.body, old, pos + 1)
 481
 482
 def revert_japanese(document):
     """Set language japanese-plain to japanese.

     Covers the document language (attribute and header line) as well as
     every language switch to japanese-plain in the body.
     """
     if document.language == "japanese-plain":
         document.language = "japanese"
         lang_line = find_token(document.header, "\\language", 0)
         if lang_line != -1:
             document.header[lang_line] = "\\language japanese"

     old, new = "\\lang japanese-plain", "\\lang japanese"
     pos = find_token(document.body, old, 0)
     while pos != -1:
         document.body[pos] = document.body[pos].replace(old, new)
         pos = find_token(document.body, old, pos + 1)
 499
 500
 def revert_japanese_encoding(document):
     """Set input encoding from EUC-JP-plain to EUC-JP etc.

     For each "-plain" input encoding the first matching header line is
     replaced as a whole.
     """
     replacements = (
         ("\\inputencoding EUC-JP-plain", "\\inputencoding EUC-JP"),
         ("\\inputencoding JIS-plain", "\\inputencoding JIS"),
         # convert to UTF8 since there is currently no SJIS encoding
         ("\\inputencoding SJIS-plain", "\\inputencoding UTF8"),
     )
     for old_line, new_line in replacements:
         pos = find_token(document.header, old_line, 0)
         if pos != -1:
             document.header[pos] = new_line
 516
 517
 def revert_inset_info(document):
     """Replace info inset with its content.

     Every Info inset is replaced by a single "type:arg" text line built
     from its "type" and "arg" lines ("unknown" and "" when absent).  An
     empty line right after the inset is removed as well.  Insets whose end
     cannot be found are reported through document.warning and skipped.
     """
     body = document.body
     i = 0
     while True:
         i = find_token(body, '\\begin_inset Info', i)
         if i == -1:
             return
         j = find_end_of_inset(body, i + 1)
         if j == -1:
             # should not happen
             document.warning("Malformed LyX document: Could not find end of Info inset.")
             # Leave the inset alone; slicing with j == -1 would corrupt the body.
             i += 1
             continue
         info_type = 'unknown'
         arg = ''
         for k in range(i, j + 1):
             if body[k].startswith("arg"):
                 arg = body[k][3:].strip().strip('"')
             if body[k].startswith("type"):
                 info_type = body[k][4:].strip().strip('"')
         # I think there is a newline after \\end_inset, which should be removed.
         end = j + 1
         if end < len(body) and body[end].strip() == "":
             end += 1
         body[i:end] = [info_type + ':' + arg]
 541
 542
 def convert_pdf_options(document):
     """Update the hyperref options in the header.

     Sets the pdfusetitle tag when hyperref is in use, deletes the
     pdf_store_options line and removes the quotes from the
     pdf_bookmarksopenlevel line.
     """
     header = document.header
     has_hr = get_value(header, "\\use_hyperref", 0, default = "0")
     if has_hr == "1":
         k = find_token(header, "\\use_hyperref", 0)
         header.insert(k + 1, "\\pdf_pdfusetitle true")
     k = find_token(header, "\\pdf_store_options", 0)
     if k != -1:
         del header[k]
     # Search from the top: k is -1 when pdf_store_options is absent and is
     # not a meaningful starting point for an unrelated token anyway.
     i = find_token(header, "\\pdf_bookmarksopenlevel", 0)
     if i == -1:
         return
     header[i] = header[i].replace('"', '')
 556
 557
 def revert_pdf_options_2(document):
     """Revert the hyperref header changes made by convert_pdf_options.

     Deletes the pdf_pdfusetitle line and puts the value of the
     pdf_bookmarksopenlevel line back into double quotes.
     """
     header = document.header
     # Both tokens are searched from the top of the header; the position of
     # \use_hyperref (possibly -1) is not a reliable starting point.
     i = find_token(header, "\\pdf_pdfusetitle", 0)
     if i != -1:
         del header[i]
     i = find_token(header, "\\pdf_bookmarksopenlevel", 0)
     if i == -1:
         return
     values = header[i].split()
     if len(values) < 2:
         document.warning("Malformed LyX document: \\pdf_bookmarksopenlevel has no value.")
         return
     # The leading space restores the separator dropped by split().
     values[1] = ' "' + values[1] + '"'
     header[i] = ''.join(values)
 569
 570
 def convert_htmlurl(document):
     """Convert "htmlurl" to "href" insets for docbook.

     Documents with any other backend are left untouched.
     """
     if document.backend != "docbook":
         return
     body = document.body
     pos = find_token(body, "\\begin_inset CommandInset url", 0)
     while pos != -1:
         body[pos] = "\\begin_inset CommandInset href"
         body[pos + 1] = "LatexCommand href"
         pos = find_token(body, "\\begin_inset CommandInset url", pos + 1)
 583
 584
 def convert_url(document):
     """Convert url insets to url charstyles.

     Nothing is done for the docbook backend.  Otherwise each
     "CommandInset url" inset is replaced by a collapsed "Flex URL" inset
     holding the target; when the inset has a name line, the name is placed
     as plain text in front of the new inset.  The original end_inset line
     is kept and closes the new inset.  Insets without an end or a target
     are reported through document.warning and skipped.
     """
     if document.backend == "docbook":
         return
     body = document.body
     i = 0
     while True:
         i = find_token(body, "\\begin_inset CommandInset url", i)
         if i == -1:
             break
         k = find_token(body, "\\end_inset", i)
         if k == -1:
             document.warning("Malformed LyX document: Can't find end of url inset")
             # Step past the header; restarting from -1 finds it again forever.
             i += 1
             continue
         # Only look inside this inset so a later inset's target is not taken.
         j = find_token(body, "target", i, k)
         if j == -1:
             document.warning("Malformed LyX document: Can't find target for url inset")
             i += 1
             continue
         # 'target "..."': drop the 8-character prefix and the closing quote.
         target = body[j][8:-1]
         newstuff = []
         if i + 2 < k and body[i + 2].startswith("name"):
             # place the URL name in typewriter before the new URL insert
             # grab the name 'bla' from the e.g. the line 'name "bla"',
             # therefore start with the 6th character
             newstuff.append(body[i + 2][6:-1] + " ")
         newstuff.extend(["\\begin_inset Flex URL",
           "status collapsed", "",
           "\\begin_layout Standard",
           "",
           target,
           "\\end_layout",
           ""])
         body[i:k] = newstuff
         # Continue right after the inserted lines.
         i += len(newstuff)
 623
 def convert_ams_classes(document):
     """Map the AMS text classes onto the amsart/amsbook classes plus modules.

     amsart-plain becomes amsart with the "Theorems (Starred)" module.
     amsart-seq becomes amsart; it, amsart and amsbook get the
     "Theorems (AMS)" module, and additionally "Theorems (AMS-Extended)" if
     the body uses one of the extended theorem layouts.  Other text classes
     are left untouched.
     """
     tc = document.textclass
     if tc not in ("amsart", "amsart-plain", "amsart-seq", "amsbook"):
         return
     if tc == "amsart-plain":
         document.textclass = "amsart"
         document.set_textclass()
         document.add_module("Theorems (Starred)")
         return
     if tc == "amsart-seq":
         document.textclass = "amsart"
         document.set_textclass()
     document.add_module("Theorems (AMS)")

     #Now we want to see if any of the environments in the extended theorems
     #module were used in this document. If so, we'll add that module, too.
     layouts = ["Criterion", "Algorithm", "Axiom", "Condition", "Note",
       "Notation", "Summary", "Acknowledgement", "Conclusion", "Fact",
       "Assumption"]

     # Layout name without an optional trailing "*" and trailing whitespace.
     r = re.compile(r'^\\begin_layout (.*?)\*?\s*$')
     i = 0
     while True:
         i = find_token(document.body, "\\begin_layout", i)
         if i == -1:
             return
         m = r.match(document.body[i])
         if m is None:
             # i is an int and has to be converted before concatenation.
             document.warning("Weirdly formed \\begin_layout at line " + str(i) + " of body!")
             i += 1
             continue
         if m.group(1) in layouts:
             document.add_module("Theorems (AMS-Extended)")
             return
         i += 1
 661
 def revert_href(document):
     """Reverts hyperlink insets (href) to url insets (url)."""
     url_header = ["\\begin_inset CommandInset url", "LatexCommand url"]
     body = document.body
     pos = find_token(body, "\\begin_inset CommandInset href", 0)
     while pos != -1:
         # Overwrite the inset header and the line that follows it.
         body[pos:pos + 2] = list(url_header)
         pos = find_token(body, "\\begin_inset CommandInset href", pos + 2)
 672
 673
 def convert_include(document):
     """Converts include insets to new format.

     The old header line is parsed into command, file name and optional
     bracketed options and rewritten as a "CommandInset include" inset with
     LatexCommand, the former second line, filename and (if present)
     lstparams lines.  Header lines that do not parse are reported and
     skipped.
     """
     token = "\\begin_inset Include"
     pattern = re.compile(r'\\begin_inset Include\s+\\([^{]+){([^}]*)}(?:\[(.*)\])?')
     body = document.body
     i = find_token(body, token, 0)
     while i != -1:
         previewline = body[i + 1]
         match = pattern.match(body[i])
         if match is None:
             document.warning("Unable to match line " + str(i) + " of body!")
             i = find_token(body, token, i + 1)
             continue
         cmd, fn, opt = match.groups()
         insertion = ["\\begin_inset CommandInset include",
            "LatexCommand " + cmd, previewline,
            "filename \"" + fn + "\""]
         advance = 2
         if opt:
             insertion.append("lstparams " + '"' + opt + '"')
             advance = 3
         body[i:i + 2] = insertion
         i = find_token(body, token, i + advance)
 701
 702
 def _strip_quotes(value):
     """Return value without surrounding whitespace and one pair of double quotes."""
     value = value.strip()
     if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
         return value[1:-1]
     return value


 def revert_include(document):
     """Reverts include insets to old format.

     The parameter lines following a "CommandInset include" header
     (LatexCommand, preview, filename and lstparams/options) are accepted
     in any order, so both the layout written by convert_include and the
     layout with the preview line first are handled.  Quotes around the
     file name and the options are removed.  Options are only emitted for
     lstinputlisting.  Insets without a LatexCommand or filename line are
     reported through document.warning and skipped.
     """
     r1 = re.compile(r'LatexCommand (.+)')
     r2 = re.compile(r'filename (.+)')
     # convert_include writes "lstparams"; "options" is kept for compatibility.
     r3 = re.compile(r'(?:lstparams|options) (.*)')
     body = document.body
     i = 0
     while True:
         i = find_token(body, "\\begin_inset CommandInset include", i)
         if i == -1:
             return
         cmd = None
         fn = None
         options = ""
         previewline = None
         # Consume the run of recognised parameter lines after the header.
         k = i + 1
         while k < len(body):
             line = body[k]
             m1 = r1.match(line)
             m2 = r2.match(line)
             m3 = r3.match(line)
             if m1:
                 cmd = m1.group(1)
             elif line.startswith("preview"):
                 previewline = line
             elif m2:
                 fn = _strip_quotes(m2.group(1))
             elif m3:
                 options = _strip_quotes(m3.group(1))
             else:
                 break
             k += 1
         if cmd is None:
             document.warning("Malformed LyX document: No LatexCommand line for `" +
               body[i] + "' on line " + str(i) + ".")
             i += 1
             continue
         if fn is None:
             document.warning("Malformed LyX document: No filename line for `" +
               body[i] + "' on line " + str(i) + ".")
             i += 1
             continue
         newline = "\\begin_inset Include \\" + cmd + "{" + fn + "}"
         if options and cmd == "lstinputlisting":
             newline += ("[" + options + "]")
         insertion = [newline]
         if previewline is not None:
             insertion.append(previewline)
         body[i:k] = insertion
         i += len(insertion)
 741
 742
 def revert_albanian(document):
     """Set language Albanian to English.

     Covers the document language (attribute and header line) as well as
     every language switch to Albanian in the body.
     """
     if document.language == "albanian":
         document.language = "english"
         lang_line = find_token(document.header, "\\language", 0)
         if lang_line != -1:
             document.header[lang_line] = "\\language english"

     old, new = "\\lang albanian", "\\lang english"
     pos = find_token(document.body, old, 0)
     while pos != -1:
         document.body[pos] = document.body[pos].replace(old, new)
         pos = find_token(document.body, old, pos + 1)
 758
 759
 def revert_lowersorbian(document):
     """Set language lower Sorbian to English.

     Covers the document language (attribute and header line) as well as
     every language switch to lower Sorbian in the body.
     """
     if document.language == "lowersorbian":
         document.language = "english"
         lang_line = find_token(document.header, "\\language", 0)
         if lang_line != -1:
             document.header[lang_line] = "\\language english"

     old, new = "\\lang lowersorbian", "\\lang english"
     pos = find_token(document.body, old, 0)
     while pos != -1:
         document.body[pos] = document.body[pos].replace(old, new)
         pos = find_token(document.body, old, pos + 1)
 775
 776
 def revert_uppersorbian(document):
     """Set language uppersorbian to usorbian as this was used in LyX 1.5.

     Covers the document language (attribute and header line) as well as
     every language switch to uppersorbian in the body.
     """
     if document.language == "uppersorbian":
         document.language = "usorbian"
         lang_line = find_token(document.header, "\\language", 0)
         if lang_line != -1:
             document.header[lang_line] = "\\language usorbian"

     old, new = "\\lang uppersorbian", "\\lang usorbian"
     pos = find_token(document.body, old, 0)
     while pos != -1:
         document.body[pos] = document.body[pos].replace(old, new)
         pos = find_token(document.body, old, pos + 1)
 792
 793
 def convert_usorbian(document):
     """Set language usorbian to uppersorbian.

     Covers the document language (attribute and header line) as well as
     every language switch to usorbian in the body.
     """
     if document.language == "usorbian":
         document.language = "uppersorbian"
         lang_line = find_token(document.header, "\\language", 0)
         if lang_line != -1:
             document.header[lang_line] = "\\language uppersorbian"

     old, new = "\\lang usorbian", "\\lang uppersorbian"
     pos = find_token(document.body, old, 0)
     while pos != -1:
         document.body[pos] = document.body[pos].replace(old, new)
         pos = find_token(document.body, old, pos + 1)
 809
 810
 def revert_macro_optional_params(document):
     """Convert macro definitions with optional parameters into ERTs.

     Placeholder: nothing is converted yet.  The intended job is to turn
     macro definitions with one or more optional parameters into
     uninterpreted ERT insets.
     """
     return None
 815
 816
 def revert_hyperlinktype(document):
     """Reverts hyperlink type.

     Deletes a "type" line that directly follows a "target" line.  Stops as
     soon as no further "target" line, or no "type" line at or after it,
     exists in the body.
     """
     body = document.body
     target_at = find_token(body, "target", 0)
     while target_at != -1:
         type_at = find_token(body, "type", target_at)
         if type_at == -1:
             return
         if type_at == target_at + 1:
             del body[type_at]
         target_at = find_token(body, "target", target_at + 1)
 831
 832
 833 def revert_pagebreak(document):
 834     'Reverts pagebreak to ERT'
 835     i = 0
 836     while True:
 837       i = find_token(document.body, "\\pagebreak", i)
 838       if i == -1:
 839           return
 840       document.body[i] = '\\begin_inset ERT\nstatus collapsed\n\n' \
 841       '\\begin_layout Standard\n\n\n\\backslash\n' \
 842       'pagebreak{}\n\\end_layout\n\n\\end_inset\n\n'
 843       i = i + 1
 844
 845
 846 def revert_linebreak(document):
 847     'Reverts linebreak to ERT'
 848     i = 0
 849     while True:
 850       i = find_token(document.body, "\\linebreak", i)
 851       if i == -1:
 852           return
 853       document.body[i] = '\\begin_inset ERT\nstatus collapsed\n\n' \
 854       '\\begin_layout Standard\n\n\n\\backslash\n' \
 855       'linebreak{}\n\\end_layout\n\n\\end_inset\n\n'
 856       i = i + 1
 857
 858
 859 def revert_latin(document):
 860     "Set language Latin to English"
 861     i = 0
 862     if document.language == "latin":
 863         document.language = "english"
 864         i = find_token(document.header, "\\language", 0)
 865         if i != -1:
 866             document.header[i] = "\\language english"
 867     j = 0
 868     while True:
 869         j = find_token(document.body, "\\lang latin", j)
 870         if j == -1:
 871             return
 872         document.body[j] = document.body[j].replace("\\lang latin", "\\lang english")
 873         j = j + 1
 874
 875
 876 def revert_samin(document):
 877     "Set language North Sami to English"
 878     i = 0
 879     if document.language == "samin":
 880         document.language = "english"
 881         i = find_token(document.header, "\\language", 0)
 882         if i != -1:
 883             document.header[i] = "\\language english"
 884     j = 0
 885     while True:
 886         j = find_token(document.body, "\\lang samin", j)
 887         if j == -1:
 888             return
 889         document.body[j] = document.body[j].replace("\\lang samin", "\\lang english")
 890         j = j + 1
 891
 892
 893 def convert_serbocroatian(document):
 894     "Set language Serbocroatian to Croatian as this was really Croatian in LyX 1.5"
 895     i = 0
 896     if document.language == "serbocroatian":
 897         document.language = "croatian"
 898         i = find_token(document.header, "\\language", 0)
 899         if i != -1:
 900             document.header[i] = "\\language croatian"
 901     j = 0
 902     while True:
 903         j = find_token(document.body, "\\lang serbocroatian", j)
 904         if j == -1:
 905             return
 906         document.body[j] = document.body[j].replace("\\lang serbocroatian", "\\lang croatian")
 907         j = j + 1
 908
 909
 910 def convert_framed_notes(document):
 911     "Convert framed notes to boxes. "
 912     i = 0
 913     while 1:
 914         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
 915
 916         if i == -1:
 917             return
 918         document.body[i] = document.body[i].replace("\\begin_inset Note", "\\begin_inset Box")
 919         document.body.insert(i + 1, 'position "t"\nhor_pos "c"\nhas_inner_box 0\ninner_pos "t"\n' \
 920         'use_parbox 0\nwidth "100col%"\nspecial "none"\nheight "1in"\n' \
 921         'height_special "totalheight"')
 922         i = i + 1
 923
 924
 925 def revert_framed_notes(document):
 926     "Revert framed boxes to notes. "
 927     i = 0
 928     while 1:
 929         i = find_tokens(document.body, ["\\begin_inset Box Framed", "\\begin_inset Box Shaded"], i)
 930
 931         if i == -1:
 932             return
 933         j = find_end_of_inset(document.body, i + 1)
 934         if j == -1:
 935             # should not happen
 936             document.warning("Malformed LyX document: Could not find end of Box inset.")
 937         k = find_token(document.body, "status", i + 1, j)
 938         if k == -1:
 939             document.warning("Malformed LyX document: Missing `status' tag in Box inset.")
 940             return
 941         status = document.body[k]
 942         l = find_token(document.body, "\\begin_layout Standard", i + 1, j)
 943         if l == -1:
 944             document.warning("Malformed LyX document: Missing `\\begin_layout Standard' in Box inset.")
 945             return
 946         m = find_token(document.body, "\\end_layout", i + 1, j)
 947         if m == -1:
 948             document.warning("Malformed LyX document: Missing `\\end_layout' in Box inset.")
 949             return
 950         ibox = find_token(document.body, "has_inner_box 1", i + 1, k)
 951         pbox = find_token(document.body, "use_parbox 1", i + 1, k)
 952         if ibox == -1 and pbox == -1:
 953             document.body[i] = document.body[i].replace("\\begin_inset Box", "\\begin_inset Note")
 954             del document.body[i+1:k]
 955         else:
 956             document.body[i] = document.body[i].replace("\\begin_inset Box Shaded", "\\begin_inset Box Frameless")
 957             document.body.insert(l + 1, "\\begin_inset Note Shaded\n" + status + "\n\\begin_layout Standard\n")
 958             document.body.insert(m + 1, "\\end_layout\n\\end_inset")
 959         i = i + 1
 960
 961
 962 def revert_slash(document):
 963     'Revert \\SpecialChar \\slash{} to ERT'
 964     for i in range(len(document.body)):
 965         document.body[i] = document.body[i].replace('\\SpecialChar \\slash{}', \
 966         '\\begin_inset ERT\nstatus collapsed\n\n' \
 967         '\\begin_layout Standard\n\n\n\\backslash\n' \
 968         'slash{}\n\\end_layout\n\n\\end_inset\n\n')
 969
 970
 971 def revert_nobreakdash(document):
 972     'Revert \\SpecialChar \\nobreakdash- to ERT'
 973     found = 0
 974     for i in range(len(document.body)):
 975         line = document.body[i]
 976         r = re.compile(r'\\SpecialChar \\nobreakdash-')
 977         m = r.match(line)
 978         if m:
 979             found = 1
 980         document.body[i] = document.body[i].replace('\\SpecialChar \\nobreakdash-', \
 981         '\\begin_inset ERT\nstatus collapsed\n\n' \
 982         '\\begin_layout Standard\n\n\n\\backslash\n' \
 983         'nobreakdash-\n\\end_layout\n\n\\end_inset\n\n')
 984     if not found:
 985         return
 986     j = find_token(document.header, "\\use_amsmath", 0)
 987     if j == -1:
 988         document.warning("Malformed LyX document: Missing '\\use_amsmath'.")
 989         return
 990     document.header[j] = "\\use_amsmath 2"
 991
 992
 993 def revert_nocite_key(body, start, end):
 994     'key "..." -> \nocite{...}'
 995     for i in range(start, end):
 996         if (body[i][0:5] == 'key "'):
 997             body[i] = body[i].replace('key "', "\\backslash\nnocite{")
 998             body[i] = body[i].replace('"', "}")
 999         else:
1000             body[i] = ""
1001
1002
def revert_nocite(document):
    """Revert LatexCommand nocite to ERT.

    For each citation CommandInset whose second line is
    "LatexCommand nocite", the lines up to the end of the inset are
    rewritten by revert_nocite_key, the two opening lines become the start
    of a collapsed ERT inset, and a closing layout line is inserted in
    front of the inset's end.  If the end of an inset cannot be found, the
    rest of the body is passed to revert_nocite_key and processing stops.
    """
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset CommandInset citation", pos)
        if pos == -1:
            return
        # Step onto the line below the inset opening.
        pos += 1
        if document.body[pos] != "LatexCommand nocite":
            continue
        end = find_end_of_inset(document.body, pos + 1)
        if end == -1:
            #this should not happen
            document.warning("End of CommandInset citation not found in revert_nocite!")
            revert_nocite_key(document.body, pos + 1, len(document.body))
            return
        revert_nocite_key(document.body, pos + 1, end)
        document.body[pos - 1] = "\\begin_inset ERT"
        document.body[pos] = ("status collapsed\n\n"
                              "\\begin_layout Standard")
        document.body.insert(end, "\\end_layout\n")
        pos = end
1024
1025
def revert_btprintall(document):
    """Revert the (non-bibtopic) btPrintAll option to an ERT \\nocite{*}.

    Warns and gives up when the header has no "\\use_bibtopic" line, and
    does nothing unless its value is "false".  Otherwise, in every bibtex
    CommandInset, each 'btprint "btPrintAll"' line is deleted and an ERT
    inset holding the nocite command is inserted in front of the inset.
    """
    if find_token(document.header, '\\use_bibtopic', 0) == -1:
        document.warning("Malformed lyx document: Missing '\\use_bibtopic'.")
        return
    if get_value(document.header, '\\use_bibtopic', 0) != "false":
        return

    nocite_ert = ("\\begin_inset ERT\n"
                  "status collapsed\n\n\\begin_layout Standard\n\n"
                  "\\backslash\nnocite{*}\n"
                  "\\end_layout\n\\end_inset\n")
    pos = 0
    while pos < len(document.body):
        pos = find_token(document.body, "\\begin_inset CommandInset bibtex", pos)
        if pos == -1:
            return
        end = find_end_of_inset(document.body, pos + 1)
        if end == -1:
            #this should not happen
            document.warning("End of CommandInset bibtex not found in revert_btprintall!")
            end = len(document.body)
        for k in range(pos, end):
            if document.body[k] == 'btprint "btPrintAll"':
                # One line goes, one entry comes in: the body length and
                # therefore the end index stay the same.
                del document.body[k]
                document.body.insert(pos, nocite_ert)
        pos = end
1051
1052
def revert_bahasam(document):
    """Replace language Bahasa Malaysia by Bahasa Indonesia.

    When the document language is bahasam, document.language and the
    language line of the header are set to bahasa.  Afterwards every body
    line that find_token reports for the bahasam language switch has that
    switch replaced by the bahasa one.
    """
    if document.language == "bahasam":
        document.language = "bahasa"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language bahasa"

    old_switch = "\\lang bahasam"
    new_switch = "\\lang bahasa"
    pos = find_token(document.body, old_switch, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace(old_switch, new_switch)
        # Resume the scan right after the line that was just rewritten.
        pos = find_token(document.body, old_switch, pos + 1)
1068
1069
def revert_interlingua(document):
    """Replace language Interlingua by English.

    When the document language is interlingua, document.language and the
    language line of the header are set to english.  Afterwards every body
    line that find_token reports for the interlingua language switch has
    that switch replaced by the english one.
    """
    if document.language == "interlingua":
        document.language = "english"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language english"

    old_switch = "\\lang interlingua"
    new_switch = "\\lang english"
    pos = find_token(document.body, old_switch, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace(old_switch, new_switch)
        # Resume the scan right after the line that was just rewritten.
        pos = find_token(document.body, old_switch, pos + 1)
1085
1086
def revert_serbianlatin(document):
    """Replace language Serbian-Latin by Croatian.

    When the document language is serbian-latin, document.language and the
    language line of the header are set to croatian.  Afterwards every
    body line that find_token reports for the serbian-latin language
    switch has that switch replaced by the croatian one.
    """
    if document.language == "serbian-latin":
        document.language = "croatian"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language croatian"

    old_switch = "\\lang serbian-latin"
    new_switch = "\\lang croatian"
    pos = find_token(document.body, old_switch, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace(old_switch, new_switch)
        # Resume the scan right after the line that was just rewritten.
        pos = find_token(document.body, old_switch, pos + 1)
1102
1103
def revert_rotfloat(document):
    """Revert sideways algorithm floats to ERT.

    Each algorithm float whose 'sideways' value is not "false" has its
    opening and closing lines replaced by ERT insets that begin and end a
    sidewaysalgorithm environment, and the needed package and float
    definitions are added to the preamble.  A float without an end is
    skipped with a warning; a float without a Standard layout stops the
    whole conversion with a warning.
    """
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Float algorithm', pos)
        if pos == -1:
            return
        end = find_end_of_inset(document.body, pos)
        if end == -1:
            document.warning("Malformed lyx document: Missing '\\end_inset'.")
        elif get_value(document.body, 'sideways', pos, end) != "false":
            first_layout = find_token(document.body, "\\begin_layout Standard", pos + 1, end)
            if first_layout == -1:
                document.warning("Malformed LyX document: Missing `\\begin_layout Standard' in Float inset.")
                return
            # The closing line is rewritten first, while its index is
            # still valid; the deletion below shifts everything after it.
            document.body[end] = ('\\begin_layout Standard\n\\begin_inset ERT\nstatus collapsed\n\n'
                                  '\\begin_layout Standard\n\n\n\\backslash\n'
                                  'end{sidewaysalgorithm}\n\\end_layout\n\n\\end_inset\n')
            # Remove the lines below the float opening, keeping the line
            # just in front of the first Standard layout.
            del document.body[pos + 1:first_layout - 1]
            document.body[pos] = ('\\begin_inset ERT\nstatus collapsed\n\n'
                                  '\\begin_layout Standard\n\n\n\\backslash\n'
                                  'begin{sidewaysalgorithm}\n\\end_layout\n\n\\end_inset\n\n\\end_layout\n\n')
            add_to_preamble(document,
                            ['% Commands inserted by lyx2lyx for sideways algorithm float',
                             '\\usepackage{rotfloat}\n'
                             '\\floatstyle{ruled}\n'
                             '\\newfloat{algorithm}{tbp}{loa}\n'
                             '\\floatname{algorithm}{Algorithm}\n'])
        # Every path that keeps going continues one line further down.
        pos += 1
1137
1138
def revert_widesideways(document):
    """Revert wide sideways floats to ERT.

    Each float whose 'sideways' and 'wide' values are both different from
    "false" has its opening and closing lines replaced by ERT insets that
    begin and end a starred sideways environment ("figure" when the
    opening line is exactly a figure float, "table" otherwise), and the
    rotfloat package is added to the preamble.  A float without an end is
    skipped with a warning; a float without a Standard layout stops the
    whole conversion with a warning.
    """
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Float', pos)
        if pos == -1:
            return
        floatline = document.body[pos]
        end = find_end_of_inset(document.body, pos)
        if end == -1:
            document.warning("Malformed lyx document: Missing '\\end_inset'.")
        elif (get_value(document.body, 'sideways', pos, end) != "false"
              and get_value(document.body, 'wide', pos, end) != "false"):
            first_layout = find_token(document.body, "\\begin_layout Standard", pos + 1, end)
            if first_layout == -1:
                document.warning("Malformed LyX document: Missing `\\begin_layout Standard' in Float inset.")
                return
            if floatline == "\\begin_inset Float figure":
                floattype = "figure"
            else:
                floattype = "table"
            # The closing line is rewritten first, while its index is
            # still valid; the deletion below shifts everything after it.
            document.body[end] = ('\\begin_layout Standard\n\\begin_inset ERT\nstatus collapsed\n\n'
                                  '\\begin_layout Standard\n\n\n\\backslash\n'
                                  'end{sideways' + floattype + '*}\n\\end_layout\n\n\\end_inset\n')
            # Remove the lines below the float opening, keeping the line
            # just in front of the first Standard layout.
            del document.body[pos + 1:first_layout - 1]
            document.body[pos] = ('\\begin_inset ERT\nstatus collapsed\n\n'
                                  '\\begin_layout Standard\n\n\n\\backslash\n'
                                  'begin{sideways' + floattype + '*}\n\\end_layout\n\n\\end_inset\n\n\\end_layout\n\n')
            add_to_preamble(document,
                            ['\\usepackage{rotfloat}\n'])
        # Every path that keeps going continues one line further down.
        pos += 1
1173
1174
1175 ##
1176 # Conversion hub
1177 #
1178
# LyX release names associated with this module.
# NOTE(review): presumably read by the lyx2lyx driver to map a release to
# this module -- confirm against the caller.
supported_versions = ["1.6.0","1.6"]

# Forward conversion table.  Each entry is [format number, [functions]]; the
# numbers run upwards from 277 to 312 and an empty list means no function is
# registered for that number.
# NOTE(review): presumably the number is the file format reached once the
# listed functions have run -- confirm against the driver.
convert = [[277, [fix_wrong_tables]],
           [278, [close_begin_deeper]],
           [279, [long_charstyle_names]],
           [280, [axe_show_label]],
           [281, []],
           [282, []],
           [283, [convert_flex]],
           [284, []],
           [285, []],
           [286, []],
           [287, [convert_wrapfig_options]],
           [288, [convert_inset_command]],
           [289, [convert_latexcommand_index]],
           [290, []],
           [291, []],
           [292, []],
           [293, []],
           [294, [convert_pdf_options]],
           [295, [convert_htmlurl, convert_url]],
           [296, [convert_include]],
           [297, [convert_usorbian]],
           [298, []],
           [299, []],
           [300, []],
           [301, []],
           [302, []],
           [303, [convert_serbocroatian]],
           [304, [convert_framed_notes]],
           [305, []],
           [306, []],
           [307, []],
           [308, []],
           [309, []],
           [310, []],
           [311, [convert_ams_classes]],
           [312, []],
          ]

# Backward conversion table, same entry layout as `convert`; the numbers run
# downwards from 311 to 276.  Functions sharing an entry are listed in a
# fixed order (e.g. revert_rotfloat before revert_widesideways).
# NOTE(review): presumably they are applied in list order -- confirm.
revert =  [[311, [revert_rotfloat, revert_widesideways]],
           [310, []],
           [309, [revert_btprintall]],
           [308, [revert_nocite]],
           [307, [revert_serbianlatin]],
           [306, [revert_slash, revert_nobreakdash]],
           [305, [revert_interlingua]],
           [304, [revert_bahasam]],
           [303, [revert_framed_notes]],
           [302, []],
           [301, [revert_latin, revert_samin]],
           [300, [revert_linebreak]],
           [299, [revert_pagebreak]],
           [298, [revert_hyperlinktype]],
           [297, [revert_macro_optional_params]],
           [296, [revert_albanian, revert_lowersorbian, revert_uppersorbian]],
           [295, [revert_include]],
           [294, [revert_href]],
           [293, [revert_pdf_options_2]],
           [292, [revert_inset_info]],
           [291, [revert_japanese, revert_japanese_encoding]],
           [290, [revert_vietnamese]],
           [289, [revert_wraptable]],
           [288, [revert_latexcommand_index]],
           [287, [revert_inset_command]],
           [286, [revert_wrapfig_options]],
           [285, [revert_pdf_options]],
           [284, [remove_inzip_options]],
           [283, []],
           [282, [revert_flex]],
           [281, []],
           [280, [revert_begin_modules]],
           [279, [revert_show_label]],
           [278, [revert_long_charstyle_names]],
           [277, []],
           [276, []]
          ]
1255
1256
# Running this module directly does nothing; it only provides the conversion
# routines and the convert/revert tables.
if __name__ == "__main__":
    pass