lib/lyx2lyx/lyx_1_6.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2007 José Matos <jamatos@lyx.org>
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 """ Convert files to the file format generated by lyx 1.6"""
  20
  21 import re
  22 import unicodedata
  23 import sys, os
  24
  25 from parser_tools import find_token, find_end_of, find_tokens, get_value
  26
  27 ####################################################################
  28 # Private helper functions
  29
  30 def find_end_of_inset(lines, i):
  31     " Find end of inset, where lines[i] is included."
  32     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  33
  34 def wrap_into_ert(string, src, dst):
  35     " Wrap a something into an ERT"
  36     return string.replace(src, '\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n'
  37       + dst + '\n\\end_layout\n\\end_inset\n')
  38
  39 def add_module(module):
  40   i = find_token(document.header, "\\begin_modules", 0)
  41   if i == -1:
  42     #No modules yet included
  43     i = find_token(document.header, "\\textclass", 0)
  44     if i == -1:
  45       document.warning("Malformed LyX document: No \\textclass!!")
  46       return
  47     modinfo = ["\\begin_modules", module, "\\end_modules"]
  48     document.header[i + 1: i + 1] = modinfo
  49     return
  50   j = find_token(document.header, "\\end_modules", i)
  51   if j == -1:
  52     document.warning("Malformed LyX document: No \\end_modules.")
  53     return
  54   k = find_token(document.header, module, i)
  55   if k != -1 and k < j:
  56     return
  57   document.header.insert(i + 1, module)
  58
  59
  60 ####################################################################
  61
  62 def fix_wrong_tables(document):
  63     i = 0
  64     while True:
  65         i = find_token(document.body, "\\begin_inset Tabular", i)
  66         if i == -1:
  67             return
  68         j = find_end_of_inset(document.body, i + 1)
  69         if j == -1:
  70             document.warning("Malformed LyX document: Could not find end of tabular.")
  71             continue
  72
  73         m = i + 1
  74         nrows = int(document.body[i+1].split('"')[3])
  75         ncols = int(document.body[i+1].split('"')[5])
  76
  77         for l in range(nrows):
  78             prev_multicolumn = 0
  79             for k in range(ncols):
  80                 m = find_token(document.body, '<cell', m)
  81
  82                 if document.body[m].find('multicolumn') != -1:
  83                     multicol_cont = int(document.body[m].split('"')[1])
  84
  85                     if multicol_cont == 2 and (k == 0 or prev_multicolumn == 0):
  86                         document.body[m] = document.body[m][:5] + document.body[m][21:]
  87                         prev_multicolumn = 0
  88                     else:
  89                         prev_multicolumn = multicol_cont
  90                 else:
  91                     prev_multicolumn = 0
  92
  93         i = j + 1
  94
  95
  96 def close_begin_deeper(document):
  97     i = 0
  98     depth = 0
  99     while True:
 100         i = find_tokens(document.body, ["\\begin_deeper", "\\end_deeper"], i)
 101
 102         if i == -1:
 103             break
 104
 105         if document.body[i][:13] == "\\begin_deeper":
 106             depth += 1
 107         else:
 108             depth -= 1
 109
 110         i += 1
 111
 112     document.body[-2:-2] = ['\\end_deeper' for i in range(depth)]
 113
 114
 115 def long_charstyle_names(document):
 116     i = 0
 117     while True:
 118         i = find_token(document.body, "\\begin_inset CharStyle", i)
 119         if i == -1:
 120             return
 121         document.body[i] = document.body[i].replace("CharStyle ", "CharStyle CharStyle:")
 122         i += 1
 123
 124 def revert_long_charstyle_names(document):
 125     i = 0
 126     while True:
 127         i = find_token(document.body, "\\begin_inset CharStyle", i)
 128         if i == -1:
 129             return
 130         document.body[i] = document.body[i].replace("CharStyle CharStyle:", "CharStyle")
 131         i += 1
 132
 133
 134 def axe_show_label(document):
 135     i = 0
 136     while True:
 137         i = find_token(document.body, "\\begin_inset CharStyle", i)
 138         if i == -1:
 139             return
 140         if document.body[i + 1].find("show_label") != -1:
 141             if document.body[i + 1].find("true") != -1:
 142                 document.body[i + 1] = "status open"
 143                 del document.body[ i + 2]
 144             else:
 145                 if document.body[i + 1].find("false") != -1:
 146                     document.body[i + 1] = "status collapsed"
 147                     del document.body[ i + 2]
 148                 else:
 149                     document.warning("Malformed LyX document: show_label neither false nor true.")
 150         else:
 151             document.warning("Malformed LyX document: show_label missing in CharStyle.")
 152
 153         i += 1
 154
 155
 156 def revert_show_label(document):
 157     i = 0
 158     while True:
 159         i = find_token(document.body, "\\begin_inset CharStyle", i)
 160         if i == -1:
 161             return
 162         if document.body[i + 1].find("status open") != -1:
 163             document.body.insert(i + 1, "show_label true")
 164         else:
 165             if document.body[i + 1].find("status collapsed") != -1:
 166                 document.body.insert(i + 1, "show_label false")
 167             else:
 168                 document.warning("Malformed LyX document: no legal status line in CharStyle.")
 169         i += 1
 170
 171 def revert_begin_modules(document):
 172     i = 0
 173     while True:
 174         i = find_token(document.header, "\\begin_modules", i)
 175         if i == -1:
 176             return
 177         j = find_end_of(document.header, i, "\\begin_modules", "\\end_modules")
 178         if j == -1:
 179             # this should not happen
 180             break
 181         document.header[i : j + 1] = []
 182
 183 def convert_flex(document):
 184     "Convert CharStyle to Flex"
 185     i = 0
 186     while True:
 187         i = find_token(document.body, "\\begin_inset CharStyle", i)
 188         if i == -1:
 189             return
 190         document.body[i] = document.body[i].replace('\\begin_inset CharStyle', '\\begin_inset Flex')
 191
 192 def revert_flex(document):
 193     "Convert Flex to CharStyle"
 194     i = 0
 195     while True:
 196         i = find_token(document.body, "\\begin_inset Flex", i)
 197         if i == -1:
 198             return
 199         document.body[i] = document.body[i].replace('\\begin_inset Flex', '\\begin_inset CharStyle')
 200
 201
 202 #  Discard PDF options for hyperref
 203 def revert_pdf_options(document):
 204         "Revert PDF options for hyperref."
 205         i = 0
 206         i = find_token(document.header, "\\use_hyperref", i)
 207         if i != -1:
 208             del document.header[i]
 209         i = find_token(document.header, "\\pdf_store_options", i)
 210         if i != -1:
 211             del document.header[i]
 212         i = find_token(document.header, "\\pdf_title", 0)
 213         if i != -1:
 214             del document.header[i]
 215         i = find_token(document.header, "\\pdf_author", 0)
 216         if i != -1:
 217             del document.header[i]
 218         i = find_token(document.header, "\\pdf_subject", 0)
 219         if i != -1:
 220             del document.header[i]
 221         i = find_token(document.header, "\\pdf_keywords", 0)
 222         if i != -1:
 223             del document.header[i]
 224         i = find_token(document.header, "\\pdf_bookmarks", 0)
 225         if i != -1:
 226             del document.header[i]
 227         i = find_token(document.header, "\\pdf_bookmarksnumbered", i)
 228         if i != -1:
 229             del document.header[i]
 230         i = find_token(document.header, "\\pdf_bookmarksopen", i)
 231         if i != -1:
 232             del document.header[i]
 233         i = find_token(document.header, "\\pdf_bookmarksopenlevel", i)
 234         if i != -1:
 235             del document.header[i]
 236         i = find_token(document.header, "\\pdf_breaklinks", i)
 237         if i != -1:
 238             del document.header[i]
 239         i = find_token(document.header, "\\pdf_pdfborder", i)
 240         if i != -1:
 241             del document.header[i]
 242         i = find_token(document.header, "\\pdf_colorlinks", i)
 243         if i != -1:
 244             del document.header[i]
 245         i = find_token(document.header, "\\pdf_backref", i)
 246         if i != -1:
 247             del document.header[i]
 248         i = find_token(document.header, "\\pdf_pagebackref", i)
 249         if i != -1:
 250             del document.header[i]
 251         i = find_token(document.header, "\\pdf_pagemode", 0)
 252         if i != -1:
 253             del document.header[i]
 254         i = find_token(document.header, "\\pdf_quoted_options", 0)
 255         if i != -1:
 256             del document.header[i]
 257
 258
 259 def remove_inzip_options(document):
 260     "Remove inzipName and embed options from the Graphics inset"
 261     i = 0
 262     while 1:
 263         i = find_token(document.body, "\\begin_inset Graphics", i)
 264         if i == -1:
 265             return
 266         j = find_end_of_inset(document.body, i + 1)
 267         if j == -1:
 268             # should not happen
 269             document.warning("Malformed LyX document: Could not find end of graphics inset.")
 270         # If there's a inzip param, just remove that
 271         k = find_token(document.body, "\tinzipName", i + 1, j)
 272         if k != -1:
 273             del document.body[k]
 274             # embed option must follow the inzipName option
 275             del document.body[k+1]
 276         i = i + 1
 277
 278
 279 def convert_inset_command(document):
 280     """
 281         Convert:
 282             \begin_inset LatexCommand cmd
 283         to
 284             \begin_inset CommandInset InsetType
 285             LatexCommand cmd
 286     """
 287     i = 0
 288     while 1:
 289         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 290         if i == -1:
 291             return
 292         line = document.body[i]
 293         r = re.compile(r'\\begin_inset LatexCommand (.*)$')
 294         m = r.match(line)
 295         cmdName = m.group(1)
 296         insetName = ""
 297         #this is adapted from factory.cpp
 298         if cmdName[0:4].lower() == "cite":
 299             insetName = "citation"
 300         elif cmdName == "url" or cmdName == "htmlurl":
 301             insetName = "url"
 302         elif cmdName[-3:] == "ref":
 303             insetName = "ref"
 304         elif cmdName == "tableofcontents":
 305             insetName = "toc"
 306         elif cmdName == "printnomenclature":
 307             insetName = "nomencl_print"
 308         elif cmdName == "printindex":
 309             insetName = "index_print"
 310         else:
 311             insetName = cmdName
 312         insertion = ["\\begin_inset CommandInset " + insetName, "LatexCommand " + cmdName]
 313         document.body[i : i+1] = insertion
 314
 315
 316 def revert_inset_command(document):
 317     """
 318         Convert:
 319             \begin_inset CommandInset InsetType
 320             LatexCommand cmd
 321         to
 322             \begin_inset LatexCommand cmd
 323         Some insets may end up being converted to insets earlier versions of LyX
 324         will not be able to recognize. Not sure what to do about that.
 325     """
 326     i = 0
 327     while 1:
 328         i = find_token(document.body, "\\begin_inset CommandInset", i)
 329         if i == -1:
 330             return
 331         nextline = document.body[i+1]
 332         r = re.compile(r'LatexCommand\s+(.*)$')
 333         m = r.match(nextline)
 334         if not m:
 335             document.warning("Malformed LyX document: Missing LatexCommand in " + document.body[i] + ".")
 336             continue
 337         cmdName = m.group(1)
 338         insertion = ["\\begin_inset LatexCommand " + cmdName]
 339         document.body[i : i+2] = insertion
 340
 341
 342 def convert_wrapfig_options(document):
 343     "Convert optional options for wrap floats (wrapfig)."
 344     # adds the tokens "lines", "placement", and "overhang"
 345     i = 0
 346     while True:
 347         i = find_token(document.body, "\\begin_inset Wrap figure", i)
 348         if i == -1:
 349             return
 350         document.body.insert(i + 1, "lines 0")
 351         j = find_token(document.body, "placement", i)
 352         # placement can be already set or not; if not, set it
 353         if j == i+2:
 354             document.body.insert(i + 3, "overhang 0col%")
 355         else:
 356            document.body.insert(i + 2, "placement o")
 357            document.body.insert(i + 3, "overhang 0col%")
 358         i = i + 1
 359
 360
 361 def revert_wrapfig_options(document):
 362     "Revert optional options for wrap floats (wrapfig)."
 363     i = 0
 364     while True:
 365         i = find_token(document.body, "lines", i)
 366         if i == -1:
 367             return
 368         j = find_token(document.body, "overhang", i+1)
 369         if j != i + 2 and j != -1:
 370             document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float.")
 371         if j == -1:
 372             return
 373         del document.body[i]
 374         del document.body[j-1]
 375         i = i + 1
 376
 377
 378 def convert_latexcommand_index(document):
 379     "Convert from LatexCommand form to collapsable form."
 380     i = 0
 381     while True:
 382         i = find_token(document.body, "\\begin_inset CommandInset index", i)
 383         if i == -1:
 384             return
 385         if document.body[i + 1] != "LatexCommand index": # Might also be index_print
 386             return
 387         fullcontent = document.body[i + 2][6:].strip('"')
 388         document.body[i:i + 2] = ["\\begin_inset Index",
 389           "status collapsed",
 390           "\\begin_layout Standard"]
 391         # Put here the conversions needed from LaTeX string to LyXText.
 392         # Here we do a minimal conversion to prevent crashes and data loss.
 393         # Manual patch-up may be needed.
 394         # Umlauted characters (most common ones, can be extended):
 395         fullcontent = fullcontent.replace(r'\\\"a', u'ä').replace(r'\\\"o', u'ö').replace(r'\\\"u', u'ü')
 396         # Generic, \" -> ":
 397         fullcontent = wrap_into_ert(fullcontent, r'\"', '"')
 398         #fullcontent = fullcontent.replace(r'\"', '\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout standard\n"\n\\end_layout\n\\end_inset\n')
 399         # Math:
 400         r = re.compile('^(.*?)(\$.*?\$)(.*)')
 401         g = fullcontent
 402         while r.match(g):
 403           m = r.match(g)
 404           s = m.group(1)
 405           f = m.group(2).replace('\\\\', '\\')
 406           g = m.group(3)
 407           if s:
 408             # this is non-math!
 409             s = wrap_into_ert(s, r'\\', '\\backslash')
 410             s = wrap_into_ert(s, '{', '{')
 411             s = wrap_into_ert(s, '}', '}')
 412             document.body.insert(i + 3, s)
 413             i += 1
 414           document.body.insert(i + 3, "\\begin_inset Formula " + f)
 415           document.body.insert(i + 4, "\\end_inset")
 416           i += 2
 417         # Generic, \\ -> \backslash:
 418         g = wrap_into_ert(g, r'\\', '\\backslash{}')
 419         g = wrap_into_ert(g, '{', '{')
 420         g = wrap_into_ert(g, '}', '}')
 421         document.body.insert(i + 3, g)
 422         document.body[i + 4] = "\\end_layout"
 423         i = i + 5
 424
 425
 426 def revert_latexcommand_index(document):
 427     "Revert from collapsable form to LatexCommand form."
 428     i = 0
 429     while True:
 430         i = find_token(document.body, "\\begin_inset Index", i)
 431         if i == -1:
 432           return
 433         j = find_end_of_inset(document.body, i + 1)
 434         if j == -1:
 435           return
 436         del document.body[j - 1]
 437         del document.body[j - 2] # \end_layout
 438         document.body[i] =  "\\begin_inset CommandInset index"
 439         document.body[i + 1] =  "LatexCommand index"
 440         # clean up multiline stuff
 441         content = ""
 442         for k in range(i + 3, j - 2):
 443           line = document.body[k]
 444           if line.startswith("\\begin_inset ERT"):
 445             line = line[16:]
 446           if line.startswith("\\begin_inset Formula"):
 447             line = line[20:]
 448           if line.startswith("\\begin_layout Standard"):
 449             line = line[22:]
 450           if line.startswith("\\end_layout"):
 451             line = line[11:]
 452           if line.startswith("\\end_inset"):
 453             line = line[10:]
 454           if line.startswith("status collapsed"):
 455             line = line[16:]
 456           line = line.replace(u'ä', r'\\\"a').replace(u'ö', r'\\\"o').replace(u'ü', r'\\\"u')
 457           content = content + line;
 458         document.body[i + 3] = "name " + '"' + content + '"'
 459         for k in range(i + 4, j - 2):
 460           del document.body[i + 4]
 461         document.body.insert(i + 4, "")
 462         del document.body[i + 2] # \begin_layout standard
 463         i = i + 5
 464
 465
 466 def revert_wraptable(document):
 467     "Revert wrap table to wrap figure."
 468     i = 0
 469     while True:
 470         i = find_token(document.body, "\\begin_inset Wrap table", i)
 471         if i == -1:
 472             return
 473         document.body[i] = document.body[i].replace('\\begin_inset Wrap table', '\\begin_inset Wrap figure')
 474         i = i + 1
 475
 476
 477 def revert_vietnamese(document):
 478     "Set language Vietnamese to English"
 479     # Set document language from Vietnamese to English
 480     i = 0
 481     if document.language == "vietnamese":
 482         document.language = "english"
 483         i = find_token(document.header, "\\language", 0)
 484         if i != -1:
 485             document.header[i] = "\\language english"
 486     j = 0
 487     while True:
 488         j = find_token(document.body, "\\lang vietnamese", j)
 489         if j == -1:
 490             return
 491         document.body[j] = document.body[j].replace("\\lang vietnamese", "\\lang english")
 492         j = j + 1
 493
 494
 495 def revert_japanese(document):
 496     "Set language japanese-plain to japanese"
 497     # Set document language from japanese-plain to japanese
 498     i = 0
 499     if document.language == "japanese-plain":
 500         document.language = "japanese"
 501         i = find_token(document.header, "\\language", 0)
 502         if i != -1:
 503             document.header[i] = "\\language japanese"
 504     j = 0
 505     while True:
 506         j = find_token(document.body, "\\lang japanese-plain", j)
 507         if j == -1:
 508             return
 509         document.body[j] = document.body[j].replace("\\lang japanese-plain", "\\lang japanese")
 510         j = j + 1
 511
 512
 513 def revert_japanese_encoding(document):
 514     "Set input encoding form EUC-JP-plain to EUC-JP etc."
 515     # Set input encoding form EUC-JP-plain to EUC-JP etc.
 516     i = 0
 517     i = find_token(document.header, "\\inputencoding EUC-JP-plain", 0)
 518     if i != -1:
 519         document.header[i] = "\\inputencoding EUC-JP"
 520     j = 0
 521     j = find_token(document.header, "\\inputencoding JIS-plain", 0)
 522     if j != -1:
 523         document.header[j] = "\\inputencoding JIS"
 524     k = 0
 525     k = find_token(document.header, "\\inputencoding SJIS-plain", 0)
 526     if k != -1: # convert to UTF8 since there is currently no SJIS encoding
 527         document.header[k] = "\\inputencoding UTF8"
 528
 529
 530 def revert_inset_info(document):
 531     'Replace info inset with its content'
 532     i = 0
 533     while 1:
 534         i = find_token(document.body, '\\begin_inset Info', i)
 535         if i == -1:
 536             return
 537         j = find_end_of_inset(document.body, i + 1)
 538         if j == -1:
 539             # should not happen
 540             document.warning("Malformed LyX document: Could not find end of Info inset.")
 541         type = 'unknown'
 542         arg = ''
 543         for k in range(i, j+1):
 544             if document.body[k].startswith("arg"):
 545                 arg = document.body[k][3:].strip().strip('"')
 546             if document.body[k].startswith("type"):
 547                 type = document.body[k][4:].strip().strip('"')
 548         # I think there is a newline after \\end_inset, which should be removed.
 549         if document.body[j + 1].strip() == "":
 550             document.body[i : (j + 2)] = [type + ':' + arg]
 551         else:
 552             document.body[i : (j + 1)] = [type + ':' + arg]
 553
 554
 555 def convert_pdf_options(document):
 556     # Set the pdfusetitle tag, delete the pdf_store_options,
 557     # set quotes for bookmarksopenlevel"
 558     has_hr = get_value(document.header, "\\use_hyperref", 0, default = "0")
 559     if has_hr == "1":
 560         k = find_token(document.header, "\\use_hyperref", 0)
 561         document.header.insert(k + 1, "\\pdf_pdfusetitle true")
 562     k = find_token(document.header, "\\pdf_store_options", 0)
 563     if k != -1:
 564         del document.header[k]
 565     i = find_token(document.header, "\\pdf_bookmarksopenlevel", k)
 566     if i == -1: return
 567     document.header[i] = document.header[i].replace('"', '')
 568
 569
 570 def revert_pdf_options_2(document):
 571     # reset the pdfusetitle tag, set quotes for bookmarksopenlevel"
 572     k = find_token(document.header, "\\use_hyperref", 0)
 573     i = find_token(document.header, "\\pdf_pdfusetitle", k)
 574     if i != -1:
 575         del document.header[i]
 576     i = find_token(document.header, "\\pdf_bookmarksopenlevel", k)
 577     if i == -1: return
 578     values = document.header[i].split()
 579     values[1] = ' "' + values[1] + '"'
 580     document.header[i] = ''.join(values)
 581
 582
 583 def convert_htmlurl(document):
 584     'Convert "htmlurl" to "href" insets for docbook'
 585     if document.backend != "docbook":
 586       return
 587     i = 0
 588     while True:
 589       i = find_token(document.body, "\\begin_inset CommandInset url", i)
 590       if i == -1:
 591         return
 592       document.body[i] = "\\begin_inset CommandInset href"
 593       document.body[i + 1] = "LatexCommand href"
 594       i = i + 1
 595
 596
 597 def convert_url(document):
 598     'Convert url insets to url charstyles'
 599     if document.backend == "docbook":
 600       return
 601     i = 0
 602     while True:
 603       i = find_token(document.body, "\\begin_inset CommandInset url", i)
 604       if i == -1:
 605         break
 606       n = find_token(document.body, "name", i)
 607       if n == i + 2:
 608         # place the URL name in typewriter before the new URL insert
 609         # grab the name 'bla' from the e.g. the line 'name "bla"',
 610         # therefore start with the 6th character
 611         name = document.body[n][6:-1]
 612         newname = [name + " "]
 613         document.body[i:i] = newname
 614         i = i + 1
 615       j = find_token(document.body, "target", i)
 616       if j == -1:
 617         document.warning("Malformed LyX document: Can't find target for url inset")
 618         i = j
 619         continue
 620       target = document.body[j][8:-1]
 621       k = find_token(document.body, "\\end_inset", j)
 622       if k == -1:
 623         document.warning("Malformed LyX document: Can't find end of url inset")
 624         i = k
 625         continue
 626       newstuff = ["\\begin_inset Flex URL",
 627         "status collapsed", "",
 628         "\\begin_layout Standard",
 629         "",
 630         target,
 631         "\\end_layout",
 632         ""]
 633       document.body[i:k] = newstuff
 634       i = k
 635
 636
 637 def revert_href(document):
 638     'Reverts hyperlink insets (href) to url insets (url)'
 639     i = 0
 640     while True:
 641       i = find_token(document.body, "\\begin_inset CommandInset href", i)
 642       if i == -1:
 643           return
 644       document.body[i : i + 2] = \
 645         ["\\begin_inset CommandInset url", "LatexCommand url"]
 646       i = i + 2
 647
 648
 649 def convert_include(document):
 650   'Converts include insets to new format.'
 651   i = 0
 652   r = re.compile(r'\\begin_inset Include\s+\\([^{]+){([^}]*)}(?:\[(.*)\])?')
 653   while True:
 654     i = find_token(document.body, "\\begin_inset Include", i)
 655     if i == -1:
 656       return
 657     line = document.body[i]
 658     previewline = document.body[i + 1]
 659     m = r.match(line)
 660     if m == None:
 661       document.warning("Unable to match line " + str(i) + " of body!")
 662       i += 1
 663       continue
 664     cmd = m.group(1)
 665     fn  = m.group(2)
 666     opt = m.group(3)
 667     insertion = ["\\begin_inset CommandInset include",
 668        "LatexCommand " + cmd, previewline,
 669        "filename \"" + fn + "\""]
 670     newlines = 2
 671     if opt:
 672       insertion.append("lstparams " + '"' + opt + '"')
 673       newlines += 1
 674     document.body[i : i + 2] = insertion
 675     i += newlines
 676
 677
 678 def revert_include(document):
 679   'Reverts include insets to old format.'
 680   i = 0
 681   r1 = re.compile('LatexCommand (.+)')
 682   r2 = re.compile('filename (.+)')
 683   r3 = re.compile('options (.*)')
 684   while True:
 685     i = find_token(document.body, "\\begin_inset CommandInset include", i)
 686     if i == -1:
 687       return
 688     previewline = document.body[i + 1]
 689     m = r1.match(document.body[i + 2])
 690     if m == None:
 691       document.warning("Malformed LyX document: No LatexCommand line for `" +
 692         document.body[i] + "' on line " + str(i) + ".")
 693       i += 1
 694       continue
 695     cmd = m.group(1)
 696     m = r2.match(document.body[i + 3])
 697     if m == None:
 698       document.warning("Malformed LyX document: No filename line for `" + \
 699         document.body[i] + "' on line " + str(i) + ".")
 700       i += 2
 701       continue
 702     fn = m.group(1)
 703     options = ""
 704     numlines = 4
 705     if (cmd == "lstinputlisting"):
 706       m = r3.match(document.body[i + 4])
 707       if m != None:
 708         options = m.group(1)
 709         numlines = 5
 710     newline = "\\begin_inset Include \\" + cmd + "{" + fn + "}"
 711     if options:
 712       newline += ("[" + options + "]")
 713     insertion = [newline, previewline]
 714     document.body[i : i + numlines] = insertion
 715     i += 2
 716
 717
 718 def revert_albanian(document):
 719     "Set language Albanian to English"
 720     i = 0
 721     if document.language == "albanian":
 722         document.language = "english"
 723         i = find_token(document.header, "\\language", 0)
 724         if i != -1:
 725             document.header[i] = "\\language english"
 726     j = 0
 727     while True:
 728         j = find_token(document.body, "\\lang albanian", j)
 729         if j == -1:
 730             return
 731         document.body[j] = document.body[j].replace("\\lang albanian", "\\lang english")
 732         j = j + 1
 733
 734
 735 def revert_lowersorbian(document):
 736     "Set language lower Sorbian to English"
 737     i = 0
 738     if document.language == "lowersorbian":
 739         document.language = "english"
 740         i = find_token(document.header, "\\language", 0)
 741         if i != -1:
 742             document.header[i] = "\\language english"
 743     j = 0
 744     while True:
 745         j = find_token(document.body, "\\lang lowersorbian", j)
 746         if j == -1:
 747             return
 748         document.body[j] = document.body[j].replace("\\lang lowersorbian", "\\lang english")
 749         j = j + 1
 750
 751
 752 def revert_uppersorbian(document):
 753     "Set language uppersorbian to usorbian as this was used in LyX 1.5"
 754     i = 0
 755     if document.language == "uppersorbian":
 756         document.language = "usorbian"
 757         i = find_token(document.header, "\\language", 0)
 758         if i != -1:
 759             document.header[i] = "\\language usorbian"
 760     j = 0
 761     while True:
 762         j = find_token(document.body, "\\lang uppersorbian", j)
 763         if j == -1:
 764             return
 765         document.body[j] = document.body[j].replace("\\lang uppersorbian", "\\lang usorbian")
 766         j = j + 1
 767
 768
 769 def convert_usorbian(document):
 770     "Set language usorbian to uppersorbian"
 771     i = 0
 772     if document.language == "usorbian":
 773         document.language = "uppersorbian"
 774         i = find_token(document.header, "\\language", 0)
 775         if i != -1:
 776             document.header[i] = "\\language uppersorbian"
 777     j = 0
 778     while True:
 779         j = find_token(document.body, "\\lang usorbian", j)
 780         if j == -1:
 781             return
 782         document.body[j] = document.body[j].replace("\\lang usorbian", "\\lang uppersorbian")
 783         j = j + 1
 784
 785
 786 def revert_macro_optional_params(document):
 787     "Convert macro definitions with optional parameters into ERTs"
 788     # Stub to convert macro definitions with one or more optional parameters
 789     # into uninterpreted ERT insets
 790
 791
 792 def revert_hyperlinktype(document):
 793     'Reverts hyperlink type'
 794     i = 0
 795     j = 0
 796     while True:
 797       i = find_token(document.body, "target", i)
 798       if i == -1:
 799           return
 800       j = find_token(document.body, "type", i)
 801       if j == -1:
 802           return
 803       if j == i + 1:
 804           del document.body[j]
 805       i = i + 1
 806
 807
 808 def revert_pagebreak(document):
 809     'Reverts pagebreak to ERT'
 810     i = 0
 811     while True:
 812       i = find_token(document.body, "\\pagebreak", i)
 813       if i == -1:
 814           return
 815       document.body[i] = '\\begin_inset ERT\nstatus collapsed\n\n' \
 816       '\\begin_layout Standard\n\n\n\\backslash\n' \
 817       'pagebreak{}\n\\end_layout\n\n\\end_inset\n\n'
 818       i = i + 1
 819
 820
 821 def revert_linebreak(document):
 822     'Reverts linebreak to ERT'
 823     i = 0
 824     while True:
 825       i = find_token(document.body, "\\linebreak", i)
 826       if i == -1:
 827           return
 828       document.body[i] = '\\begin_inset ERT\nstatus collapsed\n\n' \
 829       '\\begin_layout Standard\n\n\n\\backslash\n' \
 830       'linebreak{}\n\\end_layout\n\n\\end_inset\n\n'
 831       i = i + 1
 832
 833
 834 def revert_latin(document):
 835     "Set language Latin to English"
 836     i = 0
 837     if document.language == "latin":
 838         document.language = "english"
 839         i = find_token(document.header, "\\language", 0)
 840         if i != -1:
 841             document.header[i] = "\\language english"
 842     j = 0
 843     while True:
 844         j = find_token(document.body, "\\lang latin", j)
 845         if j == -1:
 846             return
 847         document.body[j] = document.body[j].replace("\\lang latin", "\\lang english")
 848         j = j + 1
 849
 850
 851 def revert_samin(document):
 852     "Set language North Sami to English"
 853     i = 0
 854     if document.language == "samin":
 855         document.language = "english"
 856         i = find_token(document.header, "\\language", 0)
 857         if i != -1:
 858             document.header[i] = "\\language english"
 859     j = 0
 860     while True:
 861         j = find_token(document.body, "\\lang samin", j)
 862         if j == -1:
 863             return
 864         document.body[j] = document.body[j].replace("\\lang samin", "\\lang english")
 865         j = j + 1
 866
 867
 868 def convert_serbocroatian(document):
 869     "Set language Serbocroatian to Croatian as this was really Croatian in LyX 1.5"
 870     i = 0
 871     if document.language == "serbocroatian":
 872         document.language = "croatian"
 873         i = find_token(document.header, "\\language", 0)
 874         if i != -1:
 875             document.header[i] = "\\language croatian"
 876     j = 0
 877     while True:
 878         j = find_token(document.body, "\\lang serbocroatian", j)
 879         if j == -1:
 880             return
 881         document.body[j] = document.body[j].replace("\\lang serbocroatian", "\\lang croatian")
 882         j = j + 1
 883
 884
 885 def convert_framed_notes(document):
 886     "Convert framed notes to boxes. "
 887     i = 0
 888     while 1:
 889         i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
 890
 891         if i == -1:
 892             return
 893         document.body[i] = document.body[i].replace("\\begin_inset Note", "\\begin_inset Box")
 894         document.body.insert(i + 1, 'position "t"\nhor_pos "c"\nhas_inner_box 0\ninner_pos "t"\n' \
 895         'use_parbox 0\nwidth "100col%"\nspecial "none"\nheight "1in"\n' \
 896         'height_special "totalheight"')
 897         i = i + 1
 898
 899
 900 def revert_framed_notes(document):
 901     "Revert framed boxes to notes. "
 902     i = 0
 903     while 1:
 904         i = find_tokens(document.body, ["\\begin_inset Box Framed", "\\begin_inset Box Shaded"], i)
 905
 906         if i == -1:
 907             return
 908         j = find_end_of_inset(document.body, i + 1)
 909         if j == -1:
 910             # should not happen
 911             document.warning("Malformed LyX document: Could not find end of Box inset.")
 912         k = find_token(document.body, "status", i + 1, j)
 913         if k == -1:
 914             document.warning("Malformed LyX document: Missing `status' tag in Box inset.")
 915             return
 916         status = document.body[k]
 917         l = find_token(document.body, "\\begin_layout Standard", i + 1, j)
 918         if l == -1:
 919             document.warning("Malformed LyX document: Missing `\\begin_layout Standard' in Box inset.")
 920             return
 921         m = find_token(document.body, "\\end_layout", i + 1, j)
 922         if m == -1:
 923             document.warning("Malformed LyX document: Missing `\\end_layout' in Box inset.")
 924             return
 925         ibox = find_token(document.body, "has_inner_box 1", i + 1, k)
 926         pbox = find_token(document.body, "use_parbox 1", i + 1, k)
 927         if ibox == -1 and pbox == -1:
 928             document.body[i] = document.body[i].replace("\\begin_inset Box", "\\begin_inset Note")
 929             del document.body[i+1:k]
 930         else:
 931             document.body[i] = document.body[i].replace("\\begin_inset Box Shaded", "\\begin_inset Box Frameless")
 932             document.body.insert(l + 1, "\\begin_inset Note Shaded\n" + status + "\n\\begin_layout Standard\n")
 933             document.body.insert(m + 1, "\\end_layout\n\\end_inset")
 934         i = i + 1
 935
 936
 937 def revert_slash(document):
 938     'Revert \\SpecialChar \\slash{} to ERT'
 939     for i in range(len(document.body)):
 940         document.body[i] = document.body[i].replace('\\SpecialChar \\slash{}', \
 941         '\\begin_inset ERT\nstatus collapsed\n\n' \
 942         '\\begin_layout Standard\n\n\n\\backslash\n' \
 943         'slash{}\n\\end_layout\n\n\\end_inset\n\n')
 944
 945
 946 def revert_nobreakdash(document):
 947     'Revert \\SpecialChar \\nobreakdash- to ERT'
 948     found = 0
 949     for i in range(len(document.body)):
 950         line = document.body[i]
 951         r = re.compile(r'\\SpecialChar \\nobreakdash-')
 952         m = r.match(line)
 953         if m:
 954             found = 1
 955         document.body[i] = document.body[i].replace('\\SpecialChar \\nobreakdash-', \
 956         '\\begin_inset ERT\nstatus collapsed\n\n' \
 957         '\\begin_layout Standard\n\n\n\\backslash\n' \
 958         'nobreakdash-\n\\end_layout\n\n\\end_inset\n\n')
 959     if not found:
 960         return
 961     j = find_token(document.header, "\\use_amsmath", 0)
 962     if j == -1:
 963         document.warning("Malformed LyX document: Missing '\\use_amsmath'.")
 964         return
 965     document.header[j] = "\\use_amsmath 2"
 966
 967
 968 def revert_nocite_key(body, start, end):
 969     'key "..." -> \nocite{...}'
 970     for i in range(start, end):
 971         if (body[i][0:5] == 'key "'):
 972             body[i] = body[i].replace('key "', "\\backslash\nnocite{")
 973             body[i] = body[i].replace('"', "}")
 974         else:
 975             body[i] = ""
 976
 977
 978 def revert_nocite(document):
 979     "Revert LatexCommand nocite to ERT"
 980     i = 0
 981     while 1:
 982         i = find_token(document.body, "\\begin_inset CommandInset citation", i)
 983         if i == -1:
 984             return
 985         i = i + 1
 986         if (document.body[i] == "LatexCommand nocite"):
 987             j = find_end_of_inset(document.body, i + 1)
 988             if j == -1:
 989                 #this should not happen
 990                 document.warning("End of CommandInset citation not found in revert_nocite!")
 991                 revert_nocite_key(document.body, i + 1, len(document.body))
 992                 return
 993             revert_nocite_key(document.body, i + 1, j)
 994             document.body[i-1] = "\\begin_inset ERT"
 995             document.body[i] = "status collapsed\n\n" \
 996             "\\begin_layout Standard"
 997             document.body.insert(j, "\\end_layout\n");
 998             i = j
 999
1000
def revert_bahasam(document):
    """Set language Bahasa Malaysia to Bahasa Indonesia.

    When the document language is "bahasam", both the language attribute
    and the "\\language" header line are switched to "bahasa".  After
    that, "\\lang bahasam" is rewritten to "\\lang bahasa" on every body
    line where find_token locates it.
    """
    if document.language == "bahasam":
        document.language = "bahasa"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language bahasa"

    old_tag = "\\lang bahasam"
    new_tag = "\\lang bahasa"
    pos = find_token(document.body, old_tag, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace(old_tag, new_tag)
        pos = find_token(document.body, old_tag, pos + 1)
1016
1017
def revert_interlingua(document):
    """Set language Interlingua to English.

    When the document language is "interlingua", both the language
    attribute and the "\\language" header line are switched to English.
    After that, "\\lang interlingua" is rewritten to "\\lang english" on
    every body line where find_token locates it.
    """
    if document.language == "interlingua":
        document.language = "english"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language english"

    old_tag = "\\lang interlingua"
    new_tag = "\\lang english"
    pos = find_token(document.body, old_tag, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace(old_tag, new_tag)
        pos = find_token(document.body, old_tag, pos + 1)
1033
1034
def revert_serbianlatin(document):
    """Set language Serbian-Latin to Croatian.

    When the document language is "serbian-latin", both the language
    attribute and the "\\language" header line are switched to Croatian.
    After that, "\\lang serbian-latin" is rewritten to "\\lang croatian"
    on every body line where find_token locates it.
    """
    if document.language == "serbian-latin":
        document.language = "croatian"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language croatian"

    old_tag = "\\lang serbian-latin"
    new_tag = "\\lang croatian"
    pos = find_token(document.body, old_tag, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace(old_tag, new_tag)
        pos = find_token(document.body, old_tag, pos + 1)
1050
1051
##
# Conversion hub
#
# Each entry of the two tables below pairs a number with the list of
# functions from this module to apply for that step; an empty list means
# no function is run for that number.
# NOTE(review): the numbers are presumably LyX file format numbers and the
# tables are consumed by the lyx2lyx driver -- confirm there whether a
# number names the format reached after the step.

# Version strings associated with this module.
supported_versions = ["1.6.0","1.6"]
# Forward steps, listed in ascending order (277 up to 309).
convert = [[277, [fix_wrong_tables]],
           [278, [close_begin_deeper]],
           [279, [long_charstyle_names]],
           [280, [axe_show_label]],
           [281, []],
           [282, []],
           [283, [convert_flex]],
           [284, []],
           [285, []],
           [286, []],
           [287, [convert_wrapfig_options]],
           [288, [convert_inset_command]],
           [289, [convert_latexcommand_index]],
           [290, []],
           [291, []],
           [292, []],
           [293, []],
           [294, [convert_pdf_options]],
           [295, [convert_htmlurl, convert_url]],
           [296, [convert_include]],
           [297, [convert_usorbian]],
           [298, []],
           [299, []],
           [300, []],
           [301, []],
           [302, []],
           [303, [convert_serbocroatian]],
           [304, [convert_framed_notes]],
           [305, []],
           [306, []],
           [307, []],
           [308, []],
           [309, []]
          ]

# Backward steps, listed in descending order (308 down to 276).
revert =  [[308, [revert_nocite]],
           [307, [revert_serbianlatin]],
           [306, [revert_slash, revert_nobreakdash]],
           [305, [revert_interlingua]],
           [304, [revert_bahasam]],
           [303, [revert_framed_notes]],
           [302, []],
           [301, [revert_latin, revert_samin]],
           [300, [revert_linebreak]],
           [299, [revert_pagebreak]],
           [298, [revert_hyperlinktype]],
           [297, [revert_macro_optional_params]],
           [296, [revert_albanian, revert_lowersorbian, revert_uppersorbian]],
           [295, [revert_include]],
           [294, [revert_href]],
           [293, [revert_pdf_options_2]],
           [292, [revert_inset_info]],
           [291, [revert_japanese, revert_japanese_encoding]],
           [290, [revert_vietnamese]],
           [289, [revert_wraptable]],
           [288, [revert_latexcommand_index]],
           [287, [revert_inset_command]],
           [286, [revert_wrapfig_options]],
           [285, [revert_pdf_options]],
           [284, [remove_inzip_options]],
           [283, []],
           [282, [revert_flex]],
           [281, []],
           [280, [revert_begin_modules]],
           [279, [revert_show_label]],
           [278, [revert_long_charstyle_names]],
           [277, []],
           [276, []]
          ]
1126
1127
# Running this file directly does nothing; it only provides definitions.
if __name__ == "__main__":
    pass