lib/lyx2lyx/lyx_2_0.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # Copyright (C) 2011 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19 """ Convert files to the file format generated by lyx 2.0"""
  20
  21 import re, string
  22 import unicodedata
  23 import sys, os
  24
  25 from parser_tools import find_token, find_end_of, find_tokens, \
  26   find_token_exact, find_end_of_inset, find_end_of_layout, \
  27   find_token_backwards, is_in_inset, get_value, get_quoted_value, \
  28   del_token, check_token, get_option_value
  29
  30 from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
  31   put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
  32   revert_font_attrs, hex2ratio, str2bool
  33
  34 ####################################################################
  35 # Private helper functions
  36
  37 def remove_option(lines, m, option):
  38     ''' removes option from line m. returns whether we did anything '''
  39     l = lines[m].find(option)
  40     if l == -1:
  41         return False
  42     val = lines[m][l:].split('"')[1]
  43     lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
  44     return True
  45
  46
  47 ###############################################################################
  48 ###
  49 ### Conversion and reversion routines
  50 ###
  51 ###############################################################################
  52
  53 def revert_swiss(document):
  54     " Set language german-ch to ngerman "
  55     i = 0
  56     if document.language == "german-ch":
  57         document.language = "ngerman"
  58         i = find_token(document.header, "\\language", 0)
  59         if i != -1:
  60             document.header[i] = "\\language ngerman"
  61     j = 0
  62     while True:
  63         j = find_token(document.body, "\\lang german-ch", j)
  64         if j == -1:
  65             return
  66         document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman")
  67         j = j + 1
  68
  69
  70 def revert_tabularvalign(document):
  71    " Revert the tabular valign option "
  72    i = 0
  73    while True:
  74       i = find_token(document.body, "\\begin_inset Tabular", i)
  75       if i == -1:
  76           return
  77       end = find_end_of_inset(document.body, i)
  78       if end == -1:
  79           document.warning("Can't find end of inset at line " + str(i))
  80           i += 1
  81           continue
  82       fline = find_token(document.body, "<features", i, end)
  83       if fline == -1:
  84           document.warning("Can't find features for inset at line " + str(i))
  85           i += 1
  86           continue
  87       p = document.body[fline].find("islongtable")
  88       if p != -1:
  89           q = document.body[fline].find("tabularvalignment")
  90           if q != -1:
  91               document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
  92           i += 1
  93           continue
  94
  95        # no longtable
  96       tabularvalignment = 'c'
  97       # which valignment is specified?
  98       m = document.body[fline].find('tabularvalignment="top"')
  99       if m != -1:
 100           tabularvalignment = 't'
 101       m = document.body[fline].find('tabularvalignment="bottom"')
 102       if m != -1:
 103           tabularvalignment = 'b'
 104       # delete tabularvalignment
 105       q = document.body[fline].find("tabularvalignment")
 106       if q != -1:
 107           document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
 108
 109       # don't add a box when centered
 110       if tabularvalignment == 'c':
 111           i = end
 112           continue
 113       subst = ['\\end_inset', '\\end_layout']
 114       document.body[end:end] = subst # just inserts those lines
 115       subst = ['\\begin_inset Box Frameless',
 116           'position "' + tabularvalignment +'"',
 117           'hor_pos "c"',
 118           'has_inner_box 1',
 119           'inner_pos "c"',
 120           'use_parbox 0',
 121           # we don't know the width, assume 50%
 122           'width "50col%"',
 123           'special "none"',
 124           'height "1in"',
 125           'height_special "totalheight"',
 126           'status open',
 127           '',
 128           '\\begin_layout Plain Layout']
 129       document.body[i:i] = subst # this just inserts the array at i
 130       # since there could be a tabular inside a tabular, we cannot
 131       # jump to end
 132       i += len(subst)
 133
 134
def revert_phantom_types(document, ptype, cmd):
    """ Reverts phantom insets of one kind to ERT.

    document -- the document whose body is rewritten in place
    ptype    -- inset type as written after "begin_inset Phantom "
    cmd      -- LaTeX command name (without backslash) put into the ERT

    The opening lines of the inset (up to and including its Plain Layout
    line) are replaced by an ERT holding a backslash, cmd and "{"; the
    closing lines (from the end of that layout to the end of the inset)
    are replaced by a font size reset and an ERT holding "}". The
    content in between is kept.
    """
    i = 0
    while True:
      i = find_token(document.body, "\\begin_inset Phantom " + ptype, i)
      if i == -1:
          return
      end = find_end_of_inset(document.body, i)
      if end == -1:
          document.warning("Can't find end of inset at line " + str(i))
          i += 1
          continue
      # first Plain Layout inside the inset and the end of that layout
      blay = find_token(document.body, "\\begin_layout Plain Layout", i, end)
      if blay == -1:
          document.warning("Can't find layout for inset at line " + str(i))
          i = end
          continue
      bend = find_end_of_layout(document.body, blay)
      if bend == -1:
          document.warning("Malformed LyX document: Could not find end of Phantom inset's layout.")
          i = end
          continue
      # ERT that opens the command: \cmd{
      substi = ["\\begin_inset ERT", "status collapsed", "",
                "\\begin_layout Plain Layout", "", "", "\\backslash",
                cmd + "{", "\\end_layout", "", "\\end_inset"]
      # size reset followed by the ERT that closes the command: }
      substj = ["\\size default", "", "\\begin_inset ERT", "status collapsed", "",
                "\\begin_layout Plain Layout", "", "}", "\\end_layout", "", "\\end_inset"]
      # do the later one first so as not to mess up the numbering
      document.body[bend:end + 1] = substj
      document.body[i:blay + 1] = substi
      # index of the last line of the closing ERT after both replacements:
      # the old end, corrected by the net size change of the two slices
      i = end + len(substi) + len(substj) - (end - bend) - (blay - i) - 2
 166
 167
 168 def revert_phantom(document):
 169     revert_phantom_types(document, "Phantom", "phantom")
 170
 171 def revert_hphantom(document):
 172     revert_phantom_types(document, "HPhantom", "hphantom")
 173
 174 def revert_vphantom(document):
 175     revert_phantom_types(document, "VPhantom", "vphantom")
 176
 177
 178 def revert_xetex(document):
 179     " Reverts documents that use XeTeX "
 180
 181     i = find_token(document.header, '\\use_xetex', 0)
 182     if i == -1:
 183         document.warning("Malformed LyX document: Missing \\use_xetex.")
 184         return
 185     if not str2bool(get_value(document.header, "\\use_xetex", i)):
 186         del document.header[i]
 187         return
 188     del document.header[i]
 189
 190     # 1.) set doc encoding to utf8-plain
 191     i = find_token(document.header, "\\inputencoding", 0)
 192     if i == -1:
 193         document.warning("Malformed LyX document: Missing \\inputencoding.")
 194     else:
 195         document.header[i] = "\\inputencoding utf8-plain"
 196
 197     # 2.) check font settings
 198     # defaults
 199     roman = sans = typew = "default"
 200     osf = False
 201     sf_scale = tt_scale = 100.0
 202
 203     i = find_token(document.header, "\\font_roman", 0)
 204     if i == -1:
 205         document.warning("Malformed LyX document: Missing \\font_roman.")
 206     else:
 207         roman = get_value(document.header, "\\font_roman", i)
 208         document.header[i] = "\\font_roman default"
 209
 210     i = find_token(document.header, "\\font_sans", 0)
 211     if i == -1:
 212         document.warning("Malformed LyX document: Missing \\font_sans.")
 213     else:
 214         sans = get_value(document.header, "\\font_sans", i)
 215         document.header[i] = "\\font_sans default"
 216
 217     i = find_token(document.header, "\\font_typewriter", 0)
 218     if i == -1:
 219         document.warning("Malformed LyX document: Missing \\font_typewriter.")
 220     else:
 221         typew = get_value(document.header, "\\font_typewriter", i)
 222         document.header[i] = "\\font_typewriter default"
 223
 224     i = find_token(document.header, "\\font_osf", 0)
 225     if i == -1:
 226         document.warning("Malformed LyX document: Missing \\font_osf.")
 227     else:
 228         osf = str2bool(get_value(document.header, "\\font_osf", i))
 229         document.header[i] = "\\font_osf false"
 230
 231     i = find_token(document.header, "\\font_sc", 0)
 232     if i == -1:
 233         document.warning("Malformed LyX document: Missing \\font_sc.")
 234     else:
 235         # we do not need this value.
 236         document.header[i] = "\\font_sc false"
 237
 238     i = find_token(document.header, "\\font_sf_scale", 0)
 239     if i == -1:
 240         document.warning("Malformed LyX document: Missing \\font_sf_scale.")
 241     else:
 242       val = get_value(document.header, '\\font_sf_scale', i)
 243       try:
 244         # float() can throw
 245         sf_scale = float(val)
 246       except:
 247         document.warning("Invalid font_sf_scale value: " + val)
 248       document.header[i] = "\\font_sf_scale 100"
 249
 250     i = find_token(document.header, "\\font_tt_scale", 0)
 251     if i == -1:
 252         document.warning("Malformed LyX document: Missing \\font_tt_scale.")
 253     else:
 254         val = get_value(document.header, '\\font_tt_scale', i)
 255         try:
 256           # float() can throw
 257           tt_scale = float(val)
 258         except:
 259           document.warning("Invalid font_tt_scale value: " + val)
 260         document.header[i] = "\\font_tt_scale 100"
 261
 262     # 3.) set preamble stuff
 263     pretext = ['%% This document must be processed with xelatex!']
 264     pretext.append('\\usepackage{fontspec}')
 265     if roman != "default":
 266         pretext.append('\\setmainfont[Mapping=tex-text]{' + roman + '}')
 267     if sans != "default":
 268         sf = '\\setsansfont['
 269         if sf_scale != 100.0:
 270             sf += 'Scale=' + str(sf_scale / 100.0) + ','
 271         sf += 'Mapping=tex-text]{' + sans + '}'
 272         pretext.append(sf)
 273     if typew != "default":
 274         tw = '\\setmonofont'
 275         if tt_scale != 100.0:
 276             tw += '[Scale=' + str(tt_scale / 100.0) + ']'
 277         tw += '{' + typew + '}'
 278         pretext.append(tw)
 279     if osf:
 280         pretext.append('\\defaultfontfeatures{Numbers=OldStyle}')
 281     pretext.append('\\usepackage{xunicode}')
 282     pretext.append('\\usepackage{xltxtra}')
 283     insert_to_preamble(document, pretext)
 284
 285
 286 def revert_outputformat(document):
 287     " Remove default output format param "
 288
 289     if not del_token(document.header, '\\default_output_format', 0):
 290         document.warning("Malformed LyX document: Missing \\default_output_format.")
 291
 292
 293 def revert_backgroundcolor(document):
 294     " Reverts background color to preamble code "
 295     i = find_token(document.header, "\\backgroundcolor", 0)
 296     if i == -1:
 297         return
 298     colorcode = get_value(document.header, '\\backgroundcolor', i)
 299     del document.header[i]
 300     # don't clutter the preamble if backgroundcolor is not set
 301     if colorcode == "#ffffff":
 302         return
 303     red   = hex2ratio(colorcode[1:3])
 304     green = hex2ratio(colorcode[3:5])
 305     blue  = hex2ratio(colorcode[5:7])
 306     insert_to_preamble(document, \
 307         ['% To set the background color',
 308         '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
 309         '\\definecolor{page_backgroundcolor}{rgb}{' + red + ',' + green + ',' + blue + '}',
 310         '\\pagecolor{page_backgroundcolor}'])
 311
 312
 313 def add_use_indices(document):
 314     " Add \\use_indices if it is missing "
 315     i = find_token(document.header, '\\use_indices', 0)
 316     if i != -1:
 317         return i
 318     i = find_token(document.header, '\\use_bibtopic', 0)
 319     if i == -1:
 320         i = find_token(document.header, '\\cite_engine', 0)
 321     if i == -1:
 322         i = find_token(document.header, '\\use_mathdots', 0)
 323     if i == -1:
 324         i = find_token(document.header, '\\use_mhchem', 0)
 325     if i == -1:
 326         i = find_token(document.header, '\\use_esint', 0)
 327     if i == -1:
 328         i = find_token(document.header, '\\use_amsmath', 0)
 329     if i == -1:
 330         document.warning("Malformed LyX document: Missing \\use_indices.")
 331         return -1
 332     document.header.insert(i + 1, '\\use_indices 0')
 333     return i + 1
 334
 335
 336 def revert_splitindex(document):
 337     " Reverts splitindex-aware documents "
 338     i = add_use_indices(document)
 339     if i == -1:
 340         return
 341     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 342     del document.header[i]
 343     preamble = []
 344     if useindices:
 345          preamble.append("\\usepackage{splitidx})")
 346
 347     # deal with index declarations in the preamble
 348     i = 0
 349     while True:
 350         i = find_token(document.header, "\\index", i)
 351         if i == -1:
 352             break
 353         k = find_token(document.header, "\\end_index", i)
 354         if k == -1:
 355             document.warning("Malformed LyX document: Missing \\end_index.")
 356             return
 357         if useindices:
 358           line = document.header[i]
 359           l = re.compile(r'\\index (.*)$')
 360           m = l.match(line)
 361           iname = m.group(1)
 362           ishortcut = get_value(document.header, '\\shortcut', i, k)
 363           if ishortcut != "":
 364               preamble.append("\\newindex[" + iname + "]{" + ishortcut + "}")
 365         del document.header[i:k + 1]
 366     if preamble:
 367         insert_to_preamble(document, preamble)
 368
 369     # deal with index insets
 370     # these need to have the argument removed
 371     i = 0
 372     while True:
 373         i = find_token(document.body, "\\begin_inset Index", i)
 374         if i == -1:
 375             break
 376         line = document.body[i]
 377         l = re.compile(r'\\begin_inset Index (.*)$')
 378         m = l.match(line)
 379         itype = m.group(1)
 380         if itype == "idx" or indices == "false":
 381             document.body[i] = "\\begin_inset Index"
 382         else:
 383             k = find_end_of_inset(document.body, i)
 384             if k == -1:
 385                 document.warning("Can't find end of index inset!")
 386                 i += 1
 387                 continue
 388             content = lyx2latex(document, document.body[i:k])
 389             # escape quotes
 390             content = content.replace('"', r'\"')
 391             subst = put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")
 392             document.body[i:k + 1] = subst
 393         i = i + 1
 394
 395     # deal with index_print insets
 396     i = 0
 397     while True:
 398         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 399         if i == -1:
 400             return
 401         k = find_end_of_inset(document.body, i)
 402         ptype = get_quoted_value(document.body, 'type', i, k)
 403         if ptype == "idx":
 404             j = find_token(document.body, "type", i, k)
 405             del document.body[j]
 406         elif not useindices:
 407             del document.body[i:k + 1]
 408         else:
 409             subst = put_cmd_in_ert("\\printindex[" + ptype + "]{}")
 410             document.body[i:k + 1] = subst
 411         i = i + 1
 412
 413
 414 def convert_splitindex(document):
 415     " Converts index and printindex insets to splitindex-aware format "
 416     add_use_indices(document)
 417     i = 0
 418     while True:
 419         i = find_token(document.body, "\\begin_inset Index", i)
 420         if i == -1:
 421             break
 422         document.body[i] = document.body[i].replace("\\begin_inset Index",
 423             "\\begin_inset Index idx")
 424         i = i + 1
 425     i = 0
 426     while True:
 427         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 428         if i == -1:
 429             return
 430         if document.body[i + 1].find('LatexCommand printindex') == -1:
 431             document.warning("Malformed LyX document: Incomplete printindex inset.")
 432             return
 433         subst = ["LatexCommand printindex",
 434             "type \"idx\""]
 435         document.body[i + 1:i + 2] = subst
 436         i = i + 1
 437
 438
 439 def revert_subindex(document):
 440     " Reverts \\printsubindex CommandInset types "
 441     i = add_use_indices(document)
 442     if i == -1:
 443         return
 444     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 445     i = 0
 446     while True:
 447         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 448         if i == -1:
 449             return
 450         k = find_end_of_inset(document.body, i)
 451         ctype = get_value(document.body, 'LatexCommand', i, k)
 452         if ctype != "printsubindex":
 453             i = k + 1
 454             continue
 455         ptype = get_quoted_value(document.body, 'type', i, k)
 456         if not useindices:
 457             del document.body[i:k + 1]
 458         else:
 459             subst = put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")
 460             document.body[i:k + 1] = subst
 461         i = i + 1
 462
 463
 464 def revert_printindexall(document):
 465     " Reverts \\print[sub]index* CommandInset types "
 466     i = add_use_indices(document)
 467     if i == -1:
 468         return
 469     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 470     i = 0
 471     while True:
 472         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 473         if i == -1:
 474             return
 475         k = find_end_of_inset(document.body, i)
 476         ctype = get_value(document.body, 'LatexCommand', i, k)
 477         if ctype != "printindex*" and ctype != "printsubindex*":
 478             i = k
 479             continue
 480         if not useindices:
 481             del document.body[i:k + 1]
 482         else:
 483             subst = put_cmd_in_ert("\\" + ctype + "{}")
 484             document.body[i:k + 1] = subst
 485         i = i + 1
 486
 487
 488 def revert_strikeout(document):
 489   " Reverts \\strikeout font attribute "
 490   changed = revert_font_attrs(document.body, "\\uuline", "\\uuline")
 491   changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed
 492   changed = revert_font_attrs(document.body, "\\strikeout", "\\sout")  or changed
 493   if changed == True:
 494     insert_to_preamble(document, \
 495         ['%  for proper underlining',
 496         '\\PassOptionsToPackage{normalem}{ulem}',
 497         '\\usepackage{ulem}'])
 498
 499
 500 def revert_ulinelatex(document):
 501     " Reverts \\uline font attribute "
 502     i = find_token(document.body, '\\bar under', 0)
 503     if i == -1:
 504         return
 505     insert_to_preamble(document,\
 506             ['%  for proper underlining',
 507             '\\PassOptionsToPackage{normalem}{ulem}',
 508             '\\usepackage{ulem}',
 509             '\\let\\cite@rig\\cite',
 510             '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}',
 511             '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}',
 512             '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}'])
 513
 514
 515 def revert_custom_processors(document):
 516     " Remove bibtex_command and index_command params "
 517
 518     if not del_token(document.header, '\\bibtex_command', 0):
 519         document.warning("Malformed LyX document: Missing \\bibtex_command.")
 520
 521     if not del_token(document.header, '\\index_command', 0):
 522         document.warning("Malformed LyX document: Missing \\index_command.")
 523
 524
 525 def convert_nomencl_width(document):
 526     " Add set_width param to nomencl_print "
 527     i = 0
 528     while True:
 529       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 530       if i == -1:
 531         break
 532       document.body.insert(i + 2, "set_width \"none\"")
 533       i = i + 1
 534
 535
 536 def revert_nomencl_width(document):
 537     " Remove set_width param from nomencl_print "
 538     i = 0
 539     while True:
 540       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 541       if i == -1:
 542         break
 543       j = find_end_of_inset(document.body, i)
 544       if not del_token(document.body, "set_width", i, j):
 545         document.warning("Can't find set_width option for nomencl_print!")
 546       i = j
 547
 548
 549 def revert_nomencl_cwidth(document):
 550     " Remove width param from nomencl_print "
 551     i = 0
 552     while True:
 553       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 554       if i == -1:
 555         break
 556       j = find_end_of_inset(document.body, i)
 557       l = find_token(document.body, "width", i, j)
 558       if l == -1:
 559         i = j
 560         continue
 561       width = get_quoted_value(document.body, "width", i, j)
 562       del document.body[l]
 563       insert_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
 564       i = j - 1
 565
 566
 567 def revert_applemac(document):
 568     " Revert applemac encoding to auto "
 569     if document.encoding != "applemac":
 570       return
 571     document.encoding = "auto"
 572     i = find_token(document.header, "\\encoding", 0)
 573     if i != -1:
 574         document.header[i] = "\\encoding auto"
 575
 576
 577 def revert_longtable_align(document):
 578     " Remove longtable alignment setting "
 579     i = 0
 580     while True:
 581       i = find_token(document.body, "\\begin_inset Tabular", i)
 582       if i == -1:
 583           break
 584       end = find_end_of_inset(document.body, i)
 585       if end == -1:
 586           document.warning("Can't find end of inset at line " + str(i))
 587           i += 1
 588           continue
 589       fline = find_token(document.body, "<features", i, end)
 590       if fline == -1:
 591           document.warning("Can't find features for inset at line " + str(i))
 592           i += 1
 593           continue
 594       j = document.body[fline].find("longtabularalignment")
 595       if j == -1:
 596           i += 1
 597           continue
 598       # FIXME Is this correct? It wipes out everything after the
 599       # one we found.
 600       document.body[fline] = document.body[fline][:j - 1] + '>'
 601       # since there could be a tabular inside this one, we
 602       # cannot jump to end.
 603       i += 1
 604
 605
 606 def revert_branch_filename(document):
 607     " Remove \\filename_suffix parameter from branches "
 608     i = 0
 609     while True:
 610         i = find_token(document.header, "\\filename_suffix", i)
 611         if i == -1:
 612             return
 613         del document.header[i]
 614
 615
 616 def revert_paragraph_indentation(document):
 617     " Revert custom paragraph indentation to preamble code "
 618     i = find_token(document.header, "\\paragraph_indentation", 0)
 619     if i == -1:
 620       return
 621     length = get_value(document.header, "\\paragraph_indentation", i)
 622     # we need only remove the line if indentation is default
 623     if length != "default":
 624       # handle percent lengths
 625       length = latex_length(length)[1]
 626       insert_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"])
 627     del document.header[i]
 628
 629
 630 def revert_percent_skip_lengths(document):
 631     " Revert relative lengths for paragraph skip separation to preamble code "
 632     i = find_token(document.header, "\\defskip", 0)
 633     if i == -1:
 634         return
 635     length = get_value(document.header, "\\defskip", i)
 636     # only revert when a custom length was set and when
 637     # it used a percent length
 638     if length in ('smallskip', 'medskip', 'bigskip'):
 639         return
 640     # handle percent lengths
 641     percent, length = latex_length(length)
 642     if percent:
 643         insert_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"])
 644         # set defskip to medskip as default
 645         document.header[i] = "\\defskip medskip"
 646
 647
 648 def revert_percent_vspace_lengths(document):
 649     " Revert relative VSpace lengths to ERT "
 650     i = 0
 651     while True:
 652       i = find_token(document.body, "\\begin_inset VSpace", i)
 653       if i == -1:
 654           break
 655       # only revert if a custom length was set and if
 656       # it used a percent length
 657       r = re.compile(r'\\begin_inset VSpace (.*)$')
 658       m = r.match(document.body[i])
 659       length = m.group(1)
 660       if length in ('defskip', 'smallskip', 'medskip', 'bigskip', 'vfill'):
 661          i += 1
 662          continue
 663       # check if the space has a star (protected space)
 664       protected = (document.body[i].rfind("*") != -1)
 665       if protected:
 666           length = length.rstrip('*')
 667       # handle percent lengths
 668       percent, length = latex_length(length)
 669       # revert the VSpace inset to ERT
 670       if percent:
 671           if protected:
 672               subst = put_cmd_in_ert("\\vspace*{" + length + "}")
 673           else:
 674               subst = put_cmd_in_ert("\\vspace{" + length + "}")
 675           document.body[i:i + 2] = subst
 676       i += 1
 677
 678
 679 def revert_percent_hspace_lengths(document):
 680     " Revert relative HSpace lengths to ERT "
 681     i = 0
 682     while True:
 683       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 684       if i == -1:
 685           break
 686       j = find_end_of_inset(document.body, i)
 687       if j == -1:
 688           document.warning("Can't find end of inset at line " + str(i))
 689           i += 1
 690           continue
 691       # only revert if a custom length was set...
 692       length = get_value(document.body, '\\length', i + 1, j)
 693       if length == '':
 694           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 695           i = j
 696           continue
 697       protected = ""
 698       if document.body[i].find("\\hspace*{}") != -1:
 699           protected = "*"
 700       # ...and if it used a percent length
 701       percent, length = latex_length(length)
 702       # revert the HSpace inset to ERT
 703       if percent:
 704           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 705           document.body[i:j + 1] = subst
 706       # if we did a substitution, this will still be ok
 707       i = j
 708
 709
 710 def revert_hspace_glue_lengths(document):
 711     " Revert HSpace glue lengths to ERT "
 712     i = 0
 713     while True:
 714       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 715       if i == -1:
 716           break
 717       j = find_end_of_inset(document.body, i)
 718       if j == -1:
 719           document.warning("Can't find end of inset at line " + str(i))
 720           i += 1
 721           continue
 722       length = get_value(document.body, '\\length', i + 1, j)
 723       if length == '':
 724           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 725           i = j
 726           continue
 727       protected = ""
 728       if document.body[i].find("\\hspace*{}") != -1:
 729           protected = "*"
 730       # only revert if the length contains a plus or minus at pos != 0
 731       if length.find('-',1) != -1 or length.find('+',1) != -1:
 732           # handle percent lengths
 733           length = latex_length(length)[1]
 734           # revert the HSpace inset to ERT
 735           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 736           document.body[i:j+1] = subst
 737       i = j
 738
 739
 740 def convert_author_id(document):
 741     " Add the author_id to the \\author definition and make sure 0 is not used"
 742     i = 0
 743     anum = 1
 744     re_author = re.compile(r'(\\author) (\".*\")\s*(.*)$')
 745
 746     while True:
 747         i = find_token(document.header, "\\author", i)
 748         if i == -1:
 749             break
 750         m = re_author.match(document.header[i])
 751         if m:
 752             name = m.group(2)
 753             email = m.group(3)
 754             document.header[i] = "\\author %i %s %s" % (anum, name, email)
 755         anum += 1
 756         i += 1
 757
 758     i = 0
 759     while True:
 760         i = find_token(document.body, "\\change_", i)
 761         if i == -1:
 762             break
 763         change = document.body[i].split(' ');
 764         if len(change) == 3:
 765             type = change[0]
 766             author_id = int(change[1])
 767             time = change[2]
 768             document.body[i] = "%s %i %s" % (type, author_id + 1, time)
 769         i += 1
 770
 771
 772 def revert_author_id(document):
 773     " Remove the author_id from the \\author definition "
 774     i = 0
 775     anum = 0
 776     rx = re.compile(r'(\\author)\s+(-?\d+)\s+(\".*\")\s*(.*)$')
 777     idmap = dict()
 778
 779     while True:
 780         i = find_token(document.header, "\\author", i)
 781         if i == -1:
 782             break
 783         m = rx.match(document.header[i])
 784         if m:
 785             author_id = int(m.group(2))
 786             idmap[author_id] = anum
 787             name = m.group(3)
 788             email = m.group(4)
 789             document.header[i] = "\\author %s %s" % (name, email)
 790         i += 1
 791         # FIXME Should this be incremented if we didn't match?
 792         anum += 1
 793
 794     i = 0
 795     while True:
 796         i = find_token(document.body, "\\change_", i)
 797         if i == -1:
 798             break
 799         change = document.body[i].split(' ');
 800         if len(change) == 3:
 801             type = change[0]
 802             author_id = int(change[1])
 803             time = change[2]
 804             document.body[i] = "%s %i %s" % (type, idmap[author_id], time)
 805         i += 1
 806
 807
 808 def revert_suppress_date(document):
 809     " Revert suppressing of default document date to preamble code "
 810     i = find_token(document.header, "\\suppress_date", 0)
 811     if i == -1:
 812         return
 813     # remove the preamble line and write to the preamble
 814     # when suppress_date was true
 815     date = str2bool(get_value(document.header, "\\suppress_date", i))
 816     if date:
 817         add_to_preamble(document, ["\\date{}"])
 818     del document.header[i]
 819
 820
 821 def convert_mhchem(document):
 822     "Set mhchem to off for versions older than 1.6.x"
 823     if document.initial_format < 277:
 824         # LyX 1.5.x and older did never load mhchem.
 825         # Therefore we must switch it off: Documents that use mhchem have
 826         # a manual \usepackage anyway, and documents not using mhchem but
 827         # custom macros with the same names as mhchem commands might get
 828         # corrupted if mhchem is automatically loaded.
 829         mhchem = 0 # off
 830     else:
 831         # LyX 1.6.x did always load mhchem automatically.
 832         mhchem = 1 # auto
 833     i = find_token(document.header, "\\use_esint", 0)
 834     if i == -1:
 835         # pre-1.5.x document
 836         i = find_token(document.header, "\\use_amsmath", 0)
 837     if i == -1:
 838         document.warning("Malformed LyX document: Could not find amsmath os esint setting.")
 839         return
 840     document.header.insert(i + 1, "\\use_mhchem %d" % mhchem)
 841
 842
 843 def revert_mhchem(document):
 844     "Revert mhchem loading to preamble code"
 845
 846     mhchem = "off"
 847     i = find_token(document.header, "\\use_mhchem", 0)
 848     if i == -1:
 849         document.warning("Malformed LyX document: Could not find mhchem setting.")
 850         mhchem = "auto"
 851     else:
 852         val = get_value(document.header, "\\use_mhchem", i)
 853         if val == "1":
 854             mhchem = "auto"
 855         elif val == "2":
 856             mhchem = "on"
 857         del document.header[i]
 858
 859     if mhchem == "off":
 860       # don't load case
 861       return
 862
 863     if mhchem == "auto":
 864         i = 0
 865         while True:
 866             i = find_token(document.body, "\\begin_inset Formula", i)
 867             if i == -1:
 868                break
 869             line = document.body[i]
 870             if line.find("\\ce{") != -1 or line.find("\\cf{") != -1:
 871               mhchem = "on"
 872               break
 873             i += 1
 874
 875     if mhchem == "on":
 876         pre = ["\\PassOptionsToPackage{version=3}{mhchem}",
 877           "\\usepackage{mhchem}"]
 878         insert_to_preamble(document, pre)
 879
 880
 881 def revert_fontenc(document):
 882     " Remove fontencoding param "
 883     if not del_token(document.header, '\\fontencoding', 0):
 884         document.warning("Malformed LyX document: Missing \\fontencoding.")
 885
 886
 887 def merge_gbrief(document):
 888     " Merge g-brief-en and g-brief-de to one class "
 889
 890     if document.textclass != "g-brief-de":
 891         if document.textclass == "g-brief-en":
 892             document.textclass = "g-brief"
 893             document.set_textclass()
 894         return
 895
 896     obsoletedby = { "Brieftext":       "Letter",
 897                     "Unterschrift":    "Signature",
 898                     "Strasse":         "Street",
 899                     "Zusatz":          "Addition",
 900                     "Ort":             "Town",
 901                     "Land":            "State",
 902                     "RetourAdresse":   "ReturnAddress",
 903                     "MeinZeichen":     "MyRef",
 904                     "IhrZeichen":      "YourRef",
 905                     "IhrSchreiben":    "YourMail",
 906                     "Telefon":         "Phone",
 907                     "BLZ":             "BankCode",
 908                     "Konto":           "BankAccount",
 909                     "Postvermerk":     "PostalComment",
 910                     "Adresse":         "Address",
 911                     "Datum":           "Date",
 912                     "Betreff":         "Reference",
 913                     "Anrede":          "Opening",
 914                     "Anlagen":         "Encl.",
 915                     "Verteiler":       "cc",
 916                     "Gruss":           "Closing"}
 917     i = 0
 918     while True:
 919         i = find_token(document.body, "\\begin_layout", i)
 920         if i == -1:
 921             break
 922
 923         layout = document.body[i][14:]
 924         if layout in obsoletedby:
 925             document.body[i] = "\\begin_layout " + obsoletedby[layout]
 926
 927         i += 1
 928
 929     document.textclass = "g-brief"
 930     document.set_textclass()
 931
 932
 933 def revert_gbrief(document):
 934     " Revert g-brief to g-brief-en "
 935     if document.textclass == "g-brief":
 936         document.textclass = "g-brief-en"
 937         document.set_textclass()
 938
 939
 940 def revert_html_options(document):
 941     " Remove html options "
 942     del_token(document.header, '\\html_use_mathml', 0)
 943     del_token(document.header, '\\html_be_strict', 0)
 944
 945
 946 def revert_includeonly(document):
 947     i = 0
 948     while True:
 949         i = find_token(document.header, "\\begin_includeonly", i)
 950         if i == -1:
 951             return
 952         j = find_end_of(document.header, i, "\\begin_includeonly", "\\end_includeonly")
 953         if j == -1:
 954             document.warning("Unable to find end of includeonly section!!")
 955             break
 956         document.header[i : j + 1] = []
 957
 958
 959 def revert_includeall(document):
 960     " Remove maintain_unincluded_children param "
 961     del_token(document.header, '\\maintain_unincluded_children', 0)
 962
 963
def revert_multirow(document):
    """Revert multirow cells in tables to TeX code.

    If the body contains at least one multirow start cell
    (``<cell multirow="3"``), \\usepackage{multirow} is added to the
    preamble.  Then, for every table that has such a cell, the multirow
    attributes are stripped from the affected <cell> lines and the first
    layout of each start cell is wrapped in ERT insets that emit
    ``\\multirow{<rows>}{<width>}{`` ... ``}``.
    """

    # first, let's find out if we need to do anything
    # cell type 3 is multirow begin cell
    i = find_token(document.body, '<cell multirow="3"', 0)
    if i == -1:
      return

    add_to_preamble(document, ["\\usepackage{multirow}"])

    begin_table = 0
    while True:
        # find begin/end of table
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        # does this table have multirow?
        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
        if i == -1:
            begin_table = end_table
            continue

        # store the number of rows and columns
        numrows = get_option_value(document.body[begin_table], "rows")
        numcols = get_option_value(document.body[begin_table], "columns")
        try:
          numrows = int(numrows)
          numcols = int(numcols)
        except:
          document.warning("Unable to determine rows and columns!")
          begin_table = end_table
          continue

        # mrstarts: [row, column] pairs of the cells that begin a multirow
        # multirows[row][column]: [first line of cell, last line of cell, type]
        #   where type is 0 (plain), 3 (multirow start) or 4 (multirow part)
        mrstarts = []
        multirows = []
        # collect info on rows and columns of this table.
        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row>', begin_row, end_table)
            if begin_row == -1:
              document.warning("Can't find row " + str(row + 1))
              break
            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
            if end_row == -1:
              document.warning("Can't find end of row " + str(row + 1))
              break
            begin_cell = begin_row
            multirows.append([])
            for column in range(numcols):
                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
                if begin_cell == -1:
                  document.warning("Can't find column " + str(column + 1) + \
                    "in row " + str(row + 1))
                  break
                # NOTE
                # this will fail if someone puts "</cell>" in a cell, but
                # that seems fairly unlikely.
                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
                if end_cell == -1:
                  document.warning("Can't find end of column " + str(column + 1) + \
                    "in row " + str(row + 1))
                  break
                multirows[row].append([begin_cell, end_cell, 0])
                if document.body[begin_cell].find('multirow="3"') != -1:
                  multirows[row][column][2] = 3 # begin multirow
                  mrstarts.append([row, column])
                elif document.body[begin_cell].find('multirow="4"') != -1:
                  multirows[row][column][2] = 4 # in multirow
                begin_cell = end_cell
            begin_row = end_row
        # end of table info collection
        # NOTE(review): if one of the loops above stopped early, multirows
        # holds fewer rows/cells than numrows/numcols and the lookups below
        # could raise IndexError -- confirm whether that can happen.

        # work from the back to avoid messing up numbering
        mrstarts.reverse()
        for m in mrstarts:
            row = m[0]
            col = m[1]
            # get column width
            # NOTE(review): assumes the <column> line for column `col` is at
            # begin_table + 2 + col -- TODO confirm against the table format
            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
            # "0pt" means that no width is specified
            if not col_width or col_width == "0pt":
              col_width = "*"
            # determine the number of cells that are part of the multirow
            nummrs = 1
            for r in range(row + 1, numrows):
                if multirows[r][col][2] != 4:
                  break
                nummrs += 1
                # take the opportunity to revert this line
                lineno = multirows[r][col][0]
                document.body[lineno] = document.body[lineno].\
                  replace(' multirow="4" ', ' ').\
                  replace('valignment="middle"', 'valignment="top"').\
                  replace(' topline="true" ', ' ')
                # remove bottom line of previous multirow-part cell
                lineno = multirows[r-1][col][0]
                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
            # revert beginning cell
            bcell = multirows[row][col][0]
            ecell = multirows[row][col][1]
            document.body[bcell] = document.body[bcell].\
              replace(' multirow="3" ', ' ').\
              replace('valignment="middle"', 'valignment="top"')
            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
            if blay == -1:
              document.warning("Can't find layout for cell!")
              continue
            bend = find_end_of_layout(document.body, blay)
            if bend == -1:
              document.warning("Can't find end of layout for cell!")
              continue
            # do the later one first, so as not to mess up the numbering
            # we are wrapping the whole cell in this ert
            # so before the end of the layout...
            document.body[bend:bend] = put_cmd_in_ert("}")
            # ...and after the beginning
            document.body[blay + 1:blay + 1] = \
              put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}{")

        # continue the search for further tables behind this one
        begin_table = end_table
1090
1091
def convert_math_output(document):
    """Convert \\html_use_mathml to \\html_math_output.

    A true value (or a line that cannot be parsed, which is also reported)
    becomes "0" (MathML); a false value becomes "2" (Images).  Nothing is
    done if the header has no \\html_use_mathml line.
    """
    i = find_token(document.header, "\\html_use_mathml", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_use_mathml\s+(\w+)')
    m = rgx.match(document.header[i])
    newval = "0" # MathML
    if m:
        val = str2bool(m.group(1))
        if not val:
            newval = "2" # Images
    else:
        document.warning("Can't match " + document.header[i])
    document.header[i] = "\\html_math_output " + newval
1107
1108
def revert_math_output(document):
    """Revert \\html_math_output to \\html_use_mathml.

    Values "1" and "2" become "false"; every other value, and a line that
    cannot be parsed (which is also reported), becomes "true".  Nothing is
    done if the header has no \\html_math_output line.
    """
    i = find_token(document.header, "\\html_math_output", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_math_output\s+(\d)')
    m = rgx.match(document.header[i])
    newval = "true"
    if m:
        val = m.group(1)
        if val == "1" or val == "2":
            newval = "false"
    else:
        document.warning("Unable to match " + document.header[i])
    document.header[i] = "\\html_use_mathml " + newval
1124
1125
1126
def revert_inset_preview(document):
    """Dissolve Preview insets, keeping their content in place.

    For each inset, the lines from \\begin_inset Preview up to and
    including the first \\begin_layout inside it are removed, together
    with the last \\end_layout found inside the inset and the closing
    \\end_inset.  If no layout is found, only the two inset lines are
    removed.
    """
    i = 0
    while True:
      i = find_token(document.body, "\\begin_inset Preview", i)
      if i == -1:
          return
      iend = find_end_of_inset(document.body, i)
      if iend == -1:
          document.warning("Malformed LyX document: Could not find end of Preview inset.")
          i += 1
          continue

      # This has several issues.
      # We need to do something about the layouts inside InsetPreview.
      # If we just leave the first one, then we have something like:
      # \begin_layout Standard
      # ...
      # \begin_layout Standard
      # and we get a "no \end_layout" error. So something has to be done.
      # Ideally, we would check if it is the same as the layout we are in.
      # If so, we just remove it; if not, we end the active one. But it is
      # not easy to know what layout we are in, due to depth changes, etc,
      # and it is not clear to me how much work it is worth doing. In most
      # cases, the layout will probably be the same.
      #
      # For the same reason, we have to remove the \end_layout tag at the
      # end of the last layout in the inset. Again, that will sometimes be
      # wrong, but it will usually be right. To know what to do, we would
      # again have to know what layout the inset is in.

      blay = find_token(document.body, "\\begin_layout", i, iend)
      if blay == -1:
          document.warning("Can't find layout for preview inset!")
          # always do the later one first...
          del document.body[iend]
          del document.body[i]
          # deletions mean we do not need to reset i
          continue

      # This is where we would check what layout we are in.
      # The check for Standard is definitely wrong.
      #
      # lay = document.body[blay].split(None, 1)[1]
      # if lay != oldlayout:
      #     # record a boolean to tell us what to do later....
      #     # better to do it later, since (a) it won't mess up
      #     # the numbering and (b) we only modify at the end.

      # we want to delete the last \\end_layout in this inset, too.
      # note that this may not be the \\end_layout that goes with blay!!
      bend = find_end_of_layout(document.body, blay)
      # advance bend to the last \end_layout that precedes the end of the inset
      while True:
          tmp = find_token(document.body, "\\end_layout", bend + 1, iend)
          if tmp == -1:
              break
          bend = tmp
      # NOTE(review): if find_end_of_layout returned -1 and no \end_layout
      # was found afterwards, bend is still -1 here and the deletion below
      # would remove the last body line -- confirm whether that can occur.
      if bend == blay:
          document.warning("Unable to find last layout in preview inset!")
          del document.body[iend]
          del document.body[i]
          # deletions mean we do not need to reset i
          continue
      # always do the later one first...
      del document.body[iend]
      del document.body[bend]
      del document.body[i:blay + 1]
      # we do not need to reset i
1195
1196
def revert_equalspacing_xymatrix(document):
    """Revert Formula insets that use \\xymatrix@! to ERT insets.

    If at least one formula was converted and no remaining formula uses
    \\xymatrix, \\usepackage[all]{xy} is added to the preamble.
    """
    converted_any = False
    plain_xymatrix_left = False
    pos = 0

    while True:
        pos = find_token(document.body, "\\begin_inset Formula", pos)
        if pos == -1:
            break
        end = find_end_of_inset(document.body, pos)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Formula inset.")
            pos += 1
            continue

        formula_lines = document.body[pos:end]
        if any("\\xymatrix@!" in text for text in formula_lines):
            converted_any = True
            # drop the "\begin_inset Formula " prefix of the opening line
            content = [document.body[pos][21:]] + document.body[pos + 1:end]
            ert = put_cmd_in_ert(content)
            document.body[pos:end + 1] = ert
            pos += len(ert) - (end - pos) + 1
        else:
            if any("\\xymatrix" in text for text in formula_lines):
                plain_xymatrix_left = True
            pos = end + 1

    if converted_any and not plain_xymatrix_left:
        add_to_preamble(document, ['\\usepackage[all]{xy}'])
1235
1236
def revert_notefontcolor(document):
    """Revert the greyed-out note font color to preamble code.

    The \\notefontcolor header line is always removed.  Preamble code that
    defines the color and redefines the lyxgreyedout environment is
    written only if the body contains a greyed-out note.
    """

    i = find_token(document.header, "\\notefontcolor", 0)
    if i == -1:
        return

    colorcode = get_value(document.header, '\\notefontcolor', i)
    del document.header[i]

    # are there any grey notes?
    if find_token(document.body, "\\begin_inset Note Greyedout", 0) == -1:
        # no need to do anything else, and \renewcommand will throw
        # an error since lyxgreyedout will not exist.
        return

    # the color code is in the form #rrggbb where every character denotes a hex number
    red = hex2ratio(colorcode[1:3])
    green = hex2ratio(colorcode[3:5])
    blue = hex2ratio(colorcode[5:7])
    # write the preamble, one LaTeX line per list entry
    insert_to_preamble(document,
      [ '%  for greyed-out notes',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        '\\definecolor{note_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue),
        '\\renewenvironment{lyxgreyedout}',
        ' {\\textcolor{note_fontcolor}\\bgroup}{\\egroup}'])
1264
1265
def revert_turkmen(document):
    """Set language Turkmen to English, in the header and in the body."""
    if document.language == "turkmen":
        document.language = "english"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language english"

    pos = find_token(document.body, "\\lang turkmen", 0)
    while pos != -1:
        line = document.body[pos]
        document.body[pos] = line.replace("\\lang turkmen", "\\lang english")
        pos = find_token(document.body, "\\lang turkmen", pos + 1)
1282
1283
def revert_fontcolor(document):
    """Revert the document font color to preamble code.

    The \\fontcolor header line is removed; unless the color is #000000,
    preamble code that defines and activates the color is written.
    """
    pos = find_token(document.header, "\\fontcolor", 0)
    if pos == -1:
        return
    colorcode = get_value(document.header, '\\fontcolor', pos)
    del document.header[pos]
    if colorcode == "#000000":
        # font color is not set: don't clutter the preamble
        return
    # colorcode has the form #rrggbb; convert each hex pair in turn
    rgb = tuple(hex2ratio(colorcode[k:k + 2]) for k in (1, 3, 5))
    preamble = [
        '%  Set the font color',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        '\\definecolor{document_fontcolor}{rgb}{%s,%s,%s}' % rgb,
        '\\color{document_fontcolor}',
    ]
    insert_to_preamble(document, preamble)
1304
1305
def revert_shadedboxcolor(document):
    """Revert the shaded box background color to preamble code.

    The \\boxbgcolor header line is removed and a definition of
    ``shadecolor`` is written to the preamble.
    """
    pos = find_token(document.header, "\\boxbgcolor", 0)
    if pos == -1:
        return
    colorcode = get_value(document.header, '\\boxbgcolor', pos)
    del document.header[pos]
    # colorcode has the form #rrggbb; convert each hex pair in turn
    rgb = tuple(hex2ratio(colorcode[k:k + 2]) for k in (1, 3, 5))
    preamble = [
        '%  Set the color of boxes with shaded background',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        "\\definecolor{shadecolor}{rgb}{%s,%s,%s}" % rgb,
    ]
    insert_to_preamble(document, preamble)
1322
1323
def revert_lyx_version(document):
    """Replace Info insets showing the LyX version by plain text.

    The text is the version reported by the lyx2lyx_version module, or the
    literal "LyX version" if that module (or its ``version`` attribute) is
    not available.
    """
    version = "LyX version"
    try:
        import lyx2lyx_version
        version = lyx2lyx_version.version
    except (ImportError, AttributeError):
        # best effort: keep the generic placeholder text
        pass

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Info', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i += 1
            continue

        # We expect:
        # \begin_inset Info
        # type  "lyxinfo"
        # arg   "version"
        # \end_inset
        typ = get_quoted_value(document.body, "type", i, j)
        arg = get_quoted_value(document.body, "arg", i, j)
        if arg != "version" or typ != "lyxinfo":
            i = j + 1
            continue

        # We do not actually know the version of LyX used to produce the document.
        # But we can use our version, since we are reverting.
        # Now we want to check if the line after "\end_inset" is empty. It normally
        # is, so we want to remove it, too. The inset may also be the very
        # last thing in the body, so check the bounds first.
        lastline = j + 1
        if lastline < len(document.body) and document.body[lastline].strip() == "":
            lastline = j + 2
        document.body[i:lastline] = [version]
        i += 1
1365
1366
def revert_math_scale(document):
    """Remove the math scaling and LaTeX options from the header."""
    obsolete = ('\\html_math_img_scale', '\\html_latex_start', '\\html_latex_end')
    for token in obsolete:
        del_token(document.header, token, 0)
1372
1373
def revert_pagesizes(document):
    """Revert page sizes to default.

    The \\papersize header line is deleted if it names one of the sizes
    listed below.
    """
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    reverted_sizes = (
        "a0paper", "a1paper", "a2paper", "a6paper",
        "b0paper", "b1paper", "b2paper", "b6paper",
        "b0j", "b1j", "b2j", "b3j", "b4j", "b5j", "b6j",
    )
    # the size name follows the 11 characters of "\papersize "
    if document.header[pos][11:] in reverted_sizes:
        del document.header[pos]
1385
1386
def revert_DIN_C_pagesizes(document):
    """Revert DIN C page sizes to default.

    The \\papersize header line is deleted if it names one of c0paper to
    c6paper.
    """
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    din_c_sizes = ("c0paper", "c1paper", "c2paper", "c3paper",
                   "c4paper", "c5paper", "c6paper")
    # the size name follows the 11 characters of "\papersize "
    if document.header[pos][11:] in din_c_sizes:
        del document.header[pos]
1396
1397
def convert_html_quotes(document):
    """Remove the quotes around the values of html_latex_start and html_latex_end.

    A line whose value is not quoted is left unchanged.
    """
    # (header token, pattern capturing the quoted value, prefix of the new line)
    specs = (
        ('\\html_latex_start', r'\\html_latex_start\s+"(.*)"', "\\html_latex_start "),
        ('\\html_latex_end', r'\\html_latex_end\s+"(.*)"', "\\html_latex_end "),
    )
    for token, pattern, prefix in specs:
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        found = re.match(pattern, document.header[pos])
        if found:
            document.header[pos] = prefix + found.group(1)
1416
1417
def revert_html_quotes(document):
    """Put quotes around the values of html_latex_start and html_latex_end.

    A line that does not have the expected form is reported and deleted.
    """
    # (header token, pattern capturing the value, warning text, prefix of new line)
    specs = (
        ('\\html_latex_start', r'\\html_latex_start\s+(.*)',
         "Weird html_latex_start line: ", "\\html_latex_start \""),
        ('\\html_latex_end', r'\\html_latex_end\s+(.*)',
         "Weird html_latex_end line: ", "\\html_latex_end \""),
    )
    for token, pattern, complaint, prefix in specs:
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        line = document.header[pos]
        found = re.match(pattern, line)
        if found:
            document.header[pos] = prefix + found.group(1) + "\""
        else:
            document.warning(complaint + line)
            del document.header[pos]
1442
1443
def revert_output_sync(document):
    """Remove the forward search options from the header."""
    for token in ('\\output_sync_macro', '\\output_sync'):
        del_token(document.header, token, 0)
1448
1449
def revert_align_decimal(document):
    """Revert decimal-aligned table columns to centered columns.

    For every Tabular inset, each <column> line that precedes the first
    <cell> and has alignment="decimal" loses its decimal_point option and
    gets alignment="center" instead.  All tables in the document are
    processed, not only the first one.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(i))
            i += 1
            continue
        cell = find_token(document.body, "<cell", i, j)
        if cell == -1:
            document.warning("Can't find any cells in Tabular inset at line " + str(i))
            i = j
            continue
        # the column definitions lie between the inset start and the first cell
        k = i + 1
        while True:
            k = find_token(document.body, "<column", k, cell)
            if k == -1:
                # done with this table; go on with the next one
                break
            if 'alignment="decimal"' in document.body[k]:
                remove_option(document.body, k, 'decimal_point')
                document.body[k] = \
                    document.body[k].replace('alignment="decimal"', 'alignment="center"')
            k += 1
        # resume at the first cell so that tables nested in cells are found too
        i = cell
1478
1479
def convert_optarg(document):
    """Convert \\begin_inset OptArg to \\begin_inset Argument."""
    pos = find_token(document.body, '\\begin_inset OptArg', 0)
    while pos != -1:
        document.body[pos] = "\\begin_inset Argument"
        pos = find_token(document.body, '\\begin_inset OptArg', pos + 1)
1489
1490
def revert_argument(document):
    """Convert \\begin_inset Argument to \\begin_inset OptArg."""
    pos = find_token(document.body, '\\begin_inset Argument', 0)
    while pos != -1:
        document.body[pos] = "\\begin_inset OptArg"
        pos = find_token(document.body, '\\begin_inset Argument', pos + 1)
1500
1501
def revert_makebox(document):
    """Convert \\makebox boxes to TeX code.

    For every Box inset the use_makebox line (expected six lines after the
    inset start) is handled as follows:

    * if the box is not frameless or use_makebox is not 1, only the
      use_makebox line is deleted;
    * otherwise the inset is dissolved and its first layout is wrapped in
      ERT insets that emit ``\\makebox[<width>][<alignment>]{`` ... ``}``.

    get_value() returns the setting as a string, so it has to be compared
    with "1"; a comparison with the integer 1 never matches and would make
    the conversion branch unreachable.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Box', i)
        if i == -1:
            break
        z = find_end_of_inset(document.body, i)
        if z == -1:
            document.warning("Malformed LyX document: Can't find end of box inset.")
            i += 1
            continue
        blay = find_token(document.body, "\\begin_layout", i, z)
        if blay == -1:
            document.warning("Malformed LyX document: Can't find layout in box.")
            i = z
            continue
        j = find_token(document.body, 'use_makebox', i)
        if j == -1 or j != i + 6:
            document.warning("Malformed LyX document: Can't find use_makebox statement in box.")
            i = z
            continue
        # delete use_makebox
        if not check_token(document.body[i], "\\begin_inset Box Frameless") \
           or get_value(document.body, 'use_makebox', j) != "1":
            del document.body[j]
            i += 1
            continue
        bend = find_end_of_layout(document.body, blay)
        if bend == -1 or bend > z:
            document.warning("Malformed LyX document: Can't find end of layout in box.")
            i = z
            continue
        # determine the alignment
        align = get_quoted_value(document.body, 'hor_pos', i, blay, "c")
        # determine the width
        length = get_quoted_value(document.body, 'width', i, blay, "50col%")
        length = latex_length(length)[1]
        # do the later replacement first so the earlier indices stay valid:
        # remove the \end_layout \end_inset pair
        document.body[bend:z + 1] = put_cmd_in_ert("}")
        subst = "\\makebox[" + length + "][" + align + "]{"
        # replace the inset header (including use_makebox) and \begin_layout
        document.body[i:blay + 1] = put_cmd_in_ert(subst)
        i += 1
1546
1547
def convert_use_makebox(document):
    """Add the use_makebox option to boxes.

    "use_makebox 0" is inserted right after the use_parbox line, which is
    expected five lines after the start of the Box inset; boxes where it
    is not found there are reported and left unchanged.
    """
    box_token = '\\begin_inset Box'
    pos = find_token(document.body, box_token, 0)
    while pos != -1:
        parbox = find_token(document.body, 'use_parbox', pos)
        if parbox == pos + 5:
            document.body.insert(parbox + 1, "use_makebox 0")
        else:
            document.warning("Malformed LyX document: Can't find use_parbox statement in box.")
        pos = find_token(document.body, box_token, pos + 1)
1563
1564
def revert_IEEEtran(document):
    """Convert IEEEtran layouts and styles to TeX code.

    Only documents of class IEEEtran are touched.  Two flex insets are
    reverted, two layouts are renamed, and three layouts are moved to the
    preamble as LaTeX commands.
    """
    if document.textclass != "IEEEtran":
        return

    revert_flex_inset(document.body, "IEEE membership", "\\IEEEmembership")
    revert_flex_inset(document.body, "Lowercase", "\\MakeLowercase")

    # layouts that become a LaTeX command in the preamble
    latexcmd = {"Special Paper Notice": "\\IEEEspecialpapernotice",
                "After Title Text":     "\\IEEEaftertitletext",
                "Publication ID":       "\\IEEEpubid"}
    # layouts that are only renamed
    obsoletedby = {"Page headings":            "MarkBoth",
                   "Biography without photo":  "BiographyNoPhoto"}
    layouts = ("Special Paper Notice", "After Title Text", "Publication ID",
               "Page headings", "Biography without photo")

    for layout in layouts:
        token = '\\begin_layout ' + layout
        new_name = obsoletedby.get(layout)
        pos = 0
        while True:
            pos = find_token(document.body, token, pos)
            if pos == -1:
                break
            end = find_end_of_layout(document.body, pos)
            if end == -1:
                document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
                pos += 1
                continue
            if new_name is not None:
                document.body[pos] = "\\begin_layout " + new_name
                pos = end
                continue
            content = lyx2latex(document, document.body[pos:end + 1])
            add_to_preamble(document, [latexcmd[layout] + "{" + content + "}"])
            # the layout is removed, so pos already points at what follows
            del document.body[pos:end + 1]
1600         # no need to reset i
1601
1602
def convert_prettyref(document):
    """Convert prettyref references to neutral formatted refs.

    In every ref CommandInset, "LatexCommand prettyref" is replaced by
    "LatexCommand formatted".  Finally "\\use_refstyle 0" is inserted
    before the last header line.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef!")
            i += 1
            continue
        k = find_token(document.body, "LatexCommand prettyref", i, j)
        if k != -1:
            document.body[k] = "LatexCommand formatted"
        i = j + 1
    document.header.insert(-1, "\\use_refstyle 0")
1623
1624
def revert_refstyle(document):
    """Revert neutral formatted refs to prettyref.

    Every ``CommandInset ref`` inset whose command is ``formatted`` is
    switched back to ``prettyref``, and the ``\\use_refstyle`` header
    line is removed if present.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef")
            i += 1
            continue
        # only look for the command inside this inset
        k = find_token(document.body, "LatexCommand formatted", i, j)
        if k != -1:
            document.body[k] = "LatexCommand prettyref"
        i = j + 1
    i = find_token(document.header, "\\use_refstyle", 0)
    if i != -1:
        document.header.pop(i)
1647
1648
def revert_nameref(document):
  """Convert namerefs to regular references.

  Each ``CommandInset ref`` inset that uses ``Nameref`` or ``nameref`` is
  replaced by the output of ``put_cmd_in_ert`` for the equivalent LaTeX
  command.  If at least one inset was replaced, ``\\usepackage{nameref}``
  is added to the preamble.
  """
  cmds = ["Nameref", "nameref"]
  foundone = False
  rx = re.compile(r'reference "(.*)"')
  for cmd in cmds:
    i = 0
    oldcmd = "LatexCommand " + cmd
    while True:
      # It seems better to look for this, as most of the reference
      # insets won't be ones we care about.
      i = find_token(document.body, oldcmd, i)
      if i == -1:
        break
      cmdloc = i
      i += 1
      # Make sure it is actually in an inset!
      # A normal line could begin with "LatexCommand nameref"!
      val = is_in_inset(document.body, cmdloc,
          "\\begin_inset CommandInset ref")
      if not val:
        continue
      stins, endins = val

      # ok, so it is in an InsetRef
      refline = find_token(document.body, "reference", stins, endins)
      if refline == -1:
        # stins is a line index, so it has to be converted for the message
        document.warning("Can't find reference for inset at line " + str(stins) + "!!")
        continue
      m = rx.match(document.body[refline])
      if not m:
        document.warning("Can't match reference line: " + document.body[refline])
        continue
      foundone = True
      ref = m.group(1)
      newcontent = put_cmd_in_ert('\\' + cmd + '{' + ref + '}')
      document.body[stins:endins + 1] = newcontent

  if foundone:
    add_to_preamble(document, ["\\usepackage{nameref}"])
1689
1690
def remove_Nameref(document):
  """Convert Nameref commands to nameref commands."""
  pos = 0
  while True:
    # Searching for the command line directly is cheaper than visiting
    # every reference inset, most of which are of no interest here.
    hit = find_token(document.body, "LatexCommand Nameref", pos)
    if hit == -1:
      return
    pos = hit + 1
    # Only rewrite the line when it really sits inside a reference inset.
    if is_in_inset(document.body, hit, "\\begin_inset CommandInset ref"):
      document.body[hit] = "LatexCommand nameref"
1709
1710
def revert_mathrsfs(document):
    """Load mathrsfs if \\mathscr is used in the document."""
    for line in document.body:
        if "\\mathscr{" in line:
            # one \usepackage is enough, so stop at the first hit
            add_to_preamble(document, ["\\usepackage{mathrsfs}"])
            return
1718
1719
def convert_flexnames(document):
    """Drop the Custom:/CharStyle:/Element: prefix from Flex inset names.

    ``\\begin_inset Flex Custom:Style`` becomes ``\\begin_inset Flex Style``,
    and likewise for the ``CharStyle`` and ``Element`` prefixes.
    """
    prefixed = re.compile(r'^\\begin_inset Flex (?:Custom|CharStyle|Element):(.+)$')
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Flex", pos)
        if pos == -1:
            break
        found = prefixed.match(document.body[pos])
        if found is not None:
            document.body[pos] = "\\begin_inset Flex " + found.group(1)
        pos += 1
1733
1734
# Maps the bare name of a Flex inset to its prefixed name: the first group
# gets a "CharStyle:" prefix, the second group a "Custom:" prefix.
flex_insets = {
  name: prefix + name
  for prefix, names in (
    ("CharStyle:", (
      "Alert", "Code", "Concepts", "E-Mail", "Emph", "Expression",
      "Initial", "Institute", "Meaning", "Noun", "Strong", "Structure",
    )),
    ("Custom:", (
      "ArticleMode", "Endnote", "Glosse", "PresentationMode", "Tri-Glosse",
    )),
  )
  for name in names
}
1754
# Maps the bare name of a Flex inset to the same name with an "Element:"
# prefix.
flex_elements = {
  name: "Element:" + name
  for name in (
    "Abbrev", "CCC-Code", "Citation-number", "City", "Code", "CODEN",
    "Country", "Day", "Directory", "Dscr", "Email", "Emph", "Filename",
    "Firstname", "Fname", "GuiButton", "GuiMenu", "GuiMenuItem", "ISSN",
    "Issue-day", "Issue-months", "Issue-number", "KeyCap", "KeyCombo",
    "Keyword", "Literal", "MenuChoice", "Month", "Orgdiv", "Orgname",
    "Postcode", "SS-Code", "SS-Title", "State", "Street", "Surname",
    "Volume", "Year",
  )
}
1795
1796
def revert_flexnames(document):
  """Put the prefix back on known Flex inset names.

  Documents with the "latex" backend are looked up in ``flex_insets``,
  all others in ``flex_elements``.  Names missing from the chosen table
  are left untouched.
  """
  table = flex_insets if document.backend == "latex" else flex_elements
  header = re.compile(r'^\\begin_inset Flex\s+(.+)$')
  pos = 0
  while True:
    pos = find_token(document.body, "\\begin_inset Flex", pos)
    if pos == -1:
      break
    line = document.body[pos]
    found = header.match(line)
    if found is None:
      document.warning("Illegal flex inset: " + line)
    else:
      name = found.group(1)
      if name in table:
        document.body[pos] = "\\begin_inset Flex " + table[name]
    pos += 1
1818
1819
def convert_mathdots(document):
    """Add the \\use_mathdots header setting.

    The setting goes right after ``\\use_mhchem`` (or ``\\use_esint`` when
    the former is missing).  It is 2 when the preamble loads mathdots by
    hand, in which case that preamble line is removed, and 0 otherwise.
    """
    anchor = find_token(document.header, "\\use_mhchem", 0)
    if anchor == -1:
        anchor = find_token(document.header, "\\use_esint", 0)
    if anchor == -1:
        document.warning("Malformed LyX document: Can't find \\use_mhchem.")
        return
    manual_load = find_token(document.preamble, "\\usepackage{mathdots}", 0)
    if manual_load != -1:
        document.header.insert(anchor + 1, "\\use_mathdots 2")
        del document.preamble[manual_load]
    else:
        document.header.insert(anchor + 1, "\\use_mathdots 0")
1834
1835
def revert_mathdots(document):
    """Load mathdots if used in the document.

    The ``\\use_mathdots`` header line is removed.  A value of 0 loads
    nothing and a value of 2 always adds ``\\usepackage{mathdots}``.  Any
    other value, an unparsable value or a missing line is treated as
    "auto": the package is loaded (guarded by ``\\@ifundefined``) when some
    Formula inset contains ``\\iddots``.
    """
    mathdots = find_token(document.header, "\\use_mathdots", 0)
    if mathdots == -1:
        document.warning("No \\use_mathdots line. Assuming auto.")
    else:
        val = get_value(document.header, "\\use_mathdots", mathdots)
        del document.header[mathdots]
        try:
            usedots = int(val)
        except ValueError:
            document.warning("Invalid \\use_mathdots value: " + val + ". Assuming auto.")
            usedots = 1

        if usedots == 0:
            # do not load case
            return
        if usedots == 2:
            # force load case
            add_to_preamble(document, ["\\usepackage{mathdots}"])
            return

    # so we are in the auto case. we want to load mathdots if \iddots is used.
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue
        code = "\n".join(document.body[i:j])
        if "\\iddots" in code:
            # NOTE(review): \@ifundefined usually takes three arguments;
            # confirm whether an empty else-branch should be appended here.
            add_to_preamble(document, ["\\@ifundefined{iddots}{\\usepackage{mathdots}}"])
            return
        i = j
1876
1877
def convert_rule(document):
    """Convert \\lyxline to CommandInset line.

    Every ``\\lyxline`` in the body is replaced by a ``CommandInset line``
    inset.  If the line is the first content of its paragraph and
    paragraphs are indented, ``\\noindent`` is added to the paragraph.
    If other content precedes it, a Newline inset is put in front of it.
    """
    inset = ['\\begin_inset CommandInset line',
      'LatexCommand rule',
      'offset "0.5ex"',
      'width "100line%"',
      'height "1pt"', '',
      '\\end_inset', '', '']
    newline = ["\\begin_inset Newline newline", "\\end_inset", ""]

    # if paragraphs are indented, we may have to unindent to get the
    # line to be full-width.
    indent = get_value(document.header, "\\paragraph_separation", 0)
    have_indent = (indent == "indent")

    i = 0
    while True:
        i = find_token(document.body, "\\lyxline", i)
        if i == -1:
            return

        # we need to find out if this line follows other content
        # in its paragraph. find its layout....
        lastlay = find_token_backwards(document.body, "\\begin_layout", i)
        if lastlay == -1:
            document.warning("Can't find layout for line at " + str(i))
            # do the best we can.
            document.body[i:i+1] = inset
            i += len(inset)
            continue

        # ...and look for other content before it.
        lineisfirst = True
        for line in document.body[lastlay + 1:i]:
            # is it empty or a paragraph option?
            if not line or line[0] == '\\':
                continue
            lineisfirst = False
            break

        if lineisfirst:
            document.body[i:i+1] = inset
            i += len(inset)
            if have_indent:
                # we need to unindent, lest the line be too long
                document.body.insert(lastlay + 1, "\\noindent")
                # the insertion happened above i, so everything moved down
                i += 1
        else:
            # so our line is in the middle of a paragraph
            # we need to add a new line, lest this line follow the
            # other content on that line and run off the side of the page
            document.body[i:i+1] = newline + inset
            i += len(newline) + len(inset)
1931
1932
def revert_rule(document):
    """Revert line insets to TeX code.

    Each ``CommandInset line`` inset is replaced by the output of
    ``put_cmd_in_ert`` for a ``\\rule`` command built from the inset's
    offset, width and height.  Width defaults to "100col%" and height
    to "1pt" when not given.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset line", i)
        if i == -1:
            return
        # find end of inset
        j = find_token(document.body, "\\end_inset", i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of line inset.")
            return
        # determine the optional offset; it becomes \rule's optional
        # argument and is left out completely when not given
        offset = get_quoted_value(document.body, 'offset', i, j)
        if offset:
            offset = '[' + offset + ']'
        # determine the width
        width = get_quoted_value(document.body, 'width', i, j, "100col%")
        width = latex_length(width)[1]
        # determine the height
        height = get_quoted_value(document.body, 'height', i, j, "1pt")
        height = latex_length(height)[1]
        # output the \rule command
        subst = "\\rule" + offset + "{" + width + "}{" + height + "}"
        ert = put_cmd_in_ert(subst)
        document.body[i:j + 1] = ert
        # continue right after the lines we just inserted
        i += len(ert)
1959
1960
def revert_diagram(document):
  """Add the feyn package if \\Diagram is used in math."""
  pos = 0
  while True:
    pos = find_token(document.body, '\\begin_inset Formula', pos)
    if pos == -1:
      return
    stop = find_end_of_inset(document.body, pos)
    if stop == -1:
      document.warning("Malformed LyX document: Can't find end of Formula inset.")
      return
    formula = "\n".join(document.body[pos:stop])
    if formula.find("\\Diagram") != -1:
      # loading the package once is enough, so stop at the first hit
      add_to_preamble(document, ["\\usepackage{feyn}"])
      return
    pos = stop
1979
# Text classes for which convert_bibtex_clearpage inserts a page break;
# documents of any other class are left alone by that routine.
chapters = (
    "amsbook",
    "book",
    "docbook-book",
    "elsart",
    "extbook",
    "extreport",
    "jbook",
    "jreport",
    "jsbook",
    "literate-book",
    "literate-report",
    "memoir",
    "mwbk",
    "mwrep",
    "recipebook",
    "report",
    "scrbook",
    "scrreprt",
    "svmono",
    "svmult",
    "tbook",
    "treport",
    "tufte-book",
)
1984
def convert_bibtex_clearpage(document):
  """Insert a clear(double)page before bibliographies that use bibtotoc.

  Only documents whose text class is listed in ``chapters`` are touched.
  For every ``CommandInset bibtex`` inset whose options mention
  ``bibtotoc``, a Standard paragraph holding a Newpage inset is inserted
  in front of the paragraph containing the inset: ``clearpage`` when
  ``\\papersides`` is 1, ``cleardoublepage`` when it is 2.  A missing or
  invalid ``\\papersides`` is reported and treated as 1.
  """
  if document.textclass not in chapters:
    return

  i = find_token(document.header, '\\papersides', 0)
  sides = 0
  if i == -1:
    document.warning("Malformed LyX document: Can't find papersides definition.")
    document.warning("Assuming single sided.")
    sides = 1
  else:
    val = get_value(document.header, "\\papersides", i)
    try:
      sides = int(val)
    except ValueError:
      # leave sides at 0 so that the check below reports the bad value
      pass
    if sides != 1 and sides != 2:
      document.warning("Invalid papersides value: " + val)
      document.warning("Assuming single sided.")
      sides = 1

  j = 0
  while True:
    j = find_token(document.body, "\\begin_inset CommandInset bibtex", j)
    if j == -1:
      return

    k = find_end_of_inset(document.body, j)
    if k == -1:
      document.warning("Can't find end of Bibliography inset at line " + str(j))
      j += 1
      continue

    # only act if there is the option "bibtotoc"
    val = get_value(document.body, 'options', j, k)
    if not val:
      document.warning("Can't find options for bibliography inset at line " + str(j))
      j = k
      continue

    if val.find("bibtotoc") == -1:
      j = k
      continue

    # so we want to insert a new page right before the paragraph that
    # this bibliography thing is in.
    lay = find_token_backwards(document.body, "\\begin_layout", j)
    if lay == -1:
      document.warning("Can't find layout containing bibliography inset at line " + str(j))
      j = k
      continue

    if sides == 1:
      cmd = "clearpage"
    else:
      cmd = "cleardoublepage"
    subst = ['\\begin_layout Standard',
        '\\begin_inset Newpage ' + cmd,
        '\\end_inset', '', '',
        '\\end_layout', '']
    document.body[lay:lay] = subst
    # the inset moved down by len(subst); resume at its (shifted) end
    j = k + len(subst)
2049
2050
def check_passthru(document):
  """Return True for a literate text class or a sweave/noweb module.

  The module list is only consulted when the text class is not one of
  the three literate classes.
  """
  if document.textclass in ("literate-article", "literate-book", "literate-report"):
    return True
  for mod in document.get_module_list():
    if mod in ("sweave", "noweb"):
      return True
  return False
2061
2062
def convert_passthru(document):
    """ http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    For documents accepted by check_passthru, every Newline inset inside
    a Chunk or Scrap paragraph is replaced by a paragraph break, so that a
    multi-line chunk becomes a run of paragraphs of the same layout.  When
    the next paragraph already has that layout, an empty paragraph of the
    layout is inserted in between.  Paragraphs whose first line after
    the layout header starts with a backslash are left alone.
    """
    if not check_passthru(document):
        return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        # every layout name is searched from the top of the body
        beg = 0
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we are now going to replace newline insets within this layout
            # by new instances of this layout. so we have repeated layouts
            # instead of newlines.

            # if the paragraph has any customization, however, we do not want to
            # do the replacement.
            if document.body[beg + 1].startswith("\\"):
                beg = end + 1
                continue

            ns = beg
            while True:
                ns = find_token(document.body, "\\begin_inset Newline newline", ns, end)
                if ns == -1:
                    break
                ne = find_end_of_inset(document.body, ns)
                if ne == -1 or ne > end:
                    document.warning("Can't find end of inset at line " + str(ns))
                    ns += 1
                    continue
                # swallow the empty line that follows the inset, if any
                if document.body[ne + 1] == "":
                    ne += 1
                subst = ["\\end_layout", "", "\\begin_layout " + lay]
                removed = ne + 1 - ns
                document.body[ns:ne + 1] = subst
                # keep end on the paragraph's own \end_layout and move ns
                # past the lines we just inserted
                end += len(subst) - removed
                ns += len(subst)

            # ok, we now want to find out if the next layout is the
            # same as this one. if so, we will insert an extra copy of it
            beg = end + 1
            nxt = find_token(document.body, "\\begin_layout", end)
            if nxt != -1:
                m = rx.match(document.body[nxt])
                if m and m.group(1) == lay:
                    subst = ["\\begin_layout " + lay, "", "\\end_layout", ""]
                    document.body[nxt:nxt] = subst
                    # resume at the layout that followed, now shifted down,
                    # so the empty one we just added is not visited again
                    beg = nxt + len(subst)
2126
2127
def revert_passthru(document):
    """ http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    For documents accepted by check_passthru, runs of consecutive Chunk
    (or Scrap) paragraphs are merged: the break before a non-empty
    following paragraph of the same layout becomes a Newline inset, and
    an empty following paragraph is removed, which ends the run.
    """
    if not check_passthru(document):
        return
    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        # every layout name is searched from the top of the body
        beg = 0
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we now want to find out if the next layout is the
            # same as this one. but we will need to do this over and
            # over again.
            while True:
                nxt = find_token(document.body, "\\begin_layout", end)
                if nxt == -1:
                    break
                m = rx.match(document.body[nxt])
                if not m:
                    break
                if m.group(1) != lay:
                    break
                # so it is the same layout again. we now want to know if it is empty.
                # but first let's check and make sure there is no content between the
                # two layouts. i'm not sure if that can happen or not.
                for idx in range(end + 1, nxt):
                    if document.body[idx] != "":
                        document.warning("Found content between adjacent " + lay + " layouts!")
                        break
                nextend = find_end_of_layout(document.body, nxt)
                if nextend == -1:
                    document.warning("Can't find end of layout at line " + str(nxt))
                    break
                empty = True
                for idx in range(nxt + 1, nextend):
                    if document.body[idx] != "":
                        empty = False
                        break
                if empty:
                    # empty layouts just get removed
                    # should we check if it's before yet another such layout?
                    del document.body[nxt : nextend + 1]
                    # and we do not want to check again. we know the next layout
                    # should be another Chunk and should be left as is.
                    break
                # if it's not empty, then we want to insert a newline in place
                # of the layout switch
                subst = ["\\begin_inset Newline newline", "\\end_inset", ""]
                document.body[end : nxt + 1] = subst
                # and now we have to find the end of the new, larger layout
                newend = find_end_of_layout(document.body, beg)
                if newend == -1:
                    document.warning("Can't find end of new layout at line " + str(beg))
                    break
                end = newend
            beg = end + 1
2193
2194
def revert_multirowOffset(document):
    """Revert multirow cells with offset in tables to TeX-code.

    This routine is the same as the revert_multirow routine except that
    it checks additionally for the offset.  If the body holds no cell line
    starting with ``<cell multirow="3" mroffset=`` nothing happens.
    Otherwise ``\\usepackage{multirow}`` is added to the preamble and, in
    every table, each such cell has its content wrapped in a
    ``\\multirow{rows}{width}[offset]{...}`` command, while the multirow
    attributes are removed from the cells involved.
    """
    # first, let's find out if we need to do anything
    i = find_token(document.body, '<cell multirow="3" mroffset=', 0)
    if i == -1:
        return

    add_to_preamble(document, ["\\usepackage{multirow}"])

    rgx = re.compile(r'mroffset="[^"]+?"')
    begin_table = 0

    while True:
        # find begin/end of table
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        # does this table have multirow?
        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
        if i == -1:
            begin_table = end_table
            continue

        # store the number of rows and columns
        numrows = get_option_value(document.body[begin_table], "rows")
        numcols = get_option_value(document.body[begin_table], "columns")
        try:
            numrows = int(numrows)
            numcols = int(numcols)
        except (TypeError, ValueError):
            document.warning("Unable to determine rows and columns!")
            begin_table = end_table
            continue

        mrstarts = []
        multirows = []
        # collect info on rows and columns of this table.
        # multirows[row][column] is [begin_cell, end_cell, kind] where kind
        # is 3 for the start of a multirow, 4 for a continuation, else 0.
        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row>', begin_row, end_table)
            if begin_row == -1:
                document.warning("Can't find row " + str(row + 1))
                break
            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
            if end_row == -1:
                document.warning("Can't find end of row " + str(row + 1))
                break
            begin_cell = begin_row
            multirows.append([])
            for column in range(numcols):
                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
                if begin_cell == -1:
                    document.warning("Can't find column " + str(column + 1) +
                        " in row " + str(row + 1))
                    break
                # NOTE
                # this will fail if someone puts "</cell>" in a cell, but
                # that seems fairly unlikely.
                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
                if end_cell == -1:
                    document.warning("Can't find end of column " + str(column + 1) +
                        " in row " + str(row + 1))
                    break
                multirows[row].append([begin_cell, end_cell, 0])
                if document.body[begin_cell].find('multirow="3" mroffset=') != -1:
                    multirows[row][column][2] = 3 # begin multirow
                    mrstarts.append([row, column])
                elif document.body[begin_cell].find('multirow="4"') != -1:
                    multirows[row][column][2] = 4 # in multirow
                begin_cell = end_cell
            begin_row = end_row
        # end of table info collection

        # work from the back to avoid messing up numbering
        mrstarts.reverse()
        for m in mrstarts:
            row = m[0]
            col = m[1]
            # get column width
            # NOTE(review): assumes the column descriptions start two lines
            # after the <lyxtabular> line -- confirm against the file format
            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
            # "0pt" means that no width is specified
            if not col_width or col_width == "0pt":
                col_width = "*"
            # determine the number of cells that are part of the multirow.
            # only rows and cells that were actually collected are looked
            # at, since collection stops early on a malformed table.
            nummrs = 1
            for r in range(row + 1, len(multirows)):
                if col >= len(multirows[r]) or multirows[r][col][2] != 4:
                    break
                nummrs += 1
                # take the opportunity to revert this line
                lineno = multirows[r][col][0]
                document.body[lineno] = document.body[lineno].\
                  replace(' multirow="4" ', ' ').\
                  replace('valignment="middle"', 'valignment="top"').\
                  replace(' topline="true" ', ' ')
                # remove bottom line of previous multirow-part cell
                lineno = multirows[r-1][col][0]
                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
            # revert beginning cell
            bcell = multirows[row][col][0]
            ecell = multirows[row][col][1]
            offset = get_option_value(document.body[bcell], "mroffset")
            document.body[bcell] = document.body[bcell].\
              replace(' multirow="3" ', ' ').\
              replace('valignment="middle"', 'valignment="top"')
            # remove mroffset option
            document.body[bcell] = rgx.sub('', document.body[bcell])

            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
            if blay == -1:
                document.warning("Can't find layout for cell!")
                continue
            bend = find_end_of_layout(document.body, blay)
            if bend == -1:
                document.warning("Can't find end of layout for cell!")
                continue
            # do the later one first, so as not to mess up the numbering
            # we are wrapping the whole cell in this ert
            # so before the end of the layout...
            document.body[bend:bend] = put_cmd_in_ert("}")
            # ...and after the beginning
            document.body[blay + 1:blay + 1] = \
              put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}["
                  + offset + "]{")

        # on to the next table
        begin_table = end_table
2330
2331
def revert_script(document):
    """Convert subscript/superscript insets to TeX code.

    The content of each script inset is kept, wrapped between the output
    of ``put_cmd_in_ert`` for ``\\textsubscript{`` or ``\\textsuperscript{``
    and for the closing brace.  If a subscript inset was seen and the text
    class is not one of those listed below, the subscript package is added
    to the preamble.
    """
    # these classes provide a \textsubscript command:
    # FIXME: Would be nice if we could use the information of the .layout file here
    classes = ["memoir", "scrartcl", "scrbook", "scrlttr2", "scrreprt"]
    saw_subscript = False
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset script', pos)
        if pos == -1:
            break
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of script inset.")
            pos += 1
            continue
        lay_start = find_token(document.body, "\\begin_layout", pos, inset_end)
        if lay_start == -1:
            document.warning("Malformed LyX document: Can't find layout in script inset.")
            pos = inset_end
            continue

        header = document.body[pos]
        if check_token(header, "\\begin_inset script subscript"):
            opener = '\\textsubscript{'
            saw_subscript = True
        elif check_token(header, "\\begin_inset script superscript"):
            opener = '\\textsuperscript{'
        else:
            document.warning("Malformed LyX document: Unknown type of script inset.")
            pos = inset_end
            continue
        lay_end = find_end_of_layout(document.body, lay_start)
        if lay_end == -1 or lay_end > inset_end:
            document.warning("Malformed LyX document: Can't find end of layout in script inset.")
            pos = inset_end
            continue
        # replace the tail first so the indices of the head stay valid:
        # the \end_layout ... \end_inset lines become the closing brace
        document.body[lay_end:inset_end + 1] = put_cmd_in_ert("}")
        document.body[pos:lay_start + 1] = put_cmd_in_ert(opener)
        pos += 1
    if saw_subscript and find_token_exact(classes, document.textclass, 0) == -1:
        add_to_preamble(document, ['\\usepackage{subscript}'])
2374
2375
def convert_use_xetex(document):
    """Convert \\use_xetex to \\use_non_tex_fonts.

    If there is no \\use_xetex line, ``\\use_non_tex_fonts 0`` is inserted
    just before the last header line instead.
    """
    pos = find_token(document.header, "\\use_xetex", 0)
    if pos != -1:
        setting = get_value(document.header, "\\use_xetex", 0)
        document.header[pos] = "\\use_non_tex_fonts " + setting
        return
    document.header.insert(-1, "\\use_non_tex_fonts 0")
2384
2385
def revert_use_xetex(document):
    """Revert \\use_non_tex_fonts to \\use_xetex, keeping its value."""
    pos = find_token(document.header, "\\use_non_tex_fonts", 0)
    if pos == -1:
        document.warning("Malformed document. No \\use_non_tex_fonts param!")
    else:
        setting = get_value(document.header, "\\use_non_tex_fonts", 0)
        document.header[pos] = "\\use_xetex " + setting
2396
2397
def revert_labeling(document):
    """Rename Labeling paragraphs to List paragraphs.

    Nothing is done for the text classes listed in ``koma``.
    """
    koma = ("scrartcl", "scrarticle-beamer", "scrbook", "scrlettr",
        "scrlttr2", "scrreprt")
    if document.textclass in koma:
        return
    pos = find_token_exact(document.body, "\\begin_layout Labeling", 0)
    while pos != -1:
        document.body[pos] = "\\begin_layout List"
        # the rewritten line no longer matches, so searching from pos is safe
        pos = find_token_exact(document.body, "\\begin_layout Labeling", pos)
2409
2410
def revert_langpack(document):
    """Remove the \\language_package parameter from the header."""
    pos = find_token(document.header, "\\language_package", 0)
    if pos != -1:
        del document.header[pos]
        return
    document.warning("Malformed document. No \\language_package param!")
2420
2421
def convert_langpack(document):
    " Add \\language_package parameter "
    # The backslash is written doubled: "\l" is not a valid escape sequence
    # and newer Python versions warn about it. The token searched for is
    # still a single backslash followed by "language".
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        document.warning("Malformed document. No \\language defined!")
        return

    # the new parameter goes on the line directly after \language
    document.header.insert(i + 1, "\\language_package default")
2430
2431
def revert_tabularwidth(document):
    " strip the tabularwidth option from the <features> line of Tabular insets "
    body = document.body
    start = 0
    while True:
        inset = find_token(body, "\\begin_inset Tabular", start)
        if inset == -1:
            return
        inset_end = find_end_of_inset(body, inset)
        # whatever happens below, the next search resumes after this inset start
        start = inset + 1
        if inset_end == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(inset))
            continue
        feat = find_token(body, "<features", start, inset_end)
        if feat == -1:
            document.warning("Can't find any features in Tabular inset at line " + str(start))
            start = inset_end
            continue
        # NOTE(review): the option is only removed when the features line
        # contains alignment="tabularwidth" -- confirm this is the intended test
        if 'alignment="tabularwidth"' in body[feat]:
            remove_option(body, feat, 'tabularwidth')
2451
def revert_html_css_as_file(document):
    " remove the \\html_css_as_file parameter from the header "
    removed = del_token(document.header, '\\html_css_as_file', 0)
    if removed:
        return
    # del_token reported that nothing was deleted
    document.warning("Malformed LyX document: Missing \\html_css_as_file.")
2455
2456
2457 ##
2458 # Conversion hub
2459 #
2460
# Version strings this module declares as supported.
2461 supported_versions = ["2.0.0","2.0"]
# Forward conversion table. Each entry is [file format number, [functions]];
# the format numbers run in ascending order from 346 to 413, and an empty
# list means no conversion function is registered for that format.
# NOTE(review): presumably the lyx2lyx driver calls the listed functions, in
# order, to bring a document up to that format -- confirm against the caller.
2462 convert = [[346, []],
2463            [347, []],
2464            [348, []],
2465            [349, []],
2466            [350, []],
2467            [351, []],
2468            [352, [convert_splitindex]],
2469            [353, []],
2470            [354, []],
2471            [355, []],
2472            [356, []],
2473            [357, []],
2474            [358, []],
2475            [359, [convert_nomencl_width]],
2476            [360, []],
2477            [361, []],
2478            [362, []],
2479            [363, []],
2480            [364, []],
2481            [365, []],
2482            [366, []],
2483            [367, []],
2484            [368, []],
2485            [369, [convert_author_id]],
2486            [370, []],
2487            [371, [convert_mhchem]],
2488            [372, []],
2489            [373, [merge_gbrief]],
2490            [374, []],
2491            [375, []],
2492            [376, []],
2493            [377, []],
2494            [378, []],
2495            [379, [convert_math_output]],
2496            [380, []],
2497            [381, []],
2498            [382, []],
2499            [383, []],
2500            [384, []],
2501            [385, []],
2502            [386, []],
2503            [387, []],
2504            [388, []],
2505            [389, [convert_html_quotes]],
2506            [390, []],
2507            [391, []],
2508            [392, []],
2509            [393, [convert_optarg]],
2510            [394, [convert_use_makebox]],
2511            [395, []],
2512            [396, []],
2513            [397, [remove_Nameref]],
2514            [398, []],
2515            [399, [convert_mathdots]],
2516            [400, [convert_rule]],
2517            [401, []],
2518            [402, [convert_bibtex_clearpage]],
2519            [403, [convert_flexnames]],
2520            [404, [convert_prettyref]],
2521            [405, []],
2522            [406, [convert_passthru]],
2523            [407, []],
2524            [408, []],
2525            [409, [convert_use_xetex]],
2526            [410, []],
2527            [411, [convert_langpack]],
2528            [412, []],
2529            [413, []]
2530 ]
2531
# Backward conversion table. Each entry is [file format number, [functions]];
# the format numbers run in descending order from 412 to 345, and an empty
# list means no reversion function is registered for that format.
# NOTE(review): presumably the lyx2lyx driver calls the listed functions, in
# order, to bring a document down to that format -- confirm against the caller.
2532 revert =  [[412, [revert_html_css_as_file]],
2533            [411, [revert_tabularwidth]],
2534            [410, [revert_langpack]],
2535            [409, [revert_labeling]],
2536            [408, [revert_use_xetex]],
2537            [407, [revert_script]],
2538            [406, [revert_multirowOffset]],
2539            [405, [revert_passthru]],
2540            [404, []],
2541            [403, [revert_refstyle]],
2542            [402, [revert_flexnames]],
2543            [401, []],
2544            [400, [revert_diagram]],
2545            [399, [revert_rule]],
2546            [398, [revert_mathdots]],
2547            [397, [revert_mathrsfs]],
2548            [396, []],
2549            [395, [revert_nameref]],
2550            [394, [revert_DIN_C_pagesizes]],
2551            [393, [revert_makebox]],
2552            [392, [revert_argument]],
2553            [391, []],
2554            [390, [revert_align_decimal, revert_IEEEtran]],
2555            [389, [revert_output_sync]],
2556            [388, [revert_html_quotes]],
2557            [387, [revert_pagesizes]],
2558            [386, [revert_math_scale]],
2559            [385, [revert_lyx_version]],
2560            [384, [revert_shadedboxcolor]],
2561            [383, [revert_fontcolor]],
2562            [382, [revert_turkmen]],
2563            [381, [revert_notefontcolor]],
2564            [380, [revert_equalspacing_xymatrix]],
2565            [379, [revert_inset_preview]],
2566            [378, [revert_math_output]],
2567            [377, []],
2568            [376, [revert_multirow]],
2569            [375, [revert_includeall]],
2570            [374, [revert_includeonly]],
2571            [373, [revert_html_options]],
2572            [372, [revert_gbrief]],
2573            [371, [revert_fontenc]],
2574            [370, [revert_mhchem]],
2575            [369, [revert_suppress_date]],
2576            [368, [revert_author_id]],
2577            [367, [revert_hspace_glue_lengths]],
2578            [366, [revert_percent_vspace_lengths, revert_percent_hspace_lengths]],
2579            [365, [revert_percent_skip_lengths]],
2580            [364, [revert_paragraph_indentation]],
2581            [363, [revert_branch_filename]],
2582            [362, [revert_longtable_align]],
2583            [361, [revert_applemac]],
2584            [360, []],
2585            [359, [revert_nomencl_cwidth]],
2586            [358, [revert_nomencl_width]],
2587            [357, [revert_custom_processors]],
2588            [356, [revert_ulinelatex]],
2589            [355, []],
2590            [354, [revert_strikeout]],
2591            [353, [revert_printindexall]],
2592            [352, [revert_subindex]],
2593            [351, [revert_splitindex]],
2594            [350, [revert_backgroundcolor]],
2595            [349, [revert_outputformat]],
2596            [348, [revert_xetex]],
2597            [347, [revert_phantom, revert_hphantom, revert_vphantom]],
2598            [346, [revert_tabularvalign]],
2599            [345, [revert_swiss]]
2600           ]
2601
2602
# Running this file directly does nothing; the guard body is a no-op.
2603 if __name__ == "__main__":
2604     pass