lib/lyx2lyx/lyx_2_0.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # Copyright (C) 2011 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19 """ Convert files to the file format generated by lyx 2.0"""
  20
  21 import re, string
  22 import unicodedata
  23 import sys, os
  24
  25 from parser_tools import find_token, find_end_of, find_tokens, \
  26   find_token_exact, find_end_of_inset, find_end_of_layout, \
  27   find_token_backwards, is_in_inset, get_value, get_quoted_value, \
  28   del_token, check_token, get_option_value
  29
  30 from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
  31   put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
  32   revert_font_attrs, hex2ratio, str2bool
  33
  34 ####################################################################
  35 # Private helper functions
  36
  37 def remove_option(lines, m, option):
  38     ''' removes option from line m. returns whether we did anything '''
  39     l = lines[m].find(option)
  40     if l == -1:
  41         return False
  42     val = lines[m][l:].split('"')[1]
  43     lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
  44     return True
  45
  46
  47 ###############################################################################
  48 ###
  49 ### Conversion and reversion routines
  50 ###
  51 ###############################################################################
  52
  53 def revert_swiss(document):
  54     " Set language german-ch to ngerman "
  55     i = 0
  56     if document.language == "german-ch":
  57         document.language = "ngerman"
  58         i = find_token(document.header, "\\language", 0)
  59         if i != -1:
  60             document.header[i] = "\\language ngerman"
  61     j = 0
  62     while True:
  63         j = find_token(document.body, "\\lang german-ch", j)
  64         if j == -1:
  65             return
  66         document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman")
  67         j = j + 1
  68
  69
  70 def revert_tabularvalign(document):
  71    " Revert the tabular valign option "
  72    i = 0
  73    while True:
  74       i = find_token(document.body, "\\begin_inset Tabular", i)
  75       if i == -1:
  76           return
  77       end = find_end_of_inset(document.body, i)
  78       if end == -1:
  79           document.warning("Can't find end of inset at line " + str(i))
  80           i += 1
  81           continue
  82       fline = find_token(document.body, "<features", i, end)
  83       if fline == -1:
  84           document.warning("Can't find features for inset at line " + str(i))
  85           i += 1
  86           continue
  87       p = document.body[fline].find("islongtable")
  88       if p != -1:
  89           q = document.body[fline].find("tabularvalignment")
  90           if q != -1:
  91               document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
  92           i += 1
  93           continue
  94
  95        # no longtable
  96       tabularvalignment = 'c'
  97       # which valignment is specified?
  98       m = document.body[fline].find('tabularvalignment="top"')
  99       if m != -1:
 100           tabularvalignment = 't'
 101       m = document.body[fline].find('tabularvalignment="bottom"')
 102       if m != -1:
 103           tabularvalignment = 'b'
 104       # delete tabularvalignment
 105       q = document.body[fline].find("tabularvalignment")
 106       if q != -1:
 107           document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
 108
 109       # don't add a box when centered
 110       if tabularvalignment == 'c':
 111           i = end
 112           continue
 113       subst = ['\\end_inset', '\\end_layout']
 114       document.body[end:end] = subst # just inserts those lines
 115       subst = ['\\begin_inset Box Frameless',
 116           'position "' + tabularvalignment +'"',
 117           'hor_pos "c"',
 118           'has_inner_box 1',
 119           'inner_pos "c"',
 120           'use_parbox 0',
 121           # we don't know the width, assume 50%
 122           'width "50col%"',
 123           'special "none"',
 124           'height "1in"',
 125           'height_special "totalheight"',
 126           'status open',
 127           '',
 128           '\\begin_layout Plain Layout']
 129       document.body[i:i] = subst # this just inserts the array at i
 130       # since there could be a tabular inside a tabular, we cannot
 131       # jump to end
 132       i += len(subst)
 133
 134
 135 def revert_phantom_types(document, ptype, cmd):
 136     " Reverts phantom to ERT "
 137     i = 0
 138     while True:
 139       i = find_token(document.body, "\\begin_inset Phantom " + ptype, i)
 140       if i == -1:
 141           return
 142       end = find_end_of_inset(document.body, i)
 143       if end == -1:
 144           document.warning("Can't find end of inset at line " + str(i))
 145           i += 1
 146           continue
 147       blay = find_token(document.body, "\\begin_layout Plain Layout", i, end)
 148       if blay == -1:
 149           document.warning("Can't find layout for inset at line " + str(i))
 150           i = end
 151           continue
 152       bend = find_end_of_layout(document.body, blay)
 153       if bend == -1:
 154           document.warning("Malformed LyX document: Could not find end of Phantom inset's layout.")
 155           i = end
 156           continue
 157       substi = ["\\begin_inset ERT", "status collapsed", "",
 158                 "\\begin_layout Plain Layout", "", "", "\\backslash",
 159                 cmd + "{", "\\end_layout", "", "\\end_inset"]
 160       substj = ["\\size default", "", "\\begin_inset ERT", "status collapsed", "",
 161                 "\\begin_layout Plain Layout", "", "}", "\\end_layout", "", "\\end_inset"]
 162       # do the later one first so as not to mess up the numbering
 163       document.body[bend:end + 1] = substj
 164       document.body[i:blay + 1] = substi
 165       i = end + len(substi) + len(substj) - (end - bend) - (blay - i) - 2
 166
 167
 168 def revert_phantom(document):
 169     revert_phantom_types(document, "Phantom", "phantom")
 170
 171 def revert_hphantom(document):
 172     revert_phantom_types(document, "HPhantom", "hphantom")
 173
 174 def revert_vphantom(document):
 175     revert_phantom_types(document, "VPhantom", "vphantom")
 176
 177
 178 def revert_xetex(document):
 179     " Reverts documents that use XeTeX "
 180
 181     i = find_token(document.header, '\\use_xetex', 0)
 182     if i == -1:
 183         document.warning("Malformed LyX document: Missing \\use_xetex.")
 184         return
 185     if not str2bool(get_value(document.header, "\\use_xetex", i)):
 186         del document.header[i]
 187         return
 188     del document.header[i]
 189
 190     # 1.) set doc encoding to utf8-plain
 191     i = find_token(document.header, "\\inputencoding", 0)
 192     if i == -1:
 193         document.warning("Malformed LyX document: Missing \\inputencoding.")
 194     else:
 195         document.header[i] = "\\inputencoding utf8-plain"
 196
 197     # 2.) check font settings
 198     # defaults
 199     roman = sans = typew = "default"
 200     osf = False
 201     sf_scale = tt_scale = 100.0
 202
 203     i = find_token(document.header, "\\font_roman", 0)
 204     if i == -1:
 205         document.warning("Malformed LyX document: Missing \\font_roman.")
 206     else:
 207         roman = get_value(document.header, "\\font_roman", i)
 208         document.header[i] = "\\font_roman default"
 209
 210     i = find_token(document.header, "\\font_sans", 0)
 211     if i == -1:
 212         document.warning("Malformed LyX document: Missing \\font_sans.")
 213     else:
 214         sans = get_value(document.header, "\\font_sans", i)
 215         document.header[i] = "\\font_sans default"
 216
 217     i = find_token(document.header, "\\font_typewriter", 0)
 218     if i == -1:
 219         document.warning("Malformed LyX document: Missing \\font_typewriter.")
 220     else:
 221         typew = get_value(document.header, "\\font_typewriter", i)
 222         document.header[i] = "\\font_typewriter default"
 223
 224     i = find_token(document.header, "\\font_osf", 0)
 225     if i == -1:
 226         document.warning("Malformed LyX document: Missing \\font_osf.")
 227     else:
 228         osf = str2bool(get_value(document.header, "\\font_osf", i))
 229         document.header[i] = "\\font_osf false"
 230
 231     i = find_token(document.header, "\\font_sc", 0)
 232     if i == -1:
 233         document.warning("Malformed LyX document: Missing \\font_sc.")
 234     else:
 235         # we do not need this value.
 236         document.header[i] = "\\font_sc false"
 237
 238     i = find_token(document.header, "\\font_sf_scale", 0)
 239     if i == -1:
 240         document.warning("Malformed LyX document: Missing \\font_sf_scale.")
 241     else:
 242       val = get_value(document.header, '\\font_sf_scale', i)
 243       try:
 244         # float() can throw
 245         sf_scale = float(val)
 246       except:
 247         document.warning("Invalid font_sf_scale value: " + val)
 248       document.header[i] = "\\font_sf_scale 100"
 249
 250     i = find_token(document.header, "\\font_tt_scale", 0)
 251     if i == -1:
 252         document.warning("Malformed LyX document: Missing \\font_tt_scale.")
 253     else:
 254         val = get_value(document.header, '\\font_tt_scale', i)
 255         try:
 256           # float() can throw
 257           tt_scale = float(val)
 258         except:
 259           document.warning("Invalid font_tt_scale value: " + val)
 260         document.header[i] = "\\font_tt_scale 100"
 261
 262     # 3.) set preamble stuff
 263     pretext = ['%% This document must be processed with xelatex!']
 264     pretext.append('\\usepackage{fontspec}')
 265     if roman != "default":
 266         pretext.append('\\setmainfont[Mapping=tex-text]{' + roman + '}')
 267     if sans != "default":
 268         sf = '\\setsansfont['
 269         if sf_scale != 100.0:
 270             sf += 'Scale=' + str(sf_scale / 100.0) + ','
 271         sf += 'Mapping=tex-text]{' + sans + '}'
 272         pretext.append(sf)
 273     if typew != "default":
 274         tw = '\\setmonofont'
 275         if tt_scale != 100.0:
 276             tw += '[Scale=' + str(tt_scale / 100.0) + ']'
 277         tw += '{' + typew + '}'
 278         pretext.append(tw)
 279     if osf:
 280         pretext.append('\\defaultfontfeatures{Numbers=OldStyle}')
 281     pretext.append('\\usepackage{xunicode}')
 282     pretext.append('\\usepackage{xltxtra}')
 283     insert_to_preamble(document, pretext)
 284
 285
 286 def revert_outputformat(document):
 287     " Remove default output format param "
 288
 289     if not del_token(document.header, '\\default_output_format', 0):
 290         document.warning("Malformed LyX document: Missing \\default_output_format.")
 291
 292
 293 def revert_backgroundcolor(document):
 294     " Reverts background color to preamble code "
 295     i = find_token(document.header, "\\backgroundcolor", 0)
 296     if i == -1:
 297         return
 298     colorcode = get_value(document.header, '\\backgroundcolor', i)
 299     del document.header[i]
 300     # don't clutter the preamble if backgroundcolor is not set
 301     if colorcode == "#ffffff":
 302         return
 303     red   = hex2ratio(colorcode[1:3])
 304     green = hex2ratio(colorcode[3:5])
 305     blue  = hex2ratio(colorcode[5:7])
 306     insert_to_preamble(document, \
 307         ['% To set the background color',
 308         '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
 309         '\\definecolor{page_backgroundcolor}{rgb}{' + red + ',' + green + ',' + blue + '}',
 310         '\\pagecolor{page_backgroundcolor}'])
 311
 312
 313 def add_use_indices(document):
 314     " Add \\use_indices if it is missing "
 315     i = find_token(document.header, '\\use_indices', 0)
 316     if i != -1:
 317         return i
 318     i = find_token(document.header, '\\use_bibtopic', 0)
 319     if i == -1:
 320         i = find_token(document.header, '\\cite_engine', 0)
 321     if i == -1:
 322         i = find_token(document.header, '\\use_mathdots', 0)
 323     if i == -1:
 324         i = find_token(document.header, '\\use_mhchem', 0)
 325     if i == -1:
 326         i = find_token(document.header, '\\use_esint', 0)
 327     if i == -1:
 328         i = find_token(document.header, '\\use_amsmath', 0)
 329     if i == -1:
 330         document.warning("Malformed LyX document: Missing \\use_indices.")
 331         return -1
 332     document.header.insert(i + 1, '\\use_indices 0')
 333     return i + 1
 334
 335
 336 def revert_splitindex(document):
 337     " Reverts splitindex-aware documents "
 338     i = add_use_indices(document)
 339     if i == -1:
 340         return
 341     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 342     del document.header[i]
 343     preamble = []
 344     if useindices:
 345          preamble.append("\\usepackage{splitidx})")
 346
 347     # deal with index declarations in the preamble
 348     i = 0
 349     while True:
 350         i = find_token(document.header, "\\index", i)
 351         if i == -1:
 352             break
 353         k = find_token(document.header, "\\end_index", i)
 354         if k == -1:
 355             document.warning("Malformed LyX document: Missing \\end_index.")
 356             return
 357         if useindices:
 358           line = document.header[i]
 359           l = re.compile(r'\\index (.*)$')
 360           m = l.match(line)
 361           iname = m.group(1)
 362           ishortcut = get_value(document.header, '\\shortcut', i, k)
 363           if ishortcut != "":
 364               preamble.append("\\newindex[" + iname + "]{" + ishortcut + "}")
 365         del document.header[i:k + 1]
 366     if preamble:
 367         insert_to_preamble(document, preamble)
 368
 369     # deal with index insets
 370     # these need to have the argument removed
 371     i = 0
 372     while True:
 373         i = find_token(document.body, "\\begin_inset Index", i)
 374         if i == -1:
 375             break
 376         line = document.body[i]
 377         l = re.compile(r'\\begin_inset Index (.*)$')
 378         m = l.match(line)
 379         itype = m.group(1)
 380         if itype == "idx" or indices == "false":
 381             document.body[i] = "\\begin_inset Index"
 382         else:
 383             k = find_end_of_inset(document.body, i)
 384             if k == -1:
 385                 document.warning("Can't find end of index inset!")
 386                 i += 1
 387                 continue
 388             content = lyx2latex(document, document.body[i:k])
 389             # escape quotes
 390             content = content.replace('"', r'\"')
 391             subst = put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")
 392             document.body[i:k + 1] = subst
 393         i = i + 1
 394
 395     # deal with index_print insets
 396     i = 0
 397     while True:
 398         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 399         if i == -1:
 400             return
 401         k = find_end_of_inset(document.body, i)
 402         ptype = get_quoted_value(document.body, 'type', i, k)
 403         if ptype == "idx":
 404             j = find_token(document.body, "type", i, k)
 405             del document.body[j]
 406         elif not useindices:
 407             del document.body[i:k + 1]
 408         else:
 409             subst = put_cmd_in_ert("\\printindex[" + ptype + "]{}")
 410             document.body[i:k + 1] = subst
 411         i = i + 1
 412
 413
 414 def convert_splitindex(document):
 415     " Converts index and printindex insets to splitindex-aware format "
 416     add_use_indices(document)
 417     i = 0
 418     while True:
 419         i = find_token(document.body, "\\begin_inset Index", i)
 420         if i == -1:
 421             break
 422         document.body[i] = document.body[i].replace("\\begin_inset Index",
 423             "\\begin_inset Index idx")
 424         i = i + 1
 425     i = 0
 426     while True:
 427         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 428         if i == -1:
 429             return
 430         if document.body[i + 1].find('LatexCommand printindex') == -1:
 431             document.warning("Malformed LyX document: Incomplete printindex inset.")
 432             return
 433         subst = ["LatexCommand printindex",
 434             "type \"idx\""]
 435         document.body[i + 1:i + 2] = subst
 436         i = i + 1
 437
 438
 439 def revert_subindex(document):
 440     " Reverts \\printsubindex CommandInset types "
 441     i = add_use_indices(document)
 442     if i == -1:
 443         return
 444     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 445     i = 0
 446     while True:
 447         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 448         if i == -1:
 449             return
 450         k = find_end_of_inset(document.body, i)
 451         ctype = get_value(document.body, 'LatexCommand', i, k)
 452         if ctype != "printsubindex":
 453             i = k + 1
 454             continue
 455         ptype = get_quoted_value(document.body, 'type', i, k)
 456         if not useindices:
 457             del document.body[i:k + 1]
 458         else:
 459             subst = put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")
 460             document.body[i:k + 1] = subst
 461         i = i + 1
 462
 463
 464 def revert_printindexall(document):
 465     " Reverts \\print[sub]index* CommandInset types "
 466     i = add_use_indices(document)
 467     if i == -1:
 468         return
 469     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 470     i = 0
 471     while True:
 472         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 473         if i == -1:
 474             return
 475         k = find_end_of_inset(document.body, i)
 476         ctype = get_value(document.body, 'LatexCommand', i, k)
 477         if ctype != "printindex*" and ctype != "printsubindex*":
 478             i = k
 479             continue
 480         if not useindices:
 481             del document.body[i:k + 1]
 482         else:
 483             subst = put_cmd_in_ert("\\" + ctype + "{}")
 484             document.body[i:k + 1] = subst
 485         i = i + 1
 486
 487
 488 def revert_strikeout(document):
 489   " Reverts \\strikeout font attribute "
 490   changed = revert_font_attrs(document.body, "\\uuline", "\\uuline")
 491   changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed
 492   changed = revert_font_attrs(document.body, "\\strikeout", "\\sout")  or changed
 493   if changed == True:
 494     insert_to_preamble(document, \
 495         ['%  for proper underlining',
 496         '\\PassOptionsToPackage{normalem}{ulem}',
 497         '\\usepackage{ulem}'])
 498
 499
 500 def revert_ulinelatex(document):
 501     " Reverts \\uline font attribute "
 502     i = find_token(document.body, '\\bar under', 0)
 503     if i == -1:
 504         return
 505     insert_to_preamble(document,\
 506             ['%  for proper underlining',
 507             '\\PassOptionsToPackage{normalem}{ulem}',
 508             '\\usepackage{ulem}',
 509             '\\let\\cite@rig\\cite',
 510             '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}',
 511             '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}',
 512             '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}'])
 513
 514
 515 def revert_custom_processors(document):
 516     " Remove bibtex_command and index_command params "
 517
 518     if not del_token(document.header, '\\bibtex_command', 0):
 519         document.warning("Malformed LyX document: Missing \\bibtex_command.")
 520
 521     if not del_token(document.header, '\\index_command', 0):
 522         document.warning("Malformed LyX document: Missing \\index_command.")
 523
 524
 525 def convert_nomencl_width(document):
 526     " Add set_width param to nomencl_print "
 527     i = 0
 528     while True:
 529       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 530       if i == -1:
 531         break
 532       document.body.insert(i + 2, "set_width \"none\"")
 533       i = i + 1
 534
 535
 536 def revert_nomencl_width(document):
 537     " Remove set_width param from nomencl_print "
 538     i = 0
 539     while True:
 540       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 541       if i == -1:
 542         break
 543       j = find_end_of_inset(document.body, i)
 544       if not del_token(document.body, "set_width", i, j):
 545         document.warning("Can't find set_width option for nomencl_print!")
 546       i = j
 547
 548
 549 def revert_nomencl_cwidth(document):
 550     " Remove width param from nomencl_print "
 551     i = 0
 552     while True:
 553       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 554       if i == -1:
 555         break
 556       j = find_end_of_inset(document.body, i)
 557       l = find_token(document.body, "width", i, j)
 558       if l == -1:
 559         i = j
 560         continue
 561       width = get_quoted_value(document.body, "width", i, j)
 562       del document.body[l]
 563       insert_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
 564       i = j - 1
 565
 566
 567 def revert_applemac(document):
 568     " Revert applemac encoding to auto "
 569     if document.encoding != "applemac":
 570       return
 571     document.encoding = "auto"
 572     i = find_token(document.header, "\\encoding", 0)
 573     if i != -1:
 574         document.header[i] = "\\encoding auto"
 575
 576
 577 def revert_longtable_align(document):
 578     " Remove longtable alignment setting "
 579     i = 0
 580     while True:
 581       i = find_token(document.body, "\\begin_inset Tabular", i)
 582       if i == -1:
 583           break
 584       end = find_end_of_inset(document.body, i)
 585       if end == -1:
 586           document.warning("Can't find end of inset at line " + str(i))
 587           i += 1
 588           continue
 589       fline = find_token(document.body, "<features", i, end)
 590       if fline == -1:
 591           document.warning("Can't find features for inset at line " + str(i))
 592           i += 1
 593           continue
 594       j = document.body[fline].find("longtabularalignment")
 595       if j == -1:
 596           i += 1
 597           continue
 598       # FIXME Is this correct? It wipes out everything after the
 599       # one we found.
 600       document.body[fline] = document.body[fline][:j - 1] + '>'
 601       # since there could be a tabular inside this one, we
 602       # cannot jump to end.
 603       i += 1
 604
 605
 606 def revert_branch_filename(document):
 607     " Remove \\filename_suffix parameter from branches "
 608     i = 0
 609     while True:
 610         i = find_token(document.header, "\\filename_suffix", i)
 611         if i == -1:
 612             return
 613         del document.header[i]
 614
 615
 616 def revert_paragraph_indentation(document):
 617     " Revert custom paragraph indentation to preamble code "
 618     i = find_token(document.header, "\\paragraph_indentation", 0)
 619     if i == -1:
 620       return
 621     length = get_value(document.header, "\\paragraph_indentation", i)
 622     # we need only remove the line if indentation is default
 623     if length != "default":
 624       # handle percent lengths
 625       length = latex_length(length)[1]
 626       insert_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"])
 627     del document.header[i]
 628
 629
 630 def revert_percent_skip_lengths(document):
 631     " Revert relative lengths for paragraph skip separation to preamble code "
 632     i = find_token(document.header, "\\defskip", 0)
 633     if i == -1:
 634         return
 635     length = get_value(document.header, "\\defskip", i)
 636     # only revert when a custom length was set and when
 637     # it used a percent length
 638     if length in ('smallskip', 'medskip', 'bigskip'):
 639         return
 640     # handle percent lengths
 641     percent, length = latex_length(length)
 642     if percent:
 643         insert_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"])
 644         # set defskip to medskip as default
 645         document.header[i] = "\\defskip medskip"
 646
 647
 648 def revert_percent_vspace_lengths(document):
 649     " Revert relative VSpace lengths to ERT "
 650     i = 0
 651     while True:
 652       i = find_token(document.body, "\\begin_inset VSpace", i)
 653       if i == -1:
 654           break
 655       # only revert if a custom length was set and if
 656       # it used a percent length
 657       r = re.compile(r'\\begin_inset VSpace (.*)$')
 658       m = r.match(document.body[i])
 659       length = m.group(1)
 660       if length in ('defskip', 'smallskip', 'medskip', 'bigskip', 'vfill'):
 661          i += 1
 662          continue
 663       # check if the space has a star (protected space)
 664       protected = (document.body[i].rfind("*") != -1)
 665       if protected:
 666           length = length.rstrip('*')
 667       # handle percent lengths
 668       percent, length = latex_length(length)
 669       # revert the VSpace inset to ERT
 670       if percent:
 671           if protected:
 672               subst = put_cmd_in_ert("\\vspace*{" + length + "}")
 673           else:
 674               subst = put_cmd_in_ert("\\vspace{" + length + "}")
 675           document.body[i:i + 2] = subst
 676       i += 1
 677
 678
 679 def revert_percent_hspace_lengths(document):
 680     " Revert relative HSpace lengths to ERT "
 681     i = 0
 682     while True:
 683       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 684       if i == -1:
 685           break
 686       j = find_end_of_inset(document.body, i)
 687       if j == -1:
 688           document.warning("Can't find end of inset at line " + str(i))
 689           i += 1
 690           continue
 691       # only revert if a custom length was set...
 692       length = get_value(document.body, '\\length', i + 1, j)
 693       if length == '':
 694           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 695           i = j
 696           continue
 697       protected = ""
 698       if document.body[i].find("\\hspace*{}") != -1:
 699           protected = "*"
 700       # ...and if it used a percent length
 701       percent, length = latex_length(length)
 702       # revert the HSpace inset to ERT
 703       if percent:
 704           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 705           document.body[i:j + 1] = subst
 706       # if we did a substitution, this will still be ok
 707       i = j
 708
 709
 710 def revert_hspace_glue_lengths(document):
 711     " Revert HSpace glue lengths to ERT "
 712     i = 0
 713     while True:
 714       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 715       if i == -1:
 716           break
 717       j = find_end_of_inset(document.body, i)
 718       if j == -1:
 719           document.warning("Can't find end of inset at line " + str(i))
 720           i += 1
 721           continue
 722       length = get_value(document.body, '\\length', i + 1, j)
 723       if length == '':
 724           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 725           i = j
 726           continue
 727       protected = ""
 728       if document.body[i].find("\\hspace*{}") != -1:
 729           protected = "*"
 730       # only revert if the length contains a plus or minus at pos != 0
 731       if length.find('-',1) != -1 or length.find('+',1) != -1:
 732           # handle percent lengths
 733           length = latex_length(length)[1]
 734           # revert the HSpace inset to ERT
 735           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 736           document.body[i:j+1] = subst
 737       i = j
 738
 739
 740 def convert_author_id(document):
 741     " Add the author_id to the \\author definition and make sure 0 is not used"
 742     i = 0
 743     anum = 1
 744     re_author = re.compile(r'(\\author) (\".*\")\s*(.*)$')
 745
 746     while True:
 747         i = find_token(document.header, "\\author", i)
 748         if i == -1:
 749             break
 750         m = re_author.match(document.header[i])
 751         if m:
 752             name = m.group(2)
 753             email = m.group(3)
 754             document.header[i] = "\\author %i %s %s" % (anum, name, email)
 755         anum += 1
 756         i += 1
 757
 758     i = 0
 759     while True:
 760         i = find_token(document.body, "\\change_", i)
 761         if i == -1:
 762             break
 763         change = document.body[i].split(' ');
 764         if len(change) == 3:
 765             type = change[0]
 766             author_id = int(change[1])
 767             time = change[2]
 768             document.body[i] = "%s %i %s" % (type, author_id + 1, time)
 769         i += 1
 770
 771
 772 def revert_author_id(document):
 773     " Remove the author_id from the \\author definition "
 774     i = 0
 775     anum = 0
 776     rx = re.compile(r'(\\author)\s+(-?\d+)\s+(\".*\")\s*(.*)$')
 777     idmap = dict()
 778
 779     while True:
 780         i = find_token(document.header, "\\author", i)
 781         if i == -1:
 782             break
 783         m = rx.match(document.header[i])
 784         if m:
 785             author_id = int(m.group(2))
 786             idmap[author_id] = anum
 787             name = m.group(3)
 788             email = m.group(4)
 789             document.header[i] = "\\author %s %s" % (name, email)
 790         i += 1
 791         # FIXME Should this be incremented if we didn't match?
 792         anum += 1
 793
 794     i = 0
 795     while True:
 796         i = find_token(document.body, "\\change_", i)
 797         if i == -1:
 798             break
 799         change = document.body[i].split(' ');
 800         if len(change) == 3:
 801             type = change[0]
 802             author_id = int(change[1])
 803             time = change[2]
 804             document.body[i] = "%s %i %s" % (type, idmap[author_id], time)
 805         i += 1
 806
 807
 808 def revert_suppress_date(document):
 809     " Revert suppressing of default document date to preamble code "
 810     i = find_token(document.header, "\\suppress_date", 0)
 811     if i == -1:
 812         return
 813     # remove the preamble line and write to the preamble
 814     # when suppress_date was true
 815     date = str2bool(get_value(document.header, "\\suppress_date", i))
 816     if date:
 817         add_to_preamble(document, ["\\date{}"])
 818     del document.header[i]
 819
 820
 821 def convert_mhchem(document):
 822     "Set mhchem to off for versions older than 1.6.x"
 823     if document.initial_format < 277:
 824         # LyX 1.5.x and older did never load mhchem.
 825         # Therefore we must switch it off: Documents that use mhchem have
 826         # a manual \usepackage anyway, and documents not using mhchem but
 827         # custom macros with the same names as mhchem commands might get
 828         # corrupted if mhchem is automatically loaded.
 829         mhchem = 0 # off
 830     else:
 831         # LyX 1.6.x did always load mhchem automatically.
 832         mhchem = 1 # auto
 833     i = find_token(document.header, "\\use_esint", 0)
 834     if i == -1:
 835         # pre-1.5.x document
 836         i = find_token(document.header, "\\use_amsmath", 0)
 837     if i == -1:
 838         document.warning("Malformed LyX document: Could not find amsmath os esint setting.")
 839         return
 840     document.header.insert(i + 1, "\\use_mhchem %d" % mhchem)
 841
 842
 843 def revert_mhchem(document):
 844     "Revert mhchem loading to preamble code"
 845
 846     mhchem = "off"
 847     i = find_token(document.header, "\\use_mhchem", 0)
 848     if i == -1:
 849         document.warning("Malformed LyX document: Could not find mhchem setting.")
 850         mhchem = "auto"
 851     else:
 852         val = get_value(document.header, "\\use_mhchem", i)
 853         if val == "1":
 854             mhchem = "auto"
 855         elif val == "2":
 856             mhchem = "on"
 857         del document.header[i]
 858
 859     if mhchem == "off":
 860       # don't load case
 861       return
 862
 863     if mhchem == "auto":
 864         i = 0
 865         while True:
 866             i = find_token(document.body, "\\begin_inset Formula", i)
 867             if i == -1:
 868                break
 869             line = document.body[i]
 870             if line.find("\\ce{") != -1 or line.find("\\cf{") != -1:
 871               mhchem = "on"
 872               break
 873             i += 1
 874
 875     if mhchem == "on":
 876         pre = ["\\PassOptionsToPackage{version=3}{mhchem}",
 877           "\\usepackage{mhchem}"]
 878         insert_to_preamble(document, pre)
 879
 880
 881 def revert_fontenc(document):
 882     " Remove fontencoding param "
 883     if not del_token(document.header, '\\fontencoding', 0):
 884         document.warning("Malformed LyX document: Missing \\fontencoding.")
 885
 886
 887 def merge_gbrief(document):
 888     " Merge g-brief-en and g-brief-de to one class "
 889
 890     if document.textclass != "g-brief-de":
 891         if document.textclass == "g-brief-en":
 892             document.textclass = "g-brief"
 893             document.set_textclass()
 894         return
 895
 896     obsoletedby = { "Brieftext":       "Letter",
 897                     "Unterschrift":    "Signature",
 898                     "Strasse":         "Street",
 899                     "Zusatz":          "Addition",
 900                     "Ort":             "Town",
 901                     "Land":            "State",
 902                     "RetourAdresse":   "ReturnAddress",
 903                     "MeinZeichen":     "MyRef",
 904                     "IhrZeichen":      "YourRef",
 905                     "IhrSchreiben":    "YourMail",
 906                     "Telefon":         "Phone",
 907                     "BLZ":             "BankCode",
 908                     "Konto":           "BankAccount",
 909                     "Postvermerk":     "PostalComment",
 910                     "Adresse":         "Address",
 911                     "Datum":           "Date",
 912                     "Betreff":         "Reference",
 913                     "Anrede":          "Opening",
 914                     "Anlagen":         "Encl.",
 915                     "Verteiler":       "cc",
 916                     "Gruss":           "Closing"}
 917     i = 0
 918     while True:
 919         i = find_token(document.body, "\\begin_layout", i)
 920         if i == -1:
 921             break
 922
 923         layout = document.body[i][14:]
 924         if layout in obsoletedby:
 925             document.body[i] = "\\begin_layout " + obsoletedby[layout]
 926
 927         i += 1
 928
 929     document.textclass = "g-brief"
 930     document.set_textclass()
 931
 932
 933 def revert_gbrief(document):
 934     " Revert g-brief to g-brief-en "
 935     if document.textclass == "g-brief":
 936         document.textclass = "g-brief-en"
 937         document.set_textclass()
 938
 939
 940 def revert_html_options(document):
 941     " Remove html options "
 942     del_token(document.header, '\\html_use_mathml', 0)
 943     del_token(document.header, '\\html_be_strict', 0)
 944
 945
 946 def revert_includeonly(document):
 947     i = 0
 948     while True:
 949         i = find_token(document.header, "\\begin_includeonly", i)
 950         if i == -1:
 951             return
 952         j = find_end_of(document.header, i, "\\begin_includeonly", "\\end_includeonly")
 953         if j == -1:
 954             document.warning("Unable to find end of includeonly section!!")
 955             break
 956         document.header[i : j + 1] = []
 957
 958
 959 def revert_includeall(document):
 960     " Remove maintain_unincluded_children param "
 961     del_token(document.header, '\\maintain_unincluded_children', 0)
 962
 963
 964 def revert_multirow(document):
 965     " Revert multirow cells in tables to TeX-code"
 966
 967     # first, let's find out if we need to do anything
 968     # cell type 3 is multirow begin cell
 969     i = find_token(document.body, '<cell multirow="3"', 0)
 970     if i == -1:
 971       return
 972
 973     add_to_preamble(document, ["\\usepackage{multirow}"])
 974
 975     begin_table = 0
 976     while True:
 977         # find begin/end of table
 978         begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
 979         if begin_table == -1:
 980             break
 981         end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
 982         if end_table == -1:
 983             document.warning("Malformed LyX document: Could not find end of table.")
 984             begin_table += 1
 985             continue
 986         # does this table have multirow?
 987         i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
 988         if i == -1:
 989             begin_table = end_table
 990             continue
 991
 992         # store the number of rows and columns
 993         numrows = get_option_value(document.body[begin_table], "rows")
 994         numcols = get_option_value(document.body[begin_table], "columns")
 995         try:
 996           numrows = int(numrows)
 997           numcols = int(numcols)
 998         except:
 999           document.warning("Unable to determine rows and columns!")
1000           begin_table = end_table
1001           continue
1002
1003         mrstarts = []
1004         multirows = []
1005         # collect info on rows and columns of this table.
1006         begin_row = begin_table
1007         for row in range(numrows):
1008             begin_row = find_token(document.body, '<row>', begin_row, end_table)
1009             if begin_row == -1:
1010               document.warning("Can't find row " + str(row + 1))
1011               break
1012             end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
1013             if end_row == -1:
1014               document.warning("Can't find end of row " + str(row + 1))
1015               break
1016             begin_cell = begin_row
1017             multirows.append([])
1018             for column in range(numcols):
1019                 begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
1020                 if begin_cell == -1:
1021                   document.warning("Can't find column " + str(column + 1) + \
1022                     "in row " + str(row + 1))
1023                   break
1024                 # NOTE
1025                 # this will fail if someone puts "</cell>" in a cell, but
1026                 # that seems fairly unlikely.
1027                 end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
1028                 if end_cell == -1:
1029                   document.warning("Can't find end of column " + str(column + 1) + \
1030                     "in row " + str(row + 1))
1031                   break
1032                 multirows[row].append([begin_cell, end_cell, 0])
1033                 if document.body[begin_cell].find('multirow="3"') != -1:
1034                   multirows[row][column][2] = 3 # begin multirow
1035                   mrstarts.append([row, column])
1036                 elif document.body[begin_cell].find('multirow="4"') != -1:
1037                   multirows[row][column][2] = 4 # in multirow
1038                 begin_cell = end_cell
1039             begin_row = end_row
1040         # end of table info collection
1041
1042         # work from the back to avoid messing up numbering
1043         mrstarts.reverse()
1044         for m in mrstarts:
1045             row = m[0]
1046             col = m[1]
1047             # get column width
1048             col_width = get_option_value(document.body[begin_table + 2 + col], "width")
1049             # "0pt" means that no width is specified
1050             if not col_width or col_width == "0pt":
1051               col_width = "*"
1052             # determine the number of cells that are part of the multirow
1053             nummrs = 1
1054             for r in range(row + 1, numrows):
1055                 if multirows[r][col][2] != 4:
1056                   break
1057                 nummrs += 1
1058                 # take the opportunity to revert this line
1059                 lineno = multirows[r][col][0]
1060                 document.body[lineno] = document.body[lineno].\
1061                   replace(' multirow="4" ', ' ').\
1062                   replace('valignment="middle"', 'valignment="top"').\
1063                   replace(' topline="true" ', ' ')
1064                 # remove bottom line of previous multirow-part cell
1065                 lineno = multirows[r-1][col][0]
1066                 document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
1067             # revert beginning cell
1068             bcell = multirows[row][col][0]
1069             ecell = multirows[row][col][1]
1070             document.body[bcell] = document.body[bcell].\
1071               replace(' multirow="3" ', ' ').\
1072               replace('valignment="middle"', 'valignment="top"')
1073             blay = find_token(document.body, "\\begin_layout", bcell, ecell)
1074             if blay == -1:
1075               document.warning("Can't find layout for cell!")
1076               continue
1077             bend = find_end_of_layout(document.body, blay)
1078             if bend == -1:
1079               document.warning("Can't find end of layout for cell!")
1080               continue
1081             # do the later one first, so as not to mess up the numbering
1082             # we are wrapping the whole cell in this ert
1083             # so before the end of the layout...
1084             document.body[bend:bend] = put_cmd_in_ert("}")
1085             # ...and after the beginning
1086             document.body[blay + 1:blay + 1] = \
1087               put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}{")
1088
1089         begin_table = end_table
1090
1091
def convert_math_output(document):
    """Convert \\html_use_mathml to \\html_math_output.

    A true value becomes "0" (MathML), a false value "2" (images).  A line
    that cannot be parsed is reported and converted to "0".
    """
    i = find_token(document.header, "\\html_use_mathml", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_use_mathml\s+(\w+)')
    m = rgx.match(document.header[i])
    newval = "0"  # MathML
    if m:
        if not str2bool(m.group(1)):
            newval = "2"  # Images
    else:
        document.warning("Can't match " + document.header[i])
    document.header[i] = "\\html_math_output " + newval
1107
1108
def revert_math_output(document):
    """Revert \\html_math_output to \\html_use_mathml.

    The values "1" and "2" become "false"; every other value, and a line
    that cannot be parsed (which is reported), becomes "true".
    """
    i = find_token(document.header, "\\html_math_output", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_math_output\s+(\d)')
    m = rgx.match(document.header[i])
    newval = "true"
    if m:
        if m.group(1) in ("1", "2"):
            newval = "false"
    else:
        document.warning("Unable to match " + document.header[i])
    document.header[i] = "\\html_use_mathml " + newval
1124
1125
1126
def revert_inset_preview(document):
    """Dissolve the preview inset.

    The inset's begin and end lines are removed together with the first
    \\begin_layout and the last \\end_layout inside it, so that the content
    becomes part of the surrounding paragraph.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Preview", i)
        if i == -1:
            return
        iend = find_end_of_inset(document.body, i)
        if iend == -1:
            document.warning("Malformed LyX document: Could not find end of Preview inset.")
            i += 1
            continue

        # This has several issues.
        # We need to do something about the layouts inside InsetPreview.
        # If we just leave the first one, then we have something like:
        # \begin_layout Standard
        # ...
        # \begin_layout Standard
        # and we get a "no \end_layout" error. So something has to be done.
        # Ideally, we would check if it is the same as the layout we are in.
        # If so, we just remove it; if not, we end the active one. But it is
        # not easy to know what layout we are in, due to depth changes, etc,
        # and it is not clear to me how much work it is worth doing. In most
        # cases, the layout will probably be the same.
        #
        # For the same reason, we have to remove the \end_layout tag at the
        # end of the last layout in the inset. Again, that will sometimes be
        # wrong, but it will usually be right. To know what to do, we would
        # again have to know what layout the inset is in.

        blay = find_token(document.body, "\\begin_layout", i, iend)
        if blay == -1:
            document.warning("Can't find layout for preview inset!")
            # always do the later one first...
            del document.body[iend]
            del document.body[i]
            # deletions mean we do not need to reset i
            continue

        # This is where we would check what layout we are in.
        # The check for Standard is definitely wrong.
        #
        # lay = document.body[blay].split(None, 1)[1]
        # if lay != oldlayout:
        #     # record a boolean to tell us what to do later....
        #     # better to do it later, since (a) it won't mess up
        #     # the numbering and (b) we only modify at the end.

        # we want to delete the last \\end_layout in this inset, too.
        # note that this may not be the \\end_layout that goes with blay!!
        bend = find_end_of_layout(document.body, blay)
        if bend == -1 or bend > iend:
            # No usable end for the first layout: search from the layout
            # itself.  Starting from -1 would scan from the top of the
            # document and could end up deleting a line outside the inset.
            bend = blay
        while True:
            tmp = find_token(document.body, "\\end_layout", bend + 1, iend)
            if tmp == -1:
                break
            bend = tmp
        if bend == blay:
            document.warning("Unable to find last layout in preview inset!")
            del document.body[iend]
            del document.body[i]
            # deletions mean we do not need to reset i
            continue
        # always do the later one first...
        del document.body[iend]
        del document.body[bend]
        del document.body[i:blay + 1]
        # we do not need to reset i
1195
1196
def revert_equalspacing_xymatrix(document):
    """Revert a Formula with xymatrix@! to an ERT inset.

    Formula insets containing ``\\xymatrix@!`` are replaced by ERT holding
    the formula source.  If at least one was replaced and no remaining
    Formula uses ``\\xymatrix``, the xy package is added to the preamble.
    """
    i = 0
    has_preamble = False
    has_equal_spacing = False

    while True:
        i = find_token(document.body, "\\begin_inset Formula", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Formula inset.")
            i += 1
            continue

        formula = document.body[i:j]
        if any("\\xymatrix@!" in line for line in formula):
            has_equal_spacing = True
            # strip "\begin_inset Formula " (21 characters) from the first line
            content = [document.body[i][21:]]
            content += document.body[i + 1:j]
            subst = put_cmd_in_ert(content)
            document.body[i:j + 1] = subst
            # continue on the first line behind the inserted ERT
            i += len(subst)
        else:
            if any("\\xymatrix" in line for line in formula):
                has_preamble = True
            i = j + 1

    if has_equal_spacing and not has_preamble:
        add_to_preamble(document, ['\\usepackage[all]{xy}'])
1235
1236
def revert_notefontcolor(document):
    """Revert greyed-out note font color to preamble code.

    The \\notefontcolor header line is always removed.  The preamble code
    redefining the lyxgreyedout environment is only written when the body
    contains a greyed-out note.
    """
    i = find_token(document.header, "\\notefontcolor", 0)
    if i == -1:
        return

    colorcode = get_value(document.header, '\\notefontcolor', i)
    del document.header[i]

    # are there any grey notes?
    if find_token(document.body, "\\begin_inset Note Greyedout", 0) == -1:
        # no need to do anything else, and \renewcommand will throw
        # an error since lyxgreyedout will not exist.
        return

    # the color code is in the form #rrggbb where every character denotes a hex number
    red = hex2ratio(colorcode[1:3])
    green = hex2ratio(colorcode[3:5])
    blue = hex2ratio(colorcode[5:7])
    # write the preamble
    insert_to_preamble(document, [
        '%  for greyed-out notes',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        '\\definecolor{note_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue),
        '\\renewenvironment{lyxgreyedout}',
        ' {\\textcolor{note_fontcolor}\\bgroup}{\\egroup}'])
1264
1265
def revert_turkmen(document):
    """Set language Turkmen to English, both as document language and
    in every "\\lang turkmen" switch of the body."""
    if document.language == "turkmen":
        document.language = "english"
        pos = find_token(document.header, "\\language", 0)
        if pos != -1:
            document.header[pos] = "\\language english"

    pos = find_token(document.body, "\\lang turkmen", 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace("\\lang turkmen", "\\lang english")
        pos = find_token(document.body, "\\lang turkmen", pos + 1)
1282
1283
def revert_fontcolor(document):
    """Revert font color to preamble code.

    The \\fontcolor header line is removed; preamble code is only written
    for a color other than #000000.
    """
    pos = find_token(document.header, "\\fontcolor", 0)
    if pos == -1:
        return
    colorcode = get_value(document.header, '\\fontcolor', pos)
    del document.header[pos]
    if colorcode == "#000000":
        # don't clutter the preamble if font color is not set
        return
    # the color code is in the form #rrggbb: two hex digits per channel
    rgb = tuple([hex2ratio(colorcode[k:k + 2]) for k in (1, 3, 5)])
    preamble = [
        '%  Set the font color',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        '\\definecolor{document_fontcolor}{rgb}{%s,%s,%s}' % rgb,
        '\\color{document_fontcolor}',
    ]
    insert_to_preamble(document, preamble)
1304
1305
def revert_shadedboxcolor(document):
    """Revert shaded box color to preamble code.

    The \\boxbgcolor header line is removed and a matching definition of
    shadecolor is written to the preamble.
    """
    pos = find_token(document.header, "\\boxbgcolor", 0)
    if pos == -1:
        return
    colorcode = get_value(document.header, '\\boxbgcolor', pos)
    del document.header[pos]
    # the color code is in the form #rrggbb: two hex digits per channel
    rgb = tuple([hex2ratio(colorcode[k:k + 2]) for k in (1, 3, 5)])
    preamble = [
        '%  Set the color of boxes with shaded background',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        "\\definecolor{shadecolor}{rgb}{%s,%s,%s}" % rgb,
    ]
    insert_to_preamble(document, preamble)
1322
1323
def revert_lyx_version(document):
    """Revert LyX Version information from Inset Info.

    Info insets with type "lyxinfo" and arg "version" are replaced by a
    plain text line holding the lyx2lyx version, or the generic text
    "LyX version" when that version is not available.
    """
    version = "LyX version"
    try:
        import lyx2lyx_version
        version = lyx2lyx_version.version
    except (ImportError, AttributeError):
        # no version information available: keep the generic text
        pass

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Info', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i += 1
            continue

        # We expect:
        # \begin_inset Info
        # type  "lyxinfo"
        # arg   "version"
        # \end_inset
        typ = get_quoted_value(document.body, "type", i, j)
        arg = get_quoted_value(document.body, "arg", i, j)
        if arg != "version" or typ != "lyxinfo":
            i = j + 1
            continue

        # We do not actually know the version of LyX used to produce the document.
        # But we can use our version, since we are reverting.
        s = [version]
        # Now we want to check if the line after "\end_inset" is empty. It normally
        # is, so we want to remove it, too.
        lastline = j + 1
        # the inset may end on the very last line of the body
        if lastline < len(document.body) and document.body[lastline].strip() == "":
            lastline = j + 2
        document.body[i:lastline] = s
        i = i + 1
1365
1366
def revert_math_scale(document):
    """Remove math scaling and LaTeX options from the header."""
    tokens = ('\\html_math_img_scale', '\\html_latex_start', '\\html_latex_end')
    for token in tokens:
        del_token(document.header, token, 0)
1372
1373
def revert_pagesizes(document):
    """Revert page sizes to default.

    The \\papersize header line is dropped when it names one of the sizes
    listed below; other sizes are kept.
    """
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    reverted = ("a0paper", "a1paper", "a2paper", "a6paper",
                "b0paper", "b1paper", "b2paper", "b6paper",
                "b0j", "b1j", "b2j", "b3j", "b4j", "b5j", "b6j")
    # the value starts behind "\papersize " (11 characters)
    if document.header[pos][11:] in reverted:
        del document.header[pos]
1385
1386
def revert_DIN_C_pagesizes(document):
    """Revert DIN C page sizes to default.

    The \\papersize header line is dropped when it names c0paper ... c6paper.
    """
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    din_c = ("c0paper", "c1paper", "c2paper", "c3paper",
             "c4paper", "c5paper", "c6paper")
    # the value starts behind "\papersize " (11 characters)
    if document.header[pos][11:] in din_c:
        del document.header[pos]
1396
1397
def convert_html_quotes(document):
    """Remove quotes around html_latex_start and html_latex_end.

    A header line whose value is not quoted is left as it is.
    """
    settings = (
        ('\\html_latex_start', r'\\html_latex_start\s+"(.*)"'),
        ('\\html_latex_end', r'\\html_latex_end\s+"(.*)"'),
    )
    for token, pattern in settings:
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        quoted = re.match(pattern, document.header[pos])
        if quoted:
            document.header[pos] = token + " " + quoted.group(1)
1416
1417
def revert_html_quotes(document):
    """Put quotes around the values of html_latex_start and html_latex_end.

    A header line that has no value is reported and removed.
    """
    settings = (
        ('\\html_latex_start', r'\\html_latex_start\s+(.*)'),
        ('\\html_latex_end', r'\\html_latex_end\s+(.*)'),
    )
    for token, pattern in settings:
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        line = document.header[pos]
        value = re.match(pattern, line)
        if value:
            document.header[pos] = token + " \"" + value.group(1) + "\""
        else:
            # token[1:] is the setting name without its leading backslash
            document.warning("Weird " + token[1:] + " line: " + line)
            del document.header[pos]
1442
1443
def revert_output_sync(document):
    """Remove forward search options from the header."""
    # the longer token is removed first, as in the original order of calls
    for token in ('\\output_sync_macro', '\\output_sync'):
        del_token(document.header, token, 0)
1448
1449
def revert_align_decimal(document):
    """Revert decimal-aligned table columns to centered columns.

    In every Tabular inset, each <column> line found before the first
    cell that has alignment="decimal" loses its decimal_point option and
    gets alignment="center" instead.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(i))
            i += 1
            continue
        cell = find_token(document.body, "<cell", i, j)
        if cell == -1:
            document.warning("Can't find any cells in Tabular inset at line " + str(i))
            i = j
            continue
        k = i + 1
        while True:
            k = find_token(document.body, "<column", k, cell)
            if k == -1:
                # done with this table only; the other tables still
                # have to be processed
                break
            if 'alignment="decimal"' in document.body[k]:
                remove_option(document.body, k, 'decimal_point')
                document.body[k] = \
                    document.body[k].replace('alignment="decimal"', 'alignment="center"')
            k += 1
        # Lines were only rewritten in place, so "cell" is still valid.
        # Resuming there (not behind the inset) also finds nested tables.
        i = cell
1478
1479
def convert_optarg(document):
    """Convert \\begin_inset OptArg to \\begin_inset Argument."""
    pos = find_token(document.body, '\\begin_inset OptArg', 0)
    while pos != -1:
        document.body[pos] = "\\begin_inset Argument"
        pos = find_token(document.body, '\\begin_inset OptArg', pos + 1)
1489
1490
def revert_argument(document):
    """Convert \\begin_inset Argument to \\begin_inset OptArg."""
    pos = find_token(document.body, '\\begin_inset Argument', 0)
    while pos != -1:
        document.body[pos] = "\\begin_inset OptArg"
        pos = find_token(document.body, '\\begin_inset Argument', pos + 1)
1500
1501
def revert_makebox(document):
    """Convert \\makebox to TeX code.

    Frameless boxes with "use_makebox 1" are replaced by ERT of the form
    \\makebox[<width>][<alignment>]{ ... }.  In every other box that has a
    use_makebox option, only that option line is removed.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Box', i)
        if i == -1:
            break
        z = find_end_of_inset(document.body, i)
        if z == -1:
            document.warning("Malformed LyX document: Can't find end of box inset.")
            i += 1
            continue
        blay = find_token(document.body, "\\begin_layout", i, z)
        if blay == -1:
            document.warning("Malformed LyX document: Can't find layout in box.")
            i = z
            continue
        # by looking before the layout we make sure we're actually finding
        # an option, not text.
        j = find_token(document.body, 'use_makebox', i, blay)
        if j == -1:
            # continue inside the box, not behind it: boxes may be nested
            i = blay
            continue

        # get_value returns the value as text, so compare with "1"
        if not check_token(document.body[i], "\\begin_inset Box Frameless") \
           or get_value(document.body, 'use_makebox', j) != "1":
            del document.body[j]
            # after the deletion this is the line behind \begin_layout;
            # nested boxes can only start from there on
            i = blay
            continue
        bend = find_end_of_layout(document.body, blay)
        if bend == -1 or bend > z:
            document.warning("Malformed LyX document: Can't find end of layout in box.")
            i = z
            continue
        # determine the alignment
        align = get_quoted_value(document.body, 'hor_pos', i, blay, "c")
        # determine the width
        length = get_quoted_value(document.body, 'width', i, blay, "50col%")
        length = latex_length(length)[1]
        # remove the \end_layout \end_inset pair
        # (the later replacement comes first so the earlier indices stay valid)
        document.body[bend:z + 1] = put_cmd_in_ert("}")
        subst = "\\makebox[" + length + "][" + align + "]{"
        document.body[i:blay + 1] = put_cmd_in_ert(subst)
        i += 1
1547
1548
def convert_use_makebox(document):
    """Add the use_makebox option to boxes.

    "use_makebox 0" is inserted right behind the use_parbox line of every
    Box inset.
    """
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Box', pos)
        if pos == -1:
            break
        # all of this is to make sure we actually find the use_parbox
        # that is an option for this box, not some text elsewhere.
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Can't find end of box inset!!")
            pos += 1
            continue
        first_layout = find_token(document.body, "\\begin_layout", pos, inset_end)
        if first_layout == -1:
            document.warning("Can't find layout in box inset!!")
            pos = inset_end
            continue
        # so now we are looking for use_parbox before the box's layout
        parbox = find_token(document.body, 'use_parbox', pos, first_layout)
        if parbox == -1:
            document.warning("Malformed LyX document: Can't find use_parbox statement in box.")
            pos = inset_end
            continue
        document.body[parbox + 1:parbox + 1] = ["use_makebox 0"]
        # not inset_end + 1 (box insets may be nested)
        pos = first_layout + 1
1576
1577
def revert_IEEEtran(document):
    """Convert IEEEtran layouts and styles to TeX code.

    Only documents of text class IEEEtran are changed.  Two flex insets are
    reverted, three layouts are moved to the preamble as LaTeX commands and
    two layouts are renamed.
    """
    if document.textclass != "IEEEtran":
        return

    revert_flex_inset(document.body, "IEEE membership", "\\IEEEmembership")
    revert_flex_inset(document.body, "Lowercase", "\\MakeLowercase")

    # layouts that become a command in the preamble
    latexcmd = {
        "Special Paper Notice": "\\IEEEspecialpapernotice",
        "After Title Text": "\\IEEEaftertitletext",
        "Publication ID": "\\IEEEpubid",
    }
    # layouts that are only renamed
    obsoletedby = {
        "Page headings": "MarkBoth",
        "Biography without photo": "BiographyNoPhoto",
    }
    layouts = ("Special Paper Notice", "After Title Text", "Publication ID",
               "Page headings", "Biography without photo")

    for layout in layouts:
        token = '\\begin_layout ' + layout
        renamed = obsoletedby.get(layout)
        pos = 0
        while True:
            pos = find_token(document.body, token, pos)
            if pos == -1:
                break
            end = find_end_of_layout(document.body, pos)
            if end == -1:
                document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
                pos += 1
                continue
            if renamed is not None:
                document.body[pos] = "\\begin_layout " + renamed
                pos = end
                continue
            content = lyx2latex(document, document.body[pos:end + 1])
            add_to_preamble(document, [latexcmd[layout] + "{" + content + "}"])
            del document.body[pos:end + 1]
            # no need to reset pos
1614
1615
def convert_prettyref(document):
    """Convert prettyref references to neutral formatted refs.

    Every "LatexCommand prettyref" line found inside a
    "CommandInset ref" inset is rewritten to "LatexCommand formatted".
    Afterwards "\\use_refstyle 0" is inserted before the last line of
    the header.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef!")
            i += 1
            continue
        k = find_token(document.body, "LatexCommand prettyref", i, j)
        if k != -1:
            document.body[k] = "LatexCommand formatted"
        i = j + 1
    document.header.insert(-1, "\\use_refstyle 0")
1636
1637
def revert_refstyle(document):
    """Revert neutral formatted refs to prettyref.

    Every "LatexCommand formatted" line found inside a
    "CommandInset ref" inset is rewritten to "LatexCommand prettyref".
    The "\\use_refstyle" header line is removed if present.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef")
            i += 1
            continue
        k = find_token(document.body, "LatexCommand formatted", i, j)
        if k != -1:
            document.body[k] = "LatexCommand prettyref"
        i = j + 1
    i = find_token(document.header, "\\use_refstyle", 0)
    if i != -1:
        document.header.pop(i)
1660
1661
def revert_nameref(document):
    """Convert namerefs to regular references.

    Each reference inset whose command is Nameref or nameref is
    replaced by ERT holding the corresponding LaTeX command applied to
    the inset's reference.  If at least one inset was converted, the
    nameref package is added to the preamble.
    """
    cmds = ["Nameref", "nameref"]
    foundone = False
    rx = re.compile(r'reference "(.*)"')
    for cmd in cmds:
        i = 0
        oldcmd = "LatexCommand " + cmd
        while True:
            # It seems better to look for this, as most of the reference
            # insets won't be ones we care about.
            i = find_token(document.body, oldcmd, i)
            if i == -1:
                break
            cmdloc = i
            i += 1
            # Make sure it is actually in an inset!
            # A normal line could begin with "LatexCommand nameref"!
            val = is_in_inset(document.body, cmdloc,
                              "\\begin_inset CommandInset ref")
            if not val:
                continue
            stins, endins = val

            # ok, so it is in an InsetRef
            refline = find_token(document.body, "reference", stins, endins)
            if refline == -1:
                # stins is a line index, so it has to be converted
                document.warning("Can't find reference for inset at line "
                                 + str(stins) + "!!")
                continue
            m = rx.match(document.body[refline])
            if not m:
                document.warning("Can't match reference line: "
                                 + document.body[refline])
                continue
            foundone = True
            ref = m.group(1)
            newcontent = put_cmd_in_ert('\\' + cmd + '{' + ref + '}')
            document.body[stins:endins + 1] = newcontent

    if foundone:
        add_to_preamble(document, ["\\usepackage{nameref}"])
1702
1703
def remove_Nameref(document):
    """Convert Nameref commands to nameref commands.

    Only "LatexCommand Nameref" lines that lie inside a
    "CommandInset ref" inset are rewritten; other lines starting with
    that text are left alone.
    """
    pos = 0
    while True:
        # Searching for the command itself is cheaper than visiting
        # every reference inset, most of which are of no interest.
        pos = find_token(document.body, "LatexCommand Nameref", pos)
        if pos == -1:
            return
        hit = pos
        pos += 1
        # Only touch the line when it really belongs to a reference inset.
        if is_in_inset(document.body, hit, "\\begin_inset CommandInset ref"):
            document.body[hit] = "LatexCommand nameref"
1722
1723
def revert_mathrsfs(document):
    """Load mathrsfs if \\mathscr is used in the document.

    The package is added to the preamble once, as soon as any body line
    contains "\\mathscr{".
    """
    if any("\\mathscr{" in line for line in document.body):
        add_to_preamble(document, ["\\usepackage{mathrsfs}"])
1731
1732
def convert_flexnames(document):
    """Strip the Custom:, CharStyle: and Element: prefixes from Flex insets.

    "\\begin_inset Flex Custom:Style" becomes "\\begin_inset Flex Style",
    and likewise for the CharStyle and Element prefixes.
    """
    prefixed = re.compile(r'^\\begin_inset Flex (?:Custom|CharStyle|Element):(.+)$')
    pos = find_token(document.body, "\\begin_inset Flex", 0)
    while pos != -1:
        found = prefixed.match(document.body[pos])
        if found:
            document.body[pos] = "\\begin_inset Flex " + found.group(1)
        pos = find_token(document.body, "\\begin_inset Flex", pos + 1)
1746
1747
# Maps unprefixed Flex inset names to their prefixed form; used by
# revert_flexnames when the document backend is "latex".
flex_insets = dict(
    (name, prefix + ":" + name)
    for prefix, names in (
        ("CharStyle", (
            "Alert", "Code", "Concepts", "E-Mail", "Emph", "Expression",
            "Initial", "Institute", "Meaning", "Noun", "Strong",
            "Structure",
        )),
        ("Custom", (
            "ArticleMode", "Endnote", "Glosse", "PresentationMode",
            "Tri-Glosse",
        )),
    )
    for name in names
)
1767
# Maps unprefixed Flex inset names to their "Element:" form; used by
# revert_flexnames when the document backend is not "latex".
flex_elements = dict(
    (name, "Element:" + name)
    for name in (
        "Abbrev", "CCC-Code", "Citation-number", "City", "Code", "CODEN",
        "Country", "Day", "Directory", "Dscr", "Email", "Emph",
        "Filename", "Firstname", "Fname", "GuiButton", "GuiMenu",
        "GuiMenuItem", "ISSN", "Issue-day", "Issue-months",
        "Issue-number", "KeyCap", "KeyCombo", "Keyword", "Literal",
        "MenuChoice", "Month", "Orgdiv", "Orgname", "Postcode",
        "SS-Code", "SS-Title", "State", "Street", "Surname", "Volume",
        "Year",
    )
)
1808
1809
def revert_flexnames(document):
    """Put the old prefixes back on known Flex inset names.

    The lookup table is flex_insets when the document backend is
    "latex" and flex_elements otherwise.  Flex insets whose name is not
    in the table are left unchanged.
    """
    names = flex_insets if document.backend == "latex" else flex_elements
    flexline = re.compile(r'^\\begin_inset Flex\s+(.+)$')

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Flex", pos)
        if pos == -1:
            return
        line = document.body[pos]
        found = flexline.match(line)
        if found is None:
            document.warning("Illegal flex inset: " + line)
        else:
            replacement = names.get(found.group(1))
            if replacement is not None:
                document.body[pos] = "\\begin_inset Flex " + replacement
        pos += 1
1831
1832
def convert_mathdots(document):
    """Load mathdots automatically.

    A "\\use_mathdots" line is inserted into the header right after
    "\\use_mhchem" (or after "\\use_esint" if the former is missing).
    Its value is 2 when the preamble loads mathdots explicitly, in which
    case that preamble line is removed, and 0 otherwise.
    """
    anchor = find_token(document.header, "\\use_mhchem", 0)
    if anchor == -1:
        anchor = find_token(document.header, "\\use_esint", 0)
        if anchor == -1:
            document.warning("Malformed LyX document: Can't find \\use_mhchem.")
            return

    in_preamble = find_token(document.preamble, "\\usepackage{mathdots}", 0)
    if in_preamble != -1:
        document.header.insert(anchor + 1, "\\use_mathdots 2")
        del document.preamble[in_preamble]
    else:
        document.header.insert(anchor + 1, "\\use_mathdots 0")
1847
1848
def revert_mathdots(document):
    """Load mathdots if used in the document.

    The "\\use_mathdots" header line is removed and its value decides
    what happens: 0 never loads the package, 2 always adds it to the
    preamble, and anything else (including a missing or unparsable
    value) is treated as "auto".  In the auto case the package is loaded
    conditionally if some Formula inset contains "\\iddots".
    """
    mathdots = find_token(document.header, "\\use_mathdots", 0)
    if mathdots == -1:
        document.warning("No \\use_mathdots line. Assuming auto.")
    else:
        val = get_value(document.header, "\\use_mathdots", mathdots)
        del document.header[mathdots]
        try:
            usedots = int(val)
        except (TypeError, ValueError):
            # only a failed conversion is expected here; anything else
            # should not be swallowed
            document.warning("Invalid \\use_mathdots value: " + str(val) + ". Assuming auto.")
            usedots = 1

        if usedots == 0:
            # do not load case
            return
        if usedots == 2:
            # force load case
            add_to_preamble(document, ["\\usepackage{mathdots}"])
            return

    # so we are in the auto case. we want to load mathdots if \iddots is used.
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue
        code = "\n".join(document.body[i:j])
        if "\\iddots" in code:
            add_to_preamble(document, ["\\@ifundefined{iddots}{\\usepackage{mathdots}}"])
            return
        i = j
1889
1890
def convert_rule(document):
    """Convert \\lyxline to CommandInset line.

    Every "\\lyxline" body line is replaced by a line inset.  If the
    line is the first content of its paragraph and the document uses
    indented paragraphs, "\\noindent" is added to the paragraph so the
    rule can span the full width.  If other content precedes the line in
    its paragraph, a Newline inset is put in front of the rule.
    """
    inset = ['\\begin_inset CommandInset line',
             'LatexCommand rule',
             'offset "0.5ex"',
             'width "100line%"',
             'height "1pt"', '',
             '\\end_inset', '', '']
    newline = ["\\begin_inset Newline newline", "\\end_inset", ""]

    # if paragraphs are indented, we may have to unindent to get the
    # line to be full-width.
    indent = get_value(document.header, "\\paragraph_separation", 0)
    have_indent = (indent == "indent")

    i = 0
    while True:
        i = find_token(document.body, "\\lyxline", i)
        if i == -1:
            return

        # we need to find out if this line follows other content
        # in its paragraph. find its layout....
        lastlay = find_token_backwards(document.body, "\\begin_layout", i)
        if lastlay == -1:
            document.warning("Can't find layout for line at " + str(i))
            # do the best we can.
            document.body[i:i+1] = inset
            i += len(inset)
            continue

        # ...and look for other content before it.
        lineisfirst = True
        for line in document.body[lastlay + 1:i]:
            # is it empty or a paragraph option?
            if not line or line[0] == '\\':
                continue
            lineisfirst = False
            break

        if lineisfirst:
            document.body[i:i+1] = inset
            i += len(inset)
            if have_indent:
                # we need to unindent, lest the line be too long.
                # The insertion happens before i, so i shifts by one.
                document.body.insert(lastlay + 1, "\\noindent")
                i += 1
        else:
            # so our line is in the middle of a paragraph
            # we need to add a new line, lest this line follow the
            # other content on that line and run off the side of the page
            document.body[i:i+1] = newline + inset
            i += len(newline) + len(inset)
1944
1945
def revert_rule(document):
    """Revert line insets to TeX code.

    Each "CommandInset line" inset is replaced by ERT containing a
    \\rule command built from the inset's offset, width and height.  The
    offset is optional and is emitted as a single bracketed argument
    only when the inset specifies one.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset line", i)
        if i == -1:
            return
        # find end of inset
        j = find_token(document.body, "\\end_inset", i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of line inset.")
            return
        # determine the optional offset
        offset = get_quoted_value(document.body, 'offset', i, j)
        if offset:
            offset = '[' + offset + ']'
        # determine the width
        width = get_quoted_value(document.body, 'width', i, j, "100col%")
        width = latex_length(width)[1]
        # determine the height
        height = get_quoted_value(document.body, 'height', i, j, "1pt")
        height = latex_length(height)[1]
        # output the \rule command; offset already carries its brackets
        subst = "\\rule" + offset + "{" + width + "}{" + height + "}"
        ert = put_cmd_in_ert(subst)
        document.body[i:j + 1] = ert
        # continue right after the lines that were just inserted
        i += len(ert)
1972
1973
def revert_diagram(document):
    """Add the feyn package if \\Diagram is used in math.

    Formula insets are scanned in order; the package is added to the
    preamble at the first one that contains "\\Diagram" and the search
    stops there.  A Formula inset without an end also stops the search.
    """
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Formula', pos)
        if pos == -1:
            return
        last = find_end_of_inset(document.body, pos)
        if last == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset.")
            return
        if "\\Diagram" in "\n".join(document.body[pos:last]):
            # one hit is enough, the package is only loaded once
            add_to_preamble(document, ["\\usepackage{feyn}"])
            return
        pos = last
1992
# Text classes for which convert_bibtex_clearpage inserts a page break
# before a bibliography that uses the bibtotoc option.
chapters = (
    "amsbook",
    "book",
    "docbook-book",
    "elsart",
    "extbook",
    "extreport",
    "jbook",
    "jreport",
    "jsbook",
    "literate-book",
    "literate-report",
    "memoir",
    "mwbk",
    "mwrep",
    "recipebook",
    "report",
    "scrbook",
    "scrreprt",
    "svmono",
    "svmult",
    "tbook",
    "treport",
    "tufte-book",
)
1997
def convert_bibtex_clearpage(document):
    """Insert a clear(double)page before bibliographies using bibtotoc.

    Only acts on text classes listed in chapters.  For every bibtex
    inset whose options contain "bibtotoc", a Standard paragraph holding
    a Newpage inset is inserted right before the paragraph containing
    the bibliography.  The inset is "clearpage" for single-sided
    documents and "cleardoublepage" otherwise; a missing or invalid
    \\papersides value is treated as single sided.
    """
    if document.textclass not in chapters:
        return

    i = find_token(document.header, '\\papersides', 0)
    sides = 0
    if i == -1:
        document.warning("Malformed LyX document: Can't find papersides definition.")
        document.warning("Assuming single sided.")
        sides = 1
    else:
        val = get_value(document.header, "\\papersides", i)
        try:
            sides = int(val)
        except (TypeError, ValueError):
            # sides stays 0 and is reported as invalid just below
            pass
        if sides != 1 and sides != 2:
            document.warning("Invalid papersides value: " + val)
            document.warning("Assuming single sided.")
            sides = 1

    j = 0
    while True:
        j = find_token(document.body, "\\begin_inset CommandInset bibtex", j)
        if j == -1:
            return

        k = find_end_of_inset(document.body, j)
        if k == -1:
            document.warning("Can't find end of Bibliography inset at line " + str(j))
            j += 1
            continue

        # only act if there is the option "bibtotoc"
        val = get_value(document.body, 'options', j, k)
        if not val:
            document.warning("Can't find options for bibliography inset at line " + str(j))
            j = k
            continue

        if "bibtotoc" not in val:
            j = k
            continue

        # so we want to insert a new page right before the paragraph that
        # this bibliography thing is in.
        lay = find_token_backwards(document.body, "\\begin_layout", j)
        if lay == -1:
            document.warning("Can't find layout containing bibliography inset at line " + str(j))
            j = k
            continue

        if sides == 1:
            cmd = "clearpage"
        else:
            cmd = "cleardoublepage"
        subst = ['\\begin_layout Standard',
                 '\\begin_inset Newpage ' + cmd,
                 '\\end_inset', '', '',
                 '\\end_layout', '']
        document.body[lay:lay] = subst
        # the inset end moved down by the number of inserted lines
        j = k + len(subst)
2062
2063
def check_passthru(document):
    """Tell whether the document is a literate one.

    Returns True if the text class is one of the literate classes or if
    the document's module list contains "sweave" or "noweb"; False
    otherwise.  The module list is only queried when the text class
    does not already decide the answer.
    """
    if document.textclass in ("literate-article", "literate-book",
                              "literate-report"):
        return True
    return any(mod in ("sweave", "noweb")
               for mod in document.get_module_list())
2074
2075
def convert_passthru(document):
    """Split Chunk and Scrap paragraphs at their newline insets.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Only acts when check_passthru(document) is true.  Inside every Chunk
    or Scrap layout that carries no paragraph customization, each
    Newline inset is replaced by the end of the layout followed by a new
    instance of the same layout.  If the next layout after the converted
    one is of the same kind, an extra empty layout is inserted between
    the two.
    """
    if not check_passthru(document):
        return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        # every layout kind is searched from the top of the body
        beg = 0
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we are now going to replace newline insets within this layout
            # by new instances of this layout. so we have repeated layouts
            # instead of newlines.

            # if the paragraph has any customization, however, we do not want to
            # do the replacement.
            if document.body[beg + 1].startswith("\\"):
                beg = end + 1
                continue

            ns = beg
            while True:
                ns = find_token(document.body, "\\begin_inset Newline newline", ns, end)
                if ns == -1:
                    break
                ne = find_end_of_inset(document.body, ns)
                if ne == -1 or ne > end:
                    document.warning("Can't find end of inset at line " + str(ns))
                    ns += 1
                    continue
                if document.body[ne + 1] == "":
                    ne += 1
                subst = ["\\end_layout", "", "\\begin_layout " + lay]
                removed = ne - ns + 1
                document.body[ns:ne + 1] = subst
                # keep end on the \end_layout line: it moves by the
                # difference between inserted and removed lines
                end += len(subst) - removed
                # and resume the search after the inserted lines
                ns += len(subst)

            # ok, we now want to find out if the next layout is the
            # same as this one. if so, we will insert an extra copy of it
            didit = False
            nxt = find_token(document.body, "\\begin_layout", end)
            if nxt != -1:
                m = rx.match(document.body[nxt])
                if m and m.group(1) == lay:
                    subst = ["\\begin_layout " + lay, "", "\\end_layout", ""]
                    document.body[nxt:nxt] = subst
                    didit = True
            beg = end + 1
            if didit:
                beg += 4 # for the extra layout
2139
2140
def revert_passthru(document):
    """Merge adjacent Chunk or Scrap paragraphs back into one.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Only acts when check_passthru(document) is true.  For each Chunk or
    Scrap layout, following layouts of the same kind are examined one
    after another: an empty one is deleted and ends the merging for this
    layout, a non-empty one is joined to the current layout by putting a
    Newline inset in place of the layout switch.
    """
    if not check_passthru(document):
        return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        # every layout kind is searched from the top of the body
        beg = 0
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we now want to find out if the next layout is the
            # same as this one. but we will need to do this over and
            # over again.
            while True:
                nxt = find_token(document.body, "\\begin_layout", end)
                if nxt == -1:
                    break
                m = rx.match(document.body[nxt])
                if not m:
                    break
                if m.group(1) != lay:
                    break
                # so it is the same layout again. we now want to know if it is empty.
                # but first let's check and make sure there is no content between the
                # two layouts. i'm not sure if that can happen or not.
                for lineno in range(end + 1, nxt):
                    if document.body[lineno] != "":
                        document.warning("Found content between adjacent " + lay + " layouts!")
                        break
                nextend = find_end_of_layout(document.body, nxt)
                if nextend == -1:
                    document.warning("Can't find end of layout at line " + str(nxt))
                    break
                empty = all(line == "" for line in document.body[nxt + 1:nextend])
                if empty:
                    # empty layouts just get removed
                    # should we check if it's before yet another such layout?
                    del document.body[nxt : nextend + 1]
                    # and we do not want to check again. we know the next layout
                    # should be another Chunk and should be left as is.
                    break
                # if it's not empty, then we want to insert a newline in place
                # of the layout switch
                subst = ["\\begin_inset Newline newline", "\\end_inset", ""]
                document.body[end : nxt + 1] = subst
                # and now we have to find the end of the new, larger layout
                newend = find_end_of_layout(document.body, beg)
                if newend == -1:
                    document.warning("Can't find end of new layout at line " + str(beg))
                    break
                end = newend
            beg = end + 1
2206
2207
def revert_multirowOffset(document):
    """Revert multirow cells with offset in tables to TeX code.

    This routine is the same as the revert_multirow routine except that
    it additionally checks for the offset.  If the body contains no
    multirow cell with an mroffset option, nothing is changed.
    Otherwise the multirow package is added to the preamble and, table
    by table, the content of each such cell is wrapped in ERT that opens
    a \\multirow command (carrying the offset as optional argument) and
    closes it again; the multirow-related cell options are removed.
    """
    # first, let's find out if we need to do anything
    i = find_token(document.body, '<cell multirow="3" mroffset=', 0)
    if i == -1:
        return

    add_to_preamble(document, ["\\usepackage{multirow}"])

    rgx = re.compile(r'mroffset="[^"]+?"')
    begin_table = 0

    while True:
        # find begin/end of table
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        # does this table have multirow?
        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
        if i == -1:
            begin_table = end_table
            continue

        # store the number of rows and columns
        numrows = get_option_value(document.body[begin_table], "rows")
        numcols = get_option_value(document.body[begin_table], "columns")
        try:
            numrows = int(numrows)
            numcols = int(numcols)
        except (TypeError, ValueError):
            document.warning("Unable to determine rows and columns!")
            begin_table = end_table
            continue

        mrstarts = []
        multirows = []
        # collect info on rows and columns of this table.
        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row>', begin_row, end_table)
            if begin_row == -1:
                document.warning("Can't find row " + str(row + 1))
                break
            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
            if end_row == -1:
                document.warning("Can't find end of row " + str(row + 1))
                break
            begin_cell = begin_row
            multirows.append([])
            for column in range(numcols):
                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
                if begin_cell == -1:
                    document.warning("Can't find column " + str(column + 1) +
                                     " in row " + str(row + 1))
                    break
                # NOTE
                # this will fail if someone puts "</cell>" in a cell, but
                # that seems fairly unlikely.
                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
                if end_cell == -1:
                    document.warning("Can't find end of column " + str(column + 1) +
                                     " in row " + str(row + 1))
                    break
                # each entry is [first line of cell, last line of cell, kind]
                multirows[row].append([begin_cell, end_cell, 0])
                if document.body[begin_cell].find('multirow="3" mroffset=') != -1:
                    multirows[row][column][2] = 3 # begin multirow
                    mrstarts.append([row, column])
                elif document.body[begin_cell].find('multirow="4"') != -1:
                    multirows[row][column][2] = 4 # in multirow
                begin_cell = end_cell
            begin_row = end_row
        # end of table info collection

        # work from the back to avoid messing up numbering
        mrstarts.reverse()
        for m in mrstarts:
            row = m[0]
            col = m[1]
            # get column width
            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
            # "0pt" means that no width is specified
            if not col_width or col_width == "0pt":
                col_width = "*"
            # determine the number of cells that are part of the multirow.
            # Only rows that were actually collected are looked at, so a
            # table whose parsing stopped early cannot cause an IndexError.
            nummrs = 1
            for r in range(row + 1, len(multirows)):
                if col >= len(multirows[r]) or multirows[r][col][2] != 4:
                    break
                nummrs += 1
                # take the opportunity to revert this line
                lineno = multirows[r][col][0]
                document.body[lineno] = document.body[lineno].\
                    replace(' multirow="4" ', ' ').\
                    replace('valignment="middle"', 'valignment="top"').\
                    replace(' topline="true" ', ' ')
                # remove bottom line of previous multirow-part cell
                lineno = multirows[r-1][col][0]
                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
            # revert beginning cell
            bcell = multirows[row][col][0]
            ecell = multirows[row][col][1]
            offset = get_option_value(document.body[bcell], "mroffset")
            document.body[bcell] = document.body[bcell].\
                replace(' multirow="3" ', ' ').\
                replace('valignment="middle"', 'valignment="top"')
            # remove mroffset option
            document.body[bcell] = rgx.sub('', document.body[bcell])

            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
            if blay == -1:
                document.warning("Can't find layout for cell!")
                continue
            bend = find_end_of_layout(document.body, blay)
            if bend == -1:
                document.warning("Can't find end of layout for cell!")
                continue
            # do the later one first, so as not to mess up the numbering
            # we are wrapping the whole cell in this ert
            # so before the end of the layout...
            document.body[bend:bend] = put_cmd_in_ert("}")
            # ...and after the beginning
            document.body[blay + 1:blay + 1] = \
                put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}["
                               + offset + "]{")

        # on to the next table
        begin_table = end_table
2343
2344
def revert_script(document):
    """Convert subscript/superscript insets to TeX code.

    The start of each script inset (up to and including its first
    \\begin_layout) is replaced by ERT opening \\textsubscript or
    \\textsuperscript, and the closing \\end_layout ... \\end_inset lines
    by ERT holding the closing brace.  If a subscript inset was seen and
    the text class is not one of those listed below, the subscript
    package is added to the preamble.
    """
    body = document.body
    used_subscript = False
    pos = 0
    while True:
        pos = find_token(body, '\\begin_inset script', pos)
        if pos == -1:
            break
        inset_end = find_end_of_inset(body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of script inset.")
            pos += 1
            continue
        lay_start = find_token(body, "\\begin_layout", pos, inset_end)
        if lay_start == -1:
            document.warning("Malformed LyX document: Can't find layout in script inset.")
            pos = inset_end
            continue

        # subscript is tested first, then superscript
        opener = None
        for kind in ("subscript", "superscript"):
            if check_token(body[pos], "\\begin_inset script " + kind):
                opener = "\\text" + kind + "{"
                break
        if opener is None:
            document.warning("Malformed LyX document: Unknown type of script inset.")
            pos = inset_end
            continue
        if kind == "subscript":
            used_subscript = True

        lay_end = find_end_of_layout(body, lay_start)
        if lay_end == -1 or lay_end > inset_end:
            document.warning("Malformed LyX document: Can't find end of layout in script inset.")
            pos = inset_end
            continue
        # replace the tail first so the head indices stay valid;
        # this removes the \end_layout \end_inset pair
        body[lay_end:inset_end + 1] = put_cmd_in_ert("}")
        body[pos:lay_start + 1] = put_cmd_in_ert(opener)
        pos += 1

    # these classes provide a \textsubscript command:
    # FIXME: Would be nice if we could use the information of the .layout file here
    classes = ["memoir", "scrartcl", "scrbook", "scrlttr2", "scrreprt"]
    if used_subscript and find_token_exact(classes, document.textclass, 0) == -1:
        add_to_preamble(document, ['\\usepackage{subscript}'])
2387
2388
def convert_use_xetex(document):
    """Convert \\use_xetex to \\use_non_tex_fonts.

    An existing \\use_xetex header line is renamed, keeping its value.
    If there is none, "\\use_non_tex_fonts 0" is inserted before the
    last header line.
    """
    pos = find_token(document.header, "\\use_xetex", 0)
    if pos != -1:
        value = get_value(document.header, "\\use_xetex", 0)
        document.header[pos] = "\\use_non_tex_fonts " + value
        return
    document.header.insert(-1, "\\use_non_tex_fonts 0")
2397
2398
def revert_use_xetex(document):
    """Revert \\use_non_tex_fonts to \\use_xetex.

    The header line is renamed and keeps its value.  A warning is
    issued, and nothing is changed, if the line is missing.
    """
    pos = find_token(document.header, "\\use_non_tex_fonts", 0)
    if pos != -1:
        value = get_value(document.header, "\\use_non_tex_fonts", 0)
        document.header[pos] = "\\use_xetex " + value
    else:
        document.warning("Malformed document. No \\use_non_tex_fonts param!")
2409
2410
def revert_labeling(document):
    """Turn every "Labeling" layout in the body into a "List" layout.

    Documents whose text class is one of the scr* classes listed below
    are left unchanged.
    """
    koma_classes = ("scrartcl", "scrarticle-beamer", "scrbook", "scrlettr",
                    "scrlttr2", "scrreprt")
    if document.textclass in koma_classes:
        return

    body = document.body
    pos = find_token_exact(body, "\\begin_layout Labeling", 0)
    while pos != -1:
        body[pos] = "\\begin_layout List"
        # The rewritten line no longer matches, so searching again from
        # the same index moves on to the next occurrence.
        pos = find_token_exact(body, "\\begin_layout Labeling", pos)
2422
2423
def revert_langpack(document):
    """Delete the \\language_package line from the header.

    A warning is issued when the header has no such line.
    """
    header = document.header
    pos = find_token(header, "\\language_package", 0)
    if pos != -1:
        del header[pos]
        return

    document.warning("Malformed document. No \\language_package param!")
2433
2434
def convert_langpack(document):
    """Add a "\\language_package default" line to the header.

    The new line is inserted directly after the first header line that
    find_token reports for "\\language".  When no such line exists a
    warning is issued and the header is left untouched.
    """
    # The backslash must be escaped: "\l" is not a valid escape sequence,
    # and relying on Python passing it through unchanged triggers a
    # warning on current interpreters.
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        document.warning("Malformed document. No \\language defined!")
        return

    document.header.insert(i + 1, "\\language_package default")
2443
2444
def revert_tabularwidth(document):
    """Strip the tabularwidth attribute from the features line of Tabular insets.

    Every "\\begin_inset Tabular" in the body is visited.  For each one
    the "<features" line inside the inset is looked up and, when it
    carries a tabularwidth="..." attribute, that attribute is removed
    with remove_option.  Insets whose end or features line cannot be
    found are reported with a warning and skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(i))
            i += 1
            continue
        # Step past the opening line only: the next search resumes inside
        # this inset, so a Tabular nested in it is found on a later pass.
        i += 1
        features = find_token(document.body, "<features", i, j)
        if features == -1:
            document.warning("Can't find any features in Tabular inset at line " + str(i))
            i = j
            continue
        # remove_option deletes a name="value" pair, so test for the
        # attribute itself rather than for "tabularwidth" used as the
        # value of some other attribute.
        if 'tabularwidth="' in document.body[features]:
            remove_option(document.body, features, 'tabularwidth')
2464
def revert_html_css_as_file(document):
    """Delete the \\html_css_as_file parameter from the header.

    A warning is issued when del_token reports that nothing was removed.
    """
    removed = del_token(document.header, '\\html_css_as_file', 0)
    if removed:
        return
    document.warning("Malformed LyX document: Missing \\html_css_as_file.")
2468
2469
##
# Conversion hub
#

# Version strings this module declares support for.
supported_versions = ["2.0.0", "2.0"]

# Each entry is [number, [functions]].  The numbers run consecutively
# from 346 to 413; an empty list means no function is registered for
# that number.
# NOTE(review): presumably the number is the file format reached once the
# listed functions have run -- confirm against the lyx2lyx driver.
convert = [
    [346, []],
    [347, []],
    [348, []],
    [349, []],
    [350, []],
    [351, []],
    [352, [convert_splitindex]],
    [353, []],
    [354, []],
    [355, []],
    [356, []],
    [357, []],
    [358, []],
    [359, [convert_nomencl_width]],
    [360, []],
    [361, []],
    [362, []],
    [363, []],
    [364, []],
    [365, []],
    [366, []],
    [367, []],
    [368, []],
    [369, [convert_author_id]],
    [370, []],
    [371, [convert_mhchem]],
    [372, []],
    [373, [merge_gbrief]],
    [374, []],
    [375, []],
    [376, []],
    [377, []],
    [378, []],
    [379, [convert_math_output]],
    [380, []],
    [381, []],
    [382, []],
    [383, []],
    [384, []],
    [385, []],
    [386, []],
    [387, []],
    [388, []],
    [389, [convert_html_quotes]],
    [390, []],
    [391, []],
    [392, []],
    [393, [convert_optarg]],
    [394, [convert_use_makebox]],
    [395, []],
    [396, []],
    [397, [remove_Nameref]],
    [398, []],
    [399, [convert_mathdots]],
    [400, [convert_rule]],
    [401, []],
    [402, [convert_bibtex_clearpage]],
    [403, [convert_flexnames]],
    [404, [convert_prettyref]],
    [405, []],
    [406, [convert_passthru]],
    [407, []],
    [408, []],
    [409, [convert_use_xetex]],
    [410, []],
    [411, [convert_langpack]],
    [412, []],
    [413, []],
]
2544
# Each entry is [number, [functions]], listed in descending order from
# 412 down to 345; an empty list means no function is registered for
# that number.
# NOTE(review): presumably the number is the file format reached once the
# listed functions have run -- confirm against the lyx2lyx driver.
revert = [
    [412, [revert_html_css_as_file]],
    [411, [revert_tabularwidth]],
    [410, [revert_langpack]],
    [409, [revert_labeling]],
    [408, [revert_use_xetex]],
    [407, [revert_script]],
    [406, [revert_multirowOffset]],
    [405, [revert_passthru]],
    [404, []],
    [403, [revert_refstyle]],
    [402, [revert_flexnames]],
    [401, []],
    [400, [revert_diagram]],
    [399, [revert_rule]],
    [398, [revert_mathdots]],
    [397, [revert_mathrsfs]],
    [396, []],
    [395, [revert_nameref]],
    [394, [revert_DIN_C_pagesizes]],
    [393, [revert_makebox]],
    [392, [revert_argument]],
    [391, []],
    [390, [revert_align_decimal, revert_IEEEtran]],
    [389, [revert_output_sync]],
    [388, [revert_html_quotes]],
    [387, [revert_pagesizes]],
    [386, [revert_math_scale]],
    [385, [revert_lyx_version]],
    [384, [revert_shadedboxcolor]],
    [383, [revert_fontcolor]],
    [382, [revert_turkmen]],
    [381, [revert_notefontcolor]],
    [380, [revert_equalspacing_xymatrix]],
    [379, [revert_inset_preview]],
    [378, [revert_math_output]],
    [377, []],
    [376, [revert_multirow]],
    [375, [revert_includeall]],
    [374, [revert_includeonly]],
    [373, [revert_html_options]],
    [372, [revert_gbrief]],
    [371, [revert_fontenc]],
    [370, [revert_mhchem]],
    [369, [revert_suppress_date]],
    [368, [revert_author_id]],
    [367, [revert_hspace_glue_lengths]],
    [366, [revert_percent_vspace_lengths, revert_percent_hspace_lengths]],
    [365, [revert_percent_skip_lengths]],
    [364, [revert_paragraph_indentation]],
    [363, [revert_branch_filename]],
    [362, [revert_longtable_align]],
    [361, [revert_applemac]],
    [360, []],
    [359, [revert_nomencl_cwidth]],
    [358, [revert_nomencl_width]],
    [357, [revert_custom_processors]],
    [356, [revert_ulinelatex]],
    [355, []],
    [354, [revert_strikeout]],
    [353, [revert_printindexall]],
    [352, [revert_subindex]],
    [351, [revert_splitindex]],
    [350, [revert_backgroundcolor]],
    [349, [revert_outputformat]],
    [348, [revert_xetex]],
    [347, [revert_phantom, revert_hphantom, revert_vphantom]],
    [346, [revert_tabularvalign]],
    [345, [revert_swiss]],
]


# Running this module directly is a no-op.
if __name__ == "__main__":
    pass