lib/lyx2lyx/lyx_2_0.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # -*- coding: utf-8 -*-
   4 # Copyright (C) 2011 The LyX team
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  19
  20 """ Convert files to the file format generated by lyx 2.0"""
  21
  22 import re, string
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_token, find_end_of, find_tokens, \
  27   find_token_exact, find_end_of_inset, find_end_of_layout, \
  28   find_token_backwards, is_in_inset, get_value, get_quoted_value, \
  29   del_token, check_token, get_option_value
  30
  31 from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
  32   put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
  33   revert_font_attrs, hex2ratio, str2bool
  34
  35 ####################################################################
  36 # Private helper functions
  37
  38 def remove_option(lines, m, option):
  39     ''' removes option from line m. returns whether we did anything '''
  40     l = lines[m].find(option)
  41     if l == -1:
  42         return False
  43     val = lines[m][l:].split('"')[1]
  44     lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
  45     return True
  46
  47
  48 ###############################################################################
  49 ###
  50 ### Conversion and reversion routines
  51 ###
  52 ###############################################################################
  53
  54 def revert_swiss(document):
  55     " Set language german-ch to ngerman "
  56     i = 0
  57     if document.language == "german-ch":
  58         document.language = "ngerman"
  59         i = find_token(document.header, "\\language", 0)
  60         if i != -1:
  61             document.header[i] = "\\language ngerman"
  62     j = 0
  63     while True:
  64         j = find_token(document.body, "\\lang german-ch", j)
  65         if j == -1:
  66             return
  67         document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman")
  68         j = j + 1
  69
  70
  71 def revert_tabularvalign(document):
  72    " Revert the tabular valign option "
  73    i = 0
  74    while True:
  75       i = find_token(document.body, "\\begin_inset Tabular", i)
  76       if i == -1:
  77           return
  78       end = find_end_of_inset(document.body, i)
  79       if end == -1:
  80           document.warning("Can't find end of inset at line " + str(i))
  81           i += 1
  82           continue
  83       fline = find_token(document.body, "<features", i, end)
  84       if fline == -1:
  85           document.warning("Can't find features for inset at line " + str(i))
  86           i += 1
  87           continue
  88       p = document.body[fline].find("islongtable")
  89       if p != -1:
  90           q = document.body[fline].find("tabularvalignment")
  91           if q != -1:
  92               document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
  93           i += 1
  94           continue
  95
  96        # no longtable
  97       tabularvalignment = 'c'
  98       # which valignment is specified?
  99       m = document.body[fline].find('tabularvalignment="top"')
 100       if m != -1:
 101           tabularvalignment = 't'
 102       m = document.body[fline].find('tabularvalignment="bottom"')
 103       if m != -1:
 104           tabularvalignment = 'b'
 105       # delete tabularvalignment
 106       q = document.body[fline].find("tabularvalignment")
 107       if q != -1:
 108           document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
 109
 110       # don't add a box when centered
 111       if tabularvalignment == 'c':
 112           i = end
 113           continue
 114       subst = ['\\end_inset', '\\end_layout']
 115       document.body[end:end] = subst # just inserts those lines
 116       subst = ['\\begin_inset Box Frameless',
 117           'position "' + tabularvalignment +'"',
 118           'hor_pos "c"',
 119           'has_inner_box 1',
 120           'inner_pos "c"',
 121           'use_parbox 0',
 122           # we don't know the width, assume 50%
 123           'width "50col%"',
 124           'special "none"',
 125           'height "1in"',
 126           'height_special "totalheight"',
 127           'status open',
 128           '',
 129           '\\begin_layout Plain Layout']
 130       document.body[i:i] = subst # this just inserts the array at i
 131       # since there could be a tabular inside a tabular, we cannot
 132       # jump to end
 133       i += len(subst)
 134
 135
 136 def revert_phantom_types(document, ptype, cmd):
 137     " Reverts phantom to ERT "
 138     i = 0
 139     while True:
 140       i = find_token(document.body, "\\begin_inset Phantom " + ptype, i)
 141       if i == -1:
 142           return
 143       end = find_end_of_inset(document.body, i)
 144       if end == -1:
 145           document.warning("Can't find end of inset at line " + str(i))
 146           i += 1
 147           continue
 148       blay = find_token(document.body, "\\begin_layout Plain Layout", i, end)
 149       if blay == -1:
 150           document.warning("Can't find layout for inset at line " + str(i))
 151           i = end
 152           continue
 153       bend = find_end_of_layout(document.body, blay)
 154       if bend == -1:
 155           document.warning("Malformed LyX document: Could not find end of Phantom inset's layout.")
 156           i = end
 157           continue
 158       substi = ["\\begin_inset ERT", "status collapsed", "",
 159                 "\\begin_layout Plain Layout", "", "", "\\backslash",
 160                 cmd + "{", "\\end_layout", "", "\\end_inset"]
 161       substj = ["\\size default", "", "\\begin_inset ERT", "status collapsed", "",
 162                 "\\begin_layout Plain Layout", "", "}", "\\end_layout", "", "\\end_inset"]
 163       # do the later one first so as not to mess up the numbering
 164       document.body[bend:end + 1] = substj
 165       document.body[i:blay + 1] = substi
 166       i = end + len(substi) + len(substj) - (end - bend) - (blay - i) - 2
 167
 168
 169 def revert_phantom(document):
 170     revert_phantom_types(document, "Phantom", "phantom")
 171
 172 def revert_hphantom(document):
 173     revert_phantom_types(document, "HPhantom", "hphantom")
 174
 175 def revert_vphantom(document):
 176     revert_phantom_types(document, "VPhantom", "vphantom")
 177
 178
 179 def revert_xetex(document):
 180     " Reverts documents that use XeTeX "
 181
 182     i = find_token(document.header, '\\use_xetex', 0)
 183     if i == -1:
 184         document.warning("Malformed LyX document: Missing \\use_xetex.")
 185         return
 186     if not str2bool(get_value(document.header, "\\use_xetex", i)):
 187         del document.header[i]
 188         return
 189     del document.header[i]
 190
 191     # 1.) set doc encoding to utf8-plain
 192     i = find_token(document.header, "\\inputencoding", 0)
 193     if i == -1:
 194         document.warning("Malformed LyX document: Missing \\inputencoding.")
 195     else:
 196         document.header[i] = "\\inputencoding utf8-plain"
 197
 198     # 2.) check font settings
 199     # defaults
 200     roman = sans = typew = "default"
 201     osf = False
 202     sf_scale = tt_scale = 100.0
 203
 204     i = find_token(document.header, "\\font_roman", 0)
 205     if i == -1:
 206         document.warning("Malformed LyX document: Missing \\font_roman.")
 207     else:
 208         roman = get_value(document.header, "\\font_roman", i)
 209         document.header[i] = "\\font_roman default"
 210
 211     i = find_token(document.header, "\\font_sans", 0)
 212     if i == -1:
 213         document.warning("Malformed LyX document: Missing \\font_sans.")
 214     else:
 215         sans = get_value(document.header, "\\font_sans", i)
 216         document.header[i] = "\\font_sans default"
 217
 218     i = find_token(document.header, "\\font_typewriter", 0)
 219     if i == -1:
 220         document.warning("Malformed LyX document: Missing \\font_typewriter.")
 221     else:
 222         typew = get_value(document.header, "\\font_typewriter", i)
 223         document.header[i] = "\\font_typewriter default"
 224
 225     i = find_token(document.header, "\\font_osf", 0)
 226     if i == -1:
 227         document.warning("Malformed LyX document: Missing \\font_osf.")
 228     else:
 229         osf = str2bool(get_value(document.header, "\\font_osf", i))
 230         document.header[i] = "\\font_osf false"
 231
 232     i = find_token(document.header, "\\font_sc", 0)
 233     if i == -1:
 234         document.warning("Malformed LyX document: Missing \\font_sc.")
 235     else:
 236         # we do not need this value.
 237         document.header[i] = "\\font_sc false"
 238
 239     i = find_token(document.header, "\\font_sf_scale", 0)
 240     if i == -1:
 241         document.warning("Malformed LyX document: Missing \\font_sf_scale.")
 242     else:
 243       val = get_value(document.header, '\\font_sf_scale', i)
 244       try:
 245         # float() can throw
 246         sf_scale = float(val)
 247       except:
 248         document.warning("Invalid font_sf_scale value: " + val)
 249       document.header[i] = "\\font_sf_scale 100"
 250
 251     i = find_token(document.header, "\\font_tt_scale", 0)
 252     if i == -1:
 253         document.warning("Malformed LyX document: Missing \\font_tt_scale.")
 254     else:
 255         val = get_value(document.header, '\\font_tt_scale', i)
 256         try:
 257           # float() can throw
 258           tt_scale = float(val)
 259         except:
 260           document.warning("Invalid font_tt_scale value: " + val)
 261         document.header[i] = "\\font_tt_scale 100"
 262
 263     # 3.) set preamble stuff
 264     pretext = ['%% This document must be processed with xelatex!']
 265     pretext.append('\\usepackage{fontspec}')
 266     if roman != "default":
 267         pretext.append('\\setmainfont[Mapping=tex-text]{' + roman + '}')
 268     if sans != "default":
 269         sf = '\\setsansfont['
 270         if sf_scale != 100.0:
 271             sf += 'Scale=' + str(sf_scale / 100.0) + ','
 272         sf += 'Mapping=tex-text]{' + sans + '}'
 273         pretext.append(sf)
 274     if typew != "default":
 275         tw = '\\setmonofont'
 276         if tt_scale != 100.0:
 277             tw += '[Scale=' + str(tt_scale / 100.0) + ']'
 278         tw += '{' + typew + '}'
 279         pretext.append(tw)
 280     if osf:
 281         pretext.append('\\defaultfontfeatures{Numbers=OldStyle}')
 282     pretext.append('\\usepackage{xunicode}')
 283     pretext.append('\\usepackage{xltxtra}')
 284     insert_to_preamble(document, pretext)
 285
 286
 287 def revert_outputformat(document):
 288     " Remove default output format param "
 289
 290     if not del_token(document.header, '\\default_output_format', 0):
 291         document.warning("Malformed LyX document: Missing \\default_output_format.")
 292
 293
 294 def revert_backgroundcolor(document):
 295     " Reverts background color to preamble code "
 296     i = find_token(document.header, "\\backgroundcolor", 0)
 297     if i == -1:
 298         return
 299     colorcode = get_value(document.header, '\\backgroundcolor', i)
 300     del document.header[i]
 301     # don't clutter the preamble if backgroundcolor is not set
 302     if colorcode == "#ffffff":
 303         return
 304     red   = hex2ratio(colorcode[1:3])
 305     green = hex2ratio(colorcode[3:5])
 306     blue  = hex2ratio(colorcode[5:7])
 307     insert_to_preamble(document, \
 308         ['% To set the background color',
 309         '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
 310         '\\definecolor{page_backgroundcolor}{rgb}{' + red + ',' + green + ',' + blue + '}',
 311         '\\pagecolor{page_backgroundcolor}'])
 312
 313
 314 def add_use_indices(document):
 315     " Add \\use_indices if it is missing "
 316     i = find_token(document.header, '\\use_indices', 0)
 317     if i != -1:
 318         return i
 319     i = find_token(document.header, '\\use_bibtopic', 0)
 320     if i == -1:
 321         i = find_token(document.header, '\\cite_engine', 0)
 322     if i == -1:
 323         i = find_token(document.header, '\\use_mathdots', 0)
 324     if i == -1:
 325         i = find_token(document.header, '\\use_mhchem', 0)
 326     if i == -1:
 327         i = find_token(document.header, '\\use_esint', 0)
 328     if i == -1:
 329         i = find_token(document.header, '\\use_amsmath', 0)
 330     if i == -1:
 331         document.warning("Malformed LyX document: Missing \\use_indices.")
 332         return -1
 333     document.header.insert(i + 1, '\\use_indices 0')
 334     return i + 1
 335
 336
 337 def revert_splitindex(document):
 338     " Reverts splitindex-aware documents "
 339     i = add_use_indices(document)
 340     if i == -1:
 341         return
 342     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 343     del document.header[i]
 344     preamble = []
 345     if useindices:
 346          preamble.append("\\usepackage{splitidx})")
 347
 348     # deal with index declarations in the preamble
 349     i = 0
 350     while True:
 351         i = find_token(document.header, "\\index", i)
 352         if i == -1:
 353             break
 354         k = find_token(document.header, "\\end_index", i)
 355         if k == -1:
 356             document.warning("Malformed LyX document: Missing \\end_index.")
 357             return
 358         if useindices:
 359           line = document.header[i]
 360           l = re.compile(r'\\index (.*)$')
 361           m = l.match(line)
 362           iname = m.group(1)
 363           ishortcut = get_value(document.header, '\\shortcut', i, k)
 364           if ishortcut != "":
 365               preamble.append("\\newindex[" + iname + "]{" + ishortcut + "}")
 366         del document.header[i:k + 1]
 367     if preamble:
 368         insert_to_preamble(document, preamble)
 369
 370     # deal with index insets
 371     # these need to have the argument removed
 372     i = 0
 373     while True:
 374         i = find_token(document.body, "\\begin_inset Index", i)
 375         if i == -1:
 376             break
 377         line = document.body[i]
 378         l = re.compile(r'\\begin_inset Index (.*)$')
 379         m = l.match(line)
 380         itype = m.group(1)
 381         if itype == "idx" or indices == "false":
 382             document.body[i] = "\\begin_inset Index"
 383         else:
 384             k = find_end_of_inset(document.body, i)
 385             if k == -1:
 386                 document.warning("Can't find end of index inset!")
 387                 i += 1
 388                 continue
 389             content = lyx2latex(document, document.body[i:k])
 390             # escape quotes
 391             content = content.replace('"', r'\"')
 392             subst = put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")
 393             document.body[i:k + 1] = subst
 394         i = i + 1
 395
 396     # deal with index_print insets
 397     i = 0
 398     while True:
 399         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 400         if i == -1:
 401             return
 402         k = find_end_of_inset(document.body, i)
 403         ptype = get_quoted_value(document.body, 'type', i, k)
 404         if ptype == "idx":
 405             j = find_token(document.body, "type", i, k)
 406             del document.body[j]
 407         elif not useindices:
 408             del document.body[i:k + 1]
 409         else:
 410             subst = put_cmd_in_ert("\\printindex[" + ptype + "]{}")
 411             document.body[i:k + 1] = subst
 412         i = i + 1
 413
 414
 415 def convert_splitindex(document):
 416     " Converts index and printindex insets to splitindex-aware format "
 417     add_use_indices(document)
 418     i = 0
 419     while True:
 420         i = find_token(document.body, "\\begin_inset Index", i)
 421         if i == -1:
 422             break
 423         document.body[i] = document.body[i].replace("\\begin_inset Index",
 424             "\\begin_inset Index idx")
 425         i = i + 1
 426     i = 0
 427     while True:
 428         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 429         if i == -1:
 430             return
 431         if document.body[i + 1].find('LatexCommand printindex') == -1:
 432             document.warning("Malformed LyX document: Incomplete printindex inset.")
 433             return
 434         subst = ["LatexCommand printindex",
 435             "type \"idx\""]
 436         document.body[i + 1:i + 2] = subst
 437         i = i + 1
 438
 439
 440 def revert_subindex(document):
 441     " Reverts \\printsubindex CommandInset types "
 442     i = add_use_indices(document)
 443     if i == -1:
 444         return
 445     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 446     i = 0
 447     while True:
 448         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 449         if i == -1:
 450             return
 451         k = find_end_of_inset(document.body, i)
 452         ctype = get_value(document.body, 'LatexCommand', i, k)
 453         if ctype != "printsubindex":
 454             i = k + 1
 455             continue
 456         ptype = get_quoted_value(document.body, 'type', i, k)
 457         if not useindices:
 458             del document.body[i:k + 1]
 459         else:
 460             subst = put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")
 461             document.body[i:k + 1] = subst
 462         i = i + 1
 463
 464
 465 def revert_printindexall(document):
 466     " Reverts \\print[sub]index* CommandInset types "
 467     i = add_use_indices(document)
 468     if i == -1:
 469         return
 470     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 471     i = 0
 472     while True:
 473         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 474         if i == -1:
 475             return
 476         k = find_end_of_inset(document.body, i)
 477         ctype = get_value(document.body, 'LatexCommand', i, k)
 478         if ctype != "printindex*" and ctype != "printsubindex*":
 479             i = k
 480             continue
 481         if not useindices:
 482             del document.body[i:k + 1]
 483         else:
 484             subst = put_cmd_in_ert("\\" + ctype + "{}")
 485             document.body[i:k + 1] = subst
 486         i = i + 1
 487
 488
 489 def revert_strikeout(document):
 490   " Reverts \\strikeout font attribute "
 491   changed = revert_font_attrs(document.body, "\\uuline", "\\uuline")
 492   changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed
 493   changed = revert_font_attrs(document.body, "\\strikeout", "\\sout")  or changed
 494   if changed == True:
 495     insert_to_preamble(document, \
 496         ['%  for proper underlining',
 497         '\\PassOptionsToPackage{normalem}{ulem}',
 498         '\\usepackage{ulem}'])
 499
 500
 501 def revert_ulinelatex(document):
 502     " Reverts \\uline font attribute "
 503     i = find_token(document.body, '\\bar under', 0)
 504     if i == -1:
 505         return
 506     insert_to_preamble(document,\
 507             ['%  for proper underlining',
 508             '\\PassOptionsToPackage{normalem}{ulem}',
 509             '\\usepackage{ulem}',
 510             '\\let\\cite@rig\\cite',
 511             '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}',
 512             '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}',
 513             '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}'])
 514
 515
 516 def revert_custom_processors(document):
 517     " Remove bibtex_command and index_command params "
 518
 519     if not del_token(document.header, '\\bibtex_command', 0):
 520         document.warning("Malformed LyX document: Missing \\bibtex_command.")
 521
 522     if not del_token(document.header, '\\index_command', 0):
 523         document.warning("Malformed LyX document: Missing \\index_command.")
 524
 525
 526 def convert_nomencl_width(document):
 527     " Add set_width param to nomencl_print "
 528     i = 0
 529     while True:
 530       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 531       if i == -1:
 532         break
 533       document.body.insert(i + 2, "set_width \"none\"")
 534       i = i + 1
 535
 536
 537 def revert_nomencl_width(document):
 538     " Remove set_width param from nomencl_print "
 539     i = 0
 540     while True:
 541       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 542       if i == -1:
 543         break
 544       j = find_end_of_inset(document.body, i)
 545       if not del_token(document.body, "set_width", i, j):
 546         document.warning("Can't find set_width option for nomencl_print!")
 547       i = j
 548
 549
 550 def revert_nomencl_cwidth(document):
 551     " Remove width param from nomencl_print "
 552     i = 0
 553     while True:
 554       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 555       if i == -1:
 556         break
 557       j = find_end_of_inset(document.body, i)
 558       l = find_token(document.body, "width", i, j)
 559       if l == -1:
 560         i = j
 561         continue
 562       width = get_quoted_value(document.body, "width", i, j)
 563       del document.body[l]
 564       insert_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
 565       i = j - 1
 566
 567
 568 def revert_applemac(document):
 569     " Revert applemac encoding to auto "
 570     if document.encoding != "applemac":
 571       return
 572     document.encoding = "auto"
 573     i = find_token(document.header, "\\encoding", 0)
 574     if i != -1:
 575         document.header[i] = "\\encoding auto"
 576
 577
 578 def revert_longtable_align(document):
 579     " Remove longtable alignment setting "
 580     i = 0
 581     while True:
 582       i = find_token(document.body, "\\begin_inset Tabular", i)
 583       if i == -1:
 584           break
 585       end = find_end_of_inset(document.body, i)
 586       if end == -1:
 587           document.warning("Can't find end of inset at line " + str(i))
 588           i += 1
 589           continue
 590       fline = find_token(document.body, "<features", i, end)
 591       if fline == -1:
 592           document.warning("Can't find features for inset at line " + str(i))
 593           i += 1
 594           continue
 595       j = document.body[fline].find("longtabularalignment")
 596       if j == -1:
 597           i += 1
 598           continue
 599       # FIXME Is this correct? It wipes out everything after the
 600       # one we found.
 601       document.body[fline] = document.body[fline][:j - 1] + '>'
 602       # since there could be a tabular inside this one, we
 603       # cannot jump to end.
 604       i += 1
 605
 606
 607 def revert_branch_filename(document):
 608     " Remove \\filename_suffix parameter from branches "
 609     i = 0
 610     while True:
 611         i = find_token(document.header, "\\filename_suffix", i)
 612         if i == -1:
 613             return
 614         del document.header[i]
 615
 616
 617 def revert_paragraph_indentation(document):
 618     " Revert custom paragraph indentation to preamble code "
 619     i = find_token(document.header, "\\paragraph_indentation", 0)
 620     if i == -1:
 621       return
 622     length = get_value(document.header, "\\paragraph_indentation", i)
 623     # we need only remove the line if indentation is default
 624     if length != "default":
 625       # handle percent lengths
 626       length = latex_length(length)[1]
 627       insert_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"])
 628     del document.header[i]
 629
 630
 631 def revert_percent_skip_lengths(document):
 632     " Revert relative lengths for paragraph skip separation to preamble code "
 633     i = find_token(document.header, "\\defskip", 0)
 634     if i == -1:
 635         return
 636     length = get_value(document.header, "\\defskip", i)
 637     # only revert when a custom length was set and when
 638     # it used a percent length
 639     if length in ('smallskip', 'medskip', 'bigskip'):
 640         return
 641     # handle percent lengths
 642     percent, length = latex_length(length)
 643     if percent:
 644         insert_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"])
 645         # set defskip to medskip as default
 646         document.header[i] = "\\defskip medskip"
 647
 648
 649 def revert_percent_vspace_lengths(document):
 650     " Revert relative VSpace lengths to ERT "
 651     i = 0
 652     while True:
 653       i = find_token(document.body, "\\begin_inset VSpace", i)
 654       if i == -1:
 655           break
 656       # only revert if a custom length was set and if
 657       # it used a percent length
 658       r = re.compile(r'\\begin_inset VSpace (.*)$')
 659       m = r.match(document.body[i])
 660       length = m.group(1)
 661       if length in ('defskip', 'smallskip', 'medskip', 'bigskip', 'vfill'):
 662          i += 1
 663          continue
 664       # check if the space has a star (protected space)
 665       protected = (document.body[i].rfind("*") != -1)
 666       if protected:
 667           length = length.rstrip('*')
 668       # handle percent lengths
 669       percent, length = latex_length(length)
 670       # revert the VSpace inset to ERT
 671       if percent:
 672           if protected:
 673               subst = put_cmd_in_ert("\\vspace*{" + length + "}")
 674           else:
 675               subst = put_cmd_in_ert("\\vspace{" + length + "}")
 676           document.body[i:i + 2] = subst
 677       i += 1
 678
 679
 680 def revert_percent_hspace_lengths(document):
 681     " Revert relative HSpace lengths to ERT "
 682     i = 0
 683     while True:
 684       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 685       if i == -1:
 686           break
 687       j = find_end_of_inset(document.body, i)
 688       if j == -1:
 689           document.warning("Can't find end of inset at line " + str(i))
 690           i += 1
 691           continue
 692       # only revert if a custom length was set...
 693       length = get_value(document.body, '\\length', i + 1, j)
 694       if length == '':
 695           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 696           i = j
 697           continue
 698       protected = ""
 699       if document.body[i].find("\\hspace*{}") != -1:
 700           protected = "*"
 701       # ...and if it used a percent length
 702       percent, length = latex_length(length)
 703       # revert the HSpace inset to ERT
 704       if percent:
 705           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 706           document.body[i:j + 1] = subst
 707       # if we did a substitution, this will still be ok
 708       i = j
 709
 710
 711 def revert_hspace_glue_lengths(document):
 712     " Revert HSpace glue lengths to ERT "
 713     i = 0
 714     while True:
 715       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 716       if i == -1:
 717           break
 718       j = find_end_of_inset(document.body, i)
 719       if j == -1:
 720           document.warning("Can't find end of inset at line " + str(i))
 721           i += 1
 722           continue
 723       length = get_value(document.body, '\\length', i + 1, j)
 724       if length == '':
 725           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 726           i = j
 727           continue
 728       protected = ""
 729       if document.body[i].find("\\hspace*{}") != -1:
 730           protected = "*"
 731       # only revert if the length contains a plus or minus at pos != 0
 732       if length.find('-',1) != -1 or length.find('+',1) != -1:
 733           # handle percent lengths
 734           length = latex_length(length)[1]
 735           # revert the HSpace inset to ERT
 736           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 737           document.body[i:j+1] = subst
 738       i = j
 739
 740
 741 def convert_author_id(document):
 742     " Add the author_id to the \\author definition and make sure 0 is not used"
 743     i = 0
 744     anum = 1
 745     re_author = re.compile(r'(\\author) (\".*\")\s*(.*)$')
 746
 747     while True:
 748         i = find_token(document.header, "\\author", i)
 749         if i == -1:
 750             break
 751         m = re_author.match(document.header[i])
 752         if m:
 753             name = m.group(2)
 754             email = m.group(3)
 755             document.header[i] = "\\author %i %s %s" % (anum, name, email)
 756         anum += 1
 757         i += 1
 758
 759     i = 0
 760     while True:
 761         i = find_token(document.body, "\\change_", i)
 762         if i == -1:
 763             break
 764         change = document.body[i].split(' ');
 765         if len(change) == 3:
 766             type = change[0]
 767             author_id = int(change[1])
 768             time = change[2]
 769             document.body[i] = "%s %i %s" % (type, author_id + 1, time)
 770         i += 1
 771
 772
 773 def revert_author_id(document):
 774     " Remove the author_id from the \\author definition "
 775     i = 0
 776     anum = 0
 777     rx = re.compile(r'(\\author)\s+(-?\d+)\s+(\".*\")\s*(.*)$')
 778     idmap = dict()
 779
 780     while True:
 781         i = find_token(document.header, "\\author", i)
 782         if i == -1:
 783             break
 784         m = rx.match(document.header[i])
 785         if m:
 786             author_id = int(m.group(2))
 787             idmap[author_id] = anum
 788             name = m.group(3)
 789             email = m.group(4)
 790             document.header[i] = "\\author %s %s" % (name, email)
 791         i += 1
 792         # FIXME Should this be incremented if we didn't match?
 793         anum += 1
 794
 795     i = 0
 796     while True:
 797         i = find_token(document.body, "\\change_", i)
 798         if i == -1:
 799             break
 800         change = document.body[i].split(' ');
 801         if len(change) == 3:
 802             type = change[0]
 803             author_id = int(change[1])
 804             time = change[2]
 805             document.body[i] = "%s %i %s" % (type, idmap[author_id], time)
 806         i += 1
 807
 808
 809 def revert_suppress_date(document):
 810     " Revert suppressing of default document date to preamble code "
 811     i = find_token(document.header, "\\suppress_date", 0)
 812     if i == -1:
 813         return
 814     # remove the preamble line and write to the preamble
 815     # when suppress_date was true
 816     date = str2bool(get_value(document.header, "\\suppress_date", i))
 817     if date:
 818         add_to_preamble(document, ["\\date{}"])
 819     del document.header[i]
 820
 821
 822 def convert_mhchem(document):
 823     "Set mhchem to off for versions older than 1.6.x"
 824     if document.start < 277:
 825         # LyX 1.5.x and older did never load mhchem.
 826         # Therefore we must switch it off: Documents that use mhchem have
 827         # a manual \usepackage anyway, and documents not using mhchem but
 828         # custom macros with the same names as mhchem commands might get
 829         # corrupted if mhchem is automatically loaded.
 830         mhchem = 0 # off
 831     else:
 832         # LyX 1.6.x did always load mhchem automatically.
 833         mhchem = 1 # auto
 834     i = find_token(document.header, "\\use_esint", 0)
 835     if i == -1:
 836         # pre-1.5.x document
 837         i = find_token(document.header, "\\use_amsmath", 0)
 838     if i == -1:
 839         document.warning("Malformed LyX document: Could not find amsmath os esint setting.")
 840         return
 841     document.header.insert(i + 1, "\\use_mhchem %d" % mhchem)
 842
 843
 844 def revert_mhchem(document):
 845     "Revert mhchem loading to preamble code"
 846
 847     mhchem = "off"
 848     i = find_token(document.header, "\\use_mhchem", 0)
 849     if i == -1:
 850         document.warning("Malformed LyX document: Could not find mhchem setting.")
 851         mhchem = "auto"
 852     else:
 853         val = get_value(document.header, "\\use_mhchem", i)
 854         if val == "1":
 855             mhchem = "auto"
 856         elif val == "2":
 857             mhchem = "on"
 858         del document.header[i]
 859
 860     if mhchem == "off":
 861       # don't load case
 862       return
 863
 864     if mhchem == "auto":
 865         i = 0
 866         while True:
 867             i = find_token(document.body, "\\begin_inset Formula", i)
 868             if i == -1:
 869                break
 870             line = document.body[i]
 871             if line.find("\\ce{") != -1 or line.find("\\cf{") != -1:
 872               mhchem = "on"
 873               break
 874             i += 1
 875
 876     if mhchem == "on":
 877         pre = ["\\PassOptionsToPackage{version=3}{mhchem}",
 878           "\\usepackage{mhchem}"]
 879         insert_to_preamble(document, pre)
 880
 881
 882 def revert_fontenc(document):
 883     " Remove fontencoding param "
 884     if not del_token(document.header, '\\fontencoding', 0):
 885         document.warning("Malformed LyX document: Missing \\fontencoding.")
 886
 887
 888 def merge_gbrief(document):
 889     " Merge g-brief-en and g-brief-de to one class "
 890
 891     if document.textclass != "g-brief-de":
 892         if document.textclass == "g-brief-en":
 893             document.textclass = "g-brief"
 894             document.set_textclass()
 895         return
 896
 897     obsoletedby = { "Brieftext":       "Letter",
 898                     "Unterschrift":    "Signature",
 899                     "Strasse":         "Street",
 900                     "Zusatz":          "Addition",
 901                     "Ort":             "Town",
 902                     "Land":            "State",
 903                     "RetourAdresse":   "ReturnAddress",
 904                     "MeinZeichen":     "MyRef",
 905                     "IhrZeichen":      "YourRef",
 906                     "IhrSchreiben":    "YourMail",
 907                     "Telefon":         "Phone",
 908                     "BLZ":             "BankCode",
 909                     "Konto":           "BankAccount",
 910                     "Postvermerk":     "PostalComment",
 911                     "Adresse":         "Address",
 912                     "Datum":           "Date",
 913                     "Betreff":         "Reference",
 914                     "Anrede":          "Opening",
 915                     "Anlagen":         "Encl.",
 916                     "Verteiler":       "cc",
 917                     "Gruss":           "Closing"}
 918     i = 0
 919     while 1:
 920         i = find_token(document.body, "\\begin_layout", i)
 921         if i == -1:
 922             break
 923
 924         layout = document.body[i][14:]
 925         if layout in obsoletedby:
 926             document.body[i] = "\\begin_layout " + obsoletedby[layout]
 927
 928         i += 1
 929
 930     document.textclass = "g-brief"
 931     document.set_textclass()
 932
 933
 934 def revert_gbrief(document):
 935     " Revert g-brief to g-brief-en "
 936     if document.textclass == "g-brief":
 937         document.textclass = "g-brief-en"
 938         document.set_textclass()
 939
 940
 941 def revert_html_options(document):
 942     " Remove html options "
 943     del_token(document.header, '\\html_use_mathml', 0)
 944     del_token(document.header, '\\html_be_strict', 0)
 945
 946
 947 def revert_includeonly(document):
 948     i = 0
 949     while True:
 950         i = find_token(document.header, "\\begin_includeonly", i)
 951         if i == -1:
 952             return
 953         j = find_end_of(document.header, i, "\\begin_includeonly", "\\end_includeonly")
 954         if j == -1:
 955             document.warning("Unable to find end of includeonly section!!")
 956             break
 957         document.header[i : j + 1] = []
 958
 959
 960 def revert_includeall(document):
 961     " Remove maintain_unincluded_children param "
 962     del_token(document.header, '\\maintain_unincluded_children', 0)
 963
 964
 965 def revert_multirow(document):
 966     " Revert multirow cells in tables to TeX-code"
 967
 968     # first, let's find out if we need to do anything
 969     # cell type 3 is multirow begin cell
 970     i = find_token(document.body, '<cell multirow="3"', 0)
 971     if i == -1:
 972       return
 973
 974     add_to_preamble(document, ["\\usepackage{multirow}"])
 975
 976     begin_table = 0
 977     while True:
 978         # find begin/end of table
 979         begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
 980         if begin_table == -1:
 981             break
 982         end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
 983         if end_table == -1:
 984             document.warning("Malformed LyX document: Could not find end of table.")
 985             begin_table += 1
 986             continue
 987         # does this table have multirow?
 988         i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
 989         if i == -1:
 990             begin_table = end_table
 991             continue
 992
 993         # store the number of rows and columns
 994         numrows = get_option_value(document.body[begin_table], "rows")
 995         numcols = get_option_value(document.body[begin_table], "columns")
 996         try:
 997           numrows = int(numrows)
 998           numcols = int(numcols)
 999         except:
1000           document.warning("Unable to determine rows and columns!")
1001           begin_table = end_table
1002           continue
1003
1004         mrstarts = []
1005         multirows = []
1006         # collect info on rows and columns of this table.
1007         begin_row = begin_table
1008         for row in range(numrows):
1009             begin_row = find_token(document.body, '<row>', begin_row, end_table)
1010             if begin_row == -1:
1011               document.warning("Can't find row " + str(row + 1))
1012               break
1013             end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
1014             if end_row == -1:
1015               document.warning("Can't find end of row " + str(row + 1))
1016               break
1017             begin_cell = begin_row
1018             multirows.append([])
1019             for column in range(numcols):
1020                 begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
1021                 if begin_cell == -1:
1022                   document.warning("Can't find column " + str(column + 1) + \
1023                     "in row " + str(row + 1))
1024                   break
1025                 # NOTE
1026                 # this will fail if someone puts "</cell>" in a cell, but
1027                 # that seems fairly unlikely.
1028                 end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
1029                 if end_cell == -1:
1030                   document.warning("Can't find end of column " + str(column + 1) + \
1031                     "in row " + str(row + 1))
1032                   break
1033                 multirows[row].append([begin_cell, end_cell, 0])
1034                 if document.body[begin_cell].find('multirow="3"') != -1:
1035                   multirows[row][column][2] = 3 # begin multirow
1036                   mrstarts.append([row, column])
1037                 elif document.body[begin_cell].find('multirow="4"') != -1:
1038                   multirows[row][column][2] = 4 # in multirow
1039                 begin_cell = end_cell
1040             begin_row = end_row
1041         # end of table info collection
1042
1043         # work from the back to avoid messing up numbering
1044         mrstarts.reverse()
1045         for m in mrstarts:
1046             row = m[0]
1047             col = m[1]
1048             # get column width
1049             col_width = get_option_value(document.body[begin_table + 2 + col], "width")
1050             # "0pt" means that no width is specified
1051             if not col_width or col_width == "0pt":
1052               col_width = "*"
1053             # determine the number of cells that are part of the multirow
1054             nummrs = 1
1055             for r in range(row + 1, numrows):
1056                 if multirows[r][col][2] != 4:
1057                   break
1058                 nummrs += 1
1059                 # take the opportunity to revert this line
1060                 lineno = multirows[r][col][0]
1061                 document.body[lineno] = document.body[lineno].\
1062                   replace(' multirow="4" ', ' ').\
1063                   replace('valignment="middle"', 'valignment="top"').\
1064                   replace(' topline="true" ', ' ')
1065                 # remove bottom line of previous multirow-part cell
1066                 lineno = multirows[r-1][col][0]
1067                 document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
1068             # revert beginning cell
1069             bcell = multirows[row][col][0]
1070             ecell = multirows[row][col][1]
1071             document.body[bcell] = document.body[bcell].\
1072               replace(' multirow="3" ', ' ').\
1073               replace('valignment="middle"', 'valignment="top"')
1074             blay = find_token(document.body, "\\begin_layout", bcell, ecell)
1075             if blay == -1:
1076               document.warning("Can't find layout for cell!")
1077               continue
1078             bend = find_end_of_layout(document.body, blay)
1079             if bend == -1:
1080               document.warning("Can't find end of layout for cell!")
1081               continue
1082             # do the later one first, so as not to mess up the numbering
1083             # we are wrapping the whole cell in this ert
1084             # so before the end of the layout...
1085             document.body[bend:bend] = put_cmd_in_ert("}")
1086             # ...and after the beginning
1087             document.body[blay + 1:blay + 1] = \
1088               put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}{")
1089
1090         begin_table = end_table
1091
1092
def convert_math_output(document):
    " Convert \\html_use_mathml to \\html_math_output "
    i = find_token(document.header, "\\html_use_mathml", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_use_mathml\s+(\w+)')
    m = rgx.match(document.header[i])
    # MathML is also what an unparsable line is converted to
    newval = "0" # MathML
    if m:
        val = str2bool(m.group(1))
        if not val:
            newval = "2" # Images
    else:
        document.warning("Can't match " + document.header[i])
    document.header[i] = "\\html_math_output " + newval
1108
1109
def revert_math_output(document):
    " Revert \\html_math_output to \\html_use_mathml "
    i = find_token(document.header, "\\html_math_output", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_math_output\s+(\d)')
    m = rgx.match(document.header[i])
    # only the values 1 and 2 switch MathML off; everything else,
    # including an unparsable line, is reverted to "true"
    newval = "true"
    if m:
        val = m.group(1)
        if val == "1" or val == "2":
            newval = "false"
    else:
        document.warning("Unable to match " + document.header[i])
    document.header[i] = "\\html_use_mathml " + newval
1125
1126
1127
def revert_inset_preview(document):
    """ Dissolves the preview inset

    The \\begin_inset Preview and \\end_inset lines are removed together
    with the first \\begin_layout line and the last \\end_layout line
    inside the inset, so that the content of the inset becomes part of
    the surrounding text.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Preview", i)
        if i == -1:
            return
        iend = find_end_of_inset(document.body, i)
        if iend == -1:
            document.warning("Malformed LyX document: Could not find end of Preview inset.")
            i += 1
            continue

        # This has several issues.
        # We need to do something about the layouts inside InsetPreview.
        # If we just leave the first one, then we have something like:
        # \begin_layout Standard
        # ...
        # \begin_layout Standard
        # and we get a "no \end_layout" error. So something has to be done.
        # Ideally, we would check if it is the same as the layout we are in.
        # If so, we just remove it; if not, we end the active one. But it is
        # not easy to know what layout we are in, due to depth changes, etc,
        # and it is not clear to me how much work it is worth doing. In most
        # cases, the layout will probably be the same.
        #
        # For the same reason, we have to remove the \end_layout tag at the
        # end of the last layout in the inset. Again, that will sometimes be
        # wrong, but it will usually be right. To know what to do, we would
        # again have to know what layout the inset is in.

        blay = find_token(document.body, "\\begin_layout", i, iend)
        if blay == -1:
            document.warning("Can't find layout for preview inset!")
            # always do the later one first...
            del document.body[iend]
            del document.body[i]
            # deletions mean we do not need to reset i
            continue

        # This is where we would check what layout we are in.
        # The check for Standard is definitely wrong.
        #
        # lay = document.body[blay].split(None, 1)[1]
        # if lay != oldlayout:
        #     # record a boolean to tell us what to do later....
        #     # better to do it later, since (a) it won't mess up
        #     # the numbering and (b) we only modify at the end.

        # we want to delete the last \\end_layout in this inset, too.
        # note that this may not be the \\end_layout that goes with blay!!
        bend = find_end_of_layout(document.body, blay)
        if bend == -1 or bend > iend:
            # No usable end for the first layout: look for the last
            # \end_layout starting right behind blay, so that we can never
            # pick up a line that lies outside of the inset.
            bend = blay
        while True:
            tmp = find_token(document.body, "\\end_layout", bend + 1, iend)
            if tmp == -1:
                break
            bend = tmp
        if bend == blay:
            document.warning("Unable to find last layout in preview inset!")
            del document.body[iend]
            del document.body[i]
            # deletions mean we do not need to reset i
            continue
        # always do the later one first...
        del document.body[iend]
        del document.body[bend]
        del document.body[i:blay + 1]
        # we do not need to reset i
1196
1197
def revert_equalspacing_xymatrix(document):
    " Revert a Formula with xymatrix@! to an ERT inset "
    # a formula using plain \xymatrix was left in place
    plain_xymatrix_seen = False
    # at least one formula was turned into ERT
    reverted_any = False

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Formula", pos)
        if pos == -1:
            break
        end = find_end_of_inset(document.body, pos)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Formula inset.")
            pos += 1
            continue

        formula = document.body[pos:end]
        if any("\\xymatrix@!" in line for line in formula):
            reverted_any = True
            # drop the 21 characters of "\begin_inset Formula " and keep
            # the rest of the formula as it is
            content = [document.body[pos][21:]] + document.body[pos + 1:end]
            subst = put_cmd_in_ert(content)
            document.body[pos:end + 1] = subst
            # NOTE(review): this resumes the search at an index that is
            # smaller than pos + len(subst) -- confirm this is intended.
            pos += len(subst) - (end - pos) + 1
        else:
            if any("\\xymatrix" in line for line in formula):
                plain_xymatrix_seen = True
            pos = end + 1

    if reverted_any and not plain_xymatrix_seen:
        add_to_preamble(document, ['\\usepackage[all]{xy}'])
1236
1237
def revert_notefontcolor(document):
    """ Reverts greyed-out note font color to preamble code

    The \\notefontcolor header setting is removed.  If the document
    contains a greyed-out note, the color is defined in the preamble and
    the lyxgreyedout environment is redefined to use it.
    """

    i = find_token(document.header, "\\notefontcolor", 0)
    if i == -1:
        return

    colorcode = get_value(document.header, '\\notefontcolor', i)
    del document.header[i]

    # are there any grey notes?
    if find_token(document.body, "\\begin_inset Note Greyedout", 0) == -1:
        # no need to do anything else, and \renewcommand will throw
        # an error since lyxgreyedout will not exist.
        return

    # the color code is in the form #rrggbb where every character denotes a hex number
    red = hex2ratio(colorcode[1:3])
    green = hex2ratio(colorcode[3:5])
    blue = hex2ratio(colorcode[5:7])
    # write the preamble; every list entry is one preamble line
    insert_to_preamble(document,
      [ '%  for greyed-out notes',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        '\\definecolor{note_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue),
        '\\renewenvironment{lyxgreyedout}',
        ' {\\textcolor{note_fontcolor}\\bgroup}{\\egroup}'])
1265
1266
def revert_turkmen(document):
    "Set language Turkmen to English"

    # document language
    if document.language == "turkmen":
        document.language = "english"
        pos = find_token(document.header, "\\language", 0)
        if pos != -1:
            document.header[pos] = "\\language english"

    # language switches inside the text
    pos = find_token(document.body, "\\lang turkmen", 0)
    while pos != -1:
        line = document.body[pos]
        document.body[pos] = line.replace("\\lang turkmen", "\\lang english")
        pos = find_token(document.body, "\\lang turkmen", pos + 1)
1283
1284
def revert_fontcolor(document):
    " Reverts font color to preamble code "
    pos = find_token(document.header, "\\fontcolor", 0)
    if pos == -1:
        return
    colorcode = get_value(document.header, '\\fontcolor', pos)
    del document.header[pos]
    if colorcode == "#000000":
        # don't clutter the preamble if font color is not set
        return
    # the color code is in the form #rrggbb where every character denotes
    # a hex number: convert the three pairs in the order red, green, blue
    rgb = tuple([hex2ratio(colorcode[k:k + 2]) for k in (1, 3, 5)])
    preamble = [
        '%  Set the font color',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        '\\definecolor{document_fontcolor}{rgb}{%s,%s,%s}' % rgb,
        '\\color{document_fontcolor}',
    ]
    insert_to_preamble(document, preamble)
1305
1306
def revert_shadedboxcolor(document):
    " Reverts shaded box color to preamble code "
    pos = find_token(document.header, "\\boxbgcolor", 0)
    if pos == -1:
        return
    colorcode = get_value(document.header, '\\boxbgcolor', pos)
    del document.header[pos]
    # the color code is in the form #rrggbb: convert the three pairs in
    # the order red, green, blue
    rgb = tuple([hex2ratio(colorcode[k:k + 2]) for k in (1, 3, 5)])
    preamble = [
        '%  Set the color of boxes with shaded background',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        "\\definecolor{shadecolor}{rgb}{%s,%s,%s}" % rgb,
    ]
    insert_to_preamble(document, preamble)
1323
1324
def revert_lyx_version(document):
    """ Reverts LyX Version information from Inset Info

    Every Info inset with type "lyxinfo" and arg "version" is replaced by
    a plain text line holding the lyx2lyx version, or "LyX version" if
    that version cannot be determined.
    """
    version = "LyX version"
    try:
        import lyx2lyx_version
        version = lyx2lyx_version.version
    except Exception:
        # Best effort: keep the generic text if the version module is
        # missing or unusable.  Not a bare "except:", so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        pass

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Info', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i += 1
            continue

        # We expect:
        # \begin_inset Info
        # type  "lyxinfo"
        # arg   "version"
        # \end_inset
        typ = get_quoted_value(document.body, "type", i, j)
        arg = get_quoted_value(document.body, "arg", i, j)
        if arg != "version" or typ != "lyxinfo":
            i = j + 1
            continue

        # We do not actually know the version of LyX used to produce the document.
        # But we can use our version, since we are reverting.
        s = [version]
        # Now we want to check if the line after "\end_inset" is empty. It normally
        # is, so we want to remove it, too.
        lastline = j + 1
        if lastline < len(document.body) and document.body[lastline].strip() == "":
            lastline = j + 2
        document.body[i: lastline] = s
        i = i + 1
1366
1367
def revert_math_scale(document):
    " Remove math scaling and LaTeX options "
    obsolete = ('\\html_math_img_scale',
                '\\html_latex_start',
                '\\html_latex_end')
    for token in obsolete:
        del_token(document.header, token, 0)
1373
1374
def revert_pagesizes(document):
    " Revert page sizes to default "
    # the page sizes whose \papersize line is dropped
    reverted_sizes = ("a0paper", "a1paper", "a2paper", "a6paper",
                      "b0paper", "b1paper", "b2paper", "b6paper",
                      "b0j", "b1j", "b2j", "b3j", "b4j", "b5j", "b6j")
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    # the size follows the 11 characters of "\papersize "
    if document.header[pos][11:] in reverted_sizes:
        del document.header[pos]
1386
1387
def revert_DIN_C_pagesizes(document):
    " Revert DIN C page sizes to default "
    din_c_sizes = ("c0paper", "c1paper", "c2paper", "c3paper",
                   "c4paper", "c5paper", "c6paper")
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    # the size follows the 11 characters of "\papersize "
    if document.header[pos][11:] in din_c_sizes:
        del document.header[pos]
1397
1398
def convert_html_quotes(document):
    " Remove quotes around html_latex_start and html_latex_end "
    # (header token, pattern capturing the quoted value)
    settings = (
        ('\\html_latex_start', r'\\html_latex_start\s+"(.*)"'),
        ('\\html_latex_end', r'\\html_latex_end\s+"(.*)"'),
    )
    for token, pattern in settings:
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        quoted = re.compile(pattern).match(document.header[pos])
        # a value that is not quoted is left untouched
        if quoted:
            document.header[pos] = token + " " + quoted.group(1)
1417
1418
def revert_html_quotes(document):
    " Put quotes around html_latex_start and html_latex_end "
    # (header token, pattern capturing the value, warning for bad lines)
    settings = (
        ('\\html_latex_start', r'\\html_latex_start\s+(.*)',
         "Weird html_latex_start line: "),
        ('\\html_latex_end', r'\\html_latex_end\s+(.*)',
         "Weird html_latex_end line: "),
    )
    for token, pattern, complaint in settings:
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        line = document.header[pos]
        value = re.compile(pattern).match(line)
        if value:
            document.header[pos] = token + " \"" + value.group(1) + "\""
        else:
            # a line we cannot parse is dropped
            document.warning(complaint + line)
            del document.header[pos]
1443
1444
def revert_output_sync(document):
    " Remove forward search options "
    # the order of the original is kept: \output_sync_macro goes first
    for token in ('\\output_sync_macro', '\\output_sync'):
        del_token(document.header, token, 0)
1449
1450
def revert_align_decimal(document):
    """ Revert decimal aligned table columns to centered columns

    In every Tabular inset, the column definitions in front of the first
    cell are checked: for a column with alignment="decimal" the
    decimal_point option is removed and the alignment is set to "center".
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(i))
            i += 1
            continue
        cell = find_token(document.body, "<cell", i, j)
        if cell == -1:
            document.warning("Can't find any cells in Tabular inset at line " + str(i))
            i = j
            continue
        k = i + 1
        while True:
            k = find_token(document.body, "<column", k, cell)
            if k == -1:
                # done with the columns of this table only: the remaining
                # tables of the document still have to be processed
                break
            if 'alignment="decimal"' in document.body[k]:
                remove_option(document.body, k, 'decimal_point')
                document.body[k] = \
                  document.body[k].replace('alignment="decimal"', 'alignment="center"')
            k += 1
        # continue behind the column definitions (and not behind the end
        # of the inset), so that tables nested in a cell are found as well
        i = cell
1479
1480
def convert_optarg(document):
    " Convert \\begin_inset OptArg to \\begin_inset Argument "
    pos = find_token(document.body, '\\begin_inset OptArg', 0)
    while pos != -1:
        document.body[pos] = "\\begin_inset Argument"
        pos = find_token(document.body, '\\begin_inset OptArg', pos + 1)
1490
1491
def revert_argument(document):
    " Convert \\begin_inset Argument to \\begin_inset OptArg "
    pos = find_token(document.body, '\\begin_inset Argument', 0)
    while pos != -1:
        document.body[pos] = "\\begin_inset OptArg"
        pos = find_token(document.body, '\\begin_inset Argument', pos + 1)
1501
1502
def revert_makebox(document):
    """ Convert \\makebox to TeX code

    The use_makebox option is removed from every box inset.  A frameless
    box whose use_makebox option is 1 is replaced by its content wrapped
    in \\makebox[width][alignment]{...}, written as ERT.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Box', i)
        if i == -1:
            break
        z = find_end_of_inset(document.body, i)
        if z == -1:
            document.warning("Malformed LyX document: Can't find end of box inset.")
            i += 1
            continue
        blay = find_token(document.body, "\\begin_layout", i, z)
        if blay == -1:
            document.warning("Malformed LyX document: Can't find layout in box.")
            i = z
            continue
        # by looking before the layout we make sure we're actually finding
        # an option, not text.
        j = find_token(document.body, 'use_makebox', i, blay)
        if j == -1:
            # continue inside of the box (box insets may be nested)
            i = blay + 1
            continue

        # get_value returns the text that follows the token, so the
        # option has to be compared with a string
        if not check_token(document.body[i], "\\begin_inset Box Frameless") \
          or get_value(document.body, 'use_makebox', j) != "1":
            del document.body[j]
            # One line in front of the layout is gone, so blay is now the
            # first line inside of the layout.  Continue there and not
            # behind the box (box insets may be nested).
            i = blay
            continue
        bend = find_end_of_layout(document.body, blay)
        if bend == -1 or bend > z:
            document.warning("Malformed LyX document: Can't find end of layout in box.")
            # the option must not survive in the reverted document
            del document.body[j]
            i = z
            continue
        # determine the alignment
        align = get_quoted_value(document.body, 'hor_pos', i, blay, "c")
        # determine the width
        length = get_quoted_value(document.body, 'width', i, blay, "50col%")
        length = latex_length(length)[1]
        # do the later replacement first, so as not to mess up the numbering
        # remove the \end_layout \end_inset pair
        document.body[bend:z + 1] = put_cmd_in_ert("}")
        subst = "\\makebox[" + length + "][" \
          + align + "]{"
        # this replaces the options of the box as well (incl. use_makebox)
        document.body[i:blay + 1] = put_cmd_in_ert(subst)
        i += 1
1548
1549
def convert_use_makebox(document):
    " Adds use_makebox option for boxes "
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Box', pos)
        if pos == -1:
            return
        # The options of a box are located in front of its first layout.
        # Restricting the search to these lines makes sure that we find
        # the use_parbox option of this box and not some text elsewhere.
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Can't find end of box inset!!")
            pos += 1
            continue
        first_layout = find_token(document.body, "\\begin_layout", pos, inset_end)
        if first_layout == -1:
            document.warning("Can't find layout in box inset!!")
            pos = inset_end
            continue
        parbox = find_token(document.body, 'use_parbox', pos, first_layout)
        if parbox == -1:
            document.warning("Malformed LyX document: Can't find use_parbox statement in box.")
            pos = inset_end
            continue
        # the new option goes right behind use_parbox
        document.body.insert(parbox + 1, "use_makebox 0")
        # not inset_end + 1 (box insets may be nested)
        pos = first_layout + 1
1577
1578
def revert_IEEEtran(document):
    " Convert IEEEtran layouts and styles to TeX code "

    if document.textclass != "IEEEtran":
        return

    for inset, command in (("IEEE membership", "\\IEEEmembership"),
                           ("Lowercase", "\\MakeLowercase")):
        revert_flex_inset(document.body, inset, command)

    # layouts that are moved to the preamble as a LaTeX command
    preamble_command = {"Special Paper Notice": "\\IEEEspecialpapernotice",
                        "After Title Text":     "\\IEEEaftertitletext",
                        "Publication ID":       "\\IEEEpubid"}
    # layouts that are only renamed
    renamed = {"Page headings":            "MarkBoth",
               "Biography without photo":  "BiographyNoPhoto"}
    # the layouts are processed in this order
    ordered = ("Special Paper Notice", "After Title Text", "Publication ID",
               "Page headings", "Biography without photo")

    for layout in ordered:
        pos = 0
        while True:
            pos = find_token(document.body, '\\begin_layout ' + layout, pos)
            if pos == -1:
                break
            end = find_end_of_layout(document.body, pos)
            if end == -1:
                document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
                pos += 1
                continue
            if layout in renamed:
                document.body[pos] = "\\begin_layout " + renamed[layout]
                pos = end
            else:
                content = lyx2latex(document, document.body[pos:end + 1])
                add_to_preamble(document, [preamble_command[layout] + "{" + content + "}"])
                # the layout is removed, so pos already points to the
                # line that followed it
                del document.body[pos:end + 1]
1615
1616
def convert_prettyref(document):
    """Convert prettyref references to neutral formatted refs.

    Every "LatexCommand prettyref" line found inside a
    "\\begin_inset CommandInset ref" inset is rewritten to
    "LatexCommand formatted".  Afterwards "\\use_refstyle 0" is inserted
    before the last line of the header, whether or not any reference
    was changed.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef!")
            i += 1
            continue
        # only look for the command between the inset's begin and end
        k = find_token(document.body, "LatexCommand prettyref", i, j)
        if k != -1:
            document.body[k] = "LatexCommand formatted"
        i = j + 1
    document.header.insert(-1, "\\use_refstyle 0")
1637
1638
def revert_refstyle(document):
    """Revert neutral formatted refs to prettyref.

    Every "LatexCommand formatted" line found inside a
    "\\begin_inset CommandInset ref" inset is rewritten to
    "LatexCommand prettyref".  The "\\use_refstyle" header line is
    removed if present.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef")
            i += 1
            continue
        # only look for the command between the inset's begin and end
        k = find_token(document.body, "LatexCommand formatted", i, j)
        if k != -1:
            document.body[k] = "LatexCommand prettyref"
        i = j + 1
    i = find_token(document.header, "\\use_refstyle", 0)
    if i != -1:
        document.header.pop(i)
1661
1662
def revert_nameref(document):
    """Convert Nameref/nameref reference insets to ERT.

    Each reference inset whose command is "Nameref" or "nameref" is
    replaced by the result of put_cmd_in_ert for "\\<cmd>{<label>}".
    If at least one inset was converted, "\\usepackage{nameref}" is added
    to the preamble.
    """
    cmds = ["Nameref", "nameref"]
    foundone = False
    rx = re.compile(r'reference "(.*)"')
    for cmd in cmds:
        i = 0
        oldcmd = "LatexCommand " + cmd
        while True:
            # It seems better to look for this, as most of the reference
            # insets won't be ones we care about.
            i = find_token(document.body, oldcmd, i)
            if i == -1:
                break
            cmdloc = i
            i += 1
            # Make sure it is actually in an inset!
            # A normal line could begin with "LatexCommand nameref"!
            val = is_in_inset(document.body, cmdloc,
                              "\\begin_inset CommandInset ref")
            if not val:
                continue
            stins, endins = val

            # ok, so it is in an InsetRef
            refline = find_token(document.body, "reference", stins, endins)
            if refline == -1:
                # stins is a line index, so it has to be converted for the message
                document.warning("Can't find reference for inset at line " + str(stins) + "!!")
                continue
            m = rx.match(document.body[refline])
            if not m:
                # report the offending line itself
                document.warning("Can't match reference line: " + document.body[refline])
                continue
            foundone = True
            ref = m.group(1)
            newcontent = put_cmd_in_ert('\\' + cmd + '{' + ref + '}')
            document.body[stins:endins + 1] = newcontent

    if foundone:
        add_to_preamble(document, ["\\usepackage{nameref}"])
1703
1704
def remove_Nameref(document):
    """Convert Nameref commands to nameref commands.

    Lines starting with "LatexCommand Nameref" are rewritten to
    "LatexCommand nameref", but only when is_in_inset reports that the
    line lies inside a "\\begin_inset CommandInset ref" inset.
    """
    inset_start = "\\begin_inset CommandInset ref"
    pos = 0
    while True:
        # Search for the command line directly: most reference insets
        # are not ones we care about.
        hit = find_token(document.body, "LatexCommand Nameref", pos)
        if hit == -1:
            return
        pos = hit + 1
        # A plain text line could start with the same words, so check
        # that we really are inside a reference inset.
        if is_in_inset(document.body, hit, inset_start):
            document.body[hit] = "LatexCommand nameref"
1723
1724
def revert_mathrsfs(document):
    """Load mathrsfs if \\mathscr is used in the document.

    Adds "\\usepackage{mathrsfs}" to the preamble (once) when any body
    line contains "\\mathscr{".
    """
    if any("\\mathscr{" in line for line in document.body):
        add_to_preamble(document, ["\\usepackage{mathrsfs}"])
1732
1733
def convert_flexnames(document):
    """Convert \\begin_inset Flex Custom:Style to \\begin_inset Flex Style.

    The same is done for the CharStyle: and Element: prefixes.  Flex
    insets without one of these prefixes are left alone.
    """
    flex_token = "\\begin_inset Flex"
    prefixed = re.compile(r'^\\begin_inset Flex (?:Custom|CharStyle|Element):(.+)$')

    pos = find_token(document.body, flex_token, 0)
    while pos != -1:
        found = prefixed.match(document.body[pos])
        if found:
            # keep only the bare style name after the prefix
            document.body[pos] = "\\begin_inset Flex " + found.group(1)
        pos = find_token(document.body, flex_token, pos + 1)
1747
1748
# Maps a bare flex inset name to its old, prefixed name
# ("CharStyle:<name>" or "Custom:<name>").
flex_insets = dict(
  (style, prefix + ":" + style)
  for prefix, styles in (
    ("CharStyle", ("Alert", "Code", "Concepts", "E-Mail", "Emph",
                   "Expression", "Initial", "Institute", "Meaning",
                   "Noun", "Strong", "Structure")),
    ("Custom", ("ArticleMode", "Endnote", "Glosse", "PresentationMode",
                "Tri-Glosse")),
  )
  for style in styles
)
1768
# Maps a bare flex element name to its old, prefixed name
# ("Element:<name>").
flex_elements = dict(
  (element, "Element:" + element)
  for element in (
    "Abbrev", "CCC-Code", "Citation-number", "City", "Code", "CODEN",
    "Country", "Day", "Directory", "Dscr", "Email", "Emph", "Filename",
    "Firstname", "Fname", "GuiButton", "GuiMenu", "GuiMenuItem", "ISSN",
    "Issue-day", "Issue-months", "Issue-number", "KeyCap", "KeyCombo",
    "Keyword", "Literal", "MenuChoice", "Month", "Orgdiv", "Orgname",
    "Postcode", "SS-Code", "SS-Title", "State", "Street", "Surname",
    "Volume", "Year",
  )
)
1809
1810
def revert_flexnames(document):
    """Put the old prefixes back on Flex inset names.

    The lookup table is flex_insets when the document backend is
    "latex" and flex_elements otherwise.  Flex insets whose name is not
    in the table are left unchanged.
    """
    flexlist = flex_insets if document.backend == "latex" else flex_elements
    flex_line = re.compile(r'^\\begin_inset Flex\s+(.+)$')

    pos = -1
    while True:
        pos = find_token(document.body, "\\begin_inset Flex", pos + 1)
        if pos == -1:
            return
        found = flex_line.match(document.body[pos])
        if not found:
            document.warning("Illegal flex inset: " + document.body[pos])
            continue
        style = found.group(1)
        if style in flexlist:
            document.body[pos] = "\\begin_inset Flex " + flexlist[style]
1832
1833
def convert_mathdots(document):
    """Add the \\use_mathdots header setting.

    The new line goes right after \\use_mhchem (or, failing that,
    \\use_esint).  It is set to 2 when the preamble loads mathdots with
    "\\usepackage{mathdots}" -- that preamble line is then removed --
    and to 0 otherwise.
    """
    anchor = find_token(document.header, "\\use_mhchem", 0)
    if anchor == -1:
        anchor = find_token(document.header, "\\use_esint", 0)
    if anchor == -1:
        document.warning("Malformed LyX document: Can't find \\use_mhchem.")
        return

    pkgline = find_token(document.preamble, "\\usepackage{mathdots}", 0)
    if pkgline != -1:
        document.header.insert(anchor + 1, "\\use_mathdots 2")
        del document.preamble[pkgline]
    else:
        document.header.insert(anchor + 1, "\\use_mathdots 0")
1848
1849
def revert_mathdots(document):
    """Load mathdots if used in the document.

    The \\use_mathdots header line is removed and its value decides:
      0 -- never load the package;
      2 -- always add "\\usepackage{mathdots}" to the preamble;
      anything else, a missing line or an unparsable value -- "auto":
           the package is loaded conditionally, and only if some Formula
           inset contains \\iddots.
    """
    mathdots = find_token(document.header, "\\use_mathdots", 0)
    if mathdots == -1:
        document.warning("No \\use_mathdots line. Assuming auto.")
    else:
        val = get_value(document.header, "\\use_mathdots", mathdots)
        del document.header[mathdots]
        try:
            usedots = int(val)
        except (TypeError, ValueError):
            # only a failed conversion means "invalid value"; anything
            # else should not be silently swallowed
            document.warning("Invalid \\use_mathdots value: " + str(val) + ". Assuming auto.")
            usedots = 1

        if usedots == 0:
            # do not load case
            return
        if usedots == 2:
            # force load case
            add_to_preamble(document, ["\\usepackage{mathdots}"])
            return

    # so we are in the auto case. we want to load mathdots if \iddots is used.
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue
        code = "\n".join(document.body[i:j])
        if "\\iddots" in code:
            add_to_preamble(document, ["\\@ifundefined{iddots}{\\usepackage{mathdots}}"])
            # once is enough
            return
        i = j
1890
1891
def convert_rule(document):
    """Convert \\lyxline to CommandInset line.

    Each \\lyxline is replaced by a line inset.  If the line is the first
    content of its paragraph and paragraphs are indented, \\noindent is
    added to that paragraph so the rule is not pushed past the margin.
    If other content precedes the line in its paragraph, a Newline inset
    is put in front of the rule instead.
    """
    inset = ['\\begin_inset CommandInset line',
      'LatexCommand rule',
      'offset "0.5ex"',
      'width "100line%"',
      'height "1pt"', '',
      '\\end_inset', '', '']
    newline = ["\\begin_inset Newline newline", "\\end_inset", ""]

    # if paragraphs are indented, we may have to unindent to get the
    # line to be full-width.
    indent = get_value(document.header, "\\paragraph_separation", 0)
    have_indent = (indent == "indent")

    i = 0
    while True:
        i = find_token(document.body, "\\lyxline", i)
        if i == -1:
            return

        # we need to find out if this line follows other content
        # in its paragraph. find its layout....
        lastlay = find_token_backwards(document.body, "\\begin_layout", i)
        if lastlay == -1:
            document.warning("Can't find layout for line at " + str(i))
            # do the best we can.
            document.body[i:i+1] = inset
            i += len(inset)
            continue

        # ...and look for other content before it.
        lineisfirst = True
        for line in document.body[lastlay + 1:i]:
            # is it empty or a paragraph option?
            if not line or line[0] == '\\':
                continue
            lineisfirst = False
            break

        if lineisfirst:
            document.body[i:i+1] = inset
            # test the flag, not the raw header value: any non-empty
            # \paragraph_separation string is truthy
            if have_indent:
                # we need to unindent, lest the line be too long
                document.body.insert(lastlay + 1, "\\noindent")
                # the inset moved down by the line just inserted
                i += 1
        else:
            # so our line is in the middle of a paragraph
            # we need to add a new line, lest this line follow the
            # other content on that line and run off the side of the page
            document.body[i:i+1] = newline + inset
            i += len(newline)
        # step over the inset exactly once, so that a \lyxline shortly
        # after this one is not skipped
        i += len(inset)
1945
1946
def revert_rule(document):
    """Revert line insets to TeX code.

    Each "\\begin_inset CommandInset line" inset is replaced by ERT
    holding "\\rule[<offset>]{<width>}{<height>}".  The optional
    "[<offset>]" argument is left out when the inset has no offset.
    Width defaults to "100col%" and height to "1pt".
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset line", i)
        if i == -1:
            return
        # find end of inset
        j = find_token(document.body, "\\end_inset", i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of line inset.")
            return
        # determine the optional offset; it carries its own brackets so
        # that an absent offset yields no optional argument at all
        offset = get_quoted_value(document.body, 'offset', i, j)
        if offset:
            offset = '[' + offset + ']'
        # determine the width
        width = get_quoted_value(document.body, 'width', i, j, "100col%")
        width = latex_length(width)[1]
        # determine the height
        height = get_quoted_value(document.body, 'height', i, j, "1pt")
        height = latex_length(height)[1]
        # output the \rule command
        subst = "\\rule" + offset + "{" + width + "}{" + height + "}"
        ert = put_cmd_in_ert(subst)
        document.body[i:j + 1] = ert
        # continue right after the lines we just inserted
        i += len(ert)
1973
1974
def revert_diagram(document):
    """Add the feyn package if \\Diagram is used in math.

    Formula insets are scanned in order; at the first one containing
    "\\Diagram", "\\usepackage{feyn}" is added to the preamble and the
    scan stops.  The scan also stops at a Formula inset with no end.
    """
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Formula', pos)
        if pos == -1:
            return
        last = find_end_of_inset(document.body, pos)
        if last == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset.")
            return
        formula = "\n".join(document.body[pos:last])
        if formula.find("\\Diagram") != -1:
            # only need to do it once!
            add_to_preamble(document, ["\\usepackage{feyn}"])
            return
        pos = last
1993
# Text classes for which convert_bibtex_clearpage inserts a page break
# before a bibliography that uses the bibtotoc option.
chapters = (
    "amsbook", "book", "docbook-book", "elsart",
    "extbook", "extreport",
    "jbook", "jreport", "jsbook",
    "literate-book", "literate-report",
    "memoir", "mwbk", "mwrep",
    "recipebook", "report",
    "scrbook", "scrreprt",
    "svmono", "svmult",
    "tbook", "treport", "tufte-book",
)
1998
def convert_bibtex_clearpage(document):
    """Insert a clear(double)page before bibliographies using bibtotoc.

    Only acts on text classes listed in ``chapters``.  For every bibtex
    inset whose options contain "bibtotoc", a Standard paragraph holding
    a Newpage inset is inserted right before the paragraph containing
    the bibliography: "clearpage" for single-sided documents,
    "cleardoublepage" otherwise.  A missing or invalid \\papersides
    setting is treated as single sided.
    """
    if document.textclass not in chapters:
        return

    i = find_token(document.header, '\\papersides', 0)
    sides = 0
    if i == -1:
        document.warning("Malformed LyX document: Can't find papersides definition.")
        document.warning("Assuming single sided.")
        sides = 1
    else:
        val = get_value(document.header, "\\papersides", i)
        try:
            sides = int(val)
        except (TypeError, ValueError):
            # not a number: sides stays 0 and is reported just below
            pass
        if sides != 1 and sides != 2:
            document.warning("Invalid papersides value: " + str(val))
            document.warning("Assuming single sided.")
            sides = 1

    j = 0
    while True:
        j = find_token(document.body, "\\begin_inset CommandInset bibtex", j)
        if j == -1:
            return

        k = find_end_of_inset(document.body, j)
        if k == -1:
            document.warning("Can't find end of Bibliography inset at line " + str(j))
            j += 1
            continue

        # only act if there is the option "bibtotoc"
        val = get_value(document.body, 'options', j, k)
        if not val:
            document.warning("Can't find options for bibliography inset at line " + str(j))
            j = k
            continue

        if val.find("bibtotoc") == -1:
            j = k
            continue

        # so we want to insert a new page right before the paragraph that
        # this bibliography thing is in.
        lay = find_token_backwards(document.body, "\\begin_layout", j)
        if lay == -1:
            document.warning("Can't find layout containing bibliography inset at line " + str(j))
            j = k
            continue

        if sides == 1:
            cmd = "clearpage"
        else:
            cmd = "cleardoublepage"
        subst = ['\\begin_layout Standard',
            '\\begin_inset Newpage ' + cmd,
            '\\end_inset', '', '',
            '\\end_layout', '']
        document.body[lay:lay] = subst
        # the inset end moved down by the lines inserted before it
        j = k + len(subst)
2063
2064
def check_passthru(document):
    """Return True if the document is literate or uses sweave/noweb.

    That is: the text class is one of the three literate-* classes, or
    the module list contains "sweave" or "noweb".  The module list is
    only consulted when the text class does not already decide.
    """
    if document.textclass in ("literate-article", "literate-book", "literate-report"):
        return True
    for module in document.get_module_list():
        if module in ("sweave", "noweb"):
            return True
    return False
2075
2076
def convert_passthru(document):
    """Split Chunk/Scrap paragraphs at their newline insets.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Only acts when check_passthru(document) is true.  Inside every Chunk
    or Scrap layout without paragraph customization, each Newline inset
    is replaced by an end-of-layout / begin-of-layout pair, so that one
    paragraph with newlines becomes a run of paragraphs.  When the
    following paragraph has the same layout, an empty paragraph of that
    layout is inserted in between as a separator.
    """
    # check_passthru has to be called; the bare function object is always true
    if not check_passthru(document):
        return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        # every layout kind is searched from the top of the body
        beg = 0
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we are now going to replace newline insets within this layout
            # by new instances of this layout. so we have repeated layouts
            # instead of newlines.

            # if the paragraph has any customization, however, we do not want to
            # do the replacement.
            if document.body[beg + 1].startswith("\\"):
                beg = end + 1
                continue

            ns = beg
            while True:
                ns = find_token(document.body, "\\begin_inset Newline newline", ns, end)
                if ns == -1:
                    break
                ne = find_end_of_inset(document.body, ns)
                if ne == -1 or ne > end:
                    document.warning("Can't find end of inset at line " + str(ns))
                    ns += 1
                    continue
                # also swallow the empty line following the inset
                if document.body[ne + 1] == "":
                    ne += 1
                subst = ["\\end_layout", "", "\\begin_layout " + lay]
                removed = ne - ns + 1
                document.body[ns:ne + 1] = subst
                # the end of the layout moves by the change in length;
                # the search resumes right after the inserted lines
                end += len(subst) - removed
                ns += len(subst)

            # ok, we now want to find out if the next layout is the
            # same as this one. if so, we will insert an extra copy of it
            beg = end + 1
            nextlay = find_token(document.body, "\\begin_layout", end)
            if nextlay != -1:
                m = rx.match(document.body[nextlay])
                if m and m.group(1) == lay:
                    separator = ["\\begin_layout " + lay, "", "\\end_layout", ""]
                    document.body[nextlay:nextlay] = separator
                    # resume at the real next layout, beyond the separator
                    beg = nextlay + len(separator)
2140
2141
def revert_passthru(document):
    """Merge runs of Chunk/Scrap paragraphs back into one paragraph.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Only acts when check_passthru(document) is true.  While the paragraph
    after a Chunk (or Scrap) has the same layout: if it is empty it is
    removed and merging stops there; otherwise the layout switch is
    replaced by a Newline inset, so that the two paragraphs become one.
    """
    # check_passthru has to be called; the bare function object is always true
    if not check_passthru(document):
        return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        # every layout kind is searched from the top of the body
        beg = 0
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we now want to find out if the next layout is the
            # same as this one. but we will need to do this over and
            # over again.
            while True:
                nextlay = find_token(document.body, "\\begin_layout", end)
                if nextlay == -1:
                    break
                m = rx.match(document.body[nextlay])
                if not m:
                    break
                if m.group(1) != lay:
                    break
                # so it is the same layout again. we now want to know if it is empty.
                # but first let's check and make sure there is no content between the
                # two layouts. i'm not sure if that can happen or not.
                for l in range(end + 1, nextlay):
                    if document.body[l] != "":
                        document.warning("Found content between adjacent " + lay + " layouts!")
                        break
                nextend = find_end_of_layout(document.body, nextlay)
                if nextend == -1:
                    document.warning("Can't find end of layout at line " + str(nextlay))
                    break
                empty = all(line == "" for line in document.body[nextlay + 1:nextend])
                if empty:
                    # empty layouts just get removed
                    # should we check if it's before yet another such layout?
                    del document.body[nextlay : nextend + 1]
                    # and we do not want to check again. we know the next layout
                    # should be another Chunk and should be left as is.
                    break
                # if it's not empty, then we want to insert a newline in place
                # of the layout switch
                subst = ["\\begin_inset Newline newline", "\\end_inset", ""]
                document.body[end : nextlay + 1] = subst
                # and now we have to find the end of the new, larger layout
                newend = find_end_of_layout(document.body, beg)
                if newend == -1:
                    document.warning("Can't find end of new layout at line " + str(beg))
                    break
                end = newend
            beg = end + 1
2209
2210
def revert_multirowOffset(document):
    """Revert multirow cells with offset in tables to TeX code.

    This routine is the same as the revert_multirow routine except that
    it additionally checks for the offset.

    If the body has no '<cell multirow="3" mroffset=' cell, nothing is
    done.  Otherwise "\\usepackage{multirow}" is added to the preamble
    and, in every table, each multirow start cell carrying an mroffset
    gets its content wrapped in ERT
    "\\multirow{<rows>}{<width>}[<offset>]{" ... "}", while the multirow
    attributes are stripped from the participating cells.
    """
    # first, let's find out if we need to do anything
    i = find_token(document.body, '<cell multirow="3" mroffset=', 0)
    if i == -1:
        return

    add_to_preamble(document, ["\\usepackage{multirow}"])

    rgx = re.compile(r'mroffset="[^"]+?"')
    begin_table = 0

    while True:
        # find begin/end of table
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        # does this table have multirow?
        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
        if i == -1:
            begin_table = end_table
            continue

        # store the number of rows and columns
        numrows = get_option_value(document.body[begin_table], "rows")
        numcols = get_option_value(document.body[begin_table], "columns")
        try:
            numrows = int(numrows)
            numcols = int(numcols)
        except (TypeError, ValueError):
            document.warning("Unable to determine rows and columns!")
            begin_table = end_table
            continue

        mrstarts = []
        multirows = []
        # collect info on rows and columns of this table.
        # multirows[row][column] is [first line, last line, multirow state]
        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row>', begin_row, end_table)
            if begin_row == -1:
                document.warning("Can't find row " + str(row + 1))
                break
            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
            if end_row == -1:
                document.warning("Can't find end of row " + str(row + 1))
                break
            begin_cell = begin_row
            multirows.append([])
            for column in range(numcols):
                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
                if begin_cell == -1:
                    document.warning("Can't find column " + str(column + 1) +
                      " in row " + str(row + 1))
                    break
                # NOTE
                # this will fail if someone puts "</cell>" in a cell, but
                # that seems fairly unlikely.
                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
                if end_cell == -1:
                    document.warning("Can't find end of column " + str(column + 1) +
                      " in row " + str(row + 1))
                    break
                multirows[row].append([begin_cell, end_cell, 0])
                if document.body[begin_cell].find('multirow="3" mroffset=') != -1:
                    multirows[row][column][2] = 3 # begin multirow
                    mrstarts.append([row, column])
                elif document.body[begin_cell].find('multirow="4"') != -1:
                    multirows[row][column][2] = 4 # in multirow
                begin_cell = end_cell
            begin_row = end_row
        # end of table info collection

        # work from the back to avoid messing up numbering
        mrstarts.reverse()
        for m in mrstarts:
            row = m[0]
            col = m[1]
            # get column width
            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
            # "0pt" means that no width is specified
            if not col_width or col_width == "0pt":
                col_width = "*"
            # determine the number of cells that are part of the multirow
            nummrs = 1
            for r in range(row + 1, numrows):
                if multirows[r][col][2] != 4:
                    break
                nummrs += 1
                # take the opportunity to revert this line
                lineno = multirows[r][col][0]
                document.body[lineno] = document.body[lineno].\
                  replace(' multirow="4" ', ' ').\
                  replace('valignment="middle"', 'valignment="top"').\
                  replace(' topline="true" ', ' ')
                # remove bottom line of previous multirow-part cell
                lineno = multirows[r-1][col][0]
                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
            # revert beginning cell
            bcell = multirows[row][col][0]
            ecell = multirows[row][col][1]
            offset = get_option_value(document.body[bcell], "mroffset")
            document.body[bcell] = document.body[bcell].\
              replace(' multirow="3" ', ' ').\
              replace('valignment="middle"', 'valignment="top"')
            # remove mroffset option
            document.body[bcell] = rgx.sub('', document.body[bcell])

            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
            if blay == -1:
                document.warning("Can't find layout for cell!")
                continue
            bend = find_end_of_layout(document.body, blay)
            if bend == -1:
                document.warning("Can't find end of layout for cell!")
                continue
            # do the later one first, so as not to mess up the numbering
            # we are wrapping the whole cell in this ert
            # so before the end of the layout...
            document.body[bend:bend] = put_cmd_in_ert("}")
            # ...and after the beginning
            document.body[blay + 1:blay + 1] = \
              put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}["
                  + offset + "]{")

        # on to the next table
        begin_table = end_table
2346
2347
def revert_script(document):
    """Convert subscript/superscript insets to TeX code.

    The content of each script inset is kept, but the inset itself is
    replaced by ERT "\\textsubscript{" / "\\textsuperscript{" before the
    content and ERT "}" after it.  If a subscript inset was seen and the
    text class is not one of those known to provide \\textsubscript, the
    subscript package is added to the preamble.
    """
    pos = 0
    foundsubscript = False
    while True:
        pos = find_token(document.body, '\\begin_inset script', pos)
        if pos == -1:
            break
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of script inset.")
            pos += 1
            continue
        lay_start = find_token(document.body, "\\begin_layout", pos, inset_end)
        if lay_start == -1:
            document.warning("Malformed LyX document: Can't find layout in script inset.")
            pos = inset_end
            continue

        header = document.body[pos]
        if check_token(header, "\\begin_inset script subscript"):
            opener = '\\textsubscript{'
            foundsubscript = True
        elif check_token(header, "\\begin_inset script superscript"):
            opener = '\\textsuperscript{'
        else:
            document.warning("Malformed LyX document: Unknown type of script inset.")
            pos = inset_end
            continue

        lay_end = find_end_of_layout(document.body, lay_start)
        if lay_end == -1 or lay_end > inset_end:
            document.warning("Malformed LyX document: Can't find end of layout in script inset.")
            pos = inset_end
            continue
        # the tail first, so that the indices of the head stay valid:
        # \end_layout ... \end_inset becomes the closing brace
        document.body[lay_end:inset_end + 1] = put_cmd_in_ert("}")
        # \begin_inset ... \begin_layout becomes the opening command
        document.body[pos:lay_start + 1] = put_cmd_in_ert(opener)
        pos += 1

    # these classes provide a \textsubscript command:
    # FIXME: Would be nice if we could use the information of the .layout file here
    classes = ["memoir", "scrartcl", "scrbook", "scrlttr2", "scrreprt"]
    if foundsubscript and find_token_exact(classes, document.textclass, 0) == -1:
        add_to_preamble(document, ['\\usepackage{subscript}'])
2390
2391
def convert_use_xetex(document):
    """Convert \\use_xetex to \\use_non_tex_fonts.

    An existing \\use_xetex line is renamed, keeping its value.  If there
    is none, "\\use_non_tex_fonts 0" is inserted before the last header
    line.
    """
    pos = find_token(document.header, "\\use_xetex", 0)
    if pos != -1:
        setting = get_value(document.header, "\\use_xetex", 0)
        document.header[pos] = "\\use_non_tex_fonts " + setting
        return
    document.header.insert(-1, "\\use_non_tex_fonts 0")
2400
2401
def revert_use_xetex(document):
    """Revert \\use_non_tex_fonts to \\use_xetex.

    The header line is renamed and keeps its value.  A warning is issued
    if the header has no \\use_non_tex_fonts line.
    """
    pos = find_token(document.header, "\\use_non_tex_fonts", 0)
    if pos != -1:
        setting = get_value(document.header, "\\use_non_tex_fonts", 0)
        document.header[pos] = "\\use_xetex " + setting
        return
    document.warning("Malformed document. No \\use_non_tex_fonts param!")
2412
2413
def revert_labeling(document):
    """Replace every "\\begin_layout Labeling" line with "\\begin_layout List".

    Nothing is changed when document.textclass is one of the classes
    listed in `koma_classes` below.
    """
    koma_classes = ("scrartcl", "scrarticle-beamer", "scrbook", "scrlettr",
                    "scrlttr2", "scrreprt")
    if document.textclass in koma_classes:
        return

    old_layout = "\\begin_layout Labeling"
    new_layout = "\\begin_layout List"
    body = document.body

    pos = find_token_exact(body, old_layout, 0)
    while pos != -1:
        body[pos] = new_layout
        # The rewritten line no longer matches, so searching again from the
        # same position moves on to the next occurrence.
        pos = find_token_exact(body, old_layout, pos)
2425
2426
def revert_langpack(document):
    """Revert the \\language_package parameter.

    The first header line that find_token matches for \\language_package
    is deleted.  If there is none, a warning is issued instead.
    """
    header = document.header
    pos = find_token(header, "\\language_package", 0)
    if pos != -1:
        del header[pos]
        return

    document.warning("Malformed document. No \\language_package param!")
2436
2437
def convert_langpack(document):
    """Add the \\language_package parameter to the header.

    A "\\language_package default" line is inserted directly after the
    first header line that find_token matches for "\\language".  If no
    such line exists, a warning is issued and the header is left unchanged.
    """
    # The backslash has to be escaped: "\l" is not a valid escape sequence,
    # and recent Python versions warn about it (it is slated to become an
    # error).  The resulting search string is unchanged.
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        document.warning("Malformed document. No \\language defined!")
        return

    document.header.insert(i + 1, "\\language_package default")
2446
2447
def revert_tabularwidth(document):
    """Call remove_option for 'tabularwidth' on Tabular <features> lines.

    For each "\\begin_inset Tabular" in the body, the "<features" line
    inside the inset is looked up; if it contains the text
    alignment="tabularwidth", remove_option is applied to that line with
    the option name 'tabularwidth'.  Insets without a detectable end or
    without a <features line only trigger a warning.
    """
    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\begin_inset Tabular", pos)
        if pos == -1:
            return

        inset_end = find_end_of_inset(body, pos)
        if inset_end == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(pos))
            pos += 1
            continue

        # Step past the \begin_inset line; the next search resumes here so
        # that tabulars nested inside this one are visited as well.
        pos += 1
        feature_line = find_token(body, "<features", pos, inset_end)
        if feature_line == -1:
            document.warning("Can't find any features in Tabular inset at line " + str(pos))
            pos = inset_end
            continue

        # NOTE(review): the guard tests for alignment="tabularwidth" while the
        # option handed to remove_option is 'tabularwidth' -- confirm that this
        # is the intended condition.
        if 'alignment="tabularwidth"' in body[feature_line]:
            remove_option(body, feature_line, 'tabularwidth')
2467
def revert_html_css_as_file(document):
    """Delete the \\html_css_as_file parameter from the header.

    A warning is issued when del_token reports that nothing was removed.
    """
    removed = del_token(document.header, '\\html_css_as_file', 0)
    if removed:
        return
    document.warning("Malformed LyX document: Missing \\html_css_as_file.")
2471
2472
2473 ##
2474 # Conversion hub
2475 #
2476
# LyX release strings associated with this module.
# NOTE(review): presumably read by the lyx2lyx driver to map release names to
# this module's file formats -- confirm against the driver.
supported_versions = ["2.0.0","2.0"]
# Forward conversion table: one [number, [functions]] entry per number, in
# ascending order from 346 to 413.  An empty function list means no routine
# is registered for that number.  The functions are referenced by bare name,
# so they must already be defined at module scope when this statement runs.
# NOTE(review): presumably each number is the file format reached after the
# listed functions have been applied by the lyx2lyx driver -- confirm there.
convert = [[346, []],
           [347, []],
           [348, []],
           [349, []],
           [350, []],
           [351, []],
           [352, [convert_splitindex]],
           [353, []],
           [354, []],
           [355, []],
           [356, []],
           [357, []],
           [358, []],
           [359, [convert_nomencl_width]],
           [360, []],
           [361, []],
           [362, []],
           [363, []],
           [364, []],
           [365, []],
           [366, []],
           [367, []],
           [368, []],
           [369, [convert_author_id]],
           [370, []],
           [371, [convert_mhchem]],
           [372, []],
           [373, [merge_gbrief]],
           [374, []],
           [375, []],
           [376, []],
           [377, []],
           [378, []],
           [379, [convert_math_output]],
           [380, []],
           [381, []],
           [382, []],
           [383, []],
           [384, []],
           [385, []],
           [386, []],
           [387, []],
           [388, []],
           [389, [convert_html_quotes]],
           [390, []],
           [391, []],
           [392, []],
           [393, [convert_optarg]],
           [394, [convert_use_makebox]],
           [395, []],
           [396, []],
           [397, [remove_Nameref]],
           [398, []],
           [399, [convert_mathdots]],
           [400, [convert_rule]],
           [401, []],
           [402, [convert_bibtex_clearpage]],
           [403, [convert_flexnames]],
           [404, [convert_prettyref]],
           [405, []],
           [406, [convert_passthru]],
           [407, []],
           [408, []],
           [409, [convert_use_xetex]],
           [410, []],
           [411, [convert_langpack]],
           [412, []],
           [413, []]
]
2547
# Backward conversion table: one [number, [functions]] entry per number, in
# descending order from 412 down to 345.  An entry may list several functions
# (e.g. 390, 366, 347) or none at all.  The functions are referenced by bare
# name, so they must already be defined at module scope when this runs.
# NOTE(review): presumably each number is the file format a document has
# after the listed functions have been applied by the lyx2lyx driver --
# confirm there.
revert =  [[412, [revert_html_css_as_file]],
           [411, [revert_tabularwidth]],
           [410, [revert_langpack]],
           [409, [revert_labeling]],
           [408, [revert_use_xetex]],
           [407, [revert_script]],
           [406, [revert_multirowOffset]],
           [405, [revert_passthru]],
           [404, []],
           [403, [revert_refstyle]],
           [402, [revert_flexnames]],
           [401, []],
           [400, [revert_diagram]],
           [399, [revert_rule]],
           [398, [revert_mathdots]],
           [397, [revert_mathrsfs]],
           [396, []],
           [395, [revert_nameref]],
           [394, [revert_DIN_C_pagesizes]],
           [393, [revert_makebox]],
           [392, [revert_argument]],
           [391, []],
           [390, [revert_align_decimal, revert_IEEEtran]],
           [389, [revert_output_sync]],
           [388, [revert_html_quotes]],
           [387, [revert_pagesizes]],
           [386, [revert_math_scale]],
           [385, [revert_lyx_version]],
           [384, [revert_shadedboxcolor]],
           [383, [revert_fontcolor]],
           [382, [revert_turkmen]],
           [381, [revert_notefontcolor]],
           [380, [revert_equalspacing_xymatrix]],
           [379, [revert_inset_preview]],
           [378, [revert_math_output]],
           [377, []],
           [376, [revert_multirow]],
           [375, [revert_includeall]],
           [374, [revert_includeonly]],
           [373, [revert_html_options]],
           [372, [revert_gbrief]],
           [371, [revert_fontenc]],
           [370, [revert_mhchem]],
           [369, [revert_suppress_date]],
           [368, [revert_author_id]],
           [367, [revert_hspace_glue_lengths]],
           [366, [revert_percent_vspace_lengths, revert_percent_hspace_lengths]],
           [365, [revert_percent_skip_lengths]],
           [364, [revert_paragraph_indentation]],
           [363, [revert_branch_filename]],
           [362, [revert_longtable_align]],
           [361, [revert_applemac]],
           [360, []],
           [359, [revert_nomencl_cwidth]],
           [358, [revert_nomencl_width]],
           [357, [revert_custom_processors]],
           [356, [revert_ulinelatex]],
           [355, []],
           [354, [revert_strikeout]],
           [353, [revert_printindexall]],
           [352, [revert_subindex]],
           [351, [revert_splitindex]],
           [350, [revert_backgroundcolor]],
           [349, [revert_outputformat]],
           [348, [revert_xetex]],
           [347, [revert_phantom, revert_hphantom, revert_vphantom]],
           [346, [revert_tabularvalign]],
           [345, [revert_swiss]]
          ]
2617
2618
# Running this module directly is a deliberate no-op; it only provides
# definitions and the conversion tables above.
if __name__ == "__main__":
    pass