lib/lyx2lyx/lyx_2_0.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # -*- coding: utf-8 -*-
   4 # Copyright (C) 2011 The LyX team
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  19
  20 """ Convert files to the file format generated by lyx 2.0"""
  21
  22 import re, string
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_token, find_end_of, find_tokens, \
  27   find_token_exact, find_end_of_inset, find_end_of_layout, \
  28   find_token_backwards, is_in_inset, get_value, get_quoted_value, \
  29   del_token, check_token, get_option_value
  30
  31 from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
  32   put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
  33   revert_font_attrs, hex2ratio, str2bool
  34
  35 ####################################################################
  36 # Private helper functions
  37
  38 def remove_option(lines, m, option):
  39     ''' removes option from line m. returns whether we did anything '''
  40     l = lines[m].find(option)
  41     if l == -1:
  42         return False
  43     val = lines[m][l:].split('"')[1]
  44     lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
  45     return True
  46
  47
  48 ###############################################################################
  49 ###
  50 ### Conversion and reversion routines
  51 ###
  52 ###############################################################################
  53
  54 def revert_swiss(document):
  55     " Set language german-ch to ngerman "
  56     i = 0
  57     if document.language == "german-ch":
  58         document.language = "ngerman"
  59         i = find_token(document.header, "\\language", 0)
  60         if i != -1:
  61             document.header[i] = "\\language ngerman"
  62     j = 0
  63     while True:
  64         j = find_token(document.body, "\\lang german-ch", j)
  65         if j == -1:
  66             return
  67         document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman")
  68         j = j + 1
  69
  70
  71 def revert_tabularvalign(document):
  72    " Revert the tabular valign option "
  73    i = 0
  74    while True:
  75       i = find_token(document.body, "\\begin_inset Tabular", i)
  76       if i == -1:
  77           return
  78       end = find_end_of_inset(document.body, i)
  79       if end == -1:
  80           document.warning("Can't find end of inset at line " + str(i))
  81           i += 1
  82           continue
  83       fline = find_token(document.body, "<features", i, end)
  84       if fline == -1:
  85           document.warning("Can't find features for inset at line " + str(i))
  86           i += 1
  87           continue
  88       p = document.body[fline].find("islongtable")
  89       if p != -1:
  90           q = document.body[fline].find("tabularvalignment")
  91           if q != -1:
  92               document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
  93           i += 1
  94           continue
  95
  96        # no longtable
  97       tabularvalignment = 'c'
  98       # which valignment is specified?
  99       m = document.body[fline].find('tabularvalignment="top"')
 100       if m != -1:
 101           tabularvalignment = 't'
 102       m = document.body[fline].find('tabularvalignment="bottom"')
 103       if m != -1:
 104           tabularvalignment = 'b'
 105       # delete tabularvalignment
 106       q = document.body[fline].find("tabularvalignment")
 107       if q != -1:
 108           document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
 109
 110       # don't add a box when centered
 111       if tabularvalignment == 'c':
 112           i = end
 113           continue
 114       subst = ['\\end_inset', '\\end_layout']
 115       document.body[end:end] = subst # just inserts those lines
 116       subst = ['\\begin_inset Box Frameless',
 117           'position "' + tabularvalignment +'"',
 118           'hor_pos "c"',
 119           'has_inner_box 1',
 120           'inner_pos "c"',
 121           'use_parbox 0',
 122           # we don't know the width, assume 50%
 123           'width "50col%"',
 124           'special "none"',
 125           'height "1in"',
 126           'height_special "totalheight"',
 127           'status open',
 128           '',
 129           '\\begin_layout Plain Layout']
 130       document.body[i:i] = subst # this just inserts the array at i
 131       # since there could be a tabular inside a tabular, we cannot
 132       # jump to end
 133       i += len(subst)
 134
 135
 136 def revert_phantom_types(document, ptype, cmd):
 137     " Reverts phantom to ERT "
 138     i = 0
 139     while True:
 140       i = find_token(document.body, "\\begin_inset Phantom " + ptype, i)
 141       if i == -1:
 142           return
 143       end = find_end_of_inset(document.body, i)
 144       if end == -1:
 145           document.warning("Can't find end of inset at line " + str(i))
 146           i += 1
 147           continue
 148       blay = find_token(document.body, "\\begin_layout Plain Layout", i, end)
 149       if blay == -1:
 150           document.warning("Can't find layout for inset at line " + str(i))
 151           i = end
 152           continue
 153       bend = find_end_of_layout(document.body, blay)
 154       if bend == -1:
 155           document.warning("Malformed LyX document: Could not find end of Phantom inset's layout.")
 156           i = end
 157           continue
 158       substi = ["\\begin_inset ERT", "status collapsed", "",
 159                 "\\begin_layout Plain Layout", "", "", "\\backslash",
 160                 cmd + "{", "\\end_layout", "", "\\end_inset"]
 161       substj = ["\\size default", "", "\\begin_inset ERT", "status collapsed", "",
 162                 "\\begin_layout Plain Layout", "", "}", "\\end_layout", "", "\\end_inset"]
 163       # do the later one first so as not to mess up the numbering
 164       document.body[bend:end + 1] = substj
 165       document.body[i:blay + 1] = substi
 166       i = end + len(substi) + len(substj) - (end - bend) - (blay - i) - 2
 167
 168
 169 def revert_phantom(document):
 170     revert_phantom_types(document, "Phantom", "phantom")
 171
 172 def revert_hphantom(document):
 173     revert_phantom_types(document, "HPhantom", "hphantom")
 174
 175 def revert_vphantom(document):
 176     revert_phantom_types(document, "VPhantom", "vphantom")
 177
 178
 179 def revert_xetex(document):
 180     " Reverts documents that use XeTeX "
 181
 182     i = find_token(document.header, '\\use_xetex', 0)
 183     if i == -1:
 184         document.warning("Malformed LyX document: Missing \\use_xetex.")
 185         return
 186     if not str2bool(get_value(document.header, "\\use_xetex", i)):
 187         del document.header[i]
 188         return
 189     del document.header[i]
 190
 191     # 1.) set doc encoding to utf8-plain
 192     i = find_token(document.header, "\\inputencoding", 0)
 193     if i == -1:
 194         document.warning("Malformed LyX document: Missing \\inputencoding.")
 195     else:
 196         document.header[i] = "\\inputencoding utf8-plain"
 197
 198     # 2.) check font settings
 199     # defaults
 200     roman = sans = typew = "default"
 201     osf = False
 202     sf_scale = tt_scale = 100.0
 203
 204     i = find_token(document.header, "\\font_roman", 0)
 205     if i == -1:
 206         document.warning("Malformed LyX document: Missing \\font_roman.")
 207     else:
 208         roman = get_value(document.header, "\\font_roman", i)
 209         document.header[i] = "\\font_roman default"
 210
 211     i = find_token(document.header, "\\font_sans", 0)
 212     if i == -1:
 213         document.warning("Malformed LyX document: Missing \\font_sans.")
 214     else:
 215         sans = get_value(document.header, "\\font_sans", i)
 216         document.header[i] = "\\font_sans default"
 217
 218     i = find_token(document.header, "\\font_typewriter", 0)
 219     if i == -1:
 220         document.warning("Malformed LyX document: Missing \\font_typewriter.")
 221     else:
 222         typew = get_value(document.header, "\\font_typewriter", i)
 223         document.header[i] = "\\font_typewriter default"
 224
 225     i = find_token(document.header, "\\font_osf", 0)
 226     if i == -1:
 227         document.warning("Malformed LyX document: Missing \\font_osf.")
 228     else:
 229         osf = str2bool(get_value(document.header, "\\font_osf", i))
 230         document.header[i] = "\\font_osf false"
 231
 232     i = find_token(document.header, "\\font_sc", 0)
 233     if i == -1:
 234         document.warning("Malformed LyX document: Missing \\font_sc.")
 235     else:
 236         # we do not need this value.
 237         document.header[i] = "\\font_sc false"
 238
 239     i = find_token(document.header, "\\font_sf_scale", 0)
 240     if i == -1:
 241         document.warning("Malformed LyX document: Missing \\font_sf_scale.")
 242     else:
 243       val = get_value(document.header, '\\font_sf_scale', i)
 244       try:
 245         # float() can throw
 246         sf_scale = float(val)
 247       except:
 248         document.warning("Invalid font_sf_scale value: " + val)
 249       document.header[i] = "\\font_sf_scale 100"
 250
 251     i = find_token(document.header, "\\font_tt_scale", 0)
 252     if i == -1:
 253         document.warning("Malformed LyX document: Missing \\font_tt_scale.")
 254     else:
 255         val = get_value(document.header, '\\font_tt_scale', i)
 256         try:
 257           # float() can throw
 258           tt_scale = float(val)
 259         except:
 260           document.warning("Invalid font_tt_scale value: " + val)
 261         document.header[i] = "\\font_tt_scale 100"
 262
 263     # 3.) set preamble stuff
 264     pretext = ['%% This document must be processed with xelatex!']
 265     pretext.append('\\usepackage{fontspec}')
 266     if roman != "default":
 267         pretext.append('\\setmainfont[Mapping=tex-text]{' + roman + '}')
 268     if sans != "default":
 269         sf = '\\setsansfont['
 270         if sf_scale != 100.0:
 271             sf += 'Scale=' + str(sf_scale / 100.0) + ','
 272         sf += 'Mapping=tex-text]{' + sans + '}'
 273         pretext.append(sf)
 274     if typew != "default":
 275         tw = '\\setmonofont'
 276         if tt_scale != 100.0:
 277             tw += '[Scale=' + str(tt_scale / 100.0) + ']'
 278         tw += '{' + typew + '}'
 279         pretext.append(tw)
 280     if osf:
 281         pretext.append('\\defaultfontfeatures{Numbers=OldStyle}')
 282     pretext.append('\usepackage{xunicode}')
 283     pretext.append('\usepackage{xltxtra}')
 284     insert_to_preamble(document, pretext)
 285
 286
 287 def revert_outputformat(document):
 288     " Remove default output format param "
 289
 290     if not del_token(document.header, '\\default_output_format', 0):
 291         document.warning("Malformed LyX document: Missing \\default_output_format.")
 292
 293
 294 def revert_backgroundcolor(document):
 295     " Reverts background color to preamble code "
 296     i = find_token(document.header, "\\backgroundcolor", 0)
 297     if i == -1:
 298         return
 299     colorcode = get_value(document.header, '\\backgroundcolor', i)
 300     del document.header[i]
 301     # don't clutter the preamble if backgroundcolor is not set
 302     if colorcode == "#ffffff":
 303         return
 304     red   = hex2ratio(colorcode[1:3])
 305     green = hex2ratio(colorcode[3:5])
 306     blue  = hex2ratio(colorcode[5:7])
 307     insert_to_preamble(document, \
 308         ['% To set the background color',
 309         '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
 310         '\\definecolor{page_backgroundcolor}{rgb}{' + red + ',' + green + ',' + blue + '}',
 311         '\\pagecolor{page_backgroundcolor}'])
 312
 313
 314 def add_use_indices(document):
 315     " Add \\use_indices if it is missing "
 316     i = find_token(document.header, '\\use_indices', 0)
 317     if i != -1:
 318         return i
 319     i = find_token(document.header, '\\use_bibtopic', 0)
 320     if i == -1:
 321         i = find_token(document.header, '\\cite_engine', 0)
 322     if i == -1:
 323         i = find_token(document.header, '\\use_mathdots', 0)
 324     if i == -1:
 325         i = find_token(document.header, '\\use_mhchem', 0)
 326     if i == -1:
 327         i = find_token(document.header, '\\use_esint', 0)
 328     if i == -1:
 329         i = find_token(document.header, '\\use_amsmath', 0)
 330     if i == -1:
 331         document.warning("Malformed LyX document: Missing \\use_indices.")
 332         return -1
 333     document.header.insert(i + 1, '\\use_indices 0')
 334     return i + 1
 335
 336
 337 def revert_splitindex(document):
 338     " Reverts splitindex-aware documents "
 339     i = add_use_indices(document)
 340     if i == -1:
 341         return
 342     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 343     del document.header[i]
 344     preamble = []
 345     if useindices:
 346          preamble.append("\\usepackage{splitidx})")
 347
 348     # deal with index declarations in the preamble
 349     i = 0
 350     while True:
 351         i = find_token(document.header, "\\index", i)
 352         if i == -1:
 353             break
 354         k = find_token(document.header, "\\end_index", i)
 355         if k == -1:
 356             document.warning("Malformed LyX document: Missing \\end_index.")
 357             return
 358         if useindices:
 359           line = document.header[i]
 360           l = re.compile(r'\\index (.*)$')
 361           m = l.match(line)
 362           iname = m.group(1)
 363           ishortcut = get_value(document.header, '\\shortcut', i, k)
 364           if ishortcut != "":
 365               preamble.append("\\newindex[" + iname + "]{" + ishortcut + "}")
 366         del document.header[i:k + 1]
 367     if preamble:
 368         insert_to_preamble(document, preamble)
 369
 370     # deal with index insets
 371     # these need to have the argument removed
 372     i = 0
 373     while True:
 374         i = find_token(document.body, "\\begin_inset Index", i)
 375         if i == -1:
 376             break
 377         line = document.body[i]
 378         l = re.compile(r'\\begin_inset Index (.*)$')
 379         m = l.match(line)
 380         itype = m.group(1)
 381         if itype == "idx" or indices == "false":
 382             document.body[i] = "\\begin_inset Index"
 383         else:
 384             k = find_end_of_inset(document.body, i)
 385             if k == -1:
 386                 document.warning("Can't find end of index inset!")
 387                 i += 1
 388                 continue
 389             content = lyx2latex(document, document.body[i:k])
 390             # escape quotes
 391             content = content.replace('"', r'\"')
 392             subst = put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")
 393             document.body[i:k + 1] = subst
 394         i = i + 1
 395
 396     # deal with index_print insets
 397     i = 0
 398     while True:
 399         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 400         if i == -1:
 401             return
 402         k = find_end_of_inset(document.body, i)
 403         ptype = get_quoted_value(document.body, 'type', i, k)
 404         if ptype == "idx":
 405             j = find_token(document.body, "type", i, k)
 406             del document.body[j]
 407         elif not useindices:
 408             del document.body[i:k + 1]
 409         else:
 410             subst = put_cmd_in_ert("\\printindex[" + ptype + "]{}")
 411             document.body[i:k + 1] = subst
 412         i = i + 1
 413
 414
 415 def convert_splitindex(document):
 416     " Converts index and printindex insets to splitindex-aware format "
 417     add_use_indices(document)
 418     i = 0
 419     while True:
 420         i = find_token(document.body, "\\begin_inset Index", i)
 421         if i == -1:
 422             break
 423         document.body[i] = document.body[i].replace("\\begin_inset Index",
 424             "\\begin_inset Index idx")
 425         i = i + 1
 426     i = 0
 427     while True:
 428         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 429         if i == -1:
 430             return
 431         if document.body[i + 1].find('LatexCommand printindex') == -1:
 432             document.warning("Malformed LyX document: Incomplete printindex inset.")
 433             return
 434         subst = ["LatexCommand printindex",
 435             "type \"idx\""]
 436         document.body[i + 1:i + 2] = subst
 437         i = i + 1
 438
 439
 440 def revert_subindex(document):
 441     " Reverts \\printsubindex CommandInset types "
 442     i = add_use_indices(document)
 443     if i == -1:
 444         return
 445     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 446     i = 0
 447     while True:
 448         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 449         if i == -1:
 450             return
 451         k = find_end_of_inset(document.body, i)
 452         ctype = get_value(document.body, 'LatexCommand', i, k)
 453         if ctype != "printsubindex":
 454             i = k + 1
 455             continue
 456         ptype = get_quoted_value(document.body, 'type', i, k)
 457         if not useindices:
 458             del document.body[i:k + 1]
 459         else:
 460             subst = put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")
 461             document.body[i:k + 1] = subst
 462         i = i + 1
 463
 464
 465 def revert_printindexall(document):
 466     " Reverts \\print[sub]index* CommandInset types "
 467     i = add_use_indices(document)
 468     if i == -1:
 469         return
 470     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 471     i = 0
 472     while True:
 473         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 474         if i == -1:
 475             return
 476         k = find_end_of_inset(document.body, i)
 477         ctype = get_value(document.body, 'LatexCommand', i, k)
 478         if ctype != "printindex*" and ctype != "printsubindex*":
 479             i = k
 480             continue
 481         if not useindices:
 482             del document.body[i:k + 1]
 483         else:
 484             subst = put_cmd_in_ert("\\" + ctype + "{}")
 485             document.body[i:k + 1] = subst
 486         i = i + 1
 487
 488
 489 def revert_strikeout(document):
 490   " Reverts \\strikeout font attribute "
 491   changed = revert_font_attrs(document.body, "\\uuline", "\\uuline")
 492   changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed
 493   changed = revert_font_attrs(document.body, "\\strikeout", "\\sout")  or changed
 494   if changed == True:
 495     insert_to_preamble(document, \
 496         ['%  for proper underlining',
 497         '\\PassOptionsToPackage{normalem}{ulem}',
 498         '\\usepackage{ulem}'])
 499
 500
 501 def revert_ulinelatex(document):
 502     " Reverts \\uline font attribute "
 503     i = find_token(document.body, '\\bar under', 0)
 504     if i == -1:
 505         return
 506     insert_to_preamble(document,\
 507             ['%  for proper underlining',
 508             '\\PassOptionsToPackage{normalem}{ulem}',
 509             '\\usepackage{ulem}',
 510             '\\let\\cite@rig\\cite',
 511             '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}',
 512             '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}',
 513             '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}'])
 514
 515
 516 def revert_custom_processors(document):
 517     " Remove bibtex_command and index_command params "
 518
 519     if not del_token(document.header, '\\bibtex_command', 0):
 520         document.warning("Malformed LyX document: Missing \\bibtex_command.")
 521
 522     if not del_token(document.header, '\\index_command', 0):
 523         document.warning("Malformed LyX document: Missing \\index_command.")
 524
 525
 526 def convert_nomencl_width(document):
 527     " Add set_width param to nomencl_print "
 528     i = 0
 529     while True:
 530       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 531       if i == -1:
 532         break
 533       document.body.insert(i + 2, "set_width \"none\"")
 534       i = i + 1
 535
 536
 537 def revert_nomencl_width(document):
 538     " Remove set_width param from nomencl_print "
 539     i = 0
 540     while True:
 541       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 542       if i == -1:
 543         break
 544       j = find_end_of_inset(document.body, i)
 545       if not del_token(document.body, "set_width", i, j):
 546         document.warning("Can't find set_width option for nomencl_print!")
 547       i = j
 548
 549
 550 def revert_nomencl_cwidth(document):
 551     " Remove width param from nomencl_print "
 552     i = 0
 553     while True:
 554       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 555       if i == -1:
 556         break
 557       j = find_end_of_inset(document.body, i)
 558       l = find_token(document.body, "width", i, j)
 559       if l == -1:
 560         document.warning("Can't find width option for nomencl_print!")
 561         i = j
 562         continue
 563       width = get_quoted_value(document.body, "width", i, j)
 564       del document.body[l]
 565       insert_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
 566       i = j - 1
 567
 568
 569 def revert_applemac(document):
 570     " Revert applemac encoding to auto "
 571     if document.encoding != "applemac":
 572       return
 573     document.encoding = "auto"
 574     i = find_token(document.header, "\\encoding", 0)
 575     if i != -1:
 576         document.header[i] = "\\encoding auto"
 577
 578
 579 def revert_longtable_align(document):
 580     " Remove longtable alignment setting "
 581     i = 0
 582     while True:
 583       i = find_token(document.body, "\\begin_inset Tabular", i)
 584       if i == -1:
 585           break
 586       end = find_end_of_inset(document.body, i)
 587       if end == -1:
 588           document.warning("Can't find end of inset at line " + str(i))
 589           i += 1
 590           continue
 591       fline = find_token(document.body, "<features", i, end)
 592       if fline == -1:
 593           document.warning("Can't find features for inset at line " + str(i))
 594           i += 1
 595           continue
 596       j = document.body[fline].find("longtabularalignment")
 597       if j == -1:
 598           i += 1
 599           continue
 600       # FIXME Is this correct? It wipes out everything after the
 601       # one we found.
 602       document.body[fline] = document.body[fline][:j - 1] + '>'
 603       # since there could be a tabular inside this one, we
 604       # cannot jump to end.
 605       i += 1
 606
 607
 608 def revert_branch_filename(document):
 609     " Remove \\filename_suffix parameter from branches "
 610     i = 0
 611     while True:
 612         i = find_token(document.header, "\\filename_suffix", i)
 613         if i == -1:
 614             return
 615         del document.header[i]
 616
 617
 618 def revert_paragraph_indentation(document):
 619     " Revert custom paragraph indentation to preamble code "
 620     i = find_token(document.header, "\\paragraph_indentation", 0)
 621     if i == -1:
 622       return
 623     length = get_value(document.header, "\\paragraph_indentation", i)
 624     # we need only remove the line if indentation is default
 625     if length != "default":
 626       # handle percent lengths
 627       length = latex_length(length)[1]
 628       insert_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"])
 629     del document.header[i]
 630
 631
 632 def revert_percent_skip_lengths(document):
 633     " Revert relative lengths for paragraph skip separation to preamble code "
 634     i = find_token(document.header, "\\defskip", 0)
 635     if i == -1:
 636         return
 637     length = get_value(document.header, "\\defskip", i)
 638     # only revert when a custom length was set and when
 639     # it used a percent length
 640     if length in ('smallskip', 'medskip', 'bigskip'):
 641         return
 642     # handle percent lengths
 643     percent, length = latex_length(length)
 644     if percent:
 645         insert_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"])
 646         # set defskip to medskip as default
 647         document.header[i] = "\\defskip medskip"
 648
 649
 650 def revert_percent_vspace_lengths(document):
 651     " Revert relative VSpace lengths to ERT "
 652     i = 0
 653     while True:
 654       i = find_token(document.body, "\\begin_inset VSpace", i)
 655       if i == -1:
 656           break
 657       # only revert if a custom length was set and if
 658       # it used a percent length
 659       r = re.compile(r'\\begin_inset VSpace (.*)$')
 660       m = r.match(document.body[i])
 661       length = m.group(1)
 662       if length in ('defskip', 'smallskip', 'medskip', 'bigskip', 'vfill'):
 663          i += 1
 664          continue
 665       # check if the space has a star (protected space)
 666       protected = (document.body[i].rfind("*") != -1)
 667       if protected:
 668           length = length.rstrip('*')
 669       # handle percent lengths
 670       percent, length = latex_length(length)
 671       # revert the VSpace inset to ERT
 672       if percent:
 673           if protected:
 674               subst = put_cmd_in_ert("\\vspace*{" + length + "}")
 675           else:
 676               subst = put_cmd_in_ert("\\vspace{" + length + "}")
 677           document.body[i:i + 2] = subst
 678       i += 1
 679
 680
 681 def revert_percent_hspace_lengths(document):
 682     " Revert relative HSpace lengths to ERT "
 683     i = 0
 684     while True:
 685       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 686       if i == -1:
 687           break
 688       j = find_end_of_inset(document.body, i)
 689       if j == -1:
 690           document.warning("Can't find end of inset at line " + str(i))
 691           i += 1
 692           continue
 693       # only revert if a custom length was set...
 694       length = get_value(document.body, '\\length', i + 1, j)
 695       if length == '':
 696           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 697           i = j
 698           continue
 699       protected = ""
 700       if document.body[i].find("\\hspace*{}") != -1:
 701           protected = "*"
 702       # ...and if it used a percent length
 703       percent, length = latex_length(length)
 704       # revert the HSpace inset to ERT
 705       if percent:
 706           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 707           document.body[i:j + 1] = subst
 708       # if we did a substitution, this will still be ok
 709       i = j
 710
 711
 712 def revert_hspace_glue_lengths(document):
 713     " Revert HSpace glue lengths to ERT "
 714     i = 0
 715     while True:
 716       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 717       if i == -1:
 718           break
 719       j = find_end_of_inset(document.body, i)
 720       if j == -1:
 721           document.warning("Can't find end of inset at line " + str(i))
 722           i += 1
 723           continue
 724       length = get_value(document.body, '\\length', i + 1, j)
 725       if length == '':
 726           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 727           i = j
 728           continue
 729       protected = ""
 730       if document.body[i].find("\\hspace*{}") != -1:
 731           protected = "*"
 732       # only revert if the length contains a plus or minus at pos != 0
 733       if length.find('-',1) != -1 or length.find('+',1) != -1:
 734           # handle percent lengths
 735           length = latex_length(length)[1]
 736           # revert the HSpace inset to ERT
 737           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 738           document.body[i:j+1] = subst
 739       i = j
 740
 741
 742 def convert_author_id(document):
 743     " Add the author_id to the \\author definition and make sure 0 is not used"
 744     i = 0
 745     anum = 1
 746     re_author = re.compile(r'(\\author) (\".*\")\s*(.*)$')
 747
 748     while True:
 749         i = find_token(document.header, "\\author", i)
 750         if i == -1:
 751             break
 752         m = re_author.match(document.header[i])
 753         if m:
 754             name = m.group(2)
 755             email = m.group(3)
 756             document.header[i] = "\\author %i %s %s" % (anum, name, email)
 757         anum += 1
 758         i += 1
 759
 760     i = 0
 761     while True:
 762         i = find_token(document.body, "\\change_", i)
 763         if i == -1:
 764             break
 765         change = document.body[i].split(' ');
 766         if len(change) == 3:
 767             type = change[0]
 768             author_id = int(change[1])
 769             time = change[2]
 770             document.body[i] = "%s %i %s" % (type, author_id + 1, time)
 771         i += 1
 772
 773
 774 def revert_author_id(document):
 775     " Remove the author_id from the \\author definition "
 776     i = 0
 777     anum = 0
 778     rx = re.compile(r'(\\author)\s+(-?\d+)\s+(\".*\")\s*(.*)$')
 779     idmap = dict()
 780
 781     while True:
 782         i = find_token(document.header, "\\author", i)
 783         if i == -1:
 784             break
 785         m = rx.match(document.header[i])
 786         if m:
 787             author_id = int(m.group(2))
 788             idmap[author_id] = anum
 789             name = m.group(3)
 790             email = m.group(4)
 791             document.header[i] = "\\author %s %s" % (name, email)
 792         i += 1
 793         # FIXME Should this be incremented if we didn't match?
 794         anum += 1
 795
 796     i = 0
 797     while True:
 798         i = find_token(document.body, "\\change_", i)
 799         if i == -1:
 800             break
 801         change = document.body[i].split(' ');
 802         if len(change) == 3:
 803             type = change[0]
 804             author_id = int(change[1])
 805             time = change[2]
 806             document.body[i] = "%s %i %s" % (type, idmap[author_id], time)
 807         i += 1
 808
 809
 810 def revert_suppress_date(document):
 811     " Revert suppressing of default document date to preamble code "
 812     i = find_token(document.header, "\\suppress_date", 0)
 813     if i == -1:
 814         return
 815     # remove the preamble line and write to the preamble
 816     # when suppress_date was true
 817     date = str2bool(get_value(document.header, "\\suppress_date", i))
 818     if date:
 819         add_to_preamble(document, ["\\date{}"])
 820     del document.header[i]
 821
 822
 823 def convert_mhchem(document):
 824     "Set mhchem to off for versions older than 1.6.x"
 825     if document.start < 277:
 826         # LyX 1.5.x and older did never load mhchem.
 827         # Therefore we must switch it off: Documents that use mhchem have
 828         # a manual \usepackage anyway, and documents not using mhchem but
 829         # custom macros with the same names as mhchem commands might get
 830         # corrupted if mhchem is automatically loaded.
 831         mhchem = 0 # off
 832     else:
 833         # LyX 1.6.x did always load mhchem automatically.
 834         mhchem = 1 # auto
 835     i = find_token(document.header, "\\use_esint", 0)
 836     if i == -1:
 837         # pre-1.5.x document
 838         i = find_token(document.header, "\\use_amsmath", 0)
 839     if i == -1:
 840         document.warning("Malformed LyX document: Could not find amsmath os esint setting.")
 841         return
 842     document.header.insert(i + 1, "\\use_mhchem %d" % mhchem)
 843
 844
 845 def revert_mhchem(document):
 846     "Revert mhchem loading to preamble code"
 847
 848     mhchem = "off"
 849     i = find_token(document.header, "\\use_mhchem", 0)
 850     if i == -1:
 851         document.warning("Malformed LyX document: Could not find mhchem setting.")
 852         mhchem = "auto"
 853     else:
 854         val = get_value(document.header, "\\use_mhchem", i)
 855         if val == "1":
 856             mhchem = "auto"
 857         elif val == "2":
 858             mhchem = "on"
 859         del document.header[i]
 860
 861     if mhchem == "off":
 862       # don't load case
 863       return
 864
 865     if mhchem == "auto":
 866         i = 0
 867         while True:
 868             i = find_token(document.body, "\\begin_inset Formula", i)
 869             if i == -1:
 870                break
 871             line = document.body[i]
 872             if line.find("\\ce{") != -1 or line.find("\\cf{") != -1:
 873               mhchem = "on"
 874               break
 875             i += 1
 876
 877     if mhchem == "on":
 878         pre = ["\\PassOptionsToPackage{version=3}{mhchem}",
 879           "\\usepackage{mhchem}"]
 880         insert_to_preamble(document, pre)
 881
 882
 883 def revert_fontenc(document):
 884     " Remove fontencoding param "
 885     if not del_token(document.header, '\\fontencoding', 0):
 886         document.warning("Malformed LyX document: Missing \\fontencoding.")
 887
 888
 889 def merge_gbrief(document):
 890     " Merge g-brief-en and g-brief-de to one class "
 891
 892     if document.textclass != "g-brief-de":
 893         if document.textclass == "g-brief-en":
 894             document.textclass = "g-brief"
 895             document.set_textclass()
 896         return
 897
 898     obsoletedby = { "Brieftext":       "Letter",
 899                     "Unterschrift":    "Signature",
 900                     "Strasse":         "Street",
 901                     "Zusatz":          "Addition",
 902                     "Ort":             "Town",
 903                     "Land":            "State",
 904                     "RetourAdresse":   "ReturnAddress",
 905                     "MeinZeichen":     "MyRef",
 906                     "IhrZeichen":      "YourRef",
 907                     "IhrSchreiben":    "YourMail",
 908                     "Telefon":         "Phone",
 909                     "BLZ":             "BankCode",
 910                     "Konto":           "BankAccount",
 911                     "Postvermerk":     "PostalComment",
 912                     "Adresse":         "Address",
 913                     "Datum":           "Date",
 914                     "Betreff":         "Reference",
 915                     "Anrede":          "Opening",
 916                     "Anlagen":         "Encl.",
 917                     "Verteiler":       "cc",
 918                     "Gruss":           "Closing"}
 919     i = 0
 920     while 1:
 921         i = find_token(document.body, "\\begin_layout", i)
 922         if i == -1:
 923             break
 924
 925         layout = document.body[i][14:]
 926         if layout in obsoletedby:
 927             document.body[i] = "\\begin_layout " + obsoletedby[layout]
 928
 929         i += 1
 930
 931     document.textclass = "g-brief"
 932     document.set_textclass()
 933
 934
 935 def revert_gbrief(document):
 936     " Revert g-brief to g-brief-en "
 937     if document.textclass == "g-brief":
 938         document.textclass = "g-brief-en"
 939         document.set_textclass()
 940
 941
 942 def revert_html_options(document):
 943     " Remove html options "
 944     del_token(document.header, '\\html_use_mathml', 0)
 945     del_token(document.header, '\\html_be_strict', 0)
 946
 947
 948 def revert_includeonly(document):
 949     i = 0
 950     while True:
 951         i = find_token(document.header, "\\begin_includeonly", i)
 952         if i == -1:
 953             return
 954         j = find_end_of(document.header, i, "\\begin_includeonly", "\\end_includeonly")
 955         if j == -1:
 956             document.warning("Unable to find end of includeonly section!!")
 957             break
 958         document.header[i : j + 1] = []
 959
 960
 961 def revert_includeall(document):
 962     " Remove maintain_unincluded_children param "
 963     del_token(document.header, '\\maintain_unincluded_children', 0)
 964
 965
def revert_multirow(document):
    """Revert multirow cells in tables to TeX-code.

    Nothing is done unless the body contains a cell marked multirow="3".
    Otherwise \\usepackage{multirow} is added to the preamble and every
    table that has such a cell is processed:

    1. For each cell the body line of its start, the body line of its end
       and its multirow kind (0, 3 = begin multirow, 4 = in multirow) are
       collected, row by row.
    2. For each multirow begin cell, taken from the last to the first, the
       following "in multirow" cells of the same column are counted and
       have their multirow attribute removed; the content of the first
       layout of the begin cell is wrapped in ERT that opens
       \\multirow{<number of rows>}{<column width>}{ and closes it with }.
    """

    # first, let's find out if we need to do anything
    # cell type 3 is multirow begin cell
    i = find_token(document.body, '<cell multirow="3"', 0)
    if i == -1:
      return

    add_to_preamble(document, ["\\usepackage{multirow}"])

    begin_table = 0
    while True:
        # find begin/end of table
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        # does this table have multirow?
        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
        if i == -1:
            begin_table = end_table
            continue

        # store the number of rows and columns
        numrows = get_option_value(document.body[begin_table], "rows")
        numcols = get_option_value(document.body[begin_table], "columns")
        try:
          numrows = int(numrows)
          numcols = int(numcols)
        except:
          document.warning("Unable to determine rows and columns!")
          begin_table = end_table
          continue

        # mrstarts: [row, column] of every multirow begin cell
        # multirows: per row, a list of [begin line, end line, kind] per cell
        mrstarts = []
        multirows = []
        # collect info on rows and columns of this table.
        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row>', begin_row, end_table)
            if begin_row == -1:
              document.warning("Can't find row " + str(row + 1))
              break
            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
            if end_row == -1:
              document.warning("Can't find end of row " + str(row + 1))
              break
            begin_cell = begin_row
            multirows.append([])
            for column in range(numcols):
                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
                if begin_cell == -1:
                  document.warning("Can't find column " + str(column + 1) + \
                    "in row " + str(row + 1))
                  break
                # NOTE
                # this will fail if someone puts "</cell>" in a cell, but
                # that seems fairly unlikely.
                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
                if end_cell == -1:
                  document.warning("Can't find end of column " + str(column + 1) + \
                    "in row " + str(row + 1))
                  break
                multirows[row].append([begin_cell, end_cell, 0])
                if document.body[begin_cell].find('multirow="3"') != -1:
                  multirows[row][column][2] = 3 # begin multirow
                  mrstarts.append([row, column])
                elif document.body[begin_cell].find('multirow="4"') != -1:
                  multirows[row][column][2] = 4 # in multirow
                begin_cell = end_cell
            begin_row = end_row
        # end of table info collection
        # NOTE(review): when one of the loops above stops early, multirows
        # holds fewer rows/cells than numrows/numcols, yet the code below
        # indexes it with those bounds -- confirm this cannot raise IndexError.

        # work from the back to avoid messing up numbering
        # NOTE(review): the recorded line numbers are not updated after ERT
        # lines have been inserted for a later begin cell; an earlier
        # multirow whose continuation cells come after such an insertion
        # presumably works on stale line numbers -- to be confirmed.
        mrstarts.reverse()
        for m in mrstarts:
            row = m[0]
            col = m[1]
            # get column width
            # (assumes the <column> line of column `col` is found
            # 2 + col lines after the <lyxtabular> line -- TODO confirm)
            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
            # "0pt" means that no width is specified
            if not col_width or col_width == "0pt":
              col_width = "*"
            # determine the number of cells that are part of the multirow
            nummrs = 1
            for r in range(row + 1, numrows):
                if multirows[r][col][2] != 4:
                  break
                nummrs += 1
                # take the opportunity to revert this line
                lineno = multirows[r][col][0]
                document.body[lineno] = document.body[lineno].\
                  replace(' multirow="4" ', ' ').\
                  replace('valignment="middle"', 'valignment="top"').\
                  replace(' topline="true" ', ' ')
                # remove bottom line of previous multirow-part cell
                lineno = multirows[r-1][col][0]
                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
            # revert beginning cell
            bcell = multirows[row][col][0]
            ecell = multirows[row][col][1]
            document.body[bcell] = document.body[bcell].\
              replace(' multirow="3" ', ' ').\
              replace('valignment="middle"', 'valignment="top"')
            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
            if blay == -1:
              document.warning("Can't find layout for cell!")
              continue
            bend = find_end_of_layout(document.body, blay)
            if bend == -1:
              document.warning("Can't find end of layout for cell!")
              continue
            # do the later one first, so as not to mess up the numbering
            # we are wrapping the whole cell in this ert
            # so before the end of the layout...
            document.body[bend:bend] = put_cmd_in_ert("}")
            # ...and after the beginning
            document.body[blay + 1:blay + 1] = \
              put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}{")

        # continue the search for the next table behind this one
        begin_table = end_table
1092
1093
def convert_math_output(document):
    """Convert \\html_use_mathml to \\html_math_output.

    A true setting becomes "0" (MathML) and a false one "2" (Images).  A
    line whose value cannot be parsed is reported and converted to "0".
    """
    pos = find_token(document.header, "\\html_use_mathml", 0)
    if pos == -1:
        return
    line = document.header[pos]
    parsed = re.match(r'\\html_use_mathml\s+(\w+)', line)
    if not parsed:
        document.warning("Can't match " + line)
        output = "0"
    elif str2bool(parsed.group(1)):
        output = "0"  # MathML
    else:
        output = "2"  # Images
    document.header[pos] = "\\html_math_output " + output
1109
1110
def revert_math_output(document):
    """Revert \\html_math_output to \\html_use_mathml.

    The values "1" and "2" become "false"; every other value, as well as a
    line that cannot be parsed (which is reported), becomes "true".
    """
    pos = find_token(document.header, "\\html_math_output", 0)
    if pos == -1:
        return
    line = document.header[pos]
    parsed = re.match(r'\\html_math_output\s+(\d)', line)
    use_mathml = "true"
    if not parsed:
        document.warning("Unable to match " + line)
    elif parsed.group(1) in ("1", "2"):
        use_mathml = "false"
    document.header[pos] = "\\html_use_mathml " + use_mathml
1126
1127
1128
def revert_inset_preview(document):
    """Dissolve the preview insets, keeping their content.

    For every Preview inset the lines from the inset start up to and
    including its first \\begin_layout, the last \\end_layout inside the
    inset and the closing \\end_inset are deleted.  When no layout (or no
    \\end_layout) is found inside the inset, only the inset's first and
    last line are deleted and a warning is issued.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Preview", i)
        if i == -1:
            return
        iend = find_end_of_inset(document.body, i)
        if iend == -1:
            document.warning("Malformed LyX document: Could not find end of Preview inset.")
            i += 1
            continue

        # This has several issues.
        # We need to do something about the layouts inside InsetPreview.
        # If we just leave the first one, then we have something like:
        # \begin_layout Standard
        # ...
        # \begin_layout Standard
        # and we get a "no \end_layout" error. So something has to be done.
        # Ideally, we would check if it is the same as the layout we are in.
        # If so, we just remove it; if not, we end the active one. But it is
        # not easy to know what layout we are in, due to depth changes, etc,
        # and it is not clear to me how much work it is worth doing. In most
        # cases, the layout will probably be the same.
        #
        # For the same reason, we have to remove the \end_layout tag at the
        # end of the last layout in the inset. Again, that will sometimes be
        # wrong, but it will usually be right. To know what to do, we would
        # again have to know what layout the inset is in.

        blay = find_token(document.body, "\\begin_layout", i, iend)
        if blay == -1:
            document.warning("Can't find layout for preview inset!")
            # always do the later one first...
            del document.body[iend]
            del document.body[i]
            # deletions mean we do not need to reset i
            continue

        # This is where we would check what layout we are in.
        # The check for Standard is definitely wrong.
        #
        # lay = document.body[blay].split(None, 1)[1]
        # if lay != oldlayout:
        #     # record a boolean to tell us what to do later....
        #     # better to do it later, since (a) it won't mess up
        #     # the numbering and (b) we only modify at the end.

        # we want to delete the last \\end_layout in this inset, too.
        # note that this may not be the \\end_layout that goes with blay!!
        bend = find_end_of_layout(document.body, blay)
        if bend == -1:
            # No matching end was found.  Start the scan below right after
            # the layout start instead of at line 0, so that it can neither
            # pick up an \end_layout in front of the inset nor leave bend
            # at -1 (which would delete the last line of the body).
            bend = blay
        while True:
            tmp = find_token(document.body, "\\end_layout", bend + 1, iend)
            if tmp == -1:
                break
            bend = tmp
        if bend == blay:
            document.warning("Unable to find last layout in preview inset!")
            del document.body[iend]
            del document.body[i]
            # deletions mean we do not need to reset i
            continue
        # always do the later one first...
        del document.body[iend]
        del document.body[bend]
        del document.body[i:blay + 1]
        # we do not need to reset i
1197
1198
def revert_equalspacing_xymatrix(document):
    """Revert a Formula with xymatrix@! to an ERT inset.

    Every Formula inset that contains ``\\xymatrix@!`` is replaced by an
    ERT inset holding the formula text.  If at least one formula was
    replaced and no remaining formula uses ``\\xymatrix``,
    ``\\usepackage[all]{xy}`` is added to the preamble.
    """
    i = 0
    has_preamble = False
    has_equal_spacing = False

    while True:
        i = find_token(document.body, "\\begin_inset Formula", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Formula inset.")
            i += 1
            continue

        formula = document.body[i:j]
        if any("\\xymatrix@!" in line for line in formula):
            has_equal_spacing = True
            # drop the 21 characters of "\begin_inset Formula " from the
            # first line; the rest of it is formula text
            content = [document.body[i][21:]]
            content += document.body[i + 1:j]
            subst = put_cmd_in_ert(content)
            document.body[i:j + 1] = subst
            # continue right behind the lines that were just inserted
            i += len(subst)
        else:
            if any("\\xymatrix" in line for line in formula):
                has_preamble = True
            i = j + 1

    if has_equal_spacing and not has_preamble:
        add_to_preamble(document, ['\\usepackage[all]{xy}'])
1237
1238
def revert_notefontcolor(document):
    """Revert the greyed-out note font color to preamble code.

    The ``\\notefontcolor`` header line is always removed.  Preamble code
    that defines the color and redefines the lyxgreyedout environment is
    only written when the body contains a greyed-out note.
    """
    i = find_token(document.header, "\\notefontcolor", 0)
    if i == -1:
        return

    colorcode = get_value(document.header, '\\notefontcolor', i)
    del document.header[i]

    # are there any grey notes?
    if find_token(document.body, "\\begin_inset Note Greyedout", 0) == -1:
        # no need to do anything else, and \renewcommand will throw
        # an error since lyxgreyedout will not exist.
        return

    # the color code is in the form #rrggbb where every character denotes a hex number
    red = hex2ratio(colorcode[1:3])
    green = hex2ratio(colorcode[3:5])
    blue = hex2ratio(colorcode[5:7])
    # write the preamble; every entry is one preamble line, so each of them
    # must be followed by a comma (adjacent literals would be merged)
    insert_to_preamble(document,
      ['%  for greyed-out notes',
       '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
       '\\definecolor{note_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue),
       '\\renewenvironment{lyxgreyedout}',
       ' {\\textcolor{note_fontcolor}\\bgroup}{\\egroup}'])
1266
1267
def revert_turkmen(document):
    """Set the language Turkmen to English.

    The document language and its header line are changed when the
    document language is Turkmen; every ``\\lang turkmen`` in the body is
    replaced in any case.
    """
    if document.language == "turkmen":
        document.language = "english"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language english"

    pos = find_token(document.body, "\\lang turkmen", 0)
    while pos != -1:
        line = document.body[pos]
        document.body[pos] = line.replace("\\lang turkmen", "\\lang english")
        pos = find_token(document.body, "\\lang turkmen", pos + 1)
1284
1285
def revert_fontcolor(document):
    """Revert the font color to preamble code.

    The ``\\fontcolor`` header line is removed.  Unless the color is
    ``#000000``, preamble code that defines and activates the color is
    written.
    """
    pos = find_token(document.header, "\\fontcolor", 0)
    if pos == -1:
        return
    colorcode = get_value(document.header, '\\fontcolor', pos)
    del document.header[pos]
    # don't clutter the preamble if font color is not set
    if colorcode == "#000000":
        return
    # the color code is in the form #rrggbb: two hex digits per channel
    red, green, blue = [hex2ratio(colorcode[k:k + 2]) for k in (1, 3, 5)]
    color_definition = \
        '\\definecolor{document_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue)
    preamble = ['%  Set the font color',
                '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
                color_definition,
                '\\color{document_fontcolor}']
    insert_to_preamble(document, preamble)
1306
1307
def revert_shadedboxcolor(document):
    """Revert the shaded box color to preamble code.

    The ``\\boxbgcolor`` header line is removed and preamble code that
    defines ``shadecolor`` accordingly is written.
    """
    pos = find_token(document.header, "\\boxbgcolor", 0)
    if pos == -1:
        return
    colorcode = get_value(document.header, '\\boxbgcolor', pos)
    del document.header[pos]
    # the color code is in the form #rrggbb: two hex digits per channel
    red, green, blue = [hex2ratio(colorcode[k:k + 2]) for k in (1, 3, 5)]
    color_definition = \
        "\\definecolor{shadecolor}{rgb}{%s,%s,%s}" % (red, green, blue)
    preamble = ['%  Set the color of boxes with shaded background',
                '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
                color_definition]
    insert_to_preamble(document, preamble)
1324
1325
def revert_lyx_version(document):
    """Revert LyX version information from Info insets.

    Every Info inset of type "lyxinfo" with argument "version" is replaced
    by a single line of text: the version reported by the lyx2lyx_version
    module or, when that cannot be obtained, the text "LyX version".
    """
    version = "LyX version"
    try:
        import lyx2lyx_version
        version = lyx2lyx_version.version
    except:
        pass

    start = 0
    while True:
        start = find_token(document.body, '\\begin_inset Info', start)
        if start == -1:
            return
        stop = find_end_of_inset(document.body, start + 1)
        if stop == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            start += 1
            continue

        # We expect:
        # \begin_inset Info
        # type  "lyxinfo"
        # arg   "version"
        # \end_inset
        typ = get_quoted_value(document.body, "type", start, stop)
        arg = get_quoted_value(document.body, "arg", start, stop)
        if typ == "lyxinfo" and arg == "version":
            # We do not actually know the version of LyX used to produce
            # the document. But we can use our version, since we are
            # reverting.
            # The line after "\end_inset" is normally empty; if so it is
            # removed as well.
            past_end = stop + 1
            if document.body[past_end].strip() == "":
                past_end = stop + 2
            document.body[start:past_end] = [version]
            start = start + 1
        else:
            start = stop + 1
1367
1368
def revert_math_scale(document):
    """Remove the math scaling and LaTeX options from the header."""
    for token in ('\\html_math_img_scale',
                  '\\html_latex_start',
                  '\\html_latex_end'):
        del_token(document.header, token, 0)
1374
1375
def revert_pagesizes(document):
    """Revert page sizes to default.

    The ``\\papersize`` header line is removed when it names one of the
    paper sizes listed below.
    """
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    removed_sizes = ("a0paper", "a1paper", "a2paper", "a6paper",
                     "b0paper", "b1paper", "b2paper", "b6paper",
                     "b0j", "b1j", "b2j", "b3j", "b4j", "b5j", "b6j")
    # the size follows the 11 characters of "\papersize "
    if document.header[pos][11:] in removed_sizes:
        del document.header[pos]
1387
1388
def revert_DIN_C_pagesizes(document):
    """Revert DIN C page sizes to default.

    The ``\\papersize`` header line is removed when it names one of the
    sizes c0paper to c6paper.
    """
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    din_c_sizes = ("c0paper", "c1paper", "c2paper", "c3paper",
                   "c4paper", "c5paper", "c6paper")
    # the size follows the 11 characters of "\papersize "
    if document.header[pos][11:] in din_c_sizes:
        del document.header[pos]
1398
1399
def convert_html_quotes(document):
    """Remove the quotes around html_latex_start and html_latex_end.

    A header line whose value is not enclosed in double quotes is left
    as it is.
    """
    for name in ("html_latex_start", "html_latex_end"):
        token = "\\" + name
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        # matches: \<name>, whitespace, then the value in double quotes
        quoted = re.match("\\\\" + name + r'\s+"(.*)"', document.header[pos])
        if quoted:
            document.header[pos] = token + " " + quoted.group(1)
1418
1419
def revert_html_quotes(document):
    """Put quotes around the html_latex_start and html_latex_end values.

    A header line that does not consist of the token, whitespace and a
    value is reported and removed.
    """
    for name in ("html_latex_start", "html_latex_end"):
        token = "\\" + name
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        line = document.header[pos]
        # matches: \<name>, whitespace, then the rest of the line
        parsed = re.match("\\\\" + name + r"\s+(.*)", line)
        if parsed:
            document.header[pos] = token + ' "' + parsed.group(1) + '"'
        else:
            document.warning("Weird " + name + " line: " + line)
            del document.header[pos]
1444
1445
def revert_output_sync(document):
    """Remove the forward search options from the header."""
    # the order of the two removals is kept as it was
    for token in ('\\output_sync_macro', '\\output_sync'):
        del_token(document.header, token, 0)
1450
1451
def revert_align_decimal(document):
    """Revert decimal column alignment in tables to centered alignment.

    In every Tabular inset the ``<column`` lines in front of the first
    cell are inspected; a column with alignment="decimal" loses its
    decimal_point option and becomes alignment="center".

    All tables of the document are processed: running out of columns in
    one table ends the work on that table only, not the whole function.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(i))
            i += 1
            continue
        cell = find_token(document.body, "<cell", i, j)
        if cell == -1:
            document.warning("Can't find any cells in Tabular inset at line " + str(i))
            i = j
            continue
        k = i + 1
        while True:
            k = find_token(document.body, "<column", k, cell)
            if k == -1:
                # no more columns in this table
                break
            if 'alignment="decimal"' in document.body[k]:
                remove_option(document.body, k, 'decimal_point')
                document.body[k] = \
                    document.body[k].replace('alignment="decimal"', 'alignment="center"')
            k += 1
        # Go on behind the column definitions.  Lines were only changed in
        # place, so the index is still valid, and tables inside the cells
        # of this table are visited as well.
        i = cell
1480
1481
def convert_optarg(document):
    """Convert \\begin_inset OptArg to \\begin_inset Argument."""
    pos = find_token(document.body, '\\begin_inset OptArg', 0)
    while pos != -1:
        document.body[pos] = "\\begin_inset Argument"
        pos = find_token(document.body, '\\begin_inset OptArg', pos + 1)
1491
1492
def revert_argument(document):
    """Convert \\begin_inset Argument to \\begin_inset OptArg."""
    pos = find_token(document.body, '\\begin_inset Argument', 0)
    while pos != -1:
        document.body[pos] = "\\begin_inset OptArg"
        pos = find_token(document.body, '\\begin_inset Argument', pos + 1)
1502
1503
def revert_makebox(document):
    """Convert \\makebox to TeX code.

    The ``use_makebox`` option is removed from every box inset.  A
    frameless box whose option is "1" is dissolved: its header up to the
    first \\begin_layout is replaced by ERT that opens
    \\makebox[<width>][<alignment>]{ and its closing \\end_layout /
    \\end_inset pair by ERT holding the closing brace.

    A box is left as a box (without the option) when the end of its
    layout cannot be found or when it holds more than one layout, since
    replacing the closing pair would then discard content.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Box', i)
        if i == -1:
            break
        z = find_end_of_inset(document.body, i)
        if z == -1:
            document.warning("Malformed LyX document: Can't find end of box inset.")
            i += 1
            continue
        blay = find_token(document.body, "\\begin_layout", i, z)
        if blay == -1:
            document.warning("Malformed LyX document: Can't find layout in box.")
            i = z
            continue
        # by looking before the layout we make sure we're actually finding
        # an option, not text.
        j = find_token(document.body, 'use_makebox', i, blay)
        if j == -1:
            # continue inside this box (box insets may be nested)
            i = blay
            continue

        # The option value is text and has to be compared with "1"; a
        # comparison with the number 1 never matches.
        convertible = check_token(document.body[i], "\\begin_inset Box Frameless") \
            and get_value(document.body, 'use_makebox', j) == "1"
        bend = -1
        if convertible:
            bend = find_end_of_layout(document.body, blay)
            if bend == -1 or bend > z:
                document.warning("Malformed LyX document: Can't find end of layout in box.")
                convertible = False
            elif find_token(document.body, "\\begin_layout", bend + 1, z) != -1:
                document.warning("Can't convert a box with several paragraphs to \\makebox.")
                convertible = False
        if not convertible:
            del document.body[j]
            # j is now the line behind the removed option; going on from
            # there also visits boxes nested in this one
            i = j
            continue

        # determine the alignment
        align = get_quoted_value(document.body, 'hor_pos', i, blay, "c")
        # determine the width
        length = get_quoted_value(document.body, 'width', i, blay, "50col%")
        length = latex_length(length)[1]
        # do the later replacement first so that i and blay stay valid:
        # remove the \end_layout \end_inset pair
        document.body[bend:z + 1] = put_cmd_in_ert("}")
        subst = "\\makebox[" + length + "][" + align + "]{"
        document.body[i:blay + 1] = put_cmd_in_ert(subst)
        i += 1
1549
1550
def convert_use_makebox(document):
    """Add the use_makebox option to boxes.

    In every box inset the line ``use_makebox 0`` is inserted right after
    the ``use_parbox`` line found in front of the box's first layout.
    """
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Box', pos)
        if pos == -1:
            return
        # all of this is to make sure we actually find the use_parbox
        # that is an option for this box, not some text elsewhere.
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Can't find end of box inset!!")
            pos += 1
            continue

        problem = None
        parbox_line = -1
        first_layout = find_token(document.body, "\\begin_layout", pos, inset_end)
        if first_layout == -1:
            problem = "Can't find layout in box inset!!"
        else:
            # so now we are looking for use_parbox before the box's layout
            parbox_line = find_token(document.body, 'use_parbox', pos, first_layout)
            if parbox_line == -1:
                problem = "Malformed LyX document: Can't find use_parbox statement in box."
        if problem is not None:
            document.warning(problem)
            pos = inset_end
            continue

        document.body.insert(parbox_line + 1, "use_makebox 0")
        pos = first_layout + 1 # not inset_end + 1 (box insets may be nested)
1578
1579
def revert_IEEEtran(document):
    """Convert IEEEtran layouts and styles to TeX code.

    Only documents of text class IEEEtran are touched.  The two flex
    insets are reverted, three layouts are moved into the preamble as
    LaTeX commands, and two layouts are renamed.
    """
    if document.textclass != "IEEEtran":
        return

    revert_flex_inset(document.body, "IEEE membership", "\\IEEEmembership")
    revert_flex_inset(document.body, "Lowercase", "\\MakeLowercase")

    # layouts whose content goes to the preamble, with their command
    preamble_commands = (
        ("Special Paper Notice", "\\IEEEspecialpapernotice"),
        ("After Title Text", "\\IEEEaftertitletext"),
        ("Publication ID", "\\IEEEpubid"),
    )
    # layouts that are only renamed, with their new name
    renamed_layouts = (
        ("Page headings", "MarkBoth"),
        ("Biography without photo", "BiographyNoPhoto"),
    )

    for layout, command in preamble_commands:
        start = 0
        while True:
            start = find_token(document.body, '\\begin_layout ' + layout, start)
            if start == -1:
                break
            stop = find_end_of_layout(document.body, start)
            if stop == -1:
                document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
                start += 1
                continue
            content = lyx2latex(document, document.body[start:stop + 1])
            add_to_preamble(document, [command + "{" + content + "}"])
            del document.body[start:stop + 1]
            # the lines are gone, so start stays where it is

    for layout, new_name in renamed_layouts:
        start = 0
        while True:
            start = find_token(document.body, '\\begin_layout ' + layout, start)
            if start == -1:
                break
            stop = find_end_of_layout(document.body, start)
            if stop == -1:
                document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
                start += 1
                continue
            document.body[start] = "\\begin_layout " + new_name
            start = stop
1615         # no need to reset i
1616
1617
def convert_prettyref(document):
    """Convert prettyref references to neutral formatted refs.

    In every "CommandInset ref" inset the line "LatexCommand prettyref"
    is replaced by "LatexCommand formatted".  Afterwards the setting
    "\\use_refstyle 0" is inserted just before the last header line.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef!")
            i += 1
            continue
        k = find_token(document.body, "LatexCommand prettyref", i, j)
        if k != -1:
            document.body[k] = "LatexCommand formatted"
        # continue after this inset
        i = j + 1
    document.header.insert(-1, "\\use_refstyle 0")
1638
1639
def revert_refstyle(document):
    """Revert neutral formatted refs to prettyref.

    In every "CommandInset ref" inset the line "LatexCommand formatted"
    is replaced by "LatexCommand prettyref".  Afterwards the first
    "\\use_refstyle" header line, if there is one, is removed.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef")
            i += 1
            continue
        k = find_token(document.body, "LatexCommand formatted", i, j)
        if k != -1:
            document.body[k] = "LatexCommand prettyref"
        # continue after this inset
        i = j + 1
    i = find_token(document.header, "\\use_refstyle", 0)
    if i != -1:
        document.header.pop(i)
1662
1663
def revert_nameref(document):
    """Convert Nameref/nameref reference insets to TeX code.

    Each "CommandInset ref" inset whose command is Nameref or nameref
    is replaced by ERT holding "\\Nameref{<label>}" respectively
    "\\nameref{<label>}".  If at least one inset was converted, the
    nameref package is added to the preamble.
    """
    cmds = ["Nameref", "nameref"]
    foundone = False
    rx = re.compile(r'reference "(.*)"')
    for cmd in cmds:
        i = 0
        oldcmd = "LatexCommand " + cmd
        while True:
            # It seems better to look for this, as most of the reference
            # insets won't be ones we care about.
            i = find_token(document.body, oldcmd, i)
            if i == -1:
                break
            cmdloc = i
            i += 1
            # Make sure it is actually in an inset!
            # A normal line could begin with "LatexCommand nameref"!
            val = is_in_inset(document.body, cmdloc,
                              "\\begin_inset CommandInset ref")
            if not val:
                continue
            stins, endins = val

            # ok, so it is in an InsetRef
            refline = find_token(document.body, "reference", stins, endins)
            if refline == -1:
                # stins is a line number, so it has to be converted
                # before it can be appended to the message
                document.warning("Can't find reference for inset at line " + str(stins) + "!!")
                continue
            m = rx.match(document.body[refline])
            if not m:
                # report the line that failed to match
                document.warning("Can't match reference line: " + document.body[refline])
                continue
            foundone = True
            ref = m.group(1)
            newcontent = put_cmd_in_ert('\\' + cmd + '{' + ref + '}')
            document.body[stins:endins + 1] = newcontent

    if foundone:
        add_to_preamble(document, ["\\usepackage{nameref}"])
1704
1705
def remove_Nameref(document):
    """Convert Nameref commands to nameref commands.

    Every line "LatexCommand Nameref" that is_in_inset reports as lying
    inside a "CommandInset ref" inset is rewritten to
    "LatexCommand nameref"; other matching lines are left alone.
    """
    pos = 0
    while True:
        # Searching for the command itself is cheaper than visiting
        # every reference inset, as most of them are of no interest.
        found = find_token(document.body, "LatexCommand Nameref", pos)
        if found == -1:
            break
        pos = found + 1
        # Only touch the line if it really belongs to a reference inset.
        if is_in_inset(document.body, found, "\\begin_inset CommandInset ref"):
            document.body[found] = "LatexCommand nameref"
1724
1725
def revert_mathrsfs(document):
    """Load mathrsfs if \\mathscr is used in the document.

    The package is added to the preamble (once) as soon as any body
    line contains "\\mathscr{".
    """
    if any("\\mathscr{" in line for line in document.body):
        add_to_preamble(document, ["\\usepackage{mathrsfs}"])
1733
1734
def convert_flexnames(document):
    """Drop the type prefix from flex inset names.

    "\\begin_inset Flex Custom:Style" becomes "\\begin_inset Flex Style",
    and likewise for the prefixes CharStyle: and Element:.
    """
    prefixed = re.compile(r'^\\begin_inset Flex (?:Custom|CharStyle|Element):(.+)$')
    pos = find_token(document.body, "\\begin_inset Flex", 0)
    while pos != -1:
        hit = prefixed.match(document.body[pos])
        if hit is not None:
            document.body[pos] = "\\begin_inset Flex " + hit.group(1)
        pos = find_token(document.body, "\\begin_inset Flex", pos + 1)
1748
1749
# Maps a flex inset name without prefix to its old, prefixed name.
# revert_flexnames uses this table when the document backend is "latex".
flex_insets = dict(
    (name, "CharStyle:" + name) for name in (
        "Alert", "Code", "Concepts", "E-Mail", "Emph", "Expression",
        "Initial", "Institute", "Meaning", "Noun", "Strong", "Structure",
    )
)
flex_insets.update(
    (name, "Custom:" + name) for name in (
        "ArticleMode", "Endnote", "Glosse", "PresentationMode",
        "Tri-Glosse",
    )
)
1769
# Maps a flex inset name without prefix to its old "Element:" name.
# revert_flexnames uses this table when the document backend is not "latex".
flex_elements = dict(
    (name, "Element:" + name) for name in (
        "Abbrev", "CCC-Code", "Citation-number", "City", "Code", "CODEN",
        "Country", "Day", "Directory", "Dscr", "Email", "Emph",
        "Filename", "Firstname", "Fname", "GuiButton", "GuiMenu",
        "GuiMenuItem", "ISSN", "Issue-day", "Issue-months",
        "Issue-number", "KeyCap", "KeyCombo", "Keyword", "Literal",
        "MenuChoice", "Month", "Orgdiv", "Orgname", "Postcode",
        "SS-Code", "SS-Title", "State", "Street", "Surname", "Volume",
        "Year",
    )
)
1810
1811
def revert_flexnames(document):
    """Put the type prefix back on known flex inset names.

    The name following "\\begin_inset Flex" is looked up in flex_insets
    when the document backend is "latex" and in flex_elements otherwise.
    Known names are replaced by their prefixed form; unknown names are
    left untouched.
    """
    flexlist = flex_insets if document.backend == "latex" else flex_elements

    named = re.compile(r'^\\begin_inset Flex\s+(.+)$')
    pos = find_token(document.body, "\\begin_inset Flex", 0)
    while pos != -1:
        line = document.body[pos]
        hit = named.match(line)
        if hit is None:
            document.warning("Illegal flex inset: " + line)
        else:
            style = hit.group(1)
            if style in flexlist:
                document.body[pos] = "\\begin_inset Flex " + flexlist[style]
        pos = find_token(document.body, "\\begin_inset Flex", pos + 1)
1833
1834
def convert_mathdots(document):
    """Add the \\use_mathdots header setting.

    The setting is inserted after the \\use_mhchem header line or, if
    that is missing, after the \\use_esint line.  Its value is 2 when
    the preamble loads mathdots itself (that preamble line is then
    removed) and 0 otherwise.
    """
    anchor = find_token(document.header, "\\use_mhchem", 0)
    if anchor == -1:
        anchor = find_token(document.header, "\\use_esint", 0)
    if anchor == -1:
        document.warning("Malformed LyX document: Can't find \\use_mhchem.")
        return

    pkgline = find_token(document.preamble, "\\usepackage{mathdots}", 0)
    if pkgline != -1:
        document.header.insert(anchor + 1, "\\use_mathdots 2")
        del document.preamble[pkgline]
    else:
        document.header.insert(anchor + 1, "\\use_mathdots 0")
1849
1850
def revert_mathdots(document):
    """Load mathdots if used in the document.

    The \\use_mathdots header line is removed and its value decides
    what is added to the preamble:

    * 0: nothing;
    * 2: mathdots is loaded unconditionally;
    * any other value, an unparsable value or a missing header line:
      "auto", i.e. mathdots is loaded (if \\iddots is not yet defined)
      as soon as some Formula inset contains \\iddots.
    """
    mathdots = find_token(document.header, "\\use_mathdots", 0)
    if mathdots == -1:
        document.warning("No \\use_mathdots line. Assuming auto.")
    else:
        val = get_value(document.header, "\\use_mathdots", mathdots)
        del document.header[mathdots]
        try:
            usedots = int(val)
        except ValueError:
            document.warning("Invalid \\use_mathdots value: " + val + ". Assuming auto.")
            usedots = 1

        if usedots == 0:
            # do not load case
            return
        if usedots == 2:
            # force load case
            add_to_preamble(document, ["\\usepackage{mathdots}"])
            return

    # so we are in the auto case. we want to load mathdots if \iddots is used.
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue
        code = "\n".join(document.body[i:j])
        if code.find("\\iddots") != -1:
            # \@ifundefined takes three arguments; the empty third one
            # keeps it from swallowing whatever follows in the preamble.
            add_to_preamble(document, ["\\@ifundefined{iddots}{\\usepackage{mathdots}}{}"])
            return
        i = j
1891
1892
def convert_rule(document):
    """Convert \\lyxline to CommandInset line.

    Every \\lyxline in the body is replaced by a "CommandInset line"
    inset with fixed offset, width and height.  If the line is the
    first thing in its paragraph and paragraphs are indented, a
    \\noindent is added to the paragraph; if other content precedes it,
    a Newline inset is put in front of the line inset.
    """
    inset = ['\\begin_inset CommandInset line',
             'LatexCommand rule',
             'offset "0.5ex"',
             'width "100line%"',
             'height "1pt"', '',
             '\\end_inset', '', '']
    newline = ["\\begin_inset Newline newline", "\\end_inset", ""]

    # if paragraphs are indented, we have to unindent to get the
    # line to be full-width.
    indent = get_value(document.header, "\\paragraph_separation", 0)
    have_indent = (indent == "indent")

    i = 0
    while True:
        i = find_token(document.body, "\\lyxline", i)
        if i == -1:
            return

        # we need to find out if this line follows other content
        # in its paragraph. find its layout....
        lastlay = find_token_backwards(document.body, "\\begin_layout", i)
        if lastlay == -1:
            document.warning("Can't find layout for line at " + str(i))
            # do the best we can.
            document.body[i:i+1] = inset
            i += len(inset)
            continue

        # ...and look for other content before it.
        lineisfirst = True
        for line in document.body[lastlay + 1:i]:
            # is it empty or a paragraph option?
            if not line or line[0] == '\\':
                continue
            lineisfirst = False
            break

        if lineisfirst:
            document.body[i:i+1] = inset
            if have_indent:
                # we need to unindent, lest the line be too long
                document.body.insert(lastlay + 1, "\\noindent")
                # the inset moved down by the line just inserted
                i += 1
            # resume right behind the inset, so that a \lyxline that
            # follows closely is not skipped
            i += len(inset)
        else:
            # so our line is in the middle of a paragraph
            # we need to add a new line, lest this line follow the
            # other content on that line and run off the side of the page
            document.body[i:i+1] = newline + inset
            i += len(newline) + len(inset)
1946
1947
def revert_rule(document):
    """Revert line insets to TeX code.

    Each "CommandInset line" inset is replaced by ERT holding
    "\\rule[<offset>]{<width>}{<height>}".  The optional argument is
    left out when the inset has no offset; width and height default to
    "100col%" and "1pt" and are passed through latex_length.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset line", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of line inset.")
            return
        # determine the optional offset; the brackets are added here
        # and only here, so that an empty offset yields no "[]" at all
        offset = get_quoted_value(document.body, 'offset', i, j)
        if offset:
            offset = '[' + offset + ']'
        # determine the width
        width = get_quoted_value(document.body, 'width', i, j, "100col%")
        width = latex_length(width)[1]
        # determine the height
        height = get_quoted_value(document.body, 'height', i, j, "1pt")
        height = latex_length(height)[1]
        # output the \rule command
        subst = "\\rule" + offset + "{" + width + "}{" + height + "}"
        ert = put_cmd_in_ert(subst)
        document.body[i:j + 1] = ert
        # continue right behind the lines that were just put in
        i += len(ert)
1974
1975
def revert_diagram(document):
    """Add the feyn package if \\Diagram is used in math.

    The Formula insets are scanned in order; the package is added to
    the preamble for the first one containing "\\Diagram" and the scan
    stops there.  The scan is also given up at the first Formula inset
    whose end cannot be found.
    """
    start = find_token(document.body, '\\begin_inset Formula', 0)
    while start != -1:
        stop = find_end_of_inset(document.body, start)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset.")
            return
        formula = "\n".join(document.body[start:stop])
        if "\\Diagram" in formula:
            # only need to do it once!
            add_to_preamble(document, ["\\usepackage{feyn}"])
            return
        start = find_token(document.body, '\\begin_inset Formula', stop)
1994
# Text classes for which convert_bibtex_clearpage acts
# (presumably the classes that provide chapters).
chapters = ("amsbook", "book", "docbook-book", "elsart", "extbook", "extreport",
    "jbook", "jreport", "jsbook", "literate-book", "literate-report", "memoir",
    "mwbk", "mwrep", "recipebook", "report", "scrbook", "scrreprt", "svmono",
    "svmult", "tbook", "treport", "tufte-book")


def convert_bibtex_clearpage(document):
    """Insert a clear(double)page before bibliographies using bibtotoc.

    Only documents whose class is listed in `chapters` are touched.
    For every bibtex inset whose options contain "bibtotoc", a Standard
    paragraph holding a Newpage inset is inserted in front of the
    paragraph containing the inset: "clearpage" for single-sided
    documents and "cleardoublepage" otherwise.  A missing or invalid
    \\papersides setting is treated as single-sided.
    """
    if document.textclass not in chapters:
        return

    i = find_token(document.header, '\\papersides', 0)
    sides = 0
    if i == -1:
        document.warning("Malformed LyX document: Can't find papersides definition.")
        document.warning("Assuming single sided.")
        sides = 1
    else:
        val = get_value(document.header, "\\papersides", i)
        try:
            sides = int(val)
        except ValueError:
            # sides stays 0, which the range check below reports
            pass
        if sides != 1 and sides != 2:
            document.warning("Invalid papersides value: " + val)
            document.warning("Assuming single sided.")
            sides = 1

    j = 0
    while True:
        j = find_token(document.body, "\\begin_inset CommandInset bibtex", j)
        if j == -1:
            return

        k = find_end_of_inset(document.body, j)
        if k == -1:
            document.warning("Can't find end of Bibliography inset at line " + str(j))
            j += 1
            continue

        # only act if there is the option "bibtotoc"
        val = get_value(document.body, 'options', j, k)
        if not val:
            document.warning("Can't find options for bibliography inset at line " + str(j))
            j = k
            continue

        if val.find("bibtotoc") == -1:
            j = k
            continue

        # so we want to insert a new page right before the paragraph that
        # this bibliography thing is in.
        lay = find_token_backwards(document.body, "\\begin_layout", j)
        if lay == -1:
            document.warning("Can't find layout containing bibliography inset at line " + str(j))
            j = k
            continue

        if sides == 1:
            cmd = "clearpage"
        else:
            cmd = "cleardoublepage"
        subst = ['\\begin_layout Standard',
                 '\\begin_inset Newpage ' + cmd,
                 '\\end_inset', '', '',
                 '\\end_layout', '']
        document.body[lay:lay] = subst
        # the inset moved down by the inserted lines; continue at its end
        j = k + len(subst)
2064
2065
def check_passthru(document):
    """Return True if the pass-through conversions apply to the document.

    That is the case when the text class is literate-article,
    literate-book or literate-report, or when the sweave or the noweb
    module is among the modules of the document.
    """
    literate_classes = ("literate-article", "literate-book", "literate-report")
    if document.textclass in literate_classes:
        return True
    # the module list is only consulted for the other classes
    return any(mod in ("sweave", "noweb")
               for mod in document.get_module_list())
2076
2077
def convert_passthru(document):
    """Replace newline insets in Chunk and Scrap layouts by new paragraphs.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Nothing is done unless check_passthru(document) holds.  Within each
    Chunk or Scrap paragraph that carries no paragraph customization,
    every Newline inset is replaced by the end of the paragraph and the
    start of a new one of the same layout.  If the paragraph that
    follows has the same layout, an additional empty paragraph of that
    layout is put in front of it.
    """
    if not check_passthru(document):
        return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        # each layout is searched for from the top of the document
        beg = 0
        # what a newline inset is turned into
        parbreak = ["\\end_layout", "", "\\begin_layout " + lay]
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we are now going to replace newline insets within this layout
            # by new instances of this layout. so we have repeated layouts
            # instead of newlines.

            # if the paragraph has any customization, however, we do not want to
            # do the replacement.
            if document.body[beg + 1].startswith("\\"):
                beg = end + 1
                continue

            ns = beg
            while True:
                ns = find_token(document.body, "\\begin_inset Newline newline", ns, end)
                if ns == -1:
                    break
                ne = find_end_of_inset(document.body, ns)
                if ne == -1 or ne > end:
                    document.warning("Can't find end of inset at line " + str(ns))
                    ns += 1
                    continue
                # the empty line after the inset goes away with it
                if document.body[ne + 1] == "":
                    ne += 1
                removed = ne + 1 - ns
                document.body[ns:ne + 1] = parbreak
                # continue behind the new lines, and move end by exactly
                # as much as the body has grown or shrunk, so that the
                # search never reaches into the paragraphs that follow
                ns += len(parbreak)
                end += len(parbreak) - removed

            # ok, we now want to find out if the next layout is the
            # same as this one. if so, we will insert an extra copy of it
            beg = end + 1
            nextbeg = find_token(document.body, "\\begin_layout", end)
            if nextbeg == -1:
                continue
            m = rx.match(document.body[nextbeg])
            if m and m.group(1) == lay:
                filler = ["\\begin_layout " + lay, "", "\\end_layout", ""]
                document.body[nextbeg:nextbeg] = filler
                # resume at the layout that now follows the extra one
                beg = nextbeg + len(filler)
2141
2142
def revert_passthru(document):
    """Merge adjacent Chunk or Scrap paragraphs, using newline insets.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Nothing is done unless check_passthru(document) holds.  Starting
    from each Chunk or Scrap paragraph, the paragraphs following it are
    examined as long as they have the same layout: an empty one is
    removed, which ends the run; a non-empty one is joined to the
    current paragraph with a Newline inset in place of the paragraph
    break.  Paragraphs separated by anything but empty lines are not
    joined.
    """
    if not check_passthru(document):
        return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        # each layout is searched for from the top of the document
        beg = 0
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we now want to find out if the next layout is the
            # same as this one. but we will need to do this over and
            # over again.
            while True:
                nextbeg = find_token(document.body, "\\begin_layout", end)
                if nextbeg == -1:
                    break
                m = rx.match(document.body[nextbeg])
                if not m or m.group(1) != lay:
                    break
                # so it is the same layout again. before looking at it,
                # make sure there is no content between the two layouts:
                # joining them replaces everything in between, so any
                # content there would be lost.
                if any(line != "" for line in document.body[end + 1:nextbeg]):
                    document.warning("Found content between adjacent " + lay + " layouts!")
                    break
                nextend = find_end_of_layout(document.body, nextbeg)
                if nextend == -1:
                    document.warning("Can't find end of layout at line " + str(nextbeg))
                    break
                empty = all(line == ""
                            for line in document.body[nextbeg + 1:nextend])
                if empty:
                    # empty layouts just get removed
                    # should we check if it's before yet another such layout?
                    del document.body[nextbeg : nextend + 1]
                    # and we do not want to check again. we know the next layout
                    # should be another Chunk and should be left as is.
                    break
                # if it's not empty, then we want to insert a newline in place
                # of the layout switch
                subst = ["\\begin_inset Newline newline", "\\end_inset", ""]
                document.body[end : nextbeg + 1] = subst
                # and now we have to find the end of the new, larger layout
                newend = find_end_of_layout(document.body, beg)
                if newend == -1:
                    document.warning("Can't find end of new layout at line " + str(beg))
                    break
                end = newend
            beg = end + 1
2210
2211
def revert_multirowOffset(document):
    """Revert multirow cells with offset in tables to TeX-code.

    Does nothing unless the body has a cell tagged
    '<cell multirow="3" mroffset='.  Otherwise the multirow package is
    added to the preamble and each table is processed: the multirow and
    mroffset options are stripped from the cells concerned, and the
    first paragraph of each multirow start cell is wrapped in ERT so
    that its content becomes the last argument of
    "\\multirow{<rows>}{<width>}[<offset>]{...}".
    """
    # this routine is the same as the revert_multirow routine except that
    # it checks additionally for the offset

    # first, let's find out if we need to do anything
    i = find_token(document.body, '<cell multirow="3" mroffset=', 0)
    if i == -1:
      return

    add_to_preamble(document, ["\\usepackage{multirow}"])

    # matches the mroffset option, so that it can be cut out of a cell tag
    rgx = re.compile(r'mroffset="[^"]+?"')
    begin_table = 0

    while True:
        # find begin/end of table
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        # does this table have multirow?
        # (this test does not look at mroffset; only the cells that also
        # carry mroffset are collected as multirow starts below)
        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
        if i == -1:
            begin_table = end_table
            continue

        # store the number of rows and columns
        numrows = get_option_value(document.body[begin_table], "rows")
        numcols = get_option_value(document.body[begin_table], "columns")
        try:
          numrows = int(numrows)
          numcols = int(numcols)
        except:
          document.warning("Unable to determine rows and columns!")
          begin_table = end_table
          continue

        # mrstarts: [row, column] of every cell that starts a multirow
        #   with offset
        # multirows: one list per row, holding for each cell
        #   [line of the cell tag, line of the end of the cell, kind],
        #   where kind is 3 (begin multirow), 4 (in multirow) or 0
        mrstarts = []
        multirows = []
        # collect info on rows and columns of this table.
        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row>', begin_row, end_table)
            if begin_row == -1:
              document.warning("Can't find row " + str(row + 1))
              break
            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
            if end_row == -1:
              document.warning("Can't find end of row " + str(row + 1))
              break
            begin_cell = begin_row
            multirows.append([])
            for column in range(numcols):
                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
                if begin_cell == -1:
                  document.warning("Can't find column " + str(column + 1) + \
                    "in row " + str(row + 1))
                  break
                # NOTE
                # this will fail if someone puts "</cell>" in a cell, but
                # that seems fairly unlikely.
                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
                if end_cell == -1:
                  document.warning("Can't find end of column " + str(column + 1) + \
                    "in row " + str(row + 1))
                  break
                multirows[row].append([begin_cell, end_cell, 0])
                if document.body[begin_cell].find('multirow="3" mroffset=') != -1:
                  multirows[row][column][2] = 3 # begin multirow
                  mrstarts.append([row, column])
                elif document.body[begin_cell].find('multirow="4"') != -1:
                  multirows[row][column][2] = 4 # in multirow
                begin_cell = end_cell
            begin_row = end_row
        # end of table info collection
        # NOTE(review): if one of the loops above was left early,
        # multirows is incomplete and the lookups below may raise an
        # IndexError -- to be confirmed.

        # work from the back to avoid messing up numbering
        mrstarts.reverse()
        for m in mrstarts:
            row = m[0]
            col = m[1]
            # get column width
            # (assumes the column definitions start two lines below the
            # <lyxtabular> line, one line per column -- TODO confirm)
            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
            # "0pt" means that no width is specified
            if not col_width or col_width == "0pt":
              col_width = "*"
            # determine the number of cells that are part of the multirow
            nummrs = 1
            for r in range(row + 1, numrows):
                if multirows[r][col][2] != 4:
                  break
                nummrs += 1
                # take the opportunity to revert this line
                lineno = multirows[r][col][0]
                document.body[lineno] = document.body[lineno].\
                  replace(' multirow="4" ', ' ').\
                  replace('valignment="middle"', 'valignment="top"').\
                  replace(' topline="true" ', ' ')
                # remove bottom line of previous multirow-part cell
                lineno = multirows[r-1][col][0]
                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
            # revert beginning cell
            bcell = multirows[row][col][0]
            ecell = multirows[row][col][1]
            # read the offset before the option is removed from the tag
            offset = get_option_value(document.body[bcell], "mroffset")
            document.body[bcell] = document.body[bcell].\
              replace(' multirow="3" ', ' ').\
              replace('valignment="middle"', 'valignment="top"')
            # remove mroffset option
            document.body[bcell] = rgx.sub('', document.body[bcell])

            # the first layout found in the cell is the one that gets wrapped
            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
            if blay == -1:
              document.warning("Can't find layout for cell!")
              continue
            bend = find_end_of_layout(document.body, blay)
            if bend == -1:
              document.warning("Can't find end of layout for cell!")
              continue
            # do the later one first, so as not to mess up the numbering
            # we are wrapping the whole cell in this ert
            # so before the end of the layout...
            document.body[bend:bend] = put_cmd_in_ert("}")
            # ...and after the beginning
            document.body[blay + 1:blay + 1] = \
              put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}[" \
                  + offset + "]{")

        # on to the next table
        begin_table = end_table
2347
2348
def revert_script(document):
    """Convert subscript/superscript insets to TeX code.

    The opening of each script inset, up to and including the start of
    its first paragraph, is replaced by ERT with "\\textsubscript{" or
    "\\textsuperscript{"; the end of that paragraph together with the
    end of the inset is replaced by ERT with "}".  If a subscript inset
    was seen and the text class is not among those listed below, the
    subscript package is added to the preamble.
    """
    need_subscript_pkg = False
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset script', pos)
        if pos == -1:
            break
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of script inset.")
            pos += 1
            continue
        lay_beg = find_token(document.body, "\\begin_layout", pos, inset_end)
        if lay_beg == -1:
            document.warning("Malformed LyX document: Can't find layout in script inset.")
            pos = inset_end
            continue

        # pick the command from the kind of script inset
        opener = None
        if check_token(document.body[pos], "\\begin_inset script subscript"):
            opener = '\\textsubscript{'
            need_subscript_pkg = True
        elif check_token(document.body[pos], "\\begin_inset script superscript"):
            opener = '\\textsuperscript{'
        if opener is None:
            document.warning("Malformed LyX document: Unknown type of script inset.")
            pos = inset_end
            continue

        lay_end = find_end_of_layout(document.body, lay_beg)
        if lay_end == -1 or lay_end > inset_end:
            document.warning("Malformed LyX document: Can't find end of layout in script inset.")
            pos = inset_end
            continue
        # the tail goes first, so that the line numbers of the head
        # are still valid; this removes the \end_layout \end_inset pair
        document.body[lay_end:inset_end + 1] = put_cmd_in_ert("}")
        document.body[pos:lay_beg + 1] = put_cmd_in_ert(opener)
        pos += 1

    if not need_subscript_pkg:
        return
    # these classes provide a \textsubscript command:
    # FIXME: Would be nice if we could use the information of the .layout file here
    classes = ["memoir", "scrartcl", "scrbook", "scrlttr2", "scrreprt"]
    if find_token_exact(classes, document.textclass, 0) == -1:
        add_to_preamble(document, ['\\usepackage{subscript}'])
2391
2392
def convert_use_xetex(document):
    """Convert \\use_xetex to \\use_non_tex_fonts.

    The header line is renamed and keeps its value.  If there is no
    \\use_xetex line, "\\use_non_tex_fonts 0" is inserted just before
    the last header line.
    """
    where = find_token(document.header, "\\use_xetex", 0)
    if where != -1:
        setting = get_value(document.header, "\\use_xetex", 0)
        document.header[where] = "\\use_non_tex_fonts " + setting
        return
    document.header.insert(-1, "\\use_non_tex_fonts 0")
2401
2402
def revert_use_xetex(document):
    """Revert \\use_non_tex_fonts to \\use_xetex.

    The header line is renamed and keeps its value; a warning is issued
    if the document has no such line.
    """
    where = find_token(document.header, "\\use_non_tex_fonts", 0)
    if where != -1:
        setting = get_value(document.header, "\\use_non_tex_fonts", 0)
        document.header[where] = "\\use_xetex " + setting
        return
    document.warning("Malformed document. No \\use_non_tex_fonts param!")
2413
2414
def revert_labeling(document):
    """Turn every "\\begin_layout Labeling" line of the body into
    "\\begin_layout List".

    Nothing is changed when the document's text class is one of the classes
    listed in koma_classes.
    """
    koma_classes = ("scrartcl", "scrarticle-beamer", "scrbook", "scrlettr",
                    "scrlttr2", "scrreprt")
    if document.textclass in koma_classes:
        return

    body = document.body
    target = "\\begin_layout Labeling"
    pos = find_token_exact(body, target, 0)
    while pos != -1:
        body[pos] = "\\begin_layout List"
        # The rewritten line no longer matches, so searching again from the
        # same index moves on to the next occurrence.
        pos = find_token_exact(body, target, pos)
2426
2427
def revert_langpack(document):
    """Drop the \\language_package parameter from the document header.

    Only the first matching header line is deleted.  A warning is issued
    when the parameter cannot be found.
    """
    header = document.header
    pos = find_token(header, "\\language_package", 0)
    if pos != -1:
        del header[pos]
        return
    document.warning("Malformed document. No \\language_package param!")
2437
2438
def convert_langpack(document):
    """Add the \\language_package parameter to the document header.

    "\\language_package default" is inserted on the line right after the
    first header line found for the \\language token.  When no such line
    exists, a warning is issued and the header is left unchanged.
    """
    # The backslash has to be escaped: "\l" is not a valid escape sequence
    # and only yields a literal backslash by accident (newer Python versions
    # warn about it).
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        document.warning("Malformed document. No \\language defined!")
        return

    document.header.insert(i + 1, "\\language_package default")
2447
2448
def revert_tabularwidth(document):
    """Remove the tabularwidth option from the <features> line of Tabular insets.

    Every "\\begin_inset Tabular" in the body is visited.  A warning is
    issued for insets whose end or whose <features> line cannot be found;
    those insets are skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(i))
            i += 1
            continue
        # Step past the \begin_inset line; the next search resumes from here,
        # i.e. still inside the current inset.
        i += 1
        features = find_token(document.body, "<features", i, j)
        if features == -1:
            document.warning("Can't find any features in Tabular inset at line " + str(i))
            i = j
            continue
        # remove_option() strips an option="value" pair, so look for the
        # option in exactly that form.  (Testing for 'alignment="tabularwidth"'
        # never matched a tabularwidth attribute, and would have made
        # remove_option() cut the line at the wrong place if it had.)
        if 'tabularwidth="' in document.body[features]:
            remove_option(document.body, features, 'tabularwidth')
2468
def revert_html_css_as_file(document):
    """Remove the \\html_css_as_file parameter from the header via del_token.

    A warning is issued when del_token reports that nothing was removed.
    """
    removed = del_token(document.header, '\\html_css_as_file', 0)
    if removed:
        return
    document.warning("Malformed LyX document: Missing \\html_css_as_file.")
2472
2473
2474 ##
2475 # Conversion hub
2476 #
2477
# Version strings this module declares as supported.
# NOTE(review): presumably the LyX release names handled by this converter
# module -- confirm against the lyx2lyx driver that reads this list.
supported_versions = ["2.0.0","2.0"]
# Forward conversion table.  Each entry is [N, [functions]]: an integer and
# the list of convert_* functions registered for it (an empty list means no
# function is registered for that number).
# NOTE(review): N is presumably the file-format number reached once the
# listed functions have run -- confirm against the code consuming this table.
convert = [[346, []],
           [347, []],
           [348, []],
           [349, []],
           [350, []],
           [351, []],
           [352, [convert_splitindex]],
           [353, []],
           [354, []],
           [355, []],
           [356, []],
           [357, []],
           [358, []],
           [359, [convert_nomencl_width]],
           [360, []],
           [361, []],
           [362, []],
           [363, []],
           [364, []],
           [365, []],
           [366, []],
           [367, []],
           [368, []],
           [369, [convert_author_id]],
           [370, []],
           [371, [convert_mhchem]],
           [372, []],
           [373, [merge_gbrief]],
           [374, []],
           [375, []],
           [376, []],
           [377, []],
           [378, []],
           [379, [convert_math_output]],
           [380, []],
           [381, []],
           [382, []],
           [383, []],
           [384, []],
           [385, []],
           [386, []],
           [387, []],
           [388, []],
           [389, [convert_html_quotes]],
           [390, []],
           [391, []],
           [392, []],
           [393, [convert_optarg]],
           [394, [convert_use_makebox]],
           [395, []],
           [396, []],
           [397, [remove_Nameref]],
           [398, []],
           [399, [convert_mathdots]],
           [400, [convert_rule]],
           [401, []],
           [402, [convert_bibtex_clearpage]],
           [403, [convert_flexnames]],
           [404, [convert_prettyref]],
           [405, []],
           [406, [convert_passthru]],
           [407, []],
           [408, []],
           [409, [convert_use_xetex]],
           [410, []],
           [411, [convert_langpack]],
           [412, []],
           [413, []]
]
2548
# Backward conversion table, listed from the highest number down to the
# lowest.  Each entry is [N, [functions]]: an integer and the list of
# revert_* functions registered for it (possibly empty, possibly several).
# NOTE(review): N is presumably the file-format number reached once the
# listed functions have run -- confirm against the code consuming this table.
revert =  [[412, [revert_html_css_as_file]],
           [411, [revert_tabularwidth]],
           [410, [revert_langpack]],
           [409, [revert_labeling]],
           [408, [revert_use_xetex]],
           [407, [revert_script]],
           [406, [revert_multirowOffset]],
           [405, [revert_passthru]],
           [404, []],
           [403, [revert_refstyle]],
           [402, [revert_flexnames]],
           [401, []],
           [400, [revert_diagram]],
           [399, [revert_rule]],
           [398, [revert_mathdots]],
           [397, [revert_mathrsfs]],
           [396, []],
           [395, [revert_nameref]],
           [394, [revert_DIN_C_pagesizes]],
           [393, [revert_makebox]],
           [392, [revert_argument]],
           [391, []],
           [390, [revert_align_decimal, revert_IEEEtran]],
           [389, [revert_output_sync]],
           [388, [revert_html_quotes]],
           [387, [revert_pagesizes]],
           [386, [revert_math_scale]],
           [385, [revert_lyx_version]],
           [384, [revert_shadedboxcolor]],
           [383, [revert_fontcolor]],
           [382, [revert_turkmen]],
           [381, [revert_notefontcolor]],
           [380, [revert_equalspacing_xymatrix]],
           [379, [revert_inset_preview]],
           [378, [revert_math_output]],
           [377, []],
           [376, [revert_multirow]],
           [375, [revert_includeall]],
           [374, [revert_includeonly]],
           [373, [revert_html_options]],
           [372, [revert_gbrief]],
           [371, [revert_fontenc]],
           [370, [revert_mhchem]],
           [369, [revert_suppress_date]],
           [368, [revert_author_id]],
           [367, [revert_hspace_glue_lengths]],
           [366, [revert_percent_vspace_lengths, revert_percent_hspace_lengths]],
           [365, [revert_percent_skip_lengths]],
           [364, [revert_paragraph_indentation]],
           [363, [revert_branch_filename]],
           [362, [revert_longtable_align]],
           [361, [revert_applemac]],
           [360, []],
           [359, [revert_nomencl_cwidth]],
           [358, [revert_nomencl_width]],
           [357, [revert_custom_processors]],
           [356, [revert_ulinelatex]],
           [355, []],
           [354, [revert_strikeout]],
           [353, [revert_printindexall]],
           [352, [revert_subindex]],
           [351, [revert_splitindex]],
           [350, [revert_backgroundcolor]],
           [349, [revert_outputformat]],
           [348, [revert_xetex]],
           [347, [revert_phantom, revert_hphantom, revert_vphantom]],
           [346, [revert_tabularvalign]],
           [345, [revert_swiss]]
          ]
2618
2619
# Running this file directly does nothing; the guard body is a bare pass.
if __name__ == "__main__":
    pass