lib/lyx2lyx/lyx_2_0.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # -*- coding: utf-8 -*-
   4 # Copyright (C) 2011 The LyX team
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  19
  20 """ Convert files to the file format generated by lyx 2.0"""
  21
  22 import re, string
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_token, find_end_of, find_tokens, \
  27   find_token_exact, find_end_of_inset, find_end_of_layout, \
  28   find_token_backwards, is_in_inset, get_value, get_quoted_value, \
  29   del_token, check_token, get_option_value
  30
  31 from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
  32   put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
  33   revert_font_attrs, hex2ratio, str2bool
  34
  35 ####################################################################
  36 # Private helper functions
  37
  38 def remove_option(lines, m, option):
  39     ''' removes option from line m. returns whether we did anything '''
  40     l = lines[m].find(option)
  41     if l == -1:
  42         return False
  43     val = lines[m][l:].split('"')[1]
  44     lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
  45     return True
  46
  47
  48 ###############################################################################
  49 ###
  50 ### Conversion and reversion routines
  51 ###
  52 ###############################################################################
  53
  54 def revert_swiss(document):
  55     " Set language german-ch to ngerman "
  56     i = 0
  57     if document.language == "german-ch":
  58         document.language = "ngerman"
  59         i = find_token(document.header, "\\language", 0)
  60         if i != -1:
  61             document.header[i] = "\\language ngerman"
  62     j = 0
  63     while True:
  64         j = find_token(document.body, "\\lang german-ch", j)
  65         if j == -1:
  66             return
  67         document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman")
  68         j = j + 1
  69
  70
  71 def revert_tabularvalign(document):
  72    " Revert the tabular valign option "
  73    i = 0
  74    while True:
  75       i = find_token(document.body, "\\begin_inset Tabular", i)
  76       if i == -1:
  77           return
  78       end = find_end_of_inset(document.body, i)
  79       if end == -1:
  80           document.warning("Can't find end of inset at line " + str(i))
  81           i += 1
  82           continue
  83       fline = find_token(document.body, "<features", i, end)
  84       if fline == -1:
  85           document.warning("Can't find features for inset at line " + str(i))
  86           i += 1
  87           continue
  88       p = document.body[fline].find("islongtable")
  89       if p != -1:
  90           q = document.body[fline].find("tabularvalignment")
  91           if q != -1:
  92               # FIXME
  93               # This seems wrong: It removes everything after
  94               # tabularvalignment, too.
  95               document.body[fline] = document.body[fline][:q - 1] + '>'
  96           i += 1
  97           continue
  98
  99        # no longtable
 100       tabularvalignment = 'c'
 101       # which valignment is specified?
 102       m = document.body[fline].find('tabularvalignment="top"')
 103       if m != -1:
 104           tabularvalignment = 't'
 105       m = document.body[fline].find('tabularvalignment="bottom"')
 106       if m != -1:
 107           tabularvalignment = 'b'
 108       # delete tabularvalignment
 109       q = document.body[fline].find("tabularvalignment")
 110       if q != -1:
 111           # FIXME
 112           # This seems wrong: It removes everything after
 113           # tabularvalignment, too.
 114           document.body[fline] = document.body[fline][:q - 1] + '>'
 115
 116       # don't add a box when centered
 117       if tabularvalignment == 'c':
 118           i = end
 119           continue
 120       subst = ['\\end_layout', '\\end_inset']
 121       document.body[end:end] = subst # just inserts those lines
 122       subst = ['\\begin_inset Box Frameless',
 123           'position "' + tabularvalignment +'"',
 124           'hor_pos "c"',
 125           'has_inner_box 1',
 126           'inner_pos "c"',
 127           'use_parbox 0',
 128           # we don't know the width, assume 50%
 129           'width "50col%"',
 130           'special "none"',
 131           'height "1in"',
 132           'height_special "totalheight"',
 133           'status open',
 134           '',
 135           '\\begin_layout Plain Layout']
 136       document.body[i:i] = subst # this just inserts the array at i
 137       # since there could be a tabular inside a tabular, we cannot
 138       # jump to end
 139       i += len(subst)
 140
 141
 142 def revert_phantom_types(document, ptype, cmd):
 143     " Reverts phantom to ERT "
 144     i = 0
 145     while True:
 146       i = find_token(document.body, "\\begin_inset Phantom " + ptype, i)
 147       if i == -1:
 148           return
 149       end = find_end_of_inset(document.body, i)
 150       if end == -1:
 151           document.warning("Can't find end of inset at line " + str(i))
 152           i += 1
 153           continue
 154       blay = find_token(document.body, "\\begin_layout Plain Layout", i, end)
 155       if blay == -1:
 156           document.warning("Can't find layout for inset at line " + str(i))
 157           i = end
 158           continue
 159       bend = find_end_of_layout(document.body, blay)
 160       if bend == -1:
 161           document.warning("Malformed LyX document: Could not find end of Phantom inset's layout.")
 162           i = end
 163           continue
 164       substi = ["\\begin_inset ERT", "status collapsed", "",
 165                 "\\begin_layout Plain Layout", "", "", "\\backslash",
 166                 cmd + "{", "\\end_layout", "", "\\end_inset"]
 167       substj = ["\\size default", "", "\\begin_inset ERT", "status collapsed", "",
 168                 "\\begin_layout Plain Layout", "", "}", "\\end_layout", "", "\\end_inset"]
 169       # do the later one first so as not to mess up the numbering
 170       document.body[bend:end + 1] = substj
 171       document.body[i:blay + 1] = substi
 172       i = end + len(substi) + len(substj) - (end - bend) - (blay - i) - 2
 173
 174
 175 def revert_phantom(document):
 176     revert_phantom_types(document, "Phantom", "phantom")
 177
 178 def revert_hphantom(document):
 179     revert_phantom_types(document, "HPhantom", "hphantom")
 180
 181 def revert_vphantom(document):
 182     revert_phantom_types(document, "VPhantom", "vphantom")
 183
 184
 185 def revert_xetex(document):
 186     " Reverts documents that use XeTeX "
 187
 188     i = find_token(document.header, '\\use_xetex', 0)
 189     if i == -1:
 190         document.warning("Malformed LyX document: Missing \\use_xetex.")
 191         return
 192     if not str2bool(get_value(document.header, "\\use_xetex", i)):
 193         del document.header[i]
 194         return
 195     del document.header[i]
 196
 197     # 1.) set doc encoding to utf8-plain
 198     i = find_token(document.header, "\\inputencoding", 0)
 199     if i == -1:
 200         document.warning("Malformed LyX document: Missing \\inputencoding.")
 201     else:
 202         document.header[i] = "\\inputencoding utf8-plain"
 203
 204     # 2.) check font settings
 205     # defaults
 206     roman = sans = typew = "default"
 207     osf = False
 208     sf_scale = tt_scale = 100.0
 209
 210     i = find_token(document.header, "\\font_roman", 0)
 211     if i == -1:
 212         document.warning("Malformed LyX document: Missing \\font_roman.")
 213     else:
 214         roman = get_value(document.header, "\\font_roman", i)
 215         document.header[i] = "\\font_roman default"
 216
 217     i = find_token(document.header, "\\font_sans", 0)
 218     if i == -1:
 219         document.warning("Malformed LyX document: Missing \\font_sans.")
 220     else:
 221         sans = get_value(document.header, "\\font_sans", i)
 222         document.header[i] = "\\font_sans default"
 223
 224     i = find_token(document.header, "\\font_typewriter", 0)
 225     if i == -1:
 226         document.warning("Malformed LyX document: Missing \\font_typewriter.")
 227     else:
 228         typew = get_value(document.header, "\\font_typewriter", i)
 229         document.header[i] = "\\font_typewriter default"
 230
 231     i = find_token(document.header, "\\font_osf", 0)
 232     if i == -1:
 233         document.warning("Malformed LyX document: Missing \\font_osf.")
 234     else:
 235         osf = str2bool(get_value(document.header, "\\font_osf", i))
 236         document.header[i] = "\\font_osf false"
 237
 238     i = find_token(document.header, "\\font_sc", 0)
 239     if i == -1:
 240         document.warning("Malformed LyX document: Missing \\font_sc.")
 241     else:
 242         # we do not need this value.
 243         document.header[i] = "\\font_sc false"
 244
 245     i = find_token(document.header, "\\font_sf_scale", 0)
 246     if i == -1:
 247         document.warning("Malformed LyX document: Missing \\font_sf_scale.")
 248     else:
 249       val = get_value(document.header, '\\font_sf_scale', i)
 250       try:
 251         # float() can throw
 252         sf_scale = float(val)
 253       except:
 254         document.warning("Invalid font_sf_scale value: " + val)
 255       document.header[i] = "\\font_sf_scale 100"
 256
 257     i = find_token(document.header, "\\font_tt_scale", 0)
 258     if i == -1:
 259         document.warning("Malformed LyX document: Missing \\font_tt_scale.")
 260     else:
 261         val = get_value(document.header, '\\font_tt_scale', i)
 262         try:
 263           # float() can throw
 264           tt_scale = float(val)
 265         except:
 266           document.warning("Invalid font_tt_scale value: " + val)
 267         document.header[i] = "\\font_tt_scale 100"
 268
 269     # 3.) set preamble stuff
 270     pretext = ['%% This document must be processed with xelatex!']
 271     pretext.append('\\usepackage{fontspec}')
 272     if roman != "default":
 273         pretext.append('\\setmainfont[Mapping=tex-text]{' + roman + '}')
 274     if sans != "default":
 275         sf = '\\setsansfont['
 276         if sf_scale != 100.0:
 277             sf += 'Scale=' + str(sf_scale / 100.0) + ','
 278         sf += 'Mapping=tex-text]{' + sans + '}'
 279         pretext.append(sf)
 280     if typew != "default":
 281         tw = '\\setmonofont'
 282         if tt_scale != 100.0:
 283             tw += '[Scale=' + str(tt_scale / 100.0) + ']'
 284         tw += '{' + typew + '}'
 285         pretext.append(tw)
 286     if osf:
 287         pretext.append('\\defaultfontfeatures{Numbers=OldStyle}')
 288     pretext.append('\usepackage{xunicode}')
 289     pretext.append('\usepackage{xltxtra}')
 290     insert_to_preamble(document, pretext)
 291
 292
 293 def revert_outputformat(document):
 294     " Remove default output format param "
 295
 296     if not del_token(document.header, '\\default_output_format', 0):
 297         document.warning("Malformed LyX document: Missing \\default_output_format.")
 298
 299
 300 def revert_backgroundcolor(document):
 301     " Reverts background color to preamble code "
 302     i = find_token(document.header, "\\backgroundcolor", 0)
 303     if i == -1:
 304         return
 305     colorcode = get_value(document.header, '\\backgroundcolor', i)
 306     del document.header[i]
 307     # don't clutter the preamble if backgroundcolor is not set
 308     if colorcode == "#ffffff":
 309         return
 310     red   = hex2ratio(colorcode[1:3])
 311     green = hex2ratio(colorcode[3:5])
 312     blue  = hex2ratio(colorcode[5:7])
 313     insert_to_preamble(document, \
 314         ['% To set the background color',
 315         '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
 316         '\\definecolor{page_backgroundcolor}{rgb}{' + red + ',' + green + ',' + blue + '}',
 317         '\\pagecolor{page_backgroundcolor}'])
 318
 319
 320 def add_use_indices(document):
 321     " Add \\use_indices if it is missing "
 322     i = find_token(document.header, '\\use_indices', 0)
 323     if i != -1:
 324         return i
 325     i = find_token(document.header, '\\use_bibtopic', 0)
 326     if i == -1:
 327         i = find_token(document.header, '\\cite_engine', 0)
 328     if i == -1:
 329         i = find_token(document.header, '\\use_mathdots', 0)
 330     if i == -1:
 331         i = find_token(document.header, '\\use_mhchem', 0)
 332     if i == -1:
 333         i = find_token(document.header, '\\use_esint', 0)
 334     if i == -1:
 335         i = find_token(document.header, '\\use_amsmath', 0)
 336     if i == -1:
 337         document.warning("Malformed LyX document: Missing \\use_indices.")
 338         return -1
 339     document.header.insert(i + 1, '\\use_indices 0')
 340     return i + 1
 341
 342
 343 def revert_splitindex(document):
 344     " Reverts splitindex-aware documents "
 345     i = add_use_indices(document)
 346     if i == -1:
 347         return
 348     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 349     del document.header[i]
 350     preamble = []
 351     if useindices:
 352          preamble.append("\\usepackage{splitidx})")
 353
 354     # deal with index declarations in the preamble
 355     i = 0
 356     while True:
 357         i = find_token(document.header, "\\index", i)
 358         if i == -1:
 359             break
 360         k = find_token(document.header, "\\end_index", i)
 361         if k == -1:
 362             document.warning("Malformed LyX document: Missing \\end_index.")
 363             return
 364         if useindices:
 365           line = document.header[i]
 366           l = re.compile(r'\\index (.*)$')
 367           m = l.match(line)
 368           iname = m.group(1)
 369           ishortcut = get_value(document.header, '\\shortcut', i, k)
 370           if ishortcut != "":
 371               preamble.append("\\newindex[" + iname + "]{" + ishortcut + "}")
 372         del document.header[i:k + 1]
 373     if preamble:
 374         insert_to_preamble(document, preamble)
 375
 376     # deal with index insets
 377     # these need to have the argument removed
 378     i = 0
 379     while True:
 380         i = find_token(document.body, "\\begin_inset Index", i)
 381         if i == -1:
 382             break
 383         line = document.body[i]
 384         l = re.compile(r'\\begin_inset Index (.*)$')
 385         m = l.match(line)
 386         itype = m.group(1)
 387         if itype == "idx" or indices == "false":
 388             document.body[i] = "\\begin_inset Index"
 389         else:
 390             k = find_end_of_inset(document.body, i)
 391             if k == -1:
 392                 document.warning("Can't find end of index inset!")
 393                 i += 1
 394                 continue
 395             content = lyx2latex(document, document.body[i:k])
 396             # escape quotes
 397             content = content.replace('"', r'\"')
 398             subst = put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")
 399             document.body[i:k + 1] = subst
 400         i = i + 1
 401
 402     # deal with index_print insets
 403     i = 0
 404     while True:
 405         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 406         if i == -1:
 407             return
 408         k = find_end_of_inset(document.body, i)
 409         ptype = get_quoted_value(document.body, 'type', i, k)
 410         if ptype == "idx":
 411             j = find_token(document.body, "type", i, k)
 412             del document.body[j]
 413         elif not useindices:
 414             del document.body[i:k + 1]
 415         else:
 416             subst = put_cmd_in_ert("\\printindex[" + ptype + "]{}")
 417             document.body[i:k + 1] = subst
 418         i = i + 1
 419
 420
 421 def convert_splitindex(document):
 422     " Converts index and printindex insets to splitindex-aware format "
 423     add_use_indices(document)
 424     i = 0
 425     while True:
 426         i = find_token(document.body, "\\begin_inset Index", i)
 427         if i == -1:
 428             break
 429         document.body[i] = document.body[i].replace("\\begin_inset Index",
 430             "\\begin_inset Index idx")
 431         i = i + 1
 432     i = 0
 433     while True:
 434         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 435         if i == -1:
 436             return
 437         if document.body[i + 1].find('LatexCommand printindex') == -1:
 438             document.warning("Malformed LyX document: Incomplete printindex inset.")
 439             return
 440         subst = ["LatexCommand printindex",
 441             "type \"idx\""]
 442         document.body[i + 1:i + 2] = subst
 443         i = i + 1
 444
 445
 446 def revert_subindex(document):
 447     " Reverts \\printsubindex CommandInset types "
 448     i = add_use_indices(document)
 449     if i == -1:
 450         return
 451     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 452     i = 0
 453     while True:
 454         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 455         if i == -1:
 456             return
 457         k = find_end_of_inset(document.body, i)
 458         ctype = get_value(document.body, 'LatexCommand', i, k)
 459         if ctype != "printsubindex":
 460             i = k + 1
 461             continue
 462         ptype = get_quoted_value(document.body, 'type', i, k)
 463         if not useindices:
 464             del document.body[i:k + 1]
 465         else:
 466             subst = put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")
 467             document.body[i:k + 1] = subst
 468         i = i + 1
 469
 470
 471 def revert_printindexall(document):
 472     " Reverts \\print[sub]index* CommandInset types "
 473     i = add_use_indices(document)
 474     if i == -1:
 475         return
 476     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 477     i = 0
 478     while True:
 479         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 480         if i == -1:
 481             return
 482         k = find_end_of_inset(document.body, i)
 483         ctype = get_value(document.body, 'LatexCommand', i, k)
 484         if ctype != "printindex*" and ctype != "printsubindex*":
 485             i = k
 486             continue
 487         if not useindices:
 488             del document.body[i:k + 1]
 489         else:
 490             subst = put_cmd_in_ert("\\" + ctype + "{}")
 491             document.body[i:k + 1] = subst
 492         i = i + 1
 493
 494
 495 def revert_strikeout(document):
 496   " Reverts \\strikeout font attribute "
 497   changed = revert_font_attrs(document.body, "\\uuline", "\\uuline")
 498   changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed
 499   changed = revert_font_attrs(document.body, "\\strikeout", "\\sout")  or changed
 500   if changed == True:
 501     insert_to_preamble(document, \
 502         ['%  for proper underlining',
 503         '\\PassOptionsToPackage{normalem}{ulem}',
 504         '\\usepackage{ulem}'])
 505
 506
 507 def revert_ulinelatex(document):
 508     " Reverts \\uline font attribute "
 509     i = find_token(document.body, '\\bar under', 0)
 510     if i == -1:
 511         return
 512     insert_to_preamble(document,\
 513             ['%  for proper underlining',
 514             '\\PassOptionsToPackage{normalem}{ulem}',
 515             '\\usepackage{ulem}',
 516             '\\let\\cite@rig\\cite',
 517             '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}',
 518             '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}',
 519             '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}'])
 520
 521
 522 def revert_custom_processors(document):
 523     " Remove bibtex_command and index_command params "
 524
 525     if not del_token(document.header, '\\bibtex_command', 0):
 526         document.warning("Malformed LyX document: Missing \\bibtex_command.")
 527
 528     if not del_token(document.header, '\\index_command', 0):
 529         document.warning("Malformed LyX document: Missing \\index_command.")
 530
 531
 532 def convert_nomencl_width(document):
 533     " Add set_width param to nomencl_print "
 534     i = 0
 535     while True:
 536       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 537       if i == -1:
 538         break
 539       document.body.insert(i + 2, "set_width \"none\"")
 540       i = i + 1
 541
 542
 543 def revert_nomencl_width(document):
 544     " Remove set_width param from nomencl_print "
 545     i = 0
 546     while True:
 547       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 548       if i == -1:
 549         break
 550       j = find_end_of_inset(document.body, i)
 551       if not del_token(document.body, "set_width", i, j):
 552         document.warning("Can't find set_width option for nomencl_print!")
 553       i = j
 554
 555
 556 def revert_nomencl_cwidth(document):
 557     " Remove width param from nomencl_print "
 558     i = 0
 559     while True:
 560       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 561       if i == -1:
 562         break
 563       j = find_end_of_inset(document.body, i)
 564       l = find_token(document.body, "width", i, j)
 565       if l == -1:
 566         document.warning("Can't find width option for nomencl_print!")
 567         i = j
 568         continue
 569       width = get_quoted_value(document.body, "width", i, j)
 570       del document.body[l]
 571       insert_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
 572       i = j - 1
 573
 574
 575 def revert_applemac(document):
 576     " Revert applemac encoding to auto "
 577     if document.encoding != "applemac":
 578       return
 579     document.encoding = "auto"
 580     i = find_token(document.header, "\\encoding", 0)
 581     if i != -1:
 582         document.header[i] = "\\encoding auto"
 583
 584
 585 def revert_longtable_align(document):
 586     " Remove longtable alignment setting "
 587     i = 0
 588     while True:
 589       i = find_token(document.body, "\\begin_inset Tabular", i)
 590       if i == -1:
 591           break
 592       end = find_end_of_inset(document.body, i)
 593       if end == -1:
 594           document.warning("Can't find end of inset at line " + str(i))
 595           i += 1
 596           continue
 597       fline = find_token(document.body, "<features", i, end)
 598       if fline == -1:
 599           document.warning("Can't find features for inset at line " + str(i))
 600           i += 1
 601           continue
 602       j = document.body[fline].find("longtabularalignment")
 603       if j == -1:
 604           i += 1
 605           continue
 606       # FIXME Is this correct? It wipes out everything after the
 607       # one we found.
 608       document.body[fline] = document.body[fline][:j - 1] + '>'
 609       # since there could be a tabular inside this one, we
 610       # cannot jump to end.
 611       i += 1
 612
 613
 614 def revert_branch_filename(document):
 615     " Remove \\filename_suffix parameter from branches "
 616     i = 0
 617     while True:
 618         i = find_token(document.header, "\\filename_suffix", i)
 619         if i == -1:
 620             return
 621         del document.header[i]
 622
 623
 624 def revert_paragraph_indentation(document):
 625     " Revert custom paragraph indentation to preamble code "
 626     i = find_token(document.header, "\\paragraph_indentation", 0)
 627     if i == -1:
 628       return
 629     length = get_value(document.header, "\\paragraph_indentation", i)
 630     # we need only remove the line if indentation is default
 631     if length != "default":
 632       # handle percent lengths
 633       length = latex_length(length)[1]
 634       insert_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"])
 635     del document.header[i]
 636
 637
 638 def revert_percent_skip_lengths(document):
 639     " Revert relative lengths for paragraph skip separation to preamble code "
 640     i = find_token(document.header, "\\defskip", 0)
 641     if i == -1:
 642         return
 643     length = get_value(document.header, "\\defskip", i)
 644     # only revert when a custom length was set and when
 645     # it used a percent length
 646     if length in ('smallskip', 'medskip', 'bigskip'):
 647         return
 648     # handle percent lengths
 649     percent, length = latex_length(length)
 650     if percent:
 651         insert_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"])
 652         # set defskip to medskip as default
 653         document.header[i] = "\\defskip medskip"
 654
 655
 656 def revert_percent_vspace_lengths(document):
 657     " Revert relative VSpace lengths to ERT "
 658     i = 0
 659     while True:
 660       i = find_token(document.body, "\\begin_inset VSpace", i)
 661       if i == -1:
 662           break
 663       # only revert if a custom length was set and if
 664       # it used a percent length
 665       r = re.compile(r'\\begin_inset VSpace (.*)$')
 666       m = r.match(document.body[i])
 667       length = m.group(1)
 668       if length in ('defskip', 'smallskip', 'medskip', 'bigskip', 'vfill'):
 669          i += 1
 670          continue
 671       # check if the space has a star (protected space)
 672       protected = (document.body[i].rfind("*") != -1)
 673       if protected:
 674           length = length.rstrip('*')
 675       # handle percent lengths
 676       percent, length = latex_length(length)
 677       # revert the VSpace inset to ERT
 678       if percent:
 679           if protected:
 680               subst = put_cmd_in_ert("\\vspace*{" + length + "}")
 681           else:
 682               subst = put_cmd_in_ert("\\vspace{" + length + "}")
 683           document.body[i:i + 2] = subst
 684       i += 1
 685
 686
 687 def revert_percent_hspace_lengths(document):
 688     " Revert relative HSpace lengths to ERT "
 689     i = 0
 690     while True:
 691       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 692       if i == -1:
 693           break
 694       j = find_end_of_inset(document.body, i)
 695       if j == -1:
 696           document.warning("Can't find end of inset at line " + str(i))
 697           i += 1
 698           continue
 699       # only revert if a custom length was set...
 700       length = get_value(document.body, '\\length', i + 1, j)
 701       if length == '':
 702           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 703           i = j
 704           continue
 705       protected = ""
 706       if document.body[i].find("\\hspace*{}") != -1:
 707           protected = "*"
 708       # ...and if it used a percent length
 709       percent, length = latex_length(length)
 710       # revert the HSpace inset to ERT
 711       if percent:
 712           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 713           document.body[i:j + 1] = subst
 714       # if we did a substitution, this will still be ok
 715       i = j
 716
 717
 718 def revert_hspace_glue_lengths(document):
 719     " Revert HSpace glue lengths to ERT "
 720     i = 0
 721     while True:
 722       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 723       if i == -1:
 724           break
 725       j = find_end_of_inset(document.body, i)
 726       if j == -1:
 727           document.warning("Can't find end of inset at line " + str(i))
 728           i += 1
 729           continue
 730       length = get_value(document.body, '\\length', i + 1, j)
 731       if length == '':
 732           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 733           i = j
 734           continue
 735       protected = ""
 736       if document.body[i].find("\\hspace*{}") != -1:
 737           protected = "*"
 738       # only revert if the length contains a plus or minus at pos != 0
 739       if length.find('-',1) != -1 or length.find('+',1) != -1:
 740           # handle percent lengths
 741           length = latex_length(length)[1]
 742           # revert the HSpace inset to ERT
 743           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 744           document.body[i:j+1] = subst
 745       i = j
 746
 747
 748 def convert_author_id(document):
 749     " Add the author_id to the \\author definition and make sure 0 is not used"
 750     i = 0
 751     anum = 1
 752     re_author = re.compile(r'(\\author) (\".*\")\s*(.*)$')
 753
 754     while True:
 755         i = find_token(document.header, "\\author", i)
 756         if i == -1:
 757             break
 758         m = re_author.match(document.header[i])
 759         if m:
 760             name = m.group(2)
 761             email = m.group(3)
 762             document.header[i] = "\\author %i %s %s" % (anum, name, email)
 763         anum += 1
 764         i += 1
 765
 766     i = 0
 767     while True:
 768         i = find_token(document.body, "\\change_", i)
 769         if i == -1:
 770             break
 771         change = document.body[i].split(' ');
 772         if len(change) == 3:
 773             type = change[0]
 774             author_id = int(change[1])
 775             time = change[2]
 776             document.body[i] = "%s %i %s" % (type, author_id + 1, time)
 777         i += 1
 778
 779
 780 def revert_author_id(document):
 781     " Remove the author_id from the \\author definition "
 782     i = 0
 783     anum = 0
 784     rx = re.compile(r'(\\author)\s+(-?\d+)\s+(\".*\")\s*(.*)$')
 785     idmap = dict()
 786
 787     while True:
 788         i = find_token(document.header, "\\author", i)
 789         if i == -1:
 790             break
 791         m = rx.match(document.header[i])
 792         if m:
 793             author_id = int(m.group(2))
 794             idmap[author_id] = anum
 795             name = m.group(3)
 796             email = m.group(4)
 797             document.header[i] = "\\author %s %s" % (name, email)
 798         i += 1
 799         # FIXME Should this be incremented if we didn't match?
 800         anum += 1
 801
 802     i = 0
 803     while True:
 804         i = find_token(document.body, "\\change_", i)
 805         if i == -1:
 806             break
 807         change = document.body[i].split(' ');
 808         if len(change) == 3:
 809             type = change[0]
 810             author_id = int(change[1])
 811             time = change[2]
 812             document.body[i] = "%s %i %s" % (type, idmap[author_id], time)
 813         i += 1
 814
 815
 816 def revert_suppress_date(document):
 817     " Revert suppressing of default document date to preamble code "
 818     i = find_token(document.header, "\\suppress_date", 0)
 819     if i == -1:
 820         return
 821     # remove the preamble line and write to the preamble
 822     # when suppress_date was true
 823     date = str2bool(get_value(document.header, "\\suppress_date", i))
 824     if date:
 825         add_to_preamble(document, ["\\date{}"])
 826     del document.header[i]
 827
 828
 829 def convert_mhchem(document):
 830     "Set mhchem to off for versions older than 1.6.x"
 831     if document.start < 277:
 832         # LyX 1.5.x and older did never load mhchem.
 833         # Therefore we must switch it off: Documents that use mhchem have
 834         # a manual \usepackage anyway, and documents not using mhchem but
 835         # custom macros with the same names as mhchem commands might get
 836         # corrupted if mhchem is automatically loaded.
 837         mhchem = 0 # off
 838     else:
 839         # LyX 1.6.x did always load mhchem automatically.
 840         mhchem = 1 # auto
 841     i = find_token(document.header, "\\use_esint", 0)
 842     if i == -1:
 843         # pre-1.5.x document
 844         i = find_token(document.header, "\\use_amsmath", 0)
 845     if i == -1:
 846         document.warning("Malformed LyX document: Could not find amsmath os esint setting.")
 847         return
 848     document.header.insert(i + 1, "\\use_mhchem %d" % mhchem)
 849
 850
 851 def revert_mhchem(document):
 852     "Revert mhchem loading to preamble code"
 853
 854     mhchem = "off"
 855     i = find_token(document.header, "\\use_mhchem", 0)
 856     if i == -1:
 857         document.warning("Malformed LyX document: Could not find mhchem setting.")
 858         mhchem = "auto"
 859     else:
 860         val = get_value(document.header, "\\use_mhchem", i)
 861         if val == "1":
 862             mhchem = "auto"
 863         elif val == "2":
 864             mhchem = "on"
 865         del document.header[i]
 866
 867     if mhchem == "off":
 868       # don't load case
 869       return
 870
 871     if mhchem == "auto":
 872         i = 0
 873         while True:
 874             i = find_token(document.body, "\\begin_inset Formula", i)
 875             if i == -1:
 876                break
 877             line = document.body[i]
 878             if line.find("\\ce{") != -1 or line.find("\\cf{") != -1:
 879               mhchem = "on"
 880               break
 881             i += 1
 882
 883     if mhchem == "on":
 884         pre = ["\\PassOptionsToPackage{version=3}{mhchem}",
 885           "\\usepackage{mhchem}"]
 886         insert_to_preamble(document, pre)
 887
 888
 889 def revert_fontenc(document):
 890     " Remove fontencoding param "
 891     if not del_token(document.header, '\\fontencoding', 0):
 892         document.warning("Malformed LyX document: Missing \\fontencoding.")
 893
 894
 895 def merge_gbrief(document):
 896     " Merge g-brief-en and g-brief-de to one class "
 897
 898     if document.textclass != "g-brief-de":
 899         if document.textclass == "g-brief-en":
 900             document.textclass = "g-brief"
 901             document.set_textclass()
 902         return
 903
 904     obsoletedby = { "Brieftext":       "Letter",
 905                     "Unterschrift":    "Signature",
 906                     "Strasse":         "Street",
 907                     "Zusatz":          "Addition",
 908                     "Ort":             "Town",
 909                     "Land":            "State",
 910                     "RetourAdresse":   "ReturnAddress",
 911                     "MeinZeichen":     "MyRef",
 912                     "IhrZeichen":      "YourRef",
 913                     "IhrSchreiben":    "YourMail",
 914                     "Telefon":         "Phone",
 915                     "BLZ":             "BankCode",
 916                     "Konto":           "BankAccount",
 917                     "Postvermerk":     "PostalComment",
 918                     "Adresse":         "Address",
 919                     "Datum":           "Date",
 920                     "Betreff":         "Reference",
 921                     "Anrede":          "Opening",
 922                     "Anlagen":         "Encl.",
 923                     "Verteiler":       "cc",
 924                     "Gruss":           "Closing"}
 925     i = 0
 926     while 1:
 927         i = find_token(document.body, "\\begin_layout", i)
 928         if i == -1:
 929             break
 930
 931         layout = document.body[i][14:]
 932         if layout in obsoletedby:
 933             document.body[i] = "\\begin_layout " + obsoletedby[layout]
 934
 935         i += 1
 936
 937     document.textclass = "g-brief"
 938     document.set_textclass()
 939
 940
 941 def revert_gbrief(document):
 942     " Revert g-brief to g-brief-en "
 943     if document.textclass == "g-brief":
 944         document.textclass = "g-brief-en"
 945         document.set_textclass()
 946
 947
 948 def revert_html_options(document):
 949     " Remove html options "
 950     del_token(document.header, '\\html_use_mathml', 0)
 951     del_token(document.header, '\\html_be_strict', 0)
 952
 953
 954 def revert_includeonly(document):
 955     i = 0
 956     while True:
 957         i = find_token(document.header, "\\begin_includeonly", i)
 958         if i == -1:
 959             return
 960         j = find_end_of(document.header, i, "\\begin_includeonly", "\\end_includeonly")
 961         if j == -1:
 962             document.warning("Unable to find end of includeonly section!!")
 963             break
 964         document.header[i : j + 1] = []
 965
 966
 967 def revert_includeall(document):
 968     " Remove maintain_unincluded_children param "
 969     del_token(document.header, '\\maintain_unincluded_children', 0)
 970
 971
 972 def revert_multirow(document):
 973     " Revert multirow cells in tables to TeX-code"
 974
 975     # first, let's find out if we need to do anything
 976     # cell type 3 is multirow begin cell
 977     i = find_token(document.body, '<cell multirow="3"', 0)
 978     if i == -1:
 979       return
 980
 981     add_to_preamble(document, ["\\usepackage{multirow}"])
 982
 983     begin_table = 0
 984     while True:
 985         # find begin/end of table
 986         begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
 987         if begin_table == -1:
 988             break
 989         end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
 990         if end_table == -1:
 991             document.warning("Malformed LyX document: Could not find end of table.")
 992             begin_table += 1
 993             continue
 994         # does this table have multirow?
 995         i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
 996         if i == -1:
 997             begin_table = end_table
 998             continue
 999
1000         # store the number of rows and columns
1001         numrows = get_option_value(document.body[begin_table], "rows")
1002         numcols = get_option_value(document.body[begin_table], "columns")
1003         try:
1004           numrows = int(numrows)
1005           numcols = int(numcols)
1006         except:
1007           document.warning("Unable to determine rows and columns!")
1008           begin_table = end_table
1009           continue
1010
1011         mrstarts = []
1012         multirows = []
1013         # collect info on rows and columns of this table.
1014         begin_row = begin_table
1015         for row in range(numrows):
1016             begin_row = find_token(document.body, '<row>', begin_row, end_table)
1017             if begin_row == -1:
1018               document.warning("Can't find row " + str(row + 1))
1019               break
1020             end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
1021             if end_row == -1:
1022               document.warning("Can't find end of row " + str(row + 1))
1023               break
1024             begin_cell = begin_row
1025             multirows.append([])
1026             for column in range(numcols):
1027                 begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
1028                 if begin_cell == -1:
1029                   document.warning("Can't find column " + str(column + 1) + \
1030                     "in row " + str(row + 1))
1031                   break
1032                 # NOTE
1033                 # this will fail if someone puts "</cell>" in a cell, but
1034                 # that seems fairly unlikely.
1035                 end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
1036                 if end_cell == -1:
1037                   document.warning("Can't find end of column " + str(column + 1) + \
1038                     "in row " + str(row + 1))
1039                   break
1040                 multirows[row].append([begin_cell, end_cell, 0])
1041                 if document.body[begin_cell].find('multirow="3"') != -1:
1042                   multirows[row][column][2] = 3 # begin multirow
1043                   mrstarts.append([row, column])
1044                 elif document.body[begin_cell].find('multirow="4"') != -1:
1045                   multirows[row][column][2] = 4 # in multirow
1046                 begin_cell = end_cell
1047             begin_row = end_row
1048         # end of table info collection
1049
1050         # work from the back to avoid messing up numbering
1051         mrstarts.reverse()
1052         for m in mrstarts:
1053             row = m[0]
1054             col = m[1]
1055             # get column width
1056             col_width = get_option_value(document.body[begin_table + 2 + col], "width")
1057             # "0pt" means that no width is specified
1058             if not col_width or col_width == "0pt":
1059               col_width = "*"
1060             # determine the number of cells that are part of the multirow
1061             nummrs = 1
1062             for r in range(row + 1, numrows):
1063                 if multirows[r][col][2] != 4:
1064                   break
1065                 nummrs += 1
1066                 # take the opportunity to revert this line
1067                 lineno = multirows[r][col][0]
1068                 document.body[lineno] = document.body[lineno].\
1069                   replace(' multirow="4" ', ' ').\
1070                   replace('valignment="middle"', 'valignment="top"').\
1071                   replace(' topline="true" ', ' ')
1072                 # remove bottom line of previous multirow-part cell
1073                 lineno = multirows[r-1][col][0]
1074                 document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
1075             # revert beginning cell
1076             bcell = multirows[row][col][0]
1077             ecell = multirows[row][col][1]
1078             document.body[bcell] = document.body[bcell].\
1079               replace(' multirow="3" ', ' ').\
1080               replace('valignment="middle"', 'valignment="top"')
1081             blay = find_token(document.body, "\\begin_layout", bcell, ecell)
1082             if blay == -1:
1083               document.warning("Can't find layout for cell!")
1084               continue
1085             bend = find_end_of_layout(document.body, blay)
1086             if bend == -1:
1087               document.warning("Can't find end of layout for cell!")
1088               continue
1089             # do the later one first, so as not to mess up the numbering
1090             # we are wrapping the whole cell in this ert
1091             # so before the end of the layout...
1092             document.body[bend:bend] = put_cmd_in_ert("}")
1093             # ...and after the beginning
1094             document.body[blay + 1:blay + 1] = \
1095               put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}{")
1096
1097         begin_table = end_table
1098
1099
def convert_math_output(document):
    """Convert \\html_use_mathml to \\html_math_output.

    A true value becomes "0" (MathML), a false value "2" (Images).  If the
    line cannot be parsed, a warning is issued and "0" is written.
    """
    i = find_token(document.header, "\\html_use_mathml", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_use_mathml\s+(\w+)')
    m = rgx.match(document.header[i])
    newval = "0" # MathML
    if m:
        if not str2bool(m.group(1)):
            newval = "2" # Images
    else:
        document.warning("Can't match " + document.header[i])
    document.header[i] = "\\html_math_output " + newval
1115
1116
def revert_math_output(document):
    """Revert \\html_math_output to \\html_use_mathml.

    The values "1" and "2" become "false"; every other value becomes "true".
    If the line cannot be parsed, a warning is issued and "true" is written.
    """
    i = find_token(document.header, "\\html_math_output", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_math_output\s+(\d)')
    m = rgx.match(document.header[i])
    newval = "true"
    if m:
        if m.group(1) in ("1", "2"):
            newval = "false"
    else:
        document.warning("Unable to match " + document.header[i])
    document.header[i] = "\\html_use_mathml " + newval
1132
1133
1134
def revert_inset_preview(document):
    """Dissolve the preview inset.

    The lines from the opening of the inset up to and including its first
    \\begin_layout, the last \\end_layout inside the inset and the closing
    \\end_inset are removed, so that the content becomes part of the
    surrounding text.  If no layout (or no end of a layout) can be found
    inside the inset, only its opening and closing lines are removed.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Preview", i)
        if i == -1:
            return
        iend = find_end_of_inset(document.body, i)
        if iend == -1:
            document.warning("Malformed LyX document: Could not find end of Preview inset.")
            i += 1
            continue

        # This has several issues.
        # We need to do something about the layouts inside InsetPreview.
        # If we just leave the first one, then we have something like:
        # \begin_layout Standard
        # ...
        # \begin_layout Standard
        # and we get a "no \end_layout" error. So something has to be done.
        # Ideally, we would check if it is the same as the layout we are in.
        # If so, we just remove it; if not, we end the active one. But it is
        # not easy to know what layout we are in, due to depth changes, etc,
        # and it is not clear how much work it is worth doing. In most
        # cases, the layout will probably be the same.
        #
        # For the same reason, we have to remove the \end_layout tag at the
        # end of the last layout in the inset. Again, that will sometimes be
        # wrong, but it will usually be right. To know what to do, we would
        # again have to know what layout the inset is in.

        blay = find_token(document.body, "\\begin_layout", i, iend)
        if blay == -1:
            document.warning("Can't find layout for preview inset!")
            # always do the later one first...
            del document.body[iend]
            del document.body[i]
            # deletions mean we do not need to reset i
            continue

        # We want to delete the last \end_layout in this inset, too.
        # Note that this may not be the \end_layout that goes with blay!
        # The search is restricted to the lines between blay and iend, so
        # that a line outside of the inset can never be picked.
        bend = -1
        k = blay + 1
        while True:
            k = find_token(document.body, "\\end_layout", k, iend)
            if k == -1:
                break
            bend = k
            k += 1
        if bend == -1:
            document.warning("Unable to find last layout in preview inset!")
            del document.body[iend]
            del document.body[i]
            # deletions mean we do not need to reset i
            continue
        # always do the later one first...
        del document.body[iend]
        del document.body[bend]
        del document.body[i:blay + 1]
        # we do not need to reset i
1203
1204
def revert_equalspacing_xymatrix(document):
    """Revert a Formula with xymatrix@! to an ERT inset.

    Every Formula inset containing ``\\xymatrix@!`` is replaced by ERT with
    the same content.  If at least one formula was replaced and no remaining
    Formula inset contains ``\\xymatrix``, the xy package is added to the
    preamble.
    """
    i = 0
    # True if a Formula inset that stays in the document uses \xymatrix
    has_preamble = False
    # True if at least one formula has been turned into ERT
    has_equal_spacing = False

    while True:
        i = find_token(document.body, "\\begin_inset Formula", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Formula inset.")
            i += 1
            continue

        formula = document.body[i:j]
        if any("\\xymatrix@!" in line for line in formula):
            has_equal_spacing = True
            # drop the leading "\begin_inset Formula " (21 characters)
            content = [document.body[i][21:]]
            content += document.body[i + 1:j]
            subst = put_cmd_in_ert(content)
            document.body[i:j + 1] = subst
            # continue right behind the inserted ERT
            i += len(subst)
        else:
            if any("\\xymatrix" in line for line in formula):
                has_preamble = True
            i = j + 1

    if has_equal_spacing and not has_preamble:
        add_to_preamble(document, ['\\usepackage[all]{xy}'])
1243
1244
def revert_notefontcolor(document):
    """Revert the greyed-out note font color to preamble code.

    The \\notefontcolor header line is removed.  If the document contains a
    greyed-out note, the color is defined in the preamble and the
    lyxgreyedout environment is redefined to use it.
    """
    i = find_token(document.header, "\\notefontcolor", 0)
    if i == -1:
        return

    colorcode = get_value(document.header, '\\notefontcolor', i)
    del document.header[i]

    # are there any grey notes?
    if find_token(document.body, "\\begin_inset Note Greyedout", 0) == -1:
        # no need to do anything else, and \renewcommand will throw
        # an error since lyxgreyedout will not exist.
        return

    # the color code is in the form #rrggbb where every character denotes a hex number
    red = hex2ratio(colorcode[1:3])
    green = hex2ratio(colorcode[3:5])
    blue = hex2ratio(colorcode[5:7])
    # write the preamble; every list entry is one preamble line
    insert_to_preamble(document,
      ['%  for greyed-out notes',
       '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
       '\\definecolor{note_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue),
       '\\renewenvironment{lyxgreyedout}',
       ' {\\textcolor{note_fontcolor}\\bgroup}{\\egroup}'])
1272
1273
def revert_turkmen(document):
    """Replace the language Turkmen by English.

    Both the document language and every ``\\lang turkmen`` switch in the
    body are changed.
    """
    if document.language == "turkmen":
        document.language = "english"
        pos = find_token(document.header, "\\language", 0)
        if pos != -1:
            document.header[pos] = "\\language english"

    pos = find_token(document.body, "\\lang turkmen", 0)
    while pos != -1:
        line = document.body[pos]
        document.body[pos] = line.replace("\\lang turkmen", "\\lang english")
        pos = find_token(document.body, "\\lang turkmen", pos + 1)
1290
1291
def revert_fontcolor(document):
    """Revert the document font color to preamble code.

    The \\fontcolor header line is removed.  Unless the color is "#000000",
    the color is defined and activated in the preamble.
    """
    pos = find_token(document.header, "\\fontcolor", 0)
    if pos == -1:
        return
    colorcode = get_value(document.header, '\\fontcolor', pos)
    del document.header[pos]
    # don't clutter the preamble if font color is not set
    if colorcode == "#000000":
        return
    # the color code has the form #rrggbb: two hex digits per channel,
    # converted in the order red, green, blue
    rgb = tuple([hex2ratio(colorcode[k:k + 2]) for k in (1, 3, 5)])
    preamble = [
        '%  Set the font color',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        '\\definecolor{document_fontcolor}{rgb}{%s,%s,%s}' % rgb,
        '\\color{document_fontcolor}',
    ]
    insert_to_preamble(document, preamble)
1312
1313
def revert_shadedboxcolor(document):
    """Revert the shaded box color to preamble code.

    The \\boxbgcolor header line is removed and the color is defined as
    "shadecolor" in the preamble.
    """
    pos = find_token(document.header, "\\boxbgcolor", 0)
    if pos == -1:
        return
    colorcode = get_value(document.header, '\\boxbgcolor', pos)
    del document.header[pos]
    # the color code has the form #rrggbb: two hex digits per channel,
    # converted in the order red, green, blue
    rgb = tuple([hex2ratio(colorcode[k:k + 2]) for k in (1, 3, 5)])
    preamble = [
        '%  Set the color of boxes with shaded background',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        "\\definecolor{shadecolor}{rgb}{%s,%s,%s}" % rgb,
    ]
    insert_to_preamble(document, preamble)
1330
1331
def revert_lyx_version(document):
    """Revert LyX version information from Inset Info.

    Every Info inset of type "lyxinfo" with arg "version" is replaced by a
    plain line of text: the version taken from the lyx2lyx_version module
    or, if that is not available, the placeholder "LyX version".
    """
    version = "LyX version"
    try:
        import lyx2lyx_version
        version = lyx2lyx_version.version
    except (ImportError, AttributeError):
        # no version information available, keep the placeholder
        pass

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Info', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i += 1
            continue

        # We expect:
        # \begin_inset Info
        # type  "lyxinfo"
        # arg   "version"
        # \end_inset
        typ = get_quoted_value(document.body, "type", i, j)
        arg = get_quoted_value(document.body, "arg", i, j)
        if arg != "version" or typ != "lyxinfo":
            i = j + 1
            continue

        # We do not actually know the version of LyX used to produce the document.
        # But we can use our version, since we are reverting.
        s = [version]
        # Now we want to check if the line after "\end_inset" is empty. It normally
        # is, so we want to remove it, too.
        lastline = j + 1
        if lastline < len(document.body) and document.body[lastline].strip() == "":
            lastline = j + 2
        document.body[i:lastline] = s
        i = i + 1
1373
1374
def revert_math_scale(document):
    """Drop the math scaling and LaTeX options from the header."""
    obsolete = ('\\html_math_img_scale', '\\html_latex_start', '\\html_latex_end')
    for token in obsolete:
        del_token(document.header, token, 0)
1380
1381
def revert_pagesizes(document):
    """Revert page sizes to default.

    The \\papersize header line is removed if it names one of the sizes
    listed below.
    """
    reverted_sizes = (
        "a0paper", "a1paper", "a2paper", "a6paper",
        "b0paper", "b1paper", "b2paper", "b6paper",
        "b0j", "b1j", "b2j", "b3j", "b4j", "b5j", "b6j",
    )
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    # the size is what follows "\papersize " (11 characters)
    if document.header[pos][11:] in reverted_sizes:
        del document.header[pos]
1393
1394
def revert_DIN_C_pagesizes(document):
    """Revert DIN C page sizes to default.

    The \\papersize header line is removed if it names one of the sizes
    c0paper to c6paper.
    """
    din_c_sizes = ("c0paper", "c1paper", "c2paper", "c3paper",
                   "c4paper", "c5paper", "c6paper")
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    # the size is what follows "\papersize " (11 characters)
    if document.header[pos][11:] in din_c_sizes:
        del document.header[pos]
1404
1405
def convert_html_quotes(document):
    """Remove the quotes around the values of html_latex_start and
    html_latex_end.

    Lines whose value is not enclosed in quotes are left as they are.
    """
    # (header token, pattern for the quoted form, start of the new line)
    settings = (
        ('\\html_latex_start',
         re.compile(r'\\html_latex_start\s+"(.*)"'),
         "\\html_latex_start "),
        ('\\html_latex_end',
         re.compile(r'\\html_latex_end\s+"(.*)"'),
         "\\html_latex_end "),
    )
    for token, quoted, prefix in settings:
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        found = quoted.match(document.header[pos])
        if found:
            document.header[pos] = prefix + found.group(1)
1424
1425
def revert_html_quotes(document):
    """Put quotes around the values of html_latex_start and html_latex_end.

    A line that does not have the expected form is reported with a warning
    and removed from the header.
    """
    # (header token, pattern, start of the new line, warning text)
    settings = (
        ('\\html_latex_start',
         re.compile(r'\\html_latex_start\s+(.*)'),
         "\\html_latex_start \"",
         "Weird html_latex_start line: "),
        ('\\html_latex_end',
         re.compile(r'\\html_latex_end\s+(.*)'),
         "\\html_latex_end \"",
         "Weird html_latex_end line: "),
    )
    for token, pattern, prefix, complaint in settings:
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        line = document.header[pos]
        found = pattern.match(line)
        if found:
            document.header[pos] = prefix + found.group(1) + "\""
        else:
            document.warning(complaint + line)
            del document.header[pos]
1450
1451
def revert_output_sync(document):
    """Drop the forward search options from the header."""
    # the longer token is handled first, as in the original sequence of calls
    for token in ('\\output_sync_macro', '\\output_sync'):
        del_token(document.header, token, 0)
1456
1457
def revert_align_decimal(document):
    """Revert decimal aligned table columns to centered ones.

    In every Tabular inset, each <column> definition with
    alignment="decimal" loses its decimal_point option and gets
    alignment="center" instead.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(i))
            i += 1
            continue
        cell = find_token(document.body, "<cell", i, j)
        if cell == -1:
            document.warning("Can't find any cells in Tabular inset at line " + str(i))
            i = j
            continue
        # the column definitions are searched before the first cell only
        k = i + 1
        while True:
            k = find_token(document.body, "<column", k, cell)
            if k == -1:
                # all columns of this table are done; the remaining
                # tables still have to be processed
                break
            if document.body[k].find('alignment="decimal"') != -1:
                remove_option(document.body, k, 'decimal_point')
                document.body[k] = \
                  document.body[k].replace('alignment="decimal"', 'alignment="center"')
            k += 1
        # continue with the first cell, so that tables nested in the cells
        # of this table are found as well
        i = cell
1486
1487
def convert_optarg(document):
    """Rename every \\begin_inset OptArg to \\begin_inset Argument."""
    pos = find_token(document.body, '\\begin_inset OptArg', 0)
    while pos != -1:
        document.body[pos] = "\\begin_inset Argument"
        pos = find_token(document.body, '\\begin_inset OptArg', pos + 1)
1497
1498
def revert_argument(document):
    """Rename every \\begin_inset Argument to \\begin_inset OptArg."""
    pos = find_token(document.body, '\\begin_inset Argument', 0)
    while pos != -1:
        document.body[pos] = "\\begin_inset OptArg"
        pos = find_token(document.body, '\\begin_inset Argument', pos + 1)
1508
1509
def revert_makebox(document):
    """Convert \\makebox to TeX code.

    The use_makebox option is removed from all box insets.  A frameless box
    with ``use_makebox 1`` is dissolved: its opening lines up to the first
    \\begin_layout are replaced by ERT containing
    ``\\makebox[<width>][<alignment>]{`` and the lines from the end of that
    layout to the end of the inset by ERT containing ``}``.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Box', i)
        if i == -1:
            break
        z = find_end_of_inset(document.body, i)
        if z == -1:
            document.warning("Malformed LyX document: Can't find end of box inset.")
            i += 1
            continue
        blay = find_token(document.body, "\\begin_layout", i, z)
        if blay == -1:
            document.warning("Malformed LyX document: Can't find layout in box.")
            i = z
            continue
        # by looking before the layout we make sure we're actually finding
        # an option, not text.
        j = find_token(document.body, 'use_makebox', i, blay)
        if j == -1:
            # not z: box insets may be nested
            i += 1
            continue

        # get_value returns the value as a string, so it has to be compared
        # with "1", not with the number 1
        if not check_token(document.body[i], "\\begin_inset Box Frameless") \
           or get_value(document.body, 'use_makebox', j) != "1":
            del document.body[j]
            # not z: box insets may be nested, and the boxes inside of this
            # one have to lose their use_makebox option, too
            i += 1
            continue
        bend = find_end_of_layout(document.body, blay)
        if bend == -1 or bend > z:
            document.warning("Malformed LyX document: Can't find end of layout in box.")
            i = z
            continue
        # determine the alignment
        align = get_quoted_value(document.body, 'hor_pos', i, blay, "c")
        # determine the width
        length = get_quoted_value(document.body, 'width', i, blay, "50col%")
        length = latex_length(length)[1]
        # do the later replacement first, so as not to mess up the numbering:
        # remove the \end_layout \end_inset pair
        document.body[bend:z + 1] = put_cmd_in_ert("}")
        subst = "\\makebox[" + length + "][" + align + "]{"
        document.body[i:blay + 1] = put_cmd_in_ert(subst)
        i += 1
1555
1556
def convert_use_makebox(document):
    """Add the option ``use_makebox 0`` to every box inset.

    The new line is inserted directly after the use_parbox option of the
    box.  Boxes without an end, a layout or a use_parbox option are reported
    with a warning and skipped.
    """
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Box', pos)
        if pos == -1:
            break
        # The option is only searched between the start of the inset and its
        # first layout, to make sure that it really is the option of this
        # box and not some text elsewhere.
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Can't find end of box inset!!")
            pos += 1
            continue
        first_layout = find_token(document.body, "\\begin_layout", pos, inset_end)
        if first_layout == -1:
            document.warning("Can't find layout in box inset!!")
            pos = inset_end
            continue
        parbox = find_token(document.body, 'use_parbox', pos, first_layout)
        if parbox == -1:
            document.warning("Malformed LyX document: Can't find use_parbox statement in box.")
            pos = inset_end
            continue
        document.body.insert(parbox + 1, "use_makebox 0")
        # not behind the end of the inset: box insets may be nested
        pos = first_layout + 1
1584
1585
def revert_IEEEtran(document):
    """Convert IEEEtran layouts and styles to TeX code.

    Only documents of class IEEEtran are changed.  The flex insets
    "IEEE membership" and "Lowercase" are reverted via revert_flex_inset.
    Layouts with a LaTeX command are removed from the body and added to the
    preamble as ``<command>{<content>}``; the other layouts are renamed.
    """
    if document.textclass != "IEEEtran":
        return
    revert_flex_inset(document.body, "IEEE membership", "\\IEEEmembership")
    revert_flex_inset(document.body, "Lowercase", "\\MakeLowercase")

    # (layout, LaTeX command for the preamble, name of the replacing layout)
    # exactly one of the last two entries is set
    conversions = (
        ("Special Paper Notice",    "\\IEEEspecialpapernotice", None),
        ("After Title Text",        "\\IEEEaftertitletext",     None),
        ("Publication ID",          "\\IEEEpubid",              None),
        ("Page headings",           None,                       "MarkBoth"),
        ("Biography without photo", None,                       "BiographyNoPhoto"),
    )
    for layout, command, successor in conversions:
        pos = 0
        while True:
            pos = find_token(document.body, '\\begin_layout ' + layout, pos)
            if pos == -1:
                break
            end = find_end_of_layout(document.body, pos)
            if end == -1:
                document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
                pos += 1
                continue
            if successor is not None:
                document.body[pos] = "\\begin_layout " + successor
                pos = end
                continue
            content = lyx2latex(document, document.body[pos:end + 1])
            add_to_preamble(document, [command + "{" + content + "}"])
            del document.body[pos:end + 1]
            # the layout is gone, so the search resumes at the same index
1617         # no need to reset i
1618
1619
def convert_prettyref(document):
    """Convert prettyref references to neutral "formatted" references.

    Every reference inset whose command line is "LatexCommand prettyref"
    gets that line replaced by "LatexCommand formatted".  Afterwards
    "\\use_refstyle 0" is inserted just before the last header line,
    whether or not any inset was changed.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef!")
            i += 1
            continue
        # only look inside this inset for the command line
        k = find_token(document.body, "LatexCommand prettyref", i, j)
        if k != -1:
            document.body[k] = "LatexCommand formatted"
        i = j + 1
    document.header.insert(-1, "\\use_refstyle 0")
1640
1641
def revert_refstyle(document):
    """Revert neutral "formatted" references to prettyref.

    Every reference inset whose command line is "LatexCommand formatted"
    gets that line replaced by "LatexCommand prettyref".  The
    "\\use_refstyle" header line is removed if present.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef")
            i += 1
            continue
        # only look inside this inset for the command line
        k = find_token(document.body, "LatexCommand formatted", i, j)
        if k != -1:
            document.body[k] = "LatexCommand prettyref"
        i = j + 1
    i = find_token(document.header, "\\use_refstyle", 0)
    if i != -1:
        document.header.pop(i)
1664
1665
def revert_nameref(document):
  """Revert Nameref/nameref reference insets to ERT.

  Each reference inset using "LatexCommand Nameref" or "LatexCommand
  nameref" is replaced by ERT containing the command applied to the
  referenced label.  If at least one inset was replaced, the nameref
  package is added to the preamble.
  """
  cmds = ["Nameref", "nameref"]
  foundone = False
  rx = re.compile(r'reference "(.*)"')
  for cmd in cmds:
    i = 0
    oldcmd = "LatexCommand " + cmd
    while True:
      # It seems better to look for this, as most of the reference
      # insets won't be ones we care about.
      i = find_token(document.body, oldcmd, i)
      if i == -1:
        break
      cmdloc = i
      i += 1
      # Make sure it is actually in an inset!
      # A normal line could begin with "LatexCommand nameref"!
      val = is_in_inset(document.body, cmdloc,
                        "\\begin_inset CommandInset ref")
      if not val:
        continue
      stins, endins = val

      # ok, so it is in an InsetRef
      refline = find_token(document.body, "reference", stins, endins)
      if refline == -1:
        document.warning("Can't find reference for inset at line " + str(stins) + "!!")
        continue
      m = rx.match(document.body[refline])
      if not m:
        document.warning("Can't match reference line: " + document.body[refline])
        continue
      foundone = True
      ref = m.group(1)
      newcontent = put_cmd_in_ert('\\' + cmd + '{' + ref + '}')
      document.body[stins:endins + 1] = newcontent
      # resume right after the ERT that replaced the inset
      i = stins + len(newcontent)

  if foundone:
    add_to_preamble(document, ["\\usepackage{nameref}"])
1706
1707
def remove_Nameref(document):
  """Turn "LatexCommand Nameref" into "LatexCommand nameref" in reference insets."""
  inset_start = "\\begin_inset CommandInset ref"
  # Searching for the command line directly is cheaper than visiting every
  # reference inset, since most of them use other commands.
  pos = find_token(document.body, "LatexCommand Nameref", 0)
  while pos != -1:
    # an ordinary text line could start the same way, so only touch
    # lines that really sit inside a reference inset
    if is_in_inset(document.body, pos, inset_start):
      document.body[pos] = "LatexCommand nameref"
    pos = find_token(document.body, "LatexCommand Nameref", pos + 1)
1726
1727
def revert_mathrsfs(document):
    """Add the mathrsfs package to the preamble if \\mathscr{ occurs in the body."""
    # one occurrence is enough; the package is added at most once
    if any("\\mathscr{" in text for text in document.body):
        add_to_preamble(document, ["\\usepackage{mathrsfs}"])
1735
1736
def convert_flexnames(document):
    """Strip the Custom:, CharStyle: and Element: prefixes from Flex inset names.

    For example "\\begin_inset Flex Custom:Style" becomes
    "\\begin_inset Flex Style".  Flex insets without such a prefix are
    left untouched.
    """
    prefixed = re.compile(r'^\\begin_inset Flex (?:Custom|CharStyle|Element):(.+)$')
    pos = find_token(document.body, "\\begin_inset Flex", 0)
    while pos != -1:
        hit = prefixed.match(document.body[pos])
        if hit is not None:
            document.body[pos] = "\\begin_inset Flex " + hit.group(1)
        pos = find_token(document.body, "\\begin_inset Flex", pos + 1)
1750
1751
# Maps an unprefixed Flex inset name to its prefixed form; used by
# revert_flexnames when the document backend is "latex".
flex_insets = {}
flex_insets.update((name, "CharStyle:" + name) for name in (
  "Alert", "Code", "Concepts", "E-Mail", "Emph", "Expression",
  "Initial", "Institute", "Meaning", "Noun", "Strong", "Structure"))
flex_insets.update((name, "Custom:" + name) for name in (
  "ArticleMode", "Endnote", "Glosse", "PresentationMode", "Tri-Glosse"))
1771
# Maps an unprefixed Flex inset name to its "Element:" form; used by
# revert_flexnames when the document backend is not "latex".
flex_elements = dict((name, "Element:" + name) for name in (
  "Abbrev", "CCC-Code", "Citation-number", "City", "Code", "CODEN",
  "Country", "Day", "Directory", "Dscr", "Email", "Emph", "Filename",
  "Firstname", "Fname", "GuiButton", "GuiMenu", "GuiMenuItem", "ISSN",
  "Issue-day", "Issue-months", "Issue-number", "KeyCap", "KeyCombo",
  "Keyword", "Literal", "MenuChoice", "Month", "Orgdiv", "Orgname",
  "Postcode", "SS-Code", "SS-Title", "State", "Street", "Surname",
  "Volume", "Year"))
1812
1813
def revert_flexnames(document):
  """Put the old prefixes back on known Flex inset names.

  The lookup table is flex_insets for the "latex" backend and
  flex_elements otherwise.  Names that are not in the table are left
  unchanged; a Flex line without a name triggers a warning.
  """
  flexlist = flex_insets if document.backend == "latex" else flex_elements

  rx = re.compile(r'^\\begin_inset Flex\s+(.+)$')
  pos = find_token(document.body, "\\begin_inset Flex", 0)
  while pos != -1:
    found = rx.match(document.body[pos])
    if found is None:
      document.warning("Illegal flex inset: " + document.body[pos])
    else:
      style = found.group(1)
      if style in flexlist:
        document.body[pos] = "\\begin_inset Flex " + flexlist[style]
    pos = find_token(document.body, "\\begin_inset Flex", pos + 1)
1835
1836
def convert_mathdots(document):
    """Add a \\use_mathdots header line.

    The line goes right after \\use_mhchem, or after \\use_esint when
    \\use_mhchem is missing.  If the preamble loads mathdots explicitly,
    that preamble line is removed and the value is 2; otherwise it is 0.
    """
    anchor = find_token(document.header, "\\use_mhchem", 0)
    if anchor == -1:
        anchor = find_token(document.header, "\\use_esint", 0)
    if anchor == -1:
        document.warning("Malformed LyX document: Can't find \\use_mhchem.")
        return

    loaded = find_token(document.preamble, "\\usepackage{mathdots}", 0)
    if loaded != -1:
        document.header.insert(anchor + 1, "\\use_mathdots 2")
        del document.preamble[loaded]
        return
    document.header.insert(anchor + 1, "\\use_mathdots 0")
1851
1852
def revert_mathdots(document):
    """Remove \\use_mathdots and load mathdots in the preamble when needed.

    Value 0 loads nothing and value 2 always loads the package.  Any other
    value, an unparsable value, or a missing header line is treated as
    "auto": the package is loaded, conditionally, only if some Formula
    inset contains \\iddots.
    """
    pos = find_token(document.header, "\\use_mathdots", 0)
    if pos == -1:
        document.warning("No \\use_mathdots line. Assuming auto.")
    else:
        setting = get_value(document.header, "\\use_mathdots", pos)
        del document.header[pos]
        try:
            mode = int(setting)
        except:
            document.warning("Invalid \\use_mathdots value: " + setting + ". Assuming auto.")
            mode = 1

        if mode == 2:
            # forced: load unconditionally
            add_to_preamble(document, ["\\usepackage{mathdots}"])
        if mode in (0, 2):
            return

    # auto mode: scan the formulas for \iddots
    start = 0
    while True:
        start = find_token(document.body, '\\begin_inset Formula', start)
        if start == -1:
            return
        stop = find_end_of_inset(document.body, start)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(start))
            start += 1
            continue
        formula = "\n".join(document.body[start:stop])
        if "\\iddots" in formula:
            # NOTE(review): \@ifundefined normally takes three arguments;
            # confirm that omitting the else-branch here is intended.
            add_to_preamble(document, ["\\@ifundefined{iddots}{\\usepackage{mathdots}}"])
            return
        start = stop
1893
1894
def convert_rule(document):
    """Convert \\lyxline to a CommandInset line.

    Each \\lyxline is replaced by a rule inset with fixed offset, width
    and height.  If the line is the first content of its paragraph and the
    document uses indented paragraphs, \\noindent is added to that
    paragraph.  If the line follows other content, a newline inset is
    placed in front of the rule.
    """
    inset = ['\\begin_inset CommandInset line',
      'LatexCommand rule',
      'offset "0.5ex"',
      'width "100line%"',
      'height "1pt"', '',
      '\\end_inset', '', '']
    newline = ["\\begin_inset Newline newline", "\\end_inset", ""]

    # if paragraphs are indented, we may have to unindent to get the
    # line to be full-width.
    indent = get_value(document.header, "\\paragraph_separation", 0)
    have_indent = (indent == "indent")

    i = 0
    while True:
      i = find_token(document.body, "\\lyxline", i)
      if i == -1:
        return

      # we need to find out if this line follows other content
      # in its paragraph. find its layout....
      lastlay = find_token_backwards(document.body, "\\begin_layout", i)
      if lastlay == -1:
        document.warning("Can't find layout for line at " + str(i))
        # do the best we can.
        document.body[i:i+1] = inset
        i += len(inset)
        continue

      # ...and look for other content before it.
      lineisfirst = True
      for line in document.body[lastlay + 1:i]:
        # is it empty or a paragraph option?
        if not line or line[0] == '\\':
          continue
        lineisfirst = False
        break

      if lineisfirst:
        document.body[i:i+1] = inset
        i += len(inset)
        # only indented paragraphs need this; "indent" itself is a
        # non-empty string for every separation setting
        if have_indent:
          # we need to unindent, lest the line be too long
          document.body.insert(lastlay + 1, "\\noindent")
          # the insertion shifted everything after lastlay by one line
          i += 1
      else:
        # so our line is in the middle of a paragraph
        # we need to add a new line, lest this line follow the
        # other content on that line and run off the side of the page
        document.body[i:i+1] = newline + inset
        i += len(newline) + len(inset)
1948
1949
def revert_rule(document):
    """Revert line insets to TeX code.

    Each "CommandInset line" inset is replaced by ERT containing
    \\rule[offset]{width}{height}.  The bracketed offset is left out when
    the inset has none.  Width and height default to "100col%" and "1pt"
    and are passed through latex_length.  Conversion stops at the first
    inset whose end cannot be found.
    """
    i = 0
    while True:
      i = find_token(document.body, "\\begin_inset CommandInset line", i)
      if i == -1:
        return
      # find end of inset
      j = find_token(document.body, "\\end_inset", i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find end of line inset.")
        return
      # determine the optional offset; the brackets are added only here
      offset = get_quoted_value(document.body, 'offset', i, j)
      if offset:
        optarg = '[' + offset + ']'
      else:
        optarg = ''
      # determine the width
      width = get_quoted_value(document.body, 'width', i, j, "100col%")
      width = latex_length(width)[1]
      # determine the height
      height = get_quoted_value(document.body, 'height', i, j, "1pt")
      height = latex_length(height)[1]
      # output the \rule command
      subst = "\\rule" + optarg + "{" + width + "}{" + height + "}"
      ert = put_cmd_in_ert(subst)
      document.body[i:j + 1] = ert
      # continue after the inserted ERT lines
      i += len(ert)
1976
1977
def revert_diagram(document):
  """Add the feyn package to the preamble if a Formula inset uses \\Diagram.

  The package is added at most once.  Scanning stops at the first Formula
  inset whose end cannot be found.
  """
  start = 0
  while True:
    start = find_token(document.body, '\\begin_inset Formula', start)
    if start == -1:
      return
    stop = find_end_of_inset(document.body, start)
    if stop == -1:
      document.warning("Malformed LyX document: Can't find end of Formula inset.")
      return
    if "\\Diagram" in "\n".join(document.body[start:stop]):
      # one hit is enough
      add_to_preamble(document, ["\\usepackage{feyn}"])
      return
    start = stop
1996
# Text classes for which convert_bibtex_clearpage acts
# (presumably the classes that provide chapters -- confirm).
chapters = tuple((
    "amsbook book docbook-book elsart extbook extreport "
    "jbook jreport jsbook literate-book literate-report memoir "
    "mwbk mwrep recipebook report scrbook scrreprt svmono "
    "svmult tbook treport tufte-book").split())
2001
def convert_bibtex_clearpage(document):
  """Insert a page break before bibliographies that use the bibtotoc option.

  Only acts for text classes listed in chapters.  A Standard paragraph
  holding a Newpage inset is inserted in front of the paragraph that
  contains the bibtex inset.  The inset is "clearpage" for single-sided
  documents and "cleardoublepage" otherwise.
  """
  if document.textclass not in chapters:
    return

  sides = 0
  where = find_token(document.header, '\\papersides', 0)
  if where == -1:
    document.warning("Malformed LyX document: Can't find papersides definition.")
    document.warning("Assuming single sided.")
    sides = 1
  else:
    val = get_value(document.header, "\\papersides", where)
    try:
      sides = int(val)
    except:
      # sides stays 0 and is rejected just below
      pass
    if sides not in (1, 2):
      document.warning("Invalid papersides value: " + val)
      document.warning("Assuming single sided.")
      sides = 1

  # the paragraph to insert is the same for every bibliography
  if sides == 1:
    cmd = "clearpage"
  else:
    cmd = "cleardoublepage"
  subst = ['\\begin_layout Standard',
      '\\begin_inset Newpage ' + cmd,
      '\\end_inset', '', '',
      '\\end_layout', '']

  j = 0
  while True:
    j = find_token(document.body, "\\begin_inset CommandInset bibtex", j)
    if j == -1:
      return

    k = find_end_of_inset(document.body, j)
    if k == -1:
      document.warning("Can't find end of Bibliography inset at line " + str(j))
      j += 1
      continue

    val = get_value(document.body, 'options', j, k)
    if not val:
      document.warning("Can't find options for bibliography inset at line " + str(j))
    elif val.find("bibtotoc") != -1:
      # the new page goes right before the paragraph holding the inset
      lay = find_token_backwards(document.body, "\\begin_layout", j)
      if lay == -1:
        document.warning("Can't find layout containing bibliography inset at line " + str(j))
      else:
        document.body[lay:lay] = subst
        # the inset end moved down by the inserted lines
        k += len(subst)
    j = k
2066
2067
def check_passthru(document):
  """Return True if the document uses a literate class or the sweave/noweb module.

  The module list is only consulted when the text class is not one of the
  three literate classes.
  """
  if document.textclass in ("literate-article", "literate-book", "literate-report"):
    return True
  for mod in document.get_module_list():
    if mod in ("sweave", "noweb"):
      return True
  return False
2078
2079
def convert_passthru(document):
    """Split Chunk and Scrap layouts at newline insets.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Only acts when check_passthru(document) is true.  Within each Chunk or
    Scrap paragraph that has no paragraph-level customization, every
    newline inset is replaced by a paragraph break, giving repeated layouts
    instead of newlines.  If the following paragraph has the same layout,
    an extra empty paragraph of that layout is inserted between the two.
    """
    if not check_passthru(document):
      return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
      # each layout gets its own pass from the top of the body
      beg = 0
      while True:
        beg = find_token(document.body, "\\begin_layout " + lay, beg)
        if beg == -1:
          break
        end = find_end_of_layout(document.body, beg)
        if end == -1:
          document.warning("Can't find end of layout at line " + str(beg))
          beg += 1
          continue

        # if the paragraph has any customization, however, we do not want to
        # do the replacement.
        if document.body[beg + 1].startswith("\\"):
          beg = end + 1
          continue

        ns = beg
        while True:
          ns = find_token(document.body, "\\begin_inset Newline newline", ns, end)
          if ns == -1:
            break
          ne = find_end_of_inset(document.body, ns)
          if ne == -1 or ne > end:
            document.warning("Can't find end of inset at line " + str(ns))
            ns += 1
            continue
          # swallow the empty line that follows the inset
          if document.body[ne + 1] == "":
            ne += 1
          subst = ["\\end_layout", "", "\\begin_layout " + lay]
          removed = ne + 1 - ns
          document.body[ns:ne + 1] = subst
          # keep end pointing at the original \end_layout and resume
          # right after the inserted lines
          end += len(subst) - removed
          ns += len(subst)

        # ok, we now want to find out if the next layout is the
        # same as this one. if so, we will insert an extra copy of it
        beg = end + 1
        nxt = find_token(document.body, "\\begin_layout", end)
        if nxt != -1:
          m = rx.match(document.body[nxt])
          if m and m.group(1) == lay:
            subst = ["\\begin_layout " + lay, "", "\\end_layout", ""]
            document.body[nxt:nxt] = subst
            # skip the empty layout that was just inserted
            beg = nxt + len(subst)
2143
2144
def revert_passthru(document):
    """Merge consecutive Chunk or Scrap paragraphs back into one.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Only acts when check_passthru(document) is true.  While a Chunk or
    Scrap paragraph is followed by another paragraph of the same layout:
    an empty follower is removed and merging stops for this paragraph; a
    non-empty follower is joined to the current one with a newline inset.
    """
    if not check_passthru(document):
      return
    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
      # each layout gets its own pass from the top of the body
      beg = 0
      while True:
        beg = find_token(document.body, "\\begin_layout " + lay, beg)
        if beg == -1:
          break
        end = find_end_of_layout(document.body, beg)
        if end == -1:
          document.warning("Can't find end of layout at line " + str(beg))
          beg += 1
          continue

        # we now want to find out if the next layout is the
        # same as this one. but we will need to do this over and
        # over again.
        while True:
          nxt = find_token(document.body, "\\begin_layout", end)
          if nxt == -1:
            break
          m = rx.match(document.body[nxt])
          if not m or m.group(1) != lay:
            break
          # so it is the same layout again. we now want to know if it is empty.
          # but first let's check and make sure there is no content between the
          # two layouts. i'm not sure if that can happen or not.
          if any(line != "" for line in document.body[end + 1:nxt]):
            document.warning("Found content between adjacent " + lay + " layouts!")
          nextend = find_end_of_layout(document.body, nxt)
          if nextend == -1:
            document.warning("Can't find end of layout at line " + str(nxt))
            break
          empty = all(line == "" for line in document.body[nxt + 1:nextend])
          if empty:
            # empty layouts just get removed
            # should we check if it's before yet another such layout?
            del document.body[nxt : nextend + 1]
            # and we do not want to check again. we know the next layout
            # should be another Chunk and should be left as is.
            break
          # if it's not empty, then we want to insert a newline in place
          # of the layout switch
          subst = ["\\begin_inset Newline newline", "\\end_inset", ""]
          document.body[end : nxt + 1] = subst
          # and now we have to find the end of the new, larger layout
          newend = find_end_of_layout(document.body, beg)
          if newend == -1:
            document.warning("Can't find end of new layout at line " + str(beg))
            break
          end = newend
        beg = end + 1
2212
2213
def revert_multirowOffset(document):
    """ Revert multirow cells with offset in tables to TeX-code

    For every table that contains a multirow start cell (multirow="3")
    carrying an mroffset option, the multirow attributes are stripped from
    the cells and the content of the start cell is wrapped in ERT producing
    \\multirow{rows}{width}[offset]{...}.  The multirow package is added to
    the preamble if any such cell exists in the document.
    """
    # this routine is the same as the revert_multirow routine except that
    # it checks additionally for the offset

    # first, let's find out if we need to do anything
    i = find_token(document.body, '<cell multirow="3" mroffset=', 0)
    if i == -1:
      return

    add_to_preamble(document, ["\\usepackage{multirow}"])

    # matches the whole mroffset="..." attribute so it can be deleted
    rgx = re.compile(r'mroffset="[^"]+?"')
    begin_table = 0

    while True:
        # find begin/end of table
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        # does this table have multirow?
        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
        if i == -1:
            begin_table = end_table
            continue

        # store the number of rows and columns
        numrows = get_option_value(document.body[begin_table], "rows")
        numcols = get_option_value(document.body[begin_table], "columns")
        try:
          numrows = int(numrows)
          numcols = int(numcols)
        except:
          document.warning("Unable to determine rows and columns!")
          begin_table = end_table
          continue

        # mrstarts: [row, column] of every multirow start cell with an offset
        # multirows[row][column]: [first line, last line, state] of each cell,
        # where state is 0 (plain), 3 (multirow start) or 4 (inside a multirow)
        mrstarts = []
        multirows = []
        # collect info on rows and columns of this table.
        # NOTE(review): if one of the breaks below is taken, multirows holds
        # fewer entries than numrows/numcols, while the loop further down
        # indexes it as if it were complete -- confirm this cannot raise.
        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row>', begin_row, end_table)
            if begin_row == -1:
              document.warning("Can't find row " + str(row + 1))
              break
            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
            if end_row == -1:
              document.warning("Can't find end of row " + str(row + 1))
              break
            begin_cell = begin_row
            multirows.append([])
            for column in range(numcols):
                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
                if begin_cell == -1:
                  document.warning("Can't find column " + str(column + 1) + \
                    "in row " + str(row + 1))
                  break
                # NOTE
                # this will fail if someone puts "</cell>" in a cell, but
                # that seems fairly unlikely.
                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
                if end_cell == -1:
                  document.warning("Can't find end of column " + str(column + 1) + \
                    "in row " + str(row + 1))
                  break
                multirows[row].append([begin_cell, end_cell, 0])
                if document.body[begin_cell].find('multirow="3" mroffset=') != -1:
                  multirows[row][column][2] = 3 # begin multirow
                  mrstarts.append([row, column])
                elif document.body[begin_cell].find('multirow="4"') != -1:
                  multirows[row][column][2] = 4 # in multirow
                begin_cell = end_cell
            begin_row = end_row
        # end of table info collection

        # work from the back to avoid messing up numbering
        mrstarts.reverse()
        for m in mrstarts:
            row = m[0]
            col = m[1]
            # get column width
            # NOTE(review): assumes the column definition lines start two
            # lines after the <lyxtabular> line -- confirm against the format
            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
            # "0pt" means that no width is specified
            if not col_width or col_width == "0pt":
              col_width = "*"
            # determine the number of cells that are part of the multirow
            nummrs = 1
            for r in range(row + 1, numrows):
                # the multirow ends at the first cell below that is not marked 4
                if multirows[r][col][2] != 4:
                  break
                nummrs += 1
                # take the opportunity to revert this line
                lineno = multirows[r][col][0]
                document.body[lineno] = document.body[lineno].\
                  replace(' multirow="4" ', ' ').\
                  replace('valignment="middle"', 'valignment="top"').\
                  replace(' topline="true" ', ' ')
                # remove bottom line of previous multirow-part cell
                lineno = multirows[r-1][col][0]
                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
            # revert beginning cell
            bcell = multirows[row][col][0]
            ecell = multirows[row][col][1]
            # read the offset before the attribute is stripped below
            offset = get_option_value(document.body[bcell], "mroffset")
            document.body[bcell] = document.body[bcell].\
              replace(' multirow="3" ', ' ').\
              replace('valignment="middle"', 'valignment="top"')
            # remove mroffset option
            document.body[bcell] = rgx.sub('', document.body[bcell])

            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
            if blay == -1:
              document.warning("Can't find layout for cell!")
              continue
            bend = find_end_of_layout(document.body, blay)
            if bend == -1:
              document.warning("Can't find end of layout for cell!")
              continue
            # do the later one first, so as not to mess up the numbering
            # we are wrapping the whole cell in this ert
            # so before the end of the layout...
            document.body[bend:bend] = put_cmd_in_ert("}")
            # ...and after the beginning
            document.body[blay + 1:blay + 1] = \
              put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}[" \
                  + offset + "]{")

        # on to the next table
        begin_table = end_table
2349
2350
def revert_script(document):
    """Convert subscript and superscript insets to TeX code.

    The inset opening (up to and including its first \\begin_layout) is
    replaced by ERT holding \\textsubscript{ or \\textsuperscript{, and the
    closing \\end_layout ... \\end_inset by ERT holding "}".  If a
    subscript inset was seen and the text class is not in the list below,
    the subscript package is added to the preamble.
    """
    sub_token = "\\begin_inset script subscript"
    sup_token = "\\begin_inset script superscript"
    foundsubscript = False
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset script', pos)
        if pos == -1:
            break
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of script inset.")
            pos += 1
            continue
        lay_start = find_token(document.body, "\\begin_layout", pos, inset_end)
        if lay_start == -1:
            document.warning("Malformed LyX document: Can't find layout in script inset.")
            pos = inset_end
            continue

        opener = None
        if check_token(document.body[pos], sub_token):
            opener = '\\textsubscript{'
            foundsubscript = True
        elif check_token(document.body[pos], sup_token):
            opener = '\\textsuperscript{'
        if opener is None:
            document.warning("Malformed LyX document: Unknown type of script inset.")
            pos = inset_end
            continue

        lay_end = find_end_of_layout(document.body, lay_start)
        if lay_end == -1 or lay_end > inset_end:
            document.warning("Malformed LyX document: Can't find end of layout in script inset.")
            pos = inset_end
            continue
        # replace the tail first so the earlier indices stay valid
        document.body[lay_end:inset_end + 1] = put_cmd_in_ert("}")
        document.body[pos:lay_start + 1] = put_cmd_in_ert(opener)
        pos += 1

    # these classes provide a \textsubscript command:
    # FIXME: Would be nice if we could use the information of the .layout file here
    classes = ["memoir", "scrartcl", "scrbook", "scrlttr2", "scrreprt"]
    if not foundsubscript:
        return
    if find_token_exact(classes, document.textclass, 0) == -1:
        add_to_preamble(document, ['\\usepackage{subscript}'])
2393
2394
def convert_use_xetex(document):
    """Rename the \\use_xetex header parameter to \\use_non_tex_fonts, keeping its value."""
    pos = find_token(document.header, "\\use_xetex", 0)
    if pos != -1:
        setting = get_value(document.header, "\\use_xetex", 0)
        document.header[pos] = "\\use_non_tex_fonts " + setting
2404
2405
def revert_use_xetex(document):
    """Rename the \\use_non_tex_fonts header parameter back to \\use_xetex, keeping its value.

    Warns and leaves the header alone if the parameter is missing.
    """
    pos = find_token(document.header, "\\use_non_tex_fonts", 0)
    if pos != -1:
        setting = get_value(document.header, "\\use_non_tex_fonts", 0)
        document.header[pos] = "\\use_xetex " + setting
        return
    document.warning("Malformed document. No \\use_non_tex_fonts param!")
2416
2417
def revert_labeling(document):
    " replace Labeling layouts by List layouts, unless the class is a listed scr* class "
    koma_classes = ("scrartcl", "scrarticle-beamer", "scrbook", "scrlettr",
                    "scrlttr2", "scrreprt")
    if document.textclass in koma_classes:
        # documents using one of these classes are left untouched
        return
    pos = find_token_exact(document.body, "\\begin_layout Labeling", 0)
    while pos != -1:
        document.body[pos] = "\\begin_layout List"
        # the rewritten line no longer matches, so searching again from the
        # same index moves on to the next occurrence
        pos = find_token_exact(document.body, "\\begin_layout Labeling", pos)
2429
2430
def revert_langpack(document):
    " drop the \\language_package parameter from the header "
    pos = find_token(document.header, "\\language_package", 0)
    if pos != -1:
        del document.header[pos]
        return
    # parameter not present: report it and leave the header unchanged
    document.warning("Malformed document. No \\language_package param!")
2440
2441
def convert_langpack(document):
    """ Add \\language_package parameter

    Looks up the \\language line in document.header and inserts
    "\\language_package default" on the line right after it. If no
    \\language line is found, a warning is issued and the header is left
    unchanged. """
    # The backslash has to be doubled: "\l" is not a valid escape sequence,
    # and recent Python versions emit a warning for it.
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        document.warning("Malformed document. No \\language defined!")
        return

    # the new parameter goes directly below the \language line
    document.header.insert(i + 1, "\\language_package default")
2450
2451
def revert_tabularwidth(document):
    " call remove_option(..., 'tabularwidth') on the <features> line of matching Tabular insets "
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Tabular", pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(pos))
            pos += 1
            continue
        # step past the inset opener so that the next search moves forward
        pos += 1
        feat = find_token(document.body, "<features", pos, inset_end)
        if feat == -1:
            document.warning("Can't find any features in Tabular inset at line " + str(pos))
            pos = inset_end
            continue
        # NOTE(review): the guard looks for alignment="tabularwidth", while
        # remove_option() strips an option="value" pair -- confirm that this
        # is the intended match.
        if 'alignment="tabularwidth"' in document.body[feat]:
            remove_option(document.body, feat, 'tabularwidth')
2471
def revert_html_css_as_file(document):
    " delete the \\html_css_as_file header parameter, warning if del_token reports failure "
    if del_token(document.header, '\\html_css_as_file', 0):
        return
    document.warning("Malformed LyX document: Missing \\html_css_as_file.")
2475
2476
2477 ##
2478 # Conversion hub
2479 #
2480
# Version strings associated with this module.
# NOTE(review): presumably matched by the lyx2lyx driver against the LyX
# release that wrote a document -- confirm against the caller.
supported_versions = ["2.0.0","2.0"]
# Forward conversion table: one [format number, [functions]] entry per file
# format, listed in ascending order from 346 to 413. An empty function list
# means no conversion routine is registered for that format.
# NOTE(review): presumably each function is called with the document when the
# driver steps up to the given format -- confirm against the caller.
convert = [[346, []],
           [347, []],
           [348, []],
           [349, []],
           [350, []],
           [351, []],
           [352, [convert_splitindex]],
           [353, []],
           [354, []],
           [355, []],
           [356, []],
           [357, []],
           [358, []],
           [359, [convert_nomencl_width]],
           [360, []],
           [361, []],
           [362, []],
           [363, []],
           [364, []],
           [365, []],
           [366, []],
           [367, []],
           [368, []],
           [369, [convert_author_id]],
           [370, []],
           [371, [convert_mhchem]],
           [372, []],
           [373, [merge_gbrief]],
           [374, []],
           [375, []],
           [376, []],
           [377, []],
           [378, []],
           [379, [convert_math_output]],
           [380, []],
           [381, []],
           [382, []],
           [383, []],
           [384, []],
           [385, []],
           [386, []],
           [387, []],
           [388, []],
           [389, [convert_html_quotes]],
           [390, []],
           [391, []],
           [392, []],
           [393, [convert_optarg]],
           [394, [convert_use_makebox]],
           [395, []],
           [396, []],
           [397, [remove_Nameref]],
           [398, []],
           [399, [convert_mathdots]],
           [400, [convert_rule]],
           [401, []],
           [402, [convert_bibtex_clearpage]],
           [403, [convert_flexnames]],
           [404, [convert_prettyref]],
           [405, []],
           [406, [convert_passthru]],
           [407, []],
           [408, []],
           [409, [convert_use_xetex]],
           [410, []],
           [411, [convert_langpack]],
           [412, []],
           [413, []]
]
2551
# Backward conversion table: one [format number, [functions]] entry per file
# format, listed in descending order from 412 down to 345. Entries with
# several functions list them in the order given here; an empty list means no
# reversion routine is registered for that format.
# NOTE(review): presumably the functions of an entry are run in list order
# when the driver steps down to the given format -- confirm against the caller.
revert =  [[412, [revert_html_css_as_file]],
           [411, [revert_tabularwidth]],
           [410, [revert_langpack]],
           [409, [revert_labeling]],
           [408, [revert_use_xetex]],
           [407, [revert_script]],
           [406, [revert_multirowOffset]],
           [405, [revert_passthru]],
           [404, []],
           [403, [revert_refstyle]],
           [402, [revert_flexnames]],
           [401, []],
           [400, [revert_diagram]],
           [399, [revert_rule]],
           [398, [revert_mathdots]],
           [397, [revert_mathrsfs]],
           [396, []],
           [395, [revert_nameref]],
           [394, [revert_DIN_C_pagesizes]],
           [393, [revert_makebox]],
           [392, [revert_argument]],
           [391, []],
           [390, [revert_align_decimal, revert_IEEEtran]],
           [389, [revert_output_sync]],
           [388, [revert_html_quotes]],
           [387, [revert_pagesizes]],
           [386, [revert_math_scale]],
           [385, [revert_lyx_version]],
           [384, [revert_shadedboxcolor]],
           [383, [revert_fontcolor]],
           [382, [revert_turkmen]],
           [381, [revert_notefontcolor]],
           [380, [revert_equalspacing_xymatrix]],
           [379, [revert_inset_preview]],
           [378, [revert_math_output]],
           [377, []],
           [376, [revert_multirow]],
           [375, [revert_includeall]],
           [374, [revert_includeonly]],
           [373, [revert_html_options]],
           [372, [revert_gbrief]],
           [371, [revert_fontenc]],
           [370, [revert_mhchem]],
           [369, [revert_suppress_date]],
           [368, [revert_author_id]],
           [367, [revert_hspace_glue_lengths]],
           [366, [revert_percent_vspace_lengths, revert_percent_hspace_lengths]],
           [365, [revert_percent_skip_lengths]],
           [364, [revert_paragraph_indentation]],
           [363, [revert_branch_filename]],
           [362, [revert_longtable_align]],
           [361, [revert_applemac]],
           [360, []],
           [359, [revert_nomencl_cwidth]],
           [358, [revert_nomencl_width]],
           [357, [revert_custom_processors]],
           [356, [revert_ulinelatex]],
           [355, []],
           [354, [revert_strikeout]],
           [353, [revert_printindexall]],
           [352, [revert_subindex]],
           [351, [revert_splitindex]],
           [350, [revert_backgroundcolor]],
           [349, [revert_outputformat]],
           [348, [revert_xetex]],
           [347, [revert_phantom, revert_hphantom, revert_vphantom]],
           [346, [revert_tabularvalign]],
           [345, [revert_swiss]]
          ]
2621
2622
# Executing this file directly does nothing: the guarded body is a bare pass.
if __name__ == "__main__":
    pass