lib/lyx2lyx/lyx_2_0.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # Copyright (C) 2011 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19 """ Convert files to the file format generated by lyx 2.0"""
  20
  21 import re, string
  22 import unicodedata
  23 import sys, os
  24
  25 from parser_tools import del_complete_lines, \
  26   find_token, find_end_of, find_tokens, \
  27   find_token_exact, find_end_of_inset, find_end_of_layout, \
  28   find_token_backwards, is_in_inset, get_value, get_quoted_value, \
  29   del_token, check_token, get_option_value
  30
  31 from lyx2lyx_tools import add_to_preamble, insert_to_preamble, \
  32   put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
  33   revert_font_attrs, hex2ratio, str2bool, revert_language
  34
  35 ####################################################################
  36 # Private helper functions
  37
  38 def remove_option(lines, m, option):
  39     ''' removes option from line m. returns whether we did anything '''
  40     l = lines[m].find(option)
  41     if l == -1:
  42         return False
  43     val = lines[m][l:].split('"')[1]
  44     lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
  45     return True
  46
  47
  48 ###############################################################################
  49 ###
  50 ### Conversion and reversion routines
  51 ###
  52 ###############################################################################
  53
  54 def revert_swiss(document):
  55     " Set language german-ch to ngerman "
  56     i = 0
  57     if document.language == "german-ch":
  58         document.language = "ngerman"
  59         i = find_token(document.header, "\\language", 0)
  60         if i != -1:
  61             document.header[i] = "\\language ngerman"
  62     j = 0
  63     while True:
  64         j = find_token(document.body, "\\lang german-ch", j)
  65         if j == -1:
  66             return
  67         document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman")
  68         j = j + 1
  69
  70
  71 def revert_tabularvalign(document):
  72    " Revert the tabular valign option "
  73    i = 0
  74    while True:
  75       i = find_token(document.body, "\\begin_inset Tabular", i)
  76       if i == -1:
  77           return
  78       end = find_end_of_inset(document.body, i)
  79       if end == -1:
  80           document.warning("Can't find end of inset at line " + str(i))
  81           i += 1
  82           continue
  83       fline = find_token(document.body, "<features", i, end)
  84       if fline == -1:
  85           document.warning("Can't find features for inset at line " + str(i))
  86           i += 1
  87           continue
  88       p = document.body[fline].find("islongtable")
  89       if p != -1:
  90           q = document.body[fline].find("tabularvalignment")
  91           if q != -1:
  92               document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
  93           i += 1
  94           continue
  95
  96        # no longtable
  97       tabularvalignment = 'c'
  98       # which valignment is specified?
  99       m = document.body[fline].find('tabularvalignment="top"')
 100       if m != -1:
 101           tabularvalignment = 't'
 102       m = document.body[fline].find('tabularvalignment="bottom"')
 103       if m != -1:
 104           tabularvalignment = 'b'
 105       # delete tabularvalignment
 106       q = document.body[fline].find("tabularvalignment")
 107       if q != -1:
 108           document.body[fline] = re.sub(r' tabularvalignment=\"[a-z]+\"', "", document.body[fline])
 109
 110       # don't add a box when centered
 111       if tabularvalignment == 'c':
 112           i = end
 113           continue
 114       subst = ['\\end_inset', '\\end_layout']
 115       document.body[end:end] = subst # just inserts those lines
 116       subst = ['\\begin_inset Box Frameless',
 117           'position "' + tabularvalignment +'"',
 118           'hor_pos "c"',
 119           'has_inner_box 1',
 120           'inner_pos "c"',
 121           'use_parbox 0',
 122           # we don't know the width, assume 50%
 123           'width "50col%"',
 124           'special "none"',
 125           'height "1in"',
 126           'height_special "totalheight"',
 127           'status open',
 128           '',
 129           '\\begin_layout Plain Layout']
 130       document.body[i:i] = subst # this just inserts the array at i
 131       # since there could be a tabular inside a tabular, we cannot
 132       # jump to end
 133       i += len(subst)
 134
 135
 136 def revert_phantom_types(document, ptype, cmd):
 137     " Reverts phantom to ERT "
 138     i = 0
 139     while True:
 140       i = find_token(document.body, "\\begin_inset Phantom " + ptype, i)
 141       if i == -1:
 142           return
 143       end = find_end_of_inset(document.body, i)
 144       if end == -1:
 145           document.warning("Can't find end of inset at line " + str(i))
 146           i += 1
 147           continue
 148       blay = find_token(document.body, "\\begin_layout Plain Layout", i, end)
 149       if blay == -1:
 150           document.warning("Can't find layout for inset at line " + str(i))
 151           i = end
 152           continue
 153       bend = find_end_of_layout(document.body, blay)
 154       if bend == -1:
 155           document.warning("Malformed LyX document: Could not find end of Phantom inset's layout.")
 156           i = end
 157           continue
 158       substi = ["\\begin_inset ERT", "status collapsed", "",
 159                 "\\begin_layout Plain Layout", "", "", "\\backslash",
 160                 cmd + "{", "\\end_layout", "", "\\end_inset"]
 161       substj = ["\\size default", "", "\\begin_inset ERT", "status collapsed", "",
 162                 "\\begin_layout Plain Layout", "", "}", "\\end_layout", "", "\\end_inset"]
 163       # do the later one first so as not to mess up the numbering
 164       document.body[bend:end + 1] = substj
 165       document.body[i:blay + 1] = substi
 166       i = end + len(substi) + len(substj) - (end - bend) - (blay - i) - 2
 167
 168
 169 def revert_phantom(document):
 170     revert_phantom_types(document, "Phantom", "phantom")
 171
 172 def revert_hphantom(document):
 173     revert_phantom_types(document, "HPhantom", "hphantom")
 174
 175 def revert_vphantom(document):
 176     revert_phantom_types(document, "VPhantom", "vphantom")
 177
 178
 179 def revert_xetex(document):
 180     " Reverts documents that use XeTeX "
 181
 182     i = find_token(document.header, '\\use_xetex', 0)
 183     if i == -1:
 184         document.warning("Malformed LyX document: Missing \\use_xetex.")
 185         return
 186     if not str2bool(get_value(document.header, "\\use_xetex", i)):
 187         del document.header[i]
 188         return
 189     del document.header[i]
 190
 191     # 1.) set doc encoding to utf8-plain
 192     i = find_token(document.header, "\\inputencoding", 0)
 193     if i == -1:
 194         document.warning("Malformed LyX document: Missing \\inputencoding.")
 195     else:
 196         document.header[i] = "\\inputencoding utf8-plain"
 197
 198     # 2.) check font settings
 199     # defaults
 200     roman = sans = typew = "default"
 201     osf = False
 202     sf_scale = tt_scale = 100.0
 203
 204     i = find_token(document.header, "\\font_roman", 0)
 205     if i == -1:
 206         document.warning("Malformed LyX document: Missing \\font_roman.")
 207     else:
 208         roman = get_value(document.header, "\\font_roman", i)
 209         document.header[i] = "\\font_roman default"
 210
 211     i = find_token(document.header, "\\font_sans", 0)
 212     if i == -1:
 213         document.warning("Malformed LyX document: Missing \\font_sans.")
 214     else:
 215         sans = get_value(document.header, "\\font_sans", i)
 216         document.header[i] = "\\font_sans default"
 217
 218     i = find_token(document.header, "\\font_typewriter", 0)
 219     if i == -1:
 220         document.warning("Malformed LyX document: Missing \\font_typewriter.")
 221     else:
 222         typew = get_value(document.header, "\\font_typewriter", i)
 223         document.header[i] = "\\font_typewriter default"
 224
 225     i = find_token(document.header, "\\font_osf", 0)
 226     if i == -1:
 227         document.warning("Malformed LyX document: Missing \\font_osf.")
 228     else:
 229         osf = str2bool(get_value(document.header, "\\font_osf", i))
 230         document.header[i] = "\\font_osf false"
 231
 232     i = find_token(document.header, "\\font_sc", 0)
 233     if i == -1:
 234         document.warning("Malformed LyX document: Missing \\font_sc.")
 235     else:
 236         # we do not need this value.
 237         document.header[i] = "\\font_sc false"
 238
 239     i = find_token(document.header, "\\font_sf_scale", 0)
 240     if i == -1:
 241         document.warning("Malformed LyX document: Missing \\font_sf_scale.")
 242     else:
 243       val = get_value(document.header, '\\font_sf_scale', i)
 244       try:
 245         # float() can throw
 246         sf_scale = float(val)
 247       except:
 248         document.warning("Invalid font_sf_scale value: " + val)
 249       document.header[i] = "\\font_sf_scale 100"
 250
 251     i = find_token(document.header, "\\font_tt_scale", 0)
 252     if i == -1:
 253         document.warning("Malformed LyX document: Missing \\font_tt_scale.")
 254     else:
 255         val = get_value(document.header, '\\font_tt_scale', i)
 256         try:
 257           # float() can throw
 258           tt_scale = float(val)
 259         except:
 260           document.warning("Invalid font_tt_scale value: " + val)
 261         document.header[i] = "\\font_tt_scale 100"
 262
 263     # 3.) set preamble stuff
 264     pretext = ['%% This document must be processed with xelatex!']
 265     pretext.append('\\usepackage{fontspec}')
 266     if roman != "default":
 267         pretext.append('\\setmainfont[Mapping=tex-text]{' + roman + '}')
 268     if sans != "default":
 269         sf = '\\setsansfont['
 270         if sf_scale != 100.0:
 271             sf += 'Scale=' + str(sf_scale / 100.0) + ','
 272         sf += 'Mapping=tex-text]{' + sans + '}'
 273         pretext.append(sf)
 274     if typew != "default":
 275         tw = '\\setmonofont'
 276         if tt_scale != 100.0:
 277             tw += '[Scale=' + str(tt_scale / 100.0) + ']'
 278         tw += '{' + typew + '}'
 279         pretext.append(tw)
 280     if osf:
 281         pretext.append('\\defaultfontfeatures{Numbers=OldStyle}')
 282     pretext.append('\\usepackage{xunicode}')
 283     pretext.append('\\usepackage{xltxtra}')
 284     insert_to_preamble(document, pretext)
 285
 286
 287 def revert_outputformat(document):
 288     " Remove default output format param "
 289
 290     if not del_token(document.header, '\\default_output_format', 0):
 291         document.warning("Malformed LyX document: Missing \\default_output_format.")
 292
 293
 294 def revert_backgroundcolor(document):
 295     " Reverts background color to preamble code "
 296     i = find_token(document.header, "\\backgroundcolor", 0)
 297     if i == -1:
 298         return
 299     colorcode = get_value(document.header, '\\backgroundcolor', i)
 300     del document.header[i]
 301     # don't clutter the preamble if backgroundcolor is not set
 302     if colorcode == "#ffffff":
 303         return
 304     red   = hex2ratio(colorcode[1:3])
 305     green = hex2ratio(colorcode[3:5])
 306     blue  = hex2ratio(colorcode[5:7])
 307     insert_to_preamble(document, \
 308         ['% To set the background color',
 309         '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
 310         '\\definecolor{page_backgroundcolor}{rgb}{' + red + ',' + green + ',' + blue + '}',
 311         '\\pagecolor{page_backgroundcolor}'])
 312
 313
 314 def add_use_indices(document):
 315     " Add \\use_indices if it is missing "
 316     i = find_token(document.header, '\\use_indices', 0)
 317     if i != -1:
 318         return i
 319     i = find_token(document.header, '\\use_bibtopic', 0)
 320     if i == -1:
 321         i = find_token(document.header, '\\cite_engine', 0)
 322     if i == -1:
 323         i = find_token(document.header, '\\use_mathdots', 0)
 324     if i == -1:
 325         i = find_token(document.header, '\\use_mhchem', 0)
 326     if i == -1:
 327         i = find_token(document.header, '\\use_esint', 0)
 328     if i == -1:
 329         i = find_token(document.header, '\\use_amsmath', 0)
 330     if i == -1:
 331         document.warning("Malformed LyX document: Missing \\use_indices.")
 332         return -1
 333     document.header.insert(i + 1, '\\use_indices 0')
 334     return i + 1
 335
 336
 337 def revert_splitindex(document):
 338     " Reverts splitindex-aware documents "
 339     i = add_use_indices(document)
 340     if i == -1:
 341         return
 342     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 343     del document.header[i]
 344     preamble = []
 345     if useindices:
 346          preamble.append("\\usepackage{splitidx})")
 347
 348     # deal with index declarations in the preamble
 349     i = 0
 350     while True:
 351         i = find_token(document.header, "\\index", i)
 352         if i == -1:
 353             break
 354         k = find_token(document.header, "\\end_index", i)
 355         if k == -1:
 356             document.warning("Malformed LyX document: Missing \\end_index.")
 357             return
 358         if useindices:
 359           line = document.header[i]
 360           l = re.compile(r'\\index (.*)$')
 361           m = l.match(line)
 362           iname = m.group(1)
 363           ishortcut = get_value(document.header, '\\shortcut', i, k)
 364           if ishortcut != "":
 365               preamble.append("\\newindex[" + iname + "]{" + ishortcut + "}")
 366         del document.header[i:k + 1]
 367     if preamble:
 368         insert_to_preamble(document, preamble)
 369
 370     # deal with index insets
 371     # these need to have the argument removed
 372     i = 0
 373     while True:
 374         i = find_token(document.body, "\\begin_inset Index", i)
 375         if i == -1:
 376             break
 377         line = document.body[i]
 378         l = re.compile(r'\\begin_inset Index (.*)$')
 379         m = l.match(line)
 380         itype = m.group(1)
 381         if itype == "idx" or indices == "false":
 382             document.body[i] = "\\begin_inset Index"
 383         else:
 384             k = find_end_of_inset(document.body, i)
 385             if k == -1:
 386                 document.warning("Can't find end of index inset!")
 387                 i += 1
 388                 continue
 389             content = lyx2latex(document, document.body[i:k])
 390             # escape quotes
 391             content = content.replace('"', r'\"')
 392             subst = put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")
 393             document.body[i:k + 1] = subst
 394         i = i + 1
 395
 396     # deal with index_print insets
 397     i = 0
 398     while True:
 399         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 400         if i == -1:
 401             return
 402         k = find_end_of_inset(document.body, i)
 403         ptype = get_quoted_value(document.body, 'type', i, k)
 404         if ptype == "idx":
 405             j = find_token(document.body, "type", i, k)
 406             del document.body[j]
 407         elif not useindices:
 408             del document.body[i:k + 1]
 409         else:
 410             subst = put_cmd_in_ert("\\printindex[" + ptype + "]{}")
 411             document.body[i:k + 1] = subst
 412         i = i + 1
 413
 414
 415 def convert_splitindex(document):
 416     " Converts index and printindex insets to splitindex-aware format "
 417     add_use_indices(document)
 418     i = 0
 419     while True:
 420         i = find_token(document.body, "\\begin_inset Index", i)
 421         if i == -1:
 422             break
 423         document.body[i] = document.body[i].replace("\\begin_inset Index",
 424             "\\begin_inset Index idx")
 425         i = i + 1
 426     i = 0
 427     while True:
 428         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 429         if i == -1:
 430             return
 431         if document.body[i + 1].find('LatexCommand printindex') == -1:
 432             document.warning("Malformed LyX document: Incomplete printindex inset.")
 433             return
 434         subst = ["LatexCommand printindex",
 435             "type \"idx\""]
 436         document.body[i + 1:i + 2] = subst
 437         i = i + 1
 438
 439
 440 def revert_subindex(document):
 441     " Reverts \\printsubindex CommandInset types "
 442     i = add_use_indices(document)
 443     if i == -1:
 444         return
 445     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 446     i = 0
 447     while True:
 448         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 449         if i == -1:
 450             return
 451         k = find_end_of_inset(document.body, i)
 452         ctype = get_value(document.body, 'LatexCommand', i, k)
 453         if ctype != "printsubindex":
 454             i = k + 1
 455             continue
 456         ptype = get_quoted_value(document.body, 'type', i, k)
 457         if not useindices:
 458             del document.body[i:k + 1]
 459         else:
 460             subst = put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")
 461             document.body[i:k + 1] = subst
 462         i = i + 1
 463
 464
 465 def revert_printindexall(document):
 466     " Reverts \\print[sub]index* CommandInset types "
 467     i = add_use_indices(document)
 468     if i == -1:
 469         return
 470     useindices = str2bool(get_value(document.header, "\\use_indices", i))
 471     i = 0
 472     while True:
 473         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 474         if i == -1:
 475             return
 476         k = find_end_of_inset(document.body, i)
 477         ctype = get_value(document.body, 'LatexCommand', i, k)
 478         if ctype != "printindex*" and ctype != "printsubindex*":
 479             i = k
 480             continue
 481         if not useindices:
 482             del document.body[i:k + 1]
 483         else:
 484             subst = put_cmd_in_ert("\\" + ctype + "{}")
 485             document.body[i:k + 1] = subst
 486         i = i + 1
 487
 488 strikeout_preamble = ['%  for proper underlining',
 489                       r'\PassOptionsToPackage{normalem}{ulem}',
 490                       r'\usepackage{ulem}']
 491
 492 def convert_strikeout(document):
 493     " Remove preamble code loading 'ulem' package. "
 494     del_complete_lines(document.preamble,
 495                        ['% Added by lyx2lyx']+strikeout_preamble)
 496
 497
 498 def revert_strikeout(document):
 499   " Reverts \\strikeout font attribute "
 500   changed = revert_font_attrs(document.body, "\\uuline", "\\uuline")
 501   changed = revert_font_attrs(document.body, "\\uwave", "\\uwave") or changed
 502   changed = revert_font_attrs(document.body, "\\strikeout", "\\sout")  or changed
 503   if changed == True:
 504     insert_to_preamble(document, strikeout_preamble)
 505
 506
 507 ulinelatex_preamble = ['% fix underbar in citations',
 508     r'\let\cite@rig\cite',
 509     r'\newcommand{\b@xcite}[2][\%]{\def\def@pt{\%}\def\pas@pt{#1}',
 510     r'  \mbox{\ifx\def@pt\pas@pt\cite@rig{#2}\else\cite@rig[#1]{#2}\fi}}',
 511     r'\renewcommand{\underbar}[1]{{\let\cite\b@xcite\uline{#1}}}']
 512
 513 def convert_ulinelatex(document):
 514     " Remove preamble code for \\uline font attribute. "
 515     del_complete_lines(document.preamble,
 516                        ['% Added by lyx2lyx']+ulinelatex_preamble)
 517
 518 def revert_ulinelatex(document):
 519     " Add preamble code for \\uline font attribute in citations. "
 520     i = find_token(document.body, '\\bar under', 0)
 521     if i == -1:
 522         return
 523     try:
 524         document.preamble.index(r'\usepackage{ulem}')
 525     except ValueError:
 526         insert_to_preamble(document, strikeout_preamble)
 527     insert_to_preamble(document, ulinelatex_preamble)
 528
 529
 530 def revert_custom_processors(document):
 531     " Remove bibtex_command and index_command params "
 532
 533     if not del_token(document.header, '\\bibtex_command', 0):
 534         document.warning("Malformed LyX document: Missing \\bibtex_command.")
 535
 536     if not del_token(document.header, '\\index_command', 0):
 537         document.warning("Malformed LyX document: Missing \\index_command.")
 538
 539
 540 def convert_nomencl_width(document):
 541     " Add set_width param to nomencl_print "
 542     i = 0
 543     while True:
 544       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 545       if i == -1:
 546         break
 547       document.body.insert(i + 2, "set_width \"none\"")
 548       i = i + 1
 549
 550
 551 def revert_nomencl_width(document):
 552     " Remove set_width param from nomencl_print "
 553     i = 0
 554     while True:
 555       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 556       if i == -1:
 557         break
 558       j = find_end_of_inset(document.body, i)
 559       if not del_token(document.body, "set_width", i, j):
 560         document.warning("Can't find set_width option for nomencl_print!")
 561       i = j
 562
 563
 564 def revert_nomencl_cwidth(document):
 565     " Remove width param from nomencl_print "
 566     i = 0
 567     while True:
 568       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 569       if i == -1:
 570         break
 571       j = find_end_of_inset(document.body, i)
 572       l = find_token(document.body, "width", i, j)
 573       if l == -1:
 574         i = j
 575         continue
 576       width = get_quoted_value(document.body, "width", i, j)
 577       del document.body[l]
 578       insert_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
 579       i = j - 1
 580
 581
 582 def revert_applemac(document):
 583     " Revert applemac encoding to auto "
 584     if document.encoding != "applemac":
 585       return
 586     document.encoding = "auto"
 587     i = find_token(document.header, "\\encoding", 0)
 588     if i != -1:
 589         document.header[i] = "\\encoding auto"
 590
 591
 592 def revert_longtable_align(document):
 593     " Remove longtable alignment setting "
 594     i = 0
 595     while True:
 596       i = find_token(document.body, "\\begin_inset Tabular", i)
 597       if i == -1:
 598           break
 599       end = find_end_of_inset(document.body, i)
 600       if end == -1:
 601           document.warning("Can't find end of inset at line " + str(i))
 602           i += 1
 603           continue
 604       fline = find_token(document.body, "<features", i, end)
 605       if fline == -1:
 606           document.warning("Can't find features for inset at line " + str(i))
 607           i += 1
 608           continue
 609       j = document.body[fline].find("longtabularalignment")
 610       if j == -1:
 611           i += 1
 612           continue
 613       # FIXME Is this correct? It wipes out everything after the
 614       # one we found.
 615       document.body[fline] = document.body[fline][:j - 1] + '>'
 616       # since there could be a tabular inside this one, we
 617       # cannot jump to end.
 618       i += 1
 619
 620
 621 def revert_branch_filename(document):
 622     " Remove \\filename_suffix parameter from branches "
 623     i = 0
 624     while True:
 625         i = find_token(document.header, "\\filename_suffix", i)
 626         if i == -1:
 627             return
 628         del document.header[i]
 629
 630
 631 def revert_paragraph_indentation(document):
 632     " Revert custom paragraph indentation to preamble code "
 633     i = find_token(document.header, "\\paragraph_indentation", 0)
 634     if i == -1:
 635       return
 636     length = get_value(document.header, "\\paragraph_indentation", i)
 637     # we need only remove the line if indentation is default
 638     if length != "default":
 639       # handle percent lengths
 640       length = latex_length(length)[1]
 641       insert_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"])
 642     del document.header[i]
 643
 644
 645 def revert_percent_skip_lengths(document):
 646     " Revert relative lengths for paragraph skip separation to preamble code "
 647     i = find_token(document.header, "\\defskip", 0)
 648     if i == -1:
 649         return
 650     length = get_value(document.header, "\\defskip", i)
 651     # only revert when a custom length was set and when
 652     # it used a percent length
 653     if length in ('smallskip', 'medskip', 'bigskip'):
 654         return
 655     # handle percent lengths
 656     percent, length = latex_length(length)
 657     if percent:
 658         insert_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"])
 659         # set defskip to medskip as default
 660         document.header[i] = "\\defskip medskip"
 661
 662
 663 def revert_percent_vspace_lengths(document):
 664     " Revert relative VSpace lengths to ERT "
 665     i = 0
 666     while True:
 667       i = find_token(document.body, "\\begin_inset VSpace", i)
 668       if i == -1:
 669           break
 670       # only revert if a custom length was set and if
 671       # it used a percent length
 672       r = re.compile(r'\\begin_inset VSpace (.*)$')
 673       m = r.match(document.body[i])
 674       length = m.group(1)
 675       if length in ('defskip', 'smallskip', 'medskip', 'bigskip', 'vfill'):
 676          i += 1
 677          continue
 678       # check if the space has a star (protected space)
 679       protected = (document.body[i].rfind("*") != -1)
 680       if protected:
 681           length = length.rstrip('*')
 682       # handle percent lengths
 683       percent, length = latex_length(length)
 684       # revert the VSpace inset to ERT
 685       if percent:
 686           if protected:
 687               subst = put_cmd_in_ert("\\vspace*{" + length + "}")
 688           else:
 689               subst = put_cmd_in_ert("\\vspace{" + length + "}")
 690           document.body[i:i + 2] = subst
 691       i += 1
 692
 693
 694 def revert_percent_hspace_lengths(document):
 695     " Revert relative HSpace lengths to ERT "
 696     i = 0
 697     while True:
 698       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 699       if i == -1:
 700           break
 701       j = find_end_of_inset(document.body, i)
 702       if j == -1:
 703           document.warning("Can't find end of inset at line " + str(i))
 704           i += 1
 705           continue
 706       # only revert if a custom length was set...
 707       length = get_value(document.body, '\\length', i + 1, j)
 708       if length == '':
 709           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 710           i = j
 711           continue
 712       protected = ""
 713       if document.body[i].find("\\hspace*{}") != -1:
 714           protected = "*"
 715       # ...and if it used a percent length
 716       percent, length = latex_length(length)
 717       # revert the HSpace inset to ERT
 718       if percent:
 719           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 720           document.body[i:j + 1] = subst
 721       # if we did a substitution, this will still be ok
 722       i = j
 723
 724
 725 def revert_hspace_glue_lengths(document):
 726     " Revert HSpace glue lengths to ERT "
 727     i = 0
 728     while True:
 729       i = find_token_exact(document.body, "\\begin_inset space \\hspace", i)
 730       if i == -1:
 731           break
 732       j = find_end_of_inset(document.body, i)
 733       if j == -1:
 734           document.warning("Can't find end of inset at line " + str(i))
 735           i += 1
 736           continue
 737       length = get_value(document.body, '\\length', i + 1, j)
 738       if length == '':
 739           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 740           i = j
 741           continue
 742       protected = ""
 743       if document.body[i].find("\\hspace*{}") != -1:
 744           protected = "*"
 745       # only revert if the length contains a plus or minus at pos != 0
 746       if length.find('-',1) != -1 or length.find('+',1) != -1:
 747           # handle percent lengths
 748           length = latex_length(length)[1]
 749           # revert the HSpace inset to ERT
 750           subst = put_cmd_in_ert("\\hspace" + protected + "{" + length + "}")
 751           document.body[i:j+1] = subst
 752       i = j
 753
 754
 755 def convert_author_id(document):
 756     " Add the author_id to the \\author definition and make sure 0 is not used"
 757     i = 0
 758     anum = 1
 759     re_author = re.compile(r'(\\author) (\".*\")\s*(.*)$')
 760
 761     while True:
 762         i = find_token(document.header, "\\author", i)
 763         if i == -1:
 764             break
 765         m = re_author.match(document.header[i])
 766         if m:
 767             name = m.group(2)
 768             email = m.group(3)
 769             document.header[i] = "\\author %i %s %s" % (anum, name, email)
 770         anum += 1
 771         i += 1
 772
 773     i = 0
 774     while True:
 775         i = find_token(document.body, "\\change_", i)
 776         if i == -1:
 777             break
 778         change = document.body[i].split(' ');
 779         if len(change) == 3:
 780             type = change[0]
 781             author_id = int(change[1])
 782             time = change[2]
 783             document.body[i] = "%s %i %s" % (type, author_id + 1, time)
 784         i += 1
 785
 786
 787 def revert_author_id(document):
 788     " Remove the author_id from the \\author definition "
 789     i = 0
 790     anum = 0
 791     rx = re.compile(r'(\\author)\s+(-?\d+)\s+(\".*\")\s*(.*)$')
 792     idmap = dict()
 793
 794     while True:
 795         i = find_token(document.header, "\\author", i)
 796         if i == -1:
 797             break
 798         m = rx.match(document.header[i])
 799         if m:
 800             author_id = int(m.group(2))
 801             idmap[author_id] = anum
 802             name = m.group(3)
 803             email = m.group(4)
 804             document.header[i] = "\\author %s %s" % (name, email)
 805         i += 1
 806         # FIXME Should this be incremented if we didn't match?
 807         anum += 1
 808
 809     i = 0
 810     while True:
 811         i = find_token(document.body, "\\change_", i)
 812         if i == -1:
 813             break
 814         change = document.body[i].split(' ');
 815         if len(change) == 3:
 816             type = change[0]
 817             author_id = int(change[1])
 818             time = change[2]
 819             document.body[i] = "%s %i %s" % (type, idmap[author_id], time)
 820         i += 1
 821
 822
 823 def revert_suppress_date(document):
 824     " Revert suppressing of default document date to preamble code "
 825     i = find_token(document.header, "\\suppress_date", 0)
 826     if i == -1:
 827         return
 828     # remove the preamble line and write to the preamble
 829     # when suppress_date was true
 830     date = str2bool(get_value(document.header, "\\suppress_date", i))
 831     if date:
 832         add_to_preamble(document, ["\\date{}"])
 833     del document.header[i]
 834
 835
 836 mhchem_preamble = [r"\PassOptionsToPackage{version=3}{mhchem}",
 837                    r"\usepackage{mhchem}"]
 838
 839 def convert_mhchem(document):
 840     "Set mhchem to off for versions older than 1.6.x"
 841     if document.initial_format < 277:
 842         # LyX 1.5.x and older did never load mhchem.
 843         # Therefore we must switch it off: Documents that use mhchem have
 844         # a manual \usepackage anyway, and documents not using mhchem but
 845         # custom macros with the same names as mhchem commands might get
 846         # corrupted if mhchem is automatically loaded.
 847         mhchem = 0 # off
 848     else:
 849         # LyX 1.6.x did always load mhchem automatically.
 850         mhchem = 1 # auto
 851     i = find_token(document.header, "\\use_esint", 0)
 852     if i == -1:
 853         # pre-1.5.x document
 854         i = find_token(document.header, "\\use_amsmath", 0)
 855     if i == -1:
 856         document.warning("Malformed LyX document: "
 857                          "Could not find amsmath or esint setting.")
 858         return
 859     document.header.insert(i + 1, "\\use_mhchem %d" % mhchem)
 860     # remove LyX-inserted preamble
 861     if mhchem != 0:
 862         del_complete_lines(document.preamble,
 863                            ['% Added by lyx2lyx']+mhchem_preamble)
 864
 865
 866 def revert_mhchem(document):
 867     "Revert mhchem loading to preamble code."
 868
 869     mhchem = get_value(document.header, "\\use_mhchem", delete=True)
 870     try:
 871         mhchem = int(mhchem)
 872     except ValueError:
 873         document.warning("Malformed LyX document: "
 874                          "Could not find mhchem setting.")
 875         mhchem = 1 # "auto"
 876     # mhchem in {0: "off", 1: "auto", 2: "on"}
 877
 878     if mhchem == 1: # "auto"
 879         i = 0
 880         while i != 1 and mhchem == 1:
 881             i = find_token(document.body, "\\begin_inset Formula", i)
 882             j = find_end_of_inset(document.body, i)
 883             if j == -1:
 884                 break
 885             if (True for line in document.body[i:j]
 886                 if r"\ce{" in line or r"\cf{" in line):
 887                 mhchem = 2
 888                 break
 889             i += 1
 890
 891     if (mhchem == 2 # on
 892         and find_token(document.preamble, r"\usepackage{mhchem}") == -1):
 893         insert_to_preamble(document, mhchem_preamble)
 894
 895
 896 def revert_fontenc(document):
 897     " Remove fontencoding param "
 898     if not del_token(document.header, '\\fontencoding', 0):
 899         document.warning("Malformed LyX document: Missing \\fontencoding.")
 900
 901
 902 def merge_gbrief(document):
 903     " Merge g-brief-en and g-brief-de to one class "
 904
 905     if document.textclass != "g-brief-de":
 906         if document.textclass == "g-brief-en":
 907             document.textclass = "g-brief"
 908             document.set_textclass()
 909         return
 910
 911     obsoletedby = { "Brieftext":       "Letter",
 912                     "Unterschrift":    "Signature",
 913                     "Strasse":         "Street",
 914                     "Zusatz":          "Addition",
 915                     "Ort":             "Town",
 916                     "Land":            "State",
 917                     "RetourAdresse":   "ReturnAddress",
 918                     "MeinZeichen":     "MyRef",
 919                     "IhrZeichen":      "YourRef",
 920                     "IhrSchreiben":    "YourMail",
 921                     "Telefon":         "Phone",
 922                     "BLZ":             "BankCode",
 923                     "Konto":           "BankAccount",
 924                     "Postvermerk":     "PostalComment",
 925                     "Adresse":         "Address",
 926                     "Datum":           "Date",
 927                     "Betreff":         "Reference",
 928                     "Anrede":          "Opening",
 929                     "Anlagen":         "Encl.",
 930                     "Verteiler":       "cc",
 931                     "Gruss":           "Closing"}
 932     i = 0
 933     while True:
 934         i = find_token(document.body, "\\begin_layout", i)
 935         if i == -1:
 936             break
 937
 938         layout = document.body[i][14:]
 939         if layout in obsoletedby:
 940             document.body[i] = "\\begin_layout " + obsoletedby[layout]
 941
 942         i += 1
 943
 944     document.textclass = "g-brief"
 945     document.set_textclass()
 946
 947
 948 def revert_gbrief(document):
 949     " Revert g-brief to g-brief-en "
 950     if document.textclass == "g-brief":
 951         document.textclass = "g-brief-en"
 952         document.set_textclass()
 953
 954
 955 def revert_html_options(document):
 956     " Remove html options "
 957     del_token(document.header, '\\html_use_mathml', 0)
 958     del_token(document.header, '\\html_be_strict', 0)
 959
 960
 961 def revert_includeonly(document):
 962     i = 0
 963     while True:
 964         i = find_token(document.header, "\\begin_includeonly", i)
 965         if i == -1:
 966             return
 967         j = find_end_of(document.header, i, "\\begin_includeonly", "\\end_includeonly")
 968         if j == -1:
 969             document.warning("Unable to find end of includeonly section!!")
 970             break
 971         document.header[i : j + 1] = []
 972
 973
 974 def revert_includeall(document):
 975     " Remove maintain_unincluded_children param "
 976     del_token(document.header, '\\maintain_unincluded_children', 0)
 977
 978
 979 def revert_multirow(document):
 980     " Revert multirow cells in tables to TeX-code"
 981
 982     # first, let's find out if we need to do anything
 983     # cell type 3 is multirow begin cell
 984     i = find_token(document.body, '<cell multirow="3"', 0)
 985     if i == -1:
 986       return
 987
 988     add_to_preamble(document, ["\\usepackage{multirow}"])
 989
 990     begin_table = 0
 991     while True:
 992         # find begin/end of table
 993         begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
 994         if begin_table == -1:
 995             break
 996         end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
 997         if end_table == -1:
 998             document.warning("Malformed LyX document: Could not find end of table.")
 999             begin_table += 1
1000             continue
1001         # does this table have multirow?
1002         i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
1003         if i == -1:
1004             begin_table = end_table
1005             continue
1006
1007         # store the number of rows and columns
1008         numrows = get_option_value(document.body[begin_table], "rows")
1009         numcols = get_option_value(document.body[begin_table], "columns")
1010         try:
1011           numrows = int(numrows)
1012           numcols = int(numcols)
1013         except:
1014           document.warning("Unable to determine rows and columns!")
1015           begin_table = end_table
1016           continue
1017
1018         mrstarts = []
1019         multirows = []
1020         # collect info on rows and columns of this table.
1021         begin_row = begin_table
1022         for row in range(numrows):
1023             begin_row = find_token(document.body, '<row>', begin_row, end_table)
1024             if begin_row == -1:
1025               document.warning("Can't find row " + str(row + 1))
1026               break
1027             end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
1028             if end_row == -1:
1029               document.warning("Can't find end of row " + str(row + 1))
1030               break
1031             begin_cell = begin_row
1032             multirows.append([])
1033             for column in range(numcols):
1034                 begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
1035                 if begin_cell == -1:
1036                   document.warning("Can't find column " + str(column + 1) + \
1037                     "in row " + str(row + 1))
1038                   break
1039                 # NOTE
1040                 # this will fail if someone puts "</cell>" in a cell, but
1041                 # that seems fairly unlikely.
1042                 end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
1043                 if end_cell == -1:
1044                   document.warning("Can't find end of column " + str(column + 1) + \
1045                     "in row " + str(row + 1))
1046                   break
1047                 multirows[row].append([begin_cell, end_cell, 0])
1048                 if document.body[begin_cell].find('multirow="3"') != -1:
1049                   multirows[row][column][2] = 3 # begin multirow
1050                   mrstarts.append([row, column])
1051                 elif document.body[begin_cell].find('multirow="4"') != -1:
1052                   multirows[row][column][2] = 4 # in multirow
1053                 begin_cell = end_cell
1054             begin_row = end_row
1055         # end of table info collection
1056
1057         # work from the back to avoid messing up numbering
1058         mrstarts.reverse()
1059         for m in mrstarts:
1060             row = m[0]
1061             col = m[1]
1062             # get column width
1063             col_width = get_option_value(document.body[begin_table + 2 + col], "width")
1064             # "0pt" means that no width is specified
1065             if not col_width or col_width == "0pt":
1066               col_width = "*"
1067             # determine the number of cells that are part of the multirow
1068             nummrs = 1
1069             for r in range(row + 1, numrows):
1070                 if multirows[r][col][2] != 4:
1071                   break
1072                 nummrs += 1
1073                 # take the opportunity to revert this line
1074                 lineno = multirows[r][col][0]
1075                 document.body[lineno] = document.body[lineno].\
1076                   replace(' multirow="4" ', ' ').\
1077                   replace('valignment="middle"', 'valignment="top"').\
1078                   replace(' topline="true" ', ' ')
1079                 # remove bottom line of previous multirow-part cell
1080                 lineno = multirows[r-1][col][0]
1081                 document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
1082             # revert beginning cell
1083             bcell = multirows[row][col][0]
1084             ecell = multirows[row][col][1]
1085             document.body[bcell] = document.body[bcell].\
1086               replace(' multirow="3" ', ' ').\
1087               replace('valignment="middle"', 'valignment="top"')
1088             blay = find_token(document.body, "\\begin_layout", bcell, ecell)
1089             if blay == -1:
1090               document.warning("Can't find layout for cell!")
1091               continue
1092             bend = find_end_of_layout(document.body, blay)
1093             if bend == -1:
1094               document.warning("Can't find end of layout for cell!")
1095               continue
1096             # do the later one first, so as not to mess up the numbering
1097             # we are wrapping the whole cell in this ert
1098             # so before the end of the layout...
1099             document.body[bend:bend] = put_cmd_in_ert("}")
1100             # ...and after the beginning
1101             document.body[blay + 1:blay + 1] = \
1102               put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}{")
1103
1104         begin_table = end_table
1105
1106
def convert_math_output(document):
    " Convert \\html_use_mathml to \\html_math_output "
    i = find_token(document.header, "\\html_use_mathml", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_use_mathml\s+(\w+)')
    m = rgx.match(document.header[i])
    # The default is kept when the old line cannot be parsed.
    newval = "0" # MathML
    if m:
        if not str2bool(m.group(1)):
            newval = "2" # Images
    else:
        document.warning("Can't match " + document.header[i])
    document.header[i] = "\\html_math_output " + newval
1122
1123
def revert_math_output(document):
    " Revert \\html_math_output to \\html_use_mathml "
    i = find_token(document.header, "\\html_math_output", 0)
    if i == -1:
        return
    rgx = re.compile(r'\\html_math_output\s+(\d)')
    m = rgx.match(document.header[i])
    # "true" is kept for every value other than 1 or 2, and when the line
    # cannot be parsed.
    newval = "true"
    if m:
        if m.group(1) in ("1", "2"):
            newval = "false"
    else:
        document.warning("Unable to match " + document.header[i])
    document.header[i] = "\\html_use_mathml " + newval
1139
1140
1141
def revert_inset_preview(document):
    " Dissolves the preview inset "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Preview", i)
        if i == -1:
            return
        iend = find_end_of_inset(document.body, i)
        if iend == -1:
            document.warning("Malformed LyX document: Could not find end of Preview inset.")
            i += 1
            continue

        # This has several issues.
        # We need to do something about the layouts inside InsetPreview.
        # If we just leave the first one, then we have something like:
        # \begin_layout Standard
        # ...
        # \begin_layout Standard
        # and we get a "no \end_layout" error. So something has to be done.
        # Ideally, we would check if it is the same as the layout we are in.
        # If so, we just remove it; if not, we end the active one. But it is
        # not easy to know what layout we are in, due to depth changes, etc,
        # and it is not clear to me how much work it is worth doing. In most
        # cases, the layout will probably be the same.
        #
        # For the same reason, we have to remove the \end_layout tag at the
        # end of the last layout in the inset. Again, that will sometimes be
        # wrong, but it will usually be right. To know what to do, we would
        # again have to know what layout the inset is in.

        blay = find_token(document.body, "\\begin_layout", i, iend)
        if blay == -1:
            document.warning("Can't find layout for preview inset!")
            # always do the later one first...
            del document.body[iend]
            del document.body[i]
            # deletions mean we do not need to reset i
            continue

        # This is where we would check what layout we are in.
        # The check for Standard is definitely wrong.
        #
        # lay = document.body[blay].split(None, 1)[1]
        # if lay != oldlayout:
        #     # record a boolean to tell us what to do later....
        #     # better to do it later, since (a) it won't mess up
        #     # the numbering and (b) we only modify at the end.

        # we want to delete the last \\end_layout in this inset, too.
        # note that this may not be the \\end_layout that goes with blay!!
        bend = find_end_of_layout(document.body, blay)
        if bend == -1:
            # No matching \end_layout: start the scan below right after blay.
            # Scanning from -1 + 1 == 0 could pick an \end_layout outside
            # the inset and delete the wrong line.
            bend = blay
        while True:
            tmp = find_token(document.body, "\\end_layout", bend + 1, iend)
            if tmp == -1:
                break
            bend = tmp
        if bend == blay:
            document.warning("Unable to find last layout in preview inset!")
            del document.body[iend]
            del document.body[i]
            # deletions mean we do not need to reset i
            continue
        # always do the later one first...
        del document.body[iend]
        del document.body[bend]
        del document.body[i:blay + 1]
        # we do not need to reset i
1210
1211
def revert_equalspacing_xymatrix(document):
    " Revert a Formula with xymatrix@! to an ERT inset "
    reverted_any = False      # at least one formula was turned into ERT
    plain_xymatrix = False    # some untouched formula contains \xymatrix

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Formula", pos)
        if pos == -1:
            break
        end = find_end_of_inset(document.body, pos)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Formula inset.")
            pos += 1
            continue

        formula = document.body[pos:end]
        if any("\\xymatrix@!" in line for line in formula):
            reverted_any = True
            # the formula text starts after the 21-character inset header
            ert = put_cmd_in_ert([document.body[pos][21:]]
                                 + document.body[pos + 1:end])
            document.body[pos:end + 1] = ert
            pos += len(ert) - (end - pos) + 1
        else:
            if any("\\xymatrix" in line for line in formula):
                plain_xymatrix = True
            pos = end + 1

    # xy is only added here when no remaining formula uses \xymatrix
    if reverted_any and not plain_xymatrix:
        add_to_preamble(document, ['\\usepackage[all]{xy}'])
1250
1251
def revert_notefontcolor(document):
    " Reverts greyed-out note font color to preamble code "

    i = find_token(document.header, "\\notefontcolor", 0)
    if i == -1:
        return

    colorcode = get_value(document.header, '\\notefontcolor', i)
    del document.header[i]

    # are there any grey notes?
    if find_token(document.body, "\\begin_inset Note Greyedout", 0) == -1:
        # no need to do anything else, and \renewcommand will throw
        # an error since lyxgreyedout will not exist.
        return

    # the color code is in the form #rrggbb where every character denotes a hex number
    red = hex2ratio(colorcode[1:3])
    green = hex2ratio(colorcode[3:5])
    blue = hex2ratio(colorcode[5:7])
    # write the preamble
    # (each entry is one preamble line; the comma after the \@ifundefined
    # entry keeps it from being fused with the \definecolor entry)
    insert_to_preamble(document,
      [ '%  for greyed-out notes',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        '\\definecolor{note_fontcolor}{rgb}{%s,%s,%s}' % (red, green, blue),
        '\\renewenvironment{lyxgreyedout}',
        ' {\\textcolor{note_fontcolor}\\bgroup}{\\egroup}'])
1279
1280
def revert_turkmen(document):
    "Revert the Turkmen language setting"
    # revert_language receives the same name for all three name arguments
    name = "turkmen"
    revert_language(document, name, name, name)
1285
1286
def revert_fontcolor(document):
    " Reverts font color to preamble code "
    pos = find_token(document.header, "\\fontcolor", 0)
    if pos == -1:
        return
    hexcode = get_value(document.header, '\\fontcolor', pos)
    del document.header[pos]
    # don't clutter the preamble if font color is not set
    if hexcode == "#000000":
        return
    # the color code is in the form #rrggbb: convert the red, green and
    # blue digit pairs in that order
    rgb = tuple(hex2ratio(hexcode[k:k + 2]) for k in (1, 3, 5))
    # write the preamble
    preamble = [
        '%  Set the font color',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        '\\definecolor{document_fontcolor}{rgb}{%s,%s,%s}' % rgb,
        '\\color{document_fontcolor}',
    ]
    insert_to_preamble(document, preamble)
1307
1308
def revert_shadedboxcolor(document):
    " Reverts shaded box color to preamble code "
    pos = find_token(document.header, "\\boxbgcolor", 0)
    if pos == -1:
        return
    hexcode = get_value(document.header, '\\boxbgcolor', pos)
    del document.header[pos]
    # the color code is in the form #rrggbb: convert the red, green and
    # blue digit pairs in that order
    rgb = tuple(hex2ratio(hexcode[k:k + 2]) for k in (1, 3, 5))
    # write the preamble
    preamble = [
        '%  Set the color of boxes with shaded background',
        '\\@ifundefined{definecolor}{\\usepackage{color}}{}',
        "\\definecolor{shadecolor}{rgb}{%s,%s,%s}" % rgb,
    ]
    insert_to_preamble(document, preamble)
1325
1326
def revert_lyx_version(document):
    " Reverts LyX Version information from Inset Info "
    # Placeholder text, used when lyx2lyx_version cannot supply a version.
    version = "LyX version"
    try:
        import lyx2lyx_version
        version = lyx2lyx_version.version
    except (ImportError, AttributeError):
        # best effort: keep the placeholder
        pass

    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Info', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i += 1
            continue

        # We expect:
        # \begin_inset Info
        # type  "lyxinfo"
        # arg   "version"
        # \end_inset
        typ = get_quoted_value(document.body, "type", i, j)
        arg = get_quoted_value(document.body, "arg", i, j)
        if arg != "version" or typ != "lyxinfo":
            i = j + 1
            continue

        # We do not actually know the version of LyX used to produce the document.
        # But we can use our version, since we are reverting.
        s = [version]
        # Now we want to check if the line after "\end_inset" is empty. It normally
        # is, so we want to remove it, too.
        lastline = j + 1
        # the inset may end on the very last body line, so check the bound
        if lastline < len(document.body) \
           and document.body[lastline].strip() == "":
            lastline = j + 2
        document.body[i: lastline] = s
        i = i + 1
1368
1369
def revert_math_scale(document):
    " Remove math scaling and LaTeX options "
    obsolete = ('\\html_math_img_scale',
                '\\html_latex_start',
                '\\html_latex_end')
    for token in obsolete:
        del_token(document.header, token, 0)
1375
1376
def revert_pagesizes(document):
    " Revert page sizes to default "
    # sizes whose \papersize header line is removed
    dropped = ("a0paper", "a1paper", "a2paper", "a6paper",
               "b0paper", "b1paper", "b2paper", "b6paper",
               "b0j", "b1j", "b2j", "b3j", "b4j", "b5j", "b6j")
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    # the size name starts at column 11 of the header line
    if document.header[pos][11:] in dropped:
        del document.header[pos]
1388
1389
def revert_DIN_C_pagesizes(document):
    " Revert DIN C page sizes to default "
    # sizes whose \papersize header line is removed
    dropped = ("c0paper", "c1paper", "c2paper", "c3paper",
               "c4paper", "c5paper", "c6paper")
    pos = find_token(document.header, '\\papersize', 0)
    if pos == -1:
        return
    # the size name starts at column 11 of the header line
    if document.header[pos][11:] in dropped:
        del document.header[pos]
1399
1400
def convert_html_quotes(document):
    " Remove quotes around html_latex_start and html_latex_end "
    # (header token, pattern capturing the quoted value, new line prefix)
    settings = (
        ('\\html_latex_start', r'\\html_latex_start\s+"(.*)"',
         "\\html_latex_start "),
        ('\\html_latex_end', r'\\html_latex_end\s+"(.*)"',
         "\\html_latex_end "),
    )
    for token, pattern, prefix in settings:
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        hit = re.match(pattern, document.header[pos])
        # lines without a quoted value are left as they are
        if hit:
            document.header[pos] = prefix + hit.group(1)
1419
1420
def revert_html_quotes(document):
    " Add quotes around html_latex_start and html_latex_end "
    # (header token, pattern capturing the value, warning prefix,
    #  prefix of the rewritten line up to and including the opening quote)
    settings = (
        ('\\html_latex_start', r'\\html_latex_start\s+(.*)',
         "Weird html_latex_start line: ", "\\html_latex_start \""),
        ('\\html_latex_end', r'\\html_latex_end\s+(.*)',
         "Weird html_latex_end line: ", "\\html_latex_end \""),
    )
    for token, pattern, complaint, prefix in settings:
        pos = find_token(document.header, token, 0)
        if pos == -1:
            continue
        line = document.header[pos]
        hit = re.match(pattern, line)
        if hit:
            document.header[pos] = prefix + hit.group(1) + "\""
        else:
            # a line that cannot be parsed is removed from the header
            document.warning(complaint + line)
            del document.header[pos]
1445
1446
def revert_output_sync(document):
    " Remove forward search options "
    # same removal order as before: the longer token first
    for token in ('\\output_sync_macro', '\\output_sync'):
        del_token(document.header, token, 0)
1451
1452
def revert_align_decimal(document):
    " Revert decimal-aligned table columns to centered columns "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(i))
            i += 1
            continue
        cell = find_token(document.body, "<cell", i, j)
        if cell == -1:
            document.warning("Can't find any cells in Tabular inset at line " + str(i))
            i = j
            continue
        # only the <column ...> lines before the first cell are examined
        k = i + 1
        while True:
            k = find_token(document.body, "<column", k, cell)
            if k == -1:
                # done with this table only; returning here would leave
                # every later table unconverted
                break
            if 'alignment="decimal"' in document.body[k]:
                remove_option(document.body, k, 'decimal_point')
                document.body[k] = \
                  document.body[k].replace('alignment="decimal"', 'alignment="center"')
            k += 1
        # resume at the first cell so that tables nested inside cells and
        # tables further down the document are processed as well
        i = cell
1481
1482
def convert_optarg(document):
    " Convert \\begin_inset OptArg to \\begin_inset Argument "
    pos = find_token(document.body, '\\begin_inset OptArg', 0)
    while pos != -1:
        # the whole line is replaced by the new inset header
        document.body[pos] = "\\begin_inset Argument"
        pos = find_token(document.body, '\\begin_inset OptArg', pos + 1)
1492
1493
def revert_argument(document):
    " Convert \\begin_inset Argument to \\begin_inset OptArg "
    pos = find_token(document.body, '\\begin_inset Argument', 0)
    while pos != -1:
        # the whole line is replaced by the old inset header
        document.body[pos] = "\\begin_inset OptArg"
        pos = find_token(document.body, '\\begin_inset Argument', pos + 1)
1503
1504
def revert_makebox(document):
    " Convert \\makebox to TeX code "
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Box', i)
        if i == -1:
            break
        z = find_end_of_inset(document.body, i)
        if z == -1:
            document.warning("Malformed LyX document: Can't find end of box inset.")
            i += 1
            continue
        blay = find_token(document.body, "\\begin_layout", i, z)
        if blay == -1:
            document.warning("Malformed LyX document: Can't find layout in box.")
            i = z
            continue
        # use_makebox is expected exactly six lines below the inset header
        j = find_token(document.body, 'use_makebox', i)
        if j == -1 or j != i + 6:
            document.warning("Malformed LyX document: Can't find use_makebox statement in box.")
            i = z
            continue
        # delete use_makebox
        # Only frameless boxes with use_makebox switched on become \makebox;
        # for every other box the option line is simply dropped.  The value
        # is compared as the string "1": comparing it with the integer 1 is
        # always unequal and would make the conversion below unreachable.
        if not check_token(document.body[i], "\\begin_inset Box Frameless") \
          or get_value(document.body, 'use_makebox', j) != "1":
            del document.body[j]
            i += 1
            continue
        bend = find_end_of_layout(document.body, blay)
        if bend == -1 or bend > z:
            document.warning("Malformed LyX document: Can't find end of layout in box.")
            i = z
            continue
        # determine the alignment
        align = get_quoted_value(document.body, 'hor_pos', i, blay, "c")
        # determine the width
        length = get_quoted_value(document.body, 'width', i, blay, "50col%")
        length = latex_length(length)[1]
        # remove the \end_layout \end_inset pair
        # (the later span is replaced first so that i and blay stay valid)
        document.body[bend:z + 1] = put_cmd_in_ert("}")
        subst = "\\makebox[" + length + "][" \
          + align + "]{"
        document.body[i:blay + 1] = put_cmd_in_ert(subst)
        i += 1
1549
1550
def convert_use_makebox(document):
    " Adds use_makebox option for boxes "
    pos = find_token(document.body, '\\begin_inset Box', 0)
    while pos != -1:
        parbox = find_token(document.body, 'use_parbox', pos)
        # use_parbox is expected exactly five lines below the inset header;
        # the new option goes on the line right after it
        if parbox == pos + 5:
            document.body.insert(parbox + 1, "use_makebox 0")
        else:
            document.warning("Malformed LyX document: Can't find use_parbox statement in box.")
        pos = find_token(document.body, '\\begin_inset Box', pos + 1)
1566
1567
def revert_IEEEtran(document):
    " Convert IEEEtran layouts and styles to TeX code "

    if document.textclass != "IEEEtran":
        return

    revert_flex_inset(document.body, "IEEE membership", "\\IEEEmembership")
    revert_flex_inset(document.body, "Lowercase", "\\MakeLowercase")

    # layouts that are moved to the preamble as a LaTeX command
    to_preamble = {"Special Paper Notice": "\\IEEEspecialpapernotice",
                   "After Title Text":     "\\IEEEaftertitletext",
                   "Publication ID":       "\\IEEEpubid"}
    # layouts that are only renamed
    renamed = {"Page headings":            "MarkBoth",
               "Biography without photo":  "BiographyNoPhoto"}
    ordered = ("Special Paper Notice", "After Title Text", "Publication ID",
               "Page headings", "Biography without photo")

    for layout in ordered:
        token = '\\begin_layout ' + layout
        new_name = renamed.get(layout)
        pos = 0
        while True:
            pos = find_token(document.body, token, pos)
            if pos == -1:
                break
            end = find_end_of_layout(document.body, pos)
            if end == -1:
                document.warning("Malformed LyX document: Can't find end of " + layout + " layout.")
                pos += 1
                continue
            if new_name is not None:
                document.body[pos] = "\\begin_layout " + new_name
                pos = end
                continue
            content = lyx2latex(document, document.body[pos:end + 1])
            add_to_preamble(document, [to_preamble[layout] + "{" + content + "}"])
            del document.body[pos:end + 1]
            # the layout is gone, so pos already points at the next line
1603         # no need to reset i
1604
1605
def convert_prettyref(document):
    " Converts prettyref references to neutral formatted refs "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef!")
            i += 1
            continue
        # only the command name inside the inset is changed
        k = find_token(document.body, "LatexCommand prettyref", i, j)
        if k != -1:
            document.body[k] = "LatexCommand formatted"
        i = j + 1
    # insert(-1, ...) places the new setting before the last header line
    document.header.insert(-1, "\\use_refstyle 0")
1626
1627
def revert_refstyle(document):
    """Revert neutral formatted refs to prettyref.

    In every "CommandInset ref" inset a "LatexCommand formatted" line is
    replaced by "LatexCommand prettyref". The first "\\use_refstyle" header
    line, if there is one, is removed.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: No end of InsetRef")
            i += 1
            continue
        # only look inside this inset for the command line
        k = find_token(document.body, "LatexCommand formatted", i, j)
        if k != -1:
            document.body[k] = "LatexCommand prettyref"
        i = j + 1
    i = find_token(document.header, "\\use_refstyle", 0)
    if i != -1:
        document.header.pop(i)
1650
1651
def revert_nameref(document):
    """Convert namerefs to ERT.

    Each "CommandInset ref" inset whose command is "Nameref" or "nameref"
    is replaced by the ERT produced by put_cmd_in_ert for
    "\\<cmd>{<reference>}". If at least one inset was converted,
    "\\usepackage{nameref}" is added to the preamble.
    """
    cmds = ["Nameref", "nameref"]
    foundone = False
    rx = re.compile(r'reference "(.*)"')
    for cmd in cmds:
        i = 0
        oldcmd = "LatexCommand " + cmd
        while True:
            # It seems better to look for this, as most of the reference
            # insets won't be ones we care about.
            i = find_token(document.body, oldcmd, i)
            if i == -1:
                break
            cmdloc = i
            i += 1
            # Make sure it is actually in an inset!
            # A normal line could begin with "LatexCommand nameref"!
            stins, endins = is_in_inset(document.body, cmdloc,
                                        "\\begin_inset CommandInset ref")
            if endins == -1:
                continue
            # ok, so it is in an InsetRef
            refline = find_token(document.body, "reference", stins, endins)
            if refline == -1:
                document.warning("Can't find reference for inset at line " + str(stins) + "!!")
                continue
            m = rx.match(document.body[refline])
            if not m:
                document.warning("Can't match reference line: " + document.body[refline])
                continue
            foundone = True
            ref = m.group(1)
            newcontent = put_cmd_in_ert('\\' + cmd + '{' + ref + '}')
            document.body[stins:endins + 1] = newcontent
            # resume right behind the ERT that replaced the inset, so no
            # shifted line is skipped or scanned twice
            i = stins + len(newcontent)

    if foundone:
        add_to_preamble(document, ["\\usepackage{nameref}"])
1690
1691
def remove_Nameref(document):
    """Convert Nameref commands to nameref commands.

    Every body line starting with "LatexCommand Nameref" that is_in_inset
    reports as lying inside a "CommandInset ref" inset is rewritten to
    "LatexCommand nameref".
    """
    token = "LatexCommand Nameref"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        # Searching for the command line directly is cheaper than visiting
        # every reference inset, but an ordinary text line could start with
        # the same words, hence the inset check.
        inside = is_in_inset(document.body, pos,
                             "\\begin_inset CommandInset ref", default=False)
        if inside:
            document.body[pos] = "LatexCommand nameref"
        pos = find_token(document.body, token, pos + 1)
1709
1710
def revert_mathrsfs(document):
    """Load mathrsfs if \\mathscr is used in the document.

    Adds "\\usepackage{mathrsfs}" to the preamble (once) when any body line
    contains "\\mathscr{"; otherwise the document is left untouched.
    """
    if any("\\mathscr{" in line for line in document.body):
        add_to_preamble(document, ["\\usepackage{mathrsfs}"])
1718
1719
def convert_flexnames(document):
    """Strip the type prefix from flex inset names.

    "\\begin_inset Flex Custom:Style" becomes "\\begin_inset Flex Style",
    and likewise for the "CharStyle:" and "Element:" prefixes. Flex insets
    without one of these prefixes are left as they are.
    """
    prefixed = re.compile(r'^\\begin_inset Flex (?:Custom|CharStyle|Element):(.+)$')
    pos = find_token(document.body, "\\begin_inset Flex", 0)
    while pos != -1:
        hit = prefixed.match(document.body[pos])
        if hit:
            document.body[pos] = "\\begin_inset Flex " + hit.group(1)
        pos = find_token(document.body, "\\begin_inset Flex", pos + 1)
1733
1734
# Maps the bare name of a flex inset to its prefixed name
# ("CharStyle:<name>" or "Custom:<name>"). revert_flexnames uses this table
# when document.backend is "latex".
flex_insets = dict(
    (style, prefix + ":" + style)
    for prefix, styles in (
        ("CharStyle", ("Alert", "Code", "Concepts", "E-Mail", "Emph",
                       "Expression", "Initial", "Institute", "Meaning",
                       "Noun", "Strong", "Structure")),
        ("Custom", ("ArticleMode", "Endnote", "Glosse", "PresentationMode",
                    "Tri-Glosse")),
    )
    for style in styles
)
1754
# Maps the bare name of a flex inset to its "Element:<name>" form.
# revert_flexnames uses this table when document.backend is not "latex".
flex_elements = dict(
    (element, "Element:" + element)
    for element in (
        "Abbrev", "CCC-Code", "Citation-number", "City", "Code", "CODEN",
        "Country", "Day", "Directory", "Dscr", "Email", "Emph", "Filename",
        "Firstname", "Fname", "GuiButton", "GuiMenu", "GuiMenuItem", "ISSN",
        "Issue-day", "Issue-months", "Issue-number", "KeyCap", "KeyCombo",
        "Keyword", "Literal", "MenuChoice", "Month", "Orgdiv", "Orgname",
        "Postcode", "SS-Code", "SS-Title", "State", "Street", "Surname",
        "Volume", "Year",
    )
)
1795
1796
def revert_flexnames(document):
    """Put the type prefix back on flex inset names.

    The lookup table is flex_insets when document.backend is "latex" and
    flex_elements otherwise. A flex inset whose name is a key of the table
    is renamed to the table's value; other names are left untouched.
    """
    flexlist = flex_insets if document.backend == "latex" else flex_elements
    pattern = re.compile(r'^\\begin_inset Flex\s+(.+)$')

    pos = find_token(document.body, "\\begin_inset Flex", 0)
    while pos != -1:
        line = document.body[pos]
        found = pattern.match(line)
        if found is None:
            document.warning("Illegal flex inset: " + line)
        else:
            style = found.group(1)
            if style in flexlist:
                document.body[pos] = "\\begin_inset Flex " + flexlist[style]
        pos = find_token(document.body, "\\begin_inset Flex", pos + 1)
1818
1819
def convert_mathdots(document):
    """Add the \\use_mathdots header parameter.

    The new line goes directly after "\\use_mhchem" or, failing that, after
    "\\use_esint". Its value is 2 when the preamble contains a line starting
    with "\\usepackage{mathdots}" (that line is then deleted) and 0
    otherwise.
    """
    anchor = find_token(document.header, "\\use_mhchem", 0)
    if anchor == -1:
        anchor = find_token(document.header, "\\use_esint", 0)
        if anchor == -1:
            document.warning("Malformed LyX document: Can't find \\use_mhchem.")
            return

    loaded = find_token(document.preamble, "\\usepackage{mathdots}", 0)
    if loaded != -1:
        document.header.insert(anchor + 1, "\\use_mathdots 2")
        del document.preamble[loaded]
    else:
        document.header.insert(anchor + 1, "\\use_mathdots 0")
1834
1835
def revert_mathdots(document):
    """Load mathdots if used in the document.

    The "\\use_mathdots" header line is removed and its value decides what
    happens: 0 does nothing, 2 adds "\\usepackage{mathdots}" to the
    preamble, and any other value (or a missing or unparsable line) is
    treated as "auto": the package is loaded conditionally if some Formula
    inset contains "\\iddots".
    """
    mathdots = find_token(document.header, "\\use_mathdots", 0)
    if mathdots == -1:
        document.warning("No \\use_mathdots line. Assuming auto.")
    else:
        val = get_value(document.header, "\\use_mathdots", mathdots)
        del document.header[mathdots]
        try:
            usedots = int(val)
        except (TypeError, ValueError):
            # only a failed conversion means "invalid value"; anything else
            # should surface instead of being swallowed
            document.warning("Invalid \\use_mathdots value: " + str(val) + ". Assuming auto.")
            usedots = 1

        if usedots == 0:
            # do not load case
            return
        if usedots == 2:
            # force load case
            add_to_preamble(document, ["\\usepackage{mathdots}"])
            return

    # so we are in the auto case. we want to load mathdots if \iddots is used.
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue
        code = "\n".join(document.body[i:j])
        if code.find("\\iddots") != -1:
            # NOTE(review): \@ifundefined is normally followed by a "true" and
            # a "false" branch; only one is given here -- confirm this is
            # what the preamble is meant to contain.
            add_to_preamble(document, ["\\@ifundefined{iddots}{\\usepackage{mathdots}}"])
            return
        i = j
1876
1877
def convert_rule(document):
    """Convert \\lyxline to CommandInset line.

    Every body line starting with "\\lyxline" is replaced by a line inset.
    If the rule is the first content of its paragraph and the document uses
    indented paragraphs, "\\noindent" is added to the paragraph; if other
    content precedes the rule, a Newline inset is put in front of it.
    """
    inset = ['\\begin_inset CommandInset line',
             'LatexCommand rule',
             'offset "0.5ex"',
             'width "100line%"',
             'height "1pt"', '',
             '\\end_inset', '', '']
    newline = ["\\begin_inset Newline newline", "\\end_inset", ""]

    # if paragraphs are indented, we may have to unindent to get the
    # line to be full-width.
    indent = get_value(document.header, "\\paragraph_separation", 0)
    have_indent = (indent == "indent")

    i = 0
    while True:
        i = find_token(document.body, "\\lyxline", i)
        if i == -1:
            return

        # we need to find out if this line follows other content
        # in its paragraph. find its layout....
        lastlay = find_token_backwards(document.body, "\\begin_layout", i)
        if lastlay == -1:
            document.warning("Can't find layout for line at " + str(i))
            # do the best we can.
            document.body[i:i + 1] = inset
            i += len(inset)
            continue

        # ...and look for other content before it: empty lines and
        # paragraph options (lines starting with a backslash) do not count.
        lineisfirst = all(not line or line[0] == '\\'
                          for line in document.body[lastlay + 1:i])

        if lineisfirst:
            document.body[i:i + 1] = inset
            i += len(inset)
            if have_indent:
                # we need to unindent, lest the line be too long
                document.body.insert(lastlay + 1, "\\noindent")
                # the insertion above moved the inset down by one line
                i += 1
        else:
            # so our line is in the middle of a paragraph
            # we need to add a new line, lest this line follow the
            # other content on that line and run off the side of the page
            document.body[i:i + 1] = newline + inset
            i += len(newline) + len(inset)
1931
1932
def revert_rule(document):
    """Revert line insets to TeX code.

    Each "CommandInset line" inset is replaced by ERT holding
    "\\rule[offset]{width}{height}"; the optional "[offset]" part is left
    out when the inset has no offset. Width defaults to "100col%" and
    height to "1pt"; both are converted with latex_length.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset line", i)
        if i == -1:
            return
        # find end of inset
        j = find_token(document.body, "\\end_inset", i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of line inset.")
            return
        # determine the optional offset; it carries its own brackets so that
        # a missing offset produces no optional argument at all
        offset = get_quoted_value(document.body, 'offset', i, j)
        if offset:
            offset = '[' + offset + ']'
        # determine the width
        width = get_quoted_value(document.body, 'width', i, j, "100col%")
        width = latex_length(width)[1]
        # determine the height
        height = get_quoted_value(document.body, 'height', i, j, "1pt")
        height = latex_length(height)[1]
        # output the \rule command
        subst = "\\rule" + offset + "{" + width + "}{" + height + "}"
        ert = put_cmd_in_ert(subst)
        document.body[i:j + 1] = ert
        # continue right behind the lines that were just inserted
        i += len(ert)
1959
1960
def revert_diagram(document):
    """Add the feyn package if \\Diagram is used in math.

    Formula insets are scanned in order; the first one containing
    "\\Diagram" causes "\\usepackage{feyn}" to be added to the preamble and
    ends the scan. The scan also stops at a Formula inset whose end cannot
    be found.
    """
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Formula', pos)
        if pos == -1:
            return
        stop = find_end_of_inset(document.body, pos)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset.")
            return
        formula = "\n".join(document.body[pos:stop])
        if "\\Diagram" in formula:
            # only need to do it once!
            add_to_preamble(document, ["\\usepackage{feyn}"])
            return
        pos = stop
1979
# Text classes for which convert_bibtex_clearpage acts.
chapters = (
    "amsbook",
    "book",
    "docbook-book",
    "elsart",
    "extbook",
    "extreport",
    "jbook",
    "jreport",
    "jsbook",
    "literate-book",
    "literate-report",
    "memoir",
    "mwbk",
    "mwrep",
    "recipebook",
    "report",
    "scrbook",
    "scrreprt",
    "svmono",
    "svmult",
    "tbook",
    "treport",
    "tufte-book",
)
1984
def convert_bibtex_clearpage(document):
    """Insert a clear(double)page before bibliographies that use bibtotoc.

    Only documents whose text class is listed in chapters are touched. For
    each bibtex inset whose options contain "bibtotoc", a Standard paragraph
    holding a Newpage inset is inserted in front of the paragraph containing
    the inset: "clearpage" for one-sided documents, "cleardoublepage"
    otherwise. A missing or invalid "\\papersides" value is treated as
    one-sided.
    """
    if document.textclass not in chapters:
        return

    i = find_token(document.header, '\\papersides', 0)
    sides = 0
    if i == -1:
        document.warning("Malformed LyX document: Can't find papersides definition.")
        document.warning("Assuming single sided.")
        sides = 1
    else:
        val = get_value(document.header, "\\papersides", i)
        try:
            sides = int(val)
        except (TypeError, ValueError):
            # not a number: keep the sentinel so the check below reports it
            sides = 0
        if sides != 1 and sides != 2:
            document.warning("Invalid papersides value: " + str(val))
            document.warning("Assuming single sided.")
            sides = 1

    j = 0
    while True:
        j = find_token(document.body, "\\begin_inset CommandInset bibtex", j)
        if j == -1:
            return

        k = find_end_of_inset(document.body, j)
        if k == -1:
            document.warning("Can't find end of Bibliography inset at line " + str(j))
            j += 1
            continue

        # only act if there is the option "bibtotoc"
        val = get_value(document.body, 'options', j, k)
        if not val:
            document.warning("Can't find options for bibliography inset at line " + str(j))
            j = k
            continue

        if val.find("bibtotoc") == -1:
            j = k
            continue

        # so we want to insert a new page right before the paragraph that
        # this bibliography thing is in.
        lay = find_token_backwards(document.body, "\\begin_layout", j)
        if lay == -1:
            document.warning("Can't find layout containing bibliography inset at line " + str(j))
            j = k
            continue

        if sides == 1:
            cmd = "clearpage"
        else:
            cmd = "cleardoublepage"
        subst = ['\\begin_layout Standard',
                 '\\begin_inset Newpage ' + cmd,
                 '\\end_inset', '', '',
                 '\\end_layout', '']
        document.body[lay:lay] = subst
        # the inset end moved down by the number of inserted lines
        j = k + len(subst)
2049
2050
def check_passthru(document):
    """Tell whether the document uses a literate class or module.

    Returns True if the text class is "literate-article", "literate-book"
    or "literate-report", or if document.get_module_list() contains
    "sweave" or "noweb"; False otherwise. The module list is only queried
    when the text class does not already decide the answer.
    """
    if document.textclass in ("literate-article", "literate-book", "literate-report"):
        return True
    return any(mod in ("sweave", "noweb") for mod in document.get_module_list())
2061
2062
def convert_passthru(document):
    """Replace newlines in Chunk and Scrap paragraphs by paragraph breaks.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Only runs when check_passthru(document) is true. In every "Chunk" or
    "Scrap" paragraph without paragraph customization, each Newline inset
    is replaced by the end of the paragraph and the start of a new one of
    the same layout. If the paragraph that follows has the same layout, an
    empty paragraph of that layout is inserted between the two.
    """
    if not check_passthru(document):
        return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        # every layout type gets its own pass over the whole body
        beg = 0
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we are now going to replace newline insets within this layout
            # by new instances of this layout. so we have repeated layouts
            # instead of newlines.

            # if the paragraph has any customization, however, we do not want to
            # do the replacement.
            if document.body[beg + 1].startswith("\\"):
                beg = end + 1
                continue

            ns = beg
            while True:
                ns = find_token(document.body, "\\begin_inset Newline newline", ns, end)
                if ns == -1:
                    break
                ne = find_end_of_inset(document.body, ns)
                if ne == -1 or ne > end:
                    document.warning("Can't find end of inset at line " + str(ns))
                    ns += 1
                    continue
                # swallow the empty line that follows the inset, if any
                if document.body[ne + 1] == "":
                    ne += 1
                subst = ["\\end_layout", "", "\\begin_layout " + lay]
                removed = ne - ns + 1
                document.body[ns:ne + 1] = subst
                # the body changed length by len(subst) - removed lines, so
                # the end of the layout moves by that much; the search
                # resumes right behind the inserted lines
                end += len(subst) - removed
                ns += len(subst)

            # ok, we now want to find out if the next layout is the
            # same as this one. if so, we will insert an extra copy of it
            nextbeg = find_token(document.body, "\\begin_layout", end)
            beg = end + 1
            if nextbeg != -1:
                m = rx.match(document.body[nextbeg])
                if m and m.group(1) == lay:
                    subst = ["\\begin_layout " + lay, "", "\\end_layout", ""]
                    document.body[nextbeg:nextbeg] = subst
                    # skip the empty layout we just inserted and continue
                    # with the paragraph that followed originally
                    beg = nextbeg + len(subst)
2126
2127
def revert_passthru(document):
    """Merge consecutive Chunk or Scrap paragraphs using newlines.

    See http://www.mail-archive.com/lyx-devel@lists.lyx.org/msg161298.html

    Only runs when check_passthru(document) is true. While a "Chunk" or
    "Scrap" paragraph is followed by a paragraph of the same layout, the
    follower is either deleted (if it is empty, which also ends the merging
    for this paragraph) or joined to the current paragraph with a Newline
    inset in place of the paragraph break.
    """
    if not check_passthru(document):
        return

    rx = re.compile(r"\\begin_layout \s*(\w+)")
    for lay in ["Chunk", "Scrap"]:
        # every layout type gets its own pass over the whole body
        beg = 0
        while True:
            beg = find_token(document.body, "\\begin_layout " + lay, beg)
            if beg == -1:
                break
            end = find_end_of_layout(document.body, beg)
            if end == -1:
                document.warning("Can't find end of layout at line " + str(beg))
                beg += 1
                continue

            # we now want to find out if the next layout is the
            # same as this one. but we will need to do this over and
            # over again.
            while True:
                nextbeg = find_token(document.body, "\\begin_layout", end)
                if nextbeg == -1:
                    break
                m = rx.match(document.body[nextbeg])
                if not m:
                    break
                nextlay = m.group(1)
                if nextlay != lay:
                    break
                # so it is the same layout again. we now want to know if it is empty.
                # but first let's check and make sure there is no content between the
                # two layouts. i'm not sure if that can happen or not.
                for lineno in range(end + 1, nextbeg):
                    if document.body[lineno] != "":
                        # this only reports the oddity; merging goes on
                        document.warning("Found content between adjacent " + lay + " layouts!")
                        break
                nextend = find_end_of_layout(document.body, nextbeg)
                if nextend == -1:
                    document.warning("Can't find end of layout at line " + str(nextbeg))
                    break
                empty = all(document.body[lineno] == ""
                            for lineno in range(nextbeg + 1, nextend))
                if empty:
                    # empty layouts just get removed
                    # should we check if it's before yet another such layout?
                    del document.body[nextbeg:nextend + 1]
                    # and we do not want to check again. we know the next layout
                    # should be another Chunk and should be left as is.
                    break
                # if it's not empty, then we want to insert a newline in place
                # of the layout switch
                subst = ["\\begin_inset Newline newline", "\\end_inset", ""]
                document.body[end:nextbeg + 1] = subst
                # and now we have to find the end of the new, larger layout
                newend = find_end_of_layout(document.body, beg)
                if newend == -1:
                    document.warning("Can't find end of new layout at line " + str(beg))
                    break
                end = newend
            beg = end + 1
2193
2194
def revert_multirowOffset(document):
    """Revert multirow cells with offset in tables to TeX-code.

    Does nothing unless the body contains a cell starting with
    '<cell multirow="3" mroffset='. Otherwise "\\usepackage{multirow}" is
    added to the preamble and, in every table, the content of each such
    cell is wrapped in ERT "\\multirow{n}{width}[offset]{" ... "}". The
    multirow/mroffset attributes are stripped from the cells involved and
    their vertical alignment is changed from "middle" to "top".
    """
    # this routine is the same as the revert_multirow routine except that
    # it checks additionally for the offset

    # first, let's find out if we need to do anything
    i = find_token(document.body, '<cell multirow="3" mroffset=', 0)
    if i == -1:
        return

    add_to_preamble(document, ["\\usepackage{multirow}"])

    rgx = re.compile(r'mroffset="[^"]+?"')
    begin_table = 0

    while True:
        # find begin/end of table
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        # does this table have multirow?
        i = find_token(document.body, '<cell multirow="3"', begin_table, end_table)
        if i == -1:
            begin_table = end_table
            continue

        # store the number of rows and columns
        numrows = get_option_value(document.body[begin_table], "rows")
        numcols = get_option_value(document.body[begin_table], "columns")
        try:
            numrows = int(numrows)
            numcols = int(numcols)
        except (TypeError, ValueError):
            document.warning("Unable to determine rows and columns!")
            begin_table = end_table
            continue

        mrstarts = []
        # multirows[row][column] is [first line, last line, kind] of a cell,
        # where kind is 3 (begin multirow), 4 (in multirow) or 0 (neither)
        multirows = []
        # collect info on rows and columns of this table.
        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row>', begin_row, end_table)
            if begin_row == -1:
                document.warning("Can't find row " + str(row + 1))
                break
            end_row = find_end_of(document.body, begin_row, '<row>', '</row>')
            if end_row == -1:
                document.warning("Can't find end of row " + str(row + 1))
                break
            begin_cell = begin_row
            multirows.append([])
            for column in range(numcols):
                begin_cell = find_token(document.body, '<cell ', begin_cell, end_row)
                if begin_cell == -1:
                    document.warning("Can't find column " + str(column + 1) +
                                     " in row " + str(row + 1))
                    break
                # NOTE
                # this will fail if someone puts "</cell>" in a cell, but
                # that seems fairly unlikely.
                end_cell = find_end_of(document.body, begin_cell, '<cell', '</cell>')
                if end_cell == -1:
                    document.warning("Can't find end of column " + str(column + 1) +
                                     " in row " + str(row + 1))
                    break
                multirows[row].append([begin_cell, end_cell, 0])
                if document.body[begin_cell].find('multirow="3" mroffset=') != -1:
                    multirows[row][column][2] = 3 # begin multirow
                    mrstarts.append([row, column])
                elif document.body[begin_cell].find('multirow="4"') != -1:
                    multirows[row][column][2] = 4 # in multirow
                begin_cell = end_cell
            begin_row = end_row
        # end of table info collection

        # work from the back to avoid messing up numbering
        mrstarts.reverse()
        for m in mrstarts:
            row = m[0]
            col = m[1]
            # get column width
            col_width = get_option_value(document.body[begin_table + 2 + col], "width")
            # "0pt" means that no width is specified
            if not col_width or col_width == "0pt":
                col_width = "*"
            # determine the number of cells that are part of the multirow
            nummrs = 1
            for r in range(row + 1, numrows):
                # the collection above may have stopped early on a malformed
                # table; do not index past what was actually recorded
                if r >= len(multirows) or col >= len(multirows[r]):
                    break
                if multirows[r][col][2] != 4:
                    break
                nummrs += 1
                # take the opportunity to revert this line
                lineno = multirows[r][col][0]
                document.body[lineno] = document.body[lineno].\
                  replace(' multirow="4" ', ' ').\
                  replace('valignment="middle"', 'valignment="top"').\
                  replace(' topline="true" ', ' ')
                # remove bottom line of previous multirow-part cell
                lineno = multirows[r-1][col][0]
                document.body[lineno] = document.body[lineno].replace(' bottomline="true" ', ' ')
            # revert beginning cell
            bcell = multirows[row][col][0]
            ecell = multirows[row][col][1]
            offset = get_option_value(document.body[bcell], "mroffset")
            document.body[bcell] = document.body[bcell].\
              replace(' multirow="3" ', ' ').\
              replace('valignment="middle"', 'valignment="top"')
            # remove mroffset option
            document.body[bcell] = rgx.sub('', document.body[bcell])

            blay = find_token(document.body, "\\begin_layout", bcell, ecell)
            if blay == -1:
                document.warning("Can't find layout for cell!")
                continue
            bend = find_end_of_layout(document.body, blay)
            if bend == -1:
                document.warning("Can't find end of layout for cell!")
                continue
            # do the later one first, so as not to mess up the numbering
            # we are wrapping the whole cell in this ert
            # so before the end of the layout...
            document.body[bend:bend] = put_cmd_in_ert("}")
            # ...and after the beginning
            document.body[blay + 1:blay + 1] = \
              put_cmd_in_ert("\\multirow{" + str(nummrs) + "}{" + col_width + "}["
                  + offset + "]{")

        # on to the next table
        begin_table = end_table
2330
2331
def revert_script(document):
    """Convert subscript/superscript insets to TeX code.

    The opening of each script inset (up to and including its first
    \\begin_layout line) is replaced by ERT "\\textsubscript{" or
    "\\textsuperscript{", and its closing \\end_layout ... \\end_inset part
    by ERT "}". If a subscript inset was seen and the text class is not one
    of the listed classes, "\\usepackage{subscript}" is added to the
    preamble.
    """
    needs_subscript = False
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset script', pos)
        if pos == -1:
            break
        inset_end = find_end_of_inset(document.body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of script inset.")
            pos += 1
            continue
        lay_beg = find_token(document.body, "\\begin_layout", pos, inset_end)
        if lay_beg == -1:
            document.warning("Malformed LyX document: Can't find layout in script inset.")
            pos = inset_end
            continue

        opening = document.body[pos]
        if check_token(opening, "\\begin_inset script subscript"):
            macro = '\\textsubscript{'
            needs_subscript = True
        elif check_token(opening, "\\begin_inset script superscript"):
            macro = '\\textsuperscript{'
        else:
            document.warning("Malformed LyX document: Unknown type of script inset.")
            pos = inset_end
            continue

        lay_end = find_end_of_layout(document.body, lay_beg)
        if lay_end == -1 or lay_end > inset_end:
            document.warning("Malformed LyX document: Can't find end of layout in script inset.")
            pos = inset_end
            continue

        # replace the tail first so the indices of the head stay valid:
        # the \end_layout \end_inset pair becomes the closing brace...
        document.body[lay_end:inset_end + 1] = put_cmd_in_ert("}")
        # ...and the inset/layout opening becomes the macro
        document.body[pos:lay_beg + 1] = put_cmd_in_ert(macro)
        pos += 1

    # these classes provide a \textsubscript command:
    # FIXME: Would be nice if we could use the information of the .layout file here
    classes = ["memoir", "scrartcl", "scrbook", "scrlttr2", "scrreprt"]
    if needs_subscript and find_token_exact(classes, document.textclass, 0) == -1:
        add_to_preamble(document, ['\\usepackage{subscript}'])
2374
2375
def convert_use_xetex(document):
    """Convert \\use_xetex to \\use_non_tex_fonts.

    An existing "\\use_xetex" header line is rewritten with the new name and
    the same value; if there is none, "\\use_non_tex_fonts 0" is inserted
    in front of the last header line.
    """
    pos = find_token(document.header, "\\use_xetex", 0)
    if pos != -1:
        value = get_value(document.header, "\\use_xetex", 0)
        document.header[pos] = "\\use_non_tex_fonts " + value
        return
    document.header.insert(-1, "\\use_non_tex_fonts 0")
2384
2385
def revert_use_xetex(document):
    """Revert \\use_non_tex_fonts to \\use_xetex.

    The "\\use_non_tex_fonts" header line is rewritten with the old name and
    the same value; a warning is issued if the line is missing.
    """
    pos = find_token(document.header, "\\use_non_tex_fonts", 0)
    if pos != -1:
        value = get_value(document.header, "\\use_non_tex_fonts", 0)
        document.header[pos] = "\\use_xetex " + value
    else:
        document.warning("Malformed document. No \\use_non_tex_fonts param!")
2396
2397
def revert_labeling(document):
    """Rename Labeling layouts to List.

    Documents whose text class is one of the classes listed below are left
    alone; in all others every "\\begin_layout Labeling" line becomes
    "\\begin_layout List".
    """
    koma_classes = ("scrartcl", "scrarticle-beamer", "scrbook", "scrlettr",
                    "scrlttr2", "scrreprt")
    if document.textclass in koma_classes:
        return

    pos = find_token_exact(document.body, "\\begin_layout Labeling", 0)
    while pos != -1:
        document.body[pos] = "\\begin_layout List"
        # the rewritten line no longer matches, so searching from pos again
        # moves on to the next occurrence
        pos = find_token_exact(document.body, "\\begin_layout Labeling", pos)
2409
2410
def revert_langpack(document):
    """ revert \\language_package parameter

    Deletes the \\language_package line from the header; warns if the
    header has no such line.
    """
    pos = find_token(document.header, "\\language_package", 0)
    if pos != -1:
        del document.header[pos]
        return
    document.warning("Malformed document. No \\language_package param!")
2420
2421
def convert_langpack(document):
    """ Add \\language_package parameter

    Inserts the line "\\language_package default" directly after the
    header line located by searching for the \\language token.  If no such
    line is found, a warning is issued and the header is left unchanged.
    """
    # The backslash must be escaped: "\l" is not a valid escape sequence
    # and triggers a SyntaxWarning on recent Python versions.  The value
    # of the string is unchanged.
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        document.warning("Malformed document. No \\language defined!")
        return

    document.header.insert(i + 1, "\\language_package default")
2430
2431
def revert_tabularwidth(document):
    """ strip the tabularwidth option from the <features> line of Tabular insets

    For every Tabular inset in the body, the first "<features" line inside
    the inset is looked up.  remove_option() is called on it for the
    'tabularwidth' option when that line contains alignment="tabularwidth".
    Insets without a detectable end or without a features line only
    produce a warning.
    """
    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\begin_inset Tabular", pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(body, pos)
        if inset_end == -1:
            document.warning("Unable to find end of Tabular inset at line " + str(pos))
            pos += 1
            continue
        # continue behind the \begin_inset line
        pos += 1
        feat_line = find_token(body, "<features", pos, inset_end)
        if feat_line == -1:
            document.warning("Can't find any features in Tabular inset at line " + str(pos))
            pos = inset_end
            continue
        # NOTE(review): the guard tests for alignment="tabularwidth" rather
        # than for a tabularwidth="..." attribute -- confirm this is intended.
        if 'alignment="tabularwidth"' in body[feat_line]:
            remove_option(body, feat_line, 'tabularwidth')
2451
def revert_html_css_as_file(document):
    """ remove the \\html_css_as_file parameter from the header

    Warns when del_token() reports that nothing was deleted.
    """
    removed = del_token(document.header, '\\html_css_as_file', 0)
    if removed:
        return
    document.warning("Malformed LyX document: Missing \\html_css_as_file.")
2455
2456
2457 ##
2458 # Conversion hub
2459 #
2460
# LyX release names covered by this module.
supported_versions = ["2.0.0", "2.0"]

# Forward conversion table.  Each entry is [number, [functions]]; the
# number is presumably the file format reached by that step, and the list
# holds the conversion functions registered for it (empty when the step
# needs no changes to the document).
convert = [
    [346, []],
    [347, []],
    [348, []],
    [349, []],
    [350, []],
    [351, []],
    [352, [convert_splitindex]],
    [353, []],
    [354, []],
    [355, [convert_strikeout]],
    [356, []],
    [357, [convert_ulinelatex]],
    [358, []],
    [359, [convert_nomencl_width]],
    [360, []],
    [361, []],
    [362, []],
    [363, []],
    [364, []],
    [365, []],
    [366, []],
    [367, []],
    [368, []],
    [369, [convert_author_id]],
    [370, []],
    [371, [convert_mhchem]],
    [372, []],
    [373, [merge_gbrief]],
    [374, []],
    [375, []],
    [376, []],
    [377, []],
    [378, []],
    [379, [convert_math_output]],
    [380, []],
    [381, []],
    [382, []],
    [383, []],
    [384, []],
    [385, []],
    [386, []],
    [387, []],
    [388, []],
    [389, [convert_html_quotes]],
    [390, []],
    [391, []],
    [392, []],
    [393, [convert_optarg]],
    [394, [convert_use_makebox]],
    [395, []],
    [396, []],
    [397, [remove_Nameref]],
    [398, []],
    [399, [convert_mathdots]],
    [400, [convert_rule]],
    [401, []],
    [402, [convert_bibtex_clearpage]],
    [403, [convert_flexnames]],
    [404, [convert_prettyref]],
    [405, []],
    [406, [convert_passthru]],
    [407, []],
    [408, []],
    [409, [convert_use_xetex]],
    [410, []],
    [411, [convert_langpack]],
    [412, []],
    [413, []],
]
2531
# Backward conversion table, listed from the newest step down to the
# oldest.  Each entry is [number, [functions]]; the number is presumably
# the file format reached after the listed revert functions have run.
revert = [
    [412, [revert_html_css_as_file]],
    [411, [revert_tabularwidth]],
    [410, [revert_langpack]],
    [409, [revert_labeling]],
    [408, [revert_use_xetex]],
    [407, [revert_script]],
    [406, [revert_multirowOffset]],
    [405, [revert_passthru]],
    [404, []],
    [403, [revert_refstyle]],
    [402, [revert_flexnames]],
    [401, []],
    [400, [revert_diagram]],
    [399, [revert_rule]],
    [398, [revert_mathdots]],
    [397, [revert_mathrsfs]],
    [396, []],
    [395, [revert_nameref]],
    [394, [revert_DIN_C_pagesizes]],
    [393, [revert_makebox]],
    [392, [revert_argument]],
    [391, []],
    [390, [revert_align_decimal, revert_IEEEtran]],
    [389, [revert_output_sync]],
    [388, [revert_html_quotes]],
    [387, [revert_pagesizes]],
    [386, [revert_math_scale]],
    [385, [revert_lyx_version]],
    [384, [revert_shadedboxcolor]],
    [383, [revert_fontcolor]],
    [382, [revert_turkmen]],
    [381, [revert_notefontcolor]],
    [380, [revert_equalspacing_xymatrix]],
    [379, [revert_inset_preview]],
    [378, [revert_math_output]],
    [377, []],
    [376, [revert_multirow]],
    [375, [revert_includeall]],
    [374, [revert_includeonly]],
    [373, [revert_html_options]],
    [372, [revert_gbrief]],
    [371, [revert_fontenc]],
    [370, [revert_mhchem]],
    [369, [revert_suppress_date]],
    [368, [revert_author_id]],
    [367, [revert_hspace_glue_lengths]],
    [366, [revert_percent_vspace_lengths, revert_percent_hspace_lengths]],
    [365, [revert_percent_skip_lengths]],
    [364, [revert_paragraph_indentation]],
    [363, [revert_branch_filename]],
    [362, [revert_longtable_align]],
    [361, [revert_applemac]],
    [360, []],
    [359, [revert_nomencl_cwidth]],
    [358, [revert_nomencl_width]],
    [357, [revert_custom_processors]],
    [356, [revert_ulinelatex]],
    [355, []],
    [354, [revert_strikeout]],
    [353, [revert_printindexall]],
    [352, [revert_subindex]],
    [351, [revert_splitindex]],
    [350, [revert_backgroundcolor]],
    [349, [revert_outputformat]],
    [348, [revert_xetex]],
    [347, [revert_phantom, revert_hphantom, revert_vphantom]],
    [346, [revert_tabularvalign]],
    [345, [revert_swiss]],
]


# Running this module directly does nothing; it only provides the tables
# and functions defined above.
if __name__ == "__main__":
    pass