lib/lyx2lyx/lyx_2_0.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # -*- coding: utf-8 -*-
   4 # Copyright (C) 2008 José Matos  <jamatos@lyx.org>
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 2.0"""
  21
  22 import re, string
  23 import unicodedata
  24 import sys, os
  25
  26 from parser_tools import find_token, find_end_of, find_tokens, get_value, get_value_string
  27
  28 ####################################################################
  29 # Private helper functions
  30
  31 def find_end_of_inset(lines, i):
  32     " Find end of inset, where lines[i] is included."
  33     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  34
  35
  36 def add_to_preamble(document, text):
  37     """ Add text to the preamble if it is not already there.
  38     Only the first line is checked!"""
  39
  40     if find_token(document.preamble, text[0], 0) != -1:
  41         return
  42
  43     document.preamble.extend(text)
  44
  45
  46 def insert_to_preamble(index, document, text):
  47     """ Insert text to the preamble at a given line"""
  48
  49     document.preamble.insert(index, text)
  50
  51
  52 def read_unicodesymbols():
  53     " Read the unicodesymbols list of unicode characters and corresponding commands."
  54     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
  55     fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
  56     spec_chars = []
  57     # Two backslashes, followed by some non-word character, and then a character
  58     # in brackets. The idea is to check for constructs like: \"{u}, which is how
  59     # they are written in the unicodesymbols file; but they can also be written
  60     # as: \"u or even \" u.
  61     r = re.compile(r'\\\\(\W)\{(\w)\}')
  62     for line in fp.readlines():
  63         if line[0] != '#' and line.strip() != "":
  64             line=line.replace(' "',' ') # remove all quotation marks with spaces before
  65             line=line.replace('" ',' ') # remove all quotation marks with spaces after
  66             line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
  67             try:
  68                 [ucs4,command,dead] = line.split(None,2)
  69                 if command[0:1] != "\\":
  70                     continue
  71                 spec_chars.append([command, unichr(eval(ucs4))])
  72             except:
  73                 continue
  74             m = r.match(command)
  75             if m != None:
  76                 command = "\\\\"
  77                 # If the character is a double-quote, then we need to escape it, too,
  78                 # since it is done that way in the LyX file.
  79                 if m.group(1) == "\"":
  80                     command += "\\"
  81                 commandbl = command
  82                 command += m.group(1) + m.group(2)
  83                 commandbl += m.group(1) + ' ' + m.group(2)
  84                 spec_chars.append([command, unichr(eval(ucs4))])
  85                 spec_chars.append([commandbl, unichr(eval(ucs4))])
  86     fp.close()
  87     return spec_chars
  88
  89
# Table of [command, character] pairs as returned by read_unicodesymbols().
# It is built once, when this module is imported, which means the
# unicodesymbols file is opened at import time.
unicode_reps = read_unicodesymbols()
  91
  92
  93 def put_cmd_in_ert(string):
  94     for rep in unicode_reps:
  95         string = string.replace(rep[1], rep[0].replace('\\\\', '\\'))
  96     string = string.replace('\\', "\\backslash\n")
  97     string = "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Plain Layout\n" \
  98       + string + "\n\\end_layout\n\\end_inset"
  99     return string
 100
 101
 102 def lyx2latex(document, lines):
 103     'Convert some LyX stuff into corresponding LaTeX stuff, as best we can.'
 104     # clean up multiline stuff
 105     content = ""
 106     ert_end = 0
 107
 108     for curline in range(len(lines)):
 109       line = lines[curline]
 110       if line.startswith("\\begin_inset ERT"):
 111           # We don't want to replace things inside ERT, so figure out
 112           # where the end of the inset is.
 113           ert_end = find_end_of_inset(lines, curline + 1)
 114           continue
 115       elif line.startswith("\\begin_inset Formula"):
 116           line = line[20:]
 117       elif line.startswith("\\begin_inset Quotes"):
 118           # For now, we do a very basic reversion. Someone who understands
 119           # quotes is welcome to fix it up.
 120           qtype = line[20:].strip()
 121           # lang = qtype[0]
 122           side = qtype[1]
 123           dbls = qtype[2]
 124           if side == "l":
 125               if dbls == "d":
 126                   line = "``"
 127               else:
 128                   line = "`"
 129           else:
 130               if dbls == "d":
 131                   line = "''"
 132               else:
 133                   line = "'"
 134       elif line.isspace() or \
 135             line.startswith("\\begin_layout") or \
 136             line.startswith("\\end_layout") or \
 137             line.startswith("\\begin_inset") or \
 138             line.startswith("\\end_inset") or \
 139             line.startswith("\\lang") or \
 140             line.strip() == "status collapsed" or \
 141             line.strip() == "status open":
 142           #skip all that stuff
 143           continue
 144
 145       # this needs to be added to the preamble because of cases like
 146       # \textmu, \textbackslash, etc.
 147       add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
 148                                  '\\@ifundefined{textmu}',
 149                                  ' {\\usepackage{textcomp}}{}'])
 150       # a lossless reversion is not possible
 151       # try at least to handle some common insets and settings
 152       if ert_end >= curline:
 153           line = line.replace(r'\backslash', r'\\')
 154       else:
 155           line = line.replace('&', '\\&{}')
 156           line = line.replace('#', '\\#{}')
 157           line = line.replace('^', '\\^{}')
 158           line = line.replace('%', '\\%{}')
 159           line = line.replace('_', '\\_{}')
 160           line = line.replace('$', '\\${}')
 161
 162           # Do the LyX text --> LaTeX conversion
 163           for rep in unicode_reps:
 164             line = line.replace(rep[1], rep[0] + "{}")
 165           line = line.replace(r'\backslash', r'\textbackslash{}')
 166           line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
 167           line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
 168           line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
 169           line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
 170           line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
 171           line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
 172           line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
 173           line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
 174           line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
 175       content += line
 176     return content
 177
 178
 179 def latex_length(string):
 180     'Convert lengths to their LaTeX representation.'
 181     i = 0
 182     percent = False
 183     # the string has the form
 184     # ValueUnit+ValueUnit-ValueUnit or
 185     # ValueUnit+-ValueUnit
 186     # the + and - (glue lengths) are optional
 187     # the + always precedes the -
 188
 189     # Convert relative lengths to LaTeX units
 190     units = {"text%":"\\textwidth", "col%":"\\columnwidth",
 191              "page%":"\\pagewidth", "line%":"\\linewidth",
 192              "theight%":"\\textheight", "pheight%":"\\pageheight"}
 193     for unit in units.keys():
 194         i = string.find(unit)
 195         if i != -1:
 196             percent = True
 197             minus = string.rfind("-", 1, i)
 198             plus = string.rfind("+", 0, i)
 199             latex_unit = units[unit]
 200             if plus == -1 and minus == -1:
 201                 value = string[:i]
 202                 value = str(float(value)/100)
 203                 end = string[i + len(unit):]
 204                 string = value + latex_unit + end
 205             if plus > minus:
 206                 value = string[plus+1:i]
 207                 value = str(float(value)/100)
 208                 begin = string[:plus+1]
 209                 end = string[i+len(unit):]
 210                 string = begin + value + latex_unit + end
 211             if plus < minus:
 212                 value = string[minus+1:i]
 213                 value = str(float(value)/100)
 214                 begin = string[:minus+1]
 215                 string = begin + value + latex_unit
 216
 217     # replace + and -, but only if the - is not the first character
 218     string = string[0] + string[1:].replace("+", " plus ").replace("-", " minus ")
 219     # handle the case where "+-1mm" was used, because LaTeX only understands
 220     # "plus 1mm minus 1mm"
 221     if string.find("plus  minus"):
 222         lastvaluepos = string.rfind(" ")
 223         lastvalue = string[lastvaluepos:]
 224         string = string.replace("  ", lastvalue + " ")
 225     if percent ==  False:
 226         return "False," + string
 227     else:
 228         return "True," + string
 229
 230
 231 ####################################################################
 232
 233
 234 def revert_swiss(document):
 235     " Set language german-ch to ngerman "
 236     i = 0
 237     if document.language == "german-ch":
 238         document.language = "ngerman"
 239         i = find_token(document.header, "\\language", 0)
 240         if i != -1:
 241             document.header[i] = "\\language ngerman"
 242     j = 0
 243     while True:
 244         j = find_token(document.body, "\\lang german-ch", j)
 245         if j == -1:
 246             return
 247         document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman")
 248         j = j + 1
 249
 250
 251 def revert_tabularvalign(document):
 252    " Revert the tabular valign option "
 253    i = 0
 254    while True:
 255        i = find_token(document.body, "\\begin_inset Tabular", i)
 256        if i == -1:
 257            return
 258        j = find_end_of_inset(document.body, i)
 259        if j == -1:
 260            document.warning("Malformed LyX document: Could not find end of tabular.")
 261            i = j
 262            continue
 263        # don't set a box for longtables, only delete tabularvalignment
 264        # the alignment is 2 lines below \\begin_inset Tabular
 265        p = document.body[i+2].find("islongtable")
 266        if p > -1:
 267            q = document.body[i+2].find("tabularvalignment")
 268            if q > -1:
 269                document.body[i+2] = document.body[i+2][:q-1]
 270                document.body[i+2] = document.body[i+2] + '>'
 271            i = i + 1
 272
 273        # when no longtable
 274        if p == -1:
 275          tabularvalignment = 'c'
 276          # which valignment is specified?
 277          m = document.body[i+2].find('tabularvalignment="top"')
 278          if m > -1:
 279              tabularvalignment = 't'
 280          m = document.body[i+2].find('tabularvalignment="bottom"')
 281          if m > -1:
 282              tabularvalignment = 'b'
 283          # delete tabularvalignment
 284          q = document.body[i+2].find("tabularvalignment")
 285          if q > -1:
 286              document.body[i+2] = document.body[i+2][:q-1]
 287              document.body[i+2] = document.body[i+2] + '>'
 288
 289          # don't add a box when centered
 290          if tabularvalignment == 'c':
 291              i = j
 292              continue
 293          subst = ['\\end_layout', '\\end_inset']
 294          document.body[j+1:j+1] = subst # just inserts those lines
 295          subst = ['\\begin_inset Box Frameless',
 296              'position "' + tabularvalignment +'"',
 297              'hor_pos "c"',
 298              'has_inner_box 1',
 299              'inner_pos "c"',
 300              'use_parbox 0',
 301              # we don't know the width, assume 50%
 302              'width "50col%"',
 303              'special "none"',
 304              'height "1in"',
 305              'height_special "totalheight"',
 306              'status open',
 307              '',
 308              '\\begin_layout Plain Layout']
 309          document.body[i:i] = subst # this just inserts the array at i
 310          i += len(subst) + 2 # adjust i to save a few cycles
 311
 312
 313 def revert_phantom(document):
 314     " Reverts phantom to ERT "
 315     i = 0
 316     j = 0
 317     while True:
 318       i = find_token(document.body, "\\begin_inset Phantom Phantom", i)
 319       if i == -1:
 320           return
 321       substi = document.body[i].replace('\\begin_inset Phantom Phantom', \
 322                 '\\begin_inset ERT\nstatus collapsed\n\n' \
 323                 '\\begin_layout Plain Layout\n\n\n\\backslash\n' \
 324                 'phantom{\n\\end_layout\n\n\\end_inset\n')
 325       substi = substi.split('\n')
 326       document.body[i : i+4] = substi
 327       i += len(substi)
 328       j = find_token(document.body, "\\end_layout", i)
 329       if j == -1:
 330           document.warning("Malformed LyX document: Could not find end of Phantom inset.")
 331           return
 332       substj = document.body[j].replace('\\end_layout', \
 333                 '\\size default\n\n\\begin_inset ERT\nstatus collapsed\n\n' \
 334                 '\\begin_layout Plain Layout\n\n' \
 335                 '}\n\\end_layout\n\n\\end_inset\n')
 336       substj = substj.split('\n')
 337       document.body[j : j+4] = substj
 338       i += len(substj)
 339
 340
 341 def revert_hphantom(document):
 342     " Reverts hphantom to ERT "
 343     i = 0
 344     j = 0
 345     while True:
 346       i = find_token(document.body, "\\begin_inset Phantom HPhantom", i)
 347       if i == -1:
 348           return
 349       substi = document.body[i].replace('\\begin_inset Phantom HPhantom', \
 350                 '\\begin_inset ERT\nstatus collapsed\n\n' \
 351                 '\\begin_layout Plain Layout\n\n\n\\backslash\n' \
 352                 'hphantom{\n\\end_layout\n\n\\end_inset\n')
 353       substi = substi.split('\n')
 354       document.body[i : i+4] = substi
 355       i += len(substi)
 356       j = find_token(document.body, "\\end_layout", i)
 357       if j == -1:
 358           document.warning("Malformed LyX document: Could not find end of HPhantom inset.")
 359           return
 360       substj = document.body[j].replace('\\end_layout', \
 361                 '\\size default\n\n\\begin_inset ERT\nstatus collapsed\n\n' \
 362                 '\\begin_layout Plain Layout\n\n' \
 363                 '}\n\\end_layout\n\n\\end_inset\n')
 364       substj = substj.split('\n')
 365       document.body[j : j+4] = substj
 366       i += len(substj)
 367
 368
 369 def revert_vphantom(document):
 370     " Reverts vphantom to ERT "
 371     i = 0
 372     j = 0
 373     while True:
 374       i = find_token(document.body, "\\begin_inset Phantom VPhantom", i)
 375       if i == -1:
 376           return
 377       substi = document.body[i].replace('\\begin_inset Phantom VPhantom', \
 378                 '\\begin_inset ERT\nstatus collapsed\n\n' \
 379                 '\\begin_layout Plain Layout\n\n\n\\backslash\n' \
 380                 'vphantom{\n\\end_layout\n\n\\end_inset\n')
 381       substi = substi.split('\n')
 382       document.body[i : i+4] = substi
 383       i += len(substi)
 384       j = find_token(document.body, "\\end_layout", i)
 385       if j == -1:
 386           document.warning("Malformed LyX document: Could not find end of VPhantom inset.")
 387           return
 388       substj = document.body[j].replace('\\end_layout', \
 389                 '\\size default\n\n\\begin_inset ERT\nstatus collapsed\n\n' \
 390                 '\\begin_layout Plain Layout\n\n' \
 391                 '}\n\\end_layout\n\n\\end_inset\n')
 392       substj = substj.split('\n')
 393       document.body[j : j+4] = substj
 394       i += len(substj)
 395
 396
 397 def revert_xetex(document):
 398     " Reverts documents that use XeTeX "
 399     i = find_token(document.header, '\\use_xetex', 0)
 400     if i == -1:
 401         document.warning("Malformed LyX document: Missing \\use_xetex.")
 402         return
 403     if get_value(document.header, "\\use_xetex", i) == 'false':
 404         del document.header[i]
 405         return
 406     del document.header[i]
 407     # 1.) set doc encoding to utf8-plain
 408     i = find_token(document.header, "\\inputencoding", 0)
 409     if i == -1:
 410         document.warning("Malformed LyX document: Missing \\inputencoding.")
 411     document.header[i] = "\\inputencoding utf8-plain"
 412     # 2.) check font settings
 413     l = find_token(document.header, "\\font_roman", 0)
 414     if l == -1:
 415         document.warning("Malformed LyX document: Missing \\font_roman.")
 416     line = document.header[l]
 417     l = re.compile(r'\\font_roman (.*)$')
 418     m = l.match(line)
 419     roman = m.group(1)
 420     l = find_token(document.header, "\\font_sans", 0)
 421     if l == -1:
 422         document.warning("Malformed LyX document: Missing \\font_sans.")
 423     line = document.header[l]
 424     l = re.compile(r'\\font_sans (.*)$')
 425     m = l.match(line)
 426     sans = m.group(1)
 427     l = find_token(document.header, "\\font_typewriter", 0)
 428     if l == -1:
 429         document.warning("Malformed LyX document: Missing \\font_typewriter.")
 430     line = document.header[l]
 431     l = re.compile(r'\\font_typewriter (.*)$')
 432     m = l.match(line)
 433     typewriter = m.group(1)
 434     osf = get_value(document.header, '\\font_osf', 0) == "true"
 435     sf_scale = float(get_value(document.header, '\\font_sf_scale', 0))
 436     tt_scale = float(get_value(document.header, '\\font_tt_scale', 0))
 437     # 3.) set preamble stuff
 438     pretext = '%% This document must be processed with xelatex!\n'
 439     pretext += '\\usepackage{fontspec}\n'
 440     if roman != "default":
 441         pretext += '\\setmainfont[Mapping=tex-text]{' + roman + '}\n'
 442     if sans != "default":
 443         pretext += '\\setsansfont['
 444         if sf_scale != 100:
 445             pretext += 'Scale=' + str(sf_scale / 100) + ','
 446         pretext += 'Mapping=tex-text]{' + sans + '}\n'
 447     if typewriter != "default":
 448         pretext += '\\setmonofont'
 449         if tt_scale != 100:
 450             pretext += '[Scale=' + str(tt_scale / 100) + ']'
 451         pretext += '{' + typewriter + '}\n'
 452     if osf:
 453         pretext += '\\defaultfontfeatures{Numbers=OldStyle}\n'
 454     pretext += '\usepackage{xunicode}\n'
 455     pretext += '\usepackage{xltxtra}\n'
 456     insert_to_preamble(0, document, pretext)
 457     # 4.) reset font settings
 458     i = find_token(document.header, "\\font_roman", 0)
 459     if i == -1:
 460         document.warning("Malformed LyX document: Missing \\font_roman.")
 461     document.header[i] = "\\font_roman default"
 462     i = find_token(document.header, "\\font_sans", 0)
 463     if i == -1:
 464         document.warning("Malformed LyX document: Missing \\font_sans.")
 465     document.header[i] = "\\font_sans default"
 466     i = find_token(document.header, "\\font_typewriter", 0)
 467     if i == -1:
 468         document.warning("Malformed LyX document: Missing \\font_typewriter.")
 469     document.header[i] = "\\font_typewriter default"
 470     i = find_token(document.header, "\\font_osf", 0)
 471     if i == -1:
 472         document.warning("Malformed LyX document: Missing \\font_osf.")
 473     document.header[i] = "\\font_osf false"
 474     i = find_token(document.header, "\\font_sc", 0)
 475     if i == -1:
 476         document.warning("Malformed LyX document: Missing \\font_sc.")
 477     document.header[i] = "\\font_sc false"
 478     i = find_token(document.header, "\\font_sf_scale", 0)
 479     if i == -1:
 480         document.warning("Malformed LyX document: Missing \\font_sf_scale.")
 481     document.header[i] = "\\font_sf_scale 100"
 482     i = find_token(document.header, "\\font_tt_scale", 0)
 483     if i == -1:
 484         document.warning("Malformed LyX document: Missing \\font_tt_scale.")
 485     document.header[i] = "\\font_tt_scale 100"
 486
 487
 488 def revert_outputformat(document):
 489     " Remove default output format param "
 490     i = find_token(document.header, '\\default_output_format', 0)
 491     if i == -1:
 492         document.warning("Malformed LyX document: Missing \\default_output_format.")
 493         return
 494     del document.header[i]
 495
 496
 497 def revert_backgroundcolor(document):
 498     " Reverts background color to preamble code "
 499     i = 0
 500     colorcode = ""
 501     while True:
 502       i = find_token(document.header, "\\backgroundcolor", i)
 503       if i == -1:
 504           return
 505       colorcode = get_value(document.header, '\\backgroundcolor', 0)
 506       del document.header[i]
 507       # don't clutter the preamble if backgroundcolor is not set
 508       if colorcode == "#ffffff":
 509           continue
 510       # the color code is in the form #rrggbb where every character denotes a hex number
 511       # convert the string to an int
 512       red = string.atoi(colorcode[1:3],16)
 513       # we want the output "0.5" for the value "127" therefore add here
 514       if red != 0:
 515           red = red + 1
 516       redout = float(red) / 256
 517       green = string.atoi(colorcode[3:5],16)
 518       if green != 0:
 519           green = green + 1
 520       greenout = float(green) / 256
 521       blue = string.atoi(colorcode[5:7],16)
 522       if blue != 0:
 523           blue = blue + 1
 524       blueout = float(blue) / 256
 525       # write the preamble
 526       insert_to_preamble(0, document,
 527                            '% Commands inserted by lyx2lyx to set the background color\n'
 528                            + '\\@ifundefined{definecolor}{\\usepackage{color}}{}\n'
 529                            + '\\definecolor{page_backgroundcolor}{rgb}{'
 530                            + str(redout) + ', ' + str(greenout)
 531                            + ', ' + str(blueout) + '}\n'
 532                            + '\\pagecolor{page_backgroundcolor}\n')
 533
 534
 535 def revert_splitindex(document):
 536     " Reverts splitindex-aware documents "
 537     i = find_token(document.header, '\\use_indices', 0)
 538     if i == -1:
 539         document.warning("Malformed LyX document: Missing \\use_indices.")
 540         return
 541     indices = get_value(document.header, "\\use_indices", i)
 542     preamble = ""
 543     if indices == "true":
 544          preamble += "\\usepackage{splitidx}\n"
 545     del document.header[i]
 546     i = 0
 547     while True:
 548         i = find_token(document.header, "\\index", i)
 549         if i == -1:
 550             break
 551         k = find_token(document.header, "\\end_index", i)
 552         if k == -1:
 553             document.warning("Malformed LyX document: Missing \\end_index.")
 554             return
 555         line = document.header[i]
 556         l = re.compile(r'\\index (.*)$')
 557         m = l.match(line)
 558         iname = m.group(1)
 559         ishortcut = get_value(document.header, '\\shortcut', i, k)
 560         if ishortcut != "" and indices == "true":
 561             preamble += "\\newindex[" + iname + "]{" + ishortcut + "}\n"
 562         del document.header[i:k+1]
 563         i = 0
 564     if preamble != "":
 565         insert_to_preamble(0, document, preamble)
 566     i = 0
 567     while True:
 568         i = find_token(document.body, "\\begin_inset Index", i)
 569         if i == -1:
 570             break
 571         line = document.body[i]
 572         l = re.compile(r'\\begin_inset Index (.*)$')
 573         m = l.match(line)
 574         itype = m.group(1)
 575         if itype == "idx" or indices == "false":
 576             document.body[i] = "\\begin_inset Index"
 577         else:
 578             k = find_end_of_inset(document.body, i)
 579             if k == -1:
 580                  return
 581             content = lyx2latex(document, document.body[i:k])
 582             # escape quotes
 583             content = content.replace('"', r'\"')
 584             subst = [put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")]
 585             document.body[i:k+1] = subst
 586         i = i + 1
 587     i = 0
 588     while True:
 589         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 590         if i == -1:
 591             return
 592         k = find_end_of_inset(document.body, i)
 593         ptype = get_value(document.body, 'type', i, k).strip('"')
 594         if ptype == "idx":
 595             j = find_token(document.body, "type", i, k)
 596             del document.body[j]
 597         elif indices == "false":
 598             del document.body[i:k+1]
 599         else:
 600             subst = [put_cmd_in_ert("\\printindex[" + ptype + "]{}")]
 601             document.body[i:k+1] = subst
 602         i = i + 1
 603
 604
 605 def convert_splitindex(document):
 606     " Converts index and printindex insets to splitindex-aware format "
 607     i = 0
 608     while True:
 609         i = find_token(document.body, "\\begin_inset Index", i)
 610         if i == -1:
 611             break
 612         document.body[i] = document.body[i].replace("\\begin_inset Index",
 613             "\\begin_inset Index idx")
 614         i = i + 1
 615     i = 0
 616     while True:
 617         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 618         if i == -1:
 619             return
 620         if document.body[i + 1].find('LatexCommand printindex') == -1:
 621             document.warning("Malformed LyX document: Incomplete printindex inset.")
 622             return
 623         subst = ["LatexCommand printindex",
 624             "type \"idx\""]
 625         document.body[i + 1:i + 2] = subst
 626         i = i + 1
 627
 628
 629 def revert_subindex(document):
 630     " Reverts \\printsubindex CommandInset types "
 631     i = find_token(document.header, '\\use_indices', 0)
 632     if i == -1:
 633         document.warning("Malformed LyX document: Missing \\use_indices.")
 634         return
 635     indices = get_value(document.header, "\\use_indices", i)
 636     i = 0
 637     while True:
 638         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 639         if i == -1:
 640             return
 641         k = find_end_of_inset(document.body, i)
 642         ctype = get_value(document.body, 'LatexCommand', i, k)
 643         if ctype != "printsubindex":
 644             i = i + 1
 645             continue
 646         ptype = get_value(document.body, 'type', i, k).strip('"')
 647         if indices == "false":
 648             del document.body[i:k+1]
 649         else:
 650             subst = [put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")]
 651             document.body[i:k+1] = subst
 652         i = i + 1
 653
 654
 655 def revert_printindexall(document):
 656     " Reverts \\print[sub]index* CommandInset types "
 657     i = find_token(document.header, '\\use_indices', 0)
 658     if i == -1:
 659         document.warning("Malformed LyX document: Missing \\use_indices.")
 660         return
 661     indices = get_value(document.header, "\\use_indices", i)
 662     i = 0
 663     while True:
 664         i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
 665         if i == -1:
 666             return
 667         k = find_end_of_inset(document.body, i)
 668         ctype = get_value(document.body, 'LatexCommand', i, k)
 669         if ctype != "printindex*" and ctype != "printsubindex*":
 670             i = i + 1
 671             continue
 672         if indices == "false":
 673             del document.body[i:k+1]
 674         else:
 675             subst = [put_cmd_in_ert("\\" + ctype + "{}")]
 676             document.body[i:k+1] = subst
 677         i = i + 1
 678
 679
 680 def revert_strikeout(document):
 681     " Reverts \\strikeout character style "
 682     while True:
 683         i = find_token(document.body, '\\strikeout', 0)
 684         if i == -1:
 685             return
 686         del document.body[i]
 687
 688
 689 def revert_uulinewave(document):
 690     " Reverts \\uuline, and \\uwave character styles "
 691     while True:
 692         i = find_token(document.body, '\\uuline', 0)
 693         if i == -1:
 694             break
 695         del document.body[i]
 696     while True:
 697         i = find_token(document.body, '\\uwave', 0)
 698         if i == -1:
 699             return
 700         del document.body[i]
 701
 702
 703 def revert_ulinelatex(document):
 704     " Reverts \\uline character style "
 705     i = find_token(document.body, '\\bar under', 0)
 706     if i == -1:
 707         return
 708     insert_to_preamble(0, document,
 709             '% Commands inserted by lyx2lyx for proper underlining\n'
 710             + '\\PassOptionsToPackage{normalem}{ulem}\n'
 711             + '\\usepackage{ulem}\n'
 712             + '\\let\\cite@rig\\cite\n'
 713             + '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}\n'
 714             + '  \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}\n'
 715             + '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}\n')
 716
 717
 718 def revert_custom_processors(document):
 719     " Remove bibtex_command and index_command params "
 720     i = find_token(document.header, '\\bibtex_command', 0)
 721     if i == -1:
 722         document.warning("Malformed LyX document: Missing \\bibtex_command.")
 723         return
 724     del document.header[i]
 725     i = find_token(document.header, '\\index_command', 0)
 726     if i == -1:
 727         document.warning("Malformed LyX document: Missing \\index_command.")
 728         return
 729     del document.header[i]
 730
 731
 732 def convert_nomencl_width(document):
 733     " Add set_width param to nomencl_print "
 734     i = 0
 735     while True:
 736       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 737       if i == -1:
 738         break
 739       document.body.insert(i + 2, "set_width \"none\"")
 740       i = i + 1
 741
 742
 743 def revert_nomencl_width(document):
 744     " Remove set_width param from nomencl_print "
 745     i = 0
 746     while True:
 747       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 748       if i == -1:
 749         break
 750       j = find_end_of_inset(document.body, i)
 751       l = find_token(document.body, "set_width", i, j)
 752       if l == -1:
 753             document.warning("Can't find set_width option for nomencl_print!")
 754             i = j
 755             continue
 756       del document.body[l]
 757       i = i + 1
 758
 759
 760 def revert_nomencl_cwidth(document):
 761     " Remove width param from nomencl_print "
 762     i = 0
 763     while True:
 764       i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
 765       if i == -1:
 766         break
 767       j = find_end_of_inset(document.body, i)
 768       l = find_token(document.body, "width", i, j)
 769       if l == -1:
 770             document.warning("Can't find width option for nomencl_print!")
 771             i = j
 772             continue
 773       width = get_value(document.body, "width", i, j).strip('"')
 774       del document.body[l]
 775       add_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
 776       i = i + 1
 777
 778
 779 def revert_applemac(document):
 780     " Revert applemac encoding to auto "
 781     i = 0
 782     if document.encoding == "applemac":
 783         document.encoding = "auto"
 784         i = find_token(document.header, "\\encoding", 0)
 785         if i != -1:
 786             document.header[i] = "\\encoding auto"
 787
 788
 789 def revert_longtable_align(document):
 790     " Remove longtable alignment setting "
 791     i = 0
 792     j = 0
 793     while True:
 794       i = find_token(document.body, "\\begin_inset Tabular", i)
 795       if i == -1:
 796           break
 797       # the alignment is 2 lines below \\begin_inset Tabular
 798       j = document.body[i+2].find("longtabularalignment")
 799       if j == -1:
 800           break
 801       document.body[i+2] = document.body[i+2][:j-1]
 802       document.body[i+2] = document.body[i+2] + '>'
 803       i = i + 1
 804
 805
 806 def revert_branch_filename(document):
 807     " Remove \\filename_suffix parameter from branches "
 808     i = 0
 809     while True:
 810         i = find_token(document.header, "\\filename_suffix", i)
 811         if i == -1:
 812             return
 813         del document.header[i]
 814
 815
 816 def revert_paragraph_indentation(document):
 817     " Revert custom paragraph indentation to preamble code "
 818     i = 0
 819     while True:
 820       i = find_token(document.header, "\\paragraph_indentation", i)
 821       if i == -1:
 822           break
 823       # only remove the preamble line if default
 824       # otherwise also write the value to the preamble
 825       length = get_value(document.header, "\\paragraph_indentation", i)
 826       if length == "default":
 827           del document.header[i]
 828           break
 829       else:
 830           # handle percent lengths
 831           # latex_length returns "bool,length"
 832           length = latex_length(length).split(",")[1]
 833           add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
 834           add_to_preamble(document, ["\\setlength{\\parindent}{" + length + "}"])
 835           del document.header[i]
 836       i = i + 1
 837
 838
 839 def revert_percent_skip_lengths(document):
 840     " Revert relative lengths for paragraph skip separation to preamble code "
 841     i = 0
 842     while True:
 843       i = find_token(document.header, "\\defskip", i)
 844       if i == -1:
 845           break
 846       length = get_value(document.header, "\\defskip", i)
 847       # only revert when a custom length was set and when
 848       # it used a percent length
 849       if length not in ('smallskip', 'medskip', 'bigskip'):
 850           # handle percent lengths
 851           length = latex_length(length)
 852           # latex_length returns "bool,length"
 853           percent = length.split(",")[0]
 854           length = length.split(",")[1]
 855           if percent == "True":
 856               add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
 857               add_to_preamble(document, ["\\setlength{\\parskip}{" + length + "}"])
 858               # set defskip to medskip as default
 859               document.header[i] = "\\defskip medskip"
 860       i = i + 1
 861
 862
 863 def revert_percent_vspace_lengths(document):
 864     " Revert relative VSpace lengths to ERT "
 865     i = 0
 866     while True:
 867       i = find_token(document.body, "\\begin_inset VSpace", i)
 868       if i == -1:
 869           break
 870       # only revert if a custom length was set and if
 871       # it used a percent length
 872       line = document.body[i]
 873       r = re.compile(r'\\begin_inset VSpace (.*)$')
 874       m = r.match(line)
 875       length = m.group(1)
 876       if length not in ('defskip', 'smallskip', 'medskip', 'bigskip', 'vfill'):
 877           # check if the space has a star (protected space)
 878           protected = (document.body[i].rfind("*") != -1)
 879           if protected:
 880               length = length.rstrip('*')
 881           # handle percent lengths
 882           length = latex_length(length)
 883           # latex_length returns "bool,length"
 884           percent = length.split(",")[0]
 885           length = length.split(",")[1]
 886           # revert the VSpace inset to ERT
 887           if percent == "True":
 888               if protected:
 889                   subst = [put_cmd_in_ert("\\vspace*{" + length + "}")]
 890               else:
 891                   subst = [put_cmd_in_ert("\\vspace{" + length + "}")]
 892               document.body[i:i+2] = subst
 893       i = i + 1
 894
 895
 896 def revert_percent_hspace_lengths(document):
 897     " Revert relative HSpace lengths to ERT "
 898     i = 0
 899     while True:
 900       i = find_token(document.body, "\\begin_inset space \\hspace", i)
 901       if i == -1:
 902           break
 903       protected = (document.body[i].find("\\hspace*{}") != -1)
 904       # only revert if a custom length was set and if
 905       # it used a percent length
 906       length = get_value(document.body, '\\length', i+1)
 907       if length == '':
 908           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 909           return
 910       # handle percent lengths
 911       length = latex_length(length)
 912       # latex_length returns "bool,length"
 913       percent = length.split(",")[0]
 914       length = length.split(",")[1]
 915       # revert the HSpace inset to ERT
 916       if percent == "True":
 917           if protected:
 918               subst = [put_cmd_in_ert("\\hspace*{" + length + "}")]
 919           else:
 920               subst = [put_cmd_in_ert("\\hspace{" + length + "}")]
 921           document.body[i:i+3] = subst
 922       i = i + 2
 923
 924
 925 def revert_hspace_glue_lengths(document):
 926     " Revert HSpace glue lengths to ERT "
 927     i = 0
 928     while True:
 929       i = find_token(document.body, "\\begin_inset space \\hspace", i)
 930       if i == -1:
 931           break
 932       protected = (document.body[i].find("\\hspace*{}") != -1)
 933       length = get_value(document.body, '\\length', i+1)
 934       if length == '':
 935           document.warning("Malformed lyx document: Missing '\\length' in Space inset.")
 936           return
 937       # only revert if the length contains a plus or minus at pos != 0
 938       glue  = re.compile(r'.+[\+-]')
 939       if glue.search(length):
 940           # handle percent lengths
 941           # latex_length returns "bool,length"
 942           length = latex_length(length).split(",")[1]
 943           # revert the HSpace inset to ERT
 944           if protected:
 945               subst = [put_cmd_in_ert("\\hspace*{" + length + "}")]
 946           else:
 947               subst = [put_cmd_in_ert("\\hspace{" + length + "}")]
 948           document.body[i:i+3] = subst
 949       i = i + 2
 950
 951 def convert_author_id(document):
 952     " Add the author_id to the \\author definition and make sure 0 is not used"
 953     i = 0
 954     j = 1
 955     while True:
 956         i = find_token(document.header, "\\author", i)
 957         if i == -1:
 958             break
 959
 960         r = re.compile(r'(\\author) (\".*\")\s?(.*)$')
 961         m = r.match(document.header[i])
 962         if m != None:
 963             name = m.group(2)
 964
 965             email = ''
 966             if m.lastindex == 3:
 967                 email = m.group(3)
 968             document.header[i] = "\\author %i %s %s" % (j, name, email)
 969         j = j + 1
 970         i = i + 1
 971
 972     k = 0
 973     while True:
 974         k = find_token(document.body, "\\change_", k)
 975         if k == -1:
 976             break
 977
 978         change = document.body[k].split(' ');
 979         if len(change) == 3:
 980             type = change[0]
 981             author_id = int(change[1])
 982             time = change[2]
 983             document.body[k] = "%s %i %s" % (type, author_id + 1, time)
 984         k = k + 1
 985
 986 def revert_author_id(document):
 987     " Remove the author_id from the \\author definition "
 988     i = 0
 989     j = 0
 990     idmap = dict()
 991     while True:
 992         i = find_token(document.header, "\\author", i)
 993         if i == -1:
 994             break
 995
 996         r = re.compile(r'(\\author) (\d+) (\".*\")\s?(.*)$')
 997         m = r.match(document.header[i])
 998         if m != None:
 999             author_id = int(m.group(2))
1000             idmap[author_id] = j
1001             name = m.group(3)
1002
1003             email = ''
1004             if m.lastindex == 4:
1005                 email = m.group(4)
1006             document.header[i] = "\\author %s %s" % (name, email)
1007         i = i + 1
1008         j = j + 1
1009
1010     k = 0
1011     while True:
1012         k = find_token(document.body, "\\change_", k)
1013         if k == -1:
1014             break
1015
1016         change = document.body[k].split(' ');
1017         if len(change) == 3:
1018             type = change[0]
1019             author_id = int(change[1])
1020             time = change[2]
1021             document.body[k] = "%s %i %s" % (type, idmap[author_id], time)
1022         k = k + 1
1023
1024
def revert_suppress_date(document):
    """ Revert suppressing of default document date to preamble code

    Every \\suppress_date header line is removed.  When its value is "true",
    an empty \\date{} command is handed to add_to_preamble() first. """
    pos = find_token(document.header, "\\suppress_date", 0)
    while pos != -1:
        if get_value(document.header, "\\suppress_date", pos) == "true":
            # write to the preamble when suppress_date was true
            add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
            add_to_preamble(document, ["\\date{}"])
        del document.header[pos]
        pos = find_token(document.header, "\\suppress_date", pos + 1)
1040
1041
def revert_mhchem(document):
    """Revert mhchem loading to preamble code

    The header setting "\\use_mhchem 1" is treated as "auto" and
    "\\use_mhchem 2" as "on"; any other value, or a missing setting, counts
    as "off".  In "auto" mode the package is loaded only if find_token()
    locates "\\cf{" or "\\ce{" in the body.  When the package is needed, the
    loading code is handed to add_to_preamble().  The \\use_mhchem header
    line is removed in all cases; a missing line is reported through
    document.warning(). """
    # default for every setting other than 1 or 2, so that the checks below
    # never read an unbound name
    mhchem = "off"
    if find_token(document.header, "\\use_mhchem 1", 0) != -1:
        mhchem = "auto"
    elif find_token(document.header, "\\use_mhchem 2", 0) != -1:
        mhchem = "on"

    if mhchem == "auto":
        # "auto" only loads the package when one of its commands is found
        if (find_token(document.body, "\\cf{", 0) != -1
                or find_token(document.body, "\\ce{", 0) != -1):
            mhchem = "on"

    if mhchem == "on":
        add_to_preamble(document, ["% this command was inserted by lyx2lyx"])
        add_to_preamble(document, ["\\PassOptionsToPackage{version=3}{mhchem}"])
        add_to_preamble(document, ["\\usepackage{mhchem}"])

    k = find_token(document.header, "\\use_mhchem", 0)
    if k == -1:
        document.warning("Malformed LyX document: Could not find mhchem setting.")
        return
    del document.header[k]
1071
1072
def revert_fontenc(document):
    """ Remove fontencoding param

    Deletes the \\fontencoding header line; a missing line is reported
    through document.warning(). """
    pos = find_token(document.header, '\\fontencoding', 0)
    if pos != -1:
        del document.header[pos]
    else:
        document.warning("Malformed LyX document: Missing \\fontencoding.")
1080
1081
def merge_gbrief(document):
    """ Merge g-brief-en and g-brief-de to one class

    g-brief-en documents only get their text class switched to g-brief.
    g-brief-de documents additionally have every \\begin_layout line whose
    layout name is a key of the table below rewritten to the mapped name.
    Documents of any other class are left untouched. """
    if document.textclass == "g-brief-en":
        document.textclass = "g-brief"
        document.set_textclass()
        return
    if document.textclass != "g-brief-de":
        return

    # layout name used by g-brief-de -> layout name used by g-brief
    english_names = {
        "Brieftext":       "Letter",
        "Unterschrift":    "Signature",
        "Strasse":         "Street",
        "Zusatz":          "Addition",
        "Ort":             "Town",
        "Land":            "State",
        "RetourAdresse":   "ReturnAddress",
        "MeinZeichen":     "MyRef",
        "IhrZeichen":      "YourRef",
        "IhrSchreiben":    "YourMail",
        "Telefon":         "Phone",
        "BLZ":             "BankCode",
        "Konto":           "BankAccount",
        "Postvermerk":     "PostalComment",
        "Adresse":         "Address",
        "Datum":           "Date",
        "Betreff":         "Reference",
        "Anrede":          "Opening",
        "Anlagen":         "Encl.",
        "Verteiler":       "cc",
        "Gruss":           "Closing",
    }
    prefix = "\\begin_layout "
    pos = find_token(document.body, "\\begin_layout", 0)
    while pos != -1:
        # the layout name is whatever follows the token and one more character
        old_name = document.body[pos][len(prefix):]
        new_name = english_names.get(old_name)
        if new_name is not None:
            document.body[pos] = prefix + new_name
        pos = find_token(document.body, "\\begin_layout", pos + 1)

    document.textclass = "g-brief"
    document.set_textclass()
1126
1127
def revert_gbrief(document):
    """ Revert g-brief to g-brief-en

    Documents of any other text class are left untouched. """
    if document.textclass != "g-brief":
        return
    document.textclass = "g-brief-en"
    document.set_textclass()
1133
1134
def revert_html_options(document):
    """ Remove html options

    Deletes the \\html_use_mathml and \\html_be_strict header lines, each only
    if it is present. """
    for option in ('\\html_use_mathml', '\\html_be_strict'):
        pos = find_token(document.header, option, 0)
        if pos == -1:
            continue
        del document.header[pos]
1143
1144
def revert_includeonly(document):
    """ Remove includeonly blocks from the header

    Every span from a \\begin_includeonly line up to and including its
    matching \\end_includeonly line is deleted.  Processing stops at the
    first block whose end cannot be found. """
    start = find_token(document.header, "\\begin_includeonly", 0)
    while start != -1:
        stop = find_end_of(document.header, start,
                           "\\begin_includeonly", "\\end_includeonly")
        if stop == -1:
            # this should not happen
            return
        del document.header[start:stop + 1]
        # whatever followed the block now sits at start; search from there
        start = find_token(document.header, "\\begin_includeonly", start)
1156
1157
def revert_includeall(document):
    """ Remove maintain_unincluded_children param

    Deletes the header line if present; a document without it is left
    untouched. """
    pos = find_token(document.header, '\\maintain_unincluded_children', 0)
    if pos == -1:
        return
    del document.header[pos]
1163
1164
##
# Conversion hub
#
# Tables of the conversion steps implemented in this module.  Every entry
# pairs a file format number with the list of functions registered for that
# number; an empty list means that no function is registered for it.
# NOTE(review): how the lyx2lyx driver walks these tables (in particular
# whether the number is the format before or after the step) is not visible
# in this file -- confirm against the driver before adding entries.
#

# NOTE(review): presumably the LyX version strings that map to this
# module -- confirm against the code that reads this list.
supported_versions = ["2.0.0","2.0"]
# Forward conversions, listed by ascending format number (346 .. 376).
convert = [[346, []],
           [347, []],
           [348, []],
           [349, []],
           [350, []],
           [351, []],
           [352, [convert_splitindex]],
           [353, []],
           [354, []],
           [355, []],
           [356, []],
           [357, []],
           [358, []],
           [359, [convert_nomencl_width]],
           [360, []],
           [361, []],
           [362, []],
           [363, []],
           [364, []],
           [365, []],
           [366, []],
           [367, []],
           [368, []],
           [369, [convert_author_id]],
           [370, []],
           [371, []],
           [372, []],
           [373, [merge_gbrief]],
           [374, []],
           [375, []],
           [376, []]
          ]

# Reversions, listed by descending format number (375 .. 345).  Entries
# with several functions list them in the order written here.
revert =  [[375, [revert_includeall]],
           [374, [revert_includeonly]],
           [373, [revert_html_options]],
           [372, [revert_gbrief]],
           [371, [revert_fontenc]],
           [370, [revert_mhchem]],
           [369, [revert_suppress_date]],
           [368, [revert_author_id]],
           [367, [revert_hspace_glue_lengths]],
           [366, [revert_percent_vspace_lengths, revert_percent_hspace_lengths]],
           [365, [revert_percent_skip_lengths]],
           [364, [revert_paragraph_indentation]],
           [363, [revert_branch_filename]],
           [362, [revert_longtable_align]],
           [361, [revert_applemac]],
           [360, []],
           [359, [revert_nomencl_cwidth]],
           [358, [revert_nomencl_width]],
           [357, [revert_custom_processors]],
           [356, [revert_ulinelatex]],
           [355, [revert_uulinewave]],
           [354, [revert_strikeout]],
           [353, [revert_printindexall]],
           [352, [revert_subindex]],
           [351, [revert_splitindex]],
           [350, [revert_backgroundcolor]],
           [349, [revert_outputformat]],
           [348, [revert_xetex]],
           [347, [revert_phantom, revert_hphantom, revert_vphantom]],
           [346, [revert_tabularvalign]],
           [345, [revert_swiss]]
          ]
1235
1236
# Running this file as a script performs no conversion: the guarded body
# is empty.
if __name__ == "__main__":
    pass