lib/lyx2lyx/lyx_1_4.py

   1 # This file is part of lyx2lyx
   2 # -*- coding: utf-8 -*-
   3 # Copyright (C) 2002 Dekel Tsur <dekel@lyx.org>
   4 # Copyright (C) 2002-2004 José Matos <jamatos@lyx.org>
   5 # Copyright (C) 2004-2005 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   6 #
   7 # This program is free software; you can redistribute it and/or
   8 # modify it under the terms of the GNU General Public License
   9 # as published by the Free Software Foundation; either version 2
  10 # of the License, or (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  20
  21 """ Convert files to the file format generated by lyx 1.4"""
  22
  23 import re
  24 from os import access, F_OK
  25 import os.path
  26 from parser_tools import check_token, find_token, \
  27                          get_value, is_nonempty_line, \
  28                          find_tokens, find_end_of, find_beginning_of, find_token_exact, find_tokens_exact, \
  29                          find_re, find_tokens_backwards
  30 from sys import stdin
  31
  32 from lyx_0_12 import update_latexaccents
  33
  34 ####################################################################
  35 # Private helper functions
  36
  37 def get_layout(line, default_layout):
  38     " Get layout, if empty return the default layout."
  39     tokens = line.split()
  40     if len(tokens) > 1:
  41         return tokens[1]
  42     return default_layout
  43
  44
  45 def get_paragraph(lines, i, format):
  46     "Finds the paragraph that contains line i."
  47
  48     if format < 225:
  49         begin_layout = "\\layout"
  50     else:
  51         begin_layout = "\\begin_layout"
  52     while i != -1:
  53         i = find_tokens_backwards(lines, ["\\end_inset", begin_layout], i)
  54         if i == -1: return -1
  55         if check_token(lines[i], begin_layout):
  56             return i
  57         i = find_beginning_of_inset(lines, i)
  58     return -1
  59
  60
  61 def find_beginning_of_inset(lines, i):
  62     " Find beginning of inset, where lines[i] is included."
  63     return find_beginning_of(lines, i, "\\begin_inset", "\\end_inset")
  64
  65
  66 def get_next_paragraph(lines, i, format):
  67     "Finds the paragraph after the paragraph that contains line i."
  68
  69     if format < 225:
  70         tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]
  71     elif format < 236:
  72         tokens = ["\\begin_inset", "\\begin_layout", "\\end_float", "\\end_document"]
  73     else:
  74         tokens = ["\\begin_inset", "\\begin_layout", "\\end_float", "\\end_body", "\\end_document"]
  75     while i != -1:
  76         i = find_tokens(lines, tokens, i)
  77         if not check_token(lines[i], "\\begin_inset"):
  78             return i
  79         i = find_end_of_inset(lines, i)
  80     return -1
  81
  82
  83 def find_end_of_inset(lines, i):
  84     "Finds the matching \end_inset"
  85     return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  86
  87 def del_token(lines, token, start, end):
  88     """ del_token(lines, token, start, end) -> int
  89
  90     Find the lower line in lines where token is the first element and
  91     delete that line.
  92
  93     Returns the number of lines remaining."""
  94
  95     k = find_token_exact(lines, token, start, end)
  96     if k == -1:
  97         return end
  98     else:
  99         del lines[k]
 100         return end - 1
 101
 102 # End of helper functions
 103 ####################################################################
 104
 105 def remove_color_default(document):
 106     " Remove \color default"
 107     i = 0
 108     while True:
 109         i = find_token(document.body, "\\color default", i)
 110         if i == -1:
 111             return
 112         document.body[i] = document.body[i].replace("\\color default",
 113                                                     "\\color inherit")
 114
 115
 116 def add_end_header(document):
 117     " Add \end_header"
 118     document.header.append("\\end_header");
 119
 120
 121 def rm_end_header(document):
 122     " Remove \end_header"
 123     i = find_token(document.header, "\\end_header", 0)
 124     if i == -1:
 125         return
 126     del document.header[i]
 127
 128
 129 def convert_amsmath(document):
 130     " Convert \\use_amsmath"
 131     i = find_token(document.header, "\\use_amsmath", 0)
 132     if i == -1:
 133         document.warning("Malformed LyX document: Missing '\\use_amsmath'.")
 134         return
 135     tokens = document.header[i].split()
 136     if len(tokens) != 2:
 137         document.warning("Malformed LyX document: Could not parse line '%s'." % document.header[i])
 138         use_amsmath = '0'
 139     else:
 140         use_amsmath = tokens[1]
 141     # old: 0 == off, 1 == on
 142     # new: 0 == off, 1 == auto, 2 == on
 143     # translate off -> auto, since old format 'off' means auto in reality
 144     if use_amsmath == '0':
 145         document.header[i] = "\\use_amsmath 1"
 146     else:
 147         document.header[i] = "\\use_amsmath 2"
 148
 149
 150 def revert_amsmath(document):
 151     " Revert \\use_amsmath"
 152     i = find_token(document.header, "\\use_amsmath", 0)
 153     if i == -1:
 154         document.warning("Malformed LyX document: Missing '\\use_amsmath'.")
 155         return
 156     tokens = document.header[i].split()
 157     if len(tokens) != 2:
 158         document.warning("Malformed LyX document: Could not parse line '%s'." % document.header[i])
 159         use_amsmath = '0'
 160     else:
 161         use_amsmath = tokens[1]
 162     # old: 0 == off, 1 == on
 163     # new: 0 == off, 1 == auto, 2 == on
 164     # translate auto -> off, since old format 'off' means auto in reality
 165     if use_amsmath == '2':
 166         document.header[i] = "\\use_amsmath 1"
 167     else:
 168         document.header[i] = "\\use_amsmath 0"
 169
 170
 171 def convert_spaces(document):
 172     " \SpecialChar ~ -> \InsetSpace ~"
 173     for i in range(len(document.body)):
 174         document.body[i] = document.body[i].replace("\\SpecialChar ~",
 175                                                     "\\InsetSpace ~")
 176
 177
 178 def revert_spaces(document):
 179     " \InsetSpace ~ -> \SpecialChar ~"
 180     regexp = re.compile(r'(.*)(\\InsetSpace\s+)(\S+)')
 181     i = 0
 182     while True:
 183         i = find_re(document.body, regexp, i)
 184         if i == -1:
 185             break
 186         space = regexp.match(document.body[i]).group(3)
 187         prepend = regexp.match(document.body[i]).group(1)
 188         if space == '~':
 189             document.body[i] = regexp.sub(prepend + '\\SpecialChar ~', document.body[i])
 190             i = i + 1
 191         else:
 192             document.body[i] = regexp.sub(prepend, document.body[i])
 193             document.body[i+1:i+1] = ''
 194             if space == "\\space":
 195                 space = "\\ "
 196             i = insert_ert(document.body, i+1, 'Collapsed', space, document.format - 1, document.default_layout)
 197
 198
 199 def rename_spaces(document):
 200     """ \InsetSpace \, -> \InsetSpace \thinspace{}
 201         \InsetSpace \space -> \InsetSpace \space{}"""
 202     for i in range(len(document.body)):
 203         document.body[i] = document.body[i].replace("\\InsetSpace \\space",
 204                                                     "\\InsetSpace \\space{}")
 205         document.body[i] = document.body[i].replace("\\InsetSpace \,",
 206                                                     "\\InsetSpace \\thinspace{}")
 207
 208
 209 def revert_space_names(document):
 210     """ \InsetSpace \thinspace{} -> \InsetSpace \,
 211          \InsetSpace \space{} -> \InsetSpace \space"""
 212     for i in range(len(document.body)):
 213         document.body[i] = document.body[i].replace("\\InsetSpace \\space{}",
 214                                                     "\\InsetSpace \\space")
 215         document.body[i] = document.body[i].replace("\\InsetSpace \\thinspace{}",
 216                                                     "\\InsetSpace \\,")
 217
 218
 219 def lyx_support_escape(lab):
 220     " Equivalent to pre-unicode lyx::support::escape()"
 221     hexdigit = ['0', '1', '2', '3', '4', '5', '6', '7',
 222                 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F']
 223     enc = ""
 224     for c in lab:
 225         o = ord(c)
 226         if o >= 128 or c == '=' or c == '%':
 227             enc = enc + '='
 228             enc = enc + hexdigit[o >> 4]
 229             enc = enc + hexdigit[o & 15]
 230         else:
 231             enc = enc + c
 232     return enc;
 233
 234
 235 def revert_eqref(document):
 236     "\\begin_inset LatexCommand \\eqref -> ERT"
 237     regexp = re.compile(r'^\\begin_inset\s+LatexCommand\s+\\eqref')
 238     i = 0
 239     while True:
 240         i = find_re(document.body, regexp, i)
 241         if i == -1:
 242             break
 243         eqref = lyx_support_escape(regexp.sub("", document.body[i]))
 244         document.body[i:i+1] = ["\\begin_inset ERT", "status Collapsed", "",
 245                             '\\layout %s' % document.default_layout, "", "\\backslash ",
 246                             "eqref" + eqref]
 247         i = i + 7
 248
 249
 250 def convert_bibtex(document):
 251     " Convert BibTeX changes."
 252     for i in range(len(document.body)):
 253         document.body[i] = document.body[i].replace("\\begin_inset LatexCommand \\BibTeX",
 254                                                     "\\begin_inset LatexCommand \\bibtex")
 255
 256
 257 def revert_bibtex(document):
 258     " Revert BibTeX changes."
 259     for i in range(len(document.body)):
 260         document.body[i] = document.body[i].replace("\\begin_inset LatexCommand \\bibtex",
 261                                                     "\\begin_inset LatexCommand \\BibTeX")
 262
 263
 264 def remove_insetparent(document):
 265     " Remove \lyxparent"
 266     i = 0
 267     while True:
 268         i = find_token(document.body, "\\begin_inset LatexCommand \\lyxparent", i)
 269         if i == -1:
 270             break
 271         del document.body[i:i+3]
 272
 273
 274 def convert_external(document):
 275     " Convert inset External."
 276     external_rexp = re.compile(r'\\begin_inset External ([^,]*),"([^"]*)",')
 277     external_header = "\\begin_inset External"
 278     i = 0
 279     while True:
 280         i = find_token(document.body, external_header, i)
 281         if i == -1:
 282             break
 283         look = external_rexp.search(document.body[i])
 284         args = ['','']
 285         if look:
 286             args[0] = look.group(1)
 287             args[1] = look.group(2)
 288         #FIXME: if the previous search fails then warn
 289
 290         if args[0] == "RasterImage":
 291             # Convert a RasterImage External Inset to a Graphics Inset.
 292             top = "\\begin_inset Graphics"
 293             if args[1]:
 294                 filename = "\tfilename " + args[1]
 295             document.body[i:i+1] = [top, filename]
 296             i = i + 1
 297         else:
 298             # Convert the old External Inset format to the new.
 299             top = external_header
 300             template = "\ttemplate " + args[0]
 301             if args[1]:
 302                 filename = "\tfilename " + args[1]
 303                 document.body[i:i+1] = [top, template, filename]
 304                 i = i + 2
 305             else:
 306                 document.body[i:i+1] = [top, template]
 307                 i = i + 1
 308
 309
 310 def revert_external_1(document):
 311     " Revert inset External."
 312     external_header = "\\begin_inset External"
 313     i = 0
 314     while True:
 315         i = find_token(document.body, external_header, i)
 316         if i == -1:
 317             break
 318
 319         template = document.body[i+1].split()
 320         template.reverse()
 321         del document.body[i+1]
 322
 323         filename = document.body[i+1].split()
 324         filename.reverse()
 325         del document.body[i+1]
 326
 327         params = document.body[i+1].split()
 328         params.reverse()
 329         if document.body[i+1]: del document.body[i+1]
 330
 331         document.body[i] = document.body[i] + " " + template[0]+ ', "' + filename[0] + '", " '+ " ".join(params[1:]) + '"'
 332         i = i + 1
 333
 334
 335 def revert_external_2(document):
 336     " Revert inset External. (part II)"
 337     draft_token = '\tdraft'
 338     i = 0
 339     while True:
 340         i = find_token(document.body, '\\begin_inset External', i)
 341         if i == -1:
 342             break
 343         j = find_end_of_inset(document.body, i + 1)
 344         if j == -1:
 345             #this should not happen
 346             break
 347         k = find_token(document.body, draft_token, i+1, j-1)
 348         if (k != -1 and len(draft_token) == len(document.body[k])):
 349             del document.body[k]
 350         i = j + 1
 351
 352
def convert_comment(document):
    r"""Convert \layout Comment paragraphs into Comment insets.

    Each \layout Comment line is replaced by a default-layout paragraph
    that opens a collapsed Comment inset.  Directly following Comment
    paragraphs are turned into default-layout paragraphs inside the same
    inset.  The inset is closed in front of the next \layout line that is
    not a Comment, or near the end of the body when no \layout follows.
    """
    i = 0
    comment = "\\layout Comment"
    while True:
        i = find_token(document.body, comment, i)
        if i == -1:
            return

        # Replace the \layout Comment line by seven lines: a default
        # paragraph, the inset header and the first paragraph in the inset.
        document.body[i:i+1] = ['\\layout %s' % document.default_layout,"","",
                        "\\begin_inset Comment",
                        "collapsed true","",
                        '\\layout %s' % document.default_layout]
        # Skip the seven lines just written.
        i = i + 7

        # Walk forward through the comment's contents until the inset can
        # be closed.
        while True:
                old_i = i
                i = find_token(document.body, "\\layout", i)
                if i == -1:
                    # No further paragraph: close the inset just before the
                    # last line of the body.
                    i = len(document.body) - 1
                    document.body[i:i] = ["\\end_inset","",""]
                    return

                # Look for \begin_deeper and \begin_inset between the old
                # position and the next \layout.  i + 1 stands for "not
                # found", which makes both comparisons below fail.
                j = find_token(document.body, '\\begin_deeper', old_i, i)
                if j == -1: j = i + 1
                k = find_token(document.body, '\\begin_inset', old_i, i)
                if k == -1: k = i + 1

                if j < i and j < k:
                    # A \begin_deeper comes first: delete it and the
                    # \end_deeper that find_end_of() reports for it.
                    i = j
                    del document.body[i]
                    i = find_end_of( document.body, i, "\\begin_deeper","\\end_deeper")
                    if i == -1:
                        # This case should not happen, but if it does, deal
                        # with it gracefully by adding the missing
                        # \end_deeper.
                        i = len(document.body) - 1
                        document.body[i:i] = ["\\end_deeper",""]
                        return
                    else:
                        del document.body[i]
                        continue

                if k < i:
                    # A \begin_inset comes first: skip the whole inset.
                    i = k
                    i = find_end_of( document.body, i, "\\begin_inset","\\end_inset")
                    if i == -1:
                        # This case should not happen, but if it does, deal
                        # with it gracefully by adding the missing
                        # \end_inset.
                        i = len(document.body) - 1
                        document.body[i:i] = ["\\end_inset","","","\\end_inset","",""]
                        return
                    else:
                        i = i + 1
                        continue

                if document.body[i].find(comment) == -1:
                    # The next paragraph is not a Comment: close the inset
                    # in front of it and look for the next comment.
                    document.body[i:i] = ["\\end_inset"]
                    i = i + 1
                    break
                # Another Comment paragraph: keep it inside the same inset
                # as a default-layout paragraph.
                document.body[i:i+1] = ['\\layout %s' % document.default_layout]
                i = i + 1
 416
 417
 418 def revert_comment(document):
 419     " Revert comments"
 420     i = 0
 421     while True:
 422         i = find_tokens(document.body, ["\\begin_inset Comment", "\\begin_inset Greyedout"], i)
 423
 424         if i == -1:
 425             return
 426         document.body[i] = "\\begin_inset Note"
 427         i = i + 1
 428
 429
def add_end_layout(document):
    r"""Add an \end_layout line for every \layout paragraph in the body.

    The body is scanned for structural tokens while struct_stack records
    the currently open \layout, \begin_inset and \begin_deeper tokens.  An
    \end_layout line is inserted in front of a token that closes the
    paragraph on top of the stack.  Nothing is done when the body contains
    no \layout line.
    """
    i = find_token(document.body, '\\layout', 0)

    if i == -1:
        return

    # Start scanning after the first \layout, which is already open.
    i = i + 1
    struct_stack = ["\\layout"]

    while True:
        i = find_tokens(document.body, ["\\begin_inset", "\\end_inset", "\\layout",
                                "\\begin_deeper", "\\end_deeper", "\\the_end"], i)

        if i != -1:
            token = document.body[i].split()[0]
        else:
            # No structural token left: append the missing \the_end.  The
            # empty token falls through to the final case below.
            document.warning("Truncated document.")
            i = len(document.body)
            document.body.insert(i, '\\the_end')
            token = ""

        if token == "\\begin_inset":
            struct_stack.append(token)
            i = i + 1
            continue

        if token == "\\end_inset":
            tail = struct_stack.pop()
            if tail == "\\layout":
                # A paragraph is still open inside the inset: close it in
                # front of \end_inset.  The two inserts leave
                # "\end_layout", "" ahead of the token.
                document.body.insert(i,"")
                document.body.insert(i,"\\end_layout")
                i = i + 2
                #Check if it is the correct tag
                # NOTE(review): this second pop presumably removes the
                # matching \begin_inset; it is not verified.
                struct_stack.pop()
            i = i + 1
            continue

        if token == "\\layout":
            tail = struct_stack.pop()
            if tail == token:
                # The previous paragraph is still open: close it, then move
                # past the two inserted lines and this \layout line.
                document.body.insert(i,"")
                document.body.insert(i,"\\end_layout")
                i = i + 3
            else:
                # First paragraph inside an inset or deeper block: put the
                # popped entry back.
                struct_stack.append(tail)
                i = i + 1
            struct_stack.append(token)
            continue

        if token == "\\begin_deeper":
            # An \end_layout is inserted in front of \begin_deeper.  The
            # stack is not popped here.
            document.body.insert(i,"")
            document.body.insert(i,"\\end_layout")
            i = i + 3
            # consecutive begin_deeper only insert one end_layout
            while document.body[i].startswith('\\begin_deeper'):
                i += 1
            struct_stack.append(token)
            continue

        if token == "\\end_deeper":
            if struct_stack[-1] == '\\layout':
                # Close the paragraph that is still open in front of
                # \end_deeper.
                document.body.insert(i, '\\end_layout')
                i = i + 1
                struct_stack.pop()
            i = i + 1
            continue

        # Remaining case: \the_end, or the empty token of a truncated
        # document.  Close the last paragraph and stop.
        document.body.insert(i, "")
        document.body.insert(i, "\\end_layout")
        return
 502
 503
 504 def rm_end_layout(document):
 505     " Remove \end_layout"
 506     i = 0
 507     while True:
 508         i = find_token(document.body, '\\end_layout', i)
 509
 510         if i == -1:
 511             return
 512
 513         del document.body[i]
 514
 515
 516 def insert_tracking_changes(document):
 517     " Handle change tracking keywords."
 518     i = find_token(document.header, "\\tracking_changes", 0)
 519     if i == -1:
 520         document.header.append("\\tracking_changes 0")
 521
 522
 523 def rm_tracking_changes(document):
 524     " Remove change tracking keywords."
 525     i = find_token(document.header, "\\author", 0)
 526     if i != -1:
 527         del document.header[i]
 528
 529     i = find_token(document.header, "\\tracking_changes", 0)
 530     if i == -1:
 531         return
 532     del document.header[i]
 533
 534
 535 def rm_body_changes(document):
 536     " Remove body changes."
 537     i = 0
 538     while True:
 539         i = find_token(document.body, "\\change_", i)
 540         if i == -1:
 541             return
 542
 543         del document.body[i]
 544
 545
 546 def layout2begin_layout(document):
 547     " \layout -> \begin_layout "
 548     i = 0
 549     while True:
 550         i = find_token(document.body, '\\layout', i)
 551         if i == -1:
 552             return
 553
 554         document.body[i] = document.body[i].replace('\\layout', '\\begin_layout')
 555         i = i + 1
 556
 557
 558 def begin_layout2layout(document):
 559     " \begin_layout -> \layout "
 560     i = 0
 561     while True:
 562         i = find_token(document.body, '\\begin_layout', i)
 563         if i == -1:
 564             return
 565
 566         document.body[i] = document.body[i].replace('\\begin_layout', '\\layout')
 567         i = i + 1
 568
 569
 570 def convert_valignment_middle(body, start, end):
 571     'valignment="center" -> valignment="middle"'
 572     for i in range(start, end):
 573         if re.search('^<(column|cell) .*valignment="center".*>$', body[i]):
 574             body[i] = body[i].replace('valignment="center"', 'valignment="middle"')
 575
 576
 577 def convert_table_valignment_middle(document):
 578     " Convert table  valignment, center -> middle"
 579     regexp = re.compile(r'^\\begin_inset\s+Tabular')
 580     i = 0
 581     while True:
 582         i = find_re(document.body, regexp, i)
 583         if i == -1:
 584             return
 585         j = find_end_of_inset(document.body, i + 1)
 586         if j == -1:
 587             #this should not happen
 588             convert_valignment_middle(document.body, i + 1, len(document.body))
 589             return
 590         convert_valignment_middle(document.body, i + 1, j)
 591         i = j + 1
 592
 593
 594 def revert_table_valignment_middle(body, start, end):
 595     " valignment, middle -> center"
 596     for i in range(start, end):
 597         if re.search('^<(column|cell) .*valignment="middle".*>$', body[i]):
 598             body[i] = body[i].replace('valignment="middle"', 'valignment="center"')
 599
 600
 601 def revert_valignment_middle(document):
 602     " Convert table  valignment, middle -> center"
 603     regexp = re.compile(r'^\\begin_inset\s+Tabular')
 604     i = 0
 605     while True:
 606         i = find_re(document.body, regexp, i)
 607         if i == -1:
 608             return
 609         j = find_end_of_inset(document.body, i + 1)
 610         if j == -1:
 611             #this should not happen
 612             revert_table_valignment_middle(document.body, i + 1, len(document.body))
 613             return
 614         revert_table_valignment_middle(document.body, i + 1, j)
 615         i = j + 1
 616
 617
 618 def convert_end_document(document):
 619     "\\the_end -> \\end_document"
 620     i = find_token(document.body, "\\the_end", 0)
 621     if i == -1:
 622         document.body.append("\\end_document")
 623         return
 624     document.body[i] = "\\end_document"
 625
 626
 627 def revert_end_document(document):
 628     "\\end_document -> \\the_end"
 629     i = find_token(document.body, "\\end_document", 0)
 630     if i == -1:
 631         document.body.append("\\the_end")
 632         return
 633     document.body[i] = "\\the_end"
 634
 635
def convert_breaks(document):
    r"""
Convert line and page breaks
 Old:
\layout Standard
\line_top \line_bottom \pagebreak_top \pagebreak_bottom \added_space_top xxx \added_space_bottom yyy
0

 New:
\begin_layout Standard

\newpage

\lyxline
\begin_inset ERT
\begin_layout Standard
\backslash
vspace{-1\backslash
parskip}
\end_layout
\end_inset

\begin_inset VSpace xxx
\end_inset

0

\begin_inset VSpace xxx
\end_inset
\lyxline

\newpage

\end_layout
    """
    # Names of the paragraph parameters that may follow \begin_layout; the
    # lines carrying them are merged into a single line below.
    par_params = ('added_space_bottom', 'added_space_top', 'align',
                 'labelwidthstring', 'line_bottom', 'line_top', 'noindent',
                 'pagebreak_bottom', 'pagebreak_top', 'paragraph_spacing',
                 'start_of_appendix')
    # Parallel lists: a font attribute and the value written to reset it
    # before the decorations appended to a standard paragraph.
    font_attributes = ['\\family', '\\series', '\\shape', '\\emph',
                       '\\numeric', '\\bar', '\\noun', '\\color', '\\lang']
    attribute_values = ['default', 'default', 'default', 'default',
                        'default', 'default', 'default', 'none', document.language]
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout", i)
        if i == -1:
            return
        layout = get_layout(document.body[i], document.default_layout)
        # Move to the line that follows \begin_layout.
        i = i + 1

        # Merge all paragraph parameters into a single line
        # We cannot check for '\\' only because paragraphs may start e.g.
        # with '\\backslash'
        # NOTE(review): this indexes body[i + 1] and the first word after
        # the backslash without bounds checks; confirm that a lone "\"
        # line or the end of the body cannot occur here.
        while document.body[i + 1][:1] == '\\' and document.body[i + 1][1:].split()[0] in par_params:
            document.body[i] = document.body[i + 1] + ' ' + document.body[i]
            del document.body[i+1]

        # Position of each decoration keyword on the merged line (-1 when
        # absent).
        line_top   = document.body[i].find("\\line_top")
        line_bot   = document.body[i].find("\\line_bottom")
        pb_top     = document.body[i].find("\\pagebreak_top")
        pb_bot     = document.body[i].find("\\pagebreak_bottom")
        vspace_top = document.body[i].find("\\added_space_top")
        vspace_bot = document.body[i].find("\\added_space_bottom")

        # Nothing to convert in this paragraph.
        if line_top == -1 and line_bot == -1 and pb_bot == -1 and pb_top == -1 and vspace_top == -1 and vspace_bot == -1:
            continue

        # Do we have a nonstandard paragraph? We need to create new paragraphs
        # if yes to avoid putting lyxline etc. inside of special environments.
        # This is wrong for itemize and enumerate environments, but it is
        # impossible to convert these correctly.
        # We want to avoid new paragraphs if possible because we want to
        # inherit font sizes.
        nonstandard = 0
        if (not document.is_default_layout(layout) or
            document.body[i].find("\\align") != -1 or
            document.body[i].find("\\labelwidthstring") != -1 or
            document.body[i].find("\\noindent") != -1):
            nonstandard = 1

        # get the font size of the beginning of this paragraph, since we need
        # it for the lyxline inset
        j = i + 1
        while not is_nonempty_line(document.body[j]):
            j = j + 1
        size_top = ""
        if document.body[j].find("\\size") != -1:
            size_top = document.body[j].split()[1]

        # Strip the keywords that take no argument from the merged line.
        for tag in "\\line_top", "\\line_bottom", "\\pagebreak_top", "\\pagebreak_bottom":
            document.body[i] = document.body[i].replace(tag, "")

        if vspace_top != -1:
            # the position could have changed because of the removal of
            # other paragraph properties above
            vspace_top = document.body[i].find("\\added_space_top")
            tmp_list = document.body[i][vspace_top:].split()
            vspace_top_value = tmp_list[1]
            # Drop the keyword and its value, keep whatever follows.
            document.body[i] = document.body[i][:vspace_top] + " ".join(tmp_list[2:])

        if vspace_bot != -1:
            # the position could have changed because of the removal of
            # other paragraph properties above
            vspace_bot = document.body[i].find("\\added_space_bottom")
            tmp_list = document.body[i][vspace_bot:].split()
            vspace_bot_value = tmp_list[1]
            # Drop the keyword and its value, keep whatever follows.
            document.body[i] = document.body[i][:vspace_bot] + " ".join(tmp_list[2:])

        document.body[i] = document.body[i].strip()
        # Move past the merged parameter line.
        i = i + 1

        # Create an empty paragraph or paragraph fragment for line and
        # page break that belong above the paragraph
        if pb_top !=-1 or line_top != -1 or vspace_top != -1:

            paragraph_above = list()
            if nonstandard:
                # We need to create an extra paragraph for nonstandard environments
                paragraph_above = ['\\begin_layout %s' % document.default_layout, '']

            if pb_top != -1:
                paragraph_above.extend(['\\newpage ',''])

            if vspace_top != -1:
                paragraph_above.extend(['\\begin_inset VSpace ' + vspace_top_value,'\\end_inset','',''])

            if line_top != -1:
                if size_top != '':
                    paragraph_above.extend(['\\size ' + size_top + ' '])
                # We need an additional vertical space of -\parskip.
                # We can't use the vspace inset because it does not know \parskip.
                paragraph_above.extend(['\\lyxline ', '', ''])
                # insert_ert is defined outside this block; presumably it
                # inserts the ERT inset into paragraph_above at the given
                # index - TODO confirm.
                insert_ert(paragraph_above, len(paragraph_above) - 1, 'Collapsed',
                           '\\vspace{-1\\parskip}\n', document.format + 1, document.default_layout)
                paragraph_above.extend([''])

            if nonstandard:
                paragraph_above.extend(['\\end_layout ',''])
                # insert new paragraph above the current paragraph
                # (i was advanced twice since \begin_layout, so i-2 is the
                # index of the \begin_layout line)
                document.body[i-2:i-2] = paragraph_above
            else:
                # insert new lines at the beginning of the current paragraph
                document.body[i:i] = paragraph_above

            # Skip over the lines just inserted.
            i = i + len(paragraph_above)

        # Ensure that nested style are converted later.
        k = find_end_of(document.body, i, "\\begin_layout", "\\end_layout")

        if k == -1:
            return

        if pb_bot !=-1 or line_bot != -1 or vspace_bot != -1:

            # get the font size of the end of this paragraph
            size_bot = size_top
            j = i + 1
            while j < k:
                if document.body[j].find("\\size") != -1:
                    size_bot = document.body[j].split()[1]
                    j = j + 1
                elif document.body[j].find("\\begin_inset") != -1:
                    # skip insets
                    j = find_end_of_inset(document.body, j)
                else:
                    j = j + 1

            paragraph_below = list()
            if nonstandard:
                # We need to create an extra paragraph for nonstandard environments
                paragraph_below = ['', '\\begin_layout %s' % document.default_layout, '']
            else:
                # Reset every font attribute that is found between i and k
                # before the decorations are appended.
                for a in range(len(font_attributes)):
                    if find_token(document.body, font_attributes[a], i, k) != -1:
                        paragraph_below.extend([font_attributes[a] + ' ' + attribute_values[a]])

            if line_bot != -1:
                if nonstandard and size_bot != '':
                    paragraph_below.extend(['\\size ' + size_bot + ' '])
                paragraph_below.extend(['\\lyxline ',''])
                if size_bot != '':
                    paragraph_below.extend(['\\size default '])

            if vspace_bot != -1:
                paragraph_below.extend(['\\begin_inset VSpace ' + vspace_bot_value,'\\end_inset','',''])

            if pb_bot != -1:
                paragraph_below.extend(['\\newpage ',''])

            if nonstandard:
                paragraph_below.extend(['\\end_layout '])
                # insert new paragraph below the current paragraph
                document.body[k+1:k+1] = paragraph_below
            else:
                # insert new lines at the end of the current paragraph
                document.body[k:k] = paragraph_below
 833
 834
 835 def convert_note(document):
 836     " Convert Notes. "
 837     i = 0
 838     while True:
 839         i = find_tokens(document.body, ["\\begin_inset Note",
 840                                 "\\begin_inset Comment",
 841                                 "\\begin_inset Greyedout"], i)
 842         if i == -1:
 843             break
 844
 845         document.body[i] = document.body[i][0:13] + 'Note ' + document.body[i][13:]
 846         i = i + 1
 847
 848
 849 def revert_note(document):
 850     " Revert Notes. "
 851     note_header = "\\begin_inset Note "
 852     i = 0
 853     while True:
 854         i = find_token(document.body, note_header, i)
 855         if i == -1:
 856             break
 857
 858         document.body[i] = "\\begin_inset " + document.body[i][len(note_header):]
 859         i = i + 1
 860
 861
 862 def convert_box(document):
 863     " Convert Boxes. "
 864     i = 0
 865     while True:
 866         i = find_tokens(document.body, ["\\begin_inset Boxed",
 867                                 "\\begin_inset Doublebox",
 868                                 "\\begin_inset Frameless",
 869                                 "\\begin_inset ovalbox",
 870                                 "\\begin_inset Ovalbox",
 871                                 "\\begin_inset Shadowbox"], i)
 872         if i == -1:
 873             break
 874
 875         document.body[i] = document.body[i][0:13] + 'Box ' + document.body[i][13:]
 876         i = i + 1
 877
 878
 879 def revert_box(document):
 880     " Revert Boxes."
 881     box_header = "\\begin_inset Box "
 882     i = 0
 883     while True:
 884         i = find_token(document.body, box_header, i)
 885         if i == -1:
 886             break
 887
 888         document.body[i] = "\\begin_inset " + document.body[i][len(box_header):]
 889         i = i + 1
 890
 891
 892 def convert_collapsible(document):
 893     " Convert collapsed insets. "
 894     i = 0
 895     while True:
 896         i = find_tokens_exact(document.body, ["\\begin_inset Box",
 897                                 "\\begin_inset Branch",
 898                                 "\\begin_inset CharStyle",
 899                                 "\\begin_inset Float",
 900                                 "\\begin_inset Foot",
 901                                 "\\begin_inset Marginal",
 902                                 "\\begin_inset Note",
 903                                 "\\begin_inset OptArg",
 904                                 "\\begin_inset Wrap"], i)
 905         if i == -1:
 906             break
 907
 908         # Seach for a line starting 'collapsed'
 909         # If, however, we find a line starting '\begin_layout'
 910         # (_always_ present) then break with a warning message
 911         i = i + 1
 912         while True:
 913             if (document.body[i] == "collapsed false"):
 914                 document.body[i] = "status open"
 915                 break
 916             elif (document.body[i] == "collapsed true"):
 917                 document.body[i] = "status collapsed"
 918                 break
 919             elif (document.body[i][:13] == "\\begin_layout"):
 920                 document.warning("Malformed LyX document: Missing 'collapsed'.")
 921                 break
 922             i = i + 1
 923
 924         i = i + 1
 925
 926
 927 def revert_collapsible(document):
 928     " Revert collapsed insets. "
 929     i = 0
 930     while True:
 931         i = find_tokens_exact(document.body, ["\\begin_inset Box",
 932                                 "\\begin_inset Branch",
 933                                 "\\begin_inset CharStyle",
 934                                 "\\begin_inset Float",
 935                                 "\\begin_inset Foot",
 936                                 "\\begin_inset Marginal",
 937                                 "\\begin_inset Note",
 938                                 "\\begin_inset OptArg",
 939                                 "\\begin_inset Wrap"], i)
 940         if i == -1:
 941             break
 942
 943         # Seach for a line starting 'status'
 944         # If, however, we find a line starting '\begin_layout'
 945         # (_always_ present) then break with a warning message
 946         i = i + 1
 947         while True:
 948             if (document.body[i] == "status open"):
 949                 document.body[i] = "collapsed false"
 950                 break
 951             elif (document.body[i] == "status collapsed" or
 952                   document.body[i] == "status inlined"):
 953                 document.body[i] = "collapsed true"
 954                 break
 955             elif (document.body[i][:13] == "\\begin_layout"):
 956                 document.warning("Malformed LyX document: Missing 'status'.")
 957                 break
 958             i = i + 1
 959
 960         i = i + 1
 961
 962
 963 def convert_ert(document):
 964     " Convert ERT. "
 965     i = 0
 966     while True:
 967         i = find_token(document.body, "\\begin_inset ERT", i)
 968         if i == -1:
 969             break
 970
 971         # Seach for a line starting 'status'
 972         # If, however, we find a line starting '\begin_layout'
 973         # (_always_ present) then break with a warning message
 974         i = i + 1
 975         while True:
 976             if (document.body[i] == "status Open"):
 977                 document.body[i] = "status open"
 978                 break
 979             elif (document.body[i] == "status Collapsed"):
 980                 document.body[i] = "status collapsed"
 981                 break
 982             elif (document.body[i] == "status Inlined"):
 983                 document.body[i] = "status inlined"
 984                 break
 985             elif (document.body[i][:13] == "\\begin_layout"):
 986                 document.warning("Malformed LyX document: Missing 'status'.")
 987                 break
 988             i = i + 1
 989
 990         i = i + 1
 991
 992
 993 def revert_ert(document):
 994     " Revert ERT. "
 995     i = 0
 996     while True:
 997         i = find_token(document.body, "\\begin_inset ERT", i)
 998         if i == -1:
 999             break
1000
1001         # Seach for a line starting 'status'
1002         # If, however, we find a line starting '\begin_layout'
1003         # (_always_ present) then break with a warning message
1004         i = i + 1
1005         while True:
1006             if (document.body[i] == "status open"):
1007                 document.body[i] = "status Open"
1008                 break
1009             elif (document.body[i] == "status collapsed"):
1010                 document.body[i] = "status Collapsed"
1011                 break
1012             elif (document.body[i] == "status inlined"):
1013                 document.body[i] = "status Inlined"
1014                 break
1015             elif (document.body[i][:13] == "\\begin_layout"):
1016                 document.warning("Malformed LyX document : Missing 'status'.")
1017                 break
1018             i = i + 1
1019
1020         i = i + 1
1021
1022
def convert_minipage(document):
    """ Convert minipages to the box inset.
    We try to use the same order of arguments as lyx does.

    Each 'begin_inset Minipage' header becomes 'begin_inset Box Frameless'
    and the parameter lines that follow it are rewritten in place in
    document.body: the numeric 'position' and 'inner_position' values are
    mapped to letters, 'height', 'width' and 'collapsed' are consumed, and
    the full set of box parameters is written out in the new order.
    Missing parameters are replaced by the defaults chosen below.
    """
    # old numeric values index into these lists
    pos = ["t","c","b"]
    inner_pos = ["c","t","b","s"]

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Minipage", i)
        if i == -1:
            return

        document.body[i] = "\\begin_inset Box Frameless"
        i = i + 1

        # convert old to new position using the pos list
        # (character 9 is the digit right after 'position ')
        if document.body[i][:8] == "position":
            document.body[i] = 'position "%s"' % pos[int(document.body[i][9])]
        else:
            document.body.insert(i, 'position "%s"' % pos[0])
        i = i + 1

        document.body.insert(i, 'hor_pos "c"')
        i = i + 1
        document.body.insert(i, 'has_inner_box 1')
        i = i + 1

        # convert the inner_position
        # (character 15 is the digit right after 'inner_position ')
        if document.body[i][:14] == "inner_position":
            innerpos = inner_pos[int(document.body[i][15])]
            del document.body[i]
        else:
            innerpos = inner_pos[0]

        # We need this since the new file format has a height and width
        # in a different order.
        if document.body[i][:6] == "height":
            height = document.body[i][6:]
            # test for default value of 221 and convert it accordingly
            if height == ' "0pt"' or height == ' "0"':
                height = ' "1pt"'
            del document.body[i]
        else:
            height = ' "1pt"'

        if document.body[i][:5] == "width":
            width = document.body[i][5:]
            del document.body[i]
        else:
            width = ' "0"'

        if document.body[i][:9] == "collapsed":
            # The value follows the keyword after a blank ('collapsed true'),
            # so it has to be stripped before it is compared; comparing the
            # raw slice ' true' with 'true' never matched and turned every
            # collapsed minipage into an open box.
            if document.body[i][9:].strip() == "true":
                status = "collapsed"
            else:
                status = "open"
            del document.body[i]
        else:
            status = "collapsed"

        # Handle special default case:
        if height == ' "1pt"' and innerpos == 'c':
            innerpos = 't'

        # write the remaining box parameters in the new order
        document.body.insert(i, 'inner_pos "' + innerpos + '"')
        i = i + 1
        document.body.insert(i, 'use_parbox 0')
        i = i + 1
        document.body.insert(i, 'width' + width)
        i = i + 1
        document.body.insert(i, 'special "none"')
        i = i + 1
        document.body.insert(i, 'height' + height)
        i = i + 1
        document.body.insert(i, 'height_special "totalheight"')
        i = i + 1
        document.body.insert(i, 'status ' + status)
        i = i + 1
1102
1103
def convert_ertbackslash(body, i, ert, format, default_layout):
    r""" Append the text ert to body[i] as valid ERT code.

    Ordinary characters are appended to the current line body[i].
    A backslash is written as '\backslash ' and a fresh empty line is
    started after it.  A '\n' inserts a line break after the current
    line: '\newline ' plus an empty line for format <= 240, otherwise an
    '\end_layout' / '\begin_layout <default_layout>' pair followed by an
    empty line.  Returns the (maybe incremented) line index i, i.e. the
    index of the line that received the last piece of text."""

    for char in ert:
        if char == '\\':
            body[i] += '\\backslash '
            i += 1
            body.insert(i, '')
        elif char == '\n':
            if format <= 240:
                line_break = ['\\newline ', '']
            else:
                line_break = ['\\end_layout', '',
                              '\\begin_layout %s' % default_layout, '']
            body[i + 1:i + 1] = line_break
            # continue on the empty line that ends the inserted break
            i += len(line_break)
        else:
            body[i] += char
    return i
1124
1125
def ert2latex(lines, format):
    r""" Convert the ERT body given in lines to a LaTeX string.

    A trailing '\backslash' token on a line becomes a single backslash.
    Layout start lines ('\layout X' for format <= 224, '\begin_layout X'
    otherwise) become two newlines for format <= 240 and one newline
    above that; for format <= 240 a trailing '\newline' token becomes a
    newline.  For format > 224 '\end_layout' lines are dropped.
    The surrounding \begin_layout ... \end_layout pair must not be included"""

    backslash_re = re.compile(r'\\backslash\s*$')
    newline_re = re.compile(r'\\newline\s*$')
    if format <= 224:
        begin_re = re.compile(r'\\layout\s*\S+$')
    else:
        begin_re = re.compile(r'\\begin_layout\s*\S+$')
    end_re = re.compile(r'\\end_layout\s*$')

    uses_newline_token = format <= 240
    has_end_layout = format > 224

    pieces = []
    for raw in lines:
        text = backslash_re.sub('\\\\', raw)
        if begin_re.match(text):
            text = '\n\n' if uses_newline_token else '\n'
        elif uses_newline_token:
            text = newline_re.sub('\n', text)
        # checked on the already converted text, like the steps above
        if has_end_layout and end_re.match(text):
            text = ''
        pieces.append(text)
    return ''.join(pieces)
1152
1153
def get_par_params(lines, i):
    """ get all paragraph parameters. They can be all on one line or on several lines.
    lines[i] must be the first parameter line

    Starting at index i, consecutive lines are collected as long as they
    begin with a backslash whose first word is a known paragraph
    parameter.  The collected lines are stripped and joined with single
    blanks; an empty string is returned when lines[i] is not a parameter
    line.  The scan stops at the end of lines instead of running past it."""
    par_params = ('added_space_bottom', 'added_space_top', 'align',
                 'labelwidthstring', 'line_bottom', 'line_top', 'noindent',
                 'pagebreak_bottom', 'pagebreak_top', 'paragraph_spacing',
                 'start_of_appendix')
    collected = []
    while i < len(lines):
        line = lines[i]
        # We cannot check for '\\' only because paragraphs may start e.g.
        # with '\\backslash'
        if line[:1] != '\\':
            break
        words = line[1:].split()
        # a lone backslash has no keyword at all and is no parameter line
        if not words or words[0] not in par_params:
            break
        collected.append(line.strip())
        i = i + 1
    return ' '.join(collected)
1168
1169
def lyxsize2latexsize(lyxsize):
    """ Convert LyX font size to LaTeX fontsize.

    Returns the LaTeX size command (with leading backslash) for a known
    LyX size name and an empty string for any other value. """
    latex_names = {
        "tiny": "tiny",
        "scriptsize": "scriptsize",
        "footnotesize": "footnotesize",
        "small": "small",
        "normal": "normalsize",
        "large": "large",
        "larger": "Large",
        "largest": "LARGE",
        "huge": "huge",
        "giant": "Huge",
    }
    latex_name = latex_names.get(lyxsize)
    if latex_name is None:
        return ''
    return '\\' + latex_name
1179
1180
def revert_breaks(document):
    """ Change vspace insets, page breaks and lyxlines to paragraph options
    (if possible) or ERT.

    The body is searched for VSpace insets, lyxline and newpage tokens.
    For every hit the run of such insets up to the next paragraph is
    collected and then handled in one of three ways:

    - the run fills a paragraph of its own that has no parameters: it is
      merged into the previous paragraph as bottom parameters or into the
      next paragraph as top parameters, and the paragraph is deleted;
    - the run sits at the beginning or the end of a paragraph: it is turned
      into top or bottom parameters of that paragraph;
    - otherwise (or when the parameter is already in use): only the first
      inset is replaced by ERT, the rest is picked up by later loop runs.

    document.body is modified in place; the lyxline ERT additionally adds
    a helper command to the preamble.  Nothing is returned."""

    # Get default spaceamount
    i = find_token(document.header, '\\defskip', 0)
    if i == -1:
        defskipamount = 'medskip'
    else:
        defskipamount = document.header[i].split()[1]

    # first word of a matching body line -> internal inset name
    keys = {"\\begin_inset" : "vspace", "\\lyxline" : "lyxline",
            "\\newpage" : "newpage"}
    # internal inset name -> paragraph parameter used above / below a paragraph
    keywords_top = {"vspace" : "\\added_space_top", "lyxline" : "\\line_top",
                    "newpage" : "\\pagebreak_top"}
    keywords_bot = {"vspace" : "\\added_space_bottom", "lyxline" : "\\line_bottom",
                    "newpage" : "\\pagebreak_bottom"}
    tokens = ["\\begin_inset VSpace", "\\lyxline", "\\newpage"]

    # Convert the insets
    i = 0
    while True:
        i = find_tokens(document.body, tokens, i)
        if i == -1:
            return

        # Are we at the beginning of a paragraph?
        paragraph_start = 1
        # get_paragraph / get_next_paragraph are defined elsewhere in this
        # file; presumably they return the index of the layout line of the
        # paragraph containing / following line i -- verify there.
        this_par = get_paragraph(document.body, i, document.format - 1)
        start = this_par + 1
        params = get_par_params(document.body, start)
        size = "normal"
        # Paragraph parameters may be on one or more lines.
        # Find the start of the real paragraph text.
        # (params is one string, so 'in' is a substring test here)
        while document.body[start][:1] == '\\' and document.body[start].split()[0] in params:
            start = start + 1
        # Any nonempty line other than a font size change between the
        # paragraph start and the inset means we are not at the beginning.
        for k in range(start, i):
            if document.body[k].find("\\size") != -1:
                # store font size
                size = document.body[k].split()[1]
            elif is_nonempty_line(document.body[k]):
                paragraph_start = 0
                break
        # Find the end of the real paragraph text.
        next_par = get_next_paragraph(document.body, i, document.format - 1)
        if next_par == -1:
            document.warning("Malformed LyX document: Missing next paragraph.")
            i = i + 1
            continue

        # first line of our insets
        inset_start = i
        # last line of our insets
        inset_end = inset_start
        # Are we at the end of a paragraph?
        paragraph_end = 1
        # start and end line numbers to delete if we convert this inset
        del_lines = list()
        # is this inset a lyxline above a paragraph?
        top = list()
        # raw inset information
        lines = list()
        # name of this inset
        insets = list()
        # font size of this inset
        sizes = list()

        # Detect subsequent lyxline, vspace and pagebreak insets created by convert_breaks()
        # n counts the insets collected so far; the lists above are
        # parallel and indexed 0 .. n-1.
        n = 0
        k = inset_start
        while k < next_par:
            if find_tokens(document.body, tokens, k) == k:
                # inset to convert
                lines.append(document.body[k].split())
                insets.append(keys[lines[n][0]])
                del_lines.append([k, k])
                top.append(0)
                sizes.append(size)
                n = n + 1
                inset_end = k
            elif document.body[k].find("\\size") != -1:
                # store font size
                size = document.body[k].split()[1]
            elif find_token(document.body, "\\begin_inset ERT", k) == k:
                # find_token returns -1 if nothing is found, so 0 means
                # that there is no '\layout' line
                ert_begin = find_token(document.body, "\\layout", k) + 1
                if ert_begin == 0:
                    document.warning("Malformed LyX document: Missing '\\layout'.")
                    # NOTE(review): k is not advanced before this 'continue',
                    # so the same line is examined again -- looks like an
                    # endless loop on such a document; confirm.
                    continue
                ert_end = find_end_of_inset(document.body, k)
                if ert_end == -1:
                    document.warning("Malformed LyX document: Missing '\\end_inset'.")
                    # NOTE(review): same as above, k is not advanced here.
                    continue
                ert = ert2latex(document.body[ert_begin:ert_end], document.format - 1)
                if (n > 0 and insets[n - 1] == "lyxline" and
                    ert == '\\vspace{-1\\parskip}\n'):
                    # vspace ERT created by convert_breaks() for top lyxline
                    top[n - 1] = 1
                    del_lines[n - 1][1] = ert_end
                    inset_end = ert_end
                    k = ert_end
                else:
                    # any other ERT counts as paragraph content
                    paragraph_end = 0
                    break
            elif (n > 0 and insets[n - 1] == "vspace" and
                  find_token(document.body, "\\end_inset", k) == k):
                # ignore end of vspace inset
                del_lines[n - 1][1] = k
                inset_end = k
            elif is_nonempty_line(document.body[k]):
                paragraph_end = 0
                break
            k = k + 1

        # Determine space amount for vspace insets
        # (the third word of a '\begin_inset VSpace <amount>' line)
        spaceamount = list()
        arguments = list()
        for k in range(n):
            if insets[k] == "vspace":
                spaceamount.append(lines[k][2])
                arguments.append(' ' + spaceamount[k] + ' ')
            else:
                spaceamount.append('')
                arguments.append(' ')

        # Can we convert to top paragraph parameters?
        before = 0
        if ((n == 3 and insets[0] == "newpage" and insets[1] == "vspace" and
             insets[2] == "lyxline" and top[2]) or
            (n == 2 and
             ((insets[0] == "newpage" and insets[1] == "vspace") or
              (insets[0] == "newpage" and insets[1] == "lyxline" and top[1]) or
              (insets[0] == "vspace"  and insets[1] == "lyxline" and top[1]))) or
            (n == 1 and insets[0] == "lyxline" and top[0])):
            # These insets have been created before a paragraph by
            # convert_breaks()
            before = 1

        # Can we convert to bottom paragraph parameters?
        after = 0
        if ((n == 3 and insets[0] == "lyxline" and not top[0] and
             insets[1] == "vspace" and insets[2] == "newpage") or
            (n == 2 and
             ((insets[0] == "lyxline" and not top[0] and insets[1] == "vspace") or
              (insets[0] == "lyxline" and not top[0] and insets[1] == "newpage") or
              (insets[0] == "vspace"  and insets[1] == "newpage"))) or
            (n == 1 and insets[0] == "lyxline" and not top[0])):
            # These insets have been created after a paragraph by
            # convert_breaks()
            after = 1

        if paragraph_start and paragraph_end:
            # We are in a paragraph of our own.
            # We must not delete this paragraph if it has parameters
            if params == '':
                # First try to merge with the previous paragraph.
                # We try the previous paragraph first because we would
                # otherwise need ERT for two subsequent vspaces.
                prev_par = get_paragraph(document.body, this_par - 1, document.format - 1) + 1
                if prev_par > 0 and not before:
                    prev_params = get_par_params(document.body, prev_par + 1)
                    # ert is set when the merge is not possible
                    ert = 0
                    # determine font size
                    prev_size = "normal"
                    k = prev_par + 1
                    while document.body[k][:1] == '\\' and document.body[k].split()[0] in prev_params:
                        k = k + 1
                    while k < this_par:
                        if document.body[k].find("\\size") != -1:
                            prev_size = document.body[k].split()[1]
                            break
                        elif document.body[k].find("\\begin_inset") != -1:
                            # skip insets
                            k = find_end_of_inset(document.body, k)
                        elif is_nonempty_line(document.body[k]):
                            break
                        k = k + 1
                    # The merge is impossible if the previous paragraph
                    # already uses one of the bottom parameters or if a
                    # lyxline has a different font size.
                    for k in range(n):
                        if (keywords_bot[insets[k]] in prev_params or
                            (insets[k] == "lyxline" and sizes[k] != prev_size)):
                            ert = 1
                            break
                    if not ert:
                        for k in range(n):
                            document.body.insert(prev_par + 1,
                                             keywords_bot[insets[k]] + arguments[k])
                        # n lines have just been inserted above this
                        # paragraph, hence the offset of n
                        del document.body[this_par+n:next_par-1+n]
                        i = this_par + n
                        continue
                # Then try next paragraph
                if next_par > 0 and not after:
                    next_params = get_par_params(document.body, next_par + 1)
                    ert = 0
                    # NOTE(review): k still holds a value left over from an
                    # earlier loop here and is reset two statements below --
                    # this skip loop looks misplaced; confirm.
                    while document.body[k][:1] == '\\' and document.body[k].split()[0] in next_params:
                        k = k + 1
                    # determine font size
                    next_size = "normal"
                    k = next_par + 1
                    # NOTE(review): k starts behind next_par, which is
                    # presumably larger than this_par, so this loop looks
                    # like it never runs and next_size stays "normal";
                    # confirm the intended upper bound.
                    while k < this_par:
                        if document.body[k].find("\\size") != -1:
                            next_size = document.body[k].split()[1]
                            break
                        elif is_nonempty_line(document.body[k]):
                            break
                        k = k + 1
                    # The merge is impossible if the next paragraph already
                    # uses one of the top parameters or if a lyxline has a
                    # different font size.
                    for k in range(n):
                        if (keywords_top[insets[k]] in next_params or
                            (insets[k] == "lyxline" and sizes[k] != next_size)):
                            ert = 1
                            break
                    if not ert:
                        for k in range(n):
                            document.body.insert(next_par + 1,
                                             keywords_top[insets[k]] + arguments[k])
                        # the insertion happened behind this paragraph, so
                        # the indices used here are still valid
                        del document.body[this_par:next_par-1]
                        i = this_par
                        continue
        elif paragraph_start or paragraph_end:
            # Convert to paragraph formatting if we are at the beginning or end
            # of a paragraph and the resulting paragraph would not be empty
            # The order is important: del and insert invalidate some indices
            if paragraph_start:
                keywords = keywords_top
            else:
                keywords = keywords_bot
            # ert is set when one of the parameters is already in use
            ert = 0
            for k in range(n):
                if keywords[insets[k]] in params:
                    ert = 1
                    break
            if not ert:
                for k in range(n):
                    document.body.insert(this_par + 1,
                                     keywords[insets[k]] + arguments[k])
                    # the inserted line shifts this and all later insets
                    # down by one
                    for j in range(k, n):
                        del_lines[j][0] = del_lines[j][0] + 1
                        del_lines[j][1] = del_lines[j][1] + 1
                    del document.body[del_lines[k][0]:del_lines[k][1]+1]
                    deleted = del_lines[k][1] - del_lines[k][0] + 1
                    # the deleted lines shift all later insets up again
                    for j in range(k + 1, n):
                        del_lines[j][0] = del_lines[j][0] - deleted
                        del_lines[j][1] = del_lines[j][1] - deleted
                i = this_par
                continue

        # Convert the first inset to ERT.
        # The others are converted in the next loop runs (if they exist)
        if insets[0] == "vspace":
            # Only the header line of the inset is replaced; presumably the
            # '\end_inset' line of the VSpace inset that follows now closes
            # the ERT inset -- confirm.
            document.body[i:i+1] = ['\\begin_inset ERT', 'status Collapsed', '',
                                '\\layout %s' % document.default_layout, '', '\\backslash ']
            i = i + 6
            # a trailing '*' is removed and remembered in keep
            if spaceamount[0][-1] == '*':
                spaceamount[0] = spaceamount[0][:-1]
                keep = 1
            else:
                keep = 0

            # Replace defskip by the actual value
            if spaceamount[0] == 'defskip':
                spaceamount[0] = defskipamount

            # LaTeX does not know \\smallskip* etc
            if keep:
                if spaceamount[0] == 'smallskip':
                    spaceamount[0] = '\\smallskipamount'
                elif spaceamount[0] == 'medskip':
                    spaceamount[0] = '\\medskipamount'
                elif spaceamount[0] == 'bigskip':
                    spaceamount[0] = '\\bigskipamount'
                elif spaceamount[0] == 'vfill':
                    spaceamount[0] = '\\fill'

            # Finally output the LaTeX code
            if (spaceamount[0] == 'smallskip' or spaceamount[0] == 'medskip' or
                spaceamount[0] == 'bigskip'   or spaceamount[0] == 'vfill'):
                document.body.insert(i, spaceamount[0] + '{}')
            else :
                if keep:
                    document.body.insert(i, 'vspace*{')
                else:
                    document.body.insert(i, 'vspace{')
                i = convert_ertbackslash(document.body, i, spaceamount[0], document.format - 1, document.default_layout)
                document.body[i] = document.body[i] + '}'
            i = i + 1
        elif insets[0] == "lyxline":
            document.body[i] = ''
            latexsize = lyxsize2latexsize(size)
            if latexsize == '':
                document.warning("Could not convert LyX fontsize '%s' to LaTeX font size." % size)
                latexsize = '\\normalsize'
            i = insert_ert(document.body, i, 'Collapsed',
                           '\\lyxline{%s}' % latexsize,
                           document.format - 1, document.default_layout)
            # We use \providecommand so that we don't get an error if native
            # lyxlines are used (LyX writes first its own preamble and then
            # the user specified one)
            # NOTE(review): there is no comma between the last two string
            # literals, so Python joins them into one preamble line; this
            # looks accidental -- confirm.
            add_to_preamble(document,
                            ['% Commands inserted by lyx2lyx for lyxlines',
                             '\\providecommand{\\lyxline}[1]{',
                             '  {#1 \\vspace{1ex} \\hrule width \\columnwidth \\vspace{1ex}}'
                             '}'])
        elif insets[0] == "newpage":
            document.body[i] = ''
            i = insert_ert(document.body, i, 'Collapsed', '\\newpage{}',
                           document.format - 1, document.default_layout)
1485
1486
def convert_len(len, special):
    """ Convert a LyX length into a LaTeX length.

    If special is not 'none', the length becomes its numerical value
    followed by a backslash and special.  If the length then contains
    one of the LyX percent units, it is replaced by a hundredth of its
    numerical value followed by the matching LaTeX length.  Any other
    length is returned unchanged. """
    latex_units = (("text%", "\\textwidth"), ("col%", "\\columnwidth"),
                   ("page%", "\\pagewidth"), ("line%", "\\linewidth"),
                   ("theight%", "\\textheight"), ("pheight%", "\\pageheight"))

    # Convert special lengths
    if special != 'none':
        len = '%f\\' % len2value(len) + special

    # Convert LyX units to LaTeX units; only the first matching unit counts
    for lyx_unit, latex_unit in latex_units:
        if lyx_unit in len:
            return '%f' % (len2value(len) / 100) + latex_unit

    return len
1504
1505
def convert_ertlen(body, i, len, special, format, default_layout):
    """ Append a LyX length to body[i] as valid ERT code.

    The length is first turned into a LaTeX length by convert_len and
    the result is then written by convert_ertbackslash.
    Return the (maybe incremented) line index i. """
    latex_len = convert_len(len, special)
    return convert_ertbackslash(body, i, latex_len, format, default_layout)
1511
1512
def len2value(len):
    """ Return the value of len without the unit in numerical form.

    The first number found in len (optional sign, digits with at most
    one decimal point) is returned as a float.  If len contains no
    number, 1.0 is returned. """
    # Match only what float() can parse: a stray '.' or a second decimal
    # point must not become part of the number.
    result = re.search(r'[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)', len)
    if result:
        return float(result.group(0))
    # No number means 1.0
    return 1.0
1520
1521
def insert_ert(body, i, status, text, format, default_layout):
    """ Convert text to ERT and insert it at body[i]

    An ERT inset with the given status is opened at index i, followed by
    a layout line for default_layout ('layout' for format <= 224,
    'begin_layout' plus a closing 'end_layout' otherwise).  The text is
    written by convert_ertbackslash and the inset is closed.
    Return the index of the line after the inserted ERT"""

    if format <= 224:
        layout_line = '\\layout %s' % default_layout
    else:
        layout_line = '\\begin_layout %s' % default_layout
    header = ['\\begin_inset ERT', 'status ' + status, '', layout_line, '']
    body[i:i] = header
    # the last header line is the empty line that receives the text
    text_line = i + len(header) - 1
    i = convert_ertbackslash(body, text_line, text, format, default_layout) + 1

    footer = ['', '\\end_inset', '']
    if format > 224:
        footer.insert(0, '\\end_layout')
    body[i:i] = footer
    return i + len(footer)
1540
1541
def add_to_preamble(document, text):
    """ Append the lines in text to document.preamble unless they are
    already there.
    Only the first line is checked: if find_token locates text[0] in the
    preamble, nothing is added."""

    first_line_present = find_token(document.preamble, text[0], 0) != -1
    if not first_line_present:
        document.preamble.extend(text)
1550
1551
def convert_frameless_box(document):
    """ Convert frameless box.

    Every Frameless inset in the body is replaced by a Minipage inset.
    If the box uses settings that cannot be expressed with the minipage
    parameters (see the test below), the real box is written as ERT around
    the Minipage inset and the minipage environment is temporarily
    redefined to be empty, using helper macros added to the preamble.
    """
    pos = ['t', 'c', 'b']
    inner_pos = ['c', 't', 'b', 's']
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Frameless', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing '\\end_inset'.")
            i = i + 1
            continue
        # Remove the inset start line; the inset end moves up by one line.
        del document.body[i]
        j = j - 1

        # Gather parameters
        params = {'position':0, 'hor_pos':'c', 'has_inner_box':'1',
                  'inner_pos':1, 'use_parbox':'0', 'width':'100col%',
                  'special':'none', 'height':'1in',
                  'height_special':'totalheight', 'collapsed':'false'}
        for key in list(params.keys()):
            value = get_value(document.body, key, i, j).replace('"', '')
            if value != "":
                if key == 'position':
                    # convert new to old position: 'position "t"' -> 0
                    value = find_token(pos, value, 0)
                    if value != -1:
                        params[key] = value
                elif key == 'inner_pos':
                    # convert inner position
                    value = find_token(inner_pos, value, 0)
                    if value != -1:
                        params[key] = value
                else:
                    params[key] = value
                j = del_token(document.body, key, i, j)
        i = i + 1

        # Convert to minipage or ERT?
        # Note that the inner_position and height parameters of a minipage
        # inset are ignored and not accessible for the user, although they
        # are present in the file format and correctly read in and written.
        # Therefore we convert to ERT if they do not have their LaTeX
        # defaults. These are:
        # - the value of "position" for "inner_pos"
        # - "\totalheight"          for "height"
        if (params['use_parbox'] != '0' or
            params['has_inner_box'] != '1' or
            params['special'] != 'none' or
            params['height_special'] != 'totalheight' or
            len2value(params['height']) != 1.0):

            # Here we know that this box is not supported in file format 224.
            # Therefore we need to convert it to ERT. We can't simply convert
            # the beginning and end of the box to ERT, because the
            # box inset may contain layouts that are different from the
            # surrounding layout. After the conversion the contents of the
            # box inset is on the same level as the surrounding text, and
            # paragraph layouts and align parameters can get mixed up.

            # A possible solution for this problem:
            # Convert the box to a minipage and redefine the minipage
            # environment in ERT so that the original box is simulated.
            # For minipages we could do this in a way that the width and
            # position can still be set from LyX, but this did not work well.
            # This is not possible for parboxes either, so we convert the
            # original box to ERT, put the minipage inset inside the box
            # and redefine the minipage environment to be empty.

            # Commands that are independent of a particular box can go to
            # the preamble.
            # We need to define lyxtolyxrealminipage with 3 optional
            # arguments although LyX 1.3 uses only the first one.
            # Otherwise we will get LaTeX errors if this document is
            # converted to format 225 or above again (LyX 1.4 uses all
            # optional arguments).
            add_to_preamble(document,
                ['% Commands inserted by lyx2lyx for frameless boxes',
                 '% Save the original minipage environment',
                 '\\let\\lyxtolyxrealminipage\\minipage',
                 '\\let\\endlyxtolyxrealminipage\\endminipage',
                 '% Define an empty lyxtolyximinipage environment',
                 '% with 3 optional arguments',
                 '\\newenvironment{lyxtolyxiiiminipage}[4]{}{}',
                 '\\newenvironment{lyxtolyxiiminipage}[2][\\lyxtolyxargi]%',
                 '  {\\begin{lyxtolyxiiiminipage}{\\lyxtolyxargi}{\\lyxtolyxargii}{#1}{#2}}%',
                 '  {\\end{lyxtolyxiiiminipage}}',
                 '\\newenvironment{lyxtolyximinipage}[1][\\totalheight]%',
                 '  {\\def\\lyxtolyxargii{{#1}}\\begin{lyxtolyxiiminipage}}%',
                 '  {\\end{lyxtolyxiiminipage}}',
                 '\\newenvironment{lyxtolyxminipage}[1][c]%',
                 '  {\\def\\lyxtolyxargi{{#1}}\\begin{lyxtolyximinipage}}',
                 '  {\\end{lyxtolyximinipage}}'])

            if params['use_parbox'] != '0':
                ert = '\\parbox'
            else:
                ert = '\\begin{lyxtolyxrealminipage}'

            # convert optional arguments only if not latex default
            if (pos[params['position']] != 'c' or
                inner_pos[params['inner_pos']] != pos[params['position']] or
                params['height_special'] != 'totalheight' or
                len2value(params['height']) != 1.0):
                ert = ert + '[' + pos[params['position']] + ']'
            if (inner_pos[params['inner_pos']] != pos[params['position']] or
                params['height_special'] != 'totalheight' or
                len2value(params['height']) != 1.0):
                ert = ert + '[' + convert_len(params['height'],
                                              params['height_special']) + ']'
            if inner_pos[params['inner_pos']] != pos[params['position']]:
                ert = ert + '[' + inner_pos[params['inner_pos']] + ']'

            ert = ert + '{' + convert_len(params['width'],
                                          params['special']) + '}'

            if params['use_parbox'] != '0':
                ert = ert + '{'
            ert = ert + '\\let\\minipage\\lyxtolyxminipage%\n'
            ert = ert + '\\let\\endminipage\\endlyxtolyxminipage%\n'

            old_i = i
            i = insert_ert(document.body, i, 'Collapsed', ert, document.format - 1, document.default_layout)
            # Keep j in step with the lines that insert_ert added before it.
            j = j + i - old_i - 1

            document.body[i:i] = ['\\begin_inset Minipage',
                              'position %d' % params['position'],
                              'inner_position 1',
                              'height "1in"',
                              'width "' + params['width'] + '"',
                              'collapsed ' + params['collapsed']]
            i = i + 6
            j = j + 6

            # Restore the original minipage environment since we may have
            # minipages inside this box.
            # Start a new paragraph because the following may be nonstandard
            document.body[i:i] = ['\\layout %s' % document.default_layout, '', '']
            i = i + 2
            j = j + 3
            ert = '\\let\\minipage\\lyxtolyxrealminipage%\n'
            # The saved end macro is \endlyxtolyxrealminipage (see the
            # preamble commands above); any other name would leave
            # \endminipage undefined for minipages nested in this box.
            ert = ert + '\\let\\endminipage\\endlyxtolyxrealminipage%'
            old_i = i
            i = insert_ert(document.body, i, 'Collapsed', ert, document.format - 1, document.default_layout)
            j = j + i - old_i - 1

            # Redefine the minipage end before the inset end.
            # Start a new paragraph because the previous may be nonstandard
            document.body[j:j] = ['\\layout %s' % document.default_layout, '', '']
            j = j + 2
            ert = '\\let\\endminipage\\endlyxtolyxminipage'
            j = insert_ert(document.body, j, 'Collapsed', ert, document.format - 1, document.default_layout)
            j = j + 1
            document.body.insert(j, '')
            j = j + 1

            # LyX writes '%\n' after each box. Therefore we need to end our
            # ERT with '%\n', too, since this may swallow a following space.
            if params['use_parbox'] != '0':
                ert = '}%\n'
            else:
                ert = '\\end{lyxtolyxrealminipage}%\n'
            j = insert_ert(document.body, j, 'Collapsed', ert, document.format - 1, document.default_layout)

            # We don't need to restore the original minipage after the inset
            # end because the scope of the redefinition is the original box.

        else:

            # Convert to minipage
            document.body[i:i] = ['\\begin_inset Minipage',
                              'position %d' % params['position'],
                              'inner_position %d' % params['inner_pos'],
                              'height "' + params['height'] + '"',
                              'width "' + params['width'] + '"',
                              'collapsed ' + params['collapsed']]
            i = i + 6
1731
1732
def remove_branches(document):
    """ Remove branches.

    Branch definitions are deleted from the header (with a warning for
    each one) and Branch insets in the body are dissolved: the inset
    delimiters, the 'collapsed' line and the first layout line of the
    inset are deleted, the remaining contents stay in place. """
    header = document.header
    start = 0
    while True:
        start = find_token(header, "\\branch", start)
        if start == -1:
            break
        document.warning("Removing branch %s." % header[start].split()[1])
        end = find_token(header, "\\end_branch", start)
        if end == -1:
            document.warning("Malformed LyX document: Missing '\\end_branch'.")
            break
        del header[start:end + 1]

    body = document.body
    start = 0
    while True:
        start = find_token(body, "\\begin_inset Branch", start)
        if start == -1:
            return
        end = find_end_of_inset(body, start)
        if end == -1:
            document.warning("Malformed LyX document: Missing '\\end_inset'.")
            start += 1
            continue
        # Delete the inset start first; the inset end is then one line up.
        del body[start]
        del body[end - 1]
        # Search for a line starting with 'collapsed'. Stop at the first
        # line starting with '\layout' (_always_ present), warning if no
        # 'collapsed' line was seen before it.
        seen_collapsed = False
        while True:
            line = body[start]
            if line.startswith("collapsed"):
                del body[start]
                seen_collapsed = True
            elif line.startswith("\\layout"):
                if not seen_collapsed:
                    document.warning("Malformed LyX document: Missing 'collapsed'.")
                # Delete this new paragraph, since it would not appear in
                # .tex output. This avoids also empty paragraphs.
                del body[start]
                break
            else:
                start += 1
1776
1777
def convert_jurabib(document):
    """ Convert jurabib: add a disabled use_jurabib setting to the header,
    directly after the use_numerical_citations line. """
    header = document.header
    anchor = find_token(header, '\\use_numerical_citations', 0)
    if anchor != -1:
        header.insert(anchor + 1, '\\use_jurabib 0')
        return
    document.warning("Malformed lyx document: Missing '\\use_numerical_citations'.")
1785
1786
def revert_jurabib(document):
    """ Revert jurabib: remove the use_jurabib header line if it is
    disabled; warn and keep it otherwise. """
    header = document.header
    where = find_token(header, '\\use_jurabib', 0)
    if where == -1:
        document.warning("Malformed lyx document: Missing '\\use_jurabib'.")
        return
    jurabib_enabled = get_value(header, '\\use_jurabib', 0) != "0"
    if jurabib_enabled:
        document.warning("Conversion of '\\use_jurabib = 1' not yet implemented.")
        # Don't remove '\\use_jurabib' so that people will get warnings by lyx
        return
    del header[where]
1798
1799
def convert_bibtopic(document):
    """ Convert bibtopic: add a disabled use_bibtopic setting to the
    header, directly after the use_jurabib line. """
    header = document.header
    anchor = find_token(header, '\\use_jurabib', 0)
    if anchor != -1:
        header.insert(anchor + 1, '\\use_bibtopic 0')
        return
    document.warning("Malformed lyx document: Missing '\\use_jurabib'.")
1807
1808
def revert_bibtopic(document):
    """ Revert bibtopic.

    Remove the use_bibtopic line from the header if bibtopic is disabled.
    If it is enabled the setting cannot be converted: a warning is issued
    and the line is kept, in the same way as revert_jurabib does. """
    i = find_token(document.header, '\\use_bibtopic', 0)
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\use_bibtopic'.")
        return
    if get_value(document.header, '\\use_bibtopic', 0) != "0":
        document.warning("Conversion of '\\use_bibtopic = 1' not yet implemented.")
        # Don't remove '\\use_bibtopic' so that people will get warnings by lyx
        return
    del document.header[i]
1819
1820
def convert_float(document):
    """ Convert sideway floats: add 'sideways false' after the 'wide'
    line of every Float inset. """
    body = document.body
    cursor = 0
    while True:
        cursor = find_token_exact(body, '\\begin_inset Float', cursor)
        if cursor == -1:
            return
        # Search for a line starting with 'wide'. If, however, we find a
        # line starting with '\begin_layout' (_always_ present) first,
        # give up on this inset with a warning message.
        cursor += 1
        while True:
            line = body[cursor]
            if line.startswith("wide"):
                body.insert(cursor + 1, 'sideways false')
                break
            if line.startswith("\\begin_layout"):
                document.warning("Malformed lyx document: Missing 'wide'.")
                break
            cursor += 1
        cursor += 1
1841
1842
def revert_float(document):
    """ Revert sideways floats.

    Only figure and table floats are handled. If the float is not
    sideways its 'sideways' line is deleted. Otherwise the float inset
    is replaced by ERT insets that open and close the matching
    sideways environment, and rotfloat is added to the preamble. """
    body = document.body
    float_start = re.compile(r'\\begin_inset Float (.*)$')
    # Start of an ERT inset, up to and including the backslash of the command
    ert_head = '\\begin_inset ERT\nstatus Collapsed\n\n' \
               '\\layout Standard\n\n\n\\backslash\n'
    begin = 0
    while True:
        begin = find_token_exact(body, '\\begin_inset Float', begin)
        if begin == -1:
            return
        floattype = float_start.match(body[begin]).group(1)
        if floattype not in ("figure", "table"):
            begin += 1
            continue
        end = find_end_of_inset(body, begin)
        if end == -1:
            document.warning("Malformed lyx document: Missing '\\end_inset'.")
            begin += 1
            continue
        if get_value(body, 'sideways', begin, end) == "false":
            # Ordinary float: only the new parameter has to go.
            del_token(body, 'sideways', begin, end)
            begin += 1
            continue
        first_layout = find_token(body, "\\begin_layout Standard", begin + 1, end)
        if first_layout == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout Standard' in Float inset.")
            return
        # Replace the inset end before deleting lines above it, because
        # the deletion invalidates the index of the inset end.
        body[end] = '\\layout Standard\n' + ert_head + \
                    'end{sideways' + floattype + '}\n\n\\end_inset\n'
        del body[begin + 1:first_layout - 1]
        body[begin] = ert_head + \
                      'begin{sideways' + floattype + '}\n\n\\end_inset\n\n'
        add_to_preamble(document,
                        ['\\usepackage{rotfloat}\n'])
        begin += 1
1880
1881
def convert_graphics(document):
    """ Add extension to filenames of insetgraphics if necessary.

    For every Graphics inset the 'filename' line is looked up. If the
    named file does not exist but the same name with '.ps' or '.eps'
    appended does, that extension is added to the name in the document.
    A relative name cannot be checked when the document directory is
    unknown; in that case only a warning is issued.
    """
    # NOTE(review): the parameter line of a Graphics inset is assumed to
    # start with the token 'filename' -- confirm against the file format.
    keyword = "filename"
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return

        j = find_token_exact(document.body, keyword, i)
        if j == -1:
            return
        i = i + 1
        filename = document.body[j].split()[1]
        if document.dir == u'' and not os.path.isabs(filename):
            # We don't know the directory and cannot check the document.
            # We could use a heuristic and take the current directory,
            # and we could try to find out if the name has an extension,
            # but that would be just guesses and could be wrong.
            document.warning("""Warning: Cannot determine whether document
         %s
         needs an extension when reading from standard input.
         You may need to correct the document manually or run
         lyx2lyx again with the .lyx document as commandline argument.""" % filename)
            continue
        absname = os.path.normpath(os.path.join(document.dir, filename))
        # This needs to be the same algorithm as in pre 233 insetgraphics
        if access(absname, F_OK):
            continue
        for extension in (".ps", ".eps"):
            if access(absname + extension, F_OK):
                line = document.body[j]
                # Only touch the argument after the keyword: a plain
                # str.replace() would also rewrite the keyword itself
                # for names such as 'name' or 'file'.
                start = line.index(filename, line.index(keyword) + len(keyword))
                document.body[j] = (line[:start] + filename + extension +
                                    line[start + len(filename):])
                break
1916
1917
def convert_names(document):
    """ Convert in the docbook backend from firstname and surname style
    to charstyles.

    Nothing is done for other backends. For an Author paragraph that is
    empty and followed by a nested block, the text of the FirstName and
    Surname paragraphs of that block is collected, the block is deleted
    and two CharStyle insets holding the collected text are inserted.
    """
    if document.backend != "docbook":
        return

    i = 0

    while True:
        i = find_token(document.body, "\\begin_layout Author", i)
        if i == -1:
            return

        # Skip the empty lines after the layout line
        i = i + 1
        while document.body[i] == "":
            i = i + 1

        # Only an empty Author paragraph followed by a nested block is converted
        if document.body[i][:11] != "\\end_layout" or document.body[i+2][:13] != "\\begin_deeper":
            i = i + 1
            continue

        k = i
        i = find_end_of( document.body, i+3, "\\begin_deeper","\\end_deeper")
        if i == -1:
            # something is really wrong, abort
            document.warning("Missing \\end_deeper, after style Author.")
            document.warning("Aborted attempt to parse FirstName and Surname.")
            return
        firstname, surname = "", ""

        name = document.body[k:i]

        j = find_token(name, "\\begin_layout FirstName", 0)
        if j != -1:
            j = j + 1
            while(name[j] != "\\end_layout"):
                firstname = firstname + name[j]
                j = j + 1

        j = find_token(name, "\\begin_layout Surname", 0)
        if j != -1:
            j = j + 1
            while(name[j] != "\\end_layout"):
                surname = surname + name[j]
                j = j + 1

        # delete name
        del document.body[k+2:i+1]

        # All backslashes are written as '\\': a bare '\e' is an invalid
        # escape sequence that newer Python versions warn about.
        document.body[k-1:k-1] = ["", "",
                          "\\begin_inset CharStyle Firstname",
                          "status inlined",
                          "",
                          '\\begin_layout %s' % document.default_layout,
                          "",
                          "%s" % firstname,
                          "\\end_layout",
                          "",
                          "\\end_inset",
                          "",
                          "",
                          "\\begin_inset CharStyle Surname",
                          "status inlined",
                          "",
                          '\\begin_layout %s' % document.default_layout,
                          "",
                          "%s" % surname,
                          "\\end_layout",
                          "",
                          "\\end_inset",
                          ""]
1990
1991
def revert_names(document):
    """ Revert in the docbook backend from firstname and surname char style
    to styles.

    Only the backend is checked; nothing is changed for any backend.
    """
    uses_docbook = document.backend == "docbook"
    if not uses_docbook:
        return
1998
1999
def convert_cite_engine(document):
    """ Replace the three consecutive header lines use_natbib,
    use_numerical_citations and use_jurabib by one cite_engine line.

    The engine is "natbib_numerical" or "natbib_authoryear" if natbib is
    used, "jurabib" if jurabib is used and "basic" otherwise.
    Nothing is changed (a warning is issued) unless the three lines are
    present and directly follow each other in that order. """
    header = document.header

    natbib_at = find_token(header, "\\use_natbib", 0)
    if natbib_at == -1:
        document.warning("Malformed lyx document: Missing '\\use_natbib'.")
        return

    numerical_at = find_token(header, "\\use_numerical_citations", 0)
    if numerical_at != natbib_at + 1:
        document.warning("Malformed lyx document: Missing '\\use_numerical_citations'.")
        return

    jurabib_at = find_token(header, "\\use_jurabib", 0)
    if jurabib_at != numerical_at + 1:
        document.warning("Malformed lyx document: Missing '\\use_jurabib'.")
        return

    use_natbib, use_numerical_citations, use_jurabib = [
        int(header[where].split()[1])
        for where in (natbib_at, numerical_at, jurabib_at)]

    if use_natbib:
        if use_numerical_citations:
            cite_engine = "natbib_numerical"
        else:
            cite_engine = "natbib_authoryear"
    elif use_jurabib:
        cite_engine = "jurabib"
    else:
        cite_engine = "basic"

    # One new line takes the place of the three old ones.
    header[natbib_at:jurabib_at + 1] = ["\\cite_engine " + cite_engine]
2035
2036
def revert_cite_engine(document):
    """ Revert the cite engine: replace the cite_engine header line by
    the three lines use_natbib, use_numerical_citations and use_jurabib. """
    header = document.header
    where = find_token(header, "\\cite_engine", 0)
    if where == -1:
        document.warning("Malformed lyx document: Missing '\\cite_engine'.")
        return

    cite_engine = header[where].split()[1]

    # engine -> (use_natbib, use_numerical_citations, use_jurabib);
    # any other engine switches everything off.
    flags_by_engine = {"natbib_numerical": ('1', '1', '0'),
                       "natbib_authoryear": ('1', '0', '0'),
                       "jurabib": ('0', '0', '1')}
    use_natbib, use_numerical, use_jurabib = \
        flags_by_engine.get(cite_engine, ('0', '0', '0'))

    header[where:where + 1] = ["\\use_natbib " + use_natbib,
                               "\\use_numerical_citations " + use_numerical,
                               "\\use_jurabib " + use_jurabib]
2061
2062
def convert_paperpackage(document):
    """ Convert paper package: map the value of the paperpackage header
    line to its new name, or append 'widemarginsa4' if it has no value. """
    header = document.header
    where = find_token(header, "\\paperpackage", 0)
    if where == -1:
        return

    packages = {'default':'none','a4':'none', 'a4wide':'a4', 'widemarginsa4':'a4wide'}
    line = header[where]
    fields = line.split()
    if len(fields) > 1:
        old_name = fields[1]
        header[where] = line.replace(old_name, packages[old_name])
    else:
        header[where] = line + ' widemarginsa4'
2075
2076
def revert_paperpackage(document):
    """ Revert paper package: map the value of the paperpackage header
    line back to its old name. A line without value is treated as
    'default'. """
    header = document.header
    where = find_token(header, "\\paperpackage", 0)
    if where == -1:
        return

    packages = {'none':'a4', 'a4':'a4wide', 'a4wide':'widemarginsa4',
                'widemarginsa4':'', 'default': 'default'}
    line = header[where]
    fields = line.split()
    new_name = fields[1] if len(fields) > 1 else 'default'
    header[where] = line.replace(new_name, packages[new_name])
2090
2091
def convert_bullets(document):
    """ Convert bullets: join each multi-line bullet definition of the
    header into a single line. """
    header = document.header
    where = 0
    while True:
        where = find_token(header, "\\bullet", where)
        if where == -1:
            return
        # A bulletLaTeX definition has one value line, any other bullet
        # definition has three; both are followed by one closing line.
        if header[where].startswith('\\bulletLaTeX'):
            value_lines = 1
        else:
            value_lines = 3
        pieces = [header[where]]
        for offset in range(1, value_lines + 1):
            pieces.append(header[where + offset].strip())
        header[where] = ' '.join(pieces)
        # Delete the value lines and the closing line.
        del header[where + 1:where + value_lines + 2]
        where += 1
2108
2109
def revert_bullets(document):
    """ Revert bullets: split each one-line bullet definition of the
    header into the multi-line form closed by an end_bullet line.
    Conversion stops with a warning at the first malformed definition. """
    header = document.header
    where = 0
    while True:
        where = find_token(header, "\\bullet", where)
        if where == -1:
            return
        line = header[where]
        if line.startswith('\\bulletLaTeX'):
            quote = line.find('"')
            if quote == -1:
                document.warning("Malformed header.")
                return
            # Everything from the first quote on is the value line.
            expanded = [line[:quote - 1], '\t' + line[quote:], '\\end_bullet']
        else:
            frag = line.split()
            if len(frag) != 5:
                document.warning("Malformed header.")
                return
            expanded = [frag[0] + ' ' + frag[1]]
            expanded.extend('\t' + value for value in frag[2:])
            expanded.append('\\end_bullet')
        header[where:where + 1] = expanded
        # Continue after the lines just written.
        where += len(expanded)
2137
2138
def add_begin_header(document):
    r""" Add \begin_document and \begin_header, in this order, directly
    after the line found for \lyxformat in the header. """
    after_format = find_token(document.header, '\\lyxformat', 0) + 1
    document.header[after_format:after_format] = ['\\begin_document',
                                                  '\\begin_header']
2144
2145
def remove_begin_header(document):
    r""" Remove \begin_header and \begin_document from the header.
    A missing line is silently skipped. """
    for token in ("\\begin_document", "\\begin_header"):
        where = find_token(document.header, token, 0)
        if where != -1:
            del document.header[where]
2154
2155
def add_begin_body(document):
    r""" Add \begin_body and \end_body.

    \begin_body and an empty line are put at the start of the body,
    \end_body is put directly before \end_document. If \end_document is
    missing a warning is issued and \end_body is appended at the end. """
    document.body.insert(0, '\\begin_body')
    document.body.insert(1, '')
    i = find_token(document.body, "\\end_document", 0)
    if i == -1:
        # insert(-1, ...) would silently put \end_body before whatever
        # happens to be the last line of the body.
        document.warning("Malformed LyX document: Missing '\\end_document'.")
        document.body.append('\\end_body')
        return
    document.body.insert(i, '\\end_body')
2162
def remove_begin_body(document):
    r""" Remove \begin_body (together with an empty line directly after
    it) and \end_body. """
    body = document.body
    begin = find_token(body, "\\begin_body", 0)
    if begin != -1:
        body.pop(begin)
        # The line that followed \begin_body is now at the same index.
        if not body[begin]:
            body.pop(begin)
    end = find_token(body, "\\end_body", 0)
    if end != -1:
        body.pop(end)
2173
2174
def normalize_papersize(document):
    r""" Normalize \papersize: write the values Default and Custom in
    lower case; all other values are left alone. """
    where = find_token(document.header, '\\papersize', 0)
    if where == -1:
        return

    lower_case = {"Default": "default", "Custom": "custom"}
    size = document.header[where].split()[1]
    if size in lower_case:
        document.header[where] = '\\papersize ' + lower_case[size]
2187
2188
def denormalize_papersize(document):
    r""" Revert \papersize: write the value custom as Custom again;
    all other values are left alone. """
    where = find_token(document.header, '\\papersize', 0)
    if where == -1:
        return

    size = document.header[where].split()[1]
    if size == "custom":
        document.header[where] = '\\papersize Custom'
2198
2199
def strip_end_space(document):
    """ Strip surrounding whitespace from every body line that starts
    with a backslash (a command line). Other lines are not changed. """
    body = document.body
    for index, line in enumerate(body):
        if line.startswith('\\'):
            body[index] = line.strip()
2205
2206
def use_x_boolean(document):
    r""" Use boolean values for \use_geometry, \use_bibtopic and
    \tracking_changes: 0 becomes false and 1 becomes true.
    Settings missing from the header are skipped. """
    bin2bool = {'0': 'false', '1': 'true'}
    header = document.header
    for use in ('\\use_geometry', '\\use_bibtopic', '\\tracking_changes'):
        where = find_token(header, use, 0)
        if where != -1:
            fields = header[where].split()
            header[where] = '%s %s' % (fields[0], bin2bool[fields[1]])
2216
2217
def use_x_binary(document):
    r""" Use digit values for \use_geometry, \use_bibtopic and
    \tracking_changes: false becomes 0 and true becomes 1.
    Settings missing from the header are skipped. """
    bool2bin = {'false': '0', 'true': '1'}
    header = document.header
    for use in ('\\use_geometry', '\\use_bibtopic', '\\tracking_changes'):
        where = find_token(header, use, 0)
        if where != -1:
            fields = header[where].split()
            header[where] = '%s %s' % (fields[0], bool2bin[fields[1]])
2227
2228
def normalize_paragraph_params(document):
    """ Place all the paragraph parameters in their own line.

    After each begin_layout line the following empty lines and parameter
    lines are scanned; a line holding a second backslash command is
    split in front of that command. """
    body = document.body

    allowed_parameters = ('\\paragraph_spacing', '\\noindent',
                          '\\align', '\\labelwidthstring',
                          "\\start_of_appendix", "\\leftindent")

    cursor = 0
    while True:
        cursor = find_token(document.body, '\\begin_layout', cursor)
        if cursor == -1:
            return

        cursor += 1
        while True:
            line = body[cursor]
            words = line.split()
            # The first line that is neither empty nor a paragraph
            # parameter ends the parameter section.
            if words and words[0] not in allowed_parameters:
                break

            # Look for a further command behind the first character.
            second_command = line.find('\\', 1)
            if second_command != -1:
                body[cursor:cursor + 1] = [line[:second_command].strip(),
                                           line[second_command:]]

            cursor += 1
2254
2255
def convert_output_changes(document):
    """ Add output_changes parameter (set to true) to the header,
    directly after the tracking_changes line. """
    header = document.header
    anchor = find_token(header, '\\tracking_changes', 0)
    if anchor != -1:
        header.insert(anchor + 1, '\\output_changes true')
        return
    document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
2263
2264
def revert_output_changes(document):
    """ Remove output_changes parameter from the header, if present. """
    header = document.header
    where = find_token(header, '\\output_changes', 0)
    if where != -1:
        del header[where]
2271
2272
def convert_ert_paragraphs(document):
    """ Convert paragraph breaks and sanitize paragraphs.

    For every ERT inset: all paragraphs get the default layout,
    paragraph parameters and font settings are deleted, an empty
    paragraph is inserted before each paragraph but the first, and each
    newline command is replaced by a paragraph break. """
    forbidden_settings = (
                          # paragraph parameters
                          '\\paragraph_spacing', '\\labelwidthstring',
                          '\\start_of_appendix', '\\noindent',
                          '\\leftindent', '\\align',
                          # font settings
                          '\\family', '\\series', '\\shape', '\\size',
                          '\\emph', '\\numeric', '\\bar', '\\noun',
                          '\\color', '\\lang')
    body = document.body
    inset_start = 0
    while True:
        inset_start = find_token(body, '\\begin_inset ERT', inset_start)
        if inset_start == -1:
            return
        inset_end = find_end_of_inset(body, inset_start)
        if inset_end == -1:
            document.warning("Malformed lyx document: Missing '\\end_inset'.")
            inset_start += 1
            continue

        # convert non-standard paragraphs to standard
        line_no = inset_start
        while True:
            line_no = find_token(body, "\\begin_layout", line_no, inset_end)
            if line_no == -1:
                break
            body[line_no] = '\\begin_layout %s' % document.default_layout
            line_no += 1

        # remove all paragraph parameters and font settings
        line_no = inset_start
        while line_no < inset_end:
            words = body[line_no].split()
            if words and words[0] in forbidden_settings:
                # The following lines (and the inset end) move up by one.
                del body[line_no]
                inset_end -= 1
            else:
                line_no += 1

        # insert an empty paragraph before each paragraph but the first
        line_no = inset_start
        is_first_paragraph = True
        while True:
            line_no = find_token(body, "\\begin_layout", line_no, inset_end)
            if line_no == -1:
                break
            if is_first_paragraph:
                is_first_paragraph = False
                line_no += 1
                continue
            body[line_no:line_no] = ['\\begin_layout %s' % document.default_layout, "",
                                     "\\end_layout", ""]
            # Skip the four new lines and the layout line that was found.
            line_no += 5
            inset_end += 4

        # convert \\newline to new paragraph
        line_no = inset_start
        while True:
            line_no = find_token(body, "\\newline", line_no, inset_end)
            if line_no == -1:
                break
            # Three lines replace the one \newline line.
            body[line_no:line_no + 1] = ["\\end_layout", "",
                                         '\\begin_layout %s' % document.default_layout]
            line_no += 3
            inset_end += 2
            # We need an empty line if document.default_layout == ''
            if body[line_no] != '':
                body.insert(line_no, '')
                line_no += 1
                inset_end += 1
        inset_start += 1
2345
2346
def revert_ert_paragraphs(document):
    """ Remove double paragraph breaks.

    For every ERT inset in document.body, each paragraph break (the lines
    from an \\end_layout up to the next \\begin_layout) is first replaced
    by a single \\newline.  Afterwards every pair of \\newline lines that
    is separated only by empty lines is turned back into one paragraph
    break using document.default_layout.

    Emits a warning through document.warning and skips the inset when
    its \\end_inset cannot be found.  The body is modified in place."""
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset ERT', i)
        if i == -1:
            return
        # j always points at the \end_inset line of the current inset; it
        # is adjusted each time the number of lines in the inset changes.
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Missing '\\end_inset'.")
            i = i + 1
            continue

        # replace paragraph breaks with \newline
        k = i
        while True:
            k = find_token(document.body, "\\end_layout", k, j)
            if k == -1:
                break
            # Only look for the following paragraph start once an
            # \end_layout was actually found (avoids searching from -1).
            l = find_token(document.body, "\\begin_layout", k, j)
            if l == -1:
                break
            document.body[k:l+1] = ["\\newline"]
            j = j - l + k
            k = k + 1

        # replace double \newlines with paragraph breaks
        k = i
        while True:
            k = find_token(document.body, "\\newline", k, j)
            if k == -1:
                break
            # skip the empty lines that follow the first \newline
            l = k + 1
            while document.body[l] == "":
                l = l + 1
            if document.body[l].strip() and document.body[l].split()[0] == "\\newline":
                document.body[k:l+1] = ["\\end_layout", "",
                                    '\\begin_layout %s' % document.default_layout]
                j = j - l + k + 2
                # k now indexes the line right after the new \begin_layout
                k = k + 3
                # We need an empty line after \begin_layout.  The check
                # must use the updated index k: the old index l + 1 is
                # stale after the slice replacement and only matches when
                # exactly one empty line separated the two \newlines.
                if document.body[k] != '':
                    document.body.insert(k, '')
                    k = k + 1
                    j = j + 1
            else:
                k = k + 1
        i = i + 1
2393
2394
def convert_french(document):
    " Replace the language frenchb by french in header and body. "
    # Document language in the header (first match only)
    header_re = re.compile(r'^\\language\s+frenchb')
    pos = find_re(document.header, header_re, 0)
    if pos != -1:
        document.header[pos] = "\\language french"

    # Language switches in the document body (every match)
    body_re = re.compile(r'^\\lang\s+frenchb')
    pos = find_re(document.body, body_re, 0)
    while pos != -1:
        document.body[pos] = "\\lang french"
        pos = find_re(document.body, body_re, pos + 1)
2411
2412
def remove_paperpackage(document):
    """ Remove paper package.

    Deletes the \\paperpackage line from the header.  When its value is
    a4, a4wide or widemarginsa4 the matching \\usepackage command is put
    at the very start of the preamble and an existing \\papersize header
    line is reset to "default".  Any other value only removes the line.
    A \\paperpackage line without a value is removed with a warning
    instead of raising an IndexError."""
    i = find_token(document.header, '\\paperpackage', 0)
    if i == -1:
        return

    tokens = document.header[i].split()
    del document.header[i]

    if len(tokens) < 2:
        document.warning("Malformed lyx document: Missing value of '\\paperpackage'.")
        return

    conv = {"a4": "\\usepackage{a4}",
            "a4wide": "\\usepackage{a4wide}",
            "widemarginsa4": "\\usepackage[widemargins]{a4}"}
    usepackage = conv.get(tokens[1])
    if usepackage is None:
        # no LaTeX package is associated with this value
        return

    # for compatibility we ensure it is the first entry in preamble
    document.preamble[0:0] = [usepackage]

    i = find_token(document.header, '\\papersize', 0)
    if i != -1:
        document.header[i] = "\\papersize default"
2435
2436
def remove_quotestimes(document):
    " Drop the quotes_times line from the header, if there is one. "
    pos = find_token(document.header, '\\quotes_times', 0)
    if pos != -1:
        del document.header[pos]
2443
2444
def convert_sgml_paragraphs(document):
    """ Convert SGML paragraphs.

    Only acts when document.backend is "docbook".  Every paragraph that
    starts with "\\begin_layout SGML" is changed to a Standard paragraph
    whose content is wrapped in an inlined ERT inset.

    A paragraph without a closing \\end_layout is left untouched and
    reported through document.warning; previously the closing lines
    were spliced in at the top of the body in that case."""
    if document.backend != "docbook":
        return

    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout SGML", i)
        if i == -1:
            return

        j = find_token(document.body, "\\end_layout", i)
        if j == -1:
            # Without this guard j + 1 == 0 and the inset end would be
            # inserted at the very beginning of the document body.
            document.warning("Malformed lyx document: Missing '\\end_layout'.")
            i = i + 1
            continue

        document.body[i] = "\\begin_layout Standard"

        # Close the inset first: inserting after j does not move index i.
        document.body[j+1:j+1] = ['', '\\end_inset', '', '', '\\end_layout']
        document.body[i+1:i+1] = ['\\begin_inset ERT', 'status inlined', '',
                                  '\\begin_layout Standard', '']

        # Ten lines were added in total, all of them after line i.
        i = i + 10
2464
##
# Conversion hub
#
# Each table entry is a [file format number, [functions]] pair; every
# function listed here takes the document as its only argument.
# NOTE(review): presumably the number is the file format reached once the
# listed functions have run -- confirm against the lyx2lyx driver.
#

supported_versions = ["1.4.%d" % i for i in range(3)]
supported_versions.append("1.4")

convert = [
    [222, [insert_tracking_changes, add_end_header, convert_amsmath]],
    [223, [remove_color_default, convert_spaces, convert_bibtex,
           remove_insetparent]],
    [224, [convert_external, convert_comment]],
    [225, [add_end_layout, layout2begin_layout, convert_end_document,
           convert_table_valignment_middle, convert_breaks]],
    [226, [convert_note]],
    [227, [convert_box]],
    [228, [convert_collapsible, convert_ert]],
    [229, [convert_minipage]],
    [230, [convert_jurabib]],
    [231, [convert_float]],
    [232, [convert_bibtopic]],
    [233, [convert_graphics, convert_names]],
    [234, [convert_cite_engine]],
    [235, [convert_paperpackage]],
    [236, [convert_bullets, add_begin_header, add_begin_body,
           normalize_papersize, strip_end_space]],
    [237, [use_x_boolean]],
    [238, [update_latexaccents]],
    [239, [normalize_paragraph_params]],
    [240, [convert_output_changes]],
    [241, [convert_ert_paragraphs]],
    [242, [convert_french]],
    [243, [remove_paperpackage]],
    [244, [rename_spaces]],
    [245, [remove_quotestimes, convert_sgml_paragraphs]],
]

# Entries with an empty function list need no changes to the document.
revert = [
    [244, []],
    [243, [revert_space_names]],
    [242, []],
    [241, []],
    [240, [revert_ert_paragraphs]],
    [239, [revert_output_changes]],
    [238, []],
    [237, []],
    [236, [use_x_binary]],
    [235, [denormalize_papersize, remove_begin_body, remove_begin_header,
           revert_bullets]],
    [234, [revert_paperpackage]],
    [233, [revert_cite_engine]],
    [232, [revert_names]],
    [231, [revert_bibtopic]],
    [230, [revert_float]],
    [229, [revert_jurabib]],
    [228, []],
    [227, [revert_collapsible, revert_ert]],
    [226, [revert_box, revert_external_2]],
    [225, [revert_note]],
    [224, [rm_end_layout, begin_layout2layout, revert_end_document,
           revert_valignment_middle, revert_breaks, convert_frameless_box,
           remove_branches]],
    [223, [revert_external_2, revert_comment, revert_eqref]],
    [222, [revert_spaces, revert_bibtex]],
    [221, [revert_amsmath, rm_end_header, rm_tracking_changes,
           rm_body_changes]],
]


if __name__ == "__main__":
    # Nothing happens when this module is executed directly.
    pass