lib/lyx2lyx/lyx_2_2.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # -*- coding: utf-8 -*-
   4 # Copyright (C) 2011 The LyX team
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 2.2"""
  21
  22 import re, string
  23 import unicodedata
  24 import sys, os
  25
  26 # Uncomment only what you need to import, please.
  27
  28 #from parser_tools import find_token, find_end_of, find_tokens, \
  29 #  find_token_exact, find_end_of_inset, find_end_of_layout, \
  30 #  find_token_backwards, is_in_inset, get_value, get_quoted_value, \
  31 #  del_token, check_token, get_option_value
  32
  33 from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, lyx2latex#, \
  34 #  insert_to_preamble, latex_length, revert_flex_inset, \
  35 #  revert_font_attrs, hex2ratio, str2bool
  36
  37 from parser_tools import find_token, find_token_backwards, find_re, \
  38      find_end_of_inset, find_end_of_layout, find_nonempty_line, \
  39      get_containing_layout, get_value, check_token
  40
  41 ###############################################################################
  42 ###
  43 ### Conversion and reversion routines
  44 ###
  45 ###############################################################################
  46
def convert_separator(document):
    """
    Convert layout separators to separator insets and add (LaTeX) paragraph
    breaks in order to mimic previous LaTeX export.

    document.body is edited in place in three passes:

    1. For every "\\begin_deeper" line, a parbreak separator inset is
       inserted right before the closest preceding "\\end_layout".
    2. For every "\\align" line that is not inside a "Plain Layout", the
       same inset is inserted before the closest preceding "\\end_layout",
       but only if the layout containing that position is "Standard" and
       holds neither an "\\align" token nor a VSpace inset.
    3. Every layout whose "\\begin_layout" line matches Separator or
       EndOfSlide (optionally surrounded by dashes or whitespace, matched
       case-insensitively) is replaced by a Standard layout that contains
       a parbreak separator inset.

    In passes 1 and 2, text style settings found in the preceding layout
    are reset to the values in sty_dict before the inset is inserted.

    Returns None.  A warning is issued, and the conversion stops, if the
    end of a separator layout cannot be found in pass 3.
    """

    # The bare separator inset, and the same inset wrapped in a Standard layout
    parins = ["\\begin_inset Separator parbreak", "\\end_inset", ""]
    parlay = ["\\begin_layout Standard", "\\begin_inset Separator parbreak",
              "\\end_inset", "", "\\end_layout", ""]
    # Text style tokens and the value each one is reset to
    sty_dict = {
        "family" : "default",
        "series" : "default",
        "shape"  : "default",
        "size"   : "default",
        "bar"    : "default",
        "color"  : "inherit"
        }

    # Pass 1: paragraphs that are followed by a nested (deeper) environment
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_deeper", i)
        if i == -1:
            break

        j = find_token_backwards(document.body, "\\end_layout", i-1)
        if j != -1:
            # reset any text style before inserting the inset
            lay = get_containing_layout(document.body, j-1)
            if lay != False:
                # lay[1]:lay[2] is used as the line range of that layout
                content = "\n".join(document.body[lay[1]:lay[2]])
                for val in list(sty_dict.keys()):
                    if content.find("\\%s" % val) != -1:
                        document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])]
                        # one line was inserted before both positions
                        i = i + 1
                        j = j + 1
            document.body[j:j] = parins
            # step over the inserted inset and the \begin_deeper line itself
            i = i + len(parins) + 1
        else:
            i = i + 1

    # Pass 2: paragraphs that are followed by a paragraph with explicit alignment
    i = 0
    while 1:
        i = find_token(document.body, "\\align", i)
        if i == -1:
            break

        # alignment inside a Plain Layout is left alone
        lay = get_containing_layout(document.body, i)
        if lay != False and lay[0] == "Plain Layout":
            i = i + 1
            continue

        j = find_token_backwards(document.body, "\\end_layout", i-1)
        if j != -1:
            lay = get_containing_layout(document.body, j-1)
            if lay != False and lay[0] == "Standard" \
               and find_token(document.body, "\\align", lay[1], lay[2]) == -1 \
               and find_token(document.body, "\\begin_inset VSpace", lay[1], lay[2]) == -1:
                # reset any text style before inserting the inset
                content = "\n".join(document.body[lay[1]:lay[2]])
                for val in list(sty_dict.keys()):
                    if content.find("\\%s" % val) != -1:
                        document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])]
                        # one line was inserted before both positions
                        i = i + 1
                        j = j + 1
                document.body[j:j] = parins
                # step over the inserted inset and the \align line itself
                i = i + len(parins) + 1
            else:
                i = i + 1
        else:
            i = i + 1

    # Pass 3: old-style separator layouts
    regexp = re.compile(r'^\\begin_layout (?:(-*)|(\s*))(Separator|EndOfSlide)(?:(-*)|(\s*))$', re.IGNORECASE)

    i = 0
    while 1:
        i = find_re(document.body, regexp, i)
        if i == -1:
            return

        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return

        # Keep the lines lay[3]:lay[2] of the layout so that they can be
        # re-inserted into the replacement layout.
        # NOTE(review): lay[3] is presumably the first content line of the
        # layout -- confirm against get_containing_layout().
        lay = get_containing_layout(document.body, j-1)
        if lay != False:
            lines = document.body[lay[3]:lay[2]]
        else:
            lines = []

        document.body[i:j+1] = parlay
        if len(lines) > 0:
            # put the saved lines right after "\begin_layout Standard"
            document.body[i+1:i+1] = lines

        i = i + len(parlay) + len(lines) + 1
 142
 143
def revert_separator(document):
    """
    Revert separator insets to layout separators.

    Every "\\begin_inset Separator" found in document.body is replaced in
    place.  What it is replaced with depends on the kind of the inset
    ("plain" or anything else), on whether the containing layout has
    non-empty lines before and/or after the inset, on the name of the
    containing layout and on what follows that layout:

    * a "plain" separator becomes an ERT inset (holding a space if
      something follows the inset in the layout, a "%" otherwise);
    * any other separator becomes an old-style separator layout and/or a
      "\\begin_layout Standard" line (wrapped in "\\begin_deeper" /
      "\\end_deeper" if the containing layout is not "Standard"), or is
      simply deleted.

    The name of the separator layout is "Separator" for the beamer text
    classes and "--Separator--" otherwise.

    Returns None.  A warning is issued for a separator inset that has no
    containing layout; such an inset is skipped.
    """

    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass in beamer_classes:
        beglaysep = "\\begin_layout Separator"
    else:
        beglaysep = "\\begin_layout --Separator--"

    # An empty separator layout
    parsep = [beglaysep, "", "\\end_layout", ""]
    # ERT inset containing a "%"
    comert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", "%", "\\end_layout",
              "", "\\end_inset", ""]
    # ERT inset containing a single space
    empert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", " ", "\\end_layout",
              "", "\\end_inset", ""]

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Separator", i)
        if i == -1:
            return

        lay = get_containing_layout(document.body, i)
        if lay == False:
            document.warning("Malformed LyX document: Can't convert separator inset at line " + str(i))
            i = i + 1
            continue

        layoutname = lay[0]
        beg = lay[1]
        end = lay[2]
        # Second word of the value read from the inset line; "plain" is the
        # default handed to get_value().
        # NOTE(review): presumably the value is "Separator <kind>" -- confirm
        # against get_value().
        kind = get_value(document.body, "\\begin_inset Separator", i, i+1, "plain").split()[1]
        # Lines of the layout before and after the inset; they only count as
        # "something" if at least one of them is not empty.
        before = document.body[beg+1:i]
        something_before = len(before) > 0 and len("".join(before)) > 0
        j = find_end_of_inset(document.body, i)
        after = document.body[j+1:end]
        something_after = len(after) > 0 and len("".join(after)) > 0
        if kind == "plain":
            # len(before) is i - beg - 1, so beg now equals i
            beg = beg + len(before) + 1
        elif something_before:
            # close the layout in front of the inset and shift all positions
            # behind the two inserted lines
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2
            j = j + 2
            beg = i
            end = end + 2

        if kind == "plain":
            # replace the inset itself by an ERT inset
            if something_after:
                document.body[beg:j+1] = empert
                i = i + len(empert)
            else:
                document.body[beg:j+1] = comert
                i = i + len(comert)
        else:
            if something_after:
                # the lines after the inset become a Standard paragraph
                if layoutname == "Standard":
                    if not something_before:
                        document.body[beg:j+1] = parsep
                        i = i + len(parsep)
                        document.body[i:i] = ["", "\\begin_layout Standard"]
                        i = i + 2
                    else:
                        document.body[beg:j+1] = ["\\begin_layout Standard"]
                        i = i + 1
                else:
                    # nest the new paragraph into the non-Standard layout
                    document.body[beg:j+1] = ["\\begin_deeper"]
                    i = i + 1
                    # j + 1 - beg lines were replaced by a single line
                    end = end + 1 - (j + 1 - beg)
                    if not something_before:
                        document.body[i:i] = parsep
                        i = i + len(parsep)
                        end = end + len(parsep)
                    document.body[i:i] = ["\\begin_layout Standard"]
                    document.body[end+2:end+2] = ["", "\\end_deeper", ""]
                    i = i + 4
            else:
                # Nothing follows the inset inside its layout: look at the
                # first non-empty line after the layout
                next_par_is_aligned = False
                k = find_nonempty_line(document.body, end+1)
                if k != -1 and check_token(document.body[k], "\\begin_layout"):
                    lay = get_containing_layout(document.body, k)
                    next_par_is_aligned = lay != False and \
                            find_token(document.body, "\\align", lay[1], lay[2]) != -1
                if k != -1 and not next_par_is_aligned \
                        and not check_token(document.body[k], "\\end_deeper") \
                        and not check_token(document.body[k], "\\begin_deeper"):
                    # an explicit separator layout is needed
                    if layoutname == "Standard":
                        document.body[beg:j+1] = [beglaysep]
                        i = i + 1
                    else:
                        document.body[beg:j+1] = ["\\begin_deeper", beglaysep]
                        # j + 1 - beg lines were replaced by two lines
                        end = end + 2 - (j + 1 - beg)
                        document.body[end+1:end+1] = ["", "\\end_deeper", ""]
                        i = i + 3
                else:
                    # no separator layout is needed, just drop the inset
                    if something_before:
                        del document.body[i:end+1]
                    else:
                        del document.body[i:end-1]

        i = i + 1
 245
 246
 247 def revert_smash(document):
 248     " Set amsmath to on if smash commands are used "
 249
 250     commands = ["smash[t]", "smash[b]", "notag"]
 251     i = find_token(document.header, "\\use_package amsmath", 0)
 252     if i == -1:
 253         document.warning("Malformed LyX document: Can't find \\use_package amsmath.")
 254         return;
 255     value = get_value(document.header, "\\use_package amsmath", i).split()[1]
 256     if value != "1":
 257         # nothing to do if package is not auto but on or off
 258         return;
 259     j = 0
 260     while True:
 261         j = find_token(document.body, '\\begin_inset Formula', j)
 262         if j == -1:
 263             return
 264         k = find_end_of_inset(document.body, j)
 265         if k == -1:
 266             document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(j))
 267             j += 1
 268             continue
 269         code = "\n".join(document.body[j:k])
 270         for c in commands:
 271             if code.find("\\%s" % c) != -1:
 272                 # set amsmath to on, since it is loaded by the newer format
 273                 document.header[i] = "\\use_package amsmath 2"
 274                 return
 275         j = k
 276
 277
 278 def revert_swissgerman(document):
 279     " Set language german-ch-old to german "
 280     i = 0
 281     if document.language == "german-ch-old":
 282         document.language = "german"
 283         i = find_token(document.header, "\\language", 0)
 284         if i != -1:
 285             document.header[i] = "\\language german"
 286     j = 0
 287     while True:
 288         j = find_token(document.body, "\\lang german-ch-old", j)
 289         if j == -1:
 290             return
 291         document.body[j] = document.body[j].replace("\\lang german-ch-old", "\\lang german")
 292         j = j + 1
 293
 294
 295 def revert_use_package(document, pkg, commands, oldauto):
 296     # oldauto defines how the version we are reverting to behaves:
 297     # if it is true, the old version uses the package automatically.
 298     # if it is false, the old version never uses the package.
 299     regexp = re.compile(r'(\\use_package\s+%s)' % pkg)
 300     i = find_re(document.header, regexp, 0)
 301     value = "1" # default is auto
 302     if i != -1:
 303         value = get_value(document.header, "\\use_package" , i).split()[1]
 304         del document.header[i]
 305     if value == "2": # on
 306         add_to_preamble(document, ["\\usepackage{" + pkg + "}"])
 307     elif value == "1" and not oldauto: # auto
 308         i = 0
 309         while True:
 310             i = find_token(document.body, '\\begin_inset Formula', i)
 311             if i == -1:
 312                 return
 313             j = find_end_of_inset(document.body, i)
 314             if j == -1:
 315                 document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 316                 i += 1
 317                 continue
 318             code = "\n".join(document.body[i:j])
 319             for c in commands:
 320                 if code.find("\\%s" % c) != -1:
 321                     add_to_preamble(document, ["\\usepackage{" + pkg + "}"])
 322                     return
 323             i = j
 324
 325
 326 mathtools_commands = ["xhookrightarrow", "xhookleftarrow", "xRightarrow", \
 327                 "xrightharpoondown", "xrightharpoonup", "xrightleftharpoons", \
 328                 "xLeftarrow", "xleftharpoondown", "xleftharpoonup", \
 329                 "xleftrightarrow", "xLeftrightarrow", "xleftrightharpoons", \
 330                 "xmapsto"]
 331
def revert_xarrow(document):
    """
    Remove use_package mathtools.

    Delegates to revert_use_package() with the package name "mathtools",
    the mathtools_commands list and oldauto set to False.
    """
    revert_use_package(document, "mathtools", mathtools_commands, False)
 335
 336
 337 def revert_beamer_lemma(document):
 338     " Reverts beamer lemma layout to ERT "
 339
 340     beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
 341     if document.textclass not in beamer_classes:
 342         return
 343
 344     consecutive = False
 345     i = 0
 346     while True:
 347         i = find_token(document.body, "\\begin_layout Lemma", i)
 348         if i == -1:
 349             return
 350         j = find_end_of_layout(document.body, i)
 351         if j == -1:
 352             document.warning("Malformed LyX document: Can't find end of Lemma layout")
 353             i += 1
 354             continue
 355         arg1 = find_token(document.body, "\\begin_inset Argument 1", i, j)
 356         endarg1 = find_end_of_inset(document.body, arg1)
 357         arg2 = find_token(document.body, "\\begin_inset Argument 2", i, j)
 358         endarg2 = find_end_of_inset(document.body, arg2)
 359         subst1 = []
 360         subst2 = []
 361         if arg1 != -1:
 362             beginPlain1 = find_token(document.body, "\\begin_layout Plain Layout", arg1, endarg1)
 363             if beginPlain1 == -1:
 364                 document.warning("Malformed LyX document: Can't find arg1 plain Layout")
 365                 i += 1
 366                 continue
 367             endPlain1 = find_end_of_inset(document.body, beginPlain1)
 368             content1 = document.body[beginPlain1 + 1 : endPlain1 - 2]
 369             subst1 = put_cmd_in_ert("<") + content1 + put_cmd_in_ert(">")
 370         if arg2 != -1:
 371             beginPlain2 = find_token(document.body, "\\begin_layout Plain Layout", arg2, endarg2)
 372             if beginPlain2 == -1:
 373                 document.warning("Malformed LyX document: Can't find arg2 plain Layout")
 374                 i += 1
 375                 continue
 376             endPlain2 = find_end_of_inset(document.body, beginPlain2)
 377             content2 = document.body[beginPlain2 + 1 : endPlain2 - 2]
 378             subst2 = put_cmd_in_ert("[") + content2 + put_cmd_in_ert("]")
 379
 380         # remove Arg insets
 381         if arg1 < arg2:
 382             del document.body[arg2 : endarg2 + 1]
 383             if arg1 != -1:
 384                 del document.body[arg1 : endarg1 + 1]
 385         if arg2 < arg1:
 386             del document.body[arg1 : endarg1 + 1]
 387             if arg2 != -1:
 388                 del document.body[arg2 : endarg2 + 1]
 389
 390         # index of end layout has probably changed
 391         j = find_end_of_layout(document.body, i)
 392         if j == -1:
 393             document.warning("Malformed LyX document: Can't find end of Lemma layout")
 394             i += 1
 395             continue
 396
 397         begcmd = []
 398
 399         # if this is not a consecutive env, add start command
 400         if not consecutive:
 401             begcmd = put_cmd_in_ert("\\begin{lemma}")
 402
 403         # has this a consecutive lemma?
 404         consecutive = document.body[j + 2] == "\\begin_layout Lemma"
 405
 406         # if this is not followed by a consecutive env, add end command
 407         if not consecutive:
 408             document.body[j : j + 1] = put_cmd_in_ert("\\end{lemma}") + ["\\end_layout"]
 409
 410         document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd + subst1 + subst2
 411
 412         i = j
 413
 414
 415
 416 def revert_question_env(document):
 417     """
 418     Reverts question and question* environments of
 419     theorems-ams-extended-bytype module to ERT
 420     """
 421
 422     # Do we use theorems-ams-extended-bytype module?
 423     have_mod = False
 424     mods = document.get_module_list()
 425     for mod in mods:
 426         if mod == "theorems-ams-extended-bytype":
 427             have_mod = True
 428             continue
 429
 430     if not have_mod:
 431         return
 432
 433     consecutive = False
 434     i = 0
 435     while True:
 436         i = find_token(document.body, "\\begin_layout Question", i)
 437         if i == -1:
 438             return
 439
 440         starred = document.body[i] == "\\begin_layout Question*"
 441
 442         j = find_end_of_layout(document.body, i)
 443         if j == -1:
 444             document.warning("Malformed LyX document: Can't find end of Question layout")
 445             i += 1
 446             continue
 447
 448         # if this is not a consecutive env, add start command
 449         begcmd = []
 450         if not consecutive:
 451             if starred:
 452                 begcmd = put_cmd_in_ert("\\begin{question*}")
 453             else:
 454                 begcmd = put_cmd_in_ert("\\begin{question}")
 455
 456         # has this a consecutive theorem of same type?
 457         consecutive = False
 458         if starred:
 459             consecutive = document.body[j + 2] == "\\begin_layout Question*"
 460         else:
 461             consecutive = document.body[j + 2] == "\\begin_layout Question"
 462
 463         # if this is not followed by a consecutive env, add end command
 464         if not consecutive:
 465             if starred:
 466                 document.body[j : j + 1] = put_cmd_in_ert("\\end{question*}") + ["\\end_layout"]
 467             else:
 468                 document.body[j : j + 1] = put_cmd_in_ert("\\end{question}") + ["\\end_layout"]
 469
 470         document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd
 471
 472         add_to_preamble(document, "\\providecommand{\questionname}{Question}")
 473
 474         if starred:
 475             add_to_preamble(document, "\\theoremstyle{plain}\n" \
 476                                       "\\newtheorem*{question*}{\\protect\\questionname}")
 477         else:
 478             add_to_preamble(document, "\\theoremstyle{plain}\n" \
 479                                       "\\newtheorem{question}{\\protect\\questionname}")
 480
 481         i = j
 482
 483
 484 def convert_dashes(document):
 485     "convert -- and --- to \\twohyphens and \\threehyphens"
 486
 487     if document.backend != "latex":
 488         return
 489
 490     i = 0
 491     while i < len(document.body):
 492         words = document.body[i].split()
 493         if len(words) > 1 and words[0] == "\\begin_inset" and \
 494            words[1] in ["CommandInset", "ERT", "External", "Formula", "Graphics", "IPA", "listings"]:
 495             # must not replace anything in insets that store LaTeX contents in .lyx files
 496             # (math and command insets withut overridden read() and write() methods
 497             # filtering out IPA makes Text::readParToken() more simple
 498             # skip ERT as well since it is not needed there
 499             j = find_end_of_inset(document.body, i)
 500             if j == -1:
 501                 document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
 502                 i += 1
 503             else:
 504                 i = j
 505             continue
 506         while True:
 507             j = document.body[i].find("--")
 508             if j == -1:
 509                 break
 510             front = document.body[i][:j]
 511             back = document.body[i][j+2:]
 512             # We can have an arbitrary number of consecutive hyphens.
 513             # These must be split into the corresponding number of two and three hyphens
 514             # We must match what LaTeX does: First try emdash, then endash, then single hyphen
 515             if back.find("-") == 0:
 516                 back = back[1:]
 517                 if len(back) > 0:
 518                     document.body.insert(i+1, back)
 519                 document.body[i] = front + "\\threehyphens"
 520             else:
 521                 if len(back) > 0:
 522                     document.body.insert(i+1, back)
 523                 document.body[i] = front + "\\twohyphens"
 524         i += 1
 525
 526
 527 def revert_dashes(document):
 528     "convert \\twohyphens and \\threehyphens to -- and ---"
 529
 530     i = 0
 531     while i < len(document.body):
 532         words = document.body[i].split()
 533         if len(words) > 1 and words[0] == "\\begin_inset" and \
 534            words[1] in ["CommandInset", "ERT", "External", "Formula", "Graphics", "IPA", "listings"]:
 535             # see convert_dashes
 536             j = find_end_of_inset(document.body, i)
 537             if j == -1:
 538                 document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
 539                 i += 1
 540             else:
 541                 i = j
 542             continue
 543         replaced = False
 544         if document.body[i].find("\\twohyphens") >= 0:
 545             document.body[i] = document.body[i].replace("\\twohyphens", "--")
 546             replaced = True
 547         if document.body[i].find("\\threehyphens") >= 0:
 548             document.body[i] = document.body[i].replace("\\threehyphens", "---")
 549             replaced = True
 550         if replaced and i+1 < len(document.body) and \
 551            (document.body[i+1].find("\\") != 0 or \
 552             document.body[i+1].find("\\twohyphens") == 0 or
 553             document.body[i+1].find("\\threehyphens") == 0) and \
 554            len(document.body[i]) + len(document.body[i+1]) <= 80:
 555             document.body[i] = document.body[i] + document.body[i+1]
 556             document.body[i+1:i+2] = []
 557         else:
 558             i += 1
 559
 560
# Special phrases, iterated in this order by convert_phrases(),
# revert_phrases() and is_part_of_converted_phrase().
# order is important for the last three!
# (Each of them contains the following ones as a substring, so presumably
# the longer phrase has to be tried first.)
phrases = ["LyX", "LaTeX2e", "LaTeX", "TeX"]
 563
 564 def is_part_of_converted_phrase(line, j, phrase):
 565     "is phrase part of an already converted phrase?"
 566     for p in phrases:
 567         converted = "\\SpecialCharNoPassThru \\" + p
 568         pos = j + len(phrase) - len(converted)
 569         if pos >= 0:
 570             if line[pos:pos+len(converted)] == converted:
 571                 return True
 572     return False
 573
 574
 575 def convert_phrases(document):
 576     "convert special phrases from plain text to \\SpecialCharNoPassThru"
 577
 578     if document.backend != "latex":
 579         return
 580
 581     for phrase in phrases:
 582         i = 0
 583         while i < len(document.body):
 584             words = document.body[i].split()
 585             if len(words) > 1 and words[0] == "\\begin_inset" and \
 586                words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
 587                 # must not replace anything in insets that store LaTeX contents in .lyx files
 588                 # (math and command insets withut overridden read() and write() methods
 589                 j = find_end_of_inset(document.body, i)
 590                 if j == -1:
 591                     document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 592                     i += 1
 593                 else:
 594                     i = j
 595                 continue
 596             if document.body[i].find("\\") == 0:
 597                 i += 1
 598                 continue
 599             j = document.body[i].find(phrase)
 600             if j == -1:
 601                 i += 1
 602                 continue
 603             if not is_part_of_converted_phrase(document.body[i], j, phrase):
 604                 front = document.body[i][:j]
 605                 back = document.body[i][j+len(phrase):]
 606                 if len(back) > 0:
 607                     document.body.insert(i+1, back)
 608                 # We cannot use SpecialChar since we do not know whether we are outside passThru
 609                 document.body[i] = front + "\\SpecialCharNoPassThru \\" + phrase
 610             i += 1
 611
 612
 613 def revert_phrases(document):
 614     "convert special phrases to plain text"
 615
 616     i = 0
 617     while i < len(document.body):
 618         words = document.body[i].split()
 619         if len(words) > 1 and words[0] == "\\begin_inset" and \
 620            words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
 621             # see convert_phrases
 622             j = find_end_of_inset(document.body, i)
 623             if j == -1:
 624                 document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 625                 i += 1
 626             else:
 627                 i = j
 628             continue
 629         replaced = False
 630         for phrase in phrases:
 631             # we can replace SpecialChar since LyX ensures that it cannot be inserted into passThru parts
 632             if document.body[i].find("\\SpecialChar \\" + phrase) >= 0:
 633                 document.body[i] = document.body[i].replace("\\SpecialChar \\" + phrase, phrase)
 634                 replaced = True
 635             if document.body[i].find("\\SpecialCharNoPassThru \\" + phrase) >= 0:
 636                 document.body[i] = document.body[i].replace("\\SpecialCharNoPassThru \\" + phrase, phrase)
 637                 replaced = True
 638         if replaced and i+1 < len(document.body) and \
 639            (document.body[i+1].find("\\") != 0 or \
 640             document.body[i+1].find("\\SpecialChar") == 0) and \
 641            len(document.body[i]) + len(document.body[i+1]) <= 80:
 642             document.body[i] = document.body[i] + document.body[i+1]
 643             document.body[i+1:i+2] = []
 644             i -= 1
 645         i += 1
 646
 647
 648 def convert_specialchar_internal(document, forward):
 649     specialchars = {"\\-":"softhyphen", "\\textcompwordmark{}":"ligaturebreak", \
 650         "\\@.":"endofsentence", "\\ldots{}":"ldots", \
 651         "\\menuseparator":"menuseparator", "\\slash{}":"breakableslash", \
 652         "\\nobreakdash-":"nobreakdash", "\\LyX":"LyX", \
 653         "\\TeX":"TeX", "\\LaTeX2e":"LaTeX2e", \
 654         "\\LaTeX":"LaTeX" # must be after LaTeX2e
 655     }
 656
 657     i = 0
 658     while i < len(document.body):
 659         words = document.body[i].split()
 660         if len(words) > 1 and words[0] == "\\begin_inset" and \
 661            words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
 662             # see convert_phrases
 663             j = find_end_of_inset(document.body, i)
 664             if j == -1:
 665                 document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 666                 i += 1
 667             else:
 668                 i = j
 669             continue
 670         for key, value in specialchars.iteritems():
 671             if forward:
 672                 document.body[i] = document.body[i].replace("\\SpecialChar " + key, "\\SpecialChar " + value)
 673                 document.body[i] = document.body[i].replace("\\SpecialCharNoPassThru " + key, "\\SpecialCharNoPassThru " + value)
 674             else:
 675                 document.body[i] = document.body[i].replace("\\SpecialChar " + value, "\\SpecialChar " + key)
 676                 document.body[i] = document.body[i].replace("\\SpecialCharNoPassThru " + value, "\\SpecialCharNoPassThru " + key)
 677         i += 1
 678
 679
def convert_specialchar(document):
    """
    Convert special characters to new syntax.

    Calls convert_specialchar_internal() with forward set to True.
    """
    convert_specialchar_internal(document, True)
 683
 684
def revert_specialchar(document):
    """
    Convert special characters to old syntax.

    Calls convert_specialchar_internal() with forward set to False.
    """
    convert_specialchar_internal(document, False)
 688
 689
 690 def revert_georgian(document):
 691     "Set the document language to English but assure Georgian output"
 692
 693     if document.language == "georgian":
 694         document.language = "english"
 695         i = find_token(document.header, "\\language georgian", 0)
 696         if i != -1:
 697             document.header[i] = "\\language english"
 698         j = find_token(document.header, "\\language_package default", 0)
 699         if j != -1:
 700             document.header[j] = "\\language_package babel"
 701         k = find_token(document.header, "\\options", 0)
 702         if k != -1:
 703             document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
 704         else:
 705             l = find_token(document.header, "\\use_default_options", 0)
 706             document.header.insert(l + 1, "\\options georgian")
 707
 708
 709 def revert_sigplan_doi(document):
 710     " Reverts sigplanconf DOI layout to ERT "
 711
 712     if document.textclass != "sigplanconf":
 713         return
 714
 715     i = 0
 716     while True:
 717         i = find_token(document.body, "\\begin_layout DOI", i)
 718         if i == -1:
 719             return
 720         j = find_end_of_layout(document.body, i)
 721         if j == -1:
 722             document.warning("Malformed LyX document: Can't find end of DOI layout")
 723             i += 1
 724             continue
 725
 726         content = lyx2latex(document, document.body[i:j + 1])
 727         add_to_preamble(document, ["\\doi{" + content + "}"])
 728         del document.body[i:j + 1]
 729         # no need to reset i
 730
 731
 732 def revert_ex_itemargs(document):
 733     " Reverts \\item arguments of the example environments (Linguistics module) to TeX-code "
 734
 735     # Do we use the linguistics module?
 736     have_mod = False
 737     mods = document.get_module_list()
 738     for mod in mods:
 739         if mod == "linguistics":
 740             have_mod = True
 741             continue
 742
 743     if not have_mod:
 744         return
 745
 746     i = 0
 747     example_layouts = ["Numbered Examples (consecutive)", "Subexample"]
 748     while True:
 749         i = find_token(document.body, "\\begin_inset Argument item:", i)
 750         if i == -1:
 751             return
 752         j = find_end_of_inset(document.body, i)
 753         # Find containing paragraph layout
 754         parent = get_containing_layout(document.body, i)
 755         if parent == False:
 756             document.warning("Malformed LyX document: Can't find parent paragraph layout")
 757             i += 1
 758             continue
 759         parbeg = parent[3]
 760         layoutname = parent[0]
 761         if layoutname in example_layouts:
 762             beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
 763             endPlain = find_end_of_layout(document.body, beginPlain)
 764             content = document.body[beginPlain + 1 : endPlain]
 765             del document.body[i:j+1]
 766             subst = put_cmd_in_ert("[") + content + put_cmd_in_ert("]")
 767             document.body[parbeg : parbeg] = subst
 768         i += 1
 769
 770
 771 def revert_forest(document):
 772     " Reverts the forest environment (Linguistics module) to TeX-code "
 773
 774     # Do we use the linguistics module?
 775     have_mod = False
 776     mods = document.get_module_list()
 777     for mod in mods:
 778         if mod == "linguistics":
 779             have_mod = True
 780             continue
 781
 782     if not have_mod:
 783         return
 784
 785     i = 0
 786     while True:
 787         i = find_token(document.body, "\\begin_inset Flex Structure Tree", i)
 788         if i == -1:
 789             return
 790         j = find_end_of_inset(document.body, i)
 791         if j == -1:
 792             document.warning("Malformed LyX document: Can't find end of Structure Tree inset")
 793             i += 1
 794             continue
 795
 796         beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
 797         endPlain = find_end_of_layout(document.body, beginPlain)
 798         content = lyx2latex(document, document.body[beginPlain : endPlain])
 799
 800         add_to_preamble(document, ["\\usepackage{forest}"])
 801
 802         document.body[i:j + 1] = ["\\begin_inset ERT", "status collapsed", "",
 803                 "\\begin_layout Plain Layout", "", "\\backslash",
 804                 "begin{forest}", "\\end_layout", "", "\\begin_layout Plain Layout",
 805                 content, "\\end_layout", "", "\\begin_layout Plain Layout",
 806                 "\\backslash", "end{forest}", "", "\\end_layout", "", "\\end_inset"]
 807         # no need to reset i
 808
 809
 810 def revert_glossgroup(document):
 811     " Reverts the GroupGlossedWords inset (Linguistics module) to TeX-code "
 812
 813     # Do we use the linguistics module?
 814     have_mod = False
 815     mods = document.get_module_list()
 816     for mod in mods:
 817         if mod == "linguistics":
 818             have_mod = True
 819             continue
 820
 821     if not have_mod:
 822         return
 823
 824     i = 0
 825     while True:
 826         i = find_token(document.body, "\\begin_inset Flex GroupGlossedWords", i)
 827         if i == -1:
 828             return
 829         j = find_end_of_inset(document.body, i)
 830         if j == -1:
 831             document.warning("Malformed LyX document: Can't find end of GroupGlossedWords inset")
 832             i += 1
 833             continue
 834
 835         beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
 836         endPlain = find_end_of_layout(document.body, beginPlain)
 837         content = lyx2latex(document, document.body[beginPlain : endPlain])
 838         document.warning("content: %s" % content)
 839
 840         document.body[i:j + 1] = ["{", "", content, "", "}"]
 841         # no need to reset i
 842
 843
def revert_newgloss(document):
    """ Reverts the new Glosse insets (Linguistics module) to the old format

    For every "Flex Glosse" and "Flex Tri-Glosse" inset:
      * an "Argument 1" inset, if present, is converted to LaTeX, appended
        as a "\\backslash glt <text>" paragraph right before the end of the
        glosse inset, and the Argument inset itself is deleted;
      * the content of the first Plain Layout found from the inset start
        onwards is replaced by its LaTeX rendering.
    Does nothing unless the linguistics module is loaded.
    """

    # Do we use the linguistics module?
    have_mod = False
    mods = document.get_module_list()
    for mod in mods:
        if mod == "linguistics":
            have_mod = True
            # only skips to the next module; the whole list is still scanned
            continue

    if not have_mod:
        return

    glosses = ("\\begin_inset Flex Glosse", "\\begin_inset Flex Tri-Glosse")
    for glosse in glosses:
        i = 0
        while True:
            i = find_token(document.body, glosse, i)
            if i == -1:
                break
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of Glosse inset")
                i += 1
                continue

            # Optional translation argument, searched inside this inset only.
            arg = find_token(document.body, "\\begin_inset Argument 1", i, j)
            # NOTE(review): this is evaluated even when arg == -1; the result
            # is only used inside the "arg != -1" branch below.
            endarg = find_end_of_inset(document.body, arg)
            argcontent = ""
            if arg != -1:
                argbeginPlain = find_token(document.body, "\\begin_layout Plain Layout", arg, endarg)
                if argbeginPlain == -1:
                    document.warning("Malformed LyX document: Can't find arg plain Layout")
                    i += 1
                    continue
                # NOTE(review): find_end_of_inset is called on a layout line;
                # presumably it yields the end of the enclosing Argument inset
                # and "- 2" skips the trailing \end_layout and blank line.
                argendPlain = find_end_of_inset(document.body, argbeginPlain)
                argcontent = lyx2latex(document, document.body[argbeginPlain : argendPlain - 2])

                # Insert the \glt paragraph at the end of the glosse inset.
                # j lies behind the Argument inset, so arg/endarg stay valid.
                document.body[j:j] = ["", "\\begin_layout Plain Layout","\\backslash", "glt ",
                    argcontent, "\\end_layout"]

                # remove Arg insets and paragraph, if it only contains this inset
                if document.body[arg - 1] == "\\begin_layout Plain Layout" and find_end_of_layout(document.body, arg - 1) == endarg + 3:
                    del document.body[arg - 1 : endarg + 4]
                else:
                    del document.body[arg : endarg + 1]

            # Replace the content of the first Plain Layout after the inset
            # start by its LaTeX rendering (a single body line).
            beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
            endPlain = find_end_of_layout(document.body, beginPlain)
            content = lyx2latex(document, document.body[beginPlain : endPlain])

            document.body[beginPlain + 1:endPlain] = [content]
            # continue searching behind the rewritten layout start
            i = beginPlain + 1
 898
 899
 900 def convert_newgloss(document):
 901     " Converts Glosse insets (Linguistics module) to the new format "
 902
 903     # Do we use the linguistics module?
 904     have_mod = False
 905     mods = document.get_module_list()
 906     for mod in mods:
 907         if mod == "linguistics":
 908             have_mod = True
 909             continue
 910
 911     if not have_mod:
 912         return
 913
 914     glosses = ("\\begin_inset Flex Glosse", "\\begin_inset Flex Tri-Glosse")
 915     for glosse in glosses:
 916         i = 0
 917         while True:
 918             i = find_token(document.body, glosse, i)
 919             if i == -1:
 920                 break
 921             j = find_end_of_inset(document.body, i)
 922             if j == -1:
 923                 document.warning("Malformed LyX document: Can't find end of Glosse inset")
 924                 i += 1
 925                 continue
 926
 927             k = i
 928             while True:
 929                 argcontent = []
 930                 beginPlain = find_token(document.body, "\\begin_layout Plain Layout", k, j)
 931                 if beginPlain == -1:
 932                     break
 933                 endPlain = find_end_of_layout(document.body, beginPlain)
 934                 if endPlain == -1:
 935                     document.warning("Malformed LyX document: Can't find end of Glosse layout")
 936                     i += 1
 937                     continue
 938
 939                 glt  = find_token(document.body, "\\backslash", beginPlain, endPlain)
 940                 if glt != -1 and document.body[glt + 1].startswith("glt"):
 941                     document.body[glt + 1] = document.body[glt + 1].lstrip("glt").lstrip()
 942                     argcontent = document.body[glt + 1 : endPlain]
 943                     document.body[beginPlain + 1 : endPlain] = ["\\begin_inset Argument 1", "status open", "",
 944                         "\\begin_layout Plain Layout", "\\begin_inset ERT", "status open", "",
 945                         "\\begin_layout Plain Layout", ""] + argcontent + ["\\end_layout", "", "\\end_inset", "",
 946                         "\\end_layout", "", "\\end_inset"]
 947                 else:
 948                     content = document.body[beginPlain + 1 : endPlain]
 949                     document.body[beginPlain + 1 : endPlain] = ["\\begin_inset ERT", "status open", "",
 950                         "\\begin_layout Plain Layout"] + content + ["\\end_layout", "", "\\end_inset"]
 951
 952                 endPlain = find_end_of_layout(document.body, beginPlain)
 953                 k = endPlain
 954                 j = find_end_of_inset(document.body, i)
 955
 956             i = endPlain + 1
 957
 958
 959 def convert_BoxFeatures(document):
 960     " adds new box features "
 961
 962     i = 0
 963     while True:
 964         i = find_token(document.body, "height_special", i)
 965         if i == -1:
 966             return
 967         document.body[i+1:i+1] = ['thickness "0.4pt"', 'separation "3pt"', 'shadowsize "4pt"']
 968         i = i + 4
 969
 970
 971 def revert_BoxFeatures(document):
 972     " outputs new box features as TeX code "
 973
 974     i = 0
 975     defaultSep = "3pt"
 976     defaultThick = "0.4pt"
 977     defaultShadow = "4pt"
 978     while True:
 979         i = find_token(document.body, "height_special", i)
 980         if i == -1:
 981             return
 982         # read out the values
 983         beg = document.body[i+1].find('"');
 984         end = document.body[i+1].rfind('"');
 985         thickness = document.body[i+1][beg+1:end];
 986         beg = document.body[i+2].find('"');
 987         end = document.body[i+2].rfind('"');
 988         separation = document.body[i+2][beg+1:end];
 989         beg = document.body[i+3].find('"');
 990         end = document.body[i+3].rfind('"');
 991         shadowsize = document.body[i+3][beg+1:end];
 992         # delete the specification
 993         del document.body[i+1:i+4]
 994         # output ERT
 995         # first output the closing brace
 996         if shadowsize != defaultShadow or separation != defaultSep or thickness != defaultThick:
 997             document.body[i + 10 : i + 10] = put_cmd_in_ert("}")
 998         # now output the lengths
 999         if shadowsize != defaultShadow or separation != defaultSep or thickness != defaultThick:
1000             document.body[i - 10 : i - 10] = put_cmd_in_ert("{")
1001         if thickness != defaultThick:
1002             document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness]
1003         if separation != defaultSep and thickness == defaultThick:
1004             document.body[i - 5 : i - 4] = ["{\\backslash fboxsep " + separation]
1005         if separation != defaultSep and thickness != defaultThick:
1006             document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness + "\\backslash fboxsep " + separation]
1007         if shadowsize != defaultShadow and separation == defaultSep and thickness == defaultThick:
1008             document.body[i - 5 : i - 4] = ["{\\backslash shadowsize " + shadowsize]
1009         if shadowsize != defaultShadow and separation != defaultSep and thickness == defaultThick:
1010             document.body[i - 5 : i - 4] = ["{\\backslash fboxsep " + separation + "\\backslash shadowsize " + shadowsize]
1011         if shadowsize != defaultShadow and separation == defaultSep and thickness != defaultThick:
1012             document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness + "\\backslash shadowsize " + shadowsize]
1013         if shadowsize != defaultShadow and separation != defaultSep and thickness != defaultThick:
1014             document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness + "\\backslash fboxsep " + separation + "\\backslash shadowsize " + shadowsize]
1015         i = i + 11
1016
1017
def convert_origin(document):
    """ Insert the origin tag

    An "\\origin <dir>/" line is inserted in front of the \\textclass header
    line; "stdin" is used when document.dir is empty.  A warning is issued
    and nothing is changed when there is no \\textclass line.
    """

    i = find_token(document.header, "\\textclass ", 0)
    if i == -1:
        document.warning("Malformed LyX document: No \\textclass!!")
        return
    if document.dir == "":
        origin = "stdin"
    else:
        origin = document.dir.replace('\\', '/') + '/'
        # Decode byte strings with the file system encoding.  The check works
        # on Python 2 (where str is bytes) as well as on Python 3 (which has
        # no unicode() builtin) and leaves already decoded text alone.
        if os.name != 'nt' and isinstance(origin, bytes):
            origin = origin.decode(sys.getfilesystemencoding())
    document.header[i:i] = ["\\origin " + origin]
1032
1033
def revert_origin(document):
    " Remove the origin tag "

    pos = find_token(document.header, "\\origin ", 0)
    if pos != -1:
        del document.header[pos]
    else:
        # only the first \origin line is ever looked for
        document.warning("Malformed LyX document: No \\origin!!")
1042
1043
# Color names that revert_textcolor() converts to \textcolor commands in ERT.
color_names = [
    "brown", "darkgray", "gray",
    "lightgray", "lime", "olive", "orange",
    "pink", "purple", "teal", "violet",
]
1047
def revert_textcolor(document):
    """ revert new \\textcolor colors to TeX code

    Every "\\color <name>" line whose name is listed in color_names is
    replaced by an ERT inset holding "\\textcolor{<name>}{"; the matching
    "}" is put in ERT in front of the next \\color line or the next
    \\end_layout, whichever comes first.  xcolor is requested in the
    preamble the first time a color is actually reverted.
    """

    prefix = "\\color "
    xcolor = False
    i = 0
    while True:
        i = find_token(document.body, prefix, i)
        if i == -1:
            return
        color = document.body[i][len(prefix):]
        if color in color_names:
            # find the next \\color and/or the next \\end_layout
            j = find_token(document.body, "\\color", i + 1)
            k = find_token(document.body, "\\end_layout", i + 1)
            found = [pos for pos in (j, k) if pos != -1]
            if not found:
                document.warning("Malformed LyX document: Can't find end of colored text")
            else:
                # register that xcolor must be loaded in the preamble;
                # \@ifundefined takes a true and a false branch, hence
                # the trailing empty group
                if not xcolor:
                    xcolor = True
                    add_to_preamble(document, ["\\@ifundefined{rangeHsb}{\\usepackage{xcolor}}{}"])
                # output TeX code
                # first output the closing brace (it lies behind line i, so
                # i stays valid), ...
                close = min(found)
                document.body[close:close] = put_cmd_in_ert("}")
                # ... now output the \textcolor command
                document.body[i : i + 1] = put_cmd_in_ert("\\textcolor{" + color + "}{")
        i = i + 1
1080
1081
def convert_colorbox(document):
    " adds color settings for boxes "

    # Parameter lines added after every "shadowsize" line.
    color_params = ['framecolor "black"', 'backgroundcolor "none"']
    pos = find_token(document.body, "shadowsize", 0)
    while pos != -1:
        document.body[pos + 1:pos + 1] = color_params
        # skip the matched line and the two lines just inserted
        pos = find_token(document.body, "shadowsize", pos + 3)
1092
1093
def revert_colorbox(document):
    """ outputs color settings for boxes as TeX code

    The "framecolor" and "backgroundcolor" lines of every box are removed.
    If one of them differs from the defaults written by convert_colorbox()
    the box is wrapped in ERT: "\\fcolorbox{frame}{back}{" for a non-default
    frame color, "\\colorbox{back}{" for a background color only, plus the
    closing brace.
    """

    i = 0
    # These must agree with the values inserted by convert_colorbox().
    defaultframecolor = "black"
    defaultbackcolor = "none"
    while True:
        i = find_token(document.body, "framecolor", i)
        if i == -1:
            return
        # read out the values
        beg = document.body[i].find('"')
        end = document.body[i].rfind('"')
        framecolor = document.body[i][beg+1:end]
        beg = document.body[i+1].find('"')
        end = document.body[i+1].rfind('"')
        backcolor = document.body[i+1][beg+1:end]
        # delete the specification
        del document.body[i:i+2]
        # output TeX code
        if framecolor != defaultframecolor or backcolor != defaultbackcolor:
            # first output the closing brace
            document.body[i + 9 : i + 9] = put_cmd_in_ert("}")
            # now output the box commands; the "{" line of the ERT inserted
            # at i - 14 (presumably now at i - 9) is replaced by the command
            document.body[i - 14 : i - 14] = put_cmd_in_ert("{")
            if framecolor != defaultframecolor:
                # \fcolorbox{frame}{background}{text} is the framed color box
                # command of the color/xcolor packages
                document.body[i - 9 : i - 8] = ["\\backslash fcolorbox{" + framecolor + "}{" + backcolor + "}{"]
            else:
                document.body[i - 9 : i - 8] = ["\\backslash colorbox{" + backcolor + "}{"]
        i = i + 11
1125
1126
def revert_mathmulticol(document):
    " Convert formulas to ERT if they contain multicolumns "

    body = document.body
    pos = 0
    while True:
        pos = find_token(body, '\\begin_inset Formula', pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(body, pos)
        if inset_end == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(pos))
            pos += 1
            continue

        # LaTeX code of the formula, without the inset header and \end_inset
        formula = body[pos:inset_end]
        formula[0] = formula[0].replace('\\begin_inset Formula', '').lstrip()
        code = "\n".join(formula)

        # Look for a \multicolumn that is not of the form \multicolumn{1}:
        # no need to convert degenerated multicolumn cells,
        # they work in old LyX versions as "math ERT"
        needs_ert = False
        start = 0
        hit = code.find("\\multicolumn", start)
        while hit != -1:
            if code.find("\\multicolumn{1}", start) != hit:
                needs_ert = True
                break
            start = hit + 12
            hit = code.find("\\multicolumn", start)

        if needs_ert:
            body[pos:inset_end + 1] = put_cmd_in_ert(code)
            # continue behind the ERT inset that replaced the formula
            pos = find_end_of_inset(body, pos)
        else:
            pos = inset_end
1161
1162
def revert_Argument_to_TeX_brace(document, line, endline, n, nmax, environment, opt):
    r'''
    Reverts an InsetArgument to TeX-code
    usage:
    revert_Argument_to_TeX_brace(document, LineOfBegin, LineOfEnd, StartArgument, EndArgument, isEnvironment, isOpt)
    LineOfBegin is the line  of the \begin_layout or \begin_inset statement
    LineOfEnd is the line  of the \end_layout or \end_inset statement, if "0" is given, the end of the file is used instead
    StartArgument is the number of the first argument that needs to be converted
    EndArgument is the number of the last argument that needs to be converted or the last defined one
    isEnvironment must be true, if the layout is for a LaTeX environment
    isOpt must be true, if the argument is an optional one

    Arguments are handled in ascending order starting with StartArgument;
    the conversion stops at the first argument number that is not found
    or that starts behind LineOfEnd.
    Returns True if the last converted argument was output as an optional
    one ("[...]"), False otherwise (also when nothing was converted).
    '''
    lineArg = 0
    wasOpt = False
    while lineArg != -1 and n < nmax + 1:
      lineArg = find_token(document.body, "\\begin_inset Argument " + str(n), line)
      # an argument starting behind the end line is outside the given range
      if lineArg > endline and endline != 0:
        return wasOpt
      if lineArg != -1:
        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", lineArg)
        # we have to assure that no other inset is in the Argument
        beginInset = find_token(document.body, "\\begin_inset", beginPlain)
        endInset = find_token(document.body, "\\end_inset", beginPlain)
        k = beginPlain + 1
        l = k
        # step over nested insets: as long as another \begin_inset precedes
        # the current \end_inset candidate, advance both searches
        while beginInset < endInset and beginInset != -1:
          beginInset = find_token(document.body, "\\begin_inset", k)
          endInset = find_token(document.body, "\\end_inset", l)
          k = beginInset + 1
          l = endInset + 1
        # In all cases the three lines ending with \end_inset are replaced by
        # the closing ERT first, so that lineArg/beginPlain remain valid.
        if environment == False:
          if opt == False:
            # mandatory argument of a command: only "}{" is emitted at the
            # end, the lines opening the inset are deleted without any
            # replacement
            document.body[endInset - 2 : endInset + 1] = put_cmd_in_ert("}{")
            del(document.body[lineArg : beginPlain + 1])
            wasOpt = False
          else:
            document.body[endInset - 2 : endInset + 1] = put_cmd_in_ert("]")
            document.body[lineArg : beginPlain + 1] = put_cmd_in_ert("[")
            wasOpt = True
        else:
          # environments: the argument is output as "{...}" regardless of opt
          document.body[endInset - 2 : endInset + 1] = put_cmd_in_ert("}")
          document.body[lineArg : beginPlain + 1] = put_cmd_in_ert("{")
          wasOpt = False
        n += 1
    return wasOpt
1208
1209
def _revert_jss_flex_insets(document):
    " Replaces the JSS Flex insets by their LaTeX commands in ERT (helper of revert_jss) "

    # (name of the Flex inset, opening LaTeX code)
    flex_commands = (("pkg", "\\pkg{"), ("proglang", "\\proglang{"),
                     ("code", "\\code{"), ("E-mail", "\\email{"),
                     ("URL", "\\url{"))
    # One search position per inset type, -1 once a type is exhausted.  The
    # types are handled round robin, one inset of each type per round.
    cursors = [0] * len(flex_commands)
    while any(pos != -1 for pos in cursors):
        for idx, (name, command) in enumerate(flex_commands):
            if cursors[idx] == -1:
                continue
            beg = find_token(document.body, "\\begin_inset Flex " + name, cursors[idx])
            if beg == -1:
                cursors[idx] = -1
                continue
            end = find_end_of_inset(document.body, beg)
            if end == -1:
                document.warning("Malformed LyX document: Can't find end of Flex " + name + " inset")
                cursors[idx] = beg + 1
                continue
            # Replace the three lines ending with \end_inset first, so that
            # beg stays valid, then the four lines opening the inset.
            document.body[end - 2 : end + 1] = put_cmd_in_ert("}")
            document.body[beg : beg + 4] = put_cmd_in_ert(command)
            cursors[idx] = beg + 5


def _revert_jss_preamble_layouts(document):
    " Moves the JSS In_Preamble layouts to the preamble (helper of revert_jss) "

    # (layout name, LaTeX command)
    layouts = (("Title", "title"), ("Author", "author"),
               ("Plain Author", "Plainauthor"), ("Plain Title", "Plaintitle"),
               ("Short Title", "Shorttitle"), ("Abstract", "Abstract"),
               ("Keywords", "Keywords"), ("Plain Keywords", "Plainkeywords"),
               ("Address", "Address"))
    for layout, command in layouts:
        i = 0
        while True:
            i = find_token(document.body, "\\begin_layout " + layout, i)
            if i == -1:
                # a missing layout must not prevent the others from being
                # reverted
                break
            j = find_end_of_layout(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of " + layout + " layout")
                # step over the broken layout instead of finding it again
                i += 1
                continue
            content = lyx2latex(document, document.body[i:j + 1])
            add_to_preamble(document, ["\\" + command + "{" + content + "}"])
            del document.body[i:j + 1]
            # the layout is gone, so i already points behind it


def _revert_jss_code_chunk(document, beg):
    " Reverts the Code Chunk layout starting at line beg to ERT (helper of revert_jss) "

    begindeeper = find_token(document.body, "\\begin_deeper", beg)
    enddeeper = find_token(document.body, "\\end_deeper", beg)
    if begindeeper == -1 or enddeeper == -1:
        document.warning("Malformed LyX document: Can't find nested content of Code Chunk layout")
        return
    # start > stop: plain insertion right behind the \end_deeper line
    document.body[enddeeper + 1 : enddeeper] = ["\\end_layout"]
    document.body[enddeeper : enddeeper + 1] = put_cmd_in_ert("\\end{CodeChunk}")
    del document.body[begindeeper]
    document.body[beg : beg + 3] = put_cmd_in_ert("\\begin{CodeChunk}")
    document.body[beg - 1 : beg] = ["\\begin_layout Standard"]


def _revert_jss_environment(document, beg, layout, environment):
    " Wraps the layout starting at line beg in \\begin/\\end{environment} ERT (helper of revert_jss) "

    end = find_end_of_layout(document.body, beg)
    if end == -1:
        document.warning("Malformed LyX document: Can't find end of " + layout + " layout")
        return
    is_appendix = environment == "appendix"
    # The end of the layout is rewritten first so that beg stays valid.
    document.body[end : end + 1] = ["\\end_layout", "", "\\begin_layout Standard"]
    document.body[end + 3 : end + 4] = put_cmd_in_ert("\\end{" + environment + "}")
    if is_appendix:
        document.body[end + 13 : end + 13] = ["\\end_layout", ""]
    else:
        document.body[end + 13 : end + 13] = ["\\end_layout", "", "\\begin_layout Standard"]
    # start > stop: plain insertion right behind the \begin_layout line
    document.body[beg + 1 : beg] = ["\\end_layout", "", "\\begin_layout Standard"]
    document.body[beg : beg + 1] = put_cmd_in_ert("\\begin{" + environment + "}")
    if is_appendix:
        document.body[beg - 1 : beg] = ["", "\\begin_layout Standard"]


def _revert_jss_code_layouts(document):
    " Reverts the JSS code layouts and the Appendix layout to ERT (helper of revert_jss) "

    # (layout name as searched for, LaTeX environment)
    layouts = (("Code Chunk", "CodeChunk"), ("Standard Code", "Code"),
               ("Code Input", "CodeInput"), ("Code Output", "CodeOutput"),
               ("Appendix", "appendix"))
    # Round robin over the layout types, see _revert_jss_flex_insets().
    cursors = [0] * len(layouts)
    while any(pos != -1 for pos in cursors):
        for idx, (layout, environment) in enumerate(layouts):
            if cursors[idx] == -1:
                continue
            beg = find_token(document.body, "\\begin_layout " + layout, cursors[idx])
            if beg == -1:
                cursors[idx] = -1
                continue
            if layout == "Code Chunk":
                _revert_jss_code_chunk(document, beg)
            else:
                _revert_jss_environment(document, beg, layout, environment)
            cursors[idx] = beg + 1


def revert_jss(document):
    """ Reverts JSS In_Preamble commands to ERT in preamble

    Only documents of text class "jss" are changed.  The work is done in
    three steps:
      1. the Flex insets pkg, proglang, code, E-mail and URL become ERT
         (first, because they can be part of the In_Preamble layouts),
      2. the In_Preamble layouts (Title, Author, ...) are moved to the
         preamble as LaTeX commands,
      3. the code layouts and the Appendix layout become ERT environments.
    A layout that is absent is simply skipped; malformed insets or layouts
    are reported through document.warning() and left alone.
    """

    if document.textclass != "jss":
        return

    _revert_jss_flex_insets(document)
    _revert_jss_preamble_layouts(document)
    _revert_jss_code_layouts(document)
1437
1438
def convert_subref(document):
    " converts sub: ref prefixes to subsec: "

    body = document.body
    # One entry per pass: inset token, pattern of the line to change, new
    # line prefix, inset name used in warnings, stop after the first match?
    passes = (
        # 1) label insets
        ("\\begin_inset CommandInset label", re.compile(r'^name \"sub:(.+)$'),
         "name \"subsec:", "Label", False),
        # 2) xref insets
        ("\\begin_inset CommandInset ref", re.compile(r'^reference \"sub:(.+)$'),
         "reference \"subsec:", "Ref", True),
    )
    for token, rx, new_prefix, inset_name, first_only in passes:
        pos = find_token(body, token, 0)
        while pos != -1:
            inset_end = find_end_of_inset(body, pos)
            if inset_end == -1:
                document.warning("Malformed LyX document: Can't find end of " + inset_name + " inset at line " + str(pos))
            else:
                for p in range(pos, inset_end):
                    hit = rx.match(body[p])
                    if hit:
                        # keep everything behind the old "sub:" prefix
                        body[p] = new_prefix + hit.group(1)
                        if first_only:
                            break
            pos = find_token(body, token, pos + 1)
1482
1483
1484
def revert_subref(document):
    """Revert the "subsec:" prefix of labels and references to "sub:".

    Two kinds of insets in ``document.body`` are visited:

    * ``CommandInset label`` -- a ``name "subsec:..."`` line becomes
      ``name "sub:..."``;
    * ``CommandInset ref`` -- a ``reference "subsec:..."`` line becomes
      ``reference "sub:..."``.

    The body is modified in place and nothing is returned.  An inset whose
    end cannot be located is reported through ``document.warning`` and
    skipped.  Every matching "subsec:" prefix is rewritten; the code does
    not distinguish how the prefix came to be there.
    """

    # (inset kind, parameter carrying the label, inset name used in warnings)
    passes = (
        ("label", "name", "Label"),
        ("ref", "reference", "Ref"),
    )

    for kind, param, pretty in passes:
        begin_token = "\\begin_inset CommandInset " + kind
        rx = re.compile(r'^' + param + r' \"subsec:(.+)$')
        new_prefix = param + " \"sub:"

        i = 0
        while True:
            i = find_token(document.body, begin_token, i)
            if i == -1:
                break
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of "
                                 + pretty + " inset at line " + str(i))
                i += 1
                continue

            for p in range(i, j):
                m = rx.match(document.body[p])
                if m:
                    # group(1) is everything after the prefix, including the
                    # closing quote, so the rest of the line is preserved.
                    document.body[p] = new_prefix + m.group(1)
                    # Only the first matching line of an inset is rewritten.
                    break
            i += 1
1529
1530
1531 ##
1532 # Conversion hub
1533 #
1534
# Version strings this module declares support for.
supported_versions = ["2.2.0", "2.2"]

# Forward table: one [format number, [routines]] entry per file format,
# listed in ascending order.  An empty routine list means no conversion
# routine is registered for that format.
convert = [
    [475, [convert_separator]],
    # 476 is intentionally empty: that older versions did not load amsmath
    # automatically for these commands is considered a bug, and we do not
    # want to hardcode amsmath off.
    [476, []],
    [477, []],
    [478, []],
    [479, []],
    [480, []],
    [481, [convert_dashes]],
    [482, [convert_phrases]],
    [483, [convert_specialchar]],
    [484, []],
    [485, []],
    [486, []],
    [487, []],
    [488, [convert_newgloss]],
    [489, [convert_BoxFeatures]],
    [490, [convert_origin]],
    [491, []],
    [492, [convert_colorbox]],
    [493, []],
    [494, []],
    [495, [convert_subref]],
]

# Backward table: same entry layout as above, listed in descending order.
revert = [
    [494, [revert_subref]],
    [493, [revert_jss]],
    [492, [revert_mathmulticol]],
    [491, [revert_colorbox]],
    [490, [revert_textcolor]],
    [489, [revert_origin]],
    [488, [revert_BoxFeatures]],
    [487, [revert_newgloss, revert_glossgroup]],
    [486, [revert_forest]],
    [485, [revert_ex_itemargs]],
    [484, [revert_sigplan_doi]],
    [483, [revert_georgian]],
    [482, [revert_specialchar]],
    [481, [revert_phrases]],
    [480, [revert_dashes]],
    [479, [revert_question_env]],
    [478, [revert_beamer_lemma]],
    [477, [revert_xarrow]],
    [476, [revert_swissgerman]],
    [475, [revert_smash]],
    [474, [revert_separator]],
]


# Nothing to do when the module is executed directly.
if __name__ == "__main__":
    pass