lib/lyx2lyx/lyx_2_2.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # -*- coding: utf-8 -*-
   4 # Copyright (C) 2011 The LyX team
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19
  20 """ Convert files to the file format generated by lyx 2.2"""
  21
  22 import re, string
  23 import unicodedata
  24 import sys, os
  25
  26 # Uncomment only what you need to import, please.
  27
  28 #from parser_tools import find_token, find_end_of, find_tokens, \
  29 #  find_token_exact, find_end_of_inset, find_end_of_layout, \
  30 #  find_token_backwards, is_in_inset, get_value, get_quoted_value, \
  31 #  del_token, check_token, get_option_value
  32
  33 from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, lyx2latex#, \
  34 #  insert_to_preamble, latex_length, revert_flex_inset, \
  35 #  revert_font_attrs, hex2ratio, str2bool
  36
  37 from parser_tools import find_token, find_token_backwards, find_re, \
  38      find_end_of_inset, find_end_of_layout, find_nonempty_line, \
  39      get_containing_layout, get_value, check_token
  40
  41 ###############################################################################
  42 ###
  43 ### Conversion and reversion routines
  44 ###
  45 ###############################################################################
  46
def convert_separator(document):
    """
    Convert layout separators to separator insets and add (LaTeX) paragraph
    breaks in order to mimic previous LaTeX export.

    document.body is rewritten in place by three successive passes:

      1. a parbreak separator inset is inserted just before the
         "\\end_layout" that precedes every "\\begin_deeper";
      2. the same inset is inserted at the end of a Standard paragraph that
         precedes an "\\align" line (found outside of a Plain Layout),
         unless that paragraph contains an "\\align" or a VSpace inset;
      3. every Separator / EndOfSlide layout is replaced by a Standard
         layout that holds a parbreak separator inset.

    Returns None.
    """

    # Lines of a bare parbreak separator inset (inserted inside a paragraph).
    parins = ["\\begin_inset Separator parbreak", "\\end_inset", ""]
    # Lines of a complete Standard paragraph containing only that inset.
    parlay = ["\\begin_layout Standard", "\\begin_inset Separator parbreak",
              "\\end_inset", "", "\\end_layout", ""]
    # Font attribute tokens and the value that resets each of them.
    sty_dict = {
        "family" : "default",
        "series" : "default",
        "shape"  : "default",
        "size"   : "default",
        "bar"    : "default",
        "color"  : "inherit"
        }

    # Pass 1: paragraphs that are followed by a nested (deeper) block.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_deeper", i)
        if i == -1:
            break

        # j: the \end_layout closing the paragraph in front of \begin_deeper
        j = find_token_backwards(document.body, "\\end_layout", i-1)
        if j != -1:
            # reset any text style before inserting the inset
            lay = get_containing_layout(document.body, j-1)
            if lay != False:
                # lay[1]:lay[2] is used as the line range of that paragraph
                content = "\n".join(document.body[lay[1]:lay[2]])
                for val in list(sty_dict.keys()):
                    if content.find("\\%s" % val) != -1:
                        document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])]
                        # one line was inserted above both positions
                        i = i + 1
                        j = j + 1
            document.body[j:j] = parins
            # skip the inserted inset lines and the \begin_deeper line itself
            i = i + len(parins) + 1
        else:
            i = i + 1

    # Pass 2: Standard paragraphs that are followed by an aligned paragraph.
    i = 0
    while 1:
        i = find_token(document.body, "\\align", i)
        if i == -1:
            break

        # alignment settings inside Plain Layout are left alone
        lay = get_containing_layout(document.body, i)
        if lay != False and lay[0] == "Plain Layout":
            i = i + 1
            continue

        # j: the closest \end_layout in front of the \align line
        j = find_token_backwards(document.body, "\\end_layout", i-1)
        if j != -1:
            lay = get_containing_layout(document.body, j-1)
            if lay != False and lay[0] == "Standard" \
               and find_token(document.body, "\\align", lay[1], lay[2]) == -1 \
               and find_token(document.body, "\\begin_inset VSpace", lay[1], lay[2]) == -1:
                # reset any text style before inserting the inset
                content = "\n".join(document.body[lay[1]:lay[2]])
                for val in list(sty_dict.keys()):
                    if content.find("\\%s" % val) != -1:
                        document.body[j:j] = ["\\%s %s" % (val, sty_dict[val])]
                        # one line was inserted above both positions
                        i = i + 1
                        j = j + 1
                document.body[j:j] = parins
                # skip the inserted inset lines and the \align line itself
                i = i + len(parins) + 1
            else:
                i = i + 1
        else:
            i = i + 1

    # Pass 3: matches "\begin_layout" followed by Separator or EndOfSlide,
    # optionally wrapped in dashes or whitespace, case-insensitively.
    regexp = re.compile(r'^\\begin_layout (?:(-*)|(\s*))(Separator|EndOfSlide)(?:(-*)|(\s*))$', re.IGNORECASE)

    i = 0
    while 1:
        i = find_re(document.body, regexp, i)
        if i == -1:
            return

        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return

        # Lines lay[3]:lay[2] of the separator layout are carried over into
        # the replacement paragraph.
        # NOTE(review): lay[3] is presumably the first content line of the
        # layout -- confirm against get_containing_layout().
        lay = get_containing_layout(document.body, j-1)
        if lay != False:
            lines = document.body[lay[3]:lay[2]]
        else:
            lines = []

        # replace the whole layout (begin line through \end_layout) ...
        document.body[i:j+1] = parlay
        if len(lines) > 0:
            # ... and re-insert the saved lines right after "\begin_layout Standard"
            document.body[i+1:i+1] = lines

        i = i + len(parlay) + len(lines) + 1
 142
 143
def revert_separator(document):
    """
    Revert separator insets to layout separators.

    Every "\\begin_inset Separator" found in document.body is rewritten in
    place.  Depending on the inset kind, on whether the containing paragraph
    has non-empty lines before / after the inset and on the name of the
    containing layout, the inset becomes an ERT inset, a separator layout
    (optionally nested in \\begin_deeper ... \\end_deeper), a paragraph
    break, or is simply removed.  Returns None.
    """

    # Beamer classes name the separator layout differently.
    beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
    if document.textclass in beamer_classes:
        beglaysep = "\\begin_layout Separator"
    else:
        beglaysep = "\\begin_layout --Separator--"

    # An empty separator paragraph.
    parsep = [beglaysep, "", "\\end_layout", ""]
    # ERT inset holding a single "%".
    comert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", "%", "\\end_layout",
              "", "\\end_inset", ""]
    # ERT inset holding a single space.
    empert = ["\\begin_inset ERT", "status collapsed", "",
              "\\begin_layout Plain Layout", " ", "\\end_layout",
              "", "\\end_inset", ""]

    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Separator", i)
        if i == -1:
            return

        lay = get_containing_layout(document.body, i)
        if lay == False:
            document.warning("Malformed LyX document: Can't convert separator inset at line " + str(i))
            i = i + 1
            continue

        layoutname = lay[0]
        beg = lay[1]
        end = lay[2]
        # NOTE(review): presumably the word following "Separator" on the inset
        # line ("plain", "parbreak", ...) -- depends on get_value(); confirm.
        kind = get_value(document.body, "\\begin_inset Separator", i, i+1, "plain").split()[1]
        # Paragraph lines in front of / behind the inset; they only count as
        # "something" if at least one of them is non-empty.
        before = document.body[beg+1:i]
        something_before = len(before) > 0 and len("".join(before)) > 0
        j = find_end_of_inset(document.body, i)
        after = document.body[j+1:end]
        something_after = len(after) > 0 and len("".join(after)) > 0
        if kind == "plain":
            # beg + len(before) + 1 == i: beg now points at the inset line
            beg = beg + len(before) + 1
        elif something_before:
            # close the paragraph in front of the inset; everything below
            # moves down by the two inserted lines
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2
            j = j + 2
            beg = i
            end = end + 2

        if kind == "plain":
            # a plain separator becomes an ERT inset
            if something_after:
                document.body[beg:j+1] = empert
                i = i + len(empert)
            else:
                document.body[beg:j+1] = comert
                i = i + len(comert)
        else:
            if something_after:
                if layoutname == "Standard":
                    if not something_before:
                        # empty head: separator paragraph, then a new
                        # Standard paragraph for the remaining lines
                        document.body[beg:j+1] = parsep
                        i = i + len(parsep)
                        document.body[i:i] = ["", "\\begin_layout Standard"]
                        i = i + 2
                    else:
                        # the head was closed above; start a new paragraph
                        document.body[beg:j+1] = ["\\begin_layout Standard"]
                        i = i + 1
                else:
                    # non-Standard layout: the remaining lines become a
                    # nested Standard paragraph
                    document.body[beg:j+1] = ["\\begin_deeper"]
                    i = i + 1
                    # j + 1 - beg lines were replaced by a single line
                    end = end + 1 - (j + 1 - beg)
                    if not something_before:
                        document.body[i:i] = parsep
                        i = i + len(parsep)
                        end = end + len(parsep)
                    document.body[i:i] = ["\\begin_layout Standard"]
                    document.body[end+2:end+2] = ["", "\\end_deeper", ""]
                    i = i + 4
            else:
                # Nothing follows the inset in this paragraph: inspect the
                # next non-empty line after the paragraph.
                next_par_is_aligned = False
                k = find_nonempty_line(document.body, end+1)
                if k != -1 and check_token(document.body[k], "\\begin_layout"):
                    lay = get_containing_layout(document.body, k)
                    next_par_is_aligned = lay != False and \
                            find_token(document.body, "\\align", lay[1], lay[2]) != -1
                if k != -1 and not next_par_is_aligned \
                        and not check_token(document.body[k], "\\end_deeper") \
                        and not check_token(document.body[k], "\\begin_deeper"):
                    if layoutname == "Standard":
                        document.body[beg:j+1] = [beglaysep]
                        i = i + 1
                    else:
                        document.body[beg:j+1] = ["\\begin_deeper", beglaysep]
                        # j + 1 - beg lines were replaced by two lines
                        end = end + 2 - (j + 1 - beg)
                        document.body[end+1:end+1] = ["", "\\end_deeper", ""]
                        i = i + 3
                else:
                    # no separator layout is emitted here; the inset is just
                    # deleted (the two branches use different end bounds)
                    if something_before:
                        del document.body[i:end+1]
                    else:
                        del document.body[i:end-1]

        i = i + 1
 245
 246
 247 def revert_smash(document):
 248     " Set amsmath to on if smash commands are used "
 249
 250     commands = ["smash[t]", "smash[b]", "notag"]
 251     i = find_token(document.header, "\\use_package amsmath", 0)
 252     if i == -1:
 253         document.warning("Malformed LyX document: Can't find \\use_package amsmath.")
 254         return;
 255     value = get_value(document.header, "\\use_package amsmath", i).split()[1]
 256     if value != "1":
 257         # nothing to do if package is not auto but on or off
 258         return;
 259     j = 0
 260     while True:
 261         j = find_token(document.body, '\\begin_inset Formula', j)
 262         if j == -1:
 263             return
 264         k = find_end_of_inset(document.body, j)
 265         if k == -1:
 266             document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(j))
 267             j += 1
 268             continue
 269         code = "\n".join(document.body[j:k])
 270         for c in commands:
 271             if code.find("\\%s" % c) != -1:
 272                 # set amsmath to on, since it is loaded by the newer format
 273                 document.header[i] = "\\use_package amsmath 2"
 274                 return
 275         j = k
 276
 277
 278 def revert_swissgerman(document):
 279     " Set language german-ch-old to german "
 280     i = 0
 281     if document.language == "german-ch-old":
 282         document.language = "german"
 283         i = find_token(document.header, "\\language", 0)
 284         if i != -1:
 285             document.header[i] = "\\language german"
 286     j = 0
 287     while True:
 288         j = find_token(document.body, "\\lang german-ch-old", j)
 289         if j == -1:
 290             return
 291         document.body[j] = document.body[j].replace("\\lang german-ch-old", "\\lang german")
 292         j = j + 1
 293
 294
 295 def revert_use_package(document, pkg, commands, oldauto, supported):
 296     # oldauto defines how the version we are reverting to behaves:
 297     # if it is true, the old version uses the package automatically.
 298     # if it is false, the old version never uses the package.
 299     # If "supported" is true, the target version also supports this
 300     # package natively.
 301     regexp = re.compile(r'(\\use_package\s+%s)' % pkg)
 302     p = find_re(document.header, regexp, 0)
 303     value = "1" # default is auto
 304     if p != -1:
 305         value = get_value(document.header, "\\use_package" , p).split()[1]
 306         if not supported:
 307             del document.header[p]
 308     if value == "2" and not supported: # on
 309         add_to_preamble(document, ["\\usepackage{" + pkg + "}"])
 310     elif value == "1" and not oldauto: # auto
 311         i = 0
 312         while True:
 313             i = find_token(document.body, '\\begin_inset Formula', i)
 314             if i == -1:
 315                 return
 316             j = find_end_of_inset(document.body, i)
 317             if j == -1:
 318                 document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 319                 i += 1
 320                 continue
 321             code = "\n".join(document.body[i:j])
 322             for c in commands:
 323                 if code.find("\\%s" % c) != -1:
 324                     if supported:
 325                         document.header[p] = "\\use_package " + pkg + " 2"
 326                     else:
 327                         add_to_preamble(document, ["\\usepackage{" + pkg + "}"])
 328                     return
 329             i = j
 330
 331
 332 mathtools_commands = ["xhookrightarrow", "xhookleftarrow", "xRightarrow", \
 333                 "xrightharpoondown", "xrightharpoonup", "xrightleftharpoons", \
 334                 "xLeftarrow", "xleftharpoondown", "xleftharpoonup", \
 335                 "xleftrightarrow", "xLeftrightarrow", "xleftrightharpoons", \
 336                 "xmapsto"]
 337
 338 def revert_xarrow(document):
 339     "remove use_package mathtools"
 340     revert_use_package(document, "mathtools", mathtools_commands, False, True)
 341
 342
 343 def revert_beamer_lemma(document):
 344     " Reverts beamer lemma layout to ERT "
 345
 346     beamer_classes = ["beamer", "article-beamer", "scrarticle-beamer"]
 347     if document.textclass not in beamer_classes:
 348         return
 349
 350     consecutive = False
 351     i = 0
 352     while True:
 353         i = find_token(document.body, "\\begin_layout Lemma", i)
 354         if i == -1:
 355             return
 356         j = find_end_of_layout(document.body, i)
 357         if j == -1:
 358             document.warning("Malformed LyX document: Can't find end of Lemma layout")
 359             i += 1
 360             continue
 361         arg1 = find_token(document.body, "\\begin_inset Argument 1", i, j)
 362         endarg1 = find_end_of_inset(document.body, arg1)
 363         arg2 = find_token(document.body, "\\begin_inset Argument 2", i, j)
 364         endarg2 = find_end_of_inset(document.body, arg2)
 365         subst1 = []
 366         subst2 = []
 367         if arg1 != -1:
 368             beginPlain1 = find_token(document.body, "\\begin_layout Plain Layout", arg1, endarg1)
 369             if beginPlain1 == -1:
 370                 document.warning("Malformed LyX document: Can't find arg1 plain Layout")
 371                 i += 1
 372                 continue
 373             endPlain1 = find_end_of_inset(document.body, beginPlain1)
 374             content1 = document.body[beginPlain1 + 1 : endPlain1 - 2]
 375             subst1 = put_cmd_in_ert("<") + content1 + put_cmd_in_ert(">")
 376         if arg2 != -1:
 377             beginPlain2 = find_token(document.body, "\\begin_layout Plain Layout", arg2, endarg2)
 378             if beginPlain2 == -1:
 379                 document.warning("Malformed LyX document: Can't find arg2 plain Layout")
 380                 i += 1
 381                 continue
 382             endPlain2 = find_end_of_inset(document.body, beginPlain2)
 383             content2 = document.body[beginPlain2 + 1 : endPlain2 - 2]
 384             subst2 = put_cmd_in_ert("[") + content2 + put_cmd_in_ert("]")
 385
 386         # remove Arg insets
 387         if arg1 < arg2:
 388             del document.body[arg2 : endarg2 + 1]
 389             if arg1 != -1:
 390                 del document.body[arg1 : endarg1 + 1]
 391         if arg2 < arg1:
 392             del document.body[arg1 : endarg1 + 1]
 393             if arg2 != -1:
 394                 del document.body[arg2 : endarg2 + 1]
 395
 396         # index of end layout has probably changed
 397         j = find_end_of_layout(document.body, i)
 398         if j == -1:
 399             document.warning("Malformed LyX document: Can't find end of Lemma layout")
 400             i += 1
 401             continue
 402
 403         begcmd = []
 404
 405         # if this is not a consecutive env, add start command
 406         if not consecutive:
 407             begcmd = put_cmd_in_ert("\\begin{lemma}")
 408
 409         # has this a consecutive lemma?
 410         consecutive = document.body[j + 2] == "\\begin_layout Lemma"
 411
 412         # if this is not followed by a consecutive env, add end command
 413         if not consecutive:
 414             document.body[j : j + 1] = put_cmd_in_ert("\\end{lemma}") + ["\\end_layout"]
 415
 416         document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd + subst1 + subst2
 417
 418         i = j
 419
 420
 421
 422 def revert_question_env(document):
 423     """
 424     Reverts question and question* environments of
 425     theorems-ams-extended-bytype module to ERT
 426     """
 427
 428     # Do we use theorems-ams-extended-bytype module?
 429     have_mod = False
 430     mods = document.get_module_list()
 431     for mod in mods:
 432         if mod == "theorems-ams-extended-bytype":
 433             have_mod = True
 434             continue
 435
 436     if not have_mod:
 437         return
 438
 439     consecutive = False
 440     i = 0
 441     while True:
 442         i = find_token(document.body, "\\begin_layout Question", i)
 443         if i == -1:
 444             return
 445
 446         starred = document.body[i] == "\\begin_layout Question*"
 447
 448         j = find_end_of_layout(document.body, i)
 449         if j == -1:
 450             document.warning("Malformed LyX document: Can't find end of Question layout")
 451             i += 1
 452             continue
 453
 454         # if this is not a consecutive env, add start command
 455         begcmd = []
 456         if not consecutive:
 457             if starred:
 458                 begcmd = put_cmd_in_ert("\\begin{question*}")
 459             else:
 460                 begcmd = put_cmd_in_ert("\\begin{question}")
 461
 462         # has this a consecutive theorem of same type?
 463         consecutive = False
 464         if starred:
 465             consecutive = document.body[j + 2] == "\\begin_layout Question*"
 466         else:
 467             consecutive = document.body[j + 2] == "\\begin_layout Question"
 468
 469         # if this is not followed by a consecutive env, add end command
 470         if not consecutive:
 471             if starred:
 472                 document.body[j : j + 1] = put_cmd_in_ert("\\end{question*}") + ["\\end_layout"]
 473             else:
 474                 document.body[j : j + 1] = put_cmd_in_ert("\\end{question}") + ["\\end_layout"]
 475
 476         document.body[i : i + 1] = ["\\begin_layout Standard", ""] + begcmd
 477
 478         add_to_preamble(document, "\\providecommand{\questionname}{Question}")
 479
 480         if starred:
 481             add_to_preamble(document, "\\theoremstyle{plain}\n" \
 482                                       "\\newtheorem*{question*}{\\protect\\questionname}")
 483         else:
 484             add_to_preamble(document, "\\theoremstyle{plain}\n" \
 485                                       "\\newtheorem{question}{\\protect\\questionname}")
 486
 487         i = j
 488
 489
 490 def convert_dashes(document):
 491     "convert -- and --- to \\twohyphens and \\threehyphens"
 492
 493     if document.backend != "latex":
 494         return
 495
 496     i = 0
 497     while i < len(document.body):
 498         words = document.body[i].split()
 499         if len(words) > 1 and words[0] == "\\begin_inset" and \
 500            words[1] in ["CommandInset", "ERT", "External", "Formula", "Graphics", "IPA", "listings"]:
 501             # must not replace anything in insets that store LaTeX contents in .lyx files
 502             # (math and command insets withut overridden read() and write() methods
 503             # filtering out IPA makes Text::readParToken() more simple
 504             # skip ERT as well since it is not needed there
 505             j = find_end_of_inset(document.body, i)
 506             if j == -1:
 507                 document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
 508                 i += 1
 509             else:
 510                 i = j
 511             continue
 512         while True:
 513             j = document.body[i].find("--")
 514             if j == -1:
 515                 break
 516             front = document.body[i][:j]
 517             back = document.body[i][j+2:]
 518             # We can have an arbitrary number of consecutive hyphens.
 519             # These must be split into the corresponding number of two and three hyphens
 520             # We must match what LaTeX does: First try emdash, then endash, then single hyphen
 521             if back.find("-") == 0:
 522                 back = back[1:]
 523                 if len(back) > 0:
 524                     document.body.insert(i+1, back)
 525                 document.body[i] = front + "\\threehyphens"
 526             else:
 527                 if len(back) > 0:
 528                     document.body.insert(i+1, back)
 529                 document.body[i] = front + "\\twohyphens"
 530         i += 1
 531
 532
 533 def revert_dashes(document):
 534     "convert \\twohyphens and \\threehyphens to -- and ---"
 535
 536     i = 0
 537     while i < len(document.body):
 538         words = document.body[i].split()
 539         if len(words) > 1 and words[0] == "\\begin_inset" and \
 540            words[1] in ["CommandInset", "ERT", "External", "Formula", "Graphics", "IPA", "listings"]:
 541             # see convert_dashes
 542             j = find_end_of_inset(document.body, i)
 543             if j == -1:
 544                 document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
 545                 i += 1
 546             else:
 547                 i = j
 548             continue
 549         replaced = False
 550         if document.body[i].find("\\twohyphens") >= 0:
 551             document.body[i] = document.body[i].replace("\\twohyphens", "--")
 552             replaced = True
 553         if document.body[i].find("\\threehyphens") >= 0:
 554             document.body[i] = document.body[i].replace("\\threehyphens", "---")
 555             replaced = True
 556         if replaced and i+1 < len(document.body) and \
 557            (document.body[i+1].find("\\") != 0 or \
 558             document.body[i+1].find("\\twohyphens") == 0 or
 559             document.body[i+1].find("\\threehyphens") == 0) and \
 560            len(document.body[i]) + len(document.body[i+1]) <= 80:
 561             document.body[i] = document.body[i] + document.body[i+1]
 562             document.body[i+1:i+2] = []
 563         else:
 564             i += 1
 565
 566
 567 # order is important for the last three!
 568 phrases = ["LyX", "LaTeX2e", "LaTeX", "TeX"]
 569
 570 def is_part_of_converted_phrase(line, j, phrase):
 571     "is phrase part of an already converted phrase?"
 572     for p in phrases:
 573         converted = "\\SpecialCharNoPassThru \\" + p
 574         pos = j + len(phrase) - len(converted)
 575         if pos >= 0:
 576             if line[pos:pos+len(converted)] == converted:
 577                 return True
 578     return False
 579
 580
 581 def convert_phrases(document):
 582     "convert special phrases from plain text to \\SpecialCharNoPassThru"
 583
 584     if document.backend != "latex":
 585         return
 586
 587     for phrase in phrases:
 588         i = 0
 589         while i < len(document.body):
 590             words = document.body[i].split()
 591             if len(words) > 1 and words[0] == "\\begin_inset" and \
 592                words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
 593                 # must not replace anything in insets that store LaTeX contents in .lyx files
 594                 # (math and command insets withut overridden read() and write() methods
 595                 j = find_end_of_inset(document.body, i)
 596                 if j == -1:
 597                     document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 598                     i += 1
 599                 else:
 600                     i = j
 601                 continue
 602             if document.body[i].find("\\") == 0:
 603                 i += 1
 604                 continue
 605             j = document.body[i].find(phrase)
 606             if j == -1:
 607                 i += 1
 608                 continue
 609             if not is_part_of_converted_phrase(document.body[i], j, phrase):
 610                 front = document.body[i][:j]
 611                 back = document.body[i][j+len(phrase):]
 612                 if len(back) > 0:
 613                     document.body.insert(i+1, back)
 614                 # We cannot use SpecialChar since we do not know whether we are outside passThru
 615                 document.body[i] = front + "\\SpecialCharNoPassThru \\" + phrase
 616             i += 1
 617
 618
 619 def revert_phrases(document):
 620     "convert special phrases to plain text"
 621
 622     i = 0
 623     while i < len(document.body):
 624         words = document.body[i].split()
 625         if len(words) > 1 and words[0] == "\\begin_inset" and \
 626            words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
 627             # see convert_phrases
 628             j = find_end_of_inset(document.body, i)
 629             if j == -1:
 630                 document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 631                 i += 1
 632             else:
 633                 i = j
 634             continue
 635         replaced = False
 636         for phrase in phrases:
 637             # we can replace SpecialChar since LyX ensures that it cannot be inserted into passThru parts
 638             if document.body[i].find("\\SpecialChar \\" + phrase) >= 0:
 639                 document.body[i] = document.body[i].replace("\\SpecialChar \\" + phrase, phrase)
 640                 replaced = True
 641             if document.body[i].find("\\SpecialCharNoPassThru \\" + phrase) >= 0:
 642                 document.body[i] = document.body[i].replace("\\SpecialCharNoPassThru \\" + phrase, phrase)
 643                 replaced = True
 644         if replaced and i+1 < len(document.body) and \
 645            (document.body[i+1].find("\\") != 0 or \
 646             document.body[i+1].find("\\SpecialChar") == 0) and \
 647            len(document.body[i]) + len(document.body[i+1]) <= 80:
 648             document.body[i] = document.body[i] + document.body[i+1]
 649             document.body[i+1:i+2] = []
 650             i -= 1
 651         i += 1
 652
 653
 654 def convert_specialchar_internal(document, forward):
 655     specialchars = {"\\-":"softhyphen", "\\textcompwordmark{}":"ligaturebreak", \
 656         "\\@.":"endofsentence", "\\ldots{}":"ldots", \
 657         "\\menuseparator":"menuseparator", "\\slash{}":"breakableslash", \
 658         "\\nobreakdash-":"nobreakdash", "\\LyX":"LyX", \
 659         "\\TeX":"TeX", "\\LaTeX2e":"LaTeX2e", \
 660         "\\LaTeX":"LaTeX" # must be after LaTeX2e
 661     }
 662
 663     i = 0
 664     while i < len(document.body):
 665         words = document.body[i].split()
 666         if len(words) > 1 and words[0] == "\\begin_inset" and \
 667            words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
 668             # see convert_phrases
 669             j = find_end_of_inset(document.body, i)
 670             if j == -1:
 671                 document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 672                 i += 1
 673             else:
 674                 i = j
 675             continue
 676         for key, value in specialchars.iteritems():
 677             if forward:
 678                 document.body[i] = document.body[i].replace("\\SpecialChar " + key, "\\SpecialChar " + value)
 679                 document.body[i] = document.body[i].replace("\\SpecialCharNoPassThru " + key, "\\SpecialCharNoPassThru " + value)
 680             else:
 681                 document.body[i] = document.body[i].replace("\\SpecialChar " + value, "\\SpecialChar " + key)
 682                 document.body[i] = document.body[i].replace("\\SpecialCharNoPassThru " + value, "\\SpecialCharNoPassThru " + key)
 683         i += 1
 684
 685
 686 def convert_specialchar(document):
 687     "convert special characters to new syntax"
 688     convert_specialchar_internal(document, True)
 689
 690
 691 def revert_specialchar(document):
 692     "convert special characters to old syntax"
 693     convert_specialchar_internal(document, False)
 694
 695
 696 def revert_georgian(document):
 697     "Set the document language to English but assure Georgian output"
 698
 699     if document.language == "georgian":
 700         document.language = "english"
 701         i = find_token(document.header, "\\language georgian", 0)
 702         if i != -1:
 703             document.header[i] = "\\language english"
 704         j = find_token(document.header, "\\language_package default", 0)
 705         if j != -1:
 706             document.header[j] = "\\language_package babel"
 707         k = find_token(document.header, "\\options", 0)
 708         if k != -1:
 709             document.header[k] = document.header[k].replace("\\options", "\\options georgian,")
 710         else:
 711             l = find_token(document.header, "\\use_default_options", 0)
 712             document.header.insert(l + 1, "\\options georgian")
 713
 714
 715 def revert_sigplan_doi(document):
 716     " Reverts sigplanconf DOI layout to ERT "
 717
 718     if document.textclass != "sigplanconf":
 719         return
 720
 721     i = 0
 722     while True:
 723         i = find_token(document.body, "\\begin_layout DOI", i)
 724         if i == -1:
 725             return
 726         j = find_end_of_layout(document.body, i)
 727         if j == -1:
 728             document.warning("Malformed LyX document: Can't find end of DOI layout")
 729             i += 1
 730             continue
 731
 732         content = lyx2latex(document, document.body[i:j + 1])
 733         add_to_preamble(document, ["\\doi{" + content + "}"])
 734         del document.body[i:j + 1]
 735         # no need to reset i
 736
 737
 738 def revert_ex_itemargs(document):
 739     " Reverts \\item arguments of the example environments (Linguistics module) to TeX-code "
 740
 741     # Do we use the linguistics module?
 742     have_mod = False
 743     mods = document.get_module_list()
 744     for mod in mods:
 745         if mod == "linguistics":
 746             have_mod = True
 747             continue
 748
 749     if not have_mod:
 750         return
 751
 752     i = 0
 753     example_layouts = ["Numbered Examples (consecutive)", "Subexample"]
 754     while True:
 755         i = find_token(document.body, "\\begin_inset Argument item:", i)
 756         if i == -1:
 757             return
 758         j = find_end_of_inset(document.body, i)
 759         # Find containing paragraph layout
 760         parent = get_containing_layout(document.body, i)
 761         if parent == False:
 762             document.warning("Malformed LyX document: Can't find parent paragraph layout")
 763             i += 1
 764             continue
 765         parbeg = parent[3]
 766         layoutname = parent[0]
 767         if layoutname in example_layouts:
 768             beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
 769             endPlain = find_end_of_layout(document.body, beginPlain)
 770             content = document.body[beginPlain + 1 : endPlain]
 771             del document.body[i:j+1]
 772             subst = put_cmd_in_ert("[") + content + put_cmd_in_ert("]")
 773             document.body[parbeg : parbeg] = subst
 774         i += 1
 775
 776
 777 def revert_forest(document):
 778     " Reverts the forest environment (Linguistics module) to TeX-code "
 779
 780     # Do we use the linguistics module?
 781     have_mod = False
 782     mods = document.get_module_list()
 783     for mod in mods:
 784         if mod == "linguistics":
 785             have_mod = True
 786             continue
 787
 788     if not have_mod:
 789         return
 790
 791     i = 0
 792     while True:
 793         i = find_token(document.body, "\\begin_inset Flex Structure Tree", i)
 794         if i == -1:
 795             return
 796         j = find_end_of_inset(document.body, i)
 797         if j == -1:
 798             document.warning("Malformed LyX document: Can't find end of Structure Tree inset")
 799             i += 1
 800             continue
 801
 802         beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
 803         endPlain = find_end_of_layout(document.body, beginPlain)
 804         content = lyx2latex(document, document.body[beginPlain : endPlain])
 805
 806         add_to_preamble(document, ["\\usepackage{forest}"])
 807
 808         document.body[i:j + 1] = ["\\begin_inset ERT", "status collapsed", "",
 809                 "\\begin_layout Plain Layout", "", "\\backslash",
 810                 "begin{forest}", "\\end_layout", "", "\\begin_layout Plain Layout",
 811                 content, "\\end_layout", "", "\\begin_layout Plain Layout",
 812                 "\\backslash", "end{forest}", "", "\\end_layout", "", "\\end_inset"]
 813         # no need to reset i
 814
 815
 816 def revert_glossgroup(document):
 817     " Reverts the GroupGlossedWords inset (Linguistics module) to TeX-code "
 818
 819     # Do we use the linguistics module?
 820     have_mod = False
 821     mods = document.get_module_list()
 822     for mod in mods:
 823         if mod == "linguistics":
 824             have_mod = True
 825             continue
 826
 827     if not have_mod:
 828         return
 829
 830     i = 0
 831     while True:
 832         i = find_token(document.body, "\\begin_inset Flex GroupGlossedWords", i)
 833         if i == -1:
 834             return
 835         j = find_end_of_inset(document.body, i)
 836         if j == -1:
 837             document.warning("Malformed LyX document: Can't find end of GroupGlossedWords inset")
 838             i += 1
 839             continue
 840
 841         beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
 842         endPlain = find_end_of_layout(document.body, beginPlain)
 843         content = lyx2latex(document, document.body[beginPlain : endPlain])
 844         document.warning("content: %s" % content)
 845
 846         document.body[i:j + 1] = ["{", "", content, "", "}"]
 847         # no need to reset i
 848
 849
def revert_newgloss(document):
    " Reverts the new Glosse insets (Linguistics module) to the old format "

    # Works in place on document.body; returns nothing.

    # Do we use the linguistics module?
    have_mod = False
    mods = document.get_module_list()
    for mod in mods:
        if mod == "linguistics":
            have_mod = True
            # NOTE(review): "continue" only moves on to the next module;
            # the whole list is still scanned.
            continue

    if not have_mod:
        return

    # Both inset flavours are handled identically, one after the other.
    glosses = ("\\begin_inset Flex Glosse", "\\begin_inset Flex Tri-Glosse")
    for glosse in glosses:
        i = 0
        while True:
            i = find_token(document.body, glosse, i)
            if i == -1:
                break
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of Glosse inset")
                i += 1
                continue

            # Look for an "Argument 1" inset between the start (i) and the
            # end (j) of the glosse inset.
            # NOTE(review): endarg is computed before arg is tested for -1;
            # its value is only used inside the "arg != -1" branch.
            arg = find_token(document.body, "\\begin_inset Argument 1", i, j)
            endarg = find_end_of_inset(document.body, arg)
            argcontent = ""
            if arg != -1:
                argbeginPlain = find_token(document.body, "\\begin_layout Plain Layout", arg, endarg)
                if argbeginPlain == -1:
                    document.warning("Malformed LyX document: Can't find arg plain Layout")
                    i += 1
                    continue
                # NOTE(review): find_end_of_inset is called with a layout
                # line here; the "- 2" below presumably steps back from the
                # enclosing \end_inset over the layout's closing lines --
                # confirm against parser_tools.
                argendPlain = find_end_of_inset(document.body, argbeginPlain)
                argcontent = lyx2latex(document, document.body[argbeginPlain : argendPlain - 2])

                # Insert the argument text as an extra "\glt ..." paragraph
                # just before line j (the end of the glosse inset). This
                # happens before the deletion below, which is at lower
                # indices and therefore does not invalidate j for the insert.
                document.body[j:j] = ["", "\\begin_layout Plain Layout","\\backslash", "glt ",
                    argcontent, "\\end_layout"]

                # remove Arg insets and paragraph, if it only contains this inset
                if document.body[arg - 1] == "\\begin_layout Plain Layout" and find_end_of_layout(document.body, arg - 1) == endarg + 3:
                    del document.body[arg - 1 : endarg + 4]
                else:
                    del document.body[arg : endarg + 1]

            # Replace the lines inside the first Plain Layout found from i on
            # by their lyx2latex rendering (a single body entry).
            beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
            endPlain = find_end_of_layout(document.body, beginPlain)
            content = lyx2latex(document, document.body[beginPlain : endPlain])

            document.body[beginPlain + 1:endPlain] = [content]
            # Continue searching after the layout line that was just rewritten.
            i = beginPlain + 1
 904
 905
 906 def convert_newgloss(document):
 907     " Converts Glosse insets (Linguistics module) to the new format "
 908
 909     # Do we use the linguistics module?
 910     have_mod = False
 911     mods = document.get_module_list()
 912     for mod in mods:
 913         if mod == "linguistics":
 914             have_mod = True
 915             continue
 916
 917     if not have_mod:
 918         return
 919
 920     glosses = ("\\begin_inset Flex Glosse", "\\begin_inset Flex Tri-Glosse")
 921     for glosse in glosses:
 922         i = 0
 923         while True:
 924             i = find_token(document.body, glosse, i)
 925             if i == -1:
 926                 break
 927             j = find_end_of_inset(document.body, i)
 928             if j == -1:
 929                 document.warning("Malformed LyX document: Can't find end of Glosse inset")
 930                 i += 1
 931                 continue
 932
 933             k = i
 934             while True:
 935                 argcontent = []
 936                 beginPlain = find_token(document.body, "\\begin_layout Plain Layout", k, j)
 937                 if beginPlain == -1:
 938                     break
 939                 endPlain = find_end_of_layout(document.body, beginPlain)
 940                 if endPlain == -1:
 941                     document.warning("Malformed LyX document: Can't find end of Glosse layout")
 942                     i += 1
 943                     continue
 944
 945                 glt  = find_token(document.body, "\\backslash", beginPlain, endPlain)
 946                 if glt != -1 and document.body[glt + 1].startswith("glt"):
 947                     document.body[glt + 1] = document.body[glt + 1].lstrip("glt").lstrip()
 948                     argcontent = document.body[glt + 1 : endPlain]
 949                     document.body[beginPlain + 1 : endPlain] = ["\\begin_inset Argument 1", "status open", "",
 950                         "\\begin_layout Plain Layout", "\\begin_inset ERT", "status open", "",
 951                         "\\begin_layout Plain Layout", ""] + argcontent + ["\\end_layout", "", "\\end_inset", "",
 952                         "\\end_layout", "", "\\end_inset"]
 953                 else:
 954                     content = document.body[beginPlain + 1 : endPlain]
 955                     document.body[beginPlain + 1 : endPlain] = ["\\begin_inset ERT", "status open", "",
 956                         "\\begin_layout Plain Layout"] + content + ["\\end_layout", "", "\\end_inset"]
 957
 958                 endPlain = find_end_of_layout(document.body, beginPlain)
 959                 k = endPlain
 960                 j = find_end_of_inset(document.body, i)
 961
 962             i = endPlain + 1
 963
 964
 965 def convert_BoxFeatures(document):
 966     " adds new box features "
 967
 968     i = 0
 969     while True:
 970         i = find_token(document.body, "height_special", i)
 971         if i == -1:
 972             return
 973         document.body[i+1:i+1] = ['thickness "0.4pt"', 'separation "3pt"', 'shadowsize "4pt"']
 974         i = i + 4
 975
 976
 977 def revert_BoxFeatures(document):
 978     " outputs new box features as TeX code "
 979
 980     i = 0
 981     defaultSep = "3pt"
 982     defaultThick = "0.4pt"
 983     defaultShadow = "4pt"
 984     while True:
 985         i = find_token(document.body, "height_special", i)
 986         if i == -1:
 987             return
 988         # read out the values
 989         beg = document.body[i+1].find('"');
 990         end = document.body[i+1].rfind('"');
 991         thickness = document.body[i+1][beg+1:end];
 992         beg = document.body[i+2].find('"');
 993         end = document.body[i+2].rfind('"');
 994         separation = document.body[i+2][beg+1:end];
 995         beg = document.body[i+3].find('"');
 996         end = document.body[i+3].rfind('"');
 997         shadowsize = document.body[i+3][beg+1:end];
 998         # delete the specification
 999         del document.body[i+1:i+4]
1000         # output ERT
1001         # first output the closing brace
1002         if shadowsize != defaultShadow or separation != defaultSep or thickness != defaultThick:
1003             document.body[i + 10 : i + 10] = put_cmd_in_ert("}")
1004         # now output the lengths
1005         if shadowsize != defaultShadow or separation != defaultSep or thickness != defaultThick:
1006             document.body[i - 10 : i - 10] = put_cmd_in_ert("{")
1007         if thickness != defaultThick:
1008             document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness]
1009         if separation != defaultSep and thickness == defaultThick:
1010             document.body[i - 5 : i - 4] = ["{\\backslash fboxsep " + separation]
1011         if separation != defaultSep and thickness != defaultThick:
1012             document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness + "\\backslash fboxsep " + separation]
1013         if shadowsize != defaultShadow and separation == defaultSep and thickness == defaultThick:
1014             document.body[i - 5 : i - 4] = ["{\\backslash shadowsize " + shadowsize]
1015         if shadowsize != defaultShadow and separation != defaultSep and thickness == defaultThick:
1016             document.body[i - 5 : i - 4] = ["{\\backslash fboxsep " + separation + "\\backslash shadowsize " + shadowsize]
1017         if shadowsize != defaultShadow and separation == defaultSep and thickness != defaultThick:
1018             document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness + "\\backslash shadowsize " + shadowsize]
1019         if shadowsize != defaultShadow and separation != defaultSep and thickness != defaultThick:
1020             document.body[i - 5 : i - 4] = ["{\\backslash fboxrule " + thickness + "\\backslash fboxsep " + separation + "\\backslash shadowsize " + shadowsize]
1021         i = i + 11
1022
1023
def convert_origin(document):
    " Insert the origin tag "

    # document: the LyX document being converted. An "\origin <dir>/" line
    # ("\origin stdin" when document.dir is empty) is inserted into
    # document.header right before the \textclass line. Returns nothing.

    i = find_token(document.header, "\\textclass ", 0)
    if i == -1:
        document.warning("Malformed LyX document: No \\textclass!!")
        return
    if document.dir == "":
        origin = "stdin"
    else:
        # always use forward slashes and a trailing separator
        origin = document.dir.replace('\\', '/') + '/'
        # Outside Windows a byte-string path is decoded with the file system
        # encoding. Only byte strings are decoded: the unicode() builtin does
        # not exist on Python 3, and decoding an already decoded path raises
        # TypeError on Python 2.
        if os.name != 'nt' and isinstance(origin, bytes):
            origin = origin.decode(sys.getfilesystemencoding())
    document.header[i:i] = ["\\origin " + origin]
1038
1039
def revert_origin(document):
    " Remove the origin tag "

    # Drop the first "\origin " header line; warn when there is none.
    pos = find_token(document.header, "\\origin ", 0)
    if pos != -1:
        del document.header[pos]
    else:
        document.warning("Malformed LyX document: No \\origin!!")
1048
1049
# Colour names that revert_textcolor rewrites as \textcolor{...} TeX code.
color_names = [
    "brown", "darkgray", "gray",
    "lightgray", "lime", "olive", "orange",
    "pink", "purple", "teal", "violet",
]
1053
def revert_textcolor(document):
    " revert new \\textcolor colors to TeX code "

    # document: the LyX document being reverted. Every "\color <name>" line
    # whose name is listed in color_names is replaced by ERT
    # "\textcolor{<name>}{", with a closing "}" in ERT before the next
    # \color line or the next \end_layout, whichever comes first.
    # Returns nothing.

    i = 0
    xcolor = False
    while True:
        i = find_token(document.body, "\\color ", i)
        if i == -1:
            return
        for color in color_names:
            if document.body[i] != "\\color " + color:
                continue
            # find the next \\color and/or the next \\end_layout
            j = find_token(document.body, "\\color", i + 1)
            k = find_token(document.body, "\\end_layout", i + 1)
            ends = [pos for pos in (j, k) if pos != -1]
            if not ends:
                # Nowhere to close the group; inserting at index -1 would put
                # the brace in front of the last body line.
                document.warning("Malformed LyX document: Can't find end of \\color " + color)
                break
            # register that xcolor must be loaded in the preamble (once)
            if not xcolor:
                xcolor = True
                # "\\usepackage": a lone "\u" is an invalid escape in
                # Python 3 string literals.
                add_to_preamble(document, ["\\@ifundefined{rangeHsb}{\\usepackage{xcolor}}{}"])
            # output TeX code
            # first output the closing brace (it is behind line i, so the
            # index i stays valid) ...
            close_at = min(ends)
            document.body[close_at : close_at] = put_cmd_in_ert("}")
            # ... now output the \textcolor command
            document.body[i : i + 1] = put_cmd_in_ert("\\textcolor{" + color + "}{")
            # line i has been replaced, no other colour can match
            break
        i = i + 1
1085
1086
def convert_colorbox(document):
    " adds color settings for boxes "

    # Parameters added (with these values) after every "shadowsize" line.
    color_params = ['framecolor "black"', 'backgroundcolor "none"']

    pos = find_token(document.body, "shadowsize", 0)
    while pos != -1:
        document.body[pos + 1:pos + 1] = color_params
        # skip the matched line plus the two lines just inserted
        pos = find_token(document.body, "shadowsize", pos + 3)
1097
1098
def revert_colorbox(document):
    " outputs color settings for boxes as TeX code "

    # document: the LyX document being reverted. For every Box inset the
    # framecolor/backgroundcolor parameters are removed; non-default colours
    # are emitted as \fcolorbox / \colorbox ERT wrapped around the inset.
    # Returns nothing.

    binset = 0
    defaultframecolor = "black"
    defaultbackcolor = "none"
    while True:
        binset = find_token(document.body, "\\begin_inset Box", binset)
        if binset == -1:
            return

        einset = find_end_of_inset(document.body, binset)
        if einset == -1:
            document.warning("Malformed LyX document: Can't find end of box inset!")
            binset += 1
            continue

        blay = find_token(document.body, "\\begin_layout", binset, einset)
        if blay == -1:
            document.warning("Malformed LyX document: Can't find start of layout!")
            binset = einset
            continue

        # doing it this way, we make sure only to find a framecolor option
        # (the search stops at the first layout, i.e. inside the parameters)
        frame = find_token(document.body, "framecolor", binset, blay)
        if frame == -1:
            binset = einset
            continue

        beg = document.body[frame].find('"')
        end = document.body[frame].rfind('"')
        framecolor = document.body[frame][beg+1:end]

        # this should be on the next line
        bgcolor = frame + 1
        beg = document.body[bgcolor].find('"')
        end = document.body[bgcolor].rfind('"')
        backcolor = document.body[bgcolor][beg+1:end]

        # delete those bits
        del document.body[frame:frame+2]
        # adjust end of inset
        einset -= 2

        # a coloured frame is drawn by \fcolorbox, so the box itself must
        # not draw a second one
        if document.body[binset] == "\\begin_inset Box Boxed" and \
           framecolor != defaultframecolor:
            document.body[binset] = "\\begin_inset Box Frameless"

        # output TeX code
        if framecolor != defaultframecolor or backcolor != defaultbackcolor:
            # first output the closing brace (behind the inset, so binset
            # stays valid for the insertion below)
            document.body[einset + 1 : einset + 1] = put_cmd_in_ert("}")
            # put_cmd_in_ert is given plain LaTeX, as in its other callers in
            # this file ("\\textcolor{", "\\pkg{"); a literal "\\backslash "
            # prefix would end up in the TeX output.
            if framecolor != defaultframecolor:
                document.body[binset:binset] = put_cmd_in_ert("\\fcolorbox{" + framecolor + "}{" + backcolor + "}{")
            else:
                document.body[binset:binset] = put_cmd_in_ert("\\colorbox{" + backcolor + "}{")

        binset = einset
1160
1161
def revert_mathmulticol(document):
    " Convert formulas to ERT if they contain multicolumns "

    inset_tag = '\\begin_inset Formula'
    keyword = "\\multicolumn"

    i = 0
    while True:
        i = find_token(document.body, inset_tag, i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue

        # Formula source: the inset lines without the inset tag on the
        # first line and without the closing line j.
        formula = document.body[i:j]
        formula[0] = formula[0].replace(inset_tag, '').lstrip()
        code = "\n".join(formula)

        # Walk over the \multicolumn occurrences. Degenerated cells
        # (\multicolumn{1}) need no conversion, they work in old LyX
        # versions as "math ERT"; any other occurrence forces ERT.
        needs_ert = False
        search_from = 0
        while True:
            hit = code.find(keyword, search_from)
            if hit == -1:
                break
            if code.find(keyword + "{1}", search_from) != hit:
                needs_ert = True
                break
            search_from = hit + len(keyword)

        if needs_ert:
            document.body[i:j+1] = put_cmd_in_ert(code)
            # line i now starts the ERT inset; continue behind it
            i = find_end_of_inset(document.body, i)
        else:
            i = j
1196
1197
def revert_jss(document):
    " Reverts JSS In_Preamble commands to ERT in preamble "

    # Works in place on document.body and adds lines to the preamble via
    # add_to_preamble; returns nothing. Three phases:
    #   1. Flex insets (Pkg, Proglang, Code, E-mail, URL) -> ERT commands
    #   2. In_Preamble layouts (Title ... Address) -> preamble commands
    #   3. code layouts/insets (Code Chunk, Code Input, Code Output, Code)
    #      -> ERT \begin{...}/\end{...} pairs

    if document.textclass != "jss":
        return

    # One search position per inset type of phase 1; -1 means "no more
    # insets of this type".
    h = 0
    m = 0
    j = 0
    k = 0
    n = 0
    while True:
      # at first revert the inset layouts because they can be part of the In_Preamble layouts
      while m != -1 or j != -1 or h != -1 or k != -1 or n != -1:
        # Each pass handles at most one inset of every type: the last three
        # lines of the inset (end - 2 .. end) are replaced by an ERT "}" and
        # its first four lines by an ERT with the opening command.
        # \pkg
        if h != -1:
          h = find_token(document.body, "\\begin_inset Flex Pkg", h)
        if h != -1:
          endh = find_end_of_inset(document.body, h)
          document.body[endh - 2 : endh + 1] = put_cmd_in_ert("}")
          document.body[h : h + 4] = put_cmd_in_ert("\\pkg{")
          h = h + 5
        # \proglang
        if m != -1:
          m = find_token(document.body, "\\begin_inset Flex Proglang", m)
        if m != -1:
          endm = find_end_of_inset(document.body, m)
          document.body[endm - 2 : endm + 1] = put_cmd_in_ert("}")
          document.body[m : m + 4] = put_cmd_in_ert("\\proglang{")
          m = m + 5
        # \code
        if j != -1:
          j = find_token(document.body, "\\begin_inset Flex Code", j)
        if j != -1:
          # assure that we are not in a Code Chunk inset
          # (only a line whose last character is "e" is converted)
          if document.body[j][-1] == "e":
              endj = find_end_of_inset(document.body, j)
              document.body[endj - 2 : endj + 1] = put_cmd_in_ert("}")
              document.body[j : j + 4] = put_cmd_in_ert("\\code{")
              j = j + 5
          else:
              j = j + 1
        # \email
        if k != -1:
          k = find_token(document.body, "\\begin_inset Flex E-mail", k)
        if k != -1:
          endk = find_end_of_inset(document.body, k)
          document.body[endk - 2 : endk + 1] = put_cmd_in_ert("}")
          document.body[k : k + 4] = put_cmd_in_ert("\\email{")
          k = k + 5
        # \url
        if n != -1:
          n = find_token(document.body, "\\begin_inset Flex URL", n)
        if n != -1:
          endn = find_end_of_inset(document.body, n)
          document.body[endn - 2 : endn + 1] = put_cmd_in_ert("}")
          document.body[n : n + 4] = put_cmd_in_ert("\\url{")
          n = n + 5
      # now revert the In_Preamble layouts
      # Every step below: find the layout from the start of the body,
      # convert it with lyx2latex, add it to the preamble wrapped in the
      # matching command and delete it from the body.
      # NOTE(review): each "return" leaves the whole function when a layout
      # is absent, so the remaining layouts and phase 3 are then skipped.
      # NOTE(review): in the "j == -1" branches "continue" restarts the
      # outer "while True", where i is recomputed from position 0, so the
      # "i += 1" has no lasting effect -- this looks like it can loop
      # forever on such a malformed document; confirm.
      # NOTE(review): j is reused here as end-of-layout index.
      # \title
      i = find_token(document.body, "\\begin_layout Title", 0)
      if i == -1:
        return
      j = find_end_of_layout(document.body, i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find end of Title layout")
        i += 1
        continue
      content = lyx2latex(document, document.body[i:j + 1])
      add_to_preamble(document, ["\\title{" + content + "}"])
      del document.body[i:j + 1]
      # \author
      i = find_token(document.body, "\\begin_layout Author", 0)
      if i == -1:
        return
      j = find_end_of_layout(document.body, i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find end of Author layout")
        i += 1
        continue
      content = lyx2latex(document, document.body[i:j + 1])
      add_to_preamble(document, ["\\author{" + content + "}"])
      del document.body[i:j + 1]
      # \Plainauthor
      i = find_token(document.body, "\\begin_layout Plain Author", 0)
      if i == -1:
        return
      j = find_end_of_layout(document.body, i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find end of Plain Author layout")
        i += 1
        continue
      content = lyx2latex(document, document.body[i:j + 1])
      add_to_preamble(document, ["\\Plainauthor{" + content + "}"])
      del document.body[i:j + 1]
      # \Plaintitle
      i = find_token(document.body, "\\begin_layout Plain Title", 0)
      if i == -1:
        return
      j = find_end_of_layout(document.body, i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find end of Plain Title layout")
        i += 1
        continue
      content = lyx2latex(document, document.body[i:j + 1])
      add_to_preamble(document, ["\\Plaintitle{" + content + "}"])
      del document.body[i:j + 1]
      # \Shorttitle
      i = find_token(document.body, "\\begin_layout Short Title", 0)
      if i == -1:
        return
      j = find_end_of_layout(document.body, i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find end of Short Title layout")
        i += 1
        continue
      content = lyx2latex(document, document.body[i:j + 1])
      add_to_preamble(document, ["\\Shorttitle{" + content + "}"])
      del document.body[i:j + 1]
      # \Abstract
      i = find_token(document.body, "\\begin_layout Abstract", 0)
      if i == -1:
        return
      j = find_end_of_layout(document.body, i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find end of Abstract layout")
        i += 1
        continue
      content = lyx2latex(document, document.body[i:j + 1])
      add_to_preamble(document, ["\\Abstract{" + content + "}"])
      del document.body[i:j + 1]
      # \Keywords
      i = find_token(document.body, "\\begin_layout Keywords", 0)
      if i == -1:
        return
      j = find_end_of_layout(document.body, i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find end of Keywords layout")
        i += 1
        continue
      content = lyx2latex(document, document.body[i:j + 1])
      add_to_preamble(document, ["\\Keywords{" + content + "}"])
      del document.body[i:j + 1]
      # \Plainkeywords
      i = find_token(document.body, "\\begin_layout Plain Keywords", 0)
      if i == -1:
        return
      j = find_end_of_layout(document.body, i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find end of Plain Keywords layout")
        i += 1
        continue
      content = lyx2latex(document, document.body[i:j + 1])
      add_to_preamble(document, ["\\Plainkeywords{" + content + "}"])
      del document.body[i:j + 1]
      # \Address
      i = find_token(document.body, "\\begin_layout Address", 0)
      if i == -1:
        return
      j = find_end_of_layout(document.body, i)
      if j == -1:
        document.warning("Malformed LyX document: Can't find end of Address layout")
        i += 1
        continue
      content = lyx2latex(document, document.body[i:j + 1])
      add_to_preamble(document, ["\\Address{" + content + "}"])
      del document.body[i:j + 1]
      # finally handle the code layouts
      # The search positions are reset and reused for phase 3 (n is not
      # used any more). Once this loop ends control goes back to the top of
      # the outer "while True"; the function is left through one of the
      # "return" statements above.
      h = 0
      m = 0
      j = 0
      k = 0
      while m != -1 or j != -1 or h != -1 or k != -1:
        # \CodeChunk
        if h != -1:
          h = find_token(document.body, "\\begin_inset Flex Code Chunk", h)
        if h != -1:
          endh = find_end_of_inset(document.body, h)
          # a slice whose start lies behind its stop is empty, so this
          # inserts the line at index endh + 1
          document.body[endh + 1 : endh] = ["\\end_layout"]
          document.body[endh : endh + 1] = put_cmd_in_ert("\\end{CodeChunk}")
          document.body[h : h + 3] = put_cmd_in_ert("\\begin{CodeChunk}")
          document.body[h - 1 : h] = ["\\begin_layout Standard"]
          h = h + 1
        # \CodeInput
        # The three layout types below are rewritten identically: the end
        # is handled first so that the start index stays valid; the layout
        # line and its end are replaced by ERT \begin{...} / \end{...},
        # with Standard layout boundaries inserted around them.
        # NOTE(review): the "+ 13" offset presumably equals the 3 inserted
        # lines plus the length of the list returned by put_cmd_in_ert --
        # confirm against lyx2lyx_tools.
        if j != -1:
          j = find_token(document.body, "\\begin_layout Code Input", j)
        if j != -1:
          endj = find_end_of_layout(document.body, j)
          document.body[endj : endj + 1] = ["\\end_layout", "", "\\begin_layout Standard"]
          document.body[endj + 3 : endj + 4] = put_cmd_in_ert("\\end{CodeInput}")
          document.body[endj + 13 : endj + 13] = ["\\end_layout", "", "\\begin_layout Standard"]
          document.body[j + 1 : j] = ["\\end_layout", "", "\\begin_layout Standard"]
          document.body[j : j + 1] = put_cmd_in_ert("\\begin{CodeInput}")
          j = j + 1
        # \CodeOutput
        if k != -1:
          k = find_token(document.body, "\\begin_layout Code Output", k)
        if k != -1:
          endk = find_end_of_layout(document.body, k)
          document.body[endk : endk + 1] = ["\\end_layout", "", "\\begin_layout Standard"]
          document.body[endk + 3 : endk + 4] = put_cmd_in_ert("\\end{CodeOutput}")
          document.body[endk + 13 : endk + 13] = ["\\end_layout", "", "\\begin_layout Standard"]
          document.body[k + 1 : k] = ["\\end_layout", "", "\\begin_layout Standard"]
          document.body[k : k + 1] = put_cmd_in_ert("\\begin{CodeOutput}")
          k = k + 1
        # \Code
        if m != -1:
          m = find_token(document.body, "\\begin_layout Code", m)
        if m != -1:
          endm = find_end_of_layout(document.body, m)
          document.body[endm : endm + 1] = ["\\end_layout", "", "\\begin_layout Standard"]
          document.body[endm + 3 : endm + 4] = put_cmd_in_ert("\\end{Code}")
          document.body[endm + 13 : endm + 13] = ["\\end_layout", "", "\\begin_layout Standard"]
          document.body[m + 1 : m] = ["\\end_layout", "", "\\begin_layout Standard"]
          document.body[m : m + 1] = put_cmd_in_ert("\\begin{Code}")
          m = m + 1
1414
1415
def convert_subref(document):
    " converts sub: ref prefixes to subsec: "

    # 1) label insets
    label_rx = re.compile(r'^name \"sub:(.+)$')
    begin = find_token(document.body, "\\begin_inset CommandInset label", 0)
    while begin != -1:
        end = find_end_of_inset(document.body, begin)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of Label inset at line " + str(begin))
        else:
            # every matching line of the inset gets the new prefix
            for idx in range(begin, end):
                found = label_rx.match(document.body[idx])
                if found:
                    document.body[idx] = "name \"subsec:" + found.group(1)
        begin = find_token(document.body, "\\begin_inset CommandInset label", begin + 1)

    # 2) xref insets
    ref_rx = re.compile(r'^reference \"sub:(.+)$')
    begin = find_token(document.body, "\\begin_inset CommandInset ref", 0)
    while begin != -1:
        end = find_end_of_inset(document.body, begin)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of Ref inset at line " + str(begin))
        else:
            # only the first matching line of the inset is rewritten
            for idx in range(begin, end):
                found = ref_rx.match(document.body[idx])
                if found:
                    document.body[idx] = "reference \"subsec:" + found.group(1)
                    break
        begin = find_token(document.body, "\\begin_inset CommandInset ref", begin + 1)
1459
1460
1461
def revert_subref(document):
    " reverts subsec: ref prefixes to sub: "

    # 1) label insets
    label_rx = re.compile(r'^name \"subsec:(.+)$')
    begin = find_token(document.body, "\\begin_inset CommandInset label", 0)
    while begin != -1:
        end = find_end_of_inset(document.body, begin)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of Label inset at line " + str(begin))
        else:
            # only the first matching line of the inset is rewritten
            for idx in range(begin, end):
                found = label_rx.match(document.body[idx])
                if found:
                    document.body[idx] = "name \"sub:" + found.group(1)
                    break
        begin = find_token(document.body, "\\begin_inset CommandInset label", begin + 1)

    # 2) xref insets
    ref_rx = re.compile(r'^reference \"subsec:(.+)$')
    begin = find_token(document.body, "\\begin_inset CommandInset ref", 0)
    while begin != -1:
        end = find_end_of_inset(document.body, begin)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of Ref inset at line " + str(begin))
        else:
            # only the first matching line of the inset is rewritten
            for idx in range(begin, end):
                found = ref_rx.match(document.body[idx])
                if found:
                    document.body[idx] = "reference \"sub:" + found.group(1)
                    break
        begin = find_token(document.body, "\\begin_inset CommandInset ref", begin + 1)
1506
1507
def convert_nounzip(document):
    " remove the noUnzip parameter of graphics insets "

    # A line that holds nothing but the noUnzip keyword (plus whitespace).
    nounzip_line = re.compile(r'\s*noUnzip\s*$')
    body = document.body

    begin = find_token(body, "\\begin_inset Graphics", 0)
    while begin != -1:
        end = find_end_of_inset(body, begin)
        if end == -1:
            document.warning("Malformed LyX document: Can't find end of graphics inset at line " + str(begin))
            # Skip only the opener line and keep scanning.
            resume = begin + 1
        else:
            hit = find_re(body, nounzip_line, begin, end)
            if hit != -1:
                del body[hit]
                # The inset shrank by one line, so its end moved up.
                end -= 1
            resume = end + 1
        begin = find_token(body, "\\begin_inset Graphics", resume)
1528
1529
1530 ##
1531 # Conversion hub
1532 #
1533
# LyX version strings associated with this module.
supported_versions = ["2.2.0", "2.2"]

# Each entry pairs a file format number with the conversion routines listed
# for it; entries are kept in ascending format order.
# NOTE(review): presumably the number is the format reached after running the
# listed routines -- confirm against the lyx2lyx driver.
convert = [
    [475, [convert_separator]],
    # nothing to do for 476: We consider it a bug that older versions
    # did not load amsmath automatically for these commands, and do not
    # want to hardcode amsmath off.
    [476, []],
    [477, []],
    [478, []],
    [479, []],
    [480, []],
    [481, [convert_dashes]],
    [482, [convert_phrases]],
    [483, [convert_specialchar]],
    [484, []],
    [485, []],
    [486, []],
    [487, []],
    [488, [convert_newgloss]],
    [489, [convert_BoxFeatures]],
    [490, [convert_origin]],
    [491, []],
    [492, [convert_colorbox]],
    [493, []],
    [494, []],
    [495, [convert_subref]],
    [496, [convert_nounzip]],
]

# Same layout as the convert table, but in descending format order and
# listing the reversion routines.
revert = [
    [495, []],  # nothing to do since the noUnzip parameter was optional
    [494, [revert_subref]],
    [493, [revert_jss]],
    [492, [revert_mathmulticol]],
    [491, [revert_colorbox]],
    [490, [revert_textcolor]],
    [489, [revert_origin]],
    [488, [revert_BoxFeatures]],
    [487, [revert_newgloss, revert_glossgroup]],
    [486, [revert_forest]],
    [485, [revert_ex_itemargs]],
    [484, [revert_sigplan_doi]],
    [483, [revert_georgian]],
    [482, [revert_specialchar]],
    [481, [revert_phrases]],
    [480, [revert_dashes]],
    [479, [revert_question_env]],
    [478, [revert_beamer_lemma]],
    [477, [revert_xarrow]],
    [476, [revert_swissgerman]],
    [475, [revert_smash]],
    [474, [revert_separator]],
]


# Running this file directly does nothing.
if __name__ == "__main__":
    pass