lib/lyx2lyx/lyx_2_1.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # -*- coding: utf-8 -*-
   4 # Copyright (C) 2011 The LyX team
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  19
  20 """ Convert files to the file format generated by lyx 2.1"""
  21
  22 import re, string
  23 import unicodedata
  24 import sys, os
  25
  26 # Uncomment only what you need to import, please.
  27
  28 from parser_tools import del_token, find_token, find_end_of, find_end_of_inset, \
  29     find_end_of_layout, find_re, get_option_value, get_value, get_quoted_value, \
  30     set_option_value
  31
  32 #from parser_tools import find_token, find_end_of, find_tokens, \
  33   #find_token_exact, find_end_of_inset, find_end_of_layout, \
  34   #find_token_backwards, is_in_inset, del_token, check_token
  35
  36 from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert
  37
  38 #from lyx2lyx_tools import insert_to_preamble, \
  39 #  lyx2latex, latex_length, revert_flex_inset, \
  40 #  revert_font_attrs, hex2ratio, str2bool
  41
  42 ####################################################################
  43 # Private helper functions
  44
  45 #def remove_option(lines, m, option):
  46     #''' removes option from line m. returns whether we did anything '''
  47     #l = lines[m].find(option)
  48     #if l == -1:
  49         #return False
  50     #val = lines[m][l:].split('"')[1]
  51     #lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
  52     #return True
  53
  54
  55 ###############################################################################
  56 ###
  57 ### Conversion and reversion routines
  58 ###
  59 ###############################################################################
  60
  61 def revert_visible_space(document):
  62     "Revert InsetSpace visible into its ERT counterpart"
  63     i = 0
  64     while True:
  65       i = find_token(document.body, "\\begin_inset space \\textvisiblespace{}", i)
  66       if i == -1:
  67         return
  68       end = find_end_of_inset(document.body, i)
  69       subst = put_cmd_in_ert("\\textvisiblespace{}")
  70       document.body[i:end + 1] = subst
  71
  72
  73 def convert_undertilde(document):
  74     " Load undertilde automatically "
  75     i = find_token(document.header, "\\use_mathdots" , 0)
  76     if i == -1:
  77         i = find_token(document.header, "\\use_mhchem" , 0)
  78     if i == -1:
  79         i = find_token(document.header, "\\use_esint" , 0)
  80     if i == -1:
  81         document.warning("Malformed LyX document: Can't find \\use_mathdots.")
  82         return;
  83     j = find_token(document.preamble, "\\usepackage{undertilde}", 0)
  84     if j == -1:
  85         document.header.insert(i + 1, "\\use_undertilde 0")
  86     else:
  87         document.header.insert(i + 1, "\\use_undertilde 2")
  88         del document.preamble[j]
  89
  90
  91 def revert_undertilde(document):
  92     " Load undertilde if used in the document "
  93     undertilde = find_token(document.header, "\\use_undertilde" , 0)
  94     if undertilde == -1:
  95       document.warning("No \\use_undertilde line. Assuming auto.")
  96     else:
  97       val = get_value(document.header, "\\use_undertilde", undertilde)
  98       del document.header[undertilde]
  99       try:
 100         usetilde = int(val)
 101       except:
 102         document.warning("Invalid \\use_undertilde value: " + val + ". Assuming auto.")
 103         # probably usedots has not been changed, but be safe.
 104         usetilde = 1
 105
 106       if usetilde == 0:
 107         # do not load case
 108         return
 109       if usetilde == 2:
 110         # force load case
 111         add_to_preamble(document, ["\\usepackage{undertilde}"])
 112         return
 113
 114     # so we are in the auto case. we want to load undertilde if \utilde is used.
 115     i = 0
 116     while True:
 117       i = find_token(document.body, '\\begin_inset Formula', i)
 118       if i == -1:
 119         return
 120       j = find_end_of_inset(document.body, i)
 121       if j == -1:
 122         document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 123         i += 1
 124         continue
 125       code = "\n".join(document.body[i:j])
 126       if code.find("\\utilde") != -1:
 127         add_to_preamble(document, ["\\@ifundefined{utilde}{\\usepackage{undertilde}}"])
 128         return
 129       i = j
 130
 131
 132 def revert_negative_space(document):
 133     "Revert InsetSpace negmedspace and negthickspace into its TeX-code counterpart"
 134     i = 0
 135     j = 0
 136     reverted = False
 137     while True:
 138       i = find_token(document.body, "\\begin_inset space \\negmedspace{}", i)
 139       if i == -1:
 140         j = find_token(document.body, "\\begin_inset space \\negthickspace{}", j)
 141         if j == -1:
 142           # load amsmath in the preamble if not already loaded if we are at the end of checking
 143           if reverted == True:
 144             i = find_token(document.header, "\\use_amsmath 2", 0)
 145             if i == -1:
 146               add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}"])
 147           return
 148       if i == -1:
 149         return
 150       end = find_end_of_inset(document.body, i)
 151       subst = put_cmd_in_ert("\\negmedspace{}")
 152       document.body[i:end + 1] = subst
 153       j = find_token(document.body, "\\begin_inset space \\negthickspace{}", j)
 154       if j == -1:
 155         return
 156       end = find_end_of_inset(document.body, j)
 157       subst = put_cmd_in_ert("\\negthickspace{}")
 158       document.body[j:end + 1] = subst
 159       reverted = True
 160
 161
 162 def revert_math_spaces(document):
 163     "Revert formulas with protected custom space and protected hfills to TeX-code"
 164     i = 0
 165     while True:
 166       i = find_token(document.body, "\\begin_inset Formula", i)
 167       if i == -1:
 168         return
 169       j = document.body[i].find("\\hspace*")
 170       if j != -1:
 171         end = find_end_of_inset(document.body, i)
 172         subst = put_cmd_in_ert(document.body[i][21:])
 173         document.body[i:end + 1] = subst
 174       i = i + 1
 175
 176
 177 def convert_japanese_encodings(document):
 178     " Rename the japanese encodings to names understood by platex "
 179     jap_enc_dict = {
 180         "EUC-JP-pLaTeX": "euc",
 181         "JIS-pLaTeX":    "jis",
 182         "SJIS-pLaTeX":   "sjis"
 183     }
 184     i = find_token(document.header, "\\inputencoding" , 0)
 185     if i == -1:
 186         return
 187     val = get_value(document.header, "\\inputencoding", i)
 188     if val in jap_enc_dict.keys():
 189         document.header[i] = "\\inputencoding %s" % jap_enc_dict[val]
 190
 191
 192 def revert_japanese_encodings(document):
 193     " Revert the japanese encodings name changes "
 194     jap_enc_dict = {
 195         "euc":  "EUC-JP-pLaTeX",
 196         "jis":  "JIS-pLaTeX",
 197         "sjis": "SJIS-pLaTeX"
 198     }
 199     i = find_token(document.header, "\\inputencoding" , 0)
 200     if i == -1:
 201         return
 202     val = get_value(document.header, "\\inputencoding", i)
 203     if val in jap_enc_dict.keys():
 204         document.header[i] = "\\inputencoding %s" % jap_enc_dict[val]
 205
 206
 207 def revert_justification(document):
 208     " Revert the \\justification buffer param"
 209     if not del_token(document.header, '\\justification', 0):
 210         document.warning("Malformed LyX document: Missing \\justification.")
 211
 212
 213 def revert_australian(document):
 214     "Set English language variants Australian and Newzealand to English"
 215
 216     if document.language == "australian" or document.language == "newzealand":
 217         document.language = "english"
 218         i = find_token(document.header, "\\language", 0)
 219         if i != -1:
 220             document.header[i] = "\\language english"
 221     j = 0
 222     while True:
 223         j = find_token(document.body, "\\lang australian", j)
 224         if j == -1:
 225             j = find_token(document.body, "\\lang newzealand", 0)
 226             if j == -1:
 227                 return
 228             else:
 229                 document.body[j] = document.body[j].replace("\\lang newzealand", "\\lang english")
 230         else:
 231             document.body[j] = document.body[j].replace("\\lang australian", "\\lang english")
 232         j += 1
 233
 234
 235 def convert_biblio_style(document):
 236     "Add a sensible default for \\biblio_style based on the citation engine."
 237     i = find_token(document.header, "\\cite_engine", 0)
 238     if i != -1:
 239         engine = get_value(document.header, "\\cite_engine", i).split("_")[0]
 240         style = {"basic": "plain", "natbib": "plainnat", "jurabib": "jurabib"}
 241         document.header.insert(i + 1, "\\biblio_style " + style[engine])
 242
 243
 244 def revert_biblio_style(document):
 245     "BibTeX insets with default option use the style defined by \\biblio_style."
 246     i = find_token(document.header, "\\biblio_style" , 0)
 247     if i == -1:
 248         document.warning("No \\biblio_style line. Nothing to do.")
 249         return
 250
 251     default_style = get_value(document.header, "\\biblio_style", i)
 252     del document.header[i]
 253
 254     # We are looking for bibtex insets having the default option
 255     i = 0
 256     while True:
 257         i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
 258         if i == -1:
 259             return
 260         j = find_end_of_inset(document.body, i)
 261         if j == -1:
 262             document.warning("Malformed LyX document: Can't find end of bibtex inset at line " + str(i))
 263             i += 1
 264             return
 265         k = find_token(document.body, "options", i, j)
 266         if k != -1:
 267             options = get_quoted_value(document.body, "options", k)
 268             if "default" in options.split(","):
 269                 document.body[k] = 'options "%s"' \
 270                     % options.replace("default", default_style)
 271         i = j
 272
 273
 274 def handle_longtable_captions(document, forward):
 275     begin_table = 0
 276     while True:
 277         begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
 278         if begin_table == -1:
 279             break
 280         end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
 281         if end_table == -1:
 282             document.warning("Malformed LyX document: Could not find end of table.")
 283             begin_table += 1
 284             continue
 285         fline = find_token(document.body, "<features", begin_table, end_table)
 286         if fline == -1:
 287             document.warning("Can't find features for inset at line " + str(begin_table))
 288             begin_table += 1
 289             continue
 290         p = document.body[fline].find("islongtable")
 291         if p == -1:
 292             # no longtable
 293             begin_table += 1
 294             continue
 295         numrows = get_option_value(document.body[begin_table], "rows")
 296         try:
 297             numrows = int(numrows)
 298         except:
 299             document.warning(document.body[begin_table])
 300             document.warning("Unable to determine rows!")
 301             begin_table = end_table
 302             continue
 303         begin_row = begin_table
 304         for row in range(numrows):
 305             begin_row = find_token(document.body, '<row', begin_row, end_table)
 306             if begin_row == -1:
 307                 document.warning("Can't find row " + str(row + 1))
 308                 break
 309             end_row = find_end_of(document.body, begin_row, '<row', '</row>')
 310             if end_row == -1:
 311                 document.warning("Can't find end of row " + str(row + 1))
 312                 break
 313             if forward:
 314                 if (get_option_value(document.body[begin_row], 'caption') == 'true' and
 315                     get_option_value(document.body[begin_row], 'endfirsthead') != 'true' and
 316                     get_option_value(document.body[begin_row], 'endhead') != 'true' and
 317                     get_option_value(document.body[begin_row], 'endfoot') != 'true' and
 318                     get_option_value(document.body[begin_row], 'endlastfoot') != 'true'):
 319                     document.body[begin_row] = set_option_value(document.body[begin_row], 'caption', 'true", endfirsthead="true')
 320             elif get_option_value(document.body[begin_row], 'caption') == 'true':
 321                 if get_option_value(document.body[begin_row], 'endfirsthead') == 'true':
 322                     document.body[begin_row] = set_option_value(document.body[begin_row], 'endfirsthead', 'false')
 323                 if get_option_value(document.body[begin_row], 'endhead') == 'true':
 324                     document.body[begin_row] = set_option_value(document.body[begin_row], 'endhead', 'false')
 325                 if get_option_value(document.body[begin_row], 'endfoot') == 'true':
 326                     document.body[begin_row] = set_option_value(document.body[begin_row], 'endfoot', 'false')
 327                 if get_option_value(document.body[begin_row], 'endlastfoot') == 'true':
 328                     document.body[begin_row] = set_option_value(document.body[begin_row], 'endlastfoot', 'false')
 329             begin_row = end_row
 330         # since there could be a tabular inside this one, we
 331         # cannot jump to end.
 332         begin_table += 1
 333
 334
 335 def convert_longtable_captions(document):
 336     "Add a firsthead flag to caption rows"
 337     handle_longtable_captions(document, True)
 338
 339
 340 def revert_longtable_captions(document):
 341     "remove head/foot flag from caption rows"
 342     handle_longtable_captions(document, False)
 343
 344
 345 def convert_use_packages(document):
 346     "use_xxx yyy => use_package xxx yyy"
 347     packages = ["amsmath", "esint", "mathdots", "mhchem", "undertilde"]
 348     for p in packages:
 349         i = find_token(document.header, "\\use_%s" % p, 0)
 350         if i != -1:
 351             value = get_value(document.header, "\\use_%s" % p, i)
 352             document.header[i] = "\\use_package %s %s" % (p, value)
 353
 354
 355 def revert_use_packages(document):
 356     "use_package xxx yyy => use_xxx yyy"
 357     packages = ["amsmath", "esint", "mathdots", "mhchem", "undertilde"]
 358     # the order is arbitrary for the use_package version, and not all packages need to be given.
 359     # Ensure a complete list and correct order (important for older LyX versions and especially lyx2lyx)
 360     j = 0
 361     for p in packages:
 362         regexp = re.compile(r'(\\use_package\s+%s)' % p)
 363         i = find_re(document.header, regexp, j)
 364         if i != -1:
 365             value = get_value(document.header, "\\use_package %s" % p, i).split()[1]
 366             del document.header[i]
 367             j = i
 368             document.header.insert(j, "\\use_%s %s"  % (p, value))
 369         j = j + 1
 370
 371
 372 def convert_use_mathtools(document):
 373     "insert use_package mathtools"
 374     i = find_token(document.header, "\\use_package", 0)
 375     if i == -1:
 376         document.warning("Malformed LyX document: Can't find \\use_package.")
 377         return;
 378     j = find_token(document.preamble, "\\usepackage{mathtools}", 0)
 379     if j == -1:
 380         document.header.insert(i + 1, "\\use_package mathtools 0")
 381     else:
 382         document.header.insert(i + 1, "\\use_package mathtools 2")
 383         del document.preamble[j]
 384
 385
 386 def revert_use_mathtools(document):
 387     "remove use_package mathtools"
 388     regexp = re.compile(r'(\\use_package\s+mathtools)')
 389     i = find_re(document.header, regexp, 0)
 390     value = "1" # default is auto
 391     if i != -1:
 392         value = get_value(document.header, "\\use_package" , i).split()[1]
 393         del document.header[i]
 394     if value == "2": # on
 395         add_to_preamble(document, ["\\usepackage{mathtools}"])
 396     elif value == "1": # auto
 397         commands = ["mathclap", "mathllap", "mathrlap", \
 398                     "lgathered", "rgathered", "vcentcolon", "dblcolon", \
 399                     "coloneqq", "Coloneqq", "coloneq", "Coloneq", "eqqcolon", \
 400                     "Eqqcolon", "eqcolon", "Eqcolon", "colonapprox", \
 401                     "Colonapprox", "colonsim", "Colonsim"]
 402         i = 0
 403         while True:
 404             i = find_token(document.body, '\\begin_inset Formula', i)
 405             if i == -1:
 406                 return
 407             j = find_end_of_inset(document.body, i)
 408             if j == -1:
 409                 document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 410                 i += 1
 411                 continue
 412             code = "\n".join(document.body[i:j])
 413             for c in commands:
 414                 if code.find("\\%s" % c) != -1:
 415                     add_to_preamble(document, ["\\usepackage{mathtools}"])
 416                     return
 417             i = j
 418
 419
 420 def convert_cite_engine_type(document):
 421     "Determine the \\cite_engine_type from the citation engine."
 422     i = find_token(document.header, "\\cite_engine", 0)
 423     if i == -1:
 424         return
 425     engine = get_value(document.header, "\\cite_engine", i)
 426     if "_" in engine:
 427         engine, type = engine.split("_")
 428     else:
 429         type = {"basic": "numerical", "jurabib": "authoryear"}[engine]
 430     document.header[i] = "\\cite_engine " + engine
 431     document.header.insert(i + 1, "\\cite_engine_type " + type)
 432
 433
 434 def revert_cite_engine_type(document):
 435     "Natbib had the type appended with an underscore."
 436     engine_type = "numerical"
 437     i = find_token(document.header, "\\cite_engine_type" , 0)
 438     if i == -1:
 439         document.warning("No \\cite_engine_type line. Assuming numerical.")
 440     else:
 441         engine_type = get_value(document.header, "\\cite_engine_type", i)
 442         del document.header[i]
 443
 444     # We are looking for the natbib citation engine
 445     i = find_token(document.header, "\\cite_engine natbib", 0)
 446     if i == -1:
 447         return
 448     document.header[i] = "\\cite_engine natbib_" + engine_type
 449
 450
 451 def revert_cancel(document):
 452     "add cancel to the preamble if necessary"
 453     commands = ["cancelto", "cancel", "bcancel", "xcancel"]
 454     i = 0
 455     while True:
 456         i = find_token(document.body, '\\begin_inset Formula', i)
 457         if i == -1:
 458             return
 459         j = find_end_of_inset(document.body, i)
 460         if j == -1:
 461             document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 462             i += 1
 463             continue
 464         code = "\n".join(document.body[i:j])
 465         for c in commands:
 466             if code.find("\\%s" % c) != -1:
 467                 add_to_preamble(document, ["\\usepackage{cancel}"])
 468                 return
 469         i = j
 470
 471
 472 def revert_verbatim(document):
 473     " Revert verbatim einvironments completely to TeX-code. "
 474     i = 0
 475     consecutive = False
 476     subst_end = ['\end_layout', '', '\\begin_layout Plain Layout',
 477                  '\end_layout', '',
 478                  '\\begin_layout Plain Layout', '', '',
 479                  '\\backslash', '',
 480                  'end{verbatim}',
 481                  '\\end_layout', '', '\\end_inset',
 482                  '', '', '\\end_layout']
 483     subst_begin = ['\\begin_layout Standard', '\\noindent',
 484                    '\\begin_inset ERT', 'status collapsed', '',
 485                    '\\begin_layout Plain Layout', '', '', '\\backslash',
 486                    'begin{verbatim}',
 487                    '\\end_layout', '', '\\begin_layout Plain Layout', '']
 488     while 1:
 489         i = find_token(document.body, "\\begin_layout Verbatim", i)
 490         if i == -1:
 491             return
 492         j = find_end_of_layout(document.body, i)
 493         if j == -1:
 494             document.warning("Malformed lyx document: Can't find end of Verbatim layout")
 495             i += 1
 496             continue
 497         # delete all line breaks insets (there are no other insets)
 498         l = i
 499         while 1:
 500             n = find_token(document.body, "\\begin_inset Newline newline", l)
 501             if n == -1:
 502                 n = find_token(document.body, "\\begin_inset Newline linebreak", l)
 503                 if n == -1:
 504                     break
 505             m = find_end_of_inset(document.body, n)
 506             del(document.body[m:m+1])
 507             document.body[n:n+1] = ['\end_layout', '', '\\begin_layout Plain Layout']
 508             l += 1
 509             j += 1
 510         # consecutive verbatim environments need to be connected
 511         k = find_token(document.body, "\\begin_layout Verbatim", j)
 512         if k == j + 2 and consecutive == False:
 513             consecutive = True
 514             document.body[j:j+1] = ['\end_layout', '', '\\begin_layout Plain Layout']
 515             document.body[i:i+1] = subst_begin
 516             continue
 517         if k == j + 2 and consecutive == True:
 518             document.body[j:j+1] = ['\end_layout', '', '\\begin_layout Plain Layout']
 519             del(document.body[i:i+1])
 520             continue
 521         if k != j + 2 and consecutive == True:
 522             document.body[j:j+1] = subst_end
 523             # the next paragraph must not be indented
 524             document.body[j+19:j+19] = ['\\noindent']
 525             del(document.body[i:i+1])
 526             consecutive = False
 527             continue
 528         else:
 529             document.body[j:j+1] = subst_end
 530             # the next paragraph must not be indented
 531             document.body[j+19:j+19] = ['\\noindent']
 532             document.body[i:i+1] = subst_begin
 533
 534
 535 def revert_tipa(document):
 536     " Revert native TIPA insets to mathed or ERT. "
 537     i = 0
 538     while 1:
 539         i = find_token(document.body, "\\begin_inset IPA", i)
 540         if i == -1:
 541             return
 542         j = find_end_of_inset(document.body, i)
 543         if j == -1:
 544             document.warning("Malformed lyx document: Can't find end of IPA inset")
 545             i += 1
 546             continue
 547         Multipar = False
 548         n = find_token(document.body, "\\begin_layout", i, j)
 549         if n == -1:
 550             document.warning("Malformed lyx document: IPA inset has no embedded layout")
 551             i += 1
 552             continue
 553         m = find_end_of_layout(document.body, n)
 554         if m == -1:
 555             document.warning("Malformed lyx document: Can't find end of embedded layout")
 556             i += 1
 557             continue
 558         content = document.body[n+1:m]
 559         p = find_token(document.body, "\\begin_layout", m, j)
 560         if p != -1 or len(content) > 1:
 561             Multipar = True
 562             content = document.body[i+1:j]
 563         if Multipar:
 564             # IPA insets with multiple pars need to be wrapped by \begin{IPA}...\end{IPA}
 565             document.body[i:j+1] = ['\\end_layout', '', '\\begin_layout Standard'] + put_cmd_in_ert("\\begin{IPA}") + ['\\end_layout'] + content + ['\\begin_layout Standard'] + put_cmd_in_ert("\\end{IPA}")
 566             add_to_preamble(document, ["\\usepackage{tipa,tipx}"])
 567         else:
 568             # single-par IPA insets can be reverted to mathed
 569             document.body[i:j+1] = ["\\begin_inset Formula $\\text{\\textipa{" + content[0] + "}}$", "\\end_inset"]
 570         i = j
 571
 572
 573 def revert_cell_rotation(document):
 574   "Revert cell rotations to TeX-code"
 575
 576   load_rotating = False
 577   i = 0
 578   try:
 579     while True:
 580       # first, let's find out if we need to do anything
 581       i = find_token(document.body, '<cell ', i)
 582       if i == -1:
 583         return
 584       j = document.body[i].find('rotate="')
 585       if j != -1:
 586         k = document.body[i].find('"', j + 8)
 587         value = document.body[i][j + 8 : k]
 588         if value == "0":
 589           rgx = re.compile(r' rotate="[^"]+?"')
 590           # remove rotate option
 591           document.body[i] = rgx.sub('', document.body[i])
 592         elif value == "90":
 593           rgx = re.compile(r' rotate="[^"]+?"')
 594           document.body[i] = rgx.sub('rotate="true"', document.body[i])
 595         else:
 596           rgx = re.compile(r' rotate="[^"]+?"')
 597           load_rotating = True
 598           # remove rotate option
 599           document.body[i] = rgx.sub('', document.body[i])
 600           # write ERT
 601           document.body[i + 5 : i + 5] = \
 602             put_cmd_in_ert("\\end{turn}")
 603           document.body[i + 4 : i + 4] = \
 604             put_cmd_in_ert("\\begin{turn}{" + value + "}")
 605
 606       i += 1
 607
 608   finally:
 609     if load_rotating:
 610       add_to_preamble(document, ["\\@ifundefined{turnbox}{\usepackage{rotating}}{}"])
 611
 612
 613 def convert_cell_rotation(document):
 614     'Convert cell rotation statements from "true" to "90"'
 615
 616     i = 0
 617     while True:
 618       # first, let's find out if we need to do anything
 619       i = find_token(document.body, '<cell ', i)
 620       if i == -1:
 621         return
 622       j = document.body[i].find('rotate="true"')
 623       if j != -1:
 624         rgx = re.compile(r'rotate="[^"]+?"')
 625         # convert "true" to "90"
 626         document.body[i] = rgx.sub('rotate="90"', document.body[i])
 627
 628       i += 1
 629
 630
 631 def revert_table_rotation(document):
 632   "Revert table rotations to TeX-code"
 633
 634   load_rotating = False
 635   i = 0
 636   try:
 637     while True:
 638       # first, let's find out if we need to do anything
 639       i = find_token(document.body, '<features ', i)
 640       if i == -1:
 641         return
 642       j = document.body[i].find('rotate="')
 643       if j != -1:
 644         end_table = find_token(document.body, '</lyxtabular>', j)
 645         k = document.body[i].find('"', j + 8)
 646         value = document.body[i][j + 8 : k]
 647         if value == "0":
 648           rgx = re.compile(r' rotate="[^"]+?"')
 649           # remove rotate option
 650           document.body[i] = rgx.sub('', document.body[i])
 651         elif value == "90":
 652           rgx = re.compile(r'rotate="[^"]+?"')
 653           document.body[i] = rgx.sub('rotate="true"', document.body[i])
 654         else:
 655           rgx = re.compile(r' rotate="[^"]+?"')
 656           load_rotating = True
 657           # remove rotate option
 658           document.body[i] = rgx.sub('', document.body[i])
 659           # write ERT
 660           document.body[end_table + 3 : end_table + 3] = \
 661             put_cmd_in_ert("\\end{turn}")
 662           document.body[i - 2 : i - 2] = \
 663             put_cmd_in_ert("\\begin{turn}{" + value + "}")
 664
 665       i += 1
 666
 667   finally:
 668     if load_rotating:
 669       add_to_preamble(document, ["\\@ifundefined{turnbox}{\usepackage{rotating}}{}"])
 670
 671
 672 def convert_table_rotation(document):
 673     'Convert table rotation statements from "true" to "90"'
 674
 675     i = 0
 676     while True:
 677       # first, let's find out if we need to do anything
 678       i = find_token(document.body, '<features ', i)
 679       if i == -1:
 680         return
 681       j = document.body[i].find('rotate="true"')
 682       if j != -1:
 683         rgx = re.compile(r'rotate="[^"]+?"')
 684         # convert "true" to "90"
 685         document.body[i] = rgx.sub('rotate="90"', document.body[i])
 686
 687       i += 1
 688
 689
 690 def convert_listoflistings(document):
 691     'Convert ERT \lstlistoflistings to TOC lstlistoflistings inset'
 692     # We can support roundtrip because the command is so simple
 693     i = 0
 694     while True:
 695         i = find_token(document.body, "\\begin_inset ERT", i)
 696         if i == -1:
 697             return
 698         j = find_end_of_inset(document.body, i)
 699         if j == -1:
 700             document.warning("Malformed lyx document: Can't find end of ERT inset")
 701             i += 1
 702             continue
 703         ert = get_ert(document.body, i)
 704         if ert == "\\lstlistoflistings{}":
 705             document.body[i:j] = ["\\begin_inset CommandInset toc", "LatexCommand lstlistoflistings", ""]
 706             i = i + 4
 707         else:
 708             i = j + 1
 709
 710
 711 def revert_listoflistings(document):
 712     'Convert TOC lstlistoflistings inset to ERT lstlistoflistings'
 713     i = 0
 714     while True:
 715         i = find_token(document.body, "\\begin_inset CommandInset toc", i)
 716         if i == -1:
 717             return
 718         if document.body[i+1] == "LatexCommand lstlistoflistings":
 719             j = find_end_of_inset(document.body, i)
 720             if j == -1:
 721                 document.warning("Malformed lyx document: Can't find end of TOC inset")
 722                 i += 1
 723                 continue
 724             subst = put_cmd_in_ert("\\lstlistoflistings{}")
 725             document.body[i:j+1] = subst
 726             add_to_preamble(document, ["\\usepackage{listings}"])
 727         i = i + 1
 728
 729
 730 def convert_use_amssymb(document):
 731     "insert use_package amssymb"
 732     regexp = re.compile(r'(\\use_package\s+amsmath)')
 733     i = find_re(document.header, regexp, 0)
 734     if i == -1:
 735         document.warning("Malformed LyX document: Can't find \\use_package amsmath.")
 736         return;
 737     value = get_value(document.header, "\\use_package" , i).split()[1]
 738     useamsmath = 0
 739     try:
 740         useamsmath = int(value)
 741     except:
 742         document.warning("Invalid \\use_package amsmath: " + value + ". Assuming auto.")
 743         useamsmath = 1
 744     j = find_token(document.preamble, "\\usepackage{amssymb}", 0)
 745     if j == -1:
 746         document.header.insert(i + 1, "\\use_package amssymb %d" % useamsmath)
 747     else:
 748         document.header.insert(i + 1, "\\use_package amssymb 2")
 749         del document.preamble[j]
 750
 751
 752 def revert_use_amssymb(document):
 753     "remove use_package amssymb"
 754     regexp1 = re.compile(r'(\\use_package\s+amsmath)')
 755     regexp2 = re.compile(r'(\\use_package\s+amssymb)')
 756     i = find_re(document.header, regexp1, 0)
 757     j = find_re(document.header, regexp2, 0)
 758     value1 = "1" # default is auto
 759     value2 = "1" # default is auto
 760     if i != -1:
 761         value1 = get_value(document.header, "\\use_package" , i).split()[1]
 762     if j != -1:
 763         value2 = get_value(document.header, "\\use_package" , j).split()[1]
 764         del document.header[j]
 765     if value1 != value2 and value2 == "2": # on
 766         add_to_preamble(document, ["\\usepackage{amssymb}"])
 767
 768
 769 def revert_ancientgreek(document):
 770     "Set the document language for ancientgreek to greek"
 771
 772     if document.language == "ancientgreek":
 773         document.language = "greek"
 774         i = find_token(document.header, "\\language", 0)
 775         if i != -1:
 776             document.header[i] = "\\language greek"
 777     j = 0
 778     while True:
 779         j = find_token(document.body, "\\lang ancientgreek", j)
 780         if j == -1:
 781             return
 782         else:
 783             document.body[j] = document.body[j].replace("\\lang ancientgreek", "\\lang greek")
 784         j += 1
 785
 786
 787 def revert_languages(document):
 788     "Set the document language for new supported languages to English"
 789
 790     languages = [
 791                  "coptic", "divehi", "hindi", "kurmanji", "lao", "marathi", "occitan", "sanskrit",
 792                  "syriac", "tamil", "telugu", "urdu"
 793                 ]
 794     for n in range(len(languages)):
 795         if document.language == languages[n]:
 796             document.language = "english"
 797             i = find_token(document.header, "\\language", 0)
 798             if i != -1:
 799                 document.header[i] = "\\language english"
 800         j = 0
 801         while j < len(document.body):
 802             j = find_token(document.body, "\\lang " + languages[n], j)
 803             if j != -1:
 804                 document.body[j] = document.body[j].replace("\\lang " + languages[n], "\\lang english")
 805                 j += 1
 806             else:
 807                 j = len(document.body)
 808
 809
 810 def convert_armenian(document):
 811     "Use polyglossia and thus non-TeX fonts for Armenian"
 812
 813     if document.language == "armenian":
 814         i = find_token(document.header, "\\use_non_tex_fonts", 0)
 815         if i != -1:
 816             document.header[i] = "\\use_non_tex_fonts true"
 817
 818
 819 def revert_armenian(document):
 820     "Use ArmTeX and thus TeX fonts for Armenian"
 821
 822     if document.language == "armenian":
 823         i = find_token(document.header, "\\use_non_tex_fonts", 0)
 824         if i != -1:
 825             document.header[i] = "\\use_non_tex_fonts false"
 826
 827
 828 def revert_libertine(document):
 829     " Revert native libertine font definition to LaTeX "
 830
 831     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 832         i = find_token(document.header, "\\font_roman libertine", 0)
 833         if i != -1:
 834             osf = False
 835             j = find_token(document.header, "\\font_osf true", 0)
 836             if j != -1:
 837                 osf = True
 838             preamble = "\\usepackage"
 839             if osf:
 840                 document.header[j] = "\\font_osf false"
 841             else:
 842                 preamble += "[lining]"
 843             preamble += "{libertine-type1}"
 844             add_to_preamble(document, [preamble])
 845             document.header[i] = "\\font_roman default"
 846
 847
 848 def revert_txtt(document):
 849     " Revert native txtt font definition to LaTeX "
 850
 851     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 852         i = find_token(document.header, "\\font_typewriter txtt", 0)
 853         if i != -1:
 854             preamble = "\\renewcommand{\\ttdefault}{txtt}"
 855             add_to_preamble(document, [preamble])
 856             document.header[i] = "\\font_typewriter default"
 857
 858
 859 def revert_mathdesign(document):
 860     " Revert native mathdesign font definition to LaTeX "
 861
 862     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 863         mathdesign_dict = {
 864         "mdbch":  "charter",
 865         "mdput":  "utopia",
 866         "mdugm":  "garamond"
 867         }
 868         i = find_token(document.header, "\\font_roman", 0)
 869         if i == -1:
 870             return
 871         val = get_value(document.header, "\\font_roman", i)
 872         if val in mathdesign_dict.keys():
 873             preamble = "\\usepackage[%s" % mathdesign_dict[val]
 874             expert = False
 875             j = find_token(document.header, "\\font_osf true", 0)
 876             if j != -1:
 877                 expert = True
 878                 document.header[j] = "\\font_osf false"
 879             l = find_token(document.header, "\\font_sc true", 0)
 880             if l != -1:
 881                 expert = True
 882                 document.header[l] = "\\font_sc false"
 883             if expert:
 884                 preamble += ",expert"
 885             preamble += "]{mathdesign}"
 886             add_to_preamble(document, [preamble])
 887             document.header[i] = "\\font_roman default"
 888
 889
 890 def revert_texgyre(document):
 891     " Revert native TeXGyre font definition to LaTeX "
 892
 893     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 894         texgyre_fonts = ["tgadventor", "tgbonum", "tgchorus", "tgcursor", \
 895                          "tgheros", "tgpagella", "tgschola", "tgtermes"]
 896         i = find_token(document.header, "\\font_roman", 0)
 897         if i != -1:
 898             val = get_value(document.header, "\\font_roman", i)
 899             if val in texgyre_fonts:
 900                 preamble = "\\usepackage{%s}" % val
 901                 add_to_preamble(document, [preamble])
 902                 document.header[i] = "\\font_roman default"
 903         i = find_token(document.header, "\\font_sans", 0)
 904         if i != -1:
 905             val = get_value(document.header, "\\font_sans", i)
 906             if val in texgyre_fonts:
 907                 preamble = "\\usepackage{%s}" % val
 908                 add_to_preamble(document, [preamble])
 909                 document.header[i] = "\\font_sans default"
 910         i = find_token(document.header, "\\font_typewriter", 0)
 911         if i != -1:
 912             val = get_value(document.header, "\\font_typewriter", i)
 913             if val in texgyre_fonts:
 914                 preamble = "\\usepackage{%s}" % val
 915                 add_to_preamble(document, [preamble])
 916                 document.header[i] = "\\font_typewriter default"
 917
 918
 919 def revert_ipadeco(document):
 920     " Revert IPA decorations to ERT "
 921     i = 0
 922     while True:
 923       i = find_token(document.body, "\\begin_inset IPADeco", i)
 924       if i == -1:
 925           return
 926       end = find_end_of_inset(document.body, i)
 927       if end == -1:
 928           document.warning("Can't find end of inset at line " + str(i))
 929           i += 1
 930           continue
 931       line = document.body[i]
 932       rx = re.compile(r'\\begin_inset IPADeco (.*)$')
 933       m = rx.match(line)
 934       decotype = m.group(1)
 935       if decotype != "toptiebar" and decotype != "bottomtiebar":
 936           document.warning("Invalid IPADeco type: " + decotype)
 937           i = end
 938           continue
 939       blay = find_token(document.body, "\\begin_layout Plain Layout", i, end)
 940       if blay == -1:
 941           document.warning("Can't find layout for inset at line " + str(i))
 942           i = end
 943           continue
 944       bend = find_end_of_layout(document.body, blay)
 945       if bend == -1:
 946           document.warning("Malformed LyX document: Could not find end of IPADeco inset's layout.")
 947           i = end
 948           continue
 949       substi = ["\\begin_inset ERT", "status collapsed", "",
 950                 "\\begin_layout Plain Layout", "", "", "\\backslash",
 951                 decotype + "{", "\\end_layout", "", "\\end_inset"]
 952       substj = ["\\size default", "", "\\begin_inset ERT", "status collapsed", "",
 953                 "\\begin_layout Plain Layout", "", "}", "\\end_layout", "", "\\end_inset"]
 954       # do the later one first so as not to mess up the numbering
 955       document.body[bend:end + 1] = substj
 956       document.body[i:blay + 1] = substi
 957       i = end + len(substi) + len(substj) - (end - bend) - (blay - i) - 2
 958       add_to_preamble(document, "\\usepackage{tipa}")
 959
 960
 961 def revert_ipachar(document):
 962     ' Revert \\IPAChar to ERT '
 963     i = 0
 964     found = False
 965     while i < len(document.body):
 966         m = re.match(r'(.*)\\IPAChar \\(\w+\{\w+\})(.*)', document.body[i])
 967         if m:
 968             found = True
 969             before = m.group(1)
 970             ipachar = m.group(2)
 971             after = m.group(3)
 972             subst = [before,
 973                      '\\begin_inset ERT',
 974                      'status collapsed', '',
 975                      '\\begin_layout Standard',
 976                      '', '', '\\backslash',
 977                      ipachar,
 978                      '\\end_layout', '',
 979                      '\\end_inset', '',
 980                      after]
 981             document.body[i: i+1] = subst
 982             i = i + len(subst)
 983         else:
 984             i = i + 1
 985     if found:
 986         add_to_preamble(document, "\\usepackage{tone}")
 987
 988
 989 def revert_minionpro(document):
 990     " Revert native MinionPro font definition to LaTeX "
 991
 992     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 993         i = find_token(document.header, "\\font_roman minionpro", 0)
 994         if i != -1:
 995             osf = False
 996             j = find_token(document.header, "\\font_osf true", 0)
 997             if j != -1:
 998                 osf = True
 999             preamble = "\\usepackage"
1000             if osf:
1001                 document.header[j] = "\\font_osf false"
1002             else:
1003                 preamble += "[lf]"
1004             preamble += "{MinionPro}"
1005             add_to_preamble(document, [preamble])
1006             document.header[i] = "\\font_roman default"
1007
1008
def revert_mathfonts(document):
    """Revert native math font definitions to LaTeX.

    Does nothing when the header has no font_math line; otherwise that
    line is always deleted at the end.  Before that, and only when
    non-TeX fonts are off:

    * math font "eulervm": the eulervm package line is handed to
      add_to_preamble();
    * math font "default": if the roman font (with "-osf" appended when
      font_osf is true) has an entry in the table below, that entry is
      handed to add_to_preamble(), the roman font is reset to default
      and an enabled font_osf line is switched off.
    """
    math_pos = find_token(document.header, "\\font_math", 0)
    if math_pos == -1:
        return

    if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
        math_font = get_value(document.header, "\\font_math", math_pos)
        if math_font == "eulervm":
            add_to_preamble(document, "\\usepackage{eulervm}")
        elif math_font == "default":
            roman_setup = {
                "lmodern": "\\renewcommand{\\rmdefault}{lmr}",
                "minionpro": "\\usepackage[onlytext,lf]{MinionPro}",
                "minionpro-osf": "\\usepackage[onlytext]{MinionPro}",
                "palatino": "\\renewcommand{\\rmdefault}{ppl}",
                "palatino-osf": "\\renewcommand{\\rmdefault}{pplj}",
                "times": "\\renewcommand{\\rmdefault}{ptm}",
                "utopia": "\\renewcommand{\\rmdefault}{futs}",
                "utopia-osf": "\\renewcommand{\\rmdefault}{futj}",
            }
            roman_pos = find_token(document.header, "\\font_roman", 0)
            if roman_pos != -1:
                key = get_value(document.header, "\\font_roman", roman_pos)
                osf_pos = find_token(document.header, "\\font_osf true", 0)
                if osf_pos != -1:
                    key += "-osf"
                if key in roman_setup:
                    add_to_preamble(document, roman_setup[key])
                    document.header[roman_pos] = "\\font_roman default"
                    if osf_pos != -1:
                        document.header[osf_pos] = "\\font_osf false"

    # The font_math setting itself is removed in every case.
    del document.header[math_pos]
1042
1043
def revert_mdnomath(document):
    """Revert mathdesign and fourier without math.

    Applies only when the header says non-TeX fonts are off and the roman
    font is one of the "md-*" names in the table below.

    * If the font_math setting is "default", the roman font is reset to
      default and an rmdefault redefinition using the old font name is
      handed to add_to_preamble().
    * Otherwise (including when there is no font_math line) the roman
      font is simply renamed to the old font name.
    """
    if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
        mathdesign_dict = {
            "md-charter": "mdbch",
            "md-utopia": "mdput",
            "md-garamond": "mdugm",
        }
        i = find_token(document.header, "\\font_roman", 0)
        if i == -1:
            return
        val = get_value(document.header, "\\font_roman", i)
        if val in mathdesign_dict:
            j = find_token(document.header, "\\font_math", 0)
            # Query the math font only if its line exists; never pass the
            # "not found" index -1 on to get_value().
            mval = ""
            if j != -1:
                mval = get_value(document.header, "\\font_math", j)
            if mval == "default":
                document.header[i] = "\\font_roman default"
                # Passed as a list, like the other add_to_preamble()
                # calls in this file.
                add_to_preamble(document, ["\\renewcommand{\\rmdefault}{%s}" % mathdesign_dict[val]])
            else:
                document.header[i] = "\\font_roman %s" % mathdesign_dict[val]
1067
1068
def convert_mdnomath(document):
    """Change mathdesign font name.

    Applies only when the header says non-TeX fonts are off: a roman font
    named mdbch, mdput or mdugm is renamed to its "md-*" counterpart from
    the table below.
    """
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return

    new_name = {
        "mdbch": "md-charter",
        "mdput": "md-utopia",
        "mdugm": "md-garamond",
    }
    roman_pos = find_token(document.header, "\\font_roman", 0)
    if roman_pos == -1:
        return
    roman = get_value(document.header, "\\font_roman", roman_pos)
    if roman in new_name:
        document.header[roman_pos] = "\\font_roman %s" % new_name[roman]
1084
1085
def revert_newtxmath(document):
    """Revert native newtxmath definitions to LaTeX.

    Requires a font_math header line and non-TeX fonts switched off.  If
    the math font is one of the newtxmath variants in the table below,
    the matching package line is handed to add_to_preamble() and the math
    font is set to auto.
    """
    math_pos = find_token(document.header, "\\font_math", 0)
    if math_pos == -1:
        return
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return

    math_font = get_value(document.header, "\\font_math", math_pos)
    package_for = {
        "libertine-ntxm": "\\usepackage[libertine]{newtxmath}",
        "minion-ntxm": "\\usepackage[minion]{newtxmath}",
        "newtxmath": "\\usepackage{newtxmath}",
    }
    if math_font in package_for:
        add_to_preamble(document, package_for[math_font])
        document.header[math_pos] = "\\font_math auto"
1102
1103
def revert_biolinum(document):
    """Revert native biolinum font definition to LaTeX.

    Applies only when the header says non-TeX fonts are off and the sans
    font is biolinum.  The biolinum-type1 package line is handed to
    add_to_preamble(), with the "lf" option unless a font_osf true line
    is present, and the sans font is reset to default.  The font_osf line
    itself is not modified here.
    """
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    sans_pos = find_token(document.header, "\\font_sans biolinum", 0)
    if sans_pos == -1:
        return

    has_osf = find_token(document.header, "\\font_osf true", 0) != -1
    package = "\\usepackage"
    if not has_osf:
        package += "[lf]"
    package += "{biolinum-type1}"

    add_to_preamble(document, [package])
    document.header[sans_pos] = "\\font_sans default"
1120
1121
##
# Conversion hub
#
# The three module-level names below are plain data; nothing in this part of
# the file reads them.
# NOTE(review): presumably the lyx2lyx driver imports them to chain the
# per-format routines -- confirm against the driver code.

# Version strings associated with this module.
supported_versions = ["2.1.0","2.1"]

# Forward table, in ascending order.  Each entry is
# [file format number, [conversion routines]]; an empty list means no routine
# is registered for that number.
# NOTE(review): the routines of an entry appear to be the ones needed to reach
# that format from the previous one -- confirm.
convert = [
           [414, []],
           [415, [convert_undertilde]],
           [416, []],
           [417, [convert_japanese_encodings]],
           [418, []],
           [419, []],
           [420, [convert_biblio_style]],
           [421, [convert_longtable_captions]],
           [422, [convert_use_packages]],
           [423, [convert_use_mathtools]],
           [424, [convert_cite_engine_type]],
           [425, []],
           [426, []],
           [427, []],
           [428, [convert_cell_rotation]],
           [429, [convert_table_rotation]],
           [430, [convert_listoflistings]],
           [431, [convert_use_amssymb]],
           [432, []],
           [433, [convert_armenian]],
           [434, []],
           [435, []],
           [436, []],
           [437, []],
           [438, []],
           [439, []],
           [440, []],
           [441, [convert_mdnomath]],
           [442, []],
           [443, []],
           [444, []]
          ]

# Backward table, in descending order, with the same entry layout.  Note the
# numbering is shifted by one relative to the forward table: for example
# convert_listoflistings is listed under 430 above, revert_listoflistings
# under 429 here.
# NOTE(review): the number of an entry appears to be the format reached after
# its routines have run -- confirm.
revert =  [
           [443, [revert_biolinum]],
           [442, []],
           [441, [revert_newtxmath]],
           [440, [revert_mdnomath]],
           [439, [revert_mathfonts]],
           [438, [revert_minionpro]],
           [437, [revert_ipadeco, revert_ipachar]],
           [436, [revert_texgyre]],
           [435, [revert_mathdesign]],
           [434, [revert_txtt]],
           [433, [revert_libertine]],
           [432, [revert_armenian]],
           [431, [revert_languages, revert_ancientgreek]],
           [430, [revert_use_amssymb]],
           [429, [revert_listoflistings]],
           [428, [revert_table_rotation]],
           [427, [revert_cell_rotation]],
           [426, [revert_tipa]],
           [425, [revert_verbatim]],
           [424, [revert_cancel]],
           [423, [revert_cite_engine_type]],
           [422, [revert_use_mathtools]],
           [421, [revert_use_packages]],
           [420, [revert_longtable_captions]],
           [419, [revert_biblio_style]],
           [418, [revert_australian]],
           [417, [revert_justification]],
           [416, [revert_japanese_encodings]],
           [415, [revert_negative_space, revert_math_spaces]],
           [414, [revert_undertilde]],
           [413, [revert_visible_space]]
          ]


# Running this module as a script deliberately does nothing.
if __name__ == "__main__":
    pass