lib/lyx2lyx/lyx_2_1.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # -*- coding: utf-8 -*-
   4 # Copyright (C) 2011 The LyX team
   5 #
   6 # This program is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU General Public License
   8 # as published by the Free Software Foundation; either version 2
   9 # of the License, or (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  19
  20 """ Convert files to the file format generated by lyx 2.1"""
  21
  22 import re, string
  23 import unicodedata
  24 import sys, os
  25
  26 # Uncomment only what you need to import, please.
  27
  28 from parser_tools import del_token, find_token, find_end_of, find_end_of_inset, \
  29     find_re, get_option_value, get_value, get_quoted_value, set_option_value
  30
  31 #from parser_tools import find_token, find_end_of, find_tokens, \
  32   #find_token_exact, find_end_of_inset, find_end_of_layout, \
  33   #find_token_backwards, is_in_inset, get_value, get_quoted_value, \
  34   #del_token, check_token
  35
  36 from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert
  37
  38 #from lyx2lyx_tools import insert_to_preamble, \
  39 #  put_cmd_in_ert, lyx2latex, latex_length, revert_flex_inset, \
  40 #  revert_font_attrs, hex2ratio, str2bool
  41
  42 ####################################################################
  43 # Private helper functions
  44
  45 #def remove_option(lines, m, option):
  46     #''' removes option from line m. returns whether we did anything '''
  47     #l = lines[m].find(option)
  48     #if l == -1:
  49         #return False
  50     #val = lines[m][l:].split('"')[1]
  51     #lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
  52     #return True
  53
  54
  55 ###############################################################################
  56 ###
  57 ### Conversion and reversion routines
  58 ###
  59 ###############################################################################
  60
  61 def revert_visible_space(document):
  62     "Revert InsetSpace visible into its ERT counterpart"
  63     i = 0
  64     while True:
  65       i = find_token(document.body, "\\begin_inset space \\textvisiblespace{}", i)
  66       if i == -1:
  67         return
  68       end = find_end_of_inset(document.body, i)
  69       subst = put_cmd_in_ert("\\textvisiblespace{}")
  70       document.body[i:end + 1] = subst
  71
  72
  73 def convert_undertilde(document):
  74     " Load undertilde automatically "
  75     i = find_token(document.header, "\\use_mathdots" , 0)
  76     if i == -1:
  77         i = find_token(document.header, "\\use_mhchem" , 0)
  78     if i == -1:
  79         i = find_token(document.header, "\\use_esint" , 0)
  80     if i == -1:
  81         document.warning("Malformed LyX document: Can't find \\use_mathdots.")
  82         return;
  83     j = find_token(document.preamble, "\\usepackage{undertilde}", 0)
  84     if j == -1:
  85         document.header.insert(i + 1, "\\use_undertilde 0")
  86     else:
  87         document.header.insert(i + 1, "\\use_undertilde 2")
  88         del document.preamble[j]
  89
  90
  91 def revert_undertilde(document):
  92     " Load undertilde if used in the document "
  93     undertilde = find_token(document.header, "\\use_undertilde" , 0)
  94     if undertilde == -1:
  95       document.warning("No \\use_undertilde line. Assuming auto.")
  96     else:
  97       val = get_value(document.header, "\\use_undertilde", undertilde)
  98       del document.header[undertilde]
  99       try:
 100         usetilde = int(val)
 101       except:
 102         document.warning("Invalid \\use_undertilde value: " + val + ". Assuming auto.")
 103         # probably usedots has not been changed, but be safe.
 104         usetilde = 1
 105
 106       if usetilde == 0:
 107         # do not load case
 108         return
 109       if usetilde == 2:
 110         # force load case
 111         add_to_preamble(document, ["\\usepackage{undertilde}"])
 112         return
 113
 114     # so we are in the auto case. we want to load undertilde if \utilde is used.
 115     i = 0
 116     while True:
 117       i = find_token(document.body, '\\begin_inset Formula', i)
 118       if i == -1:
 119         return
 120       j = find_end_of_inset(document.body, i)
 121       if j == -1:
 122         document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 123         i += 1
 124         continue
 125       code = "\n".join(document.body[i:j])
 126       if code.find("\\utilde") != -1:
 127         add_to_preamble(document, ["\\@ifundefined{utilde}{\\usepackage{undertilde}}"])
 128         return
 129       i = j
 130
 131
 132 def revert_negative_space(document):
 133     "Revert InsetSpace negmedspace and negthickspace into its TeX-code counterpart"
 134     i = 0
 135     j = 0
 136     reverted = False
 137     while True:
 138       i = find_token(document.body, "\\begin_inset space \\negmedspace{}", i)
 139       if i == -1:
 140         j = find_token(document.body, "\\begin_inset space \\negthickspace{}", j)
 141         if j == -1:
 142           # load amsmath in the preamble if not already loaded if we are at the end of checking
 143           if reverted == True:
 144             i = find_token(document.header, "\\use_amsmath 2", 0)
 145             if i == -1:
 146               add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}"])
 147           return
 148       if i == -1:
 149         return
 150       end = find_end_of_inset(document.body, i)
 151       subst = put_cmd_in_ert("\\negmedspace{}")
 152       document.body[i:end + 1] = subst
 153       j = find_token(document.body, "\\begin_inset space \\negthickspace{}", j)
 154       if j == -1:
 155         return
 156       end = find_end_of_inset(document.body, j)
 157       subst = put_cmd_in_ert("\\negthickspace{}")
 158       document.body[j:end + 1] = subst
 159       reverted = True
 160
 161
 162 def revert_math_spaces(document):
 163     "Revert formulas with protected custom space and protected hfills to TeX-code"
 164     i = 0
 165     while True:
 166       i = find_token(document.body, "\\begin_inset Formula", i)
 167       if i == -1:
 168         return
 169       j = document.body[i].find("\\hspace*")
 170       if j != -1:
 171         end = find_end_of_inset(document.body, i)
 172         subst = put_cmd_in_ert(document.body[i][21:])
 173         document.body[i:end + 1] = subst
 174       i = i + 1
 175
 176
 177 def convert_japanese_encodings(document):
 178     " Rename the japanese encodings to names understood by platex "
 179     jap_enc_dict = {
 180         "EUC-JP-pLaTeX": "euc",
 181         "JIS-pLaTeX":    "jis",
 182         "SJIS-pLaTeX":   "sjis"
 183     }
 184     i = find_token(document.header, "\\inputencoding" , 0)
 185     if i == -1:
 186         return
 187     val = get_value(document.header, "\\inputencoding", i)
 188     if val in jap_enc_dict.keys():
 189         document.header[i] = "\\inputencoding %s" % jap_enc_dict[val]
 190
 191
 192 def revert_japanese_encodings(document):
 193     " Revert the japanese encodings name changes "
 194     jap_enc_dict = {
 195         "euc":  "EUC-JP-pLaTeX",
 196         "jis":  "JIS-pLaTeX",
 197         "sjis": "SJIS-pLaTeX"
 198     }
 199     i = find_token(document.header, "\\inputencoding" , 0)
 200     if i == -1:
 201         return
 202     val = get_value(document.header, "\\inputencoding", i)
 203     if val in jap_enc_dict.keys():
 204         document.header[i] = "\\inputencoding %s" % jap_enc_dict[val]
 205
 206
 207 def revert_justification(document):
 208     " Revert the \\justification buffer param"
 209     if not del_token(document.header, '\\justification', 0):
 210         document.warning("Malformed LyX document: Missing \\justification.")
 211
 212
 213 def revert_australian(document):
 214     "Set English language variants Australian and Newzealand to English"
 215
 216     if document.language == "australian" or document.language == "newzealand":
 217         document.language = "english"
 218         i = find_token(document.header, "\\language", 0)
 219         if i != -1:
 220             document.header[i] = "\\language english"
 221
 222     j = 0
 223     while True:
 224         j = find_token(document.body, "\\lang australian", j)
 225         if j == -1:
 226             j = find_token(document.body, "\\lang newzealand", 0)
 227             if j == -1:
 228                 return
 229             else:
 230                 document.body[j] = document.body[j].replace("\\lang newzealand", "\\lang english")
 231         else:
 232             document.body[j] = document.body[j].replace("\\lang australian", "\\lang english")
 233         j += 1
 234
 235
 236 def convert_biblio_style(document):
 237     "Add a sensible default for \\biblio_style based on the citation engine."
 238     i = find_token(document.header, "\\cite_engine", 0)
 239     if i != -1:
 240         engine = get_value(document.header, "\\cite_engine", i).split("_")[0]
 241         style = {"basic": "plain", "natbib": "plainnat", "jurabib": "jurabib"}
 242         document.header.insert(i + 1, "\\biblio_style " + style[engine])
 243
 244
 245 def revert_biblio_style(document):
 246     "BibTeX insets with default option use the style defined by \\biblio_style."
 247     i = find_token(document.header, "\\biblio_style" , 0)
 248     if i == -1:
 249         document.warning("No \\biblio_style line. Nothing to do.")
 250         return
 251
 252     default_style = get_value(document.header, "\\biblio_style", i)
 253     del document.header[i]
 254
 255     # We are looking for bibtex insets having the default option
 256     i = 0
 257     while True:
 258         i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
 259         if i == -1:
 260             return
 261         j = find_end_of_inset(document.body, i)
 262         if j == -1:
 263             document.warning("Malformed LyX document: Can't find end of bibtex inset at line " + str(i))
 264             i += 1
 265             return
 266         k = find_token(document.body, "options", i, j)
 267         if k != -1:
 268             options = get_quoted_value(document.body, "options", k)
 269             if "default" in options.split(","):
 270                 document.body[k] = 'options "%s"' \
 271                     % options.replace("default", default_style)
 272         i = j
 273
 274
 275 def handle_longtable_captions(document, forward):
 276     begin_table = 0
 277     while True:
 278         begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
 279         if begin_table == -1:
 280             break
 281         end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
 282         if end_table == -1:
 283             document.warning("Malformed LyX document: Could not find end of table.")
 284             begin_table += 1
 285             continue
 286         fline = find_token(document.body, "<features", begin_table, end_table)
 287         if fline == -1:
 288             document.warning("Can't find features for inset at line " + str(begin_table))
 289             begin_table += 1
 290             continue
 291         p = document.body[fline].find("islongtable")
 292         if p == -1:
 293             # no longtable
 294             begin_table += 1
 295             continue
 296         numrows = get_option_value(document.body[begin_table], "rows")
 297         try:
 298             numrows = int(numrows)
 299         except:
 300             document.warning(document.body[begin_table])
 301             document.warning("Unable to determine rows!")
 302             begin_table = end_table
 303             continue
 304         begin_row = begin_table
 305         for row in range(numrows):
 306             begin_row = find_token(document.body, '<row', begin_row, end_table)
 307             if begin_row == -1:
 308                 document.warning("Can't find row " + str(row + 1))
 309                 break
 310             end_row = find_end_of(document.body, begin_row, '<row', '</row>')
 311             if end_row == -1:
 312                 document.warning("Can't find end of row " + str(row + 1))
 313                 break
 314             if forward:
 315                 if (get_option_value(document.body[begin_row], 'caption') == 'true' and
 316                     get_option_value(document.body[begin_row], 'endfirsthead') != 'true' and
 317                     get_option_value(document.body[begin_row], 'endhead') != 'true' and
 318                     get_option_value(document.body[begin_row], 'endfoot') != 'true' and
 319                     get_option_value(document.body[begin_row], 'endlastfoot') != 'true'):
 320                     document.body[begin_row] = set_option_value(document.body[begin_row], 'caption', 'true", endfirsthead="true')
 321             elif get_option_value(document.body[begin_row], 'caption') == 'true':
 322                 if get_option_value(document.body[begin_row], 'endfirsthead') == 'true':
 323                     document.body[begin_row] = set_option_value(document.body[begin_row], 'endfirsthead', 'false')
 324                 if get_option_value(document.body[begin_row], 'endhead') == 'true':
 325                     document.body[begin_row] = set_option_value(document.body[begin_row], 'endhead', 'false')
 326                 if get_option_value(document.body[begin_row], 'endfoot') == 'true':
 327                     document.body[begin_row] = set_option_value(document.body[begin_row], 'endfoot', 'false')
 328                 if get_option_value(document.body[begin_row], 'endlastfoot') == 'true':
 329                     document.body[begin_row] = set_option_value(document.body[begin_row], 'endlastfoot', 'false')
 330             begin_row = end_row
 331         # since there could be a tabular inside this one, we
 332         # cannot jump to end.
 333         begin_table += 1
 334
 335
 336 def convert_longtable_captions(document):
 337     "Add a firsthead flag to caption rows"
 338     handle_longtable_captions(document, True)
 339
 340
 341 def revert_longtable_captions(document):
 342     "remove head/foot flag from caption rows"
 343     handle_longtable_captions(document, False)
 344
 345
 346 def convert_use_packages(document):
 347     "use_xxx yyy => use_package xxx yyy"
 348     packages = ["amsmath", "esint", "mathdots", "mhchem", "undertilde"]
 349     for p in packages:
 350         i = find_token(document.header, "\\use_%s" % p , 0)
 351         if i != -1:
 352             value = get_value(document.header, "\\use_%s" % p , i)
 353             document.header[i] = "\\use_package %s %s" % (p, value)
 354
 355
 356 def revert_use_packages(document):
 357     "use_package xxx yyy => use_xxx yyy"
 358     packages = {"amsmath":"1", "esint":"1", "mathdots":"1", "mhchem":"1", "undertilde":"1"}
 359     # the order is arbitrary for the use_package version, and not all packages need to be given.
 360     # Ensure a complete list and correct order (important for older LyX versions and especially lyx2lyx)
 361     j = -1
 362     for p in packages.keys():
 363         regexp = re.compile(r'(\\use_package\s+%s)' % p)
 364         i = find_re(document.header, regexp, 0)
 365         if i != -1:
 366             value = get_value(document.header, "\\use_package" , i).split()[1]
 367             del document.header[i]
 368             j = i
 369     for (p, v) in packages.items():
 370         document.header.insert(j, "\\use_%s %s"  % (p, value))
 371         j = j + 1
 372
 373
 374 def convert_use_mathtools(document):
 375     "insert use_package mathtools"
 376     i = find_token(document.header, "\\use_package", 0)
 377     if i == -1:
 378         document.warning("Malformed LyX document: Can't find \\use_package.")
 379         return;
 380     j = find_token(document.preamble, "\\usepackage{mathtools}", 0)
 381     if j == -1:
 382         document.header.insert(i + 1, "\\use_package mathtools 0")
 383     else:
 384         document.header.insert(i + 1, "\\use_package mathtools 2")
 385         del document.preamble[j]
 386
 387
 388 def revert_use_mathtools(document):
 389     "remove use_package mathtools"
 390     regexp = re.compile(r'(\\use_package\s+mathtools)')
 391     i = find_re(document.header, regexp, 0)
 392     value = "1" # default is auto
 393     if i != -1:
 394         value = get_value(document.header, "\\use_package" , i).split()[1]
 395         del document.header[i]
 396     if value == "2": # on
 397         add_to_preamble(document, ["\\usepackage{mathtools}"])
 398     elif value == "1": # auto
 399         commands = ["mathclap", "mathllap", "mathrlap", \
 400                     "lgathered", "rgathered", "vcentcolon", "dblcolon", \
 401                     "coloneqq", "Coloneqq", "coloneq", "Coloneq", "eqqcolon", \
 402                     "Eqqcolon", "eqcolon", "Eqcolon", "colonapprox", \
 403                     "Colonapprox", "colonsim", "Colonsim"]
 404         i = 0
 405         while True:
 406             i = find_token(document.body, '\\begin_inset Formula', i)
 407             if i == -1:
 408                 return
 409             j = find_end_of_inset(document.body, i)
 410             if j == -1:
 411                 document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 412                 i += 1
 413                 continue
 414             code = "\n".join(document.body[i:j])
 415             for c in commands:
 416                 if code.find("\\%s" % c) != -1:
 417                     add_to_preamble(document, ["\\usepackage{mathtools}"])
 418                     return
 419             i = j
 420
 421
 422 def convert_cite_engine_type(document):
 423     "Determine the \\cite_engine_type from the citation engine."
 424     i = find_token(document.header, "\\cite_engine", 0)
 425     if i == -1:
 426         return
 427     engine = get_value(document.header, "\\cite_engine", i)
 428     if "_" in engine:
 429         engine, type = engine.split("_")
 430     else:
 431         type = {"basic": "numerical", "jurabib": "authoryear"}[engine]
 432     document.header[i] = "\\cite_engine " + engine
 433     document.header.insert(i + 1, "\\cite_engine_type " + type)
 434
 435
 436 def revert_cite_engine_type(document):
 437     "Natbib had the type appended with an underscore."
 438     engine_type = "numerical"
 439     i = find_token(document.header, "\\cite_engine_type" , 0)
 440     if i == -1:
 441         document.warning("No \\cite_engine_type line. Assuming numerical.")
 442     else:
 443         engine_type = get_value(document.header, "\\cite_engine_type", i)
 444         del document.header[i]
 445
 446     # We are looking for the natbib citation engine
 447     i = find_token(document.header, "\\cite_engine natbib", i)
 448     if i == -1:
 449         return
 450     document.header[i] = "\\cite_engine natbib_" + engine_type
 451
 452
 453 def revert_cancel(document):
 454     "add cancel to the preamble if necessary"
 455     commands = ["cancelto", "cancel", "bcancel", "xcancel"]
 456     i = 0
 457     while True:
 458         i = find_token(document.body, '\\begin_inset Formula', i)
 459         if i == -1:
 460             return
 461         j = find_end_of_inset(document.body, i)
 462         if j == -1:
 463             document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
 464             i += 1
 465             continue
 466         code = "\n".join(document.body[i:j])
 467         for c in commands:
 468             if code.find("\\%s" % c) != -1:
 469                 add_to_preamble(document, ["\\usepackage{cancel}"])
 470                 return
 471         i = j
 472
 473
##
# Conversion hub
#
# Each entry of `convert` and `revert` pairs a number with the list of
# routines from this module that belong to it; the routines of one entry
# are listed in the order given here.
# NOTE(review): presumably the number is the LyX file format reached after
# running the listed routines, and the tables are consumed by the lyx2lyx
# driver -- confirm against the caller.
# NOTE(review): revert_cancel is listed for both 424 and 423 -- confirm
# that the second listing is intended.

supported_versions = ["2.1.0","2.1"]
convert = [
           [414, []],
           [415, [convert_undertilde]],
           [416, []],
           [417, [convert_japanese_encodings]],
           [418, []],
           [419, []],
           [420, [convert_biblio_style]],
           [421, [convert_longtable_captions]],
           [422, [convert_use_packages]],
           [423, [convert_use_mathtools]],
           [424, [convert_cite_engine_type]],
           [425, []]
          ]

revert =  [
           [424, [revert_cancel]],
           [423, [revert_cite_engine_type, revert_cancel]],
           [422, [revert_use_mathtools]],
           [421, [revert_use_packages]],
           [420, [revert_longtable_captions]],
           [419, [revert_biblio_style]],
           [418, [revert_australian]],
           [417, [revert_justification]],
           [416, [revert_japanese_encodings]],
           [415, [revert_negative_space, revert_math_spaces]],
           [414, [revert_undertilde]],
           [413, [revert_visible_space]]
          ]


# This module is meant to be imported; running it directly does nothing.
if __name__ == "__main__":
    pass