lib/lyx2lyx/lyx_2_4.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # Copyright (C) 2018 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 """ Convert files to the file format generated by lyx 2.4"""
  20
  21 import re, string
  22 import unicodedata
  23 import sys, os
  24
  25 # Uncomment only what you need to import, please.
  26
  27 from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_layout,
  28 find_token, get_bool_value, get_option_value, get_value, get_quoted_value)
  29 #    del_token, del_value, del_complete_lines,
  30 #    find_complete_lines, find_end_of,
  31 #    find_re, find_substring, find_token_backwards,
  32 #    get_containing_inset, get_containing_layout,
  33 #    is_in_inset, set_bool_value
  34 #    find_tokens, find_token_exact, check_token
  35
  36 from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble)
  37 #  revert_font_attrs, insert_to_preamble, latex_length
  38 #  get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
  39 #  revert_flex_inset, hex2ratio, str2bool
  40
  41 ####################################################################
  42 # Private helper functions
  43
  44
  45
  46 ###############################################################################
  47 ###
  48 ### Conversion and reversion routines
  49 ###
  50 ###############################################################################
  51
  52
  53 def convert_lst_literalparam(document):
  54     " Add param literal to include inset "
  55
  56     i = 0
  57     while True:
  58         i = find_token(document.body, '\\begin_inset CommandInset include', i)
  59         if i == -1:
  60             break
  61         j = find_end_of_inset(document.body, i)
  62         if j == -1:
  63             document.warning("Malformed LyX document: Can't find end of command inset at line %d" % i)
  64             i += 1
  65             continue
  66         while i < j and document.body[i].strip() != '':
  67             i += 1
  68         document.body.insert(i, "literal \"true\"")
  69
  70
  71 def revert_lst_literalparam(document):
  72     " Remove param literal from include inset "
  73
  74     i = 0
  75     while True:
  76         i = find_token(document.body, '\\begin_inset CommandInset include', i)
  77         if i == -1:
  78             break
  79         j = find_end_of_inset(document.body, i)
  80         if j == -1:
  81             document.warning("Malformed LyX document: Can't find end of include inset at line %d" % i)
  82             i += 1
  83             continue
  84         k = find_token(document.body, 'literal', i, j)
  85         if k == -1:
  86             i += 1
  87             continue
  88         del document.body[k]
  89
  90
  91 def revert_paratype(document):
  92     " Revert ParaType font definitions to LaTeX "
  93
  94     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
  95         preamble = ""
  96         i1 = find_token(document.header, "\\font_roman \"PTSerif-TLF\"", 0)
  97         i2 = find_token(document.header, "\\font_sans \"default\"", 0)
  98         i3 = find_token(document.header, "\\font_typewriter \"default\"", 0)
  99         j = find_token(document.header, "\\font_sans \"PTSans-TLF\"", 0)
 100         sfval = get_value(document.header, "\\font_sf_scale", 0)
 101         # cutoff " 100"
 102         sfval = sfval[:-4]
 103         sfoption = ""
 104         if sfval != "100":
 105             sfoption = "scaled=" + format(float(sfval) / 100, '.2f')
 106         k = find_token(document.header, "\\font_typewriter \"PTMono-TLF\"", 0)
 107         ttval = get_value(document.header, "\\font_tt_scale", 0)
 108         # cutoff " 100"
 109         ttval = ttval[:-4]
 110         ttoption = ""
 111         if ttval != "100":
 112             ttoption = "scaled=" + format(float(ttval) / 100, '.2f')
 113         if i1 != -1 and i2 != -1 and i3!= -1:
 114             add_to_preamble(document, ["\\usepackage{paratype}"])
 115         else:
 116             if i1!= -1:
 117                 add_to_preamble(document, ["\\usepackage{PTSerif}"])
 118                 document.header[i1] = document.header[i1].replace("PTSerif-TLF", "default")
 119             if j!= -1:
 120                 if sfoption != "":
 121                     add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"])
 122                 else:
 123                     add_to_preamble(document, ["\\usepackage{PTSans}"])
 124                 document.header[j] = document.header[j].replace("PTSans-TLF", "default")
 125             if k!= -1:
 126                 if ttoption != "":
 127                     add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"])
 128                 else:
 129                     add_to_preamble(document, ["\\usepackage{PTMono}"])
 130                 document.header[k] = document.header[k].replace("PTMono-TLF", "default")
 131
 132
 133 def revert_xcharter(document):
 134     " Revert XCharter font definitions to LaTeX "
 135
 136     i = find_token(document.header, "\\font_roman \"xcharter\"", 0)
 137     if i == -1:
 138         return
 139
 140     # replace unsupported font setting
 141     document.header[i] = document.header[i].replace("xcharter", "default")
 142     # no need for preamble code with system fonts
 143     if get_bool_value(document.header, "\\use_non_tex_fonts"):
 144         return
 145
 146     # transfer old style figures setting to package options
 147     j = find_token(document.header, "\\font_osf true")
 148     if j != -1:
 149         options = "[osf]"
 150         document.header[j] = "\\font_osf false"
 151     else:
 152         options = ""
 153     if i != -1:
 154         add_to_preamble(document, ["\\usepackage%s{XCharter}"%options])
 155
 156
 157 def revert_lscape(document):
 158     " Reverts the landscape environment (Landscape module) to TeX-code "
 159
 160     if not "landscape" in document.get_module_list():
 161         return
 162
 163     i = 0
 164     while True:
 165         i = find_token(document.body, "\\begin_inset Flex Landscape", i)
 166         if i == -1:
 167             return
 168         j = find_end_of_inset(document.body, i)
 169         if j == -1:
 170             document.warning("Malformed LyX document: Can't find end of Landscape inset")
 171             i += 1
 172             continue
 173
 174         if document.body[i] == "\\begin_inset Flex Landscape (Floating)":
 175             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}}")
 176             document.body[i : i + 4] = put_cmd_in_ert("\\afterpage{\\begin{landscape}")
 177             add_to_preamble(document, ["\\usepackage{afterpage}"])
 178         else:
 179             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}")
 180             document.body[i : i + 4] = put_cmd_in_ert("\\begin{landscape}")
 181
 182         add_to_preamble(document, ["\\usepackage{pdflscape}"])
 183         # no need to reset i
 184
 185
 186 def convert_fontenc(document):
 187     " Convert default fontenc setting "
 188
 189     i = find_token(document.header, "\\fontencoding global", 0)
 190     if i == -1:
 191         return
 192
 193     document.header[i] = document.header[i].replace("global", "auto")
 194
 195
 196 def revert_fontenc(document):
 197     " Revert default fontenc setting "
 198
 199     i = find_token(document.header, "\\fontencoding auto", 0)
 200     if i == -1:
 201         return
 202
 203     document.header[i] = document.header[i].replace("auto", "global")
 204
 205
 206 def revert_nospellcheck(document):
 207     " Remove nospellcheck font info param "
 208
 209     i = 0
 210     while True:
 211         i = find_token(document.body, '\\nospellcheck', i)
 212         if i == -1:
 213             return
 214         del document.body[i]
 215
 216
 217 def revert_floatpclass(document):
 218     " Remove float placement params 'document' and 'class' "
 219
 220     i = 0
 221     i = find_token(document.header, "\\float_placement class", 0)
 222     if i != -1:
 223         del document.header[i]
 224
 225     i = 0
 226     while True:
 227         i = find_token(document.body, '\\begin_inset Float', i)
 228         if i == -1:
 229             break
 230         j = find_end_of_inset(document.body, i)
 231         k = find_token(document.body, 'placement class', i, i + 2)
 232         if k == -1:
 233             k = find_token(document.body, 'placement document', i, i + 2)
 234             if k != -1:
 235                 del document.body[k]
 236             i = j
 237             continue
 238         del document.body[k]
 239
 240
 241 def revert_floatalignment(document):
 242     " Remove float alignment params "
 243
 244     i = 0
 245     i = find_token(document.header, "\\float_alignment", 0)
 246     galignment = ""
 247     if i != -1:
 248         galignment = get_value(document.header, "\\float_alignment", i)
 249         del document.header[i]
 250
 251     i = 0
 252     while True:
 253         i = find_token(document.body, '\\begin_inset Float', i)
 254         if i == -1:
 255             break
 256         j = find_end_of_inset(document.body, i)
 257         if j == -1:
 258             document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
 259             i += 1
 260         k = find_token(document.body, 'alignment', i, i + 4)
 261         if k == -1:
 262             i = j
 263             continue
 264         alignment = get_value(document.body, "alignment", k)
 265         if alignment == "document":
 266             alignment = galignment
 267         del document.body[k]
 268         l = find_token(document.body, "\\begin_layout Plain Layout", i, j)
 269         if l == -1:
 270             document.warning("Can't find float layout!")
 271             i = j
 272             continue
 273         alcmd = []
 274         if alignment == "left":
 275             alcmd = put_cmd_in_ert("\\raggedright{}")
 276         elif alignment == "center":
 277             alcmd = put_cmd_in_ert("\\centering{}")
 278         elif alignment == "right":
 279             alcmd = put_cmd_in_ert("\\raggedleft{}")
 280         if len(alcmd) > 0:
 281             document.body[l+1:l+1] = alcmd
 282         i = j
 283
 284
 285 def revert_tuftecite(document):
 286     " Revert \cite commands in tufte classes "
 287
 288     tufte = ["tufte-book", "tufte-handout"]
 289     if document.textclass not in tufte:
 290         return
 291
 292     i = 0
 293     while (True):
 294         i = find_token(document.body, "\\begin_inset CommandInset citation", i)
 295         if i == -1:
 296             break
 297         j = find_end_of_inset(document.body, i)
 298         if j == -1:
 299             document.warning("Can't find end of citation inset at line %d!!" %(i))
 300             i += 1
 301             continue
 302         k = find_token(document.body, "LatexCommand", i, j)
 303         if k == -1:
 304             document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
 305             i = j + 1
 306             continue
 307         cmd = get_value(document.body, "LatexCommand", k)
 308         if cmd != "cite":
 309             i = j + 1
 310             continue
 311         pre = get_quoted_value(document.body, "before", i, j)
 312         post = get_quoted_value(document.body, "after", i, j)
 313         key = get_quoted_value(document.body, "key", i, j)
 314         if not key:
 315             document.warning("Citation inset at line %d does not have a key!" %(i))
 316             key = "???"
 317         # Replace command with ERT
 318         res = "\\cite"
 319         if pre:
 320             res += "[" + pre + "]"
 321         if post:
 322             res += "[" + post + "]"
 323         elif pre:
 324             res += "[]"
 325         res += "{" + key + "}"
 326         document.body[i:j+1] = put_cmd_in_ert([res])
 327         i = j + 1
 328
 329
 330 def revert_stretchcolumn(document):
 331     " We remove the column varwidth flags or everything else will become a mess. "
 332     i = 0
 333     while True:
 334         i = find_token(document.body, "\\begin_inset Tabular", i)
 335         if i == -1:
 336             return
 337         j = find_end_of_inset(document.body, i + 1)
 338         if j == -1:
 339             document.warning("Malformed LyX document: Could not find end of tabular.")
 340             continue
 341         for k in range(i, j):
 342             if re.search('^<column.*varwidth="[^"]+".*>$', document.body[k]):
 343                 document.warning("Converting 'tabularx'/'xltabular' table to normal table.")
 344                 document.body[k] = document.body[k].replace(' varwidth="true"', '')
 345         i = i + 1
 346
 347
 348 def revert_vcolumns(document):
 349     " Revert standard columns with line breaks etc. "
 350     i = 0
 351     needvarwidth = False
 352     needarray = False
 353     try:
 354         while True:
 355             i = find_token(document.body, "\\begin_inset Tabular", i)
 356             if i == -1:
 357                 return
 358             j = find_end_of_inset(document.body, i)
 359             if j == -1:
 360                 document.warning("Malformed LyX document: Could not find end of tabular.")
 361                 i += 1
 362                 continue
 363
 364             # Collect necessary column information
 365             m = i + 1
 366             nrows = int(document.body[i+1].split('"')[3])
 367             ncols = int(document.body[i+1].split('"')[5])
 368             col_info = []
 369             for k in range(ncols):
 370                 m = find_token(document.body, "<column", m)
 371                 width = get_option_value(document.body[m], 'width')
 372                 varwidth = get_option_value(document.body[m], 'varwidth')
 373                 alignment = get_option_value(document.body[m], 'alignment')
 374                 special = get_option_value(document.body[m], 'special')
 375                 col_info.append([width, varwidth, alignment, special, m])
 376
 377             # Now parse cells
 378             m = i + 1
 379             lines = []
 380             for row in range(nrows):
 381                 for col in range(ncols):
 382                     m = find_token(document.body, "<cell", m)
 383                     multicolumn = get_option_value(document.body[m], 'multicolumn')
 384                     multirow = get_option_value(document.body[m], 'multirow')
 385                     width = get_option_value(document.body[m], 'width')
 386                     rotate = get_option_value(document.body[m], 'rotate')
 387                     # Check for: linebreaks, multipars, non-standard environments
 388                     begcell = m
 389                     endcell = find_token(document.body, "</cell>", begcell)
 390                     vcand = False
 391                     if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
 392                         vcand = True
 393                     elif count_pars_in_inset(document.body, begcell + 2) > 1:
 394                         vcand = True
 395                     elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
 396                         vcand = True
 397                     if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""):
 398                         if col_info[col][0] == "" and col_info[col][1] == "" and col_info[col][3] == "":
 399                             needvarwidth = True
 400                             alignment = col_info[col][2]
 401                             col_line = col_info[col][4]
 402                             vval = ""
 403                             if alignment == "center":
 404                                 vval = ">{\\centering}"
 405                             elif  alignment == "left":
 406                                 vval = ">{\\raggedright}"
 407                             elif alignment == "right":
 408                                 vval = ">{\\raggedleft}"
 409                             if vval != "":
 410                                 needarray = True
 411                             vval += "V{\\linewidth}"
 412
 413                             document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
 414                             # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
 415                             # with newlines, and we do not want that)
 416                             while True:
 417                                 endcell = find_token(document.body, "</cell>", begcell)
 418                                 linebreak = False
 419                                 nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
 420                                 if nl == -1:
 421                                     nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
 422                                     if nl == -1:
 423                                          break
 424                                     linebreak = True
 425                                 nle = find_end_of_inset(document.body, nl)
 426                                 del(document.body[nle:nle+1])
 427                                 if linebreak:
 428                                     document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
 429                                 else:
 430                                     document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
 431                     m += 1
 432
 433             i = j + 1
 434
 435     finally:
 436         if needarray == True:
 437             add_to_preamble(document, ["\\usepackage{array}"])
 438         if needvarwidth == True:
 439             add_to_preamble(document, ["\\usepackage{varwidth}"])
 440
 441
 442 def revert_bibencoding(document):
 443     " Revert bibliography encoding "
 444
 445     # Get cite engine
 446     engine = "basic"
 447     i = find_token(document.header, "\\cite_engine", 0)
 448     if i == -1:
 449         document.warning("Malformed document! Missing \\cite_engine")
 450     else:
 451         engine = get_value(document.header, "\\cite_engine", i)
 452
 453     # Check if biblatex
 454     biblatex = False
 455     if engine in ["biblatex", "biblatex-natbib"]:
 456         biblatex = True
 457
 458     # Map lyx to latex encoding names
 459     encodings = {
 460         "utf8" : "utf8",
 461         "utf8x" : "utf8x",
 462         "armscii8" : "armscii8",
 463         "iso8859-1" : "latin1",
 464         "iso8859-2" : "latin2",
 465         "iso8859-3" : "latin3",
 466         "iso8859-4" : "latin4",
 467         "iso8859-5" : "iso88595",
 468         "iso8859-6" : "8859-6",
 469         "iso8859-7" : "iso-8859-7",
 470         "iso8859-8" : "8859-8",
 471         "iso8859-9" : "latin5",
 472         "iso8859-13" : "latin7",
 473         "iso8859-15" : "latin9",
 474         "iso8859-16" : "latin10",
 475         "applemac" : "applemac",
 476         "cp437" : "cp437",
 477         "cp437de" : "cp437de",
 478         "cp850" : "cp850",
 479         "cp852" : "cp852",
 480         "cp855" : "cp855",
 481         "cp858" : "cp858",
 482         "cp862" : "cp862",
 483         "cp865" : "cp865",
 484         "cp866" : "cp866",
 485         "cp1250" : "cp1250",
 486         "cp1251" : "cp1251",
 487         "cp1252" : "cp1252",
 488         "cp1255" : "cp1255",
 489         "cp1256" : "cp1256",
 490         "cp1257" : "cp1257",
 491         "koi8-r" : "koi8-r",
 492         "koi8-u" : "koi8-u",
 493         "pt154" : "pt154",
 494         "utf8-platex" : "utf8",
 495         "ascii" : "ascii"
 496     }
 497
 498     i = 0
 499     bibresources = []
 500     while (True):
 501         i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
 502         if i == -1:
 503             break
 504         j = find_end_of_inset(document.body, i)
 505         if j == -1:
 506             document.warning("Can't find end of bibtex inset at line %d!!" %(i))
 507             i += 1
 508             continue
 509         encoding = get_quoted_value(document.body, "encoding", i, j)
 510         if not encoding:
 511             i += 1
 512             continue
 513         # remove encoding line
 514         k = find_token(document.body, "encoding", i, j)
 515         if k != -1:
 516             del document.body[k]
 517         # Re-find inset end line
 518         j = find_end_of_inset(document.body, i)
 519         if biblatex:
 520             biblio_options = ""
 521             h = find_token(document.header, "\\biblio_options", 0)
 522             if h != -1:
 523                 biblio_options = get_value(document.header, "\\biblio_options", h)
 524                 if not "bibencoding" in biblio_options:
 525                      document.header[h] += ",bibencoding=%s" % encodings[encoding]
 526             else:
 527                 bs = find_token(document.header, "\\biblatex_bibstyle", 0)
 528                 if bs == -1:
 529                     # this should not happen
 530                     document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
 531                 else:
 532                     document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + encodings[encoding]]
 533         else:
 534             document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
 535             document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + encodings[encoding] + "}")
 536
 537         i = j + 1
 538
 539
 540 ##
 541 # Conversion hub
 542 #
 543
 544 supported_versions = ["2.4.0", "2.4"]
 545 convert = [
 546            [545, [convert_lst_literalparam]],
 547            [546, []],
 548            [547, []],
 549            [548, []],
 550            [549, []],
 551            [550, [convert_fontenc]],
 552            [551, []],
 553            [552, []],
 554            [553, []],
 555            [554, []],
 556            [555, []],
 557            [556, []]
 558           ]
 559
 560 revert =  [
 561            [555, [revert_bibencoding]],
 562            [554, [revert_vcolumns]],
 563            [553, [revert_stretchcolumn]],
 564            [552, [revert_tuftecite]],
 565            [551, [revert_floatpclass, revert_floatalignment]],
 566            [550, [revert_nospellcheck]],
 567            [549, [revert_fontenc]],
 568            [548, []],# dummy format change
 569            [547, [revert_lscape]],
 570            [546, [revert_xcharter]],
 571            [545, [revert_paratype]],
 572            [544, [revert_lst_literalparam]]
 573           ]
 574
 575
 576 if __name__ == "__main__":
 577     pass