lib/lyx2lyx/lyx_2_4.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # Copyright (C) 2018 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 """ Convert files to the file format generated by lyx 2.4"""
  20
  21 import re, string
  22 import unicodedata
  23 import sys, os
  24
  25 from datetime import (datetime, date, time)
  26
  27 # Uncomment only what you need to import, please.
  28
  29 from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_layout,
  30                           find_token, find_re, get_bool_value, get_containing_layout,
  31                           get_option_value, get_value, get_quoted_value)
  32 #    del_token, del_value, del_complete_lines,
  33 #    find_complete_lines, find_end_of,
  34 #    find_re, find_substring, find_token_backwards,
  35 #    get_containing_inset,
  36 #    is_in_inset, set_bool_value
  37 #    find_tokens, find_token_exact, check_token
  38
  39 from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble)
  40 #  revert_font_attrs, insert_to_preamble, latex_length
  41 #  get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
  42 #  revert_flex_inset, hex2ratio, str2bool
  43
  44 ####################################################################
  45 # Private helper functions
  46
  47 def add_preamble_fonts(document, fontmap):
  48     " Add collected font-packages with their option to user-preamble"
  49
  50     for pkg in fontmap:
  51         if len(fontmap[pkg]) > 0:
  52             xoption = "[" + ",".join(fontmap[pkg]) + "]"
  53         else:
  54             xoption = ""
  55         preamble = "\\usepackage" + xoption + "{%s}" % pkg
  56         add_to_preamble(document, [preamble])
  57
  58
  59 def convert_fonts(document, font_list, font_type, scale_type, pkg):
  60     " Handle font definition to LaTeX "
  61
  62     def createkey(pkg, options):
  63         sort(options)
  64         return pkg + ':' + "-".join(options)
  65
  66     def getfontname(pkg, options, pkg2fontmap, font2pkgmap):
  67         ""
  68         options.sort()
  69         pkgkey = createkey(pkg, options)
  70
  71         if not pkgkey in pkg2fontmap:
  72             return None
  73         fontname = pkg2fontmap[pkgkey]
  74         if not fontname in font2pkgmap:
  75             document.warning("Something is wrong in pkgname+options <-> fontname conversion")
  76             return None
  77
  78         pkgkey2 = createkey(font2pkgmap[fontname].package, font2pkgmap[fontname].options)
  79         if pkgkey == pkgkey2:
  80             return fontname
  81         return None
  82
  83     # We need a mapping pkg+options => font_name
  84     # and font_name => pkg+options
  85     class fontinfo:
  86         package = None
  87         options = []
  88
  89     font2pkgmap = dict()
  90     pkg2fontmap = dict()
  91     pkgmap = dict()
  92     for fl in font_list:
  93         fe = fontinfo()
  94         flt = fl.split(",")
  95         font_name = flt[0]
  96         fe.options = flt[1:]
  97         if pkg == None:
  98             fe.package = font_name
  99         else:
 100             fe.package = pkg
 101         font2pkgmap[font_name] = fe
 102         pkgkey = createkey(fe.package, fe.options)
 103         if pkgkey in pkg2fontmap:
 104             # Repeated the same entry? Check content
 105             if pkg2fontmap[pkgkey] != font_name:
 106                 print "ERROR:"
 107         pkg2fontmap[pkgkey] = font_name
 108         pkgmap[fe.package] = 1
 109
 110     rpkg = re.compile(r'^\\usepackage(\[([^\]]*)\])?\{([^\}]+)\}')
 111     rscaleopt = re.compile(r'^scaled?=(.*)')
 112     ft = font_type
 113     if scale_type == None:
 114         fontscale = None
 115     else:
 116         fontscale = "\\font_" + scale_type + "_scale"
 117     i = 0
 118     while i < len(document.preamble):
 119         i = find_re(document.preamble, rpkg, i)
 120         if i == -1:
 121             return
 122         mo = rpkg.search(document.preamble[i])
 123         if mo == None or mo.group(2) == None:
 124             options = []
 125         else:
 126             options = mo.group(2).replace(' ', '').split(",")
 127         pkg = mo.group(3)
 128         o = 0
 129         oscale = 1
 130         while o < len(options):
 131             mo = rscaleopt.search(options[o])
 132             if mo == None:
 133                 o += 1
 134                 continue
 135             oscale = mo.group(1)
 136             del options[o]
 137             break
 138
 139         if not pkg in pkgmap:
 140             i += 1
 141             continue
 142         # determine fontname
 143         fn = getfontname(pkg, options, pkg2fontmap, font2pkgmap)
 144         if fn == None:
 145             i += 1
 146             continue
 147         del document.preamble[i]
 148         if i > 0 and document.preamble[i-1] == "% Added by lyx2lyx":
 149             del document.preamble[i-1]
 150         if fontscale != None:
 151             j = find_token(document.header, fontscale, 0)
 152             if j != -1:
 153                 val = get_value(document.header, fontscale, j)
 154                 vals = val.split()
 155                 scale = "100"
 156                 if oscale != None:
 157                     scale = "%03d" % int(float(oscale) * 100)
 158                 document.header[j] = fontscale + " " + scale + " " + vals[1]
 159         j = find_token(document.header, ft, 0)
 160         if j != -1:
 161             val = get_value(document.header, ft, j)
 162             vals = val.split()
 163             document.header[j] = ft + ' "' + fn + '" ' + vals[1]
 164
 165 def revert_fonts(document, font_list, fontmap, package=None):
 166     " Revert native font definition to LaTeX "
 167     # fonlist := list of fonts created from the same package
 168     # Empty package means that the font-name is the same as the package-name
 169     # fontmap (key = package, val += found options) will be filled
 170     # and used later in add_preamble_fonts() to be added to user-preamble
 171
 172     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 173         font_types = ["\\font_roman", "\\font_sans,sf", "\\font_typewriter,tt", "\\font_math,math"]
 174         optmap = dict()
 175         for fontl1 in font_list:
 176             fontl = fontl1.split(",")
 177             font = fontl[0]
 178             optmap[font] = fontl[1:]
 179         for ft1 in font_types:
 180             fts = ft1.split(",")
 181             ft = fts[0]
 182             i = find_token(document.header, ft, 0)
 183             if i == -1:
 184                 continue
 185             val = get_value(document.header, ft, i)
 186             words = val.split()
 187             font = words[0].replace('"', '')
 188             if not font in optmap:
 189                 continue
 190             if package == None:
 191                 val = font;
 192             else:
 193                 val = package
 194             if not val in fontmap:
 195                 fontmap[val] = []
 196             document.header[i] = ft + ' "default" ' + words[1]
 197             if len(fts) > 1:
 198                 xval =  get_value(document.header, "\\font_" + fts[1] + "_scale", 0)
 199                 if xval != '':
 200                     # cutoff " 100"
 201                     xval = xval[:-4]
 202                     if xval != "100":
 203                         fontmap[val].extend(["scale=" + format(float(xval) / 100, '.2f')])
 204             if len(optmap[font]) > 0:
 205                 fontmap[val].extend(optmap[font])
 206
 207 ###############################################################################
 208 ###
 209 ### Conversion and reversion routines
 210 ###
 211 ###############################################################################
 212
 213 def convert_ibmplex(document):
 214     " Handle IBM Plex font definition to LaTeX "
 215
 216     ibmplex_fonts_roman = ['IBMPlexSerif', 'IBMPlexSerifThin,thin',
 217                            'IBMPlexSerifExtraLight,extralight', 'IBMPlexSerifLight,light',
 218                            'IBMPlexSerifSemibold,semibold']
 219     ibmplex_fonts_sans = ['IBMPlexSans','IBMPlexSansCondensed,condensed',
 220                           'IBMPlexSansThin,thin', 'IBMPlexSansExtraLight,extralight',
 221                           'IBMPlexSansLight,light', 'IBMPlexSansSemibold,semibold']
 222     ibmplex_fonts_typewriter = ['IBMPlexMono', 'IBMPlexMonoThin,thin',
 223                                 'IBMPlexMonoExtraLight,extralight', 'IBMPlexMonoLight,light',
 224                                 'IBMPlexMonoSemibold,semibold']
 225
 226     convert_fonts(document, ibmplex_fonts_roman, "\\font_roman", None, "plex-serif")
 227     convert_fonts(document, ibmplex_fonts_sans, "\\font_sans", "sf", "plex-sans")
 228     convert_fonts(document, ibmplex_fonts_typewriter, "\\font_typewriter", "tt", "plex-mono")
 229
 230 def revert_ibmplex(document):
 231     " Revert native IBM Plex font definition to LaTeX "
 232
 233     fontmap = dict()
 234     revert_fonts(document, ['IBMPlexSerif', 'IBMPlexSerifThin,thin',
 235                             'IBMPlexSerifExtraLight,extralight',
 236                             'IBMPlexSerifLight,light', 'IBMPlexSerifSemibold,semibold'],
 237                  fontmap, "plex-serif")
 238     revert_fonts(document, ['IBMPlexSans','IBMPlexSansCondensed,condensed',
 239                             'IBMPlexSansThin,thin', 'IBMPlexSansExtraLight,extralight',
 240                             'IBMPlexSansLight,light', 'IBMPlexSansSemibold,semibold'],
 241                  fontmap, "plex-sans")
 242     revert_fonts(document, ['IBMPlexMono', 'IBMPlexMonoThin,thin',
 243                             'IBMPlexMonoExtraLight,extralight', 'IBMPlexMonoLight,light',
 244                             'IBMPlexMonoSemibold,semibold'],
 245                  fontmap, "plex-mono")
 246     add_preamble_fonts(document, fontmap)
 247
 248 def convert_dejavu(document):
 249     " Handle DejaVu font definition to LaTeX "
 250
 251     dejavu_fonts_roman = ['DejaVuSerif', 'DejaVuSerifCondensed']
 252     dejavu_fonts_sans = ['DejaVuSans','DejaVuSansCondensed']
 253     dejavu_fonts_typewriter = ['DejaVuSansMono']
 254
 255     convert_fonts(document, dejavu_fonts_roman, "\\font_roman", None, None)
 256     convert_fonts(document, dejavu_fonts_sans, "\\font_sans", "sf", None)
 257     convert_fonts(document, dejavu_fonts_typewriter, "\\font_typewriter", "tt", None)
 258
 259 def revert_dejavu(document):
 260     " Revert native DejaVu font definition to LaTeX "
 261
 262     dejavu_fonts = ['DejaVuSerif', 'DejaVuSerifCondensed', 'DejaVuSans',
 263                     'DejaVuSansMono', 'DejaVuSansCondensed']
 264     fontmap = dict()
 265     revert_fonts(document, dejavu_fonts, fontmap)
 266     add_preamble_fonts(document, fontmap)
 267
 268 def removeFrontMatterStyles(document):
 269     " Remove styles Begin/EndFrontmatter"
 270
 271     layouts = ['BeginFrontmatter', 'EndFrontmatter']
 272     for layout in layouts:
 273         i = 0
 274         while True:
 275             i = find_token(document.body, '\\begin_layout ' + layout, i)
 276             if i == -1:
 277                 break
 278             j = find_end_of_layout(document.body, i)
 279             if j == -1:
 280                 document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
 281                 i += 1
 282                 continue
 283             while i > 0 and document.body[i-1].strip() == '':
 284                 i -= 1
 285             while document.body[j+1].strip() == '':
 286                 j = j + 1
 287             document.body[i:j+1] = ['']
 288
 289 def addFrontMatterStyles(document):
 290     " Use styles Begin/EndFrontmatter for elsarticle"
 291
 292     def insertFrontmatter(prefix, line):
 293         above = line
 294         while above > 0 and document.body[above-1].strip() == '':
 295             above -= 1
 296         below = line
 297         while document.body[below].strip() == '':
 298             below += 1
 299         document.body[above:below] = ['', '\\begin_layout ' + prefix + 'Frontmatter',
 300                                     '\\begin_inset Note Note',
 301                                     'status open', '',
 302                                     '\\begin_layout Plain Layout',
 303                                     'Keep this empty!',
 304                                     '\\end_layout', '',
 305                                     '\\end_inset', '', '',
 306                                     '\\end_layout', '']
 307
 308     if document.textclass == "elsarticle":
 309         layouts = ['Title', 'Title footnote', 'Author', 'Author footnote',
 310                    'Corresponding author', 'Address', 'Email', 'Abstract', 'Keywords']
 311         first = -1
 312         last = -1
 313         for layout in layouts:
 314             i = 0
 315             while True:
 316                 i = find_token(document.body, '\\begin_layout ' + layout, i)
 317                 if i == -1:
 318                     break
 319                 k = find_end_of_layout(document.body, i)
 320                 if k == -1:
 321                     document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
 322                     i += 1;
 323                     continue
 324                 if first == -1 or i < first:
 325                     first = i
 326                 if last == -1 or last <= k:
 327                     last = k+1
 328                 i = k+1
 329         if first == -1:
 330             return
 331         insertFrontmatter('End', last)
 332         insertFrontmatter('Begin', first)
 333
 334 def convert_lst_literalparam(document):
 335     " Add param literal to include inset "
 336
 337     i = 0
 338     while True:
 339         i = find_token(document.body, '\\begin_inset CommandInset include', i)
 340         if i == -1:
 341             break
 342         j = find_end_of_inset(document.body, i)
 343         if j == -1:
 344             document.warning("Malformed LyX document: Can't find end of command inset at line %d" % i)
 345             i += 1
 346             continue
 347         while i < j and document.body[i].strip() != '':
 348             i += 1
 349         document.body.insert(i, "literal \"true\"")
 350
 351
 352 def revert_lst_literalparam(document):
 353     " Remove param literal from include inset "
 354
 355     i = 0
 356     while True:
 357         i = find_token(document.body, '\\begin_inset CommandInset include', i)
 358         if i == -1:
 359             break
 360         j = find_end_of_inset(document.body, i)
 361         if j == -1:
 362             document.warning("Malformed LyX document: Can't find end of include inset at line %d" % i)
 363             i += 1
 364             continue
 365         k = find_token(document.body, 'literal', i, j)
 366         if k == -1:
 367             i += 1
 368             continue
 369         del document.body[k]
 370
 371
 372 def revert_paratype(document):
 373     " Revert ParaType font definitions to LaTeX "
 374
 375     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 376         preamble = ""
 377         i1 = find_token(document.header, "\\font_roman \"PTSerif-TLF\"", 0)
 378         i2 = find_token(document.header, "\\font_sans \"default\"", 0)
 379         i3 = find_token(document.header, "\\font_typewriter \"default\"", 0)
 380         j = find_token(document.header, "\\font_sans \"PTSans-TLF\"", 0)
 381         sfval = get_value(document.header, "\\font_sf_scale", 0)
 382         # cutoff " 100"
 383         sfval = sfval[:-4]
 384         sfoption = ""
 385         if sfval != "100":
 386             sfoption = "scaled=" + format(float(sfval) / 100, '.2f')
 387         k = find_token(document.header, "\\font_typewriter \"PTMono-TLF\"", 0)
 388         ttval = get_value(document.header, "\\font_tt_scale", 0)
 389         # cutoff " 100"
 390         ttval = ttval[:-4]
 391         ttoption = ""
 392         if ttval != "100":
 393             ttoption = "scaled=" + format(float(ttval) / 100, '.2f')
 394         if i1 != -1 and i2 != -1 and i3!= -1:
 395             add_to_preamble(document, ["\\usepackage{paratype}"])
 396         else:
 397             if i1!= -1:
 398                 add_to_preamble(document, ["\\usepackage{PTSerif}"])
 399                 document.header[i1] = document.header[i1].replace("PTSerif-TLF", "default")
 400             if j!= -1:
 401                 if sfoption != "":
 402                     add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"])
 403                 else:
 404                     add_to_preamble(document, ["\\usepackage{PTSans}"])
 405                 document.header[j] = document.header[j].replace("PTSans-TLF", "default")
 406             if k!= -1:
 407                 if ttoption != "":
 408                     add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"])
 409                 else:
 410                     add_to_preamble(document, ["\\usepackage{PTMono}"])
 411                 document.header[k] = document.header[k].replace("PTMono-TLF", "default")
 412
 413
 414 def revert_xcharter(document):
 415     " Revert XCharter font definitions to LaTeX "
 416
 417     i = find_token(document.header, "\\font_roman \"xcharter\"", 0)
 418     if i == -1:
 419         return
 420
 421     # replace unsupported font setting
 422     document.header[i] = document.header[i].replace("xcharter", "default")
 423     # no need for preamble code with system fonts
 424     if get_bool_value(document.header, "\\use_non_tex_fonts"):
 425         return
 426
 427     # transfer old style figures setting to package options
 428     j = find_token(document.header, "\\font_osf true")
 429     if j != -1:
 430         options = "[osf]"
 431         document.header[j] = "\\font_osf false"
 432     else:
 433         options = ""
 434     if i != -1:
 435         add_to_preamble(document, ["\\usepackage%s{XCharter}"%options])
 436
 437
 438 def revert_lscape(document):
 439     " Reverts the landscape environment (Landscape module) to TeX-code "
 440
 441     if not "landscape" in document.get_module_list():
 442         return
 443
 444     i = 0
 445     while True:
 446         i = find_token(document.body, "\\begin_inset Flex Landscape", i)
 447         if i == -1:
 448             return
 449         j = find_end_of_inset(document.body, i)
 450         if j == -1:
 451             document.warning("Malformed LyX document: Can't find end of Landscape inset")
 452             i += 1
 453             continue
 454
 455         if document.body[i] == "\\begin_inset Flex Landscape (Floating)":
 456             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}}")
 457             document.body[i : i + 4] = put_cmd_in_ert("\\afterpage{\\begin{landscape}")
 458             add_to_preamble(document, ["\\usepackage{afterpage}"])
 459         else:
 460             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}")
 461             document.body[i : i + 4] = put_cmd_in_ert("\\begin{landscape}")
 462
 463         add_to_preamble(document, ["\\usepackage{pdflscape}"])
 464         # no need to reset i
 465
 466
 467 def convert_fontenc(document):
 468     " Convert default fontenc setting "
 469
 470     i = find_token(document.header, "\\fontencoding global", 0)
 471     if i == -1:
 472         return
 473
 474     document.header[i] = document.header[i].replace("global", "auto")
 475
 476
 477 def revert_fontenc(document):
 478     " Revert default fontenc setting "
 479
 480     i = find_token(document.header, "\\fontencoding auto", 0)
 481     if i == -1:
 482         return
 483
 484     document.header[i] = document.header[i].replace("auto", "global")
 485
 486
 487 def revert_nospellcheck(document):
 488     " Remove nospellcheck font info param "
 489
 490     i = 0
 491     while True:
 492         i = find_token(document.body, '\\nospellcheck', i)
 493         if i == -1:
 494             return
 495         del document.body[i]
 496
 497
 498 def revert_floatpclass(document):
 499     " Remove float placement params 'document' and 'class' "
 500
 501     i = 0
 502     i = find_token(document.header, "\\float_placement class", 0)
 503     if i != -1:
 504         del document.header[i]
 505
 506     i = 0
 507     while True:
 508         i = find_token(document.body, '\\begin_inset Float', i)
 509         if i == -1:
 510             break
 511         j = find_end_of_inset(document.body, i)
 512         k = find_token(document.body, 'placement class', i, i + 2)
 513         if k == -1:
 514             k = find_token(document.body, 'placement document', i, i + 2)
 515             if k != -1:
 516                 del document.body[k]
 517             i = j
 518             continue
 519         del document.body[k]
 520
 521
 522 def revert_floatalignment(document):
 523     " Remove float alignment params "
 524
 525     i = 0
 526     i = find_token(document.header, "\\float_alignment", 0)
 527     galignment = ""
 528     if i != -1:
 529         galignment = get_value(document.header, "\\float_alignment", i)
 530         del document.header[i]
 531
 532     i = 0
 533     while True:
 534         i = find_token(document.body, '\\begin_inset Float', i)
 535         if i == -1:
 536             break
 537         j = find_end_of_inset(document.body, i)
 538         if j == -1:
 539             document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
 540             i += 1
 541         k = find_token(document.body, 'alignment', i, i + 4)
 542         if k == -1:
 543             i = j
 544             continue
 545         alignment = get_value(document.body, "alignment", k)
 546         if alignment == "document":
 547             alignment = galignment
 548         del document.body[k]
 549         l = find_token(document.body, "\\begin_layout Plain Layout", i, j)
 550         if l == -1:
 551             document.warning("Can't find float layout!")
 552             i = j
 553             continue
 554         alcmd = []
 555         if alignment == "left":
 556             alcmd = put_cmd_in_ert("\\raggedright{}")
 557         elif alignment == "center":
 558             alcmd = put_cmd_in_ert("\\centering{}")
 559         elif alignment == "right":
 560             alcmd = put_cmd_in_ert("\\raggedleft{}")
 561         if len(alcmd) > 0:
 562             document.body[l+1:l+1] = alcmd
 563         i = j
 564
 565
 566 def revert_tuftecite(document):
 567     " Revert \cite commands in tufte classes "
 568
 569     tufte = ["tufte-book", "tufte-handout"]
 570     if document.textclass not in tufte:
 571         return
 572
 573     i = 0
 574     while (True):
 575         i = find_token(document.body, "\\begin_inset CommandInset citation", i)
 576         if i == -1:
 577             break
 578         j = find_end_of_inset(document.body, i)
 579         if j == -1:
 580             document.warning("Can't find end of citation inset at line %d!!" %(i))
 581             i += 1
 582             continue
 583         k = find_token(document.body, "LatexCommand", i, j)
 584         if k == -1:
 585             document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
 586             i = j + 1
 587             continue
 588         cmd = get_value(document.body, "LatexCommand", k)
 589         if cmd != "cite":
 590             i = j + 1
 591             continue
 592         pre = get_quoted_value(document.body, "before", i, j)
 593         post = get_quoted_value(document.body, "after", i, j)
 594         key = get_quoted_value(document.body, "key", i, j)
 595         if not key:
 596             document.warning("Citation inset at line %d does not have a key!" %(i))
 597             key = "???"
 598         # Replace command with ERT
 599         res = "\\cite"
 600         if pre:
 601             res += "[" + pre + "]"
 602         if post:
 603             res += "[" + post + "]"
 604         elif pre:
 605             res += "[]"
 606         res += "{" + key + "}"
 607         document.body[i:j+1] = put_cmd_in_ert([res])
 608         i = j + 1
 609
 610
 611 def revert_stretchcolumn(document):
 612     " We remove the column varwidth flags or everything else will become a mess. "
 613     i = 0
 614     while True:
 615         i = find_token(document.body, "\\begin_inset Tabular", i)
 616         if i == -1:
 617             return
 618         j = find_end_of_inset(document.body, i + 1)
 619         if j == -1:
 620             document.warning("Malformed LyX document: Could not find end of tabular.")
 621             continue
 622         for k in range(i, j):
 623             if re.search('^<column.*varwidth="[^"]+".*>$', document.body[k]):
 624                 document.warning("Converting 'tabularx'/'xltabular' table to normal table.")
 625                 document.body[k] = document.body[k].replace(' varwidth="true"', '')
 626         i = i + 1
 627
 628
 629 def revert_vcolumns(document):
 630     " Revert standard columns with line breaks etc. "
 631     i = 0
 632     needvarwidth = False
 633     needarray = False
 634     try:
 635         while True:
 636             i = find_token(document.body, "\\begin_inset Tabular", i)
 637             if i == -1:
 638                 return
 639             j = find_end_of_inset(document.body, i)
 640             if j == -1:
 641                 document.warning("Malformed LyX document: Could not find end of tabular.")
 642                 i += 1
 643                 continue
 644
 645             # Collect necessary column information
 646             m = i + 1
 647             nrows = int(document.body[i+1].split('"')[3])
 648             ncols = int(document.body[i+1].split('"')[5])
 649             col_info = []
 650             for k in range(ncols):
 651                 m = find_token(document.body, "<column", m)
 652                 width = get_option_value(document.body[m], 'width')
 653                 varwidth = get_option_value(document.body[m], 'varwidth')
 654                 alignment = get_option_value(document.body[m], 'alignment')
 655                 special = get_option_value(document.body[m], 'special')
 656                 col_info.append([width, varwidth, alignment, special, m])
 657
 658             # Now parse cells
 659             m = i + 1
 660             lines = []
 661             for row in range(nrows):
 662                 for col in range(ncols):
 663                     m = find_token(document.body, "<cell", m)
 664                     multicolumn = get_option_value(document.body[m], 'multicolumn')
 665                     multirow = get_option_value(document.body[m], 'multirow')
 666                     width = get_option_value(document.body[m], 'width')
 667                     rotate = get_option_value(document.body[m], 'rotate')
 668                     # Check for: linebreaks, multipars, non-standard environments
 669                     begcell = m
 670                     endcell = find_token(document.body, "</cell>", begcell)
 671                     vcand = False
 672                     if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
 673                         vcand = True
 674                     elif count_pars_in_inset(document.body, begcell + 2) > 1:
 675                         vcand = True
 676                     elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
 677                         vcand = True
 678                     if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""):
 679                         if col_info[col][0] == "" and col_info[col][1] == "" and col_info[col][3] == "":
 680                             needvarwidth = True
 681                             alignment = col_info[col][2]
 682                             col_line = col_info[col][4]
 683                             vval = ""
 684                             if alignment == "center":
 685                                 vval = ">{\\centering}"
 686                             elif  alignment == "left":
 687                                 vval = ">{\\raggedright}"
 688                             elif alignment == "right":
 689                                 vval = ">{\\raggedleft}"
 690                             if vval != "":
 691                                 needarray = True
 692                             vval += "V{\\linewidth}"
 693
 694                             document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
 695                             # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
 696                             # with newlines, and we do not want that)
 697                             while True:
 698                                 endcell = find_token(document.body, "</cell>", begcell)
 699                                 linebreak = False
 700                                 nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
 701                                 if nl == -1:
 702                                     nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
 703                                     if nl == -1:
 704                                          break
 705                                     linebreak = True
 706                                 nle = find_end_of_inset(document.body, nl)
 707                                 del(document.body[nle:nle+1])
 708                                 if linebreak:
 709                                     document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
 710                                 else:
 711                                     document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
 712                     m += 1
 713
 714             i = j + 1
 715
 716     finally:
 717         if needarray == True:
 718             add_to_preamble(document, ["\\usepackage{array}"])
 719         if needvarwidth == True:
 720             add_to_preamble(document, ["\\usepackage{varwidth}"])
 721
 722
 723 def revert_bibencoding(document):
 724     " Revert bibliography encoding "
 725
 726     # Get cite engine
 727     engine = "basic"
 728     i = find_token(document.header, "\\cite_engine", 0)
 729     if i == -1:
 730         document.warning("Malformed document! Missing \\cite_engine")
 731     else:
 732         engine = get_value(document.header, "\\cite_engine", i)
 733
 734     # Check if biblatex
 735     biblatex = False
 736     if engine in ["biblatex", "biblatex-natbib"]:
 737         biblatex = True
 738
 739     # Map lyx to latex encoding names
 740     encodings = {
 741         "utf8" : "utf8",
 742         "utf8x" : "utf8x",
 743         "armscii8" : "armscii8",
 744         "iso8859-1" : "latin1",
 745         "iso8859-2" : "latin2",
 746         "iso8859-3" : "latin3",
 747         "iso8859-4" : "latin4",
 748         "iso8859-5" : "iso88595",
 749         "iso8859-6" : "8859-6",
 750         "iso8859-7" : "iso-8859-7",
 751         "iso8859-8" : "8859-8",
 752         "iso8859-9" : "latin5",
 753         "iso8859-13" : "latin7",
 754         "iso8859-15" : "latin9",
 755         "iso8859-16" : "latin10",
 756         "applemac" : "applemac",
 757         "cp437" : "cp437",
 758         "cp437de" : "cp437de",
 759         "cp850" : "cp850",
 760         "cp852" : "cp852",
 761         "cp855" : "cp855",
 762         "cp858" : "cp858",
 763         "cp862" : "cp862",
 764         "cp865" : "cp865",
 765         "cp866" : "cp866",
 766         "cp1250" : "cp1250",
 767         "cp1251" : "cp1251",
 768         "cp1252" : "cp1252",
 769         "cp1255" : "cp1255",
 770         "cp1256" : "cp1256",
 771         "cp1257" : "cp1257",
 772         "koi8-r" : "koi8-r",
 773         "koi8-u" : "koi8-u",
 774         "pt154" : "pt154",
 775         "utf8-platex" : "utf8",
 776         "ascii" : "ascii"
 777     }
 778
 779     i = 0
 780     bibresources = []
 781     while (True):
 782         i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
 783         if i == -1:
 784             break
 785         j = find_end_of_inset(document.body, i)
 786         if j == -1:
 787             document.warning("Can't find end of bibtex inset at line %d!!" %(i))
 788             i += 1
 789             continue
 790         encoding = get_quoted_value(document.body, "encoding", i, j)
 791         if not encoding:
 792             i += 1
 793             continue
 794         # remove encoding line
 795         k = find_token(document.body, "encoding", i, j)
 796         if k != -1:
 797             del document.body[k]
 798         # Re-find inset end line
 799         j = find_end_of_inset(document.body, i)
 800         if biblatex:
 801             biblio_options = ""
 802             h = find_token(document.header, "\\biblio_options", 0)
 803             if h != -1:
 804                 biblio_options = get_value(document.header, "\\biblio_options", h)
 805                 if not "bibencoding" in biblio_options:
 806                      document.header[h] += ",bibencoding=%s" % encodings[encoding]
 807             else:
 808                 bs = find_token(document.header, "\\biblatex_bibstyle", 0)
 809                 if bs == -1:
 810                     # this should not happen
 811                     document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
 812                 else:
 813                     document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + encodings[encoding]]
 814         else:
 815             document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
 816             document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + encodings[encoding] + "}")
 817
 818         i = j + 1
 819
 820
 821
 822 def convert_vcsinfo(document):
 823     " Separate vcs Info inset from buffer Info inset. "
 824
 825     types = {
 826         "vcs-revision" : "revision",
 827         "vcs-tree-revision" : "tree-revision",
 828         "vcs-author" : "author",
 829         "vcs-time" : "time",
 830         "vcs-date" : "date"
 831     }
 832     i = 0
 833     while True:
 834         i = find_token(document.body, "\\begin_inset Info", i)
 835         if i == -1:
 836             return
 837         j = find_end_of_inset(document.body, i + 1)
 838         if j == -1:
 839             document.warning("Malformed LyX document: Could not find end of Info inset.")
 840             i = i + 1
 841             continue
 842         tp = find_token(document.body, 'type', i, j)
 843         tpv = get_quoted_value(document.body, "type", tp)
 844         if tpv != "buffer":
 845             i = i + 1
 846             continue
 847         arg = find_token(document.body, 'arg', i, j)
 848         argv = get_quoted_value(document.body, "arg", arg)
 849         if argv not in list(types.keys()):
 850             i = i + 1
 851             continue
 852         document.body[tp] = "type \"vcs\""
 853         document.body[arg] = "arg \"" + types[argv] + "\""
 854         i = i + 1
 855
 856
 857 def revert_vcsinfo(document):
 858     " Merge vcs Info inset to buffer Info inset. "
 859
 860     args = ["revision", "tree-revision", "author", "time", "date" ]
 861     i = 0
 862     while True:
 863         i = find_token(document.body, "\\begin_inset Info", i)
 864         if i == -1:
 865             return
 866         j = find_end_of_inset(document.body, i + 1)
 867         if j == -1:
 868             document.warning("Malformed LyX document: Could not find end of Info inset.")
 869             i = i + 1
 870             continue
 871         tp = find_token(document.body, 'type', i, j)
 872         tpv = get_quoted_value(document.body, "type", tp)
 873         if tpv != "vcs":
 874             i = i + 1
 875             continue
 876         arg = find_token(document.body, 'arg', i, j)
 877         argv = get_quoted_value(document.body, "arg", arg)
 878         if argv not in args:
 879             document.warning("Malformed Info inset. Invalid vcs arg.")
 880             i = i + 1
 881             continue
 882         document.body[tp] = "type \"buffer\""
 883         document.body[arg] = "arg \"vcs-" + argv + "\""
 884         i = i + 1
 885
 886
 887 def revert_dateinfo(document):
 888     " Revert date info insets to static text. "
 889
 890 # FIXME This currently only considers the main language and uses the system locale
 891 # Ideally, it should honor context languages and switch the locale accordingly.
 892
 893     # The date formats for each language using strftime syntax:
 894     # long, short, loclong, locmedium, locshort
 895     dateformats = {
 896         "afrikaans" : ["%A, %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
 897         "albanian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 898         "american" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 899         "amharic" : ["%A ፣%d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 900         "ancientgreek" : ["%A, %d %B %Y", "%d %b %Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 901         "arabic_arabi" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
 902         "arabic_arabtex" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
 903         "armenian" : ["%Y թ. %B %d, %A", "%d.%m.%y", "%d %B، %Y", "%d %b، %Y", "%d/%m/%Y"],
 904         "asturian" : ["%A, %d %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
 905         "australian" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 906         "austrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 907         "bahasa" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 908         "bahasam" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 909         "basque" : ["%Y(e)ko %B %d, %A", "%y/%m/%d", "%Y %B %d", "%Y %b %d", "%Y/%m/%d"],
 910         "belarusian" : ["%A, %d %B %Y г.", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 911         "bosnian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%Y-%m-%d"],
 912         "brazilian" : ["%A, %d de %B de %Y", "%d/%m/%Y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
 913         "breton" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 914         "british" : ["%A, %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 915         "bulgarian" : ["%A, %d %B %Y г.", "%d.%m.%y г.", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 916         "canadian" : ["%A, %B %d, %Y", "%Y-%m-%d", "%B %d, %Y", "%d %b %Y", "%Y-%m-%d"],
 917         "canadien" : ["%A %d %B %Y", "%y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 918         "catalan" : ["%A, %d %B de %Y", "%d/%m/%y", "%d / %B / %Y", "%d / %b / %Y", "%d/%m/%Y"],
 919         "chinese-simplified" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y-%m-%d", "%y-%m-%d"],
 920         "chinese-traditional" : ["%Y年%m月%d日 %A", "%Y/%m/%d", "%Y年%m月%d日", "%Y年%m月%d日", "%y年%m月%d日"],
 921         "coptic" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 922         "croatian" : ["%A, %d. %B %Y.", "%d. %m. %Y.", "%d. %B %Y.", "%d. %b. %Y.", "%d.%m.%Y."],
 923         "czech" : ["%A %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b. %Y", "%d.%m.%Y"],
 924         "danish" : ["%A den %d. %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
 925         "divehi" : ["%Y %B %d, %A", "%Y-%m-%d", "%Y %B %d", "%Y %b %d", "%d/%m/%Y"],
 926         "dutch" : ["%A %d %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 927         "english" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 928         "esperanto" : ["%A, %d %B %Y", "%d %b %Y", "la %d de %B %Y", "la %d de %b %Y", "%m/%d/%Y"],
 929         "estonian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 930         "farsi" : ["%A %d %B %Y", "%Y/%m/%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
 931         "finnish" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 932         "french" : ["%A %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 933         "friulan" : ["%A %d di %B dal %Y", "%d/%m/%y", "%d di %B dal %Y", "%d di %b dal %Y", "%d/%m/%Y"],
 934         "galician" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
 935         "georgian" : ["%A, %d %B, %Y", "%d.%m.%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 936         "german" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 937         "german-ch" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 938         "german-ch-old" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 939         "greek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 940         "hebrew" : ["%A, %d ב%B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 941         "hindi" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 942         "icelandic" : ["%A, %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 943         "interlingua" : ["%Y %B %d, %A", "%Y-%m-%d", "le %d de %B %Y", "le %d de %b %Y", "%Y-%m-%d"],
 944         "irish" : ["%A %d %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
 945         "italian" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d/%b/%Y", "%d/%m/%Y"],
 946         "japanese" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
 947         "japanese-cjk" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
 948         "kannada" : ["%A, %B %d, %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d-%m-%Y"],
 949         "kazakh" : ["%Y ж. %d %B, %A", "%d.%m.%y", "%d %B %Y", "%d %B %Y", "%Y-%d-%m"],
 950         "khmer" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
 951         "korean" : ["%Y년 %m월 %d일 %A", "%y. %m. %d.", "%Y년 %m월 %d일", "%Y. %m. %d.", "%y. %m. %d."],
 952         "kurmanji" : ["%A, %d %B %Y", "%d %b %Y", "%d. %B %Y", "%d. %m. %Y", "%Y-%m-%d"],
 953         "lao" : ["%A ທີ %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
 954         "latin" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 955         "latvian" : ["%A, %Y. gada %d. %B", "%d.%m.%y", "%Y. gada %d. %B", "%Y. gada %d. %b", "%d.%m.%Y"],
 956         "lithuanian" : ["%Y m. %B %d d., %A", "%Y-%m-%d", "%Y m. %B %d d.", "%Y m. %B %d d.", "%Y-%m-%d"],
 957         "lowersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 958         "macedonian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 959         "magyar" : ["%Y. %B %d., %A", "%Y. %m. %d.", "%Y. %B %d.", "%Y. %b %d.", "%Y.%m.%d."],
 960         "marathi" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 961         "mongolian" : ["%A, %Y оны %m сарын %d", "%Y-%m-%d", "%Y оны %m сарын %d", "%d-%m-%Y", "%d-%m-%Y"],
 962         "naustrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 963         "newzealand" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 964         "ngerman" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 965         "norsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 966         "nynorsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 967         "occitan" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 968         "piedmontese" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 969         "polish" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 970         "polutonikogreek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 971         "portuguese" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%Y/%m/%d"],
 972         "romanian" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 973         "romansh" : ["%A, ils %d da %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 974         "russian" : ["%A, %d %B %Y г.", "%d.%m.%Y", "%d %B %Y г.", "%d %b %Y г.", "%d.%m.%Y"],
 975         "samin" : ["%Y %B %d, %A", "%Y-%m-%d", "%B %d. b. %Y", "%b %d. b. %Y", "%d.%m.%Y"],
 976         "sanskrit" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 977         "scottish" : ["%A, %dmh %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 978         "serbian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 979         "serbian-latin" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 980         "slovak" : ["%A, %d. %B %Y", "%d. %m. %Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 981         "slovene" : ["%A, %d. %B %Y", "%d. %m. %y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 982         "spanish" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B %de %Y", "%d %b %Y", "%d/%m/%Y"],
 983         "spanish-mexico" : ["%A, %d de %B %de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
 984         "swedish" : ["%A %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 985         "syriac" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 986         "tamil" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 987         "telugu" : ["%d, %B %Y, %A", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 988         "thai" : ["%Aที่ %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 989         "tibetan" : ["%Y %Bའི་ཚེས་%d, %A", "%Y-%m-%d", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 990         "turkish" : ["%d %B %Y %A", "%d.%m.%Y", "%d %B %Y", "%d.%b.%Y", "%d.%m.%Y"],
 991         "turkmen" : ["%d %B %Y %A", "%d.%m.%Y", "%Y ý. %B %d", "%d.%m.%Y ý.", "%d.%m.%y ý."],
 992         "ukrainian" : ["%A, %d %B %Y р.", "%d.%m.%y", "%d %B %Y", "%d %m %Y", "%d.%m.%Y"],
 993         "uppersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 994         "urdu" : ["%A، %d %B، %Y", "%d/%m/%y", "%d %B, %Y", "%d %b %Y", "%d/%m/%Y"],
 995         "vietnamese" : ["%A, %d %B, %Y", "%d/%m/%Y", "%d tháng %B %Y", "%d-%m-%Y", "%d/%m/%Y"],
 996         "welsh" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 997     }
 998
 999     types = ["date", "fixdate", "moddate" ]
1000     i = 0
1001     i = find_token(document.header, "\\language", 0)
1002     if i == -1:
1003         # this should not happen
1004         document.warning("Malformed LyX document! No \\language header found!")
1005         return
1006     lang = get_value(document.header, "\\language", i)
1007
1008     i = 0
1009     while True:
1010         i = find_token(document.body, "\\begin_inset Info", i)
1011         if i == -1:
1012             return
1013         j = find_end_of_inset(document.body, i + 1)
1014         if j == -1:
1015             document.warning("Malformed LyX document: Could not find end of Info inset.")
1016             i = i + 1
1017             continue
1018         tp = find_token(document.body, 'type', i, j)
1019         tpv = get_quoted_value(document.body, "type", tp)
1020         if tpv not in types:
1021             i = i + 1
1022             continue
1023         arg = find_token(document.body, 'arg', i, j)
1024         argv = get_quoted_value(document.body, "arg", arg)
1025         isodate = ""
1026         dte = date.today()
1027         if tpv == "fixdate":
1028             datecomps = argv.split('@')
1029             if len(datecomps) > 1:
1030                 argv = datecomps[0]
1031                 isodate = datecomps[1]
1032                 m = re.search('(\d\d\d\d)-(\d\d)-(\d\d)', isodate)
1033                 if m:
1034                     dte = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
1035 # FIXME if we had the path to the original document (not the one in the tmp dir),
1036 #        we could use the mtime.
1037 #        elif tpv == "moddate":
1038 #            dte = date.fromtimestamp(os.path.getmtime(document.dir))
1039         result = ""
1040         if argv == "ISO":
1041             result = dte.isodate()
1042         elif argv == "long":
1043             result = dte.strftime(dateformats[lang][0])
1044         elif argv == "short":
1045             result = dte.strftime(dateformats[lang][1])
1046         elif argv == "loclong":
1047             result = dte.strftime(dateformats[lang][2])
1048         elif argv == "locmedium":
1049             result = dte.strftime(dateformats[lang][3])
1050         elif argv == "locshort":
1051             result = dte.strftime(dateformats[lang][4])
1052         else:
1053             fmt = argv.replace("MMMM", "%b").replace("MMM", "%b").replace("MM", "%m").replace("M", "%m")
1054             fmt = fmt.replace("yyyy", "%Y").replace("yy", "%y")
1055             fmt = fmt.replace("dddd", "%A").replace("ddd", "%a").replace("dd", "%d")
1056             fmt = re.sub('[^\'%]d', '%d', fmt)
1057             fmt = fmt.replace("'", "")
1058             result = dte.strftime(fmt)
1059         document.body[i : j+1] = result
1060         i = i + 1
1061
1062
def _qt_time_format_to_strftime(qtformat):
    """Translate a Qt-style time format string into strftime syntax.

    Recognised expressions are H/HH (24h hour), h/hh (hour; 12h only if
    the format also contains an AM/PM expression, 24h otherwise), m/mm,
    s/ss, z/zzz (fraction of a second), AP/A/ap/a (AM/PM) and t
    (timezone). Text enclosed in single quotes is copied verbatim
    (without the quotes) and literal '%' characters are escaped.

    strftime offers neither "no leading zero" variants, nor a lowercase
    am/pm, nor milliseconds, so these are approximated by %H/%I/%M/%S,
    %p and %f (microseconds) respectively.
    """
    # The quoted alternative comes first so that letters inside quotes are
    # consumed as a unit and never mistaken for format expressions.
    token_re = re.compile(r"'([^']*)'?|HH?|hh?|mm?|ss?|zzz|z|AP|ap|A|a|t|%")
    ampm = ("AP", "ap", "A", "a")
    twelve_hour = any(m.group(0) in ampm for m in token_re.finditer(qtformat))
    if twelve_hour:
        hour = "%I"
    else:
        hour = "%H"
    directives = {
        "H" : "%H", "HH" : "%H",
        "h" : hour, "hh" : hour,
        "m" : "%M", "mm" : "%M",
        "s" : "%S", "ss" : "%S",
        "z" : "%f", "zzz" : "%f",
        "AP" : "%p", "ap" : "%p", "A" : "%p", "a" : "%p",
        "t" : "%Z"
    }

    def translate(match):
        quoted = match.group(1)
        if quoted is not None:
            # quoted literal: keep the text, protect it from strftime
            return quoted.replace("%", "%%")
        token = match.group(0)
        if token == "%":
            return "%%"
        return directives[token]

    return token_re.sub(translate, qtformat)


def revert_timeinfo(document):
    """Revert time Info insets to static text.

    Each Info inset of type "time", "fixtime" or "modtime" is replaced
    by a single body line holding the formatted time. "fixtime" insets
    carry their time as "<format>@<HH:MM[:SS]>"; all other insets (and
    fixtime insets without a parsable time) use the current time.

    The inset argument selects the format: "ISO", one of the predefined
    styles "long" and "short" (looked up per document language), or a
    custom Qt-style format string.
    """

    # FIXME This currently only considers the main language and uses the system locale
    # Ideally, it should honor context languages and switch the locale accordingly.
    # Also, the time object is "naive", i.e., it does not know of timezones (%Z will
    # be empty).

    # The time formats for each language using strftime syntax:
    # long, short
    timeformats = {
        "afrikaans" : ["%H:%M:%S %Z", "%H:%M"],
        "albanian" : ["%I:%M:%S %p, %Z", "%I:%M %p"],
        "american" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "amharic" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ancientgreek" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "arabic_arabi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "arabic_arabtex" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "armenian" : ["%H:%M:%S %Z", "%H:%M"],
        "asturian" : ["%H:%M:%S %Z", "%H:%M"],
        "australian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "austrian" : ["%H:%M:%S %Z", "%H:%M"],
        "bahasa" : ["%H.%M.%S %Z", "%H.%M"],
        "bahasam" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "basque" : ["%H:%M:%S (%Z)", "%H:%M"],
        "belarusian" : ["%H:%M:%S, %Z", "%H:%M"],
        "bosnian" : ["%H:%M:%S %Z", "%H:%M"],
        "brazilian" : ["%H:%M:%S %Z", "%H:%M"],
        "breton" : ["%H:%M:%S %Z", "%H:%M"],
        "british" : ["%H:%M:%S %Z", "%H:%M"],
        "bulgarian" : ["%H:%M:%S %Z", "%H:%M"],
        "canadian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "canadien" : ["%H:%M:%S %Z", "%H h %M"],
        "catalan" : ["%H:%M:%S %Z", "%H:%M"],
        "chinese-simplified" : ["%Z %p%I:%M:%S", "%p%I:%M"],
        "chinese-traditional" : ["%p%I:%M:%S [%Z]", "%p%I:%M"],
        "coptic" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "croatian" : ["%H:%M:%S (%Z)", "%H:%M"],
        "czech" : ["%H:%M:%S %Z", "%H:%M"],
        "danish" : ["%H.%M.%S %Z", "%H.%M"],
        "divehi" : ["%H:%M:%S %Z", "%H:%M"],
        "dutch" : ["%H:%M:%S %Z", "%H:%M"],
        "english" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "esperanto" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "estonian" : ["%H:%M:%S %Z", "%H:%M"],
        "farsi" : ["%H:%M:%S (%Z)", "%H:%M"],
        "finnish" : ["%H.%M.%S %Z", "%H.%M"],
        "french" : ["%H:%M:%S %Z", "%H:%M"],
        "friulan" : ["%H:%M:%S %Z", "%H:%M"],
        "galician" : ["%H:%M:%S %Z", "%H:%M"],
        "georgian" : ["%H:%M:%S %Z", "%H:%M"],
        "german" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch-old" : ["%H:%M:%S %Z", "%H:%M"],
        "greek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "hebrew" : ["%H:%M:%S %Z", "%H:%M"],
        "hindi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "icelandic" : ["%H:%M:%S %Z", "%H:%M"],
        "interlingua" : ["%H:%M:%S %Z", "%H:%M"],
        "irish" : ["%H:%M:%S %Z", "%H:%M"],
        "italian" : ["%H:%M:%S %Z", "%H:%M"],
        "japanese" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "japanese-cjk" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "kannada" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "kazakh" : ["%H:%M:%S %Z", "%H:%M"],
        "khmer" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "korean" : ["%p %I시%M분 %S초 %Z", "%p %I:%M"],
        "kurmanji" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "lao" : ["%H ໂມງ%M ນາທີ  %S ວິນາທີ %Z", "%H:%M"],
        "latin" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "latvian" : ["%H:%M:%S %Z", "%H:%M"],
        "lithuanian" : ["%H:%M:%S %Z", "%H:%M"],
        "lowersorbian" : ["%H:%M:%S %Z", "%H:%M"],
        "macedonian" : ["%H:%M:%S %Z", "%H:%M"],
        "magyar" : ["%H:%M:%S %Z", "%H:%M"],
        "marathi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "mongolian" : ["%H:%M:%S %Z", "%H:%M"],
        "naustrian" : ["%H:%M:%S %Z", "%H:%M"],
        "newzealand" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ngerman" : ["%H:%M:%S %Z", "%H:%M"],
        "norsk" : ["%H:%M:%S %Z", "%H:%M"],
        "nynorsk" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "occitan" : ["%H:%M:%S %Z", "%H:%M"],
        "piedmontese" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "polish" : ["%H:%M:%S %Z", "%H:%M"],
        "polutonikogreek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "portuguese" : ["%H:%M:%S %Z", "%H:%M"],
        "romanian" : ["%H:%M:%S %Z", "%H:%M"],
        "romansh" : ["%H:%M:%S %Z", "%H:%M"],
        "russian" : ["%H:%M:%S %Z", "%H:%M"],
        "samin" : ["%H:%M:%S %Z", "%H:%M"],
        "sanskrit" : ["%H:%M:%S %Z", "%H:%M"],
        "scottish" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian-latin" : ["%H:%M:%S %Z", "%H:%M"],
        "slovak" : ["%H:%M:%S %Z", "%H:%M"],
        "slovene" : ["%H:%M:%S %Z", "%H:%M"],
        "spanish" : ["%H:%M:%S (%Z)", "%H:%M"],
        "spanish-mexico" : ["%H:%M:%S %Z", "%H:%M"],
        "swedish" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "syriac" : ["%H:%M:%S %Z", "%H:%M"],
        "tamil" : ["%p %I:%M:%S %Z", "%p %I:%M"],
        "telugu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "thai" : ["%H นาฬิกา %M นาที  %S วินาที %Z", "%H:%M"],
        "tibetan" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "turkish" : ["%H:%M:%S %Z", "%H:%M"],
        "turkmen" : ["%H:%M:%S %Z", "%H:%M"],
        "ukrainian" : ["%H:%M:%S %Z", "%H:%M"],
        "uppersorbian" : ["%H:%M:%S %Z", "%H:%M hodź."],
        "urdu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "vietnamese" : ["%H:%M:%S %Z", "%H:%M"],
        "welsh" : ["%H:%M:%S %Z", "%H:%M"]
    }

    # Position of each predefined style inside a timeformats entry
    styles = {"long" : 0, "short" : 1}

    types = ["time", "fixtime", "modtime" ]
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        # this should not happen
        document.warning("Malformed LyX document! No \\language header found!")
        return
    lang = get_value(document.header, "\\language", i)

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv not in types:
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        tme = datetime.now().time()
        if tpv == "fixtime":
            # The argument has the form "<format>@<ISO time>"
            timecomps = argv.split('@')
            if len(timecomps) > 1:
                argv = timecomps[0]
                isotime = timecomps[1]
                try:
                    m = re.search(r'(\d\d):(\d\d):(\d\d)', isotime)
                    if m:
                        tme = time(int(m.group(1)), int(m.group(2)), int(m.group(3)))
                    else:
                        m = re.search(r'(\d\d):(\d\d)', isotime)
                        if m:
                            tme = time(int(m.group(1)), int(m.group(2)))
                except ValueError:
                    # e.g. hour 25: keep the current time rather than abort
                    document.warning("Invalid time '%s' in fixtime Info inset." % isotime)
        # FIXME if we had the path to the original document (not the one in the tmp dir),
        #        we could use the mtime for "modtime" insets.
        if argv == "ISO":
            result = tme.isoformat()
        elif argv in styles:
            if lang in timeformats:
                fmt = timeformats[lang][styles[argv]]
            else:
                # No table entry for this language: use a
                # language-neutral 24h layout instead of failing.
                document.warning("No time formats known for language '%s'. Using ISO format." % lang)
                fmt = "%H:%M:%S"
            result = tme.strftime(fmt)
        else:
            # custom (Qt-style) format string
            result = tme.strftime(_qt_time_format_to_strftime(argv))
        # Replace the whole inset by ONE line of text. Assigning the bare
        # string would insert one body line per character.
        document.body[i : j+1] = [result]
        i = i + 1
1238
1239
def revert_namenoextinfo(document):
    """Merge buffer Info insets with argument "name-noext" into "name".

    Only Info insets of type "buffer" whose argument is exactly
    "name-noext" are touched; their argument line is rewritten to
    ``arg "name"``.
    """

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Info", pos)
        if pos == -1:
            break
        start = pos
        # Whatever happens to this inset, the next search resumes one line on
        pos = start + 1
        end = find_end_of_inset(document.body, start + 1)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        type_line = find_token(document.body, 'type', start, end)
        if get_quoted_value(document.body, "type", type_line) != "buffer":
            continue
        arg_line = find_token(document.body, 'arg', start, end)
        if get_quoted_value(document.body, "arg", arg_line) == "name-noext":
            document.body[arg_line] = "arg \"name\""
1265
1266
def revert_l7ninfo(document):
    """Revert l7n Info insets to plain text.

    Each Info inset of type "l7n" is replaced by a single body line
    holding its argument, stripped of GUI decorations: trailing colons,
    the menu accelerator part (everything from the first "|") and Qt
    accelerator markers ("&"). A literal " & " (ampersand surrounded by
    spaces) is kept.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "l7n":
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        # remove trailing colons, menu accelerator (|...) and qt accelerator (&), while keeping literal " & "
        # (" & " is parked in a placeholder while the remaining "&" are dropped)
        argv = argv.rstrip(':').split('|')[0].replace(" & ", "</amp;>").replace("&", "").replace("</amp;>", " & ")
        # Replace the whole inset by ONE line of text. Assigning the bare
        # string would insert one body line per character.
        document.body[i : j+1] = [argv]
        i = i + 1
1291
1292
def revert_listpargs(document):
    """Revert listpreamble arguments to TeX code.

    Each ``Argument listpreamble:`` inset is removed and the content of
    its Plain Layout is re-inserted, wrapped in braces inside a
    collapsed ERT inset, at the position given by element 3 of the
    tuple returned by get_containing_layout() (presumably the start of
    the containing paragraph's content -- confirm against parser_tools).

    Insets whose end, containing layout or Plain Layout cannot be found
    are reported with a warning and left untouched.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Argument listpreamble:", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Without the inset end there is nothing we can safely delete
            document.warning("Malformed LyX document: Can't find end of listpreamble Argument inset")
            i += 1
            continue
        # Find containing paragraph layout
        parent = get_containing_layout(document.body, i)
        if not parent:
            document.warning("Malformed LyX document: Can't find parent paragraph layout")
            i += 1
            continue
        parbeg = parent[3]
        # Only look inside this inset; an unbounded search could pick up the
        # Plain Layout of a later inset.
        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i, j)
        endPlain = -1
        if beginPlain != -1:
            endPlain = find_end_of_layout(document.body, beginPlain)
        if endPlain == -1:
            document.warning("Malformed LyX document: Can't find Plain Layout of listpreamble Argument inset")
            i += 1
            continue
        content = document.body[beginPlain + 1 : endPlain]
        del document.body[i:j+1]
        subst = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout",
                 "{"] + content + ["}", "\\end_layout", "", "\\end_inset", ""]
        document.body[parbeg : parbeg] = subst
        i += 1
1316
1317
##
# Conversion hub
#

# Version strings this module declares support for (presumably the LyX
# release names matched by the lyx2lyx driver -- confirm against LyX.py).
supported_versions = ["2.4.0", "2.4"]

# Forward conversion table. Each entry is [file format number, [routines]],
# listed in ascending format order. An empty routine list means no
# conversion routine is registered for that format step.
# NOTE(review): presumably the routines of an entry are run to reach that
# format number -- confirm against the lyx2lyx driver.
convert = [
           [545, [convert_lst_literalparam]],
           [546, []],
           [547, []],
           [548, []],
           [549, []],
           [550, [convert_fontenc]],
           [551, []],
           [552, []],
           [553, []],
           [554, []],
           [555, []],
           [556, []],
           [557, [convert_vcsinfo]],
           [558, [removeFrontMatterStyles]],
           [559, []],
           [560, []],
           [561, [convert_dejavu, convert_ibmplex]],
           [562, []],
           [563, []]
          ]

# Backward conversion table. Same entry layout as above, but listed in
# descending format order; each format number is one lower than the
# format in which the reverted feature appears in the table above
# (e.g. convert_vcsinfo at 557, revert_vcsinfo at 556).
revert =  [
           [562, [revert_listpargs]],
           [561, [revert_l7ninfo]],
           [560, [revert_ibmplex, revert_dejavu]],
           [559, [revert_timeinfo, revert_namenoextinfo]],
           [558, [revert_dateinfo]],
           [557, [addFrontMatterStyles]],
           [556, [revert_vcsinfo]],
           [555, [revert_bibencoding]],
           [554, [revert_vcolumns]],
           [553, [revert_stretchcolumn]],
           [552, [revert_tuftecite]],
           [551, [revert_floatpclass, revert_floatalignment]],
           [550, [revert_nospellcheck]],
           [549, [revert_fontenc]],
           [548, []],# dummy format change
           [547, [revert_lscape]],
           [546, [revert_xcharter]],
           [545, [revert_paratype]],
           [544, [revert_lst_literalparam]]
          ]


# This module only provides the tables above; running it directly does nothing.
if __name__ == "__main__":
    pass