lib/lyx2lyx/lyx_2_4.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # Copyright (C) 2018 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 """ Convert files to the file format generated by lyx 2.4"""
  20
  21 import re, string
  22 import unicodedata
  23 import sys, os
  24
  25 from datetime import (datetime, date, time)
  26
  27 # Uncomment only what you need to import, please.
  28
  29 from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_layout,
  30                           find_token, find_re, get_bool_value, get_containing_layout,
  31                           get_option_value, get_value, get_quoted_value)
  32 #    del_token, del_value, del_complete_lines,
  33 #    find_complete_lines, find_end_of,
  34 #    find_re, find_substring, find_token_backwards,
  35 #    get_containing_inset,
  36 #    is_in_inset, set_bool_value
  37 #    find_tokens, find_token_exact, check_token
  38
  39 from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble)
  40 #  revert_font_attrs, insert_to_preamble, latex_length
  41 #  get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
  42 #  revert_flex_inset, hex2ratio, str2bool
  43
  44 ####################################################################
  45 # Private helper functions
  46
  47 def add_preamble_fonts(document, fontmap):
  48     " Add collected font-packages with their option to user-preamble"
  49
  50     for pkg in fontmap:
  51         if len(fontmap[pkg]) > 0:
  52             xoption = "[" + ",".join(fontmap[pkg]) + "]"
  53         else:
  54             xoption = ""
  55         preamble = "\\usepackage" + xoption + "{%s}" % pkg
  56         add_to_preamble(document, [preamble])
  57
  58
  59 def createkey(pkg, options):
  60     options.sort()
  61     return pkg + ':' + "-".join(options)
  62
  63 class fontinfo:
  64     def __init__(self):
  65         self.fontname = None    # key into font2pkgmap
  66         self.fonttype = None    # roman,sans,typewriter,math
  67         self.scaletype = None   # None,sf,tt
  68         self.scaleopt = None    # None, 'scaled', 'scale'
  69         self.scaleval = 1
  70         self.package = None
  71         self.options = []
  72         self.pkgkey = None      # key into pkg2fontmap
  73
  74     def addkey(self):
  75         self.pkgkey = createkey(self.package, self.options)
  76
  77 class fontmapping:
  78     def __init__(self):
  79         self.font2pkgmap = dict()
  80         self.pkg2fontmap = dict()
  81         self.pkginmap = dict()  # defines, if a map for package exists
  82
  83     def expandFontMapping(self, font_list, font_type, scale_type, pkg, scaleopt = None):
  84         " Expand fontinfo mapping"
  85         #
  86         # fontlist:    list of fontnames, each element
  87         #              may contain a ','-separated list of needed options
  88         #              like e.g. 'IBMPlexSansCondensed,condensed'
  89         # font_type:   one of 'roman', 'sans', 'typewriter', 'math'
  90         # scale_type:  one of None, 'sf', 'tt'
  91         # pkg:         package defining the font. Defaults to fontname if None
  92         # scaleopt:    one of None, 'scale', 'scaled', or some other string
  93         #              to be used in scale option (e.g. scaled=0.7)
  94         for fl in font_list:
  95             fe = fontinfo()
  96             fe.fonttype = font_type
  97             fe.scaletype = scale_type
  98             flt = fl.split(",")
  99             font_name = flt[0]
 100             fe.fontname = font_name
 101             fe.options = flt[1:]
 102             fe.scaleopt = scaleopt
 103             if pkg == None:
 104                 fe.package = font_name
 105             else:
 106                 fe.package = pkg
 107             fe.addkey()
 108             self.font2pkgmap[font_name] = fe
 109             if fe.pkgkey in self.pkg2fontmap:
 110                 # Repeated the same entry? Check content
 111                 if self.pkg2fontmap[fe.pkgkey] != font_name:
 112                     document.error("Something is wrong in pkgname+options <-> fontname mapping")
 113             self.pkg2fontmap[fe.pkgkey] = font_name
 114             self.pkginmap[fe.package] = 1
 115
 116     def getfontname(self, pkg, options):
 117         options.sort()
 118         pkgkey = createkey(pkg, options)
 119         if not pkgkey in self.pkg2fontmap:
 120             return None
 121         fontname = self.pkg2fontmap[pkgkey]
 122         if not fontname in self.font2pkgmap:
 123             document.error("Something is wrong in pkgname+options <-> fontname mapping")
 124             return None
 125         if pkgkey == self.font2pkgmap[fontname].pkgkey:
 126             return fontname
 127         return None
 128
 129 def createFontMapping():
 130     # Create info for known fonts for the use in
 131     #   convert_latexFonts() and
 132     #   revert_latexFonts()
 133     #
 134     # * Would be more handy to parse latexFonts file,
 135     #   but the path to this file is unknown
 136     # * For now, add DejaVu and IBMPlex only.
 137     # * Expand, if desired
 138     fm = fontmapping()
 139     fm.expandFontMapping(['DejaVuSerif', 'DejaVuSerifCondensed'], "roman", None, None)
 140     fm.expandFontMapping(['DejaVuSans','DejaVuSansCondensed'], "sans", "sf", None, "scaled")
 141     fm.expandFontMapping(['DejaVuSansMono'], "typewriter", "tt", None, "scaled")
 142     fm.expandFontMapping(['IBMPlexSerif', 'IBMPlexSerifThin,thin',
 143                           'IBMPlexSerifExtraLight,extralight', 'IBMPlexSerifLight,light',
 144                           'IBMPlexSerifSemibold,semibold'],
 145                          "roman", None, "plex-serif")
 146     fm.expandFontMapping(['IBMPlexSans','IBMPlexSansCondensed,condensed',
 147                           'IBMPlexSansThin,thin', 'IBMPlexSansExtraLight,extralight',
 148                           'IBMPlexSansLight,light', 'IBMPlexSansSemibold,semibold'],
 149                          "sans", "sf", "plex-sans", "scale")
 150     fm.expandFontMapping(['IBMPlexMono', 'IBMPlexMonoThin,thin',
 151                           'IBMPlexMonoExtraLight,extralight', 'IBMPlexMonoLight,light',
 152                           'IBMPlexMonoSemibold,semibold'],
 153                          "typewriter", "tt", "plex-mono", "scale")
 154     fm.expandFontMapping(['ADOBESourceSerifPro'], "roman", None, "sourceserifpro")
 155     fm.expandFontMapping(['ADOBESourceSansPro'], "sans", "sf", "sourcesanspro", "scaled")
 156     fm.expandFontMapping(['ADOBESourceCodePro'], "typewriter", "tt", "sourcecodepro", "scaled")
 157     return fm
 158
 159 def convert_fonts(document, fm):
 160     " Handle font definition to LaTeX "
 161
 162     rpkg = re.compile(r'^\\usepackage(\[([^\]]*)\])?\{([^\}]+)\}')
 163     rscaleopt = re.compile(r'^scaled?=(.*)')
 164
 165     i = 0
 166     while i < len(document.preamble):
 167         i = find_re(document.preamble, rpkg, i)
 168         if i == -1:
 169             return
 170         mo = rpkg.search(document.preamble[i])
 171         if mo == None or mo.group(2) == None:
 172             options = []
 173         else:
 174             options = mo.group(2).replace(' ', '').split(",")
 175         pkg = mo.group(3)
 176         o = 0
 177         oscale = 1
 178         while o < len(options):
 179             mo = rscaleopt.search(options[o])
 180             if mo == None:
 181                 o += 1
 182                 continue
 183             oscale = mo.group(1)
 184             del options[o]
 185             break
 186
 187         if not pkg in fm.pkginmap:
 188             i += 1
 189             continue
 190         # determine fontname
 191         fn = fm.getfontname(pkg, options)
 192         if fn == None:
 193             i += 1
 194             continue
 195         del document.preamble[i]
 196         fontinfo = fm.font2pkgmap[fn]
 197         if fontinfo.scaletype == None:
 198             fontscale = None
 199         else:
 200             fontscale = "\\font_" + fontinfo.scaletype + "_scale"
 201             fontinfo.scaleval = oscale
 202
 203         if i > 0 and document.preamble[i-1] == "% Added by lyx2lyx":
 204             del document.preamble[i-1]
 205         if fontscale != None:
 206             j = find_token(document.header, fontscale, 0)
 207             if j != -1:
 208                 val = get_value(document.header, fontscale, j)
 209                 vals = val.split()
 210                 scale = "100"
 211                 if oscale != None:
 212                     scale = "%03d" % int(float(oscale) * 100)
 213                 document.header[j] = fontscale + " " + scale + " " + vals[1]
 214         ft = "\\font_" + fontinfo.fonttype
 215         j = find_token(document.header, ft, 0)
 216         if j != -1:
 217             val = get_value(document.header, ft, j)
 218             vals = val.split()
 219             document.header[j] = ft + ' "' + fn + '" ' + vals[1]
 220
 221 def revert_fonts(document, fm, fontmap):
 222     " Revert native font definition to LaTeX "
 223     # fonlist := list of fonts created from the same package
 224     # Empty package means that the font-name is the same as the package-name
 225     # fontmap (key = package, val += found options) will be filled
 226     # and used later in add_preamble_fonts() to be added to user-preamble
 227
 228     rfontscale = re.compile(r'^\s*(\\font_(roman|sans|typewriter|math))\s+')
 229     rscales = re.compile(r'^\s*(\d+)\s+(\d+)')
 230     i = 0
 231     while i < len(document.header):
 232         i = find_re(document.header, rfontscale, i)
 233         if (i == -1):
 234             break
 235         mo = rfontscale.search(document.header[i])
 236         if mo == None:
 237             i += 1
 238             continue
 239         ft = mo.group(1)    # 'roman', 'sans', 'typewriter', 'math'
 240         val = get_value(document.header, ft, i)
 241         words = val.split()
 242         font = words[0].replace('"', '')
 243         if not font in fm.font2pkgmap:
 244             i += 1
 245             continue
 246         fontinfo = fm.font2pkgmap[font]
 247         val = fontinfo.package
 248         if not val in fontmap:
 249             fontmap[val] = []
 250         document.header[i] = ft + ' "default" ' + words[1]
 251         if fontinfo.scaleopt != None:
 252             xval =  get_value(document.header, "\\font_" + fontinfo.scaletype + "_scale", 0)
 253             mo = rscales.search(xval)
 254             if mo != None:
 255                 xval1 = mo.group(1)
 256                 xval2 = mo.group(2)
 257                 if xval1 != "100":
 258                     # set correct scale option
 259                     fontmap[val].extend([fontinfo.scaleopt + "=" + format(float(xval1) / 100, '.2f')])
 260         if len(fontinfo.options) > 0:
 261             fontmap[val].extend(fontinfo.options)
 262         i += 1
 263
 264 ###############################################################################
 265 ###
 266 ### Conversion and reversion routines
 267 ###
 268 ###############################################################################
 269
 270 def convert_latexFonts(document):
 271     " Handle DejaVu and IBMPlex fonts definition to LaTeX "
 272
 273     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 274         fm = createFontMapping()
 275         convert_fonts(document, fm)
 276
 277 def revert_latexFonts(document):
 278     " Revert native DejaVu font definition to LaTeX "
 279
 280     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 281         fontmap = dict()
 282         fm = createFontMapping()
 283         revert_fonts(document, fm, fontmap)
 284         add_preamble_fonts(document, fontmap)
 285
 286 def removeFrontMatterStyles(document):
 287     " Remove styles Begin/EndFrontmatter"
 288
 289     layouts = ['BeginFrontmatter', 'EndFrontmatter']
 290     for layout in layouts:
 291         i = 0
 292         while True:
 293             i = find_token(document.body, '\\begin_layout ' + layout, i)
 294             if i == -1:
 295                 break
 296             j = find_end_of_layout(document.body, i)
 297             if j == -1:
 298                 document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
 299                 i += 1
 300                 continue
 301             while i > 0 and document.body[i-1].strip() == '':
 302                 i -= 1
 303             while document.body[j+1].strip() == '':
 304                 j = j + 1
 305             document.body[i:j+1] = ['']
 306
 307 def addFrontMatterStyles(document):
 308     " Use styles Begin/EndFrontmatter for elsarticle"
 309
 310     def insertFrontmatter(prefix, line):
 311         above = line
 312         while above > 0 and document.body[above-1].strip() == '':
 313             above -= 1
 314         below = line
 315         while document.body[below].strip() == '':
 316             below += 1
 317         document.body[above:below] = ['', '\\begin_layout ' + prefix + 'Frontmatter',
 318                                     '\\begin_inset Note Note',
 319                                     'status open', '',
 320                                     '\\begin_layout Plain Layout',
 321                                     'Keep this empty!',
 322                                     '\\end_layout', '',
 323                                     '\\end_inset', '', '',
 324                                     '\\end_layout', '']
 325
 326     if document.textclass == "elsarticle":
 327         layouts = ['Title', 'Title footnote', 'Author', 'Author footnote',
 328                    'Corresponding author', 'Address', 'Email', 'Abstract', 'Keywords']
 329         first = -1
 330         last = -1
 331         for layout in layouts:
 332             i = 0
 333             while True:
 334                 i = find_token(document.body, '\\begin_layout ' + layout, i)
 335                 if i == -1:
 336                     break
 337                 k = find_end_of_layout(document.body, i)
 338                 if k == -1:
 339                     document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
 340                     i += 1;
 341                     continue
 342                 if first == -1 or i < first:
 343                     first = i
 344                 if last == -1 or last <= k:
 345                     last = k+1
 346                 i = k+1
 347         if first == -1:
 348             return
 349         insertFrontmatter('End', last)
 350         insertFrontmatter('Begin', first)
 351
 352 def convert_lst_literalparam(document):
 353     " Add param literal to include inset "
 354
 355     i = 0
 356     while True:
 357         i = find_token(document.body, '\\begin_inset CommandInset include', i)
 358         if i == -1:
 359             break
 360         j = find_end_of_inset(document.body, i)
 361         if j == -1:
 362             document.warning("Malformed LyX document: Can't find end of command inset at line %d" % i)
 363             i += 1
 364             continue
 365         while i < j and document.body[i].strip() != '':
 366             i += 1
 367         document.body.insert(i, "literal \"true\"")
 368
 369
 370 def revert_lst_literalparam(document):
 371     " Remove param literal from include inset "
 372
 373     i = 0
 374     while True:
 375         i = find_token(document.body, '\\begin_inset CommandInset include', i)
 376         if i == -1:
 377             break
 378         j = find_end_of_inset(document.body, i)
 379         if j == -1:
 380             document.warning("Malformed LyX document: Can't find end of include inset at line %d" % i)
 381             i += 1
 382             continue
 383         k = find_token(document.body, 'literal', i, j)
 384         if k == -1:
 385             i += 1
 386             continue
 387         del document.body[k]
 388
 389
 390 def revert_paratype(document):
 391     " Revert ParaType font definitions to LaTeX "
 392
 393     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 394         preamble = ""
 395         i1 = find_token(document.header, "\\font_roman \"PTSerif-TLF\"", 0)
 396         i2 = find_token(document.header, "\\font_sans \"default\"", 0)
 397         i3 = find_token(document.header, "\\font_typewriter \"default\"", 0)
 398         j = find_token(document.header, "\\font_sans \"PTSans-TLF\"", 0)
 399         sfval = get_value(document.header, "\\font_sf_scale", 0)
 400         # cutoff " 100"
 401         sfval = sfval[:-4]
 402         sfoption = ""
 403         if sfval != "100":
 404             sfoption = "scaled=" + format(float(sfval) / 100, '.2f')
 405         k = find_token(document.header, "\\font_typewriter \"PTMono-TLF\"", 0)
 406         ttval = get_value(document.header, "\\font_tt_scale", 0)
 407         # cutoff " 100"
 408         ttval = ttval[:-4]
 409         ttoption = ""
 410         if ttval != "100":
 411             ttoption = "scaled=" + format(float(ttval) / 100, '.2f')
 412         if i1 != -1 and i2 != -1 and i3!= -1:
 413             add_to_preamble(document, ["\\usepackage{paratype}"])
 414         else:
 415             if i1!= -1:
 416                 add_to_preamble(document, ["\\usepackage{PTSerif}"])
 417                 document.header[i1] = document.header[i1].replace("PTSerif-TLF", "default")
 418             if j!= -1:
 419                 if sfoption != "":
 420                     add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"])
 421                 else:
 422                     add_to_preamble(document, ["\\usepackage{PTSans}"])
 423                 document.header[j] = document.header[j].replace("PTSans-TLF", "default")
 424             if k!= -1:
 425                 if ttoption != "":
 426                     add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"])
 427                 else:
 428                     add_to_preamble(document, ["\\usepackage{PTMono}"])
 429                 document.header[k] = document.header[k].replace("PTMono-TLF", "default")
 430
 431
 432 def revert_xcharter(document):
 433     " Revert XCharter font definitions to LaTeX "
 434
 435     i = find_token(document.header, "\\font_roman \"xcharter\"", 0)
 436     if i == -1:
 437         return
 438
 439     # replace unsupported font setting
 440     document.header[i] = document.header[i].replace("xcharter", "default")
 441     # no need for preamble code with system fonts
 442     if get_bool_value(document.header, "\\use_non_tex_fonts"):
 443         return
 444
 445     # transfer old style figures setting to package options
 446     j = find_token(document.header, "\\font_osf true")
 447     if j != -1:
 448         options = "[osf]"
 449         document.header[j] = "\\font_osf false"
 450     else:
 451         options = ""
 452     if i != -1:
 453         add_to_preamble(document, ["\\usepackage%s{XCharter}"%options])
 454
 455
 456 def revert_lscape(document):
 457     " Reverts the landscape environment (Landscape module) to TeX-code "
 458
 459     if not "landscape" in document.get_module_list():
 460         return
 461
 462     i = 0
 463     while True:
 464         i = find_token(document.body, "\\begin_inset Flex Landscape", i)
 465         if i == -1:
 466             return
 467         j = find_end_of_inset(document.body, i)
 468         if j == -1:
 469             document.warning("Malformed LyX document: Can't find end of Landscape inset")
 470             i += 1
 471             continue
 472
 473         if document.body[i] == "\\begin_inset Flex Landscape (Floating)":
 474             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}}")
 475             document.body[i : i + 4] = put_cmd_in_ert("\\afterpage{\\begin{landscape}")
 476             add_to_preamble(document, ["\\usepackage{afterpage}"])
 477         else:
 478             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}")
 479             document.body[i : i + 4] = put_cmd_in_ert("\\begin{landscape}")
 480
 481         add_to_preamble(document, ["\\usepackage{pdflscape}"])
 482         # no need to reset i
 483
 484
 485 def convert_fontenc(document):
 486     " Convert default fontenc setting "
 487
 488     i = find_token(document.header, "\\fontencoding global", 0)
 489     if i == -1:
 490         return
 491
 492     document.header[i] = document.header[i].replace("global", "auto")
 493
 494
 495 def revert_fontenc(document):
 496     " Revert default fontenc setting "
 497
 498     i = find_token(document.header, "\\fontencoding auto", 0)
 499     if i == -1:
 500         return
 501
 502     document.header[i] = document.header[i].replace("auto", "global")
 503
 504
 505 def revert_nospellcheck(document):
 506     " Remove nospellcheck font info param "
 507
 508     i = 0
 509     while True:
 510         i = find_token(document.body, '\\nospellcheck', i)
 511         if i == -1:
 512             return
 513         del document.body[i]
 514
 515
 516 def revert_floatpclass(document):
 517     " Remove float placement params 'document' and 'class' "
 518
 519     i = 0
 520     i = find_token(document.header, "\\float_placement class", 0)
 521     if i != -1:
 522         del document.header[i]
 523
 524     i = 0
 525     while True:
 526         i = find_token(document.body, '\\begin_inset Float', i)
 527         if i == -1:
 528             break
 529         j = find_end_of_inset(document.body, i)
 530         k = find_token(document.body, 'placement class', i, i + 2)
 531         if k == -1:
 532             k = find_token(document.body, 'placement document', i, i + 2)
 533             if k != -1:
 534                 del document.body[k]
 535             i = j
 536             continue
 537         del document.body[k]
 538
 539
 540 def revert_floatalignment(document):
 541     " Remove float alignment params "
 542
 543     i = 0
 544     i = find_token(document.header, "\\float_alignment", 0)
 545     galignment = ""
 546     if i != -1:
 547         galignment = get_value(document.header, "\\float_alignment", i)
 548         del document.header[i]
 549
 550     i = 0
 551     while True:
 552         i = find_token(document.body, '\\begin_inset Float', i)
 553         if i == -1:
 554             break
 555         j = find_end_of_inset(document.body, i)
 556         if j == -1:
 557             document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
 558             i += 1
 559         k = find_token(document.body, 'alignment', i, i + 4)
 560         if k == -1:
 561             i = j
 562             continue
 563         alignment = get_value(document.body, "alignment", k)
 564         if alignment == "document":
 565             alignment = galignment
 566         del document.body[k]
 567         l = find_token(document.body, "\\begin_layout Plain Layout", i, j)
 568         if l == -1:
 569             document.warning("Can't find float layout!")
 570             i = j
 571             continue
 572         alcmd = []
 573         if alignment == "left":
 574             alcmd = put_cmd_in_ert("\\raggedright{}")
 575         elif alignment == "center":
 576             alcmd = put_cmd_in_ert("\\centering{}")
 577         elif alignment == "right":
 578             alcmd = put_cmd_in_ert("\\raggedleft{}")
 579         if len(alcmd) > 0:
 580             document.body[l+1:l+1] = alcmd
 581         i = j
 582
 583
 584 def revert_tuftecite(document):
 585     " Revert \cite commands in tufte classes "
 586
 587     tufte = ["tufte-book", "tufte-handout"]
 588     if document.textclass not in tufte:
 589         return
 590
 591     i = 0
 592     while (True):
 593         i = find_token(document.body, "\\begin_inset CommandInset citation", i)
 594         if i == -1:
 595             break
 596         j = find_end_of_inset(document.body, i)
 597         if j == -1:
 598             document.warning("Can't find end of citation inset at line %d!!" %(i))
 599             i += 1
 600             continue
 601         k = find_token(document.body, "LatexCommand", i, j)
 602         if k == -1:
 603             document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
 604             i = j + 1
 605             continue
 606         cmd = get_value(document.body, "LatexCommand", k)
 607         if cmd != "cite":
 608             i = j + 1
 609             continue
 610         pre = get_quoted_value(document.body, "before", i, j)
 611         post = get_quoted_value(document.body, "after", i, j)
 612         key = get_quoted_value(document.body, "key", i, j)
 613         if not key:
 614             document.warning("Citation inset at line %d does not have a key!" %(i))
 615             key = "???"
 616         # Replace command with ERT
 617         res = "\\cite"
 618         if pre:
 619             res += "[" + pre + "]"
 620         if post:
 621             res += "[" + post + "]"
 622         elif pre:
 623             res += "[]"
 624         res += "{" + key + "}"
 625         document.body[i:j+1] = put_cmd_in_ert([res])
 626         i = j + 1
 627
 628
 629 def revert_stretchcolumn(document):
 630     " We remove the column varwidth flags or everything else will become a mess. "
 631     i = 0
 632     while True:
 633         i = find_token(document.body, "\\begin_inset Tabular", i)
 634         if i == -1:
 635             return
 636         j = find_end_of_inset(document.body, i + 1)
 637         if j == -1:
 638             document.warning("Malformed LyX document: Could not find end of tabular.")
 639             continue
 640         for k in range(i, j):
 641             if re.search('^<column.*varwidth="[^"]+".*>$', document.body[k]):
 642                 document.warning("Converting 'tabularx'/'xltabular' table to normal table.")
 643                 document.body[k] = document.body[k].replace(' varwidth="true"', '')
 644         i = i + 1
 645
 646
 647 def revert_vcolumns(document):
 648     " Revert standard columns with line breaks etc. "
 649     i = 0
 650     needvarwidth = False
 651     needarray = False
 652     try:
 653         while True:
 654             i = find_token(document.body, "\\begin_inset Tabular", i)
 655             if i == -1:
 656                 return
 657             j = find_end_of_inset(document.body, i)
 658             if j == -1:
 659                 document.warning("Malformed LyX document: Could not find end of tabular.")
 660                 i += 1
 661                 continue
 662
 663             # Collect necessary column information
 664             m = i + 1
 665             nrows = int(document.body[i+1].split('"')[3])
 666             ncols = int(document.body[i+1].split('"')[5])
 667             col_info = []
 668             for k in range(ncols):
 669                 m = find_token(document.body, "<column", m)
 670                 width = get_option_value(document.body[m], 'width')
 671                 varwidth = get_option_value(document.body[m], 'varwidth')
 672                 alignment = get_option_value(document.body[m], 'alignment')
 673                 special = get_option_value(document.body[m], 'special')
 674                 col_info.append([width, varwidth, alignment, special, m])
 675
 676             # Now parse cells
 677             m = i + 1
 678             lines = []
 679             for row in range(nrows):
 680                 for col in range(ncols):
 681                     m = find_token(document.body, "<cell", m)
 682                     multicolumn = get_option_value(document.body[m], 'multicolumn')
 683                     multirow = get_option_value(document.body[m], 'multirow')
 684                     width = get_option_value(document.body[m], 'width')
 685                     rotate = get_option_value(document.body[m], 'rotate')
 686                     # Check for: linebreaks, multipars, non-standard environments
 687                     begcell = m
 688                     endcell = find_token(document.body, "</cell>", begcell)
 689                     vcand = False
 690                     if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
 691                         vcand = True
 692                     elif count_pars_in_inset(document.body, begcell + 2) > 1:
 693                         vcand = True
 694                     elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
 695                         vcand = True
 696                     if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""):
 697                         if col_info[col][0] == "" and col_info[col][1] == "" and col_info[col][3] == "":
 698                             needvarwidth = True
 699                             alignment = col_info[col][2]
 700                             col_line = col_info[col][4]
 701                             vval = ""
 702                             if alignment == "center":
 703                                 vval = ">{\\centering}"
 704                             elif  alignment == "left":
 705                                 vval = ">{\\raggedright}"
 706                             elif alignment == "right":
 707                                 vval = ">{\\raggedleft}"
 708                             if vval != "":
 709                                 needarray = True
 710                             vval += "V{\\linewidth}"
 711
 712                             document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
 713                             # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
 714                             # with newlines, and we do not want that)
 715                             while True:
 716                                 endcell = find_token(document.body, "</cell>", begcell)
 717                                 linebreak = False
 718                                 nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
 719                                 if nl == -1:
 720                                     nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
 721                                     if nl == -1:
 722                                          break
 723                                     linebreak = True
 724                                 nle = find_end_of_inset(document.body, nl)
 725                                 del(document.body[nle:nle+1])
 726                                 if linebreak:
 727                                     document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
 728                                 else:
 729                                     document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
 730                     m += 1
 731
 732             i = j + 1
 733
 734     finally:
 735         if needarray == True:
 736             add_to_preamble(document, ["\\usepackage{array}"])
 737         if needvarwidth == True:
 738             add_to_preamble(document, ["\\usepackage{varwidth}"])
 739
 740
 741 def revert_bibencoding(document):
 742     " Revert bibliography encoding "
 743
 744     # Get cite engine
 745     engine = "basic"
 746     i = find_token(document.header, "\\cite_engine", 0)
 747     if i == -1:
 748         document.warning("Malformed document! Missing \\cite_engine")
 749     else:
 750         engine = get_value(document.header, "\\cite_engine", i)
 751
 752     # Check if biblatex
 753     biblatex = False
 754     if engine in ["biblatex", "biblatex-natbib"]:
 755         biblatex = True
 756
 757     # Map lyx to latex encoding names
 758     encodings = {
 759         "utf8" : "utf8",
 760         "utf8x" : "utf8x",
 761         "armscii8" : "armscii8",
 762         "iso8859-1" : "latin1",
 763         "iso8859-2" : "latin2",
 764         "iso8859-3" : "latin3",
 765         "iso8859-4" : "latin4",
 766         "iso8859-5" : "iso88595",
 767         "iso8859-6" : "8859-6",
 768         "iso8859-7" : "iso-8859-7",
 769         "iso8859-8" : "8859-8",
 770         "iso8859-9" : "latin5",
 771         "iso8859-13" : "latin7",
 772         "iso8859-15" : "latin9",
 773         "iso8859-16" : "latin10",
 774         "applemac" : "applemac",
 775         "cp437" : "cp437",
 776         "cp437de" : "cp437de",
 777         "cp850" : "cp850",
 778         "cp852" : "cp852",
 779         "cp855" : "cp855",
 780         "cp858" : "cp858",
 781         "cp862" : "cp862",
 782         "cp865" : "cp865",
 783         "cp866" : "cp866",
 784         "cp1250" : "cp1250",
 785         "cp1251" : "cp1251",
 786         "cp1252" : "cp1252",
 787         "cp1255" : "cp1255",
 788         "cp1256" : "cp1256",
 789         "cp1257" : "cp1257",
 790         "koi8-r" : "koi8-r",
 791         "koi8-u" : "koi8-u",
 792         "pt154" : "pt154",
 793         "utf8-platex" : "utf8",
 794         "ascii" : "ascii"
 795     }
 796
 797     i = 0
 798     bibresources = []
 799     while (True):
 800         i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
 801         if i == -1:
 802             break
 803         j = find_end_of_inset(document.body, i)
 804         if j == -1:
 805             document.warning("Can't find end of bibtex inset at line %d!!" %(i))
 806             i += 1
 807             continue
 808         encoding = get_quoted_value(document.body, "encoding", i, j)
 809         if not encoding:
 810             i += 1
 811             continue
 812         # remove encoding line
 813         k = find_token(document.body, "encoding", i, j)
 814         if k != -1:
 815             del document.body[k]
 816         # Re-find inset end line
 817         j = find_end_of_inset(document.body, i)
 818         if biblatex:
 819             biblio_options = ""
 820             h = find_token(document.header, "\\biblio_options", 0)
 821             if h != -1:
 822                 biblio_options = get_value(document.header, "\\biblio_options", h)
 823                 if not "bibencoding" in biblio_options:
 824                      document.header[h] += ",bibencoding=%s" % encodings[encoding]
 825             else:
 826                 bs = find_token(document.header, "\\biblatex_bibstyle", 0)
 827                 if bs == -1:
 828                     # this should not happen
 829                     document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
 830                 else:
 831                     document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + encodings[encoding]]
 832         else:
 833             document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
 834             document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + encodings[encoding] + "}")
 835
 836         i = j + 1
 837
 838
 839
 840 def convert_vcsinfo(document):
 841     " Separate vcs Info inset from buffer Info inset. "
 842
 843     types = {
 844         "vcs-revision" : "revision",
 845         "vcs-tree-revision" : "tree-revision",
 846         "vcs-author" : "author",
 847         "vcs-time" : "time",
 848         "vcs-date" : "date"
 849     }
 850     i = 0
 851     while True:
 852         i = find_token(document.body, "\\begin_inset Info", i)
 853         if i == -1:
 854             return
 855         j = find_end_of_inset(document.body, i + 1)
 856         if j == -1:
 857             document.warning("Malformed LyX document: Could not find end of Info inset.")
 858             i = i + 1
 859             continue
 860         tp = find_token(document.body, 'type', i, j)
 861         tpv = get_quoted_value(document.body, "type", tp)
 862         if tpv != "buffer":
 863             i = i + 1
 864             continue
 865         arg = find_token(document.body, 'arg', i, j)
 866         argv = get_quoted_value(document.body, "arg", arg)
 867         if argv not in list(types.keys()):
 868             i = i + 1
 869             continue
 870         document.body[tp] = "type \"vcs\""
 871         document.body[arg] = "arg \"" + types[argv] + "\""
 872         i = i + 1
 873
 874
 875 def revert_vcsinfo(document):
 876     " Merge vcs Info inset to buffer Info inset. "
 877
 878     args = ["revision", "tree-revision", "author", "time", "date" ]
 879     i = 0
 880     while True:
 881         i = find_token(document.body, "\\begin_inset Info", i)
 882         if i == -1:
 883             return
 884         j = find_end_of_inset(document.body, i + 1)
 885         if j == -1:
 886             document.warning("Malformed LyX document: Could not find end of Info inset.")
 887             i = i + 1
 888             continue
 889         tp = find_token(document.body, 'type', i, j)
 890         tpv = get_quoted_value(document.body, "type", tp)
 891         if tpv != "vcs":
 892             i = i + 1
 893             continue
 894         arg = find_token(document.body, 'arg', i, j)
 895         argv = get_quoted_value(document.body, "arg", arg)
 896         if argv not in args:
 897             document.warning("Malformed Info inset. Invalid vcs arg.")
 898             i = i + 1
 899             continue
 900         document.body[tp] = "type \"buffer\""
 901         document.body[arg] = "arg \"vcs-" + argv + "\""
 902         i = i + 1
 903
 904
 905 def revert_dateinfo(document):
 906     " Revert date info insets to static text. "
 907
 908 # FIXME This currently only considers the main language and uses the system locale
 909 # Ideally, it should honor context languages and switch the locale accordingly.
 910
 911     # The date formats for each language using strftime syntax:
 912     # long, short, loclong, locmedium, locshort
 913     dateformats = {
 914         "afrikaans" : ["%A, %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
 915         "albanian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 916         "american" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 917         "amharic" : ["%A ፣%d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 918         "ancientgreek" : ["%A, %d %B %Y", "%d %b %Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 919         "arabic_arabi" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
 920         "arabic_arabtex" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
 921         "armenian" : ["%Y թ. %B %d, %A", "%d.%m.%y", "%d %B، %Y", "%d %b، %Y", "%d/%m/%Y"],
 922         "asturian" : ["%A, %d %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
 923         "australian" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 924         "austrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 925         "bahasa" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 926         "bahasam" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 927         "basque" : ["%Y(e)ko %B %d, %A", "%y/%m/%d", "%Y %B %d", "%Y %b %d", "%Y/%m/%d"],
 928         "belarusian" : ["%A, %d %B %Y г.", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 929         "bosnian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%Y-%m-%d"],
 930         "brazilian" : ["%A, %d de %B de %Y", "%d/%m/%Y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
 931         "breton" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 932         "british" : ["%A, %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 933         "bulgarian" : ["%A, %d %B %Y г.", "%d.%m.%y г.", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 934         "canadian" : ["%A, %B %d, %Y", "%Y-%m-%d", "%B %d, %Y", "%d %b %Y", "%Y-%m-%d"],
 935         "canadien" : ["%A %d %B %Y", "%y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 936         "catalan" : ["%A, %d %B de %Y", "%d/%m/%y", "%d / %B / %Y", "%d / %b / %Y", "%d/%m/%Y"],
 937         "chinese-simplified" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y-%m-%d", "%y-%m-%d"],
 938         "chinese-traditional" : ["%Y年%m月%d日 %A", "%Y/%m/%d", "%Y年%m月%d日", "%Y年%m月%d日", "%y年%m月%d日"],
 939         "coptic" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 940         "croatian" : ["%A, %d. %B %Y.", "%d. %m. %Y.", "%d. %B %Y.", "%d. %b. %Y.", "%d.%m.%Y."],
 941         "czech" : ["%A %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b. %Y", "%d.%m.%Y"],
 942         "danish" : ["%A den %d. %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
 943         "divehi" : ["%Y %B %d, %A", "%Y-%m-%d", "%Y %B %d", "%Y %b %d", "%d/%m/%Y"],
 944         "dutch" : ["%A %d %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 945         "english" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 946         "esperanto" : ["%A, %d %B %Y", "%d %b %Y", "la %d de %B %Y", "la %d de %b %Y", "%m/%d/%Y"],
 947         "estonian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 948         "farsi" : ["%A %d %B %Y", "%Y/%m/%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
 949         "finnish" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 950         "french" : ["%A %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 951         "friulan" : ["%A %d di %B dal %Y", "%d/%m/%y", "%d di %B dal %Y", "%d di %b dal %Y", "%d/%m/%Y"],
 952         "galician" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
 953         "georgian" : ["%A, %d %B, %Y", "%d.%m.%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 954         "german" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 955         "german-ch" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 956         "german-ch-old" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 957         "greek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 958         "hebrew" : ["%A, %d ב%B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 959         "hindi" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 960         "icelandic" : ["%A, %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 961         "interlingua" : ["%Y %B %d, %A", "%Y-%m-%d", "le %d de %B %Y", "le %d de %b %Y", "%Y-%m-%d"],
 962         "irish" : ["%A %d %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
 963         "italian" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d/%b/%Y", "%d/%m/%Y"],
 964         "japanese" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
 965         "japanese-cjk" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
 966         "kannada" : ["%A, %B %d, %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d-%m-%Y"],
 967         "kazakh" : ["%Y ж. %d %B, %A", "%d.%m.%y", "%d %B %Y", "%d %B %Y", "%Y-%d-%m"],
 968         "khmer" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
 969         "korean" : ["%Y년 %m월 %d일 %A", "%y. %m. %d.", "%Y년 %m월 %d일", "%Y. %m. %d.", "%y. %m. %d."],
 970         "kurmanji" : ["%A, %d %B %Y", "%d %b %Y", "%d. %B %Y", "%d. %m. %Y", "%Y-%m-%d"],
 971         "lao" : ["%A ທີ %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
 972         "latin" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 973         "latvian" : ["%A, %Y. gada %d. %B", "%d.%m.%y", "%Y. gada %d. %B", "%Y. gada %d. %b", "%d.%m.%Y"],
 974         "lithuanian" : ["%Y m. %B %d d., %A", "%Y-%m-%d", "%Y m. %B %d d.", "%Y m. %B %d d.", "%Y-%m-%d"],
 975         "lowersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 976         "macedonian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 977         "magyar" : ["%Y. %B %d., %A", "%Y. %m. %d.", "%Y. %B %d.", "%Y. %b %d.", "%Y.%m.%d."],
 978         "marathi" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 979         "mongolian" : ["%A, %Y оны %m сарын %d", "%Y-%m-%d", "%Y оны %m сарын %d", "%d-%m-%Y", "%d-%m-%Y"],
 980         "naustrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 981         "newzealand" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 982         "ngerman" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 983         "norsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 984         "nynorsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 985         "occitan" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 986         "piedmontese" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 987         "polish" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 988         "polutonikogreek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 989         "portuguese" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%Y/%m/%d"],
 990         "romanian" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 991         "romansh" : ["%A, ils %d da %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 992         "russian" : ["%A, %d %B %Y г.", "%d.%m.%Y", "%d %B %Y г.", "%d %b %Y г.", "%d.%m.%Y"],
 993         "samin" : ["%Y %B %d, %A", "%Y-%m-%d", "%B %d. b. %Y", "%b %d. b. %Y", "%d.%m.%Y"],
 994         "sanskrit" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 995         "scottish" : ["%A, %dmh %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 996         "serbian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 997         "serbian-latin" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 998         "slovak" : ["%A, %d. %B %Y", "%d. %m. %Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 999         "slovene" : ["%A, %d. %B %Y", "%d. %m. %y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1000         "spanish" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B %de %Y", "%d %b %Y", "%d/%m/%Y"],
1001         "spanish-mexico" : ["%A, %d de %B %de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
1002         "swedish" : ["%A %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
1003         "syriac" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1004         "tamil" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
1005         "telugu" : ["%d, %B %Y, %A", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
1006         "thai" : ["%Aที่ %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1007         "tibetan" : ["%Y %Bའི་ཚེས་%d, %A", "%Y-%m-%d", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
1008         "turkish" : ["%d %B %Y %A", "%d.%m.%Y", "%d %B %Y", "%d.%b.%Y", "%d.%m.%Y"],
1009         "turkmen" : ["%d %B %Y %A", "%d.%m.%Y", "%Y ý. %B %d", "%d.%m.%Y ý.", "%d.%m.%y ý."],
1010         "ukrainian" : ["%A, %d %B %Y р.", "%d.%m.%y", "%d %B %Y", "%d %m %Y", "%d.%m.%Y"],
1011         "uppersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
1012         "urdu" : ["%A، %d %B، %Y", "%d/%m/%y", "%d %B, %Y", "%d %b %Y", "%d/%m/%Y"],
1013         "vietnamese" : ["%A, %d %B, %Y", "%d/%m/%Y", "%d tháng %B %Y", "%d-%m-%Y", "%d/%m/%Y"],
1014         "welsh" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1015     }
1016
1017     types = ["date", "fixdate", "moddate" ]
1018     i = 0
1019     i = find_token(document.header, "\\language", 0)
1020     if i == -1:
1021         # this should not happen
1022         document.warning("Malformed LyX document! No \\language header found!")
1023         return
1024     lang = get_value(document.header, "\\language", i)
1025
1026     i = 0
1027     while True:
1028         i = find_token(document.body, "\\begin_inset Info", i)
1029         if i == -1:
1030             return
1031         j = find_end_of_inset(document.body, i + 1)
1032         if j == -1:
1033             document.warning("Malformed LyX document: Could not find end of Info inset.")
1034             i = i + 1
1035             continue
1036         tp = find_token(document.body, 'type', i, j)
1037         tpv = get_quoted_value(document.body, "type", tp)
1038         if tpv not in types:
1039             i = i + 1
1040             continue
1041         arg = find_token(document.body, 'arg', i, j)
1042         argv = get_quoted_value(document.body, "arg", arg)
1043         isodate = ""
1044         dte = date.today()
1045         if tpv == "fixdate":
1046             datecomps = argv.split('@')
1047             if len(datecomps) > 1:
1048                 argv = datecomps[0]
1049                 isodate = datecomps[1]
1050                 m = re.search('(\d\d\d\d)-(\d\d)-(\d\d)', isodate)
1051                 if m:
1052                     dte = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
1053 # FIXME if we had the path to the original document (not the one in the tmp dir),
1054 #        we could use the mtime.
1055 #        elif tpv == "moddate":
1056 #            dte = date.fromtimestamp(os.path.getmtime(document.dir))
1057         result = ""
1058         if argv == "ISO":
1059             result = dte.isodate()
1060         elif argv == "long":
1061             result = dte.strftime(dateformats[lang][0])
1062         elif argv == "short":
1063             result = dte.strftime(dateformats[lang][1])
1064         elif argv == "loclong":
1065             result = dte.strftime(dateformats[lang][2])
1066         elif argv == "locmedium":
1067             result = dte.strftime(dateformats[lang][3])
1068         elif argv == "locshort":
1069             result = dte.strftime(dateformats[lang][4])
1070         else:
1071             fmt = argv.replace("MMMM", "%b").replace("MMM", "%b").replace("MM", "%m").replace("M", "%m")
1072             fmt = fmt.replace("yyyy", "%Y").replace("yy", "%y")
1073             fmt = fmt.replace("dddd", "%A").replace("ddd", "%a").replace("dd", "%d")
1074             fmt = re.sub('[^\'%]d', '%d', fmt)
1075             fmt = fmt.replace("'", "")
1076             result = dte.strftime(fmt)
1077         document.body[i : j+1] = result
1078         i = i + 1
1079
1080
def revert_timeinfo(document):
    """Revert time Info insets ("time", "fixtime", "modtime") to static text.

    Each such inset is replaced by a single body line holding the
    formatted time.  "fixtime" insets carry their time after an "@" in
    the arg; the other types are rendered with the current time.  The arg
    selects "ISO", the "long" or "short" per-language format, or a custom
    format string.

    document -- the document being reverted; its body is modified in place.
    """

    # FIXME This currently only considers the main language and uses the system locale
    # Ideally, it should honor context languages and switch the locale accordingly.
    # Also, the time object is "naive", i.e., it does not know of timezones (%Z will
    # be empty).

    # The time formats for each language using strftime syntax:
    # long, short
    timeformats = {
        "afrikaans" : ["%H:%M:%S %Z", "%H:%M"],
        "albanian" : ["%I:%M:%S %p, %Z", "%I:%M %p"],
        "american" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "amharic" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ancientgreek" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "arabic_arabi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "arabic_arabtex" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "armenian" : ["%H:%M:%S %Z", "%H:%M"],
        "asturian" : ["%H:%M:%S %Z", "%H:%M"],
        "australian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "austrian" : ["%H:%M:%S %Z", "%H:%M"],
        "bahasa" : ["%H.%M.%S %Z", "%H.%M"],
        "bahasam" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "basque" : ["%H:%M:%S (%Z)", "%H:%M"],
        "belarusian" : ["%H:%M:%S, %Z", "%H:%M"],
        "bosnian" : ["%H:%M:%S %Z", "%H:%M"],
        "brazilian" : ["%H:%M:%S %Z", "%H:%M"],
        "breton" : ["%H:%M:%S %Z", "%H:%M"],
        "british" : ["%H:%M:%S %Z", "%H:%M"],
        "bulgarian" : ["%H:%M:%S %Z", "%H:%M"],
        "canadian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "canadien" : ["%H:%M:%S %Z", "%H h %M"],
        "catalan" : ["%H:%M:%S %Z", "%H:%M"],
        "chinese-simplified" : ["%Z %p%I:%M:%S", "%p%I:%M"],
        "chinese-traditional" : ["%p%I:%M:%S [%Z]", "%p%I:%M"],
        "coptic" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "croatian" : ["%H:%M:%S (%Z)", "%H:%M"],
        "czech" : ["%H:%M:%S %Z", "%H:%M"],
        "danish" : ["%H.%M.%S %Z", "%H.%M"],
        "divehi" : ["%H:%M:%S %Z", "%H:%M"],
        "dutch" : ["%H:%M:%S %Z", "%H:%M"],
        "english" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "esperanto" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "estonian" : ["%H:%M:%S %Z", "%H:%M"],
        "farsi" : ["%H:%M:%S (%Z)", "%H:%M"],
        "finnish" : ["%H.%M.%S %Z", "%H.%M"],
        "french" : ["%H:%M:%S %Z", "%H:%M"],
        "friulan" : ["%H:%M:%S %Z", "%H:%M"],
        "galician" : ["%H:%M:%S %Z", "%H:%M"],
        "georgian" : ["%H:%M:%S %Z", "%H:%M"],
        "german" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch-old" : ["%H:%M:%S %Z", "%H:%M"],
        "greek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "hebrew" : ["%H:%M:%S %Z", "%H:%M"],
        "hindi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "icelandic" : ["%H:%M:%S %Z", "%H:%M"],
        "interlingua" : ["%H:%M:%S %Z", "%H:%M"],
        "irish" : ["%H:%M:%S %Z", "%H:%M"],
        "italian" : ["%H:%M:%S %Z", "%H:%M"],
        "japanese" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "japanese-cjk" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "kannada" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "kazakh" : ["%H:%M:%S %Z", "%H:%M"],
        "khmer" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "korean" : ["%p %I시%M분 %S초 %Z", "%p %I:%M"],
        "kurmanji" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "lao" : ["%H ໂມງ%M ນາທີ  %S ວິນາທີ %Z", "%H:%M"],
        "latin" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "latvian" : ["%H:%M:%S %Z", "%H:%M"],
        "lithuanian" : ["%H:%M:%S %Z", "%H:%M"],
        "lowersorbian" : ["%H:%M:%S %Z", "%H:%M"],
        "macedonian" : ["%H:%M:%S %Z", "%H:%M"],
        "magyar" : ["%H:%M:%S %Z", "%H:%M"],
        "marathi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "mongolian" : ["%H:%M:%S %Z", "%H:%M"],
        "naustrian" : ["%H:%M:%S %Z", "%H:%M"],
        "newzealand" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ngerman" : ["%H:%M:%S %Z", "%H:%M"],
        "norsk" : ["%H:%M:%S %Z", "%H:%M"],
        "nynorsk" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "occitan" : ["%H:%M:%S %Z", "%H:%M"],
        "piedmontese" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "polish" : ["%H:%M:%S %Z", "%H:%M"],
        "polutonikogreek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "portuguese" : ["%H:%M:%S %Z", "%H:%M"],
        "romanian" : ["%H:%M:%S %Z", "%H:%M"],
        "romansh" : ["%H:%M:%S %Z", "%H:%M"],
        "russian" : ["%H:%M:%S %Z", "%H:%M"],
        "samin" : ["%H:%M:%S %Z", "%H:%M"],
        "sanskrit" : ["%H:%M:%S %Z", "%H:%M"],
        "scottish" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian-latin" : ["%H:%M:%S %Z", "%H:%M"],
        "slovak" : ["%H:%M:%S %Z", "%H:%M"],
        "slovene" : ["%H:%M:%S %Z", "%H:%M"],
        "spanish" : ["%H:%M:%S (%Z)", "%H:%M"],
        "spanish-mexico" : ["%H:%M:%S %Z", "%H:%M"],
        "swedish" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "syriac" : ["%H:%M:%S %Z", "%H:%M"],
        "tamil" : ["%p %I:%M:%S %Z", "%p %I:%M"],
        "telugu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "thai" : ["%H นาฬิกา %M นาที  %S วินาที %Z", "%H:%M"],
        "tibetan" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "turkish" : ["%H:%M:%S %Z", "%H:%M"],
        "turkmen" : ["%H:%M:%S %Z", "%H:%M"],
        "ukrainian" : ["%H:%M:%S %Z", "%H:%M"],
        "uppersorbian" : ["%H:%M:%S %Z", "%H:%M hodź."],
        "urdu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "vietnamese" : ["%H:%M:%S %Z", "%H:%M"],
        "welsh" : ["%H:%M:%S %Z", "%H:%M"]
    }

    # Position of each named format inside a timeformats entry.
    named_formats = {"long" : 0, "short" : 1}

    # Custom formats: the tokens handled here (H, h, m, s, z, t, AP/ap,
    # text in single quotes) look like Qt's QTime::toString() syntax --
    # NOTE(review): confirm against the LyX side.  They are translated in a
    # single pass: the former chain of str.replace() calls re-substituted
    # its own output, turning "HH" into "%%H".
    qt_tokens = {
        "H" : "%H", "HH" : "%H", "h" : "%I", "hh" : "%I",
        "m" : "%M", "mm" : "%M", "s" : "%S", "ss" : "%S",
        "z" : "%f", "zzz" : "%f", "t" : "%Z",
        "AP" : "%p", "ap" : "%p", "A" : "%p", "a" : "%p",
        "%" : "%%"
    }
    qt_token_re = re.compile(r"'[^']*'?|%|HH?|hh?|mm?|ss?|zzz|z|AP|ap|A|a|t")

    def qt_to_strftime(match):
        token = match.group(0)
        if token.startswith("'"):
            # literal text: drop the quotes and protect '%' from strftime
            return token.strip("'").replace("%", "%%")
        return qt_tokens[token]

    types = ["time", "fixtime", "modtime" ]
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        # this should not happen
        document.warning("Malformed LyX document! No \\language header found!")
        return
    lang = get_value(document.header, "\\language", i)

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv not in types:
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        tme = datetime.now().time()
        if tpv == "fixtime":
            # the arg has the form <format>@<ISO time>
            timecomps = argv.split('@')
            if len(timecomps) > 1:
                argv = timecomps[0]
                isotime = timecomps[1]
                try:
                    m = re.search(r'(\d\d):(\d\d):(\d\d)', isotime)
                    if m:
                        tme = time(int(m.group(1)), int(m.group(2)), int(m.group(3)))
                    else:
                        m = re.search(r'(\d\d):(\d\d)', isotime)
                        if m:
                            tme = time(int(m.group(1)), int(m.group(2)))
                except ValueError:
                    document.warning("Invalid time `%s' in Info inset; using the current time." % isotime)
        # FIXME for "modtime" we would want the mtime of the original document,
        #       but only the path of the copy in the tmp dir is available here.
        if argv == "ISO":
            result = tme.isoformat()
        elif argv in named_formats:
            formats = timeformats.get(lang)
            if formats is None:
                document.warning("No time formats known for language `%s'; using the English ones." % lang)
                formats = timeformats["english"]
            result = tme.strftime(formats[named_formats[argv]])
        else:
            # This branch used to call strftime on the undefined name "dte".
            result = tme.strftime(qt_token_re.sub(qt_to_strftime, argv))
        # Assign a one-element list: assigning the bare string would splice
        # it into the body one character per line.
        document.body[i : j+1] = [result]
        i = i + 1
1256
1257
def revert_namenoextinfo(document):
    """Turn the arg "name-noext" of buffer Info insets back into "name".

    Only Info insets of type "buffer" are considered; everything else is
    left untouched.
    """

    pos = 0
    while True:
        start = find_token(document.body, "\\begin_inset Info", pos)
        if start == -1:
            return
        # Whatever happens to this inset, the search resumes one line on.
        pos = start + 1
        end = find_end_of_inset(document.body, start + 1)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        type_line = find_token(document.body, 'type', start, end)
        if get_quoted_value(document.body, "type", type_line) != "buffer":
            continue
        arg_line = find_token(document.body, 'arg', start, end)
        if get_quoted_value(document.body, "arg", arg_line) == "name-noext":
            document.body[arg_line] = "arg \"name\""
1283
1284
def revert_l7ninfo(document):
    """Revert l7n Info insets to plain text.

    Each Info inset of type "l7n" is replaced by a single body line
    holding its arg, cleaned of trailing colons, the menu accelerator
    part ("|...") and Qt accelerator markers ("&").

    document -- the document being reverted; its body is modified in place.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "l7n":
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        # remove trailing colons, menu accelerator (|...) and qt accelerator (&), while keeping literal " & "
        argv = argv.rstrip(':').split('|')[0]
        # "</amp;>" is a temporary placeholder protecting literal " & "
        argv = argv.replace(" & ", "</amp;>").replace("&", "").replace("</amp;>", " & ")
        # Assign a one-element list: assigning the bare string would splice
        # it into the body one character per line.
        document.body[i : j+1] = [argv]
        i = i + 1
1309
1310
def revert_listpargs(document):
    """Revert listpreamble arguments to TeX code.

    Each "Argument listpreamble:" inset is removed and the content of its
    Plain Layout is re-inserted, wrapped in braces inside an ERT inset, at
    the start of the containing paragraph.  Malformed insets are reported
    and left untouched.

    document -- the document being reverted; its body is modified in place.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Argument listpreamble:", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Without the end line the deletion below would remove nothing
            # and the ERT would be built from unrelated lines.
            document.warning("Malformed LyX document: Can't find end of listpreamble argument inset")
            i += 1
            continue
        # Find containing paragraph layout
        parent = get_containing_layout(document.body, i)
        if not parent:
            document.warning("Malformed LyX document: Can't find parent paragraph layout")
            i += 1
            continue
        parbeg = parent[3]
        # Only look for the Plain Layout inside this inset.
        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i, j)
        endPlain = -1
        if beginPlain != -1:
            endPlain = find_end_of_layout(document.body, beginPlain)
        if endPlain == -1:
            document.warning("Malformed LyX document: Can't find Plain Layout of listpreamble argument inset")
            i += 1
            continue
        content = document.body[beginPlain + 1 : endPlain]
        del document.body[i:j+1]
        subst = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout",
                 "{"] + content + ["}", "\\end_layout", "", "\\end_inset", ""]
        document.body[parbeg : parbeg] = subst
        i += 1
1334
1335
def revert_lformatinfo(document):
    """Revert layout format Info insets to plain text.

    Each Info inset of type "lyxinfo" with arg "layoutformat" is replaced
    by a single body line holding the (hardcoded) layout format number.

    document -- the document being reverted; its body is modified in place.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "lyxinfo":
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        if argv != "layoutformat":
            i = i + 1
            continue
        # hardcoded for now
        # Assign a one-element list: assigning the bare string "69" would
        # insert the two lines "6" and "9".
        document.body[i : j+1] = ["69"]
        i = i + 1
1362
1363
##
# Conversion hub
#
# Tables describing the format steps handled by this module.
#
# Every entry of `convert` and `revert` is a two-element list:
#   [file format number, [functions registered for that step]]
# `convert` is listed by ascending format number, `revert` by descending
# format number.  An empty function list means nothing is run for the step.
# NOTE(review): the tables are presumably consumed by the lyx2lyx driver,
# which calls each listed function with the document -- confirm there.

supported_versions = ["2.4.0", "2.4"]

convert = [
    [545, [convert_lst_literalparam]],
    [546, []],
    [547, []],
    [548, []],
    [549, []],
    [550, [convert_fontenc]],
    [551, []],
    [552, []],
    [553, []],
    [554, []],
    [555, []],
    [556, []],
    [557, [convert_vcsinfo]],
    [558, [removeFrontMatterStyles]],
    [559, []],
    [560, []],
    [561, [convert_latexFonts]],  # Handle dejavu, ibmplex fonts in GUI
    [562, []],
    [563, []],
    [564, []],
]

revert = [
    [563, [revert_lformatinfo]],
    [562, [revert_listpargs]],
    [561, [revert_l7ninfo]],
    [560, [revert_latexFonts]],  # Handle dejavu, ibmplex fonts in user preamble
    [559, [revert_timeinfo, revert_namenoextinfo]],
    [558, [revert_dateinfo]],
    [557, [addFrontMatterStyles]],
    [556, [revert_vcsinfo]],
    [555, [revert_bibencoding]],
    [554, [revert_vcolumns]],
    [553, [revert_stretchcolumn]],
    [552, [revert_tuftecite]],
    [551, [revert_floatpclass, revert_floatalignment]],
    [550, [revert_nospellcheck]],
    [549, [revert_fontenc]],
    [548, []],  # dummy format change
    [547, [revert_lscape]],
    [546, [revert_xcharter]],
    [545, [revert_paratype]],
    [544, [revert_lst_literalparam]],
]


if __name__ == "__main__":
    # Running this module directly does nothing.
    pass