lib/lyx2lyx/lyx_2_4.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # Copyright (C) 2018 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 """ Convert files to the file format generated by lyx 2.4"""
  20
  21 import re, string
  22 import unicodedata
  23 import sys, os
  24
  25 from datetime import (datetime, date, time)
  26
  27 # Uncomment only what you need to import, please.
  28
  29 from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_layout,
  30                           find_token, find_re, get_bool_value, get_containing_layout,
  31                           get_option_value, get_value, get_quoted_value)
  32 #    del_token, del_value, del_complete_lines,
  33 #    find_complete_lines, find_end_of,
  34 #    find_re, find_substring, find_token_backwards,
  35 #    get_containing_inset,
  36 #    is_in_inset, set_bool_value
  37 #    find_tokens, find_token_exact, check_token
  38
  39 from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble)
  40 #  revert_font_attrs, insert_to_preamble, latex_length
  41 #  get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
  42 #  revert_flex_inset, hex2ratio, str2bool
  43
  44 ####################################################################
  45 # Private helper functions
  46
  47 def add_preamble_fonts(document, fontmap):
  48     " Add collected font-packages with their option to user-preamble"
  49
  50     for pkg in fontmap:
  51         if len(fontmap[pkg]) > 0:
  52             xoption = "[" + ",".join(fontmap[pkg]) + "]"
  53         else:
  54             xoption = ""
  55         preamble = "\\usepackage" + xoption + "{%s}" % pkg
  56         add_to_preamble(document, [preamble])
  57
  58
  59 def createkey(pkg, options):
  60     options.sort()
  61     return pkg + ':' + "-".join(options)
  62
  63 class fontinfo:
  64     def __init__(self):
  65         self.fontname = None    # key into font2pkgmap
  66         self.fonttype = None    # roman,sans,typewriter,math
  67         self.scaletype = None   # None,sf,tt
  68         self.scaleopt = None    # None, 'scaled', 'scale'
  69         self.scaleval = 1
  70         self.package = None
  71         self.options = []
  72         self.pkgkey = None      # key into pkg2fontmap
  73
  74     def addkey(self):
  75         self.pkgkey = createkey(self.package, self.options)
  76
  77 class fontmapping:
  78     def __init__(self):
  79         self.font2pkgmap = dict()
  80         self.pkg2fontmap = dict()
  81         self.pkginmap = dict()  # defines, if a map for package exists
  82
  83     def expandFontMapping(self, font_list, font_type, scale_type, pkg, scaleopt = None):
  84         " Expand fontinfo mapping"
  85         #
  86         # fontlist:    list of fontnames, each element
  87         #              may contain a ','-separated list of needed options
  88         #              like e.g. 'IBMPlexSansCondensed,condensed'
  89         # font_type:   one of 'roman', 'sans', 'typewriter', 'math'
  90         # scale_type:  one of None, 'sf', 'tt'
  91         # pkg:         package defining the font. Defaults to fontname if None
  92         # scaleopt:    one of None, 'scale', 'scaled', or some other string
  93         #              to be used in scale option (e.g. scaled=0.7)
  94         for fl in font_list:
  95             fe = fontinfo()
  96             fe.fonttype = font_type
  97             fe.scaletype = scale_type
  98             flt = fl.split(",")
  99             font_name = flt[0]
 100             fe.fontname = font_name
 101             fe.options = flt[1:]
 102             fe.scaleopt = scaleopt
 103             if pkg == None:
 104                 fe.package = font_name
 105             else:
 106                 fe.package = pkg
 107             fe.addkey()
 108             self.font2pkgmap[font_name] = fe
 109             if fe.pkgkey in self.pkg2fontmap:
 110                 # Repeated the same entry? Check content
 111                 if self.pkg2fontmap[fe.pkgkey] != font_name:
 112                     document.error("Something is wrong in pkgname+options <-> fontname mapping")
 113             self.pkg2fontmap[fe.pkgkey] = font_name
 114             self.pkginmap[fe.package] = 1
 115
 116     def getfontname(self, pkg, options):
 117         options.sort()
 118         pkgkey = createkey(pkg, options)
 119         if not pkgkey in self.pkg2fontmap:
 120             return None
 121         fontname = self.pkg2fontmap[pkgkey]
 122         if not fontname in self.font2pkgmap:
 123             document.error("Something is wrong in pkgname+options <-> fontname mapping")
 124             return None
 125         if pkgkey == self.font2pkgmap[fontname].pkgkey:
 126             return fontname
 127         return None
 128
 129 def createFontMapping():
 130     # Create info for known fonts for the use in
 131     #   convert_latexFonts() and
 132     #   revert_latexFonts()
 133     #
 134     # * Would be more handy to parse latexFonts file,
 135     #   but the path to this file is unknown
 136     # * For now, add DejaVu and IBMPlex only.
 137     # * Expand, if desired
 138     fm = fontmapping()
 139     fm.expandFontMapping(['DejaVuSerif', 'DejaVuSerifCondensed'], "roman", None, None)
 140     fm.expandFontMapping(['DejaVuSans','DejaVuSansCondensed'], "sans", "sf", None, "scaled")
 141     fm.expandFontMapping(['DejaVuSansMono'], "typewriter", "tt", None, "scaled")
 142     fm.expandFontMapping(['IBMPlexSerif', 'IBMPlexSerifThin,thin',
 143                           'IBMPlexSerifExtraLight,extralight', 'IBMPlexSerifLight,light',
 144                           'IBMPlexSerifSemibold,semibold'],
 145                          "roman", None, "plex-serif")
 146     fm.expandFontMapping(['IBMPlexSans','IBMPlexSansCondensed,condensed',
 147                           'IBMPlexSansThin,thin', 'IBMPlexSansExtraLight,extralight',
 148                           'IBMPlexSansLight,light', 'IBMPlexSansSemibold,semibold'],
 149                          "sans", "sf", "plex-sans", "scale")
 150     fm.expandFontMapping(['IBMPlexMono', 'IBMPlexMonoThin,thin',
 151                           'IBMPlexMonoExtraLight,extralight', 'IBMPlexMonoLight,light',
 152                           'IBMPlexMonoSemibold,semibold'],
 153                          "typewriter", "tt", "plex-mono", "scale")
 154     return fm
 155
 156 def convert_fonts(document, fm):
 157     " Handle font definition to LaTeX "
 158
 159     rpkg = re.compile(r'^\\usepackage(\[([^\]]*)\])?\{([^\}]+)\}')
 160     rscaleopt = re.compile(r'^scaled?=(.*)')
 161
 162     i = 0
 163     while i < len(document.preamble):
 164         i = find_re(document.preamble, rpkg, i)
 165         if i == -1:
 166             return
 167         mo = rpkg.search(document.preamble[i])
 168         if mo == None or mo.group(2) == None:
 169             options = []
 170         else:
 171             options = mo.group(2).replace(' ', '').split(",")
 172         pkg = mo.group(3)
 173         o = 0
 174         oscale = 1
 175         while o < len(options):
 176             mo = rscaleopt.search(options[o])
 177             if mo == None:
 178                 o += 1
 179                 continue
 180             oscale = mo.group(1)
 181             del options[o]
 182             break
 183
 184         if not pkg in fm.pkginmap:
 185             i += 1
 186             continue
 187         # determine fontname
 188         fn = fm.getfontname(pkg, options)
 189         if fn == None:
 190             i += 1
 191             continue
 192         del document.preamble[i]
 193         fontinfo = fm.font2pkgmap[fn]
 194         if fontinfo.scaletype == None:
 195             fontscale = None
 196         else:
 197             fontscale = "\\font_" + fontinfo.scaletype + "_scale"
 198             fontinfo.scaleval = oscale
 199
 200         if i > 0 and document.preamble[i-1] == "% Added by lyx2lyx":
 201             del document.preamble[i-1]
 202         if fontscale != None:
 203             j = find_token(document.header, fontscale, 0)
 204             if j != -1:
 205                 val = get_value(document.header, fontscale, j)
 206                 vals = val.split()
 207                 scale = "100"
 208                 if oscale != None:
 209                     scale = "%03d" % int(float(oscale) * 100)
 210                 document.header[j] = fontscale + " " + scale + " " + vals[1]
 211         ft = "\\font_" + fontinfo.fonttype
 212         j = find_token(document.header, ft, 0)
 213         if j != -1:
 214             val = get_value(document.header, ft, j)
 215             vals = val.split()
 216             document.header[j] = ft + ' "' + fn + '" ' + vals[1]
 217
 218 def revert_fonts(document, fm, fontmap):
 219     " Revert native font definition to LaTeX "
 220     # fonlist := list of fonts created from the same package
 221     # Empty package means that the font-name is the same as the package-name
 222     # fontmap (key = package, val += found options) will be filled
 223     # and used later in add_preamble_fonts() to be added to user-preamble
 224
 225     rfontscale = re.compile(r'^\s*(\\font_(roman|sans|typewriter|math))\s+')
 226     rscales = re.compile(r'^\s*(\d+)\s+(\d+)')
 227     i = 0
 228     while i < len(document.header):
 229         i = find_re(document.header, rfontscale, i)
 230         if (i == -1):
 231             break
 232         mo = rfontscale.search(document.header[i])
 233         if mo == None:
 234             i += 1
 235             continue
 236         ft = mo.group(1)    # 'roman', 'sans', 'typewriter', 'math'
 237         val = get_value(document.header, ft, i)
 238         words = val.split()
 239         font = words[0].replace('"', '')
 240         if not font in fm.font2pkgmap:
 241             i += 1
 242             continue
 243         fontinfo = fm.font2pkgmap[font]
 244         val = fontinfo.package
 245         if not val in fontmap:
 246             fontmap[val] = []
 247         document.header[i] = ft + ' "default" ' + words[1]
 248         if fontinfo.scaleopt != None:
 249             xval =  get_value(document.header, "\\font_" + fontinfo.scaletype + "_scale", 0)
 250             mo = rscales.search(xval)
 251             if mo != None:
 252                 xval1 = mo.group(1)
 253                 xval2 = mo.group(2)
 254                 if xval1 != "100":
 255                     # set correct scale option
 256                     fontmap[val].extend([fontinfo.scaleopt + "=" + format(float(xval1) / 100, '.2f')])
 257         if len(fontinfo.options) > 0:
 258             fontmap[val].extend(fontinfo.options)
 259         i += 1
 260
 261 ###############################################################################
 262 ###
 263 ### Conversion and reversion routines
 264 ###
 265 ###############################################################################
 266
 267 def convert_latexFonts(document):
 268     " Handle DejaVu and IBMPlex fonts definition to LaTeX "
 269
 270     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 271         fm = createFontMapping()
 272         convert_fonts(document, fm)
 273
 274 def revert_latexFonts(document):
 275     " Revert native DejaVu font definition to LaTeX "
 276
 277     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 278         fontmap = dict()
 279         fm = createFontMapping()
 280         revert_fonts(document, fm, fontmap)
 281         add_preamble_fonts(document, fontmap)
 282
 283 def removeFrontMatterStyles(document):
 284     " Remove styles Begin/EndFrontmatter"
 285
 286     layouts = ['BeginFrontmatter', 'EndFrontmatter']
 287     for layout in layouts:
 288         i = 0
 289         while True:
 290             i = find_token(document.body, '\\begin_layout ' + layout, i)
 291             if i == -1:
 292                 break
 293             j = find_end_of_layout(document.body, i)
 294             if j == -1:
 295                 document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
 296                 i += 1
 297                 continue
 298             while i > 0 and document.body[i-1].strip() == '':
 299                 i -= 1
 300             while document.body[j+1].strip() == '':
 301                 j = j + 1
 302             document.body[i:j+1] = ['']
 303
 304 def addFrontMatterStyles(document):
 305     " Use styles Begin/EndFrontmatter for elsarticle"
 306
 307     def insertFrontmatter(prefix, line):
 308         above = line
 309         while above > 0 and document.body[above-1].strip() == '':
 310             above -= 1
 311         below = line
 312         while document.body[below].strip() == '':
 313             below += 1
 314         document.body[above:below] = ['', '\\begin_layout ' + prefix + 'Frontmatter',
 315                                     '\\begin_inset Note Note',
 316                                     'status open', '',
 317                                     '\\begin_layout Plain Layout',
 318                                     'Keep this empty!',
 319                                     '\\end_layout', '',
 320                                     '\\end_inset', '', '',
 321                                     '\\end_layout', '']
 322
 323     if document.textclass == "elsarticle":
 324         layouts = ['Title', 'Title footnote', 'Author', 'Author footnote',
 325                    'Corresponding author', 'Address', 'Email', 'Abstract', 'Keywords']
 326         first = -1
 327         last = -1
 328         for layout in layouts:
 329             i = 0
 330             while True:
 331                 i = find_token(document.body, '\\begin_layout ' + layout, i)
 332                 if i == -1:
 333                     break
 334                 k = find_end_of_layout(document.body, i)
 335                 if k == -1:
 336                     document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
 337                     i += 1;
 338                     continue
 339                 if first == -1 or i < first:
 340                     first = i
 341                 if last == -1 or last <= k:
 342                     last = k+1
 343                 i = k+1
 344         if first == -1:
 345             return
 346         insertFrontmatter('End', last)
 347         insertFrontmatter('Begin', first)
 348
 349 def convert_lst_literalparam(document):
 350     " Add param literal to include inset "
 351
 352     i = 0
 353     while True:
 354         i = find_token(document.body, '\\begin_inset CommandInset include', i)
 355         if i == -1:
 356             break
 357         j = find_end_of_inset(document.body, i)
 358         if j == -1:
 359             document.warning("Malformed LyX document: Can't find end of command inset at line %d" % i)
 360             i += 1
 361             continue
 362         while i < j and document.body[i].strip() != '':
 363             i += 1
 364         document.body.insert(i, "literal \"true\"")
 365
 366
 367 def revert_lst_literalparam(document):
 368     " Remove param literal from include inset "
 369
 370     i = 0
 371     while True:
 372         i = find_token(document.body, '\\begin_inset CommandInset include', i)
 373         if i == -1:
 374             break
 375         j = find_end_of_inset(document.body, i)
 376         if j == -1:
 377             document.warning("Malformed LyX document: Can't find end of include inset at line %d" % i)
 378             i += 1
 379             continue
 380         k = find_token(document.body, 'literal', i, j)
 381         if k == -1:
 382             i += 1
 383             continue
 384         del document.body[k]
 385
 386
 387 def revert_paratype(document):
 388     " Revert ParaType font definitions to LaTeX "
 389
 390     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 391         preamble = ""
 392         i1 = find_token(document.header, "\\font_roman \"PTSerif-TLF\"", 0)
 393         i2 = find_token(document.header, "\\font_sans \"default\"", 0)
 394         i3 = find_token(document.header, "\\font_typewriter \"default\"", 0)
 395         j = find_token(document.header, "\\font_sans \"PTSans-TLF\"", 0)
 396         sfval = get_value(document.header, "\\font_sf_scale", 0)
 397         # cutoff " 100"
 398         sfval = sfval[:-4]
 399         sfoption = ""
 400         if sfval != "100":
 401             sfoption = "scaled=" + format(float(sfval) / 100, '.2f')
 402         k = find_token(document.header, "\\font_typewriter \"PTMono-TLF\"", 0)
 403         ttval = get_value(document.header, "\\font_tt_scale", 0)
 404         # cutoff " 100"
 405         ttval = ttval[:-4]
 406         ttoption = ""
 407         if ttval != "100":
 408             ttoption = "scaled=" + format(float(ttval) / 100, '.2f')
 409         if i1 != -1 and i2 != -1 and i3!= -1:
 410             add_to_preamble(document, ["\\usepackage{paratype}"])
 411         else:
 412             if i1!= -1:
 413                 add_to_preamble(document, ["\\usepackage{PTSerif}"])
 414                 document.header[i1] = document.header[i1].replace("PTSerif-TLF", "default")
 415             if j!= -1:
 416                 if sfoption != "":
 417                     add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"])
 418                 else:
 419                     add_to_preamble(document, ["\\usepackage{PTSans}"])
 420                 document.header[j] = document.header[j].replace("PTSans-TLF", "default")
 421             if k!= -1:
 422                 if ttoption != "":
 423                     add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"])
 424                 else:
 425                     add_to_preamble(document, ["\\usepackage{PTMono}"])
 426                 document.header[k] = document.header[k].replace("PTMono-TLF", "default")
 427
 428
 429 def revert_xcharter(document):
 430     " Revert XCharter font definitions to LaTeX "
 431
 432     i = find_token(document.header, "\\font_roman \"xcharter\"", 0)
 433     if i == -1:
 434         return
 435
 436     # replace unsupported font setting
 437     document.header[i] = document.header[i].replace("xcharter", "default")
 438     # no need for preamble code with system fonts
 439     if get_bool_value(document.header, "\\use_non_tex_fonts"):
 440         return
 441
 442     # transfer old style figures setting to package options
 443     j = find_token(document.header, "\\font_osf true")
 444     if j != -1:
 445         options = "[osf]"
 446         document.header[j] = "\\font_osf false"
 447     else:
 448         options = ""
 449     if i != -1:
 450         add_to_preamble(document, ["\\usepackage%s{XCharter}"%options])
 451
 452
 453 def revert_lscape(document):
 454     " Reverts the landscape environment (Landscape module) to TeX-code "
 455
 456     if not "landscape" in document.get_module_list():
 457         return
 458
 459     i = 0
 460     while True:
 461         i = find_token(document.body, "\\begin_inset Flex Landscape", i)
 462         if i == -1:
 463             return
 464         j = find_end_of_inset(document.body, i)
 465         if j == -1:
 466             document.warning("Malformed LyX document: Can't find end of Landscape inset")
 467             i += 1
 468             continue
 469
 470         if document.body[i] == "\\begin_inset Flex Landscape (Floating)":
 471             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}}")
 472             document.body[i : i + 4] = put_cmd_in_ert("\\afterpage{\\begin{landscape}")
 473             add_to_preamble(document, ["\\usepackage{afterpage}"])
 474         else:
 475             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}")
 476             document.body[i : i + 4] = put_cmd_in_ert("\\begin{landscape}")
 477
 478         add_to_preamble(document, ["\\usepackage{pdflscape}"])
 479         # no need to reset i
 480
 481
 482 def convert_fontenc(document):
 483     " Convert default fontenc setting "
 484
 485     i = find_token(document.header, "\\fontencoding global", 0)
 486     if i == -1:
 487         return
 488
 489     document.header[i] = document.header[i].replace("global", "auto")
 490
 491
 492 def revert_fontenc(document):
 493     " Revert default fontenc setting "
 494
 495     i = find_token(document.header, "\\fontencoding auto", 0)
 496     if i == -1:
 497         return
 498
 499     document.header[i] = document.header[i].replace("auto", "global")
 500
 501
 502 def revert_nospellcheck(document):
 503     " Remove nospellcheck font info param "
 504
 505     i = 0
 506     while True:
 507         i = find_token(document.body, '\\nospellcheck', i)
 508         if i == -1:
 509             return
 510         del document.body[i]
 511
 512
 513 def revert_floatpclass(document):
 514     " Remove float placement params 'document' and 'class' "
 515
 516     i = 0
 517     i = find_token(document.header, "\\float_placement class", 0)
 518     if i != -1:
 519         del document.header[i]
 520
 521     i = 0
 522     while True:
 523         i = find_token(document.body, '\\begin_inset Float', i)
 524         if i == -1:
 525             break
 526         j = find_end_of_inset(document.body, i)
 527         k = find_token(document.body, 'placement class', i, i + 2)
 528         if k == -1:
 529             k = find_token(document.body, 'placement document', i, i + 2)
 530             if k != -1:
 531                 del document.body[k]
 532             i = j
 533             continue
 534         del document.body[k]
 535
 536
 537 def revert_floatalignment(document):
 538     " Remove float alignment params "
 539
 540     i = 0
 541     i = find_token(document.header, "\\float_alignment", 0)
 542     galignment = ""
 543     if i != -1:
 544         galignment = get_value(document.header, "\\float_alignment", i)
 545         del document.header[i]
 546
 547     i = 0
 548     while True:
 549         i = find_token(document.body, '\\begin_inset Float', i)
 550         if i == -1:
 551             break
 552         j = find_end_of_inset(document.body, i)
 553         if j == -1:
 554             document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
 555             i += 1
 556         k = find_token(document.body, 'alignment', i, i + 4)
 557         if k == -1:
 558             i = j
 559             continue
 560         alignment = get_value(document.body, "alignment", k)
 561         if alignment == "document":
 562             alignment = galignment
 563         del document.body[k]
 564         l = find_token(document.body, "\\begin_layout Plain Layout", i, j)
 565         if l == -1:
 566             document.warning("Can't find float layout!")
 567             i = j
 568             continue
 569         alcmd = []
 570         if alignment == "left":
 571             alcmd = put_cmd_in_ert("\\raggedright{}")
 572         elif alignment == "center":
 573             alcmd = put_cmd_in_ert("\\centering{}")
 574         elif alignment == "right":
 575             alcmd = put_cmd_in_ert("\\raggedleft{}")
 576         if len(alcmd) > 0:
 577             document.body[l+1:l+1] = alcmd
 578         i = j
 579
 580
 581 def revert_tuftecite(document):
 582     " Revert \cite commands in tufte classes "
 583
 584     tufte = ["tufte-book", "tufte-handout"]
 585     if document.textclass not in tufte:
 586         return
 587
 588     i = 0
 589     while (True):
 590         i = find_token(document.body, "\\begin_inset CommandInset citation", i)
 591         if i == -1:
 592             break
 593         j = find_end_of_inset(document.body, i)
 594         if j == -1:
 595             document.warning("Can't find end of citation inset at line %d!!" %(i))
 596             i += 1
 597             continue
 598         k = find_token(document.body, "LatexCommand", i, j)
 599         if k == -1:
 600             document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
 601             i = j + 1
 602             continue
 603         cmd = get_value(document.body, "LatexCommand", k)
 604         if cmd != "cite":
 605             i = j + 1
 606             continue
 607         pre = get_quoted_value(document.body, "before", i, j)
 608         post = get_quoted_value(document.body, "after", i, j)
 609         key = get_quoted_value(document.body, "key", i, j)
 610         if not key:
 611             document.warning("Citation inset at line %d does not have a key!" %(i))
 612             key = "???"
 613         # Replace command with ERT
 614         res = "\\cite"
 615         if pre:
 616             res += "[" + pre + "]"
 617         if post:
 618             res += "[" + post + "]"
 619         elif pre:
 620             res += "[]"
 621         res += "{" + key + "}"
 622         document.body[i:j+1] = put_cmd_in_ert([res])
 623         i = j + 1
 624
 625
 626 def revert_stretchcolumn(document):
 627     " We remove the column varwidth flags or everything else will become a mess. "
 628     i = 0
 629     while True:
 630         i = find_token(document.body, "\\begin_inset Tabular", i)
 631         if i == -1:
 632             return
 633         j = find_end_of_inset(document.body, i + 1)
 634         if j == -1:
 635             document.warning("Malformed LyX document: Could not find end of tabular.")
 636             continue
 637         for k in range(i, j):
 638             if re.search('^<column.*varwidth="[^"]+".*>$', document.body[k]):
 639                 document.warning("Converting 'tabularx'/'xltabular' table to normal table.")
 640                 document.body[k] = document.body[k].replace(' varwidth="true"', '')
 641         i = i + 1
 642
 643
 644 def revert_vcolumns(document):
 645     " Revert standard columns with line breaks etc. "
 646     i = 0
 647     needvarwidth = False
 648     needarray = False
 649     try:
 650         while True:
 651             i = find_token(document.body, "\\begin_inset Tabular", i)
 652             if i == -1:
 653                 return
 654             j = find_end_of_inset(document.body, i)
 655             if j == -1:
 656                 document.warning("Malformed LyX document: Could not find end of tabular.")
 657                 i += 1
 658                 continue
 659
 660             # Collect necessary column information
 661             m = i + 1
 662             nrows = int(document.body[i+1].split('"')[3])
 663             ncols = int(document.body[i+1].split('"')[5])
 664             col_info = []
 665             for k in range(ncols):
 666                 m = find_token(document.body, "<column", m)
 667                 width = get_option_value(document.body[m], 'width')
 668                 varwidth = get_option_value(document.body[m], 'varwidth')
 669                 alignment = get_option_value(document.body[m], 'alignment')
 670                 special = get_option_value(document.body[m], 'special')
 671                 col_info.append([width, varwidth, alignment, special, m])
 672
 673             # Now parse cells
 674             m = i + 1
 675             lines = []
 676             for row in range(nrows):
 677                 for col in range(ncols):
 678                     m = find_token(document.body, "<cell", m)
 679                     multicolumn = get_option_value(document.body[m], 'multicolumn')
 680                     multirow = get_option_value(document.body[m], 'multirow')
 681                     width = get_option_value(document.body[m], 'width')
 682                     rotate = get_option_value(document.body[m], 'rotate')
 683                     # Check for: linebreaks, multipars, non-standard environments
 684                     begcell = m
 685                     endcell = find_token(document.body, "</cell>", begcell)
 686                     vcand = False
 687                     if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
 688                         vcand = True
 689                     elif count_pars_in_inset(document.body, begcell + 2) > 1:
 690                         vcand = True
 691                     elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
 692                         vcand = True
 693                     if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""):
 694                         if col_info[col][0] == "" and col_info[col][1] == "" and col_info[col][3] == "":
 695                             needvarwidth = True
 696                             alignment = col_info[col][2]
 697                             col_line = col_info[col][4]
 698                             vval = ""
 699                             if alignment == "center":
 700                                 vval = ">{\\centering}"
 701                             elif  alignment == "left":
 702                                 vval = ">{\\raggedright}"
 703                             elif alignment == "right":
 704                                 vval = ">{\\raggedleft}"
 705                             if vval != "":
 706                                 needarray = True
 707                             vval += "V{\\linewidth}"
 708
 709                             document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
 710                             # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
 711                             # with newlines, and we do not want that)
 712                             while True:
 713                                 endcell = find_token(document.body, "</cell>", begcell)
 714                                 linebreak = False
 715                                 nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
 716                                 if nl == -1:
 717                                     nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
 718                                     if nl == -1:
 719                                          break
 720                                     linebreak = True
 721                                 nle = find_end_of_inset(document.body, nl)
 722                                 del(document.body[nle:nle+1])
 723                                 if linebreak:
 724                                     document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
 725                                 else:
 726                                     document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
 727                     m += 1
 728
 729             i = j + 1
 730
 731     finally:
 732         if needarray == True:
 733             add_to_preamble(document, ["\\usepackage{array}"])
 734         if needvarwidth == True:
 735             add_to_preamble(document, ["\\usepackage{varwidth}"])
 736
 737
 738 def revert_bibencoding(document):
 739     " Revert bibliography encoding "
 740
 741     # Get cite engine
 742     engine = "basic"
 743     i = find_token(document.header, "\\cite_engine", 0)
 744     if i == -1:
 745         document.warning("Malformed document! Missing \\cite_engine")
 746     else:
 747         engine = get_value(document.header, "\\cite_engine", i)
 748
 749     # Check if biblatex
 750     biblatex = False
 751     if engine in ["biblatex", "biblatex-natbib"]:
 752         biblatex = True
 753
 754     # Map lyx to latex encoding names
 755     encodings = {
 756         "utf8" : "utf8",
 757         "utf8x" : "utf8x",
 758         "armscii8" : "armscii8",
 759         "iso8859-1" : "latin1",
 760         "iso8859-2" : "latin2",
 761         "iso8859-3" : "latin3",
 762         "iso8859-4" : "latin4",
 763         "iso8859-5" : "iso88595",
 764         "iso8859-6" : "8859-6",
 765         "iso8859-7" : "iso-8859-7",
 766         "iso8859-8" : "8859-8",
 767         "iso8859-9" : "latin5",
 768         "iso8859-13" : "latin7",
 769         "iso8859-15" : "latin9",
 770         "iso8859-16" : "latin10",
 771         "applemac" : "applemac",
 772         "cp437" : "cp437",
 773         "cp437de" : "cp437de",
 774         "cp850" : "cp850",
 775         "cp852" : "cp852",
 776         "cp855" : "cp855",
 777         "cp858" : "cp858",
 778         "cp862" : "cp862",
 779         "cp865" : "cp865",
 780         "cp866" : "cp866",
 781         "cp1250" : "cp1250",
 782         "cp1251" : "cp1251",
 783         "cp1252" : "cp1252",
 784         "cp1255" : "cp1255",
 785         "cp1256" : "cp1256",
 786         "cp1257" : "cp1257",
 787         "koi8-r" : "koi8-r",
 788         "koi8-u" : "koi8-u",
 789         "pt154" : "pt154",
 790         "utf8-platex" : "utf8",
 791         "ascii" : "ascii"
 792     }
 793
 794     i = 0
 795     bibresources = []
 796     while (True):
 797         i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
 798         if i == -1:
 799             break
 800         j = find_end_of_inset(document.body, i)
 801         if j == -1:
 802             document.warning("Can't find end of bibtex inset at line %d!!" %(i))
 803             i += 1
 804             continue
 805         encoding = get_quoted_value(document.body, "encoding", i, j)
 806         if not encoding:
 807             i += 1
 808             continue
 809         # remove encoding line
 810         k = find_token(document.body, "encoding", i, j)
 811         if k != -1:
 812             del document.body[k]
 813         # Re-find inset end line
 814         j = find_end_of_inset(document.body, i)
 815         if biblatex:
 816             biblio_options = ""
 817             h = find_token(document.header, "\\biblio_options", 0)
 818             if h != -1:
 819                 biblio_options = get_value(document.header, "\\biblio_options", h)
 820                 if not "bibencoding" in biblio_options:
 821                      document.header[h] += ",bibencoding=%s" % encodings[encoding]
 822             else:
 823                 bs = find_token(document.header, "\\biblatex_bibstyle", 0)
 824                 if bs == -1:
 825                     # this should not happen
 826                     document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
 827                 else:
 828                     document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + encodings[encoding]]
 829         else:
 830             document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
 831             document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + encodings[encoding] + "}")
 832
 833         i = j + 1
 834
 835
 836
 837 def convert_vcsinfo(document):
 838     " Separate vcs Info inset from buffer Info inset. "
 839
 840     types = {
 841         "vcs-revision" : "revision",
 842         "vcs-tree-revision" : "tree-revision",
 843         "vcs-author" : "author",
 844         "vcs-time" : "time",
 845         "vcs-date" : "date"
 846     }
 847     i = 0
 848     while True:
 849         i = find_token(document.body, "\\begin_inset Info", i)
 850         if i == -1:
 851             return
 852         j = find_end_of_inset(document.body, i + 1)
 853         if j == -1:
 854             document.warning("Malformed LyX document: Could not find end of Info inset.")
 855             i = i + 1
 856             continue
 857         tp = find_token(document.body, 'type', i, j)
 858         tpv = get_quoted_value(document.body, "type", tp)
 859         if tpv != "buffer":
 860             i = i + 1
 861             continue
 862         arg = find_token(document.body, 'arg', i, j)
 863         argv = get_quoted_value(document.body, "arg", arg)
 864         if argv not in list(types.keys()):
 865             i = i + 1
 866             continue
 867         document.body[tp] = "type \"vcs\""
 868         document.body[arg] = "arg \"" + types[argv] + "\""
 869         i = i + 1
 870
 871
 872 def revert_vcsinfo(document):
 873     " Merge vcs Info inset to buffer Info inset. "
 874
 875     args = ["revision", "tree-revision", "author", "time", "date" ]
 876     i = 0
 877     while True:
 878         i = find_token(document.body, "\\begin_inset Info", i)
 879         if i == -1:
 880             return
 881         j = find_end_of_inset(document.body, i + 1)
 882         if j == -1:
 883             document.warning("Malformed LyX document: Could not find end of Info inset.")
 884             i = i + 1
 885             continue
 886         tp = find_token(document.body, 'type', i, j)
 887         tpv = get_quoted_value(document.body, "type", tp)
 888         if tpv != "vcs":
 889             i = i + 1
 890             continue
 891         arg = find_token(document.body, 'arg', i, j)
 892         argv = get_quoted_value(document.body, "arg", arg)
 893         if argv not in args:
 894             document.warning("Malformed Info inset. Invalid vcs arg.")
 895             i = i + 1
 896             continue
 897         document.body[tp] = "type \"buffer\""
 898         document.body[arg] = "arg \"vcs-" + argv + "\""
 899         i = i + 1
 900
 901
 902 def revert_dateinfo(document):
 903     " Revert date info insets to static text. "
 904
 905 # FIXME This currently only considers the main language and uses the system locale
 906 # Ideally, it should honor context languages and switch the locale accordingly.
 907
 908     # The date formats for each language using strftime syntax:
 909     # long, short, loclong, locmedium, locshort
 910     dateformats = {
 911         "afrikaans" : ["%A, %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
 912         "albanian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 913         "american" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 914         "amharic" : ["%A ፣%d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 915         "ancientgreek" : ["%A, %d %B %Y", "%d %b %Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 916         "arabic_arabi" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
 917         "arabic_arabtex" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
 918         "armenian" : ["%Y թ. %B %d, %A", "%d.%m.%y", "%d %B، %Y", "%d %b، %Y", "%d/%m/%Y"],
 919         "asturian" : ["%A, %d %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
 920         "australian" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 921         "austrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 922         "bahasa" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 923         "bahasam" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 924         "basque" : ["%Y(e)ko %B %d, %A", "%y/%m/%d", "%Y %B %d", "%Y %b %d", "%Y/%m/%d"],
 925         "belarusian" : ["%A, %d %B %Y г.", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 926         "bosnian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%Y-%m-%d"],
 927         "brazilian" : ["%A, %d de %B de %Y", "%d/%m/%Y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
 928         "breton" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 929         "british" : ["%A, %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 930         "bulgarian" : ["%A, %d %B %Y г.", "%d.%m.%y г.", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 931         "canadian" : ["%A, %B %d, %Y", "%Y-%m-%d", "%B %d, %Y", "%d %b %Y", "%Y-%m-%d"],
 932         "canadien" : ["%A %d %B %Y", "%y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 933         "catalan" : ["%A, %d %B de %Y", "%d/%m/%y", "%d / %B / %Y", "%d / %b / %Y", "%d/%m/%Y"],
 934         "chinese-simplified" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y-%m-%d", "%y-%m-%d"],
 935         "chinese-traditional" : ["%Y年%m月%d日 %A", "%Y/%m/%d", "%Y年%m月%d日", "%Y年%m月%d日", "%y年%m月%d日"],
 936         "coptic" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 937         "croatian" : ["%A, %d. %B %Y.", "%d. %m. %Y.", "%d. %B %Y.", "%d. %b. %Y.", "%d.%m.%Y."],
 938         "czech" : ["%A %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b. %Y", "%d.%m.%Y"],
 939         "danish" : ["%A den %d. %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
 940         "divehi" : ["%Y %B %d, %A", "%Y-%m-%d", "%Y %B %d", "%Y %b %d", "%d/%m/%Y"],
 941         "dutch" : ["%A %d %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 942         "english" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 943         "esperanto" : ["%A, %d %B %Y", "%d %b %Y", "la %d de %B %Y", "la %d de %b %Y", "%m/%d/%Y"],
 944         "estonian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 945         "farsi" : ["%A %d %B %Y", "%Y/%m/%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
 946         "finnish" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 947         "french" : ["%A %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 948         "friulan" : ["%A %d di %B dal %Y", "%d/%m/%y", "%d di %B dal %Y", "%d di %b dal %Y", "%d/%m/%Y"],
 949         "galician" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
 950         "georgian" : ["%A, %d %B, %Y", "%d.%m.%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 951         "german" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 952         "german-ch" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 953         "german-ch-old" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 954         "greek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 955         "hebrew" : ["%A, %d ב%B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 956         "hindi" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 957         "icelandic" : ["%A, %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 958         "interlingua" : ["%Y %B %d, %A", "%Y-%m-%d", "le %d de %B %Y", "le %d de %b %Y", "%Y-%m-%d"],
 959         "irish" : ["%A %d %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
 960         "italian" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d/%b/%Y", "%d/%m/%Y"],
 961         "japanese" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
 962         "japanese-cjk" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
 963         "kannada" : ["%A, %B %d, %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d-%m-%Y"],
 964         "kazakh" : ["%Y ж. %d %B, %A", "%d.%m.%y", "%d %B %Y", "%d %B %Y", "%Y-%d-%m"],
 965         "khmer" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
 966         "korean" : ["%Y년 %m월 %d일 %A", "%y. %m. %d.", "%Y년 %m월 %d일", "%Y. %m. %d.", "%y. %m. %d."],
 967         "kurmanji" : ["%A, %d %B %Y", "%d %b %Y", "%d. %B %Y", "%d. %m. %Y", "%Y-%m-%d"],
 968         "lao" : ["%A ທີ %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
 969         "latin" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 970         "latvian" : ["%A, %Y. gada %d. %B", "%d.%m.%y", "%Y. gada %d. %B", "%Y. gada %d. %b", "%d.%m.%Y"],
 971         "lithuanian" : ["%Y m. %B %d d., %A", "%Y-%m-%d", "%Y m. %B %d d.", "%Y m. %B %d d.", "%Y-%m-%d"],
 972         "lowersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 973         "macedonian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 974         "magyar" : ["%Y. %B %d., %A", "%Y. %m. %d.", "%Y. %B %d.", "%Y. %b %d.", "%Y.%m.%d."],
 975         "marathi" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 976         "mongolian" : ["%A, %Y оны %m сарын %d", "%Y-%m-%d", "%Y оны %m сарын %d", "%d-%m-%Y", "%d-%m-%Y"],
 977         "naustrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 978         "newzealand" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 979         "ngerman" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 980         "norsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 981         "nynorsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 982         "occitan" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 983         "piedmontese" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 984         "polish" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 985         "polutonikogreek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 986         "portuguese" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%Y/%m/%d"],
 987         "romanian" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 988         "romansh" : ["%A, ils %d da %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 989         "russian" : ["%A, %d %B %Y г.", "%d.%m.%Y", "%d %B %Y г.", "%d %b %Y г.", "%d.%m.%Y"],
 990         "samin" : ["%Y %B %d, %A", "%Y-%m-%d", "%B %d. b. %Y", "%b %d. b. %Y", "%d.%m.%Y"],
 991         "sanskrit" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
 992         "scottish" : ["%A, %dmh %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 993         "serbian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 994         "serbian-latin" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 995         "slovak" : ["%A, %d. %B %Y", "%d. %m. %Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 996         "slovene" : ["%A, %d. %B %Y", "%d. %m. %y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 997         "spanish" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B %de %Y", "%d %b %Y", "%d/%m/%Y"],
 998         "spanish-mexico" : ["%A, %d de %B %de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
 999         "swedish" : ["%A %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
1000         "syriac" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1001         "tamil" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
1002         "telugu" : ["%d, %B %Y, %A", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
1003         "thai" : ["%Aที่ %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1004         "tibetan" : ["%Y %Bའི་ཚེས་%d, %A", "%Y-%m-%d", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
1005         "turkish" : ["%d %B %Y %A", "%d.%m.%Y", "%d %B %Y", "%d.%b.%Y", "%d.%m.%Y"],
1006         "turkmen" : ["%d %B %Y %A", "%d.%m.%Y", "%Y ý. %B %d", "%d.%m.%Y ý.", "%d.%m.%y ý."],
1007         "ukrainian" : ["%A, %d %B %Y р.", "%d.%m.%y", "%d %B %Y", "%d %m %Y", "%d.%m.%Y"],
1008         "uppersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
1009         "urdu" : ["%A، %d %B، %Y", "%d/%m/%y", "%d %B, %Y", "%d %b %Y", "%d/%m/%Y"],
1010         "vietnamese" : ["%A, %d %B, %Y", "%d/%m/%Y", "%d tháng %B %Y", "%d-%m-%Y", "%d/%m/%Y"],
1011         "welsh" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1012     }
1013
1014     types = ["date", "fixdate", "moddate" ]
1015     i = 0
1016     i = find_token(document.header, "\\language", 0)
1017     if i == -1:
1018         # this should not happen
1019         document.warning("Malformed LyX document! No \\language header found!")
1020         return
1021     lang = get_value(document.header, "\\language", i)
1022
1023     i = 0
1024     while True:
1025         i = find_token(document.body, "\\begin_inset Info", i)
1026         if i == -1:
1027             return
1028         j = find_end_of_inset(document.body, i + 1)
1029         if j == -1:
1030             document.warning("Malformed LyX document: Could not find end of Info inset.")
1031             i = i + 1
1032             continue
1033         tp = find_token(document.body, 'type', i, j)
1034         tpv = get_quoted_value(document.body, "type", tp)
1035         if tpv not in types:
1036             i = i + 1
1037             continue
1038         arg = find_token(document.body, 'arg', i, j)
1039         argv = get_quoted_value(document.body, "arg", arg)
1040         isodate = ""
1041         dte = date.today()
1042         if tpv == "fixdate":
1043             datecomps = argv.split('@')
1044             if len(datecomps) > 1:
1045                 argv = datecomps[0]
1046                 isodate = datecomps[1]
1047                 m = re.search('(\d\d\d\d)-(\d\d)-(\d\d)', isodate)
1048                 if m:
1049                     dte = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
1050 # FIXME if we had the path to the original document (not the one in the tmp dir),
1051 #        we could use the mtime.
1052 #        elif tpv == "moddate":
1053 #            dte = date.fromtimestamp(os.path.getmtime(document.dir))
1054         result = ""
1055         if argv == "ISO":
1056             result = dte.isodate()
1057         elif argv == "long":
1058             result = dte.strftime(dateformats[lang][0])
1059         elif argv == "short":
1060             result = dte.strftime(dateformats[lang][1])
1061         elif argv == "loclong":
1062             result = dte.strftime(dateformats[lang][2])
1063         elif argv == "locmedium":
1064             result = dte.strftime(dateformats[lang][3])
1065         elif argv == "locshort":
1066             result = dte.strftime(dateformats[lang][4])
1067         else:
1068             fmt = argv.replace("MMMM", "%b").replace("MMM", "%b").replace("MM", "%m").replace("M", "%m")
1069             fmt = fmt.replace("yyyy", "%Y").replace("yy", "%y")
1070             fmt = fmt.replace("dddd", "%A").replace("ddd", "%a").replace("dd", "%d")
1071             fmt = re.sub('[^\'%]d', '%d', fmt)
1072             fmt = fmt.replace("'", "")
1073             result = dte.strftime(fmt)
1074         document.body[i : j+1] = result
1075         i = i + 1
1076
1077
def _qt_time_format_to_strftime(qtfmt):
    """ Translate a Qt time format string (h, hh, H, HH, m, mm, s, ss,
    z, zzz, t, AP/A, ap/a, 'quoted text') into strftime syntax.

    Text in single quotes is copied verbatim, '' yields a literal quote
    and literal percent signs are escaped. As in Qt, h and hh denote the
    12-hour clock only if the format also contains an AM/PM specifier.
    z and zzz are approximated by %f, and both AM/PM spellings by %p.
    """
    # Longer specifiers first, so that e.g. "HH" is not read as "H" + "H"
    specs = ["AP", "ap", "A", "a", "HH", "H", "hh", "h",
             "mm", "m", "ss", "s", "zzz", "z", "t"]
    tokens = []  # pairs (is_specifier, text)
    pos = 0
    n = len(qtfmt)
    while pos < n:
        if qtfmt[pos] == "'":
            if qtfmt[pos + 1 : pos + 2] == "'":
                # two consecutive quotes stand for one literal quote
                tokens.append((False, "'"))
                pos += 2
                continue
            end = qtfmt.find("'", pos + 1)
            if end == -1:
                # unterminated quote: treat the rest as literal text
                end = n
            tokens.append((False, qtfmt[pos + 1 : end]))
            pos = end + 1
            continue
        for spec in specs:
            if qtfmt.startswith(spec, pos):
                tokens.append((True, spec))
                pos += len(spec)
                break
        else:
            tokens.append((False, qtfmt[pos]))
            pos += 1

    ampm = False
    for is_spec, text in tokens:
        if is_spec and text in ("AP", "ap", "A", "a"):
            ampm = True
            break
    if ampm:
        hour = "%I"
    else:
        hour = "%H"
    directives = {
        "AP" : "%p", "ap" : "%p", "A" : "%p", "a" : "%p",
        "HH" : "%H", "H" : "%H", "hh" : hour, "h" : hour,
        "mm" : "%M", "m" : "%M", "ss" : "%S", "s" : "%S",
        "zzz" : "%f", "z" : "%f", "t" : "%Z"
    }
    out = []
    for is_spec, text in tokens:
        if is_spec:
            out.append(directives[text])
        else:
            out.append(text.replace("%", "%%"))
    return "".join(out)


def revert_timeinfo(document):
    """ Revert time info insets to static text.

    Every Info inset of type time, fixtime or modtime in document.body
    is replaced by a single line holding the formatted time. fixtime
    insets use the time stored behind the '@' of their argument, all
    others use the current time.
    """

# FIXME This currently only considers the main language and uses the system locale
# Ideally, it should honor context languages and switch the locale accordingly.
# Also, the time object is "naive", i.e., it does not know of timezones (%Z will
# be empty).

    # The time formats for each language using strftime syntax:
    # long, short
    timeformats = {
        "afrikaans" : ["%H:%M:%S %Z", "%H:%M"],
        "albanian" : ["%I:%M:%S %p, %Z", "%I:%M %p"],
        "american" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "amharic" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ancientgreek" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "arabic_arabi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "arabic_arabtex" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "armenian" : ["%H:%M:%S %Z", "%H:%M"],
        "asturian" : ["%H:%M:%S %Z", "%H:%M"],
        "australian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "austrian" : ["%H:%M:%S %Z", "%H:%M"],
        "bahasa" : ["%H.%M.%S %Z", "%H.%M"],
        "bahasam" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "basque" : ["%H:%M:%S (%Z)", "%H:%M"],
        "belarusian" : ["%H:%M:%S, %Z", "%H:%M"],
        "bosnian" : ["%H:%M:%S %Z", "%H:%M"],
        "brazilian" : ["%H:%M:%S %Z", "%H:%M"],
        "breton" : ["%H:%M:%S %Z", "%H:%M"],
        "british" : ["%H:%M:%S %Z", "%H:%M"],
        "bulgarian" : ["%H:%M:%S %Z", "%H:%M"],
        "canadian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "canadien" : ["%H:%M:%S %Z", "%H h %M"],
        "catalan" : ["%H:%M:%S %Z", "%H:%M"],
        "chinese-simplified" : ["%Z %p%I:%M:%S", "%p%I:%M"],
        "chinese-traditional" : ["%p%I:%M:%S [%Z]", "%p%I:%M"],
        "coptic" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "croatian" : ["%H:%M:%S (%Z)", "%H:%M"],
        "czech" : ["%H:%M:%S %Z", "%H:%M"],
        "danish" : ["%H.%M.%S %Z", "%H.%M"],
        "divehi" : ["%H:%M:%S %Z", "%H:%M"],
        "dutch" : ["%H:%M:%S %Z", "%H:%M"],
        "english" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "esperanto" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "estonian" : ["%H:%M:%S %Z", "%H:%M"],
        "farsi" : ["%H:%M:%S (%Z)", "%H:%M"],
        "finnish" : ["%H.%M.%S %Z", "%H.%M"],
        "french" : ["%H:%M:%S %Z", "%H:%M"],
        "friulan" : ["%H:%M:%S %Z", "%H:%M"],
        "galician" : ["%H:%M:%S %Z", "%H:%M"],
        "georgian" : ["%H:%M:%S %Z", "%H:%M"],
        "german" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch-old" : ["%H:%M:%S %Z", "%H:%M"],
        "greek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "hebrew" : ["%H:%M:%S %Z", "%H:%M"],
        "hindi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "icelandic" : ["%H:%M:%S %Z", "%H:%M"],
        "interlingua" : ["%H:%M:%S %Z", "%H:%M"],
        "irish" : ["%H:%M:%S %Z", "%H:%M"],
        "italian" : ["%H:%M:%S %Z", "%H:%M"],
        "japanese" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "japanese-cjk" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "kannada" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "kazakh" : ["%H:%M:%S %Z", "%H:%M"],
        "khmer" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "korean" : ["%p %I시%M분 %S초 %Z", "%p %I:%M"],
        "kurmanji" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "lao" : ["%H ໂມງ%M ນາທີ  %S ວິນາທີ %Z", "%H:%M"],
        "latin" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "latvian" : ["%H:%M:%S %Z", "%H:%M"],
        "lithuanian" : ["%H:%M:%S %Z", "%H:%M"],
        "lowersorbian" : ["%H:%M:%S %Z", "%H:%M"],
        "macedonian" : ["%H:%M:%S %Z", "%H:%M"],
        "magyar" : ["%H:%M:%S %Z", "%H:%M"],
        "marathi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "mongolian" : ["%H:%M:%S %Z", "%H:%M"],
        "naustrian" : ["%H:%M:%S %Z", "%H:%M"],
        "newzealand" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ngerman" : ["%H:%M:%S %Z", "%H:%M"],
        "norsk" : ["%H:%M:%S %Z", "%H:%M"],
        "nynorsk" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "occitan" : ["%H:%M:%S %Z", "%H:%M"],
        "piedmontese" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "polish" : ["%H:%M:%S %Z", "%H:%M"],
        "polutonikogreek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "portuguese" : ["%H:%M:%S %Z", "%H:%M"],
        "romanian" : ["%H:%M:%S %Z", "%H:%M"],
        "romansh" : ["%H:%M:%S %Z", "%H:%M"],
        "russian" : ["%H:%M:%S %Z", "%H:%M"],
        "samin" : ["%H:%M:%S %Z", "%H:%M"],
        "sanskrit" : ["%H:%M:%S %Z", "%H:%M"],
        "scottish" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian-latin" : ["%H:%M:%S %Z", "%H:%M"],
        "slovak" : ["%H:%M:%S %Z", "%H:%M"],
        "slovene" : ["%H:%M:%S %Z", "%H:%M"],
        "spanish" : ["%H:%M:%S (%Z)", "%H:%M"],
        "spanish-mexico" : ["%H:%M:%S %Z", "%H:%M"],
        "swedish" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "syriac" : ["%H:%M:%S %Z", "%H:%M"],
        "tamil" : ["%p %I:%M:%S %Z", "%p %I:%M"],
        "telugu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "thai" : ["%H นาฬิกา %M นาที  %S วินาที %Z", "%H:%M"],
        "tibetan" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "turkish" : ["%H:%M:%S %Z", "%H:%M"],
        "turkmen" : ["%H:%M:%S %Z", "%H:%M"],
        "ukrainian" : ["%H:%M:%S %Z", "%H:%M"],
        "uppersorbian" : ["%H:%M:%S %Z", "%H:%M hodź."],
        "urdu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "vietnamese" : ["%H:%M:%S %Z", "%H:%M"],
        "welsh" : ["%H:%M:%S %Z", "%H:%M"]
    }

    # Position of each named format within a timeformats entry
    named_formats = {"long" : 0, "short" : 1}

    types = ["time", "fixtime", "modtime" ]
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        # this should not happen
        document.warning("Malformed LyX document! No \\language header found!")
        return
    lang = get_value(document.header, "\\language", i)
    # None if the table has no entry for the document language;
    # resolved (with a warning) on first use below.
    langformats = timeformats.get(lang)

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv not in types:
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        tme = datetime.now().time()
        if tpv == "fixtime":
            # the argument has the form <format>@<ISO time>
            timecomps = argv.split('@')
            if len(timecomps) > 1:
                argv = timecomps[0]
                isotime = timecomps[1]
                try:
                    m = re.search(r'(\d\d):(\d\d):(\d\d)', isotime)
                    if m:
                        tme = time(int(m.group(1)), int(m.group(2)), int(m.group(3)))
                    else:
                        m = re.search(r'(\d\d):(\d\d)', isotime)
                        if m:
                            tme = time(int(m.group(1)), int(m.group(2)))
                except ValueError:
                    document.warning("Invalid time `%s' in Info inset. Using current time." % isotime)
# FIXME if we had the path to the original document (not the one in the tmp dir),
#        we could use the mtime.
#        elif tpv == "modtime":
#            tme = datetime.fromtimestamp(os.path.getmtime(document.dir)).time()
        if argv == "ISO":
            # hh:mm:ss, without the microseconds of the current time
            result = tme.replace(microsecond=0).isoformat()
        elif argv in named_formats:
            if langformats is None:
                document.warning("No time formats known for language `%s'. Using English formats." % lang)
                langformats = timeformats["english"]
            result = tme.strftime(langformats[named_formats[argv]])
        else:
            # custom format in Qt syntax
            result = tme.strftime(_qt_time_format_to_strftime(argv))
        # The body is a list of lines: wrap the text in a list, otherwise
        # every character would become a line of its own.
        document.body[i : j+1] = [result]
        i = i + 1
1253
1254
def revert_namenoextinfo(document):
    " Merge buffer Info inset type name-noext to name. "

    body = document.body
    start = find_token(body, "\\begin_inset Info", 0)
    while start != -1:
        end = find_end_of_inset(body, start + 1)
        if end == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
        else:
            type_line = find_token(body, 'type', start, end)
            if get_quoted_value(body, "type", type_line) == "buffer":
                arg_line = find_token(body, 'arg', start, end)
                # only the name-noext argument is rewritten
                if get_quoted_value(body, "arg", arg_line) == "name-noext":
                    body[arg_line] = "arg \"name\""
        # resume the search on the line after this inset's first line
        start = find_token(body, "\\begin_inset Info", start + 1)
1280
1281
def revert_l7ninfo(document):
    """ Revert l7n Info inset to text.

    Every Info inset of type l7n in document.body is replaced by a
    single line holding its argument, stripped of GUI decorations.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "l7n":
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        # remove trailing colons, menu accelerator (|...) and qt accelerator (&), while keeping literal " & "
        argv = argv.rstrip(':').split('|')[0].replace(" & ", "</amp;>").replace("&", "").replace("</amp;>", " & ")
        # The body is a list of lines: wrap the text in a list, otherwise
        # every character would become a line of its own.
        document.body[i : j+1] = [argv]
        i = i + 1
1306
1307
def revert_listpargs(document):
    """ Reverts listpreamble arguments to TeX-code

    Each "Argument listpreamble:" inset is removed and its Plain Layout
    content is re-inserted, wrapped in braces inside an ERT inset, at
    the start of the containing paragraph. Malformed insets are skipped
    with a warning.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Argument listpreamble:", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Without this guard nothing is deleted below, yet the ERT is
            # inserted in front of the inset, so it would be found again
            # and again.
            document.warning("Malformed LyX document: Can't find end of listpreamble Argument inset")
            i += 1
            continue
        # Find containing paragraph layout
        parent = get_containing_layout(document.body, i)
        if not parent:
            document.warning("Malformed LyX document: Can't find parent paragraph layout")
            i += 1
            continue
        parbeg = parent[3]
        # Only look for the content inside the inset itself
        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i, j)
        endPlain = -1
        if beginPlain != -1:
            endPlain = find_end_of_layout(document.body, beginPlain)
        if endPlain == -1:
            document.warning("Malformed LyX document: Can't find content of listpreamble Argument inset")
            i += 1
            continue
        content = document.body[beginPlain + 1 : endPlain]
        del document.body[i:j+1]
        subst = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout",
                 "{"] + content + ["}", "\\end_layout", "", "\\end_inset", ""]
        document.body[parbeg : parbeg] = subst
        i += 1
1331
1332
def revert_lformatinfo(document):
    """ Revert layout format Info inset to text.

    Every Info inset of type lyxinfo with argument layoutformat in
    document.body is replaced by a single line holding the (hardcoded)
    layout format number.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "lyxinfo":
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        if argv != "layoutformat":
            i = i + 1
            continue
        # hardcoded for now
        # The body is a list of lines: wrap the text in a list, otherwise
        # "6" and "9" would become two separate lines.
        document.body[i : j+1] = ["69"]
        i = i + 1
1359
1360
1361 ##
1362 # Conversion hub
1363 #
1364
# Version strings associated with this module (presumably the LyX releases
# whose file format it targets -- confirm against the lyx2lyx driver).
supported_versions = ["2.4.0", "2.4"]

# Forward conversion table, listed in ascending format-number order.
# Every entry is [format number, [functions]]; an empty function list means
# no function is registered for that format number.
convert = [
    [545, [convert_lst_literalparam]],
    [546, []],
    [547, []],
    [548, []],
    [549, []],
    [550, [convert_fontenc]],
    [551, []],
    [552, []],
    [553, []],
    [554, []],
    [555, []],
    [556, []],
    [557, [convert_vcsinfo]],
    [558, [removeFrontMatterStyles]],
    [559, []],
    [560, []],
    [561, [convert_latexFonts]],  # Handle dejavu, ibmplex fonts in GUI
    [562, []],
    [563, []],
    [564, []],
]
1388
# Backward conversion table, listed in descending format-number order.
# Every entry is [format number, [functions]]; the functions of one entry
# are kept in the order given here.
revert = [
    [563, [revert_lformatinfo]],
    [562, [revert_listpargs]],
    [561, [revert_l7ninfo]],
    [560, [revert_latexFonts]],  # Handle dejavu, ibmplex fonts in user preamble
    [559, [revert_timeinfo, revert_namenoextinfo]],
    [558, [revert_dateinfo]],
    [557, [addFrontMatterStyles]],
    [556, [revert_vcsinfo]],
    [555, [revert_bibencoding]],
    [554, [revert_vcolumns]],
    [553, [revert_stretchcolumn]],
    [552, [revert_tuftecite]],
    [551, [revert_floatpclass, revert_floatalignment]],
    [550, [revert_nospellcheck]],
    [549, [revert_fontenc]],
    [548, []],  # dummy format change
    [547, [revert_lscape]],
    [546, [revert_xcharter]],
    [545, [revert_paratype]],
    [544, [revert_lst_literalparam]],
]
1411
1412
# Executing this file as a script is a no-op: the guard body is a bare
# ``pass``.
if __name__ == "__main__":
    pass