lib/lyx2lyx/lyx_2_4.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # Copyright (C) 2018 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 """ Convert files to the file format generated by lyx 2.4"""
  20
  21 import re, string
  22 import unicodedata
  23 import sys, os
  24
  25 from datetime import (datetime, date, time)
  26
  27 # Uncomment only what you need to import, please.
  28
  29 from parser_tools import (count_pars_in_inset, find_end_of_inset, find_end_of_layout,
  30                           find_token, find_re, get_bool_value, get_containing_layout,
  31                           get_option_value, get_value, get_quoted_value)
  32 #    del_token, del_value, del_complete_lines,
  33 #    find_complete_lines, find_end_of,
  34 #    find_re, find_substring, find_token_backwards,
  35 #    get_containing_inset,
  36 #    is_in_inset, set_bool_value
  37 #    find_tokens, find_token_exact, check_token
  38
  39 from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble, revert_language, revert_flex_inset)
  40 #  revert_font_attrs, insert_to_preamble, latex_length
  41 #  get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
  42 #  revert_flex_inset, hex2ratio, str2bool
  43
  44 ####################################################################
  45 # Private helper functions
  46
  47 def add_preamble_fonts(document, fontmap):
  48     " Add collected font-packages with their option to user-preamble"
  49
  50     for pkg in fontmap:
  51         if len(fontmap[pkg]) > 0:
  52             xoption = "[" + ",".join(fontmap[pkg]) + "]"
  53         else:
  54             xoption = ""
  55         preamble = "\\usepackage" + xoption + "{%s}" % pkg
  56         add_to_preamble(document, [preamble])
  57
  58
  59 def createkey(pkg, options):
  60     options.sort()
  61     return pkg + ':' + "-".join(options)
  62
  63 class fontinfo:
  64     def __init__(self):
  65         self.fontname = None    # key into font2pkgmap
  66         self.fonttype = None    # roman,sans,typewriter,math
  67         self.scaletype = None   # None,sf,tt
  68         self.scaleopt = None    # None, 'scaled', 'scale'
  69         self.scaleval = 1
  70         self.package = None
  71         self.options = []
  72         self.pkgkey = None      # key into pkg2fontmap
  73
  74     def addkey(self):
  75         self.pkgkey = createkey(self.package, self.options)
  76
  77 class fontmapping:
  78     def __init__(self):
  79         self.font2pkgmap = dict()
  80         self.pkg2fontmap = dict()
  81         self.pkginmap = dict()  # defines, if a map for package exists
  82
  83     def expandFontMapping(self, font_list, font_type, scale_type, pkg, scaleopt = None):
  84         " Expand fontinfo mapping"
  85         #
  86         # fontlist:    list of fontnames, each element
  87         #              may contain a ','-separated list of needed options
  88         #              like e.g. 'IBMPlexSansCondensed,condensed'
  89         # font_type:   one of 'roman', 'sans', 'typewriter', 'math'
  90         # scale_type:  one of None, 'sf', 'tt'
  91         # pkg:         package defining the font. Defaults to fontname if None
  92         # scaleopt:    one of None, 'scale', 'scaled', or some other string
  93         #              to be used in scale option (e.g. scaled=0.7)
  94         for fl in font_list:
  95             fe = fontinfo()
  96             fe.fonttype = font_type
  97             fe.scaletype = scale_type
  98             flt = fl.split(",")
  99             font_name = flt[0]
 100             fe.fontname = font_name
 101             fe.options = flt[1:]
 102             fe.scaleopt = scaleopt
 103             if pkg == None:
 104                 fe.package = font_name
 105             else:
 106                 fe.package = pkg
 107             fe.addkey()
 108             self.font2pkgmap[font_name] = fe
 109             if fe.pkgkey in self.pkg2fontmap:
 110                 # Repeated the same entry? Check content
 111                 if self.pkg2fontmap[fe.pkgkey] != font_name:
 112                     document.error("Something is wrong in pkgname+options <-> fontname mapping")
 113             self.pkg2fontmap[fe.pkgkey] = font_name
 114             self.pkginmap[fe.package] = 1
 115
 116     def getfontname(self, pkg, options):
 117         options.sort()
 118         pkgkey = createkey(pkg, options)
 119         if not pkgkey in self.pkg2fontmap:
 120             return None
 121         fontname = self.pkg2fontmap[pkgkey]
 122         if not fontname in self.font2pkgmap:
 123             document.error("Something is wrong in pkgname+options <-> fontname mapping")
 124             return None
 125         if pkgkey == self.font2pkgmap[fontname].pkgkey:
 126             return fontname
 127         return None
 128
 129 def createFontMapping(fontlist):
 130     # Create info for known fonts for the use in
 131     #   convert_latexFonts() and
 132     #   revert_latexFonts()
 133     #
 134     # * Would be more handy to parse latexFonts file,
 135     #   but the path to this file is unknown
 136     # * For now, add DejaVu and IBMPlex only.
 137     # * Expand, if desired
 138     fm = fontmapping()
 139     for font in fontlist:
 140         if font == 'DejaVu':
 141             fm.expandFontMapping(['DejaVuSerif', 'DejaVuSerifCondensed'], "roman", None, None)
 142             fm.expandFontMapping(['DejaVuSans','DejaVuSansCondensed'], "sans", "sf", None, "scaled")
 143             fm.expandFontMapping(['DejaVuSansMono'], "typewriter", "tt", None, "scaled")
 144         elif font == 'IBM':
 145             fm.expandFontMapping(['IBMPlexSerif', 'IBMPlexSerifThin,thin',
 146                                   'IBMPlexSerifExtraLight,extralight', 'IBMPlexSerifLight,light',
 147                                   'IBMPlexSerifSemibold,semibold'],
 148                                  "roman", None, "plex-serif")
 149             fm.expandFontMapping(['IBMPlexSans','IBMPlexSansCondensed,condensed',
 150                                   'IBMPlexSansThin,thin', 'IBMPlexSansExtraLight,extralight',
 151                                   'IBMPlexSansLight,light', 'IBMPlexSansSemibold,semibold'],
 152                                  "sans", "sf", "plex-sans", "scale")
 153             fm.expandFontMapping(['IBMPlexMono', 'IBMPlexMonoThin,thin',
 154                                   'IBMPlexMonoExtraLight,extralight', 'IBMPlexMonoLight,light',
 155                                   'IBMPlexMonoSemibold,semibold'],
 156                                  "typewriter", "tt", "plex-mono", "scale")
 157         elif font == 'Adobe':
 158             fm.expandFontMapping(['ADOBESourceSerifPro'], "roman", None, "sourceserifpro")
 159             fm.expandFontMapping(['ADOBESourceSansPro'], "sans", "sf", "sourcesanspro", "scaled")
 160             fm.expandFontMapping(['ADOBESourceCodePro'], "typewriter", "tt", "sourcecodepro", "scaled")
 161         elif font == 'Noto':
 162             fm.expandFontMapping(['NotoSerifRegular,regular', 'NotoSerifMedium,medium',
 163                                   'NotoSerifThin,thin', 'NotoSerifLight,light',
 164                                   'NotoSerifExtralight,extralight'],
 165                                   "roman", None, "noto-serif")
 166             fm.expandFontMapping(['NotoSansRegular,regular', 'NotoSansMedium,medium',
 167                                   'NotoSansThin,thin', 'NotoSansLight,light',
 168                                   'NotoSansExtralight,extralight'],
 169                                   "sans", "sf", "noto-sans", "scaled")
 170             fm.expandFontMapping(['NotoMonoRegular'], "typewriter", "tt", "noto-mono", "scaled")
 171     return fm
 172
 173 def convert_fonts(document, fm):
 174     " Handle font definition to LaTeX "
 175
 176     rpkg = re.compile(r'^\\usepackage(\[([^\]]*)\])?\{([^\}]+)\}')
 177     rscaleopt = re.compile(r'^scaled?=(.*)')
 178
 179     i = 0
 180     while i < len(document.preamble):
 181         i = find_re(document.preamble, rpkg, i)
 182         if i == -1:
 183             return
 184         mo = rpkg.search(document.preamble[i])
 185         if mo == None or mo.group(2) == None:
 186             options = []
 187         else:
 188             options = mo.group(2).replace(' ', '').split(",")
 189         pkg = mo.group(3)
 190         o = 0
 191         oscale = 1
 192         while o < len(options):
 193             mo = rscaleopt.search(options[o])
 194             if mo == None:
 195                 o += 1
 196                 continue
 197             oscale = mo.group(1)
 198             del options[o]
 199             break
 200
 201         if not pkg in fm.pkginmap:
 202             i += 1
 203             continue
 204         # determine fontname
 205         fn = fm.getfontname(pkg, options)
 206         if fn == None:
 207             i += 1
 208             continue
 209         del document.preamble[i]
 210         fontinfo = fm.font2pkgmap[fn]
 211         if fontinfo.scaletype == None:
 212             fontscale = None
 213         else:
 214             fontscale = "\\font_" + fontinfo.scaletype + "_scale"
 215             fontinfo.scaleval = oscale
 216
 217         if i > 0 and document.preamble[i-1] == "% Added by lyx2lyx":
 218             del document.preamble[i-1]
 219         if fontscale != None:
 220             j = find_token(document.header, fontscale, 0)
 221             if j != -1:
 222                 val = get_value(document.header, fontscale, j)
 223                 vals = val.split()
 224                 scale = "100"
 225                 if oscale != None:
 226                     scale = "%03d" % int(float(oscale) * 100)
 227                 document.header[j] = fontscale + " " + scale + " " + vals[1]
 228         ft = "\\font_" + fontinfo.fonttype
 229         j = find_token(document.header, ft, 0)
 230         if j != -1:
 231             val = get_value(document.header, ft, j)
 232             words = val.split() # ! splits also values like '"DejaVu Sans"'
 233             words[0] = '"' + fn + '"'
 234             document.header[j] = ft + ' ' + ' '.join(words)
 235
 236 def revert_fonts(document, fm, fontmap):
 237     " Revert native font definition to LaTeX "
 238     # fonlist := list of fonts created from the same package
 239     # Empty package means that the font-name is the same as the package-name
 240     # fontmap (key = package, val += found options) will be filled
 241     # and used later in add_preamble_fonts() to be added to user-preamble
 242
 243     rfontscale = re.compile(r'^\s*(\\font_(roman|sans|typewriter|math))\s+')
 244     rscales = re.compile(r'^\s*(\d+)\s+(\d+)')
 245     i = 0
 246     while i < len(document.header):
 247         i = find_re(document.header, rfontscale, i)
 248         if (i == -1):
 249             break
 250         mo = rfontscale.search(document.header[i])
 251         if mo == None:
 252             i += 1
 253             continue
 254         ft = mo.group(1)    # 'roman', 'sans', 'typewriter', 'math'
 255         val = get_value(document.header, ft, i)
 256         words = val.split(' ')     # ! splits also values like '"DejaVu Sans"'
 257         font = words[0].strip('"') # TeX font name has no whitespace
 258         if not font in fm.font2pkgmap:
 259             i += 1
 260             continue
 261         fontinfo = fm.font2pkgmap[font]
 262         val = fontinfo.package
 263         if not val in fontmap:
 264             fontmap[val] = []
 265         words[0] = '"default"'
 266         document.header[i] = ft + ' ' + ' '.join(words)
 267         if fontinfo.scaleopt != None:
 268             xval =  get_value(document.header, "\\font_" + fontinfo.scaletype + "_scale", 0)
 269             mo = rscales.search(xval)
 270             if mo != None:
 271                 xval1 = mo.group(1)
 272                 xval2 = mo.group(2)
 273                 if xval1 != "100":
 274                     # set correct scale option
 275                     fontmap[val].extend([fontinfo.scaleopt + "=" + format(float(xval1) / 100, '.2f')])
 276         if len(fontinfo.options) > 0:
 277             fontmap[val].extend(fontinfo.options)
 278         i += 1
 279
 280 ###############################################################################
 281 ###
 282 ### Conversion and reversion routines
 283 ###
 284 ###############################################################################
 285
 286 def convert_inputencoding_namechange(document):
 287     " Rename inputencoding settings. "
 288     i = find_token(document.header, "\\inputencoding", 0)
 289     if i == -1:
 290         return
 291     s = document.header[i].replace("auto", "auto-legacy")
 292     document.header[i] = s.replace("default", "auto-legacy-plain")
 293
 294 def revert_inputencoding_namechange(document):
 295     " Rename inputencoding settings. "
 296     i = find_token(document.header, "\\inputencoding", 0)
 297     if i == -1:
 298         return
 299     s = document.header[i].replace("auto-legacy-plain", "default")
 300     document.header[i] = s.replace("auto-legacy", "auto")
 301
 302 def convert_notoFonts(document):
 303     " Handle Noto fonts definition to LaTeX "
 304
 305     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 306         fm = createFontMapping(['Noto'])
 307         convert_fonts(document, fm)
 308
 309 def revert_notoFonts(document):
 310     " Revert native Noto font definition to LaTeX "
 311
 312     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 313         fontmap = dict()
 314         fm = createFontMapping(['Noto'])
 315         revert_fonts(document, fm, fontmap)
 316         add_preamble_fonts(document, fontmap)
 317
 318 def convert_latexFonts(document):
 319     " Handle DejaVu and IBMPlex fonts definition to LaTeX "
 320
 321     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 322         fm = createFontMapping(['DejaVu', 'IBM'])
 323         convert_fonts(document, fm)
 324
 325 def revert_latexFonts(document):
 326     " Revert native DejaVu font definition to LaTeX "
 327
 328     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 329         fontmap = dict()
 330         fm = createFontMapping(['DejaVu', 'IBM'])
 331         revert_fonts(document, fm, fontmap)
 332         add_preamble_fonts(document, fontmap)
 333
 334 def convert_AdobeFonts(document):
 335     " Handle DejaVu and IBMPlex fonts definition to LaTeX "
 336
 337     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 338         fm = createFontMapping(['Adobe'])
 339         convert_fonts(document, fm)
 340
 341 def revert_AdobeFonts(document):
 342     " Revert native DejaVu font definition to LaTeX "
 343
 344     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 345         fontmap = dict()
 346         fm = createFontMapping(['Adobe'])
 347         revert_fonts(document, fm, fontmap)
 348         add_preamble_fonts(document, fontmap)
 349
 350 def removeFrontMatterStyles(document):
 351     " Remove styles Begin/EndFrontmatter"
 352
 353     layouts = ['BeginFrontmatter', 'EndFrontmatter']
 354     for layout in layouts:
 355         i = 0
 356         while True:
 357             i = find_token(document.body, '\\begin_layout ' + layout, i)
 358             if i == -1:
 359                 break
 360             j = find_end_of_layout(document.body, i)
 361             if j == -1:
 362                 document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
 363                 i += 1
 364                 continue
 365             while i > 0 and document.body[i-1].strip() == '':
 366                 i -= 1
 367             while document.body[j+1].strip() == '':
 368                 j = j + 1
 369             document.body[i:j+1] = ['']
 370
 371 def addFrontMatterStyles(document):
 372     " Use styles Begin/EndFrontmatter for elsarticle"
 373
 374     def insertFrontmatter(prefix, line):
 375         above = line
 376         while above > 0 and document.body[above-1].strip() == '':
 377             above -= 1
 378         below = line
 379         while document.body[below].strip() == '':
 380             below += 1
 381         document.body[above:below] = ['', '\\begin_layout ' + prefix + 'Frontmatter',
 382                                     '\\begin_inset Note Note',
 383                                     'status open', '',
 384                                     '\\begin_layout Plain Layout',
 385                                     'Keep this empty!',
 386                                     '\\end_layout', '',
 387                                     '\\end_inset', '', '',
 388                                     '\\end_layout', '']
 389
 390     if document.textclass == "elsarticle":
 391         layouts = ['Title', 'Title footnote', 'Author', 'Author footnote',
 392                    'Corresponding author', 'Address', 'Email', 'Abstract', 'Keywords']
 393         first = -1
 394         last = -1
 395         for layout in layouts:
 396             i = 0
 397             while True:
 398                 i = find_token(document.body, '\\begin_layout ' + layout, i)
 399                 if i == -1:
 400                     break
 401                 k = find_end_of_layout(document.body, i)
 402                 if k == -1:
 403                     document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
 404                     i += 1;
 405                     continue
 406                 if first == -1 or i < first:
 407                     first = i
 408                 if last == -1 or last <= k:
 409                     last = k+1
 410                 i = k+1
 411         if first == -1:
 412             return
 413         insertFrontmatter('End', last)
 414         insertFrontmatter('Begin', first)
 415
 416 def convert_lst_literalparam(document):
 417     " Add param literal to include inset "
 418
 419     i = 0
 420     while True:
 421         i = find_token(document.body, '\\begin_inset CommandInset include', i)
 422         if i == -1:
 423             break
 424         j = find_end_of_inset(document.body, i)
 425         if j == -1:
 426             document.warning("Malformed LyX document: Can't find end of command inset at line %d" % i)
 427             i += 1
 428             continue
 429         while i < j and document.body[i].strip() != '':
 430             i += 1
 431         document.body.insert(i, "literal \"true\"")
 432
 433
 434 def revert_lst_literalparam(document):
 435     " Remove param literal from include inset "
 436
 437     i = 0
 438     while True:
 439         i = find_token(document.body, '\\begin_inset CommandInset include', i)
 440         if i == -1:
 441             break
 442         j = find_end_of_inset(document.body, i)
 443         if j == -1:
 444             document.warning("Malformed LyX document: Can't find end of include inset at line %d" % i)
 445             i += 1
 446             continue
 447         k = find_token(document.body, 'literal', i, j)
 448         if k == -1:
 449             i += 1
 450             continue
 451         del document.body[k]
 452
 453
 454 def revert_paratype(document):
 455     " Revert ParaType font definitions to LaTeX "
 456
 457     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 458         preamble = ""
 459         i1 = find_token(document.header, "\\font_roman \"PTSerif-TLF\"", 0)
 460         i2 = find_token(document.header, "\\font_sans \"default\"", 0)
 461         i3 = find_token(document.header, "\\font_typewriter \"default\"", 0)
 462         j = find_token(document.header, "\\font_sans \"PTSans-TLF\"", 0)
 463         sfval = get_value(document.header, "\\font_sf_scale", 0)
 464         # cutoff " 100"
 465         sfval = sfval[:-4]
 466         sfoption = ""
 467         if sfval != "100":
 468             sfoption = "scaled=" + format(float(sfval) / 100, '.2f')
 469         k = find_token(document.header, "\\font_typewriter \"PTMono-TLF\"", 0)
 470         ttval = get_value(document.header, "\\font_tt_scale", 0)
 471         # cutoff " 100"
 472         ttval = ttval[:-4]
 473         ttoption = ""
 474         if ttval != "100":
 475             ttoption = "scaled=" + format(float(ttval) / 100, '.2f')
 476         if i1 != -1 and i2 != -1 and i3!= -1:
 477             add_to_preamble(document, ["\\usepackage{paratype}"])
 478         else:
 479             if i1!= -1:
 480                 add_to_preamble(document, ["\\usepackage{PTSerif}"])
 481                 document.header[i1] = document.header[i1].replace("PTSerif-TLF", "default")
 482             if j!= -1:
 483                 if sfoption != "":
 484                     add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"])
 485                 else:
 486                     add_to_preamble(document, ["\\usepackage{PTSans}"])
 487                 document.header[j] = document.header[j].replace("PTSans-TLF", "default")
 488             if k!= -1:
 489                 if ttoption != "":
 490                     add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"])
 491                 else:
 492                     add_to_preamble(document, ["\\usepackage{PTMono}"])
 493                 document.header[k] = document.header[k].replace("PTMono-TLF", "default")
 494
 495
 496 def revert_xcharter(document):
 497     " Revert XCharter font definitions to LaTeX "
 498
 499     i = find_token(document.header, "\\font_roman \"xcharter\"", 0)
 500     if i == -1:
 501         return
 502
 503     # replace unsupported font setting
 504     document.header[i] = document.header[i].replace("xcharter", "default")
 505     # no need for preamble code with system fonts
 506     if get_bool_value(document.header, "\\use_non_tex_fonts"):
 507         return
 508
 509     # transfer old style figures setting to package options
 510     j = find_token(document.header, "\\font_osf true")
 511     if j != -1:
 512         options = "[osf]"
 513         document.header[j] = "\\font_osf false"
 514     else:
 515         options = ""
 516     if i != -1:
 517         add_to_preamble(document, ["\\usepackage%s{XCharter}"%options])
 518
 519
 520 def revert_lscape(document):
 521     " Reverts the landscape environment (Landscape module) to TeX-code "
 522
 523     if not "landscape" in document.get_module_list():
 524         return
 525
 526     i = 0
 527     while True:
 528         i = find_token(document.body, "\\begin_inset Flex Landscape", i)
 529         if i == -1:
 530             return
 531         j = find_end_of_inset(document.body, i)
 532         if j == -1:
 533             document.warning("Malformed LyX document: Can't find end of Landscape inset")
 534             i += 1
 535             continue
 536
 537         if document.body[i] == "\\begin_inset Flex Landscape (Floating)":
 538             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}}")
 539             document.body[i : i + 4] = put_cmd_in_ert("\\afterpage{\\begin{landscape}")
 540             add_to_preamble(document, ["\\usepackage{afterpage}"])
 541         else:
 542             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}")
 543             document.body[i : i + 4] = put_cmd_in_ert("\\begin{landscape}")
 544
 545         add_to_preamble(document, ["\\usepackage{pdflscape}"])
 546         # no need to reset i
 547
 548
 549 def convert_fontenc(document):
 550     " Convert default fontenc setting "
 551
 552     i = find_token(document.header, "\\fontencoding global", 0)
 553     if i == -1:
 554         return
 555
 556     document.header[i] = document.header[i].replace("global", "auto")
 557
 558
 559 def revert_fontenc(document):
 560     " Revert default fontenc setting "
 561
 562     i = find_token(document.header, "\\fontencoding auto", 0)
 563     if i == -1:
 564         return
 565
 566     document.header[i] = document.header[i].replace("auto", "global")
 567
 568
 569 def revert_nospellcheck(document):
 570     " Remove nospellcheck font info param "
 571
 572     i = 0
 573     while True:
 574         i = find_token(document.body, '\\nospellcheck', i)
 575         if i == -1:
 576             return
 577         del document.body[i]
 578
 579
 580 def revert_floatpclass(document):
 581     " Remove float placement params 'document' and 'class' "
 582
 583     i = 0
 584     i = find_token(document.header, "\\float_placement class", 0)
 585     if i != -1:
 586         del document.header[i]
 587
 588     i = 0
 589     while True:
 590         i = find_token(document.body, '\\begin_inset Float', i)
 591         if i == -1:
 592             break
 593         j = find_end_of_inset(document.body, i)
 594         k = find_token(document.body, 'placement class', i, i + 2)
 595         if k == -1:
 596             k = find_token(document.body, 'placement document', i, i + 2)
 597             if k != -1:
 598                 del document.body[k]
 599             i += 1
 600             continue
 601         del document.body[k]
 602
 603
 604 def revert_floatalignment(document):
 605     " Remove float alignment params "
 606
 607     i = 0
 608     i = find_token(document.header, "\\float_alignment", 0)
 609     galignment = ""
 610     if i != -1:
 611         galignment = get_value(document.header, "\\float_alignment", i)
 612         del document.header[i]
 613
 614     i = 0
 615     while True:
 616         i = find_token(document.body, '\\begin_inset Float', i)
 617         if i == -1:
 618             break
 619         j = find_end_of_inset(document.body, i)
 620         if j == -1:
 621             document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
 622             i += 1
 623         k = find_token(document.body, 'alignment', i, i + 4)
 624         if k == -1:
 625             i = j
 626             continue
 627         alignment = get_value(document.body, "alignment", k)
 628         if alignment == "document":
 629             alignment = galignment
 630         del document.body[k]
 631         l = find_token(document.body, "\\begin_layout Plain Layout", i, j)
 632         if l == -1:
 633             document.warning("Can't find float layout!")
 634             i += 1
 635             continue
 636         alcmd = []
 637         if alignment == "left":
 638             alcmd = put_cmd_in_ert("\\raggedright{}")
 639         elif alignment == "center":
 640             alcmd = put_cmd_in_ert("\\centering{}")
 641         elif alignment == "right":
 642             alcmd = put_cmd_in_ert("\\raggedleft{}")
 643         if len(alcmd) > 0:
 644             document.body[l+1:l+1] = alcmd
 645         i += 1
 646
 647
 648 def revert_tuftecite(document):
 649     " Revert \cite commands in tufte classes "
 650
 651     tufte = ["tufte-book", "tufte-handout"]
 652     if document.textclass not in tufte:
 653         return
 654
 655     i = 0
 656     while (True):
 657         i = find_token(document.body, "\\begin_inset CommandInset citation", i)
 658         if i == -1:
 659             break
 660         j = find_end_of_inset(document.body, i)
 661         if j == -1:
 662             document.warning("Can't find end of citation inset at line %d!!" %(i))
 663             i += 1
 664             continue
 665         k = find_token(document.body, "LatexCommand", i, j)
 666         if k == -1:
 667             document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
 668             i = j + 1
 669             continue
 670         cmd = get_value(document.body, "LatexCommand", k)
 671         if cmd != "cite":
 672             i = j + 1
 673             continue
 674         pre = get_quoted_value(document.body, "before", i, j)
 675         post = get_quoted_value(document.body, "after", i, j)
 676         key = get_quoted_value(document.body, "key", i, j)
 677         if not key:
 678             document.warning("Citation inset at line %d does not have a key!" %(i))
 679             key = "???"
 680         # Replace command with ERT
 681         res = "\\cite"
 682         if pre:
 683             res += "[" + pre + "]"
 684         if post:
 685             res += "[" + post + "]"
 686         elif pre:
 687             res += "[]"
 688         res += "{" + key + "}"
 689         document.body[i:j+1] = put_cmd_in_ert([res])
 690         i = j + 1
 691
 692
 693 def revert_stretchcolumn(document):
 694     " We remove the column varwidth flags or everything else will become a mess. "
 695     i = 0
 696     while True:
 697         i = find_token(document.body, "\\begin_inset Tabular", i)
 698         if i == -1:
 699             return
 700         j = find_end_of_inset(document.body, i + 1)
 701         if j == -1:
 702             document.warning("Malformed LyX document: Could not find end of tabular.")
 703             continue
 704         for k in range(i, j):
 705             if re.search('^<column.*varwidth="[^"]+".*>$', document.body[k]):
 706                 document.warning("Converting 'tabularx'/'xltabular' table to normal table.")
 707                 document.body[k] = document.body[k].replace(' varwidth="true"', '')
 708         i = i + 1
 709
 710
 711 def revert_vcolumns(document):
 712     " Revert standard columns with line breaks etc. "
 713     i = 0
 714     needvarwidth = False
 715     needarray = False
 716     try:
 717         while True:
 718             i = find_token(document.body, "\\begin_inset Tabular", i)
 719             if i == -1:
 720                 return
 721             j = find_end_of_inset(document.body, i)
 722             if j == -1:
 723                 document.warning("Malformed LyX document: Could not find end of tabular.")
 724                 i += 1
 725                 continue
 726
 727             # Collect necessary column information
 728             m = i + 1
 729             nrows = int(document.body[i+1].split('"')[3])
 730             ncols = int(document.body[i+1].split('"')[5])
 731             col_info = []
 732             for k in range(ncols):
 733                 m = find_token(document.body, "<column", m)
 734                 width = get_option_value(document.body[m], 'width')
 735                 varwidth = get_option_value(document.body[m], 'varwidth')
 736                 alignment = get_option_value(document.body[m], 'alignment')
 737                 special = get_option_value(document.body[m], 'special')
 738                 col_info.append([width, varwidth, alignment, special, m])
 739
 740             # Now parse cells
 741             m = i + 1
 742             lines = []
 743             for row in range(nrows):
 744                 for col in range(ncols):
 745                     m = find_token(document.body, "<cell", m)
 746                     multicolumn = get_option_value(document.body[m], 'multicolumn')
 747                     multirow = get_option_value(document.body[m], 'multirow')
 748                     width = get_option_value(document.body[m], 'width')
 749                     rotate = get_option_value(document.body[m], 'rotate')
 750                     # Check for: linebreaks, multipars, non-standard environments
 751                     begcell = m
 752                     endcell = find_token(document.body, "</cell>", begcell)
 753                     vcand = False
 754                     if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
 755                         vcand = True
 756                     elif count_pars_in_inset(document.body, begcell + 2) > 1:
 757                         vcand = True
 758                     elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
 759                         vcand = True
 760                     if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""):
 761                         if col_info[col][0] == "" and col_info[col][1] == "" and col_info[col][3] == "":
 762                             needvarwidth = True
 763                             alignment = col_info[col][2]
 764                             col_line = col_info[col][4]
 765                             vval = ""
 766                             if alignment == "center":
 767                                 vval = ">{\\centering}"
 768                             elif  alignment == "left":
 769                                 vval = ">{\\raggedright}"
 770                             elif alignment == "right":
 771                                 vval = ">{\\raggedleft}"
 772                             if vval != "":
 773                                 needarray = True
 774                             vval += "V{\\linewidth}"
 775
 776                             document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
 777                             # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
 778                             # with newlines, and we do not want that)
 779                             while True:
 780                                 endcell = find_token(document.body, "</cell>", begcell)
 781                                 linebreak = False
 782                                 nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
 783                                 if nl == -1:
 784                                     nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
 785                                     if nl == -1:
 786                                          break
 787                                     linebreak = True
 788                                 nle = find_end_of_inset(document.body, nl)
 789                                 del(document.body[nle:nle+1])
 790                                 if linebreak:
 791                                     document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
 792                                 else:
 793                                     document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
 794                     m += 1
 795
 796             i = j + 1
 797
 798     finally:
 799         if needarray == True:
 800             add_to_preamble(document, ["\\usepackage{array}"])
 801         if needvarwidth == True:
 802             add_to_preamble(document, ["\\usepackage{varwidth}"])
 803
 804
 805 def revert_bibencoding(document):
 806     " Revert bibliography encoding "
 807
 808     # Get cite engine
 809     engine = "basic"
 810     i = find_token(document.header, "\\cite_engine", 0)
 811     if i == -1:
 812         document.warning("Malformed document! Missing \\cite_engine")
 813     else:
 814         engine = get_value(document.header, "\\cite_engine", i)
 815
 816     # Check if biblatex
 817     biblatex = False
 818     if engine in ["biblatex", "biblatex-natbib"]:
 819         biblatex = True
 820
 821     # Map lyx to latex encoding names
 822     encodings = {
 823         "utf8" : "utf8",
 824         "utf8x" : "utf8x",
 825         "armscii8" : "armscii8",
 826         "iso8859-1" : "latin1",
 827         "iso8859-2" : "latin2",
 828         "iso8859-3" : "latin3",
 829         "iso8859-4" : "latin4",
 830         "iso8859-5" : "iso88595",
 831         "iso8859-6" : "8859-6",
 832         "iso8859-7" : "iso-8859-7",
 833         "iso8859-8" : "8859-8",
 834         "iso8859-9" : "latin5",
 835         "iso8859-13" : "latin7",
 836         "iso8859-15" : "latin9",
 837         "iso8859-16" : "latin10",
 838         "applemac" : "applemac",
 839         "cp437" : "cp437",
 840         "cp437de" : "cp437de",
 841         "cp850" : "cp850",
 842         "cp852" : "cp852",
 843         "cp855" : "cp855",
 844         "cp858" : "cp858",
 845         "cp862" : "cp862",
 846         "cp865" : "cp865",
 847         "cp866" : "cp866",
 848         "cp1250" : "cp1250",
 849         "cp1251" : "cp1251",
 850         "cp1252" : "cp1252",
 851         "cp1255" : "cp1255",
 852         "cp1256" : "cp1256",
 853         "cp1257" : "cp1257",
 854         "koi8-r" : "koi8-r",
 855         "koi8-u" : "koi8-u",
 856         "pt154" : "pt154",
 857         "utf8-platex" : "utf8",
 858         "ascii" : "ascii"
 859     }
 860
 861     i = 0
 862     bibresources = []
 863     while (True):
 864         i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
 865         if i == -1:
 866             break
 867         j = find_end_of_inset(document.body, i)
 868         if j == -1:
 869             document.warning("Can't find end of bibtex inset at line %d!!" %(i))
 870             i += 1
 871             continue
 872         encoding = get_quoted_value(document.body, "encoding", i, j)
 873         if not encoding:
 874             i += 1
 875             continue
 876         # remove encoding line
 877         k = find_token(document.body, "encoding", i, j)
 878         if k != -1:
 879             del document.body[k]
 880         if encoding == "default":
 881             i += 1
 882             continue
 883         # Re-find inset end line
 884         j = find_end_of_inset(document.body, i)
 885         if biblatex:
 886             biblio_options = ""
 887             h = find_token(document.header, "\\biblio_options", 0)
 888             if h != -1:
 889                 biblio_options = get_value(document.header, "\\biblio_options", h)
 890                 if not "bibencoding" in biblio_options:
 891                      document.header[h] += ",bibencoding=%s" % encodings[encoding]
 892             else:
 893                 bs = find_token(document.header, "\\biblatex_bibstyle", 0)
 894                 if bs == -1:
 895                     # this should not happen
 896                     document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
 897                 else:
 898                     document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + encodings[encoding]]
 899         else:
 900             document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
 901             document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + encodings[encoding] + "}")
 902
 903         i = j + 1
 904
 905
 906
 907 def convert_vcsinfo(document):
 908     " Separate vcs Info inset from buffer Info inset. "
 909
 910     types = {
 911         "vcs-revision" : "revision",
 912         "vcs-tree-revision" : "tree-revision",
 913         "vcs-author" : "author",
 914         "vcs-time" : "time",
 915         "vcs-date" : "date"
 916     }
 917     i = 0
 918     while True:
 919         i = find_token(document.body, "\\begin_inset Info", i)
 920         if i == -1:
 921             return
 922         j = find_end_of_inset(document.body, i + 1)
 923         if j == -1:
 924             document.warning("Malformed LyX document: Could not find end of Info inset.")
 925             i = i + 1
 926             continue
 927         tp = find_token(document.body, 'type', i, j)
 928         tpv = get_quoted_value(document.body, "type", tp)
 929         if tpv != "buffer":
 930             i = i + 1
 931             continue
 932         arg = find_token(document.body, 'arg', i, j)
 933         argv = get_quoted_value(document.body, "arg", arg)
 934         if argv not in list(types.keys()):
 935             i = i + 1
 936             continue
 937         document.body[tp] = "type \"vcs\""
 938         document.body[arg] = "arg \"" + types[argv] + "\""
 939         i = i + 1
 940
 941
 942 def revert_vcsinfo(document):
 943     " Merge vcs Info inset to buffer Info inset. "
 944
 945     args = ["revision", "tree-revision", "author", "time", "date" ]
 946     i = 0
 947     while True:
 948         i = find_token(document.body, "\\begin_inset Info", i)
 949         if i == -1:
 950             return
 951         j = find_end_of_inset(document.body, i + 1)
 952         if j == -1:
 953             document.warning("Malformed LyX document: Could not find end of Info inset.")
 954             i = i + 1
 955             continue
 956         tp = find_token(document.body, 'type', i, j)
 957         tpv = get_quoted_value(document.body, "type", tp)
 958         if tpv != "vcs":
 959             i = i + 1
 960             continue
 961         arg = find_token(document.body, 'arg', i, j)
 962         argv = get_quoted_value(document.body, "arg", arg)
 963         if argv not in args:
 964             document.warning("Malformed Info inset. Invalid vcs arg.")
 965             i = i + 1
 966             continue
 967         document.body[tp] = "type \"buffer\""
 968         document.body[arg] = "arg \"vcs-" + argv + "\""
 969         i = i + 1
 970
 971
 972 def revert_dateinfo(document):
 973     " Revert date info insets to static text. "
 974
 975 # FIXME This currently only considers the main language and uses the system locale
 976 # Ideally, it should honor context languages and switch the locale accordingly.
 977
 978     # The date formats for each language using strftime syntax:
 979     # long, short, loclong, locmedium, locshort
 980     dateformats = {
 981         "afrikaans" : ["%A, %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
 982         "albanian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 983         "american" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
 984         "amharic" : ["%A ፣%d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 985         "ancientgreek" : ["%A, %d %B %Y", "%d %b %Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 986         "arabic_arabi" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
 987         "arabic_arabtex" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
 988         "armenian" : ["%Y թ. %B %d, %A", "%d.%m.%y", "%d %B، %Y", "%d %b، %Y", "%d/%m/%Y"],
 989         "asturian" : ["%A, %d %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
 990         "australian" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 991         "austrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
 992         "bahasa" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 993         "bahasam" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
 994         "basque" : ["%Y(e)ko %B %d, %A", "%y/%m/%d", "%Y %B %d", "%Y %b %d", "%Y/%m/%d"],
 995         "belarusian" : ["%A, %d %B %Y г.", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
 996         "bosnian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%Y-%m-%d"],
 997         "brazilian" : ["%A, %d de %B de %Y", "%d/%m/%Y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
 998         "breton" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
 999         "british" : ["%A, %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1000         "bulgarian" : ["%A, %d %B %Y г.", "%d.%m.%y г.", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
1001         "canadian" : ["%A, %B %d, %Y", "%Y-%m-%d", "%B %d, %Y", "%d %b %Y", "%Y-%m-%d"],
1002         "canadien" : ["%A %d %B %Y", "%y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
1003         "catalan" : ["%A, %d %B de %Y", "%d/%m/%y", "%d / %B / %Y", "%d / %b / %Y", "%d/%m/%Y"],
1004         "chinese-simplified" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y-%m-%d", "%y-%m-%d"],
1005         "chinese-traditional" : ["%Y年%m月%d日 %A", "%Y/%m/%d", "%Y年%m月%d日", "%Y年%m月%d日", "%y年%m月%d日"],
1006         "coptic" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
1007         "croatian" : ["%A, %d. %B %Y.", "%d. %m. %Y.", "%d. %B %Y.", "%d. %b. %Y.", "%d.%m.%Y."],
1008         "czech" : ["%A %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b. %Y", "%d.%m.%Y"],
1009         "danish" : ["%A den %d. %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
1010         "divehi" : ["%Y %B %d, %A", "%Y-%m-%d", "%Y %B %d", "%Y %b %d", "%d/%m/%Y"],
1011         "dutch" : ["%A %d %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
1012         "english" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
1013         "esperanto" : ["%A, %d %B %Y", "%d %b %Y", "la %d de %B %Y", "la %d de %b %Y", "%m/%d/%Y"],
1014         "estonian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
1015         "farsi" : ["%A %d %B %Y", "%Y/%m/%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
1016         "finnish" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1017         "french" : ["%A %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1018         "friulan" : ["%A %d di %B dal %Y", "%d/%m/%y", "%d di %B dal %Y", "%d di %b dal %Y", "%d/%m/%Y"],
1019         "galician" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
1020         "georgian" : ["%A, %d %B, %Y", "%d.%m.%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
1021         "german" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1022         "german-ch" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1023         "german-ch-old" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1024         "greek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1025         "hebrew" : ["%A, %d ב%B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1026         "hindi" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
1027         "icelandic" : ["%A, %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1028         "interlingua" : ["%Y %B %d, %A", "%Y-%m-%d", "le %d de %B %Y", "le %d de %b %Y", "%Y-%m-%d"],
1029         "irish" : ["%A %d %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
1030         "italian" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d/%b/%Y", "%d/%m/%Y"],
1031         "japanese" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
1032         "japanese-cjk" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
1033         "kannada" : ["%A, %B %d, %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d-%m-%Y"],
1034         "kazakh" : ["%Y ж. %d %B, %A", "%d.%m.%y", "%d %B %Y", "%d %B %Y", "%Y-%d-%m"],
1035         "khmer" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
1036         "korean" : ["%Y년 %m월 %d일 %A", "%y. %m. %d.", "%Y년 %m월 %d일", "%Y. %m. %d.", "%y. %m. %d."],
1037         "kurmanji" : ["%A, %d %B %Y", "%d %b %Y", "%d. %B %Y", "%d. %m. %Y", "%Y-%m-%d"],
1038         "lao" : ["%A ທີ %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
1039         "latin" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
1040         "latvian" : ["%A, %Y. gada %d. %B", "%d.%m.%y", "%Y. gada %d. %B", "%Y. gada %d. %b", "%d.%m.%Y"],
1041         "lithuanian" : ["%Y m. %B %d d., %A", "%Y-%m-%d", "%Y m. %B %d d.", "%Y m. %B %d d.", "%Y-%m-%d"],
1042         "lowersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
1043         "macedonian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
1044         "magyar" : ["%Y. %B %d., %A", "%Y. %m. %d.", "%Y. %B %d.", "%Y. %b %d.", "%Y.%m.%d."],
1045         "malayalam" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
1046         "marathi" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
1047         "mongolian" : ["%A, %Y оны %m сарын %d", "%Y-%m-%d", "%Y оны %m сарын %d", "%d-%m-%Y", "%d-%m-%Y"],
1048         "naustrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1049         "newzealand" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1050         "ngerman" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1051         "norsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1052         "nynorsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1053         "occitan" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1054         "piedmontese" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
1055         "polish" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
1056         "polutonikogreek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1057         "portuguese" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%Y/%m/%d"],
1058         "romanian" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
1059         "romansh" : ["%A, ils %d da %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
1060         "russian" : ["%A, %d %B %Y г.", "%d.%m.%Y", "%d %B %Y г.", "%d %b %Y г.", "%d.%m.%Y"],
1061         "samin" : ["%Y %B %d, %A", "%Y-%m-%d", "%B %d. b. %Y", "%b %d. b. %Y", "%d.%m.%Y"],
1062         "sanskrit" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
1063         "scottish" : ["%A, %dmh %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1064         "serbian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1065         "serbian-latin" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1066         "slovak" : ["%A, %d. %B %Y", "%d. %m. %Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1067         "slovene" : ["%A, %d. %B %Y", "%d. %m. %y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
1068         "spanish" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B %de %Y", "%d %b %Y", "%d/%m/%Y"],
1069         "spanish-mexico" : ["%A, %d de %B %de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
1070         "swedish" : ["%A %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
1071         "syriac" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1072         "tamil" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
1073         "telugu" : ["%d, %B %Y, %A", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
1074         "thai" : ["%Aที่ %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1075         "tibetan" : ["%Y %Bའི་ཚེས་%d, %A", "%Y-%m-%d", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
1076         "turkish" : ["%d %B %Y %A", "%d.%m.%Y", "%d %B %Y", "%d.%b.%Y", "%d.%m.%Y"],
1077         "turkmen" : ["%d %B %Y %A", "%d.%m.%Y", "%Y ý. %B %d", "%d.%m.%Y ý.", "%d.%m.%y ý."],
1078         "ukrainian" : ["%A, %d %B %Y р.", "%d.%m.%y", "%d %B %Y", "%d %m %Y", "%d.%m.%Y"],
1079         "uppersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
1080         "urdu" : ["%A، %d %B، %Y", "%d/%m/%y", "%d %B, %Y", "%d %b %Y", "%d/%m/%Y"],
1081         "vietnamese" : ["%A, %d %B, %Y", "%d/%m/%Y", "%d tháng %B %Y", "%d-%m-%Y", "%d/%m/%Y"],
1082         "welsh" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
1083     }
1084
1085     types = ["date", "fixdate", "moddate" ]
1086     lang = get_value(document.header, "\\language")
1087     if lang == "":
1088         document.warning("Malformed LyX document! No \\language header found!")
1089         return
1090
1091     i = 0
1092     while True:
1093         i = find_token(document.body, "\\begin_inset Info", i)
1094         if i == -1:
1095             return
1096         j = find_end_of_inset(document.body, i + 1)
1097         if j == -1:
1098             document.warning("Malformed LyX document: Could not find end of Info inset.")
1099             i = i + 1
1100             continue
1101         tp = find_token(document.body, 'type', i, j)
1102         tpv = get_quoted_value(document.body, "type", tp)
1103         if tpv not in types:
1104             i = i + 1
1105             continue
1106         arg = find_token(document.body, 'arg', i, j)
1107         argv = get_quoted_value(document.body, "arg", arg)
1108         isodate = ""
1109         dte = date.today()
1110         if tpv == "fixdate":
1111             datecomps = argv.split('@')
1112             if len(datecomps) > 1:
1113                 argv = datecomps[0]
1114                 isodate = datecomps[1]
1115                 m = re.search('(\d\d\d\d)-(\d\d)-(\d\d)', isodate)
1116                 if m:
1117                     dte = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
1118 # FIXME if we had the path to the original document (not the one in the tmp dir),
1119 #        we could use the mtime.
1120 #        elif tpv == "moddate":
1121 #            dte = date.fromtimestamp(os.path.getmtime(document.dir))
1122         result = ""
1123         if argv == "ISO":
1124             result = dte.isodate()
1125         elif argv == "long":
1126             result = dte.strftime(dateformats[lang][0])
1127         elif argv == "short":
1128             result = dte.strftime(dateformats[lang][1])
1129         elif argv == "loclong":
1130             result = dte.strftime(dateformats[lang][2])
1131         elif argv == "locmedium":
1132             result = dte.strftime(dateformats[lang][3])
1133         elif argv == "locshort":
1134             result = dte.strftime(dateformats[lang][4])
1135         else:
1136             fmt = argv.replace("MMMM", "%b").replace("MMM", "%b").replace("MM", "%m").replace("M", "%m")
1137             fmt = fmt.replace("yyyy", "%Y").replace("yy", "%y")
1138             fmt = fmt.replace("dddd", "%A").replace("ddd", "%a").replace("dd", "%d")
1139             fmt = re.sub('[^\'%]d', '%d', fmt)
1140             fmt = fmt.replace("'", "")
1141             result = dte.strftime(fmt)
1142         if sys.version_info < (3,0):
1143             # In Python 2, datetime module works with binary strings,
1144             # our dateformat strings are utf8-encoded:
1145             result = result.decode('utf-8')
1146         document.body[i : j+1] = [result]
1147         i = i + 1
1148
1149
def revert_timeinfo(document):
    """ Revert time info insets to static text.

    Every Info inset of type time, fixtime or modtime is replaced by a
    single body line holding the formatted time. fixtime insets use the
    ISO time given after the '@' in their argument, all others the current
    time. The argument is one of ISO, long, short or a custom Qt-style
    format, which is translated to strftime syntax. Languages missing
    from the format table use the English formats.
    """

    # FIXME This currently only considers the main language and uses the system locale
    # Ideally, it should honor context languages and switch the locale accordingly.
    # Also, the time object is "naive", i.e., it does not know of timezones (%Z will
    # be empty).

    # The time formats for each language using strftime syntax:
    # long, short
    timeformats = {
        "afrikaans" : ["%H:%M:%S %Z", "%H:%M"],
        "albanian" : ["%I:%M:%S %p, %Z", "%I:%M %p"],
        "american" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "amharic" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ancientgreek" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "arabic_arabi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "arabic_arabtex" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "armenian" : ["%H:%M:%S %Z", "%H:%M"],
        "asturian" : ["%H:%M:%S %Z", "%H:%M"],
        "australian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "austrian" : ["%H:%M:%S %Z", "%H:%M"],
        "bahasa" : ["%H.%M.%S %Z", "%H.%M"],
        "bahasam" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "basque" : ["%H:%M:%S (%Z)", "%H:%M"],
        "belarusian" : ["%H:%M:%S, %Z", "%H:%M"],
        "bosnian" : ["%H:%M:%S %Z", "%H:%M"],
        "brazilian" : ["%H:%M:%S %Z", "%H:%M"],
        "breton" : ["%H:%M:%S %Z", "%H:%M"],
        "british" : ["%H:%M:%S %Z", "%H:%M"],
        "bulgarian" : ["%H:%M:%S %Z", "%H:%M"],
        "canadian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "canadien" : ["%H:%M:%S %Z", "%H h %M"],
        "catalan" : ["%H:%M:%S %Z", "%H:%M"],
        "chinese-simplified" : ["%Z %p%I:%M:%S", "%p%I:%M"],
        "chinese-traditional" : ["%p%I:%M:%S [%Z]", "%p%I:%M"],
        "coptic" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "croatian" : ["%H:%M:%S (%Z)", "%H:%M"],
        "czech" : ["%H:%M:%S %Z", "%H:%M"],
        "danish" : ["%H.%M.%S %Z", "%H.%M"],
        "divehi" : ["%H:%M:%S %Z", "%H:%M"],
        "dutch" : ["%H:%M:%S %Z", "%H:%M"],
        "english" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "esperanto" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "estonian" : ["%H:%M:%S %Z", "%H:%M"],
        "farsi" : ["%H:%M:%S (%Z)", "%H:%M"],
        "finnish" : ["%H.%M.%S %Z", "%H.%M"],
        "french" : ["%H:%M:%S %Z", "%H:%M"],
        "friulan" : ["%H:%M:%S %Z", "%H:%M"],
        "galician" : ["%H:%M:%S %Z", "%H:%M"],
        "georgian" : ["%H:%M:%S %Z", "%H:%M"],
        "german" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch-old" : ["%H:%M:%S %Z", "%H:%M"],
        "greek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "hebrew" : ["%H:%M:%S %Z", "%H:%M"],
        "hindi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "icelandic" : ["%H:%M:%S %Z", "%H:%M"],
        "interlingua" : ["%H:%M:%S %Z", "%H:%M"],
        "irish" : ["%H:%M:%S %Z", "%H:%M"],
        "italian" : ["%H:%M:%S %Z", "%H:%M"],
        "japanese" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "japanese-cjk" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "kannada" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "kazakh" : ["%H:%M:%S %Z", "%H:%M"],
        "khmer" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "korean" : ["%p %I시%M분 %S초 %Z", "%p %I:%M"],
        "kurmanji" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "lao" : ["%H ໂມງ%M ນາທີ  %S ວິນາທີ %Z", "%H:%M"],
        "latin" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "latvian" : ["%H:%M:%S %Z", "%H:%M"],
        "lithuanian" : ["%H:%M:%S %Z", "%H:%M"],
        "lowersorbian" : ["%H:%M:%S %Z", "%H:%M"],
        "macedonian" : ["%H:%M:%S %Z", "%H:%M"],
        "magyar" : ["%H:%M:%S %Z", "%H:%M"],
        "malayalam" : ["%p %I:%M:%S %Z", "%p %I:%M"],
        "marathi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "mongolian" : ["%H:%M:%S %Z", "%H:%M"],
        "naustrian" : ["%H:%M:%S %Z", "%H:%M"],
        "newzealand" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ngerman" : ["%H:%M:%S %Z", "%H:%M"],
        "norsk" : ["%H:%M:%S %Z", "%H:%M"],
        "nynorsk" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "occitan" : ["%H:%M:%S %Z", "%H:%M"],
        "piedmontese" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "polish" : ["%H:%M:%S %Z", "%H:%M"],
        "polutonikogreek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "portuguese" : ["%H:%M:%S %Z", "%H:%M"],
        "romanian" : ["%H:%M:%S %Z", "%H:%M"],
        "romansh" : ["%H:%M:%S %Z", "%H:%M"],
        "russian" : ["%H:%M:%S %Z", "%H:%M"],
        "samin" : ["%H:%M:%S %Z", "%H:%M"],
        "sanskrit" : ["%H:%M:%S %Z", "%H:%M"],
        "scottish" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian-latin" : ["%H:%M:%S %Z", "%H:%M"],
        "slovak" : ["%H:%M:%S %Z", "%H:%M"],
        "slovene" : ["%H:%M:%S %Z", "%H:%M"],
        "spanish" : ["%H:%M:%S (%Z)", "%H:%M"],
        "spanish-mexico" : ["%H:%M:%S %Z", "%H:%M"],
        "swedish" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "syriac" : ["%H:%M:%S %Z", "%H:%M"],
        "tamil" : ["%p %I:%M:%S %Z", "%p %I:%M"],
        "telugu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "thai" : ["%H นาฬิกา %M นาที  %S วินาที %Z", "%H:%M"],
        "tibetan" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "turkish" : ["%H:%M:%S %Z", "%H:%M"],
        "turkmen" : ["%H:%M:%S %Z", "%H:%M"],
        "ukrainian" : ["%H:%M:%S %Z", "%H:%M"],
        "uppersorbian" : ["%H:%M:%S %Z", "%H:%M hodź."],
        "urdu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "vietnamese" : ["%H:%M:%S %Z", "%H:%M"],
        "welsh" : ["%H:%M:%S %Z", "%H:%M"]
    }

    # Position of each named format in the lists above
    named_formats = {"long" : 0, "short" : 1}

    # Qt time format tokens and their strftime counterparts
    qt_tokens = {
        "HH" : "%H", "H" : "%H", "hh" : "%I", "h" : "%I",
        "mm" : "%M", "m" : "%M", "ss" : "%S", "s" : "%S",
        "zzz" : "%f", "z" : "%f", "t" : "%Z",
        "AP" : "%p", "ap" : "%p", "A" : "%p", "a" : "%p"
    }
    # Longer tokens come first so that they win over their one-letter prefix
    qt_token_re = re.compile("HH|H|hh|h|mm|m|ss|s|zzz|z|t|AP|ap|A|a")

    types = ["time", "fixtime", "modtime" ]
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        # this should not happen
        document.warning("Malformed LyX document! No \\language header found!")
        return
    lang = get_value(document.header, "\\language", i)

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv not in types:
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        tme = datetime.now().time()
        if tpv == "fixtime":
            # The argument has the form <format>@<ISO time>
            timecomps = argv.split('@')
            if len(timecomps) > 1:
                argv = timecomps[0]
                isotime = timecomps[1]
                try:
                    m = re.search(r'(\d\d):(\d\d):(\d\d)', isotime)
                    if m:
                        tme = time(int(m.group(1)), int(m.group(2)), int(m.group(3)))
                    else:
                        m = re.search(r'(\d\d):(\d\d)', isotime)
                        if m:
                            tme = time(int(m.group(1)), int(m.group(2)))
                except ValueError:
                    # e.g. hour 25: keep the current time instead of aborting
                    document.warning("Invalid time in Info inset: " + isotime)
        # FIXME if we had the path to the original document (not the one in the tmp dir),
        #       we could use the mtime for modtime insets.
        result = ""
        if argv == "ISO":
            result = tme.isoformat()
        elif argv in named_formats:
            if lang in timeformats:
                langformats = timeformats[lang]
            else:
                document.warning("No time formats known for language '" + lang + "'. Using English formats.")
                langformats = timeformats["english"]
            result = tme.strftime(langformats[named_formats[argv]])
        else:
            # Custom Qt format. A single pass keeps the "%H" produced for
            # "HH" from being hit again by the rule for a lone "H".
            fmt = qt_token_re.sub(lambda tok: qt_tokens[tok.group(0)], argv)
            fmt = fmt.replace("'", "")
            result = tme.strftime(fmt)
        if sys.version_info < (3,0):
            # In Python 2, datetime module works with binary strings,
            # our timeformat strings are utf8-encoded:
            result = result.decode('utf-8')
        # Replace the whole inset by one line (a bare string would be
        # spliced in character by character).
        document.body[i : j+1] = [result]
        i = i + 1
1326
1327
def revert_namenoextinfo(document):
    " Merge buffer Info inset type name-noext to name. "

    body = document.body
    start = find_token(body, "\\begin_inset Info", 0)
    while start != -1:
        stop = find_end_of_inset(body, start + 1)
        if stop == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
        else:
            type_line = find_token(body, 'type', start, stop)
            if get_quoted_value(body, "type", type_line) == "buffer":
                arg_line = find_token(body, 'arg', start, stop)
                # Only the name-noext argument is rewritten
                if get_quoted_value(body, "arg", arg_line) == "name-noext":
                    body[arg_line] = "arg \"name\""
        # Continue the search right behind the inset start just handled
        start = find_token(body, "\\begin_inset Info", start + 1)
1353
1354
def revert_l7ninfo(document):
    """Revert l7n Info insets to text.

    Every Info inset of type "l7n" is replaced by a single body line that
    holds its argument with the GUI decoration removed: trailing colons,
    the menu accelerator (everything from the first "|" on) and Qt
    accelerator markers ("&"), while a literal " & " is kept.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "l7n":
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        # remove trailing colons, menu accelerator (|...) and qt accelerator (&), while keeping literal " & "
        argv = argv.rstrip(':').split('|')[0].replace(" & ", "</amp;>").replace("&", "").replace("</amp;>", " & ")
        # The replacement has to be a list: assigning the bare string to the
        # slice would insert one body line per character of the text.
        document.body[i : j+1] = [argv]
        i = i + 1
1379
1380
def revert_listpargs(document):
    """Revert listpreamble arguments to TeX code.

    Each "Argument listpreamble:" inset is deleted and the lines of its
    Plain Layout are re-inserted, wrapped in braces inside a collapsed ERT
    inset, at index 3 of the tuple returned by get_containing_layout()
    for the inset (named parbeg below).

    Insets whose end, containing layout or Plain Layout cannot be found
    are reported with a warning and left untouched.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Argument listpreamble:", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Without this guard nothing would be deleted below while the ERT
            # is still inserted, so the same inset would be found forever.
            document.warning("Malformed LyX document: Could not find end of Argument inset.")
            i += 1
            continue
        # Find containing paragraph layout
        parent = get_containing_layout(document.body, i)
        if not parent:
            document.warning("Malformed LyX document: Can't find parent paragraph layout")
            i += 1
            continue
        parbeg = parent[3]
        # Only look for the Plain Layout inside the inset itself.
        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i, j)
        endPlain = -1
        if beginPlain != -1:
            endPlain = find_end_of_layout(document.body, beginPlain)
        if endPlain == -1:
            document.warning("Malformed LyX document: Can't find Plain Layout of Argument inset")
            i += 1
            continue
        content = document.body[beginPlain + 1 : endPlain]
        del document.body[i:j+1]
        subst = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout",
                 "{"] + content + ["}", "\\end_layout", "", "\\end_inset", ""]
        document.body[parbeg : parbeg] = subst
        i += 1
1404
1405
def revert_lformatinfo(document):
    """Revert layout format Info insets to text.

    Every Info inset of type "lyxinfo" with argument "layoutformat" is
    replaced by a single body line holding a hardcoded format number.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            i = i + 1
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "lyxinfo":
            i = i + 1
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        if argv != "layoutformat":
            i = i + 1
            continue
        # hardcoded for now
        # Wrapped in a list: assigning the bare string "69" to the slice
        # would insert the two separate lines "6" and "9".
        document.body[i : j+1] = ["69"]
        i = i + 1
1432
1433
def convert_hebrew_parentheses(document):
    """ Swap opening/closing parentheses in Hebrew text.

    Up to LyX 2.4, "(" was used as closing parenthesis and
    ")" as opening parenthesis for Hebrew in the LyX source.

    The language is tracked with a stack: "\\begin_layout" pushes a copy of
    the current language, "\\end_layout" pops it and "\\lang" replaces the
    top entry.  Lines that start with a backslash are never modified.
    """
    lang_token = '\\lang '
    current_languages = [document.language]
    for i, line in enumerate(document.body):
        if line.startswith(lang_token):
            # Cut off the token by length.  str.lstrip() takes a *set* of
            # characters and would also eat leading l/a/n/g letters of the
            # language name itself.
            current_languages[-1] = line[len(lang_token):].strip()
        elif line.startswith('\\begin_layout'):
            current_languages.append(current_languages[-1])
        elif line.startswith('\\end_layout'):
            # Keep the document language at the bottom of the stack even if
            # an unmatched \end_layout shows up.
            if len(current_languages) > 1:
                current_languages.pop()
        elif current_languages[-1] == 'hebrew' and not line.startswith('\\'):
            # Swap via a temporary placeholder character.
            document.body[i] = line.replace('(','\x00').replace(')','(').replace('\x00',')')
1452
1453
def revert_hebrew_parentheses(document):
    " Swap parentheses in Hebrew text back to the reversed storage "
    # Swapping "(" and ")" twice restores the input, so the conversion
    # routine also does the reversion; this wrapper only keeps the
    # convert/revert naming convention.
    convert_hebrew_parentheses(document)
1458
1459
def revert_malayalam(document):
    " Switch the document language to English while keeping Malayalam output "

    language = "malayalam"
    # The same name is passed as second and fourth argument, with an empty
    # string in between.
    revert_language(document, language, "", language)
1464
1465
def revert_soul(document):
    " Turn the flex insets of the soul module into ERT "

    # (flex inset name, LaTeX command) pairs, in the order they are reverted
    soul_commands = [
        ("Spaceletters", "\\so"),
        ("Strikethrough", "\\st"),
        ("Underline", "\\ul"),
        ("Highlight", "\\hl"),
        ("Capitalize", "\\caps"),
    ]

    # The package is loaded once as soon as any of the insets is present.
    uses_soul = any(find_token(document.body, "\\begin_inset Flex %s" % name, 0) != -1
                    for name, _command in soul_commands)
    if uses_soul:
        add_to_preamble(document, ["\\usepackage{soul}"])
    # Highlight insets additionally get the color package.
    if find_token(document.body, "\\begin_inset Flex Highlight", 0) != -1:
        add_to_preamble(document, ["\\usepackage{color}"])

    for name, command in soul_commands:
        revert_flex_inset(document.body, name, command)
1485
1486
def revert_tablestyle(document):
    " Drop the tablestyle parameter from the header "

    pos = find_token(document.header, "\\tablestyle", 0)
    if pos == -1:
        # Nothing to remove.
        return
    del document.header[pos]
1494
1495
def revert_bibfileencodings(document):
    """Revert individual Biblatex bibliography encodings.

    For every bibtex CommandInset that carries a non-empty "file_encodings"
    parameter, the parameter line is removed.  If the cite engine is
    biblatex or biblatex-natbib, additionally

    * each bibliography file is declared in the preamble with
      \\addbibresource, with a bibencoding option for files that have an
      individual encoding,
    * an ERT inset with \\printbibliography (plus the biblatexopts, if any)
      is inserted, and
    * the original inset is wrapped into a Note.

    "file_encodings" is parsed as a tab-separated list of
    "<bibfile> <encoding>" entries.
    """

    # Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # Check if biblatex
    biblatex = engine in ["biblatex", "biblatex-natbib"]

    # Map lyx to latex encoding names
    latex_names = {
        "utf8" : "utf8",
        "utf8x" : "utf8x",
        "armscii8" : "armscii8",
        "iso8859-1" : "latin1",
        "iso8859-2" : "latin2",
        "iso8859-3" : "latin3",
        "iso8859-4" : "latin4",
        "iso8859-5" : "iso88595",
        "iso8859-6" : "8859-6",
        "iso8859-7" : "iso-8859-7",
        "iso8859-8" : "8859-8",
        "iso8859-9" : "latin5",
        "iso8859-13" : "latin7",
        "iso8859-15" : "latin9",
        "iso8859-16" : "latin10",
        "applemac" : "applemac",
        "cp437" : "cp437",
        "cp437de" : "cp437de",
        "cp850" : "cp850",
        "cp852" : "cp852",
        "cp855" : "cp855",
        "cp858" : "cp858",
        "cp862" : "cp862",
        "cp865" : "cp865",
        "cp866" : "cp866",
        "cp1250" : "cp1250",
        "cp1251" : "cp1251",
        "cp1252" : "cp1252",
        "cp1255" : "cp1255",
        "cp1256" : "cp1256",
        "cp1257" : "cp1257",
        "koi8-r" : "koi8-r",
        "koi8-u" : "koi8-u",
        "pt154" : "pt154",
        "utf8-platex" : "utf8",
        "ascii" : "ascii"
    }

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
            i += 1
            continue
        # Kept under its own name so that the mapping table above stays usable.
        file_encodings = get_quoted_value(document.body, "file_encodings", i, j)
        if not file_encodings:
            i += 1
            continue
        # "".split(",") yields [""], so empty names are dropped to make the
        # "no bibfile" check below meaningful.
        bibfiles = [bib for bib in
                    get_quoted_value(document.body, "bibfiles", i, j).split(",") if bib]
        opts = get_quoted_value(document.body, "biblatexopts", i, j)
        if not bibfiles:
            document.warning("Bibtex inset at line %d does not have a bibfile!" %(i))
        # remove encoding line
        k = find_token(document.body, "file_encodings", i, j)
        if k != -1:
            del document.body[k]
        # Re-find inset end line
        j = find_end_of_inset(document.body, i)
        if biblatex:
            encmap = dict()
            for entry in file_encodings.split("\t"):
                parts = entry.split(" ", 1)
                if len(parts) < 2:
                    # Entry without an encoding: nothing to record.
                    continue
                encmap[parts[0]] = parts[1]
            for bib in bibfiles:
                pr = "\\addbibresource"
                if bib in encmap:
                    enc = encmap[bib]
                    # Translate the LyX name to its LaTeX name; unknown
                    # names are passed through unchanged.
                    pr += "[bibencoding=" + latex_names.get(enc, enc) + "]"
                pr += "{" + bib + "}"
                add_to_preamble(document, [pr])
            # Insert ERT \\printbibliography and wrap bibtex inset to a Note
            pcmd = "printbibliography"
            if opts:
                pcmd += "[" + opts + "]"
            repl = ["\\begin_inset ERT", "status open", "", "\\begin_layout Plain Layout",\
                    "", "", "\\backslash", pcmd, "\\end_layout", "", "\\end_inset", "", "",\
                    "\\end_layout", "", "\\begin_layout Standard", "\\begin_inset Note Note",\
                    "status open", "", "\\begin_layout Plain Layout" ]
            repl += document.body[i:j+1]
            repl += ["", "\\end_layout", "", "\\end_inset", "", ""]
            document.body[i:j+1] = repl
            # Last line of the replacement, derived from its real length
            # instead of a hand-counted constant.
            j = i + len(repl) - 1

        i = j + 1
1603
1604
def revert_cmidruletrimming(document):
    " Strip the \\cmidrule trimming options from table cells "

    # FIXME: Revert to TeX code?
    trim_option = re.compile(r' (bottom|top)line[lr]trim="true"')
    pos = 0
    while True:
        pos = find_token(document.body, '<cell ', pos)
        if pos == -1:
            return
        # Only cell lines that mention a trim option at all are rewritten.
        if 'trim="' in document.body[pos]:
            document.body[pos] = trim_option.sub('', document.body[pos])
        pos += 1
1624
1625
# Local layout lines defining the Flex:Ruby inset, one raw string per line.
# convert_ruby_module() passes this list to document.del_local_layout() and
# revert_ruby_module() passes it to document.append_local_layout().
# NOTE(review): del_local_layout() presumably compares these lines with the
# document's local layout, so spacing should not be reformatted -- confirm.
ruby_inset_def = [
    r'### Inserted by lyx2lyx (ruby inset) ###',
    r'InsetLayout Flex:Ruby',
    r'  LyxType       charstyle',
    r'  LatexType     command',
    r'  LatexName     ruby',
    r'  HTMLTag       ruby',
    r'  HTMLAttr      ""',
    r'  HTMLInnerTag  rb',
    r'  HTMLInnerAttr ""',
    r'  BgColor       none',
    r'  LabelString   "Ruby"',
    r'  Decoration    Conglomerate',
    r'  Preamble',
    r'    \ifdefined\kanjiskip',
    r'      \IfFileExists{okumacro.sty}{\usepackage{okumacro}}{}',
    r'    \else \ifdefined\luatexversion',
    r'      \usepackage{luatexja-ruby}',
    r'    \else \ifdefined\XeTeXversion',
    r'      \usepackage{ruby}%',
    r'    \fi\fi\fi',
    r'    \providecommand{\ruby}[2]{\shortstack{\tiny #2\\#1}}',
    r'  EndPreamble',
    r'  Argument  post:1',
    r'    LabelString  "ruby text"',
    r'    MenuString  "Ruby Text|R"',
    r'    Tooltip    "Reading aid (ruby, furigana) for Chinese characters."',
    r'    Decoration  Conglomerate',
    r'    Font',
    r'      Size    tiny',
    r'    EndFont',
    r'    LabelFont',
    r'      Size    tiny',
    r'    EndFont',
    r'    Mandatory  1',
    r'  EndArgument',
    r'End',
]
1664
def convert_ruby_module(document):
    " Replace the local ruby inset definition by the ruby module "
    had_local_definition = document.del_local_layout(ruby_inset_def)
    if not had_local_definition:
        return
    # The module is only added when the local definition was removed.
    document.add_module("ruby")
1669
def revert_ruby_module(document):
    " Replace the ruby module by the local ruby inset definition "
    module_removed = document.del_module("ruby")
    if not module_removed:
        return
    # The local definition is only appended when the module was removed.
    document.append_local_layout(ruby_inset_def)
1674
1675
def convert_utf8_japanese(document):
    " Switch Japanese documents from the Japanese utf8 variants to generic utf8."
    language = get_value(document.header, "\\language")
    if language.startswith("japanese"):
        encoding = get_value(document.header, "\\inputencoding")
        # Japanese utf8 variant that is replaced for each language
        japanese_variants = {
            "japanese": "utf8-platex",
            "japanese-cjk": "utf8-cjk",
        }
        if language in japanese_variants and japanese_variants[language] == encoding:
            document.set_parameter("inputencoding", "utf8")
1685
def revert_utf8_japanese(document):
    " Switch Japanese documents from generic utf8 to the Japanese utf8 variants."
    if get_value(document.header, "\\inputencoding") != "utf8":
        return
    # Japanese utf8 variant that is set for each language
    japanese_variants = {
        "japanese": "utf8-platex",
        "japanese-cjk": "utf8-cjk",
    }
    language = get_value(document.header, "\\language")
    if language in japanese_variants:
        document.set_parameter("inputencoding", japanese_variants[language])
1696
1697
1698 ##
1699 # Conversion hub
1700 #
1701
# NOTE(review): presumably the LyX release names handled by this module;
# the consumer of this list is not part of this file -- confirm.
supported_versions = ["2.4.0", "2.4"]
# Each entry pairs a number with the list of functions registered for it
# (an empty list means no function is registered).
# NOTE(review): the numbers are presumably file format numbers, ascending
# here and descending in "revert" -- confirm against the lyx2lyx driver.
convert = [
           [545, [convert_lst_literalparam]],
           [546, []],
           [547, []],
           [548, []],
           [549, []],
           [550, [convert_fontenc]],
           [551, []],
           [552, []],
           [553, []],
           [554, []],
           [555, []],
           [556, []],
           [557, [convert_vcsinfo]],
           [558, [removeFrontMatterStyles]],
           [559, []],
           [560, []],
           [561, [convert_latexFonts]], # Handle dejavu, ibmplex fonts in GUI
           [562, []],
           [563, []],
           [564, []],
           [565, [convert_AdobeFonts]], # Handle adobe fonts in GUI
           [566, [convert_hebrew_parentheses]],
           [567, []],
           [568, []],
           [569, []],
           [570, []],
           [571, []],
           [572, [convert_notoFonts]],  # Added options thin, light, extralight for Noto
           [573, [convert_inputencoding_namechange]],
           [574, [convert_ruby_module, convert_utf8_japanese]],
          ]

# Same [number, [functions]] layout as "convert", listed in descending order.
revert =  [[573, [revert_ruby_module, revert_utf8_japanese]],
           [572, [revert_inputencoding_namechange]],
           [571, [revert_notoFonts]],
           [570, [revert_cmidruletrimming]],
           [569, [revert_bibfileencodings]],
           [568, [revert_tablestyle]],
           [567, [revert_soul]],
           [566, [revert_malayalam]],
           [565, [revert_hebrew_parentheses]],
           [564, [revert_AdobeFonts]],
           [563, [revert_lformatinfo]],
           [562, [revert_listpargs]],
           [561, [revert_l7ninfo]],
           [560, [revert_latexFonts]], # Handle dejavu, ibmplex fonts in user preamble
           [559, [revert_timeinfo, revert_namenoextinfo]],
           [558, [revert_dateinfo]],
           [557, [addFrontMatterStyles]],
           [556, [revert_vcsinfo]],
           [555, [revert_bibencoding]],
           [554, [revert_vcolumns]],
           [553, [revert_stretchcolumn]],
           [552, [revert_tuftecite]],
           [551, [revert_floatpclass, revert_floatalignment]],
           [550, [revert_nospellcheck]],
           [549, [revert_fontenc]],
           [548, []],# dummy format change
           [547, [revert_lscape]],
           [546, [revert_xcharter]],
           [545, [revert_paratype]],
           [544, [revert_lst_literalparam]]
          ]


# Running this module directly does nothing.
if __name__ == "__main__":
    pass