lib/lyx2lyx/lyx_2_4.py

   1 # -*- coding: utf-8 -*-
   2 # This file is part of lyx2lyx
   3 # Copyright (C) 2018 The LyX team
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18
  19 """ Convert files to the file format generated by lyx 2.4"""
  20
  21 import re, string
  22 import unicodedata
  23 import sys, os
  24
  25 from datetime import (datetime, date, time)
  26
  27 # Uncomment only what you need to import, please.
  28
  29 from parser_tools import (count_pars_in_inset, del_token, find_end_of_inset,
  30     find_end_of_layout, find_token, find_token_backwards, find_token_exact,
  31     find_re, get_bool_value,
  32     get_containing_layout, get_option_value, get_value, get_quoted_value)
  33 #    del_value, del_complete_lines,
  34 #    find_complete_lines, find_end_of,
  35 #    find_re, find_substring,
  36 #    get_containing_inset,
  37 #    is_in_inset, set_bool_value
  38 #    find_tokens, check_token
  39
  40 from lyx2lyx_tools import (put_cmd_in_ert, add_to_preamble, insert_to_preamble, lyx2latex,
  41                            revert_language, revert_flex_inset, str2bool)
  42 #  revert_font_attrs, latex_length
  43 #  get_ert, lyx2verbatim, length_in_bp, convert_info_insets
  44 #  revert_flex_inset, hex2ratio
  45
  46 ####################################################################
  47 # Private helper functions
  48
  49 def add_preamble_fonts(document, fontmap):
  50     " Add collected font-packages with their option to user-preamble"
  51
  52     for pkg in fontmap:
  53         if len(fontmap[pkg]) > 0:
  54             xoption = "[" + ",".join(fontmap[pkg]) + "]"
  55         else:
  56             xoption = ""
  57         preamble = "\\usepackage%s{%s}" % (xoption, pkg)
  58         add_to_preamble(document, [preamble])
  59
  60
  61 def createkey(pkg, options):
  62     options.sort()
  63     return pkg + ':' + "-".join(options)
  64
  65 class fontinfo:
  66     def __init__(self):
  67         self.fontname = None    # key into font2pkgmap
  68         self.fonttype = None    # roman,sans,typewriter,math
  69         self.scaletype = None   # None,sf,tt
  70         self.scaleopt = None    # None, 'scaled', 'scale'
  71         self.scaleval = 1
  72         self.package = None
  73         self.options = []
  74         self.pkgkey = None      # key into pkg2fontmap
  75         self.osfopt = None    # None, string
  76
  77     def addkey(self):
  78         self.pkgkey = createkey(self.package, self.options)
  79
  80 class fontmapping:
  81     def __init__(self):
  82         self.font2pkgmap = dict()
  83         self.pkg2fontmap = dict()
  84         self.pkginmap = dict()  # defines, if a map for package exists
  85
  86     def expandFontMapping(self, font_list, font_type, scale_type, pkg, scaleopt = None, osfopt = None):
  87         " Expand fontinfo mapping"
  88         #
  89         # fontlist:    list of fontnames, each element
  90         #              may contain a ','-separated list of needed options
  91         #              like e.g. 'IBMPlexSansCondensed,condensed'
  92         # font_type:   one of 'roman', 'sans', 'typewriter', 'math'
  93         # scale_type:  one of None, 'sf', 'tt'
  94         # pkg:         package defining the font. Defaults to fontname if None
  95         # scaleopt:    one of None, 'scale', 'scaled', or some other string
  96         #              to be used in scale option (e.g. scaled=0.7)
  97         # osfopt:      None or some other string to be used in osf option
  98         for fl in font_list:
  99             fe = fontinfo()
 100             fe.fonttype = font_type
 101             fe.scaletype = scale_type
 102             flt = fl.split(",")
 103             font_name = flt[0]
 104             fe.fontname = font_name
 105             fe.options = flt[1:]
 106             fe.scaleopt = scaleopt
 107             fe.osfopt = osfopt
 108             if pkg == None:
 109                 fe.package = font_name
 110             else:
 111                 fe.package = pkg
 112             fe.addkey()
 113             self.font2pkgmap[font_name] = fe
 114             if fe.pkgkey in self.pkg2fontmap:
 115                 # Repeated the same entry? Check content
 116                 if self.pkg2fontmap[fe.pkgkey] != font_name:
 117                     document.error("Something is wrong in pkgname+options <-> fontname mapping")
 118             self.pkg2fontmap[fe.pkgkey] = font_name
 119             self.pkginmap[fe.package] = 1
 120
 121     def getfontname(self, pkg, options):
 122         options.sort()
 123         pkgkey = createkey(pkg, options)
 124         if not pkgkey in self.pkg2fontmap:
 125             return None
 126         fontname = self.pkg2fontmap[pkgkey]
 127         if not fontname in self.font2pkgmap:
 128             document.error("Something is wrong in pkgname+options <-> fontname mapping")
 129             return None
 130         if pkgkey == self.font2pkgmap[fontname].pkgkey:
 131             return fontname
 132         return None
 133
 134 def createFontMapping(fontlist):
 135     # Create info for known fonts for the use in
 136     #   convert_latexFonts() and
 137     #   revert_latexFonts()
 138     #
 139     # * Would be more handy to parse latexFonts file,
 140     #   but the path to this file is unknown
 141     # * For now, add DejaVu and IBMPlex only.
 142     # * Expand, if desired
 143     fm = fontmapping()
 144     for font in fontlist:
 145         if font == 'DejaVu':
 146             fm.expandFontMapping(['DejaVuSerif', 'DejaVuSerifCondensed'], "roman", None, None)
 147             fm.expandFontMapping(['DejaVuSans','DejaVuSansCondensed'], "sans", "sf", None, "scaled")
 148             fm.expandFontMapping(['DejaVuSansMono'], "typewriter", "tt", None, "scaled")
 149         elif font == 'IBM':
 150             fm.expandFontMapping(['IBMPlexSerif', 'IBMPlexSerifThin,thin',
 151                                   'IBMPlexSerifExtraLight,extralight', 'IBMPlexSerifLight,light',
 152                                   'IBMPlexSerifSemibold,semibold'],
 153                                  "roman", None, "plex-serif")
 154             fm.expandFontMapping(['IBMPlexSans','IBMPlexSansCondensed,condensed',
 155                                   'IBMPlexSansThin,thin', 'IBMPlexSansExtraLight,extralight',
 156                                   'IBMPlexSansLight,light', 'IBMPlexSansSemibold,semibold'],
 157                                  "sans", "sf", "plex-sans", "scale")
 158             fm.expandFontMapping(['IBMPlexMono', 'IBMPlexMonoThin,thin',
 159                                   'IBMPlexMonoExtraLight,extralight', 'IBMPlexMonoLight,light',
 160                                   'IBMPlexMonoSemibold,semibold'],
 161                                  "typewriter", "tt", "plex-mono", "scale")
 162         elif font == 'Adobe':
 163             fm.expandFontMapping(['ADOBESourceSerifPro'], "roman", None, "sourceserifpro", None, "osf")
 164             fm.expandFontMapping(['ADOBESourceSansPro'], "sans", "sf", "sourcesanspro", "scaled", "osf")
 165             fm.expandFontMapping(['ADOBESourceCodePro'], "typewriter", "tt", "sourcecodepro", "scaled", "osf")
 166         elif font == 'Noto':
 167             fm.expandFontMapping(['NotoSerifRegular,regular', 'NotoSerifMedium,medium',
 168                                   'NotoSerifThin,thin', 'NotoSerifLight,light',
 169                                   'NotoSerifExtralight,extralight'],
 170                                   "roman", None, "noto-serif", None, "osf")
 171             fm.expandFontMapping(['NotoSansRegular,regular', 'NotoSansMedium,medium',
 172                                   'NotoSansThin,thin', 'NotoSansLight,light',
 173                                   'NotoSansExtralight,extralight'],
 174                                   "sans", "sf", "noto-sans", "scaled")
 175             fm.expandFontMapping(['NotoMonoRegular,regular'], "typewriter", "tt", "noto-mono", "scaled")
 176         elif font == 'Cantarell':
 177             fm.expandFontMapping(['cantarell,defaultsans'],
 178                                   "sans", "sf", "cantarell", "scaled", "oldstyle")
 179     return fm
 180
 181 def convert_fonts(document, fm, osfoption = "osf"):
 182     " Handle font definition (LaTeX preamble -> native) "
 183
 184     rpkg = re.compile(r'^\\usepackage(\[([^\]]*)\])?\{([^\}]+)\}')
 185     rscaleopt = re.compile(r'^scaled?=(.*)')
 186
 187     i = 0
 188     while i < len(document.preamble):
 189         i = find_re(document.preamble, rpkg, i+1)
 190         if i == -1:
 191             return
 192         mo = rpkg.search(document.preamble[i])
 193         if mo == None or mo.group(2) == None:
 194             options = []
 195         else:
 196             options = mo.group(2).replace(' ', '').split(",")
 197         pkg = mo.group(3)
 198         o = 0
 199         oscale = 1
 200         has_osf = False
 201         while o < len(options):
 202             if options[o] == osfoption:
 203                 has_osf = True
 204                 del options[o]
 205                 continue
 206             mo = rscaleopt.search(options[o])
 207             if mo == None:
 208                 o += 1
 209                 continue
 210             oscale = mo.group(1)
 211             del options[o]
 212             continue
 213
 214         if not pkg in fm.pkginmap:
 215             continue
 216         # determine fontname
 217         fn = fm.getfontname(pkg, options)
 218         if fn == None:
 219             continue
 220         del document.preamble[i]
 221         fontinfo = fm.font2pkgmap[fn]
 222         if fontinfo.scaletype == None:
 223             fontscale = None
 224         else:
 225             fontscale = "\\font_" + fontinfo.scaletype + "_scale"
 226             fontinfo.scaleval = oscale
 227         if has_osf:
 228             if fontinfo.osfopt == None:
 229                 options.extend(osfoption)
 230                 continue
 231             osf = find_token(document.header, "\\font_osf false")
 232             osftag = "\\font_osf"
 233             if osf == -1 and fontinfo.fonttype != "math":
 234                 # Try with newer format
 235                 osftag = "\\font_" + fontinfo.fonttype + "_osf"
 236                 osf = find_token(document.header, osftag + " false")
 237             if osf != -1:
 238                 document.header[osf] = osftag + " true"
 239         if i > 0 and document.preamble[i-1] == "% Added by lyx2lyx":
 240             del document.preamble[i-1]
 241             i -= 1
 242         if fontscale != None:
 243             j = find_token(document.header, fontscale, 0)
 244             if j != -1:
 245                 val = get_value(document.header, fontscale, j)
 246                 vals = val.split()
 247                 scale = "100"
 248                 if oscale != None:
 249                     scale = "%03d" % int(float(oscale) * 100)
 250                 document.header[j] = fontscale + " " + scale + " " + vals[1]
 251         ft = "\\font_" + fontinfo.fonttype
 252         j = find_token(document.header, ft, 0)
 253         if j != -1:
 254             val = get_value(document.header, ft, j)
 255             words = val.split() # ! splits also values like '"DejaVu Sans"'
 256             words[0] = '"' + fn + '"'
 257             document.header[j] = ft + ' ' + ' '.join(words)
 258
 259 def revert_fonts(document, fm, fontmap, OnlyWithXOpts = False, WithXOpts = False):
 260     " Revert native font definition to LaTeX "
 261     # fonlist := list of fonts created from the same package
 262     # Empty package means that the font-name is the same as the package-name
 263     # fontmap (key = package, val += found options) will be filled
 264     # and used later in add_preamble_fonts() to be added to user-preamble
 265
 266     rfontscale = re.compile(r'^\s*(\\font_(roman|sans|typewriter|math))\s+')
 267     rscales = re.compile(r'^\s*(\d+)\s+(\d+)')
 268     i = 0
 269     while i < len(document.header):
 270         i = find_re(document.header, rfontscale, i+1)
 271         if (i == -1):
 272             return True
 273         mo = rfontscale.search(document.header[i])
 274         if mo == None:
 275             continue
 276         ft = mo.group(1)    # 'roman', 'sans', 'typewriter', 'math'
 277         val = get_value(document.header, ft, i)
 278         words = val.split(' ')     # ! splits also values like '"DejaVu Sans"'
 279         font = words[0].strip('"') # TeX font name has no whitespace
 280         if not font in fm.font2pkgmap:
 281             continue
 282         fontinfo = fm.font2pkgmap[font]
 283         val = fontinfo.package
 284         if not val in fontmap:
 285             fontmap[val] = []
 286         x = -1
 287         if OnlyWithXOpts or WithXOpts:
 288             if ft == "\\font_math":
 289                 return False
 290             regexp = re.compile(r'^\s*(\\font_roman_opts)\s+')
 291             if ft == "\\font_sans":
 292                 regexp = re.compile(r'^\s*(\\font_sans_opts)\s+')
 293             elif ft == "\\font_typewriter":
 294                 regexp = re.compile(r'^\s*(\\font_typewriter_opts)\s+')
 295             x = find_re(document.header, regexp, 0)
 296             if x == -1 and OnlyWithXOpts:
 297                 return False
 298
 299             if x != -1:
 300                 # We need to use this regex since split() does not handle quote protection
 301                 xopts = re.findall(r'[^"\s]\S*|".+?"', document.header[x])
 302                 opts = xopts[1].strip('"').split(",")
 303                 fontmap[val].extend(opts)
 304                 del document.header[x]
 305         words[0] = '"default"'
 306         document.header[i] = ft + ' ' + ' '.join(words)
 307         if fontinfo.scaleopt != None:
 308             xval =  get_value(document.header, "\\font_" + fontinfo.scaletype + "_scale", 0)
 309             mo = rscales.search(xval)
 310             if mo != None:
 311                 xval1 = mo.group(1)
 312                 xval2 = mo.group(2)
 313                 if xval1 != "100":
 314                     # set correct scale option
 315                     fontmap[val].extend([fontinfo.scaleopt + "=" + format(float(xval1) / 100, '.2f')])
 316         if fontinfo.osfopt != None:
 317             osf = find_token(document.header, "\\font_osf true")
 318             if osf == -1 and ft != "\\font_math":
 319                 # Try with newer format
 320                 osftag = "\\font_roman_osf true"
 321                 if ft == "\\font_sans":
 322                     osftag = "\\font_sans_osf true"
 323                 elif ft == "\\font_typewriter":
 324                     osftag = "\\font_typewriter_osf true"
 325                 osf = find_token(document.header, osftag)
 326             if osf != -1:
 327                 fontmap[val].extend([fontinfo.osfopt])
 328         if len(fontinfo.options) > 0:
 329             fontmap[val].extend(fontinfo.options)
 330     return True
 331
 332 ###############################################################################
 333 ###
 334 ### Conversion and reversion routines
 335 ###
 336 ###############################################################################
 337
 338 def convert_inputencoding_namechange(document):
 339     " Rename inputencoding settings. "
 340     i = find_token(document.header, "\\inputencoding", 0)
 341     if i == -1:
 342         return
 343     s = document.header[i].replace("auto", "auto-legacy")
 344     document.header[i] = s.replace("default", "auto-legacy-plain")
 345
 346 def revert_inputencoding_namechange(document):
 347     " Rename inputencoding settings. "
 348     i = find_token(document.header, "\\inputencoding", 0)
 349     if i == -1:
 350         return
 351     s = document.header[i].replace("auto-legacy-plain", "default")
 352     document.header[i] = s.replace("auto-legacy", "auto")
 353
 354 def convert_notoFonts(document):
 355     " Handle Noto fonts definition to LaTeX "
 356
 357     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 358         fm = createFontMapping(['Noto'])
 359         convert_fonts(document, fm)
 360
 361 def revert_notoFonts(document):
 362     " Revert native Noto font definition to LaTeX "
 363
 364     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 365         fontmap = dict()
 366         fm = createFontMapping(['Noto'])
 367         if revert_fonts(document, fm, fontmap):
 368             add_preamble_fonts(document, fontmap)
 369
 370 def convert_latexFonts(document):
 371     " Handle DejaVu and IBMPlex fonts definition to LaTeX "
 372
 373     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 374         fm = createFontMapping(['DejaVu', 'IBM'])
 375         convert_fonts(document, fm)
 376
 377 def revert_latexFonts(document):
 378     " Revert native DejaVu font definition to LaTeX "
 379
 380     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 381         fontmap = dict()
 382         fm = createFontMapping(['DejaVu', 'IBM'])
 383         if revert_fonts(document, fm, fontmap):
 384             add_preamble_fonts(document, fontmap)
 385
 386 def convert_AdobeFonts(document):
 387     " Handle Adobe Source fonts definition to LaTeX "
 388
 389     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 390         fm = createFontMapping(['Adobe'])
 391         convert_fonts(document, fm)
 392
 393 def revert_AdobeFonts(document):
 394     " Revert Adobe Source font definition to LaTeX "
 395
 396     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 397         fontmap = dict()
 398         fm = createFontMapping(['Adobe'])
 399         if revert_fonts(document, fm, fontmap):
 400             add_preamble_fonts(document, fontmap)
 401
 402 def removeFrontMatterStyles(document):
 403     " Remove styles Begin/EndFrontmatter"
 404
 405     layouts = ['BeginFrontmatter', 'EndFrontmatter']
 406     tokenend = len('\\begin_layout ')
 407     i = 0
 408     while True:
 409         i = find_token_exact(document.body, '\\begin_layout ', i+1)
 410         if i == -1:
 411             return
 412         layout = document.body[i][tokenend:].strip()
 413         if layout not in layouts:
 414             continue
 415         j = find_end_of_layout(document.body, i)
 416         if j == -1:
 417             document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
 418             continue
 419         while document.body[j+1].strip() == '':
 420             j += 1
 421         document.body[i:j+1] = []
 422
 423 def addFrontMatterStyles(document):
 424     " Use styles Begin/EndFrontmatter for elsarticle"
 425
 426     if document.textclass != "elsarticle":
 427         return
 428
 429     def insertFrontmatter(prefix, line):
 430         above = line
 431         while above > 0 and document.body[above-1].strip() == '':
 432             above -= 1
 433         below = line
 434         while document.body[below].strip() == '':
 435             below += 1
 436         document.body[above:below] = ['', '\\begin_layout ' + prefix + 'Frontmatter',
 437                                     '\\begin_inset Note Note',
 438                                     'status open', '',
 439                                     '\\begin_layout Plain Layout',
 440                                     'Keep this empty!',
 441                                     '\\end_layout', '',
 442                                     '\\end_inset', '', '',
 443                                     '\\end_layout', '']
 444
 445     layouts = ['Title', 'Title footnote', 'Author', 'Author footnote',
 446                 'Corresponding author', 'Address', 'Email', 'Abstract', 'Keywords']
 447     tokenend = len('\\begin_layout ')
 448     first = -1
 449     i = 0
 450     while True:
 451         i = find_token_exact(document.body, '\\begin_layout ', i+1)
 452         if i == -1:
 453             break
 454         layout = document.body[i][tokenend:].strip()
 455         if layout not in layouts:
 456             continue
 457         k = find_end_of_layout(document.body, i)
 458         if k == -1:
 459             document.warning("Malformed LyX document: Can't find end of layout at line %d" % i)
 460             continue
 461         if first == -1:
 462             first = i
 463         i = k
 464     if first == -1:
 465         return
 466     insertFrontmatter('End', k+1)
 467     insertFrontmatter('Begin', first)
 468
 469
 470 def convert_lst_literalparam(document):
 471     " Add param literal to include inset "
 472
 473     i = 0
 474     while True:
 475         i = find_token(document.body, '\\begin_inset CommandInset include', i+1)
 476         if i == -1:
 477             break
 478         j = find_end_of_inset(document.body, i)
 479         if j == -1:
 480             document.warning("Malformed LyX document: Can't find end of command inset at line %d" % i)
 481             continue
 482         while i < j and document.body[i].strip() != '':
 483             i += 1
 484         document.body.insert(i, 'literal "true"')
 485
 486
 487 def revert_lst_literalparam(document):
 488     " Remove param literal from include inset "
 489
 490     i = 0
 491     while True:
 492         i = find_token(document.body, '\\begin_inset CommandInset include', i+1)
 493         if i == -1:
 494             break
 495         j = find_end_of_inset(document.body, i)
 496         if j == -1:
 497             document.warning("Malformed LyX document: Can't find end of include inset at line %d" % i)
 498             continue
 499         del_token(document.body, 'literal', i, j)
 500
 501
 502 def revert_paratype(document):
 503     " Revert ParaType font definitions to LaTeX "
 504
 505     if find_token(document.header, "\\use_non_tex_fonts false", 0) != -1:
 506         preamble = ""
 507         i1 = find_token(document.header, "\\font_roman \"PTSerif-TLF\"", 0)
 508         i2 = find_token(document.header, "\\font_sans \"default\"", 0)
 509         i3 = find_token(document.header, "\\font_typewriter \"default\"", 0)
 510         j = find_token(document.header, "\\font_sans \"PTSans-TLF\"", 0)
 511
 512         sf_scale = 100.0
 513         sfval = find_token(document.header, "\\font_sf_scale", 0)
 514         if sfval == -1:
 515             document.warning("Malformed LyX document: Missing \\font_sf_scale.")
 516         else:
 517             sfscale = document.header[sfval].split()
 518             val = sfscale[1]
 519             sfscale[1] = "100"
 520             document.header[sfval] = " ".join(sfscale)
 521             try:
 522                 # float() can throw
 523                 sf_scale = float(val)
 524             except:
 525                 document.warning("Invalid font_sf_scale value: " + val)
 526
 527         sfoption = ""
 528         if sf_scale != "100.0":
 529             sfoption = "scaled=" + str(sf_scale / 100.0)
 530         k = find_token(document.header, "\\font_typewriter \"PTMono-TLF\"", 0)
 531         ttval = get_value(document.header, "\\font_tt_scale", 0)
 532         # cutoff " 100"
 533         ttval = ttval[:-4]
 534         ttoption = ""
 535         if ttval != "100":
 536             ttoption = "scaled=" + format(float(ttval) / 100, '.2f')
 537         if i1 != -1 and i2 != -1 and i3!= -1:
 538             add_to_preamble(document, ["\\usepackage{paratype}"])
 539         else:
 540             if i1!= -1:
 541                 add_to_preamble(document, ["\\usepackage{PTSerif}"])
 542                 document.header[i1] = document.header[i1].replace("PTSerif-TLF", "default")
 543             if j!= -1:
 544                 if sfoption != "":
 545                     add_to_preamble(document, ["\\usepackage[" + sfoption + "]{PTSans}"])
 546                 else:
 547                     add_to_preamble(document, ["\\usepackage{PTSans}"])
 548                 document.header[j] = document.header[j].replace("PTSans-TLF", "default")
 549             if k!= -1:
 550                 if ttoption != "":
 551                     add_to_preamble(document, ["\\usepackage[" + ttoption + "]{PTMono}"])
 552                 else:
 553                     add_to_preamble(document, ["\\usepackage{PTMono}"])
 554                 document.header[k] = document.header[k].replace("PTMono-TLF", "default")
 555
 556
 557 def revert_xcharter(document):
 558     " Revert XCharter font definitions to LaTeX "
 559
 560     i = find_token(document.header, "\\font_roman \"xcharter\"", 0)
 561     if i == -1:
 562         return
 563
 564     # replace unsupported font setting
 565     document.header[i] = document.header[i].replace("xcharter", "default")
 566     # no need for preamble code with system fonts
 567     if get_bool_value(document.header, "\\use_non_tex_fonts"):
 568         return
 569
 570     # transfer old style figures setting to package options
 571     j = find_token(document.header, "\\font_osf true")
 572     if j != -1:
 573         options = "[osf]"
 574         document.header[j] = "\\font_osf false"
 575     else:
 576         options = ""
 577     if i != -1:
 578         add_to_preamble(document, ["\\usepackage%s{XCharter}"%options])
 579
 580
 581 def revert_lscape(document):
 582     " Reverts the landscape environment (Landscape module) to TeX-code "
 583
 584     if not "landscape" in document.get_module_list():
 585         return
 586
 587     i = 0
 588     while True:
 589         i = find_token(document.body, "\\begin_inset Flex Landscape", i+1)
 590         if i == -1:
 591             return
 592         j = find_end_of_inset(document.body, i)
 593         if j == -1:
 594             document.warning("Malformed LyX document: Can't find end of Landscape inset")
 595             continue
 596
 597         if document.body[i] == "\\begin_inset Flex Landscape (Floating)":
 598             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}}")
 599             document.body[i : i + 4] = put_cmd_in_ert("\\afterpage{\\begin{landscape}")
 600             add_to_preamble(document, ["\\usepackage{afterpage}"])
 601         else:
 602             document.body[j - 2 : j + 1] = put_cmd_in_ert("\\end{landscape}")
 603             document.body[i : i + 4] = put_cmd_in_ert("\\begin{landscape}")
 604
 605         add_to_preamble(document, ["\\usepackage{pdflscape}"])
 606
 607
 608 def convert_fontenc(document):
 609     " Convert default fontenc setting "
 610
 611     i = find_token(document.header, "\\fontencoding global", 0)
 612     if i == -1:
 613         return
 614
 615     document.header[i] = document.header[i].replace("global", "auto")
 616
 617
 618 def revert_fontenc(document):
 619     " Revert default fontenc setting "
 620
 621     i = find_token(document.header, "\\fontencoding auto", 0)
 622     if i == -1:
 623         return
 624
 625     document.header[i] = document.header[i].replace("auto", "global")
 626
 627
 628 def revert_nospellcheck(document):
 629     " Remove nospellcheck font info param "
 630
 631     i = 0
 632     while True:
 633         i = find_token(document.body, '\\nospellcheck', i)
 634         if i == -1:
 635             return
 636         del document.body[i]
 637
 638
 639 def revert_floatpclass(document):
 640     " Remove float placement params 'document' and 'class' "
 641
 642     del_token(document.header, "\\float_placement class")
 643
 644     i = 0
 645     while True:
 646         i = find_token(document.body, '\\begin_inset Float', i+1)
 647         if i == -1:
 648             break
 649         j = find_end_of_inset(document.body, i)
 650         k = find_token(document.body, 'placement class', i, i + 2)
 651         if k == -1:
 652             k = find_token(document.body, 'placement document', i, i + 2)
 653             if k != -1:
 654                 del document.body[k]
 655             continue
 656         del document.body[k]
 657
 658
 659 def revert_floatalignment(document):
 660     " Remove float alignment params "
 661
 662     galignment = get_value(document.header, "\\float_alignment", delete=True)
 663
 664     i = 0
 665     while True:
 666         i = find_token(document.body, '\\begin_inset Float', i+1)
 667         if i == -1:
 668             break
 669         j = find_end_of_inset(document.body, i)
 670         if j == -1:
 671             document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
 672             continue
 673         k = find_token(document.body, 'alignment', i, i+4)
 674         if k == -1:
 675             i = j
 676             continue
 677         alignment = get_value(document.body, "alignment", k)
 678         if alignment == "document":
 679             alignment = galignment
 680         del document.body[k]
 681         l = find_token(document.body, "\\begin_layout Plain Layout", i, j)
 682         if l == -1:
 683             document.warning("Can't find float layout!")
 684             continue
 685         alcmd = []
 686         if alignment == "left":
 687             alcmd = put_cmd_in_ert("\\raggedright{}")
 688         elif alignment == "center":
 689             alcmd = put_cmd_in_ert("\\centering{}")
 690         elif alignment == "right":
 691             alcmd = put_cmd_in_ert("\\raggedleft{}")
 692         if len(alcmd) > 0:
 693             document.body[l+1:l+1] = alcmd
 694         i = j
 695
 696 def revert_tuftecite(document):
 697     " Revert \cite commands in tufte classes "
 698
 699     tufte = ["tufte-book", "tufte-handout"]
 700     if document.textclass not in tufte:
 701         return
 702
 703     i = 0
 704     while (True):
 705         i = find_token(document.body, "\\begin_inset CommandInset citation", i+1)
 706         if i == -1:
 707             break
 708         j = find_end_of_inset(document.body, i)
 709         if j == -1:
 710             document.warning("Can't find end of citation inset at line %d!!" %(i))
 711             continue
 712         k = find_token(document.body, "LatexCommand", i, j)
 713         if k == -1:
 714             document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
 715             i = j
 716             continue
 717         cmd = get_value(document.body, "LatexCommand", k)
 718         if cmd != "cite":
 719             i = j
 720             continue
 721         pre = get_quoted_value(document.body, "before", i, j)
 722         post = get_quoted_value(document.body, "after", i, j)
 723         key = get_quoted_value(document.body, "key", i, j)
 724         if not key:
 725             document.warning("Citation inset at line %d does not have a key!" %(i))
 726             key = "???"
 727         # Replace command with ERT
 728         res = "\\cite"
 729         if pre:
 730             res += "[" + pre + "]"
 731         if post:
 732             res += "[" + post + "]"
 733         elif pre:
 734             res += "[]"
 735         res += "{" + key + "}"
 736         document.body[i:j+1] = put_cmd_in_ert([res])
 737         i = j
 738
 739
 740 def revert_stretchcolumn(document):
 741     " We remove the column varwidth flags or everything else will become a mess. "
 742     i = 0
 743     while True:
 744         i = find_token(document.body, "\\begin_inset Tabular", i+1)
 745         if i == -1:
 746             return
 747         j = find_end_of_inset(document.body, i+1)
 748         if j == -1:
 749             document.warning("Malformed LyX document: Could not find end of tabular.")
 750             continue
 751         for k in range(i, j):
 752             if re.search('^<column.*varwidth="[^"]+".*>$', document.body[k]):
 753                 document.warning("Converting 'tabularx'/'xltabular' table to normal table.")
 754                 document.body[k] = document.body[k].replace(' varwidth="true"', '')
 755
 756
 757 def revert_vcolumns(document):
 758     " Revert standard columns with line breaks etc. "
 759     i = 0
 760     needvarwidth = False
 761     needarray = False
 762     try:
 763         while True:
 764             i = find_token(document.body, "\\begin_inset Tabular", i+1)
 765             if i == -1:
 766                 return
 767             j = find_end_of_inset(document.body, i)
 768             if j == -1:
 769                 document.warning("Malformed LyX document: Could not find end of tabular.")
 770                 continue
 771
 772             # Collect necessary column information
 773             m = i + 1
 774             nrows = int(document.body[i+1].split('"')[3])
 775             ncols = int(document.body[i+1].split('"')[5])
 776             col_info = []
 777             for k in range(ncols):
 778                 m = find_token(document.body, "<column", m)
 779                 width = get_option_value(document.body[m], 'width')
 780                 varwidth = get_option_value(document.body[m], 'varwidth')
 781                 alignment = get_option_value(document.body[m], 'alignment')
 782                 special = get_option_value(document.body[m], 'special')
 783                 col_info.append([width, varwidth, alignment, special, m])
 784
 785             # Now parse cells
 786             m = i + 1
 787             lines = []
 788             for row in range(nrows):
 789                 for col in range(ncols):
 790                     m = find_token(document.body, "<cell", m)
 791                     multicolumn = get_option_value(document.body[m], 'multicolumn')
 792                     multirow = get_option_value(document.body[m], 'multirow')
 793                     width = get_option_value(document.body[m], 'width')
 794                     rotate = get_option_value(document.body[m], 'rotate')
 795                     # Check for: linebreaks, multipars, non-standard environments
 796                     begcell = m
 797                     endcell = find_token(document.body, "</cell>", begcell)
 798                     vcand = False
 799                     if find_token(document.body, "\\begin_inset Newline", begcell, endcell) != -1:
 800                         vcand = True
 801                     elif count_pars_in_inset(document.body, begcell + 2) > 1:
 802                         vcand = True
 803                     elif get_value(document.body, "\\begin_layout", begcell) != "Plain Layout":
 804                         vcand = True
 805                     if vcand and rotate == "" and ((multicolumn == "" and multirow == "") or width == ""):
 806                         if col_info[col][0] == "" and col_info[col][1] == "" and col_info[col][3] == "":
 807                             needvarwidth = True
 808                             alignment = col_info[col][2]
 809                             col_line = col_info[col][4]
 810                             vval = ""
 811                             if alignment == "center":
 812                                 vval = ">{\\centering}"
 813                             elif  alignment == "left":
 814                                 vval = ">{\\raggedright}"
 815                             elif alignment == "right":
 816                                 vval = ">{\\raggedleft}"
 817                             if vval != "":
 818                                 needarray = True
 819                             vval += "V{\\linewidth}"
 820
 821                             document.body[col_line] = document.body[col_line][:-1] + " special=\"" + vval + "\">"
 822                             # ERT newlines and linebreaks (since LyX < 2.4 automatically inserts parboxes
 823                             # with newlines, and we do not want that)
 824                             while True:
 825                                 endcell = find_token(document.body, "</cell>", begcell)
 826                                 linebreak = False
 827                                 nl = find_token(document.body, "\\begin_inset Newline newline", begcell, endcell)
 828                                 if nl == -1:
 829                                     nl = find_token(document.body, "\\begin_inset Newline linebreak", begcell, endcell)
 830                                     if nl == -1:
 831                                          break
 832                                     linebreak = True
 833                                 nle = find_end_of_inset(document.body, nl)
 834                                 del(document.body[nle:nle+1])
 835                                 if linebreak:
 836                                     document.body[nl:nl+1] = put_cmd_in_ert("\\linebreak{}")
 837                                 else:
 838                                     document.body[nl:nl+1] = put_cmd_in_ert("\\\\")
 839                     m += 1
 840
 841             i = j
 842
 843     finally:
 844         if needarray == True:
 845             add_to_preamble(document, ["\\usepackage{array}"])
 846         if needvarwidth == True:
 847             add_to_preamble(document, ["\\usepackage{varwidth}"])
 848
 849
 850 def revert_bibencoding(document):
 851     " Revert bibliography encoding "
 852
 853     # Get cite engine
 854     engine = "basic"
 855     i = find_token(document.header, "\\cite_engine", 0)
 856     if i == -1:
 857         document.warning("Malformed document! Missing \\cite_engine")
 858     else:
 859         engine = get_value(document.header, "\\cite_engine", i)
 860
 861     # Check if biblatex
 862     biblatex = False
 863     if engine in ["biblatex", "biblatex-natbib"]:
 864         biblatex = True
 865
 866     # Map lyx to latex encoding names
 867     encodings = {
 868         "utf8" : "utf8",
 869         "utf8x" : "utf8x",
 870         "armscii8" : "armscii8",
 871         "iso8859-1" : "latin1",
 872         "iso8859-2" : "latin2",
 873         "iso8859-3" : "latin3",
 874         "iso8859-4" : "latin4",
 875         "iso8859-5" : "iso88595",
 876         "iso8859-6" : "8859-6",
 877         "iso8859-7" : "iso-8859-7",
 878         "iso8859-8" : "8859-8",
 879         "iso8859-9" : "latin5",
 880         "iso8859-13" : "latin7",
 881         "iso8859-15" : "latin9",
 882         "iso8859-16" : "latin10",
 883         "applemac" : "applemac",
 884         "cp437" : "cp437",
 885         "cp437de" : "cp437de",
 886         "cp850" : "cp850",
 887         "cp852" : "cp852",
 888         "cp855" : "cp855",
 889         "cp858" : "cp858",
 890         "cp862" : "cp862",
 891         "cp865" : "cp865",
 892         "cp866" : "cp866",
 893         "cp1250" : "cp1250",
 894         "cp1251" : "cp1251",
 895         "cp1252" : "cp1252",
 896         "cp1255" : "cp1255",
 897         "cp1256" : "cp1256",
 898         "cp1257" : "cp1257",
 899         "koi8-r" : "koi8-r",
 900         "koi8-u" : "koi8-u",
 901         "pt154" : "pt154",
 902         "utf8-platex" : "utf8",
 903         "ascii" : "ascii"
 904     }
 905
 906     i = 0
 907     bibresources = []
 908     while (True):
 909         i = find_token(document.body, "\\begin_inset CommandInset bibtex", i+1)
 910         if i == -1:
 911             break
 912         j = find_end_of_inset(document.body, i)
 913         if j == -1:
 914             document.warning("Can't find end of bibtex inset at line %d!!" %(i))
 915             continue
 916         encoding = get_quoted_value(document.body, "encoding", i, j)
 917         if not encoding:
 918             continue
 919         # remove encoding line
 920         k = find_token(document.body, "encoding", i, j)
 921         if k != -1:
 922             del document.body[k]
 923         if encoding == "default":
 924             continue
 925         # Re-find inset end line
 926         j = find_end_of_inset(document.body, i)
 927         if biblatex:
 928             biblio_options = ""
 929             h = find_token(document.header, "\\biblio_options", 0)
 930             if h != -1:
 931                 biblio_options = get_value(document.header, "\\biblio_options", h)
 932                 if not "bibencoding" in biblio_options:
 933                      document.header[h] += ",bibencoding=%s" % encodings[encoding]
 934             else:
 935                 bs = find_token(document.header, "\\biblatex_bibstyle", 0)
 936                 if bs == -1:
 937                     # this should not happen
 938                     document.warning("Malformed LyX document! No \\biblatex_bibstyle header found!")
 939                 else:
 940                     document.header[bs-1 : bs-1] = ["\\biblio_options bibencoding=" + encodings[encoding]]
 941         else:
 942             document.body[j+1:j+1] = put_cmd_in_ert("\\egroup")
 943             document.body[i:i] = put_cmd_in_ert("\\bgroup\\inputencoding{" + encodings[encoding] + "}")
 944
 945         i = j
 946
 947
 948
 949 def convert_vcsinfo(document):
 950     " Separate vcs Info inset from buffer Info inset. "
 951
 952     types = {
 953         "vcs-revision" : "revision",
 954         "vcs-tree-revision" : "tree-revision",
 955         "vcs-author" : "author",
 956         "vcs-time" : "time",
 957         "vcs-date" : "date"
 958     }
 959     i = 0
 960     while True:
 961         i = find_token(document.body, "\\begin_inset Info", i+1)
 962         if i == -1:
 963             return
 964         j = find_end_of_inset(document.body, i+1)
 965         if j == -1:
 966             document.warning("Malformed LyX document: Could not find end of Info inset.")
 967             continue
 968         tp = find_token(document.body, 'type', i, j)
 969         tpv = get_quoted_value(document.body, "type", tp)
 970         if tpv != "buffer":
 971             continue
 972         arg = find_token(document.body, 'arg', i, j)
 973         argv = get_quoted_value(document.body, "arg", arg)
 974         if argv not in list(types.keys()):
 975             continue
 976         document.body[tp] = "type \"vcs\""
 977         document.body[arg] = "arg \"" + types[argv] + "\""
 978
 979
 980 def revert_vcsinfo(document):
 981     " Merge vcs Info inset to buffer Info inset. "
 982
 983     args = ["revision", "tree-revision", "author", "time", "date" ]
 984     i = 0
 985     while True:
 986         i = find_token(document.body, "\\begin_inset Info", i+1)
 987         if i == -1:
 988             return
 989         j = find_end_of_inset(document.body, i+1)
 990         if j == -1:
 991             document.warning("Malformed LyX document: Could not find end of Info inset.")
 992             continue
 993         tp = find_token(document.body, 'type', i, j)
 994         tpv = get_quoted_value(document.body, "type", tp)
 995         if tpv != "vcs":
 996             continue
 997         arg = find_token(document.body, 'arg', i, j)
 998         argv = get_quoted_value(document.body, "arg", arg)
 999         if argv not in args:
1000             document.warning("Malformed Info inset. Invalid vcs arg.")
1001             continue
1002         document.body[tp] = "type \"buffer\""
1003         document.body[arg] = "arg \"vcs-" + argv + "\""
1004
1005
def _qt_date_format_to_strftime(qtformat):
    """ Translate a Qt date format string (as used by QDate::toString) to strftime syntax.

    Text enclosed in single quotes is taken over verbatim (without the
    quotes), two consecutive single quotes stand for one literal quote, and
    literal percent signs are doubled so that strftime leaves them alone.
    Note that strftime's %d and %m always print a leading zero, also for
    the Qt formats "d" and "M".
    """
    tokens = {
        "dddd" : "%A", "ddd" : "%a", "dd" : "%d", "d" : "%d",
        "MMMM" : "%B", "MMM" : "%b", "MM" : "%m", "M" : "%m",
        "yyyy" : "%Y", "yy" : "%y"
    }

    def translate(match):
        text = match.group(0)
        if text in tokens:
            return tokens[text]
        if text == "%":
            return "%%"
        literal = match.group(1)
        if literal is None:
            # unbalanced quote: drop it
            return ""
        if literal == "":
            return "'"
        return literal.replace("%", "%%")

    # A single left-to-right pass: replacing the tokens one after the other
    # would also hit the output of earlier replacements and quoted text.
    return re.sub(r"'([^']*)'|d{1,4}|M{1,4}|yyyy|yy|%|'", translate, qtformat)


def revert_dateinfo(document):
    """ Revert date info insets to static text.

    Info insets of type "date", "fixdate" and "moddate" are replaced by the
    formatted date as a plain text line.  "fixdate" insets use the ISO date
    stored after the "@" in their argument, if there is a valid one; in all
    other cases today's date is used.
    """

    # FIXME This currently only considers the main language and uses the system locale
    # Ideally, it should honor context languages and switch the locale accordingly.

    # The date formats for each language using strftime syntax:
    # long, short, loclong, locmedium, locshort
    dateformats = {
        "afrikaans" : ["%A, %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
        "albanian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "american" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "amharic" : ["%A ፣%d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "ancientgreek" : ["%A, %d %B %Y", "%d %b %Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "arabic_arabi" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
        "arabic_arabtex" : ["%A، %d %B، %Y", "%d‏/%m‏/%Y", "%d %B، %Y", "%d/%m/%Y", "%d/%m/%Y"],
        "armenian" : ["%Y թ. %B %d, %A", "%d.%m.%y", "%d %B، %Y", "%d %b، %Y", "%d/%m/%Y"],
        "asturian" : ["%A, %d %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
        "australian" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "austrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "bahasa" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "bahasam" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "basque" : ["%Y(e)ko %B %d, %A", "%y/%m/%d", "%Y %B %d", "%Y %b %d", "%Y/%m/%d"],
        "belarusian" : ["%A, %d %B %Y г.", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "bosnian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%Y-%m-%d"],
        "brazilian" : ["%A, %d de %B de %Y", "%d/%m/%Y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
        "breton" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "british" : ["%A, %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "bulgarian" : ["%A, %d %B %Y г.", "%d.%m.%y г.", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "canadian" : ["%A, %B %d, %Y", "%Y-%m-%d", "%B %d, %Y", "%d %b %Y", "%Y-%m-%d"],
        "canadien" : ["%A %d %B %Y", "%y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "catalan" : ["%A, %d %B de %Y", "%d/%m/%y", "%d / %B / %Y", "%d / %b / %Y", "%d/%m/%Y"],
        "chinese-simplified" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y-%m-%d", "%y-%m-%d"],
        "chinese-traditional" : ["%Y年%m月%d日 %A", "%Y/%m/%d", "%Y年%m月%d日", "%Y年%m月%d日", "%y年%m月%d日"],
        "coptic" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "croatian" : ["%A, %d. %B %Y.", "%d. %m. %Y.", "%d. %B %Y.", "%d. %b. %Y.", "%d.%m.%Y."],
        "czech" : ["%A %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b. %Y", "%d.%m.%Y"],
        "danish" : ["%A den %d. %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
        "divehi" : ["%Y %B %d, %A", "%Y-%m-%d", "%Y %B %d", "%Y %b %d", "%d/%m/%Y"],
        "dutch" : ["%A %d %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "english" : ["%A, %B %d, %Y", "%m/%d/%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "esperanto" : ["%A, %d %B %Y", "%d %b %Y", "la %d de %B %Y", "la %d de %b %Y", "%m/%d/%Y"],
        "estonian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "farsi" : ["%A %d %B %Y", "%Y/%m/%d", "%d %B %Y", "%d %b %Y", "%Y/%m/%d"],
        "finnish" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "french" : ["%A %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "friulan" : ["%A %d di %B dal %Y", "%d/%m/%y", "%d di %B dal %Y", "%d di %b dal %Y", "%d/%m/%Y"],
        "galician" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%d/%m/%Y"],
        "georgian" : ["%A, %d %B, %Y", "%d.%m.%y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "german" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "german-ch" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "german-ch-old" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "greek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "hebrew" : ["%A, %d ב%B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "hindi" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "icelandic" : ["%A, %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "interlingua" : ["%Y %B %d, %A", "%Y-%m-%d", "le %d de %B %Y", "le %d de %b %Y", "%Y-%m-%d"],
        "irish" : ["%A %d %B %Y", "%d/%m/%Y", "%d. %B %Y", "%d. %b %Y", "%d/%m/%Y"],
        "italian" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d/%b/%Y", "%d/%m/%Y"],
        "japanese" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
        "japanese-cjk" : ["%Y年%m月%d日%A", "%Y/%m/%d", "%Y年%m月%d日", "%Y/%m/%d", "%y/%m/%d"],
        "kannada" : ["%A, %B %d, %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d-%m-%Y"],
        "kazakh" : ["%Y ж. %d %B, %A", "%d.%m.%y", "%d %B %Y", "%d %B %Y", "%Y-%d-%m"],
        "khmer" : ["%A %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
        "korean" : ["%Y년 %m월 %d일 %A", "%y. %m. %d.", "%Y년 %m월 %d일", "%Y. %m. %d.", "%y. %m. %d."],
        "kurmanji" : ["%A, %d %B %Y", "%d %b %Y", "%d. %B %Y", "%d. %m. %Y", "%Y-%m-%d"],
        "lao" : ["%A ທີ %d %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %B %Y", "%d/%m/%Y"],
        "latin" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "latvian" : ["%A, %Y. gada %d. %B", "%d.%m.%y", "%Y. gada %d. %B", "%Y. gada %d. %b", "%d.%m.%Y"],
        "lithuanian" : ["%Y m. %B %d d., %A", "%Y-%m-%d", "%Y m. %B %d d.", "%Y m. %B %d d.", "%Y-%m-%d"],
        "lowersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "macedonian" : ["%A, %d %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "magyar" : ["%Y. %B %d., %A", "%Y. %m. %d.", "%Y. %B %d.", "%Y. %b %d.", "%Y.%m.%d."],
        "malayalam" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "marathi" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "mongolian" : ["%A, %Y оны %m сарын %d", "%Y-%m-%d", "%Y оны %m сарын %d", "%d-%m-%Y", "%d-%m-%Y"],
        "naustrian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "newzealand" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "ngerman" : ["%A, %d. %B %Y", "%d.%m.%y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "norsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "nynorsk" : ["%A %d. %B %Y", "%d.%m.%Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "occitan" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "piedmontese" : ["%A, %d %B %Y", "%d %b %Y", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "polish" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "polutonikogreek" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "portuguese" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d de %b de %Y", "%Y/%m/%d"],
        "romanian" : ["%A, %d %B %Y", "%d.%m.%Y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "romansh" : ["%A, ils %d da %B %Y", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "russian" : ["%A, %d %B %Y г.", "%d.%m.%Y", "%d %B %Y г.", "%d %b %Y г.", "%d.%m.%Y"],
        "samin" : ["%Y %B %d, %A", "%Y-%m-%d", "%B %d. b. %Y", "%b %d. b. %Y", "%d.%m.%Y"],
        "sanskrit" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "scottish" : ["%A, %dmh %B %Y", "%d/%m/%Y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "serbian" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "serbian-latin" : ["%A, %d. %B %Y.", "%d.%m.%y.", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "slovak" : ["%A, %d. %B %Y", "%d. %m. %Y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "slovene" : ["%A, %d. %B %Y", "%d. %m. %y", "%d. %B %Y", "%d. %b %Y", "%d.%m.%Y"],
        "spanish" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
        "spanish-mexico" : ["%A, %d de %B de %Y", "%d/%m/%y", "%d de %B de %Y", "%d %b %Y", "%d/%m/%Y"],
        "swedish" : ["%A %d %B %Y", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%Y-%m-%d"],
        "syriac" : ["%Y %B %d, %A", "%Y-%m-%d", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "tamil" : ["%A, %d %B, %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "telugu" : ["%d, %B %Y, %A", "%d-%m-%y", "%d %B %Y", "%d %b %Y", "%d-%m-%Y"],
        "thai" : ["%Aที่ %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
        "tibetan" : ["%Y %Bའི་ཚེས་%d, %A", "%Y-%m-%d", "%B %d, %Y", "%b %d, %Y", "%m/%d/%Y"],
        "turkish" : ["%d %B %Y %A", "%d.%m.%Y", "%d %B %Y", "%d.%b.%Y", "%d.%m.%Y"],
        "turkmen" : ["%d %B %Y %A", "%d.%m.%Y", "%Y ý. %B %d", "%d.%m.%Y ý.", "%d.%m.%y ý."],
        "ukrainian" : ["%A, %d %B %Y р.", "%d.%m.%y", "%d %B %Y", "%d %m %Y", "%d.%m.%Y"],
        "uppersorbian" : ["%A, %d. %B %Y", "%d.%m.%y", "%d %B %Y", "%d %b %Y", "%d.%m.%Y"],
        "urdu" : ["%A، %d %B، %Y", "%d/%m/%y", "%d %B, %Y", "%d %b %Y", "%d/%m/%Y"],
        "vietnamese" : ["%A, %d %B, %Y", "%d/%m/%Y", "%d tháng %B %Y", "%d-%m-%Y", "%d/%m/%Y"],
        "welsh" : ["%A, %d %B %Y", "%d/%m/%y", "%d %B %Y", "%d %b %Y", "%d/%m/%Y"],
    }

    # Position of the named formats in the lists above
    named_formats = {"long" : 0, "short" : 1, "loclong" : 2, "locmedium" : 3, "locshort" : 4}

    types = ["date", "fixdate", "moddate" ]
    lang = get_value(document.header, "\\language")
    if lang == "":
        document.warning("Malformed LyX document! No \\language header found!")
        return
    formats = dateformats.get(lang)

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i+1)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        # Look for the parameters inside of this inset only
        tpv = get_quoted_value(document.body, "type", i, j)
        if tpv not in types:
            continue
        argv = get_quoted_value(document.body, "arg", i, j)
        isodate = ""
        dte = date.today()
        if tpv == "fixdate":
            # The argument has the form <format>@<ISO date>
            datecomps = argv.split('@')
            if len(datecomps) > 1:
                argv = datecomps[0]
                isodate = datecomps[1]
                m = re.search(r'(\d\d\d\d)-(\d\d)-(\d\d)', isodate)
                if m:
                    try:
                        dte = date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
                    except ValueError:
                        document.warning("Invalid date '%s' in Info inset, using the current date." % isodate)
        # FIXME if we had the path to the original document (not the one in the tmp dir),
        #       we could use the mtime for the type "moddate".
        result = ""
        if argv == "ISO":
            result = dte.isoformat()
        elif argv in named_formats:
            if formats is None:
                document.warning("No date formats known for language '%s', using the English ones." % lang)
                formats = dateformats["english"]
            result = dte.strftime(formats[named_formats[argv]])
        else:
            # Custom format in Qt syntax
            result = dte.strftime(_qt_date_format_to_strftime(argv))
        if sys.version_info < (3,0):
            # In Python 2, datetime module works with binary strings,
            # our dateformat strings are utf8-encoded:
            result = result.decode('utf-8')
        document.body[i : j+1] = [result]
1179
1180
def _qt_time_format_to_strftime(qtformat):
    """ Translate a Qt time format string (as used by QTime::toString) to strftime syntax.

    Text enclosed in single quotes is taken over verbatim (without the
    quotes), two consecutive single quotes stand for one literal quote, and
    literal percent signs are doubled so that strftime leaves them alone.
    As in Qt, "h" and "hh" use the 12 hour clock only if the format has an
    AM/PM marker as well.  The milliseconds ("z", "zzz") are approximated
    by strftime's %f, which prints six digits.
    """
    token_re = re.compile(r"'([^']*)'|HH|H|hh|h|mm|m|ss|s|zzz|z|t|AP|ap|A|a|%|'")
    ampm = ("AP", "ap", "A", "a")
    has_ampm = any(mt.group(0) in ampm for mt in token_re.finditer(qtformat))
    hour = "%I" if has_ampm else "%H"
    tokens = {
        "HH" : "%H", "H" : "%H", "hh" : hour, "h" : hour,
        "mm" : "%M", "m" : "%M", "ss" : "%S", "s" : "%S",
        "zzz" : "%f", "z" : "%f", "t" : "%Z",
        "AP" : "%p", "ap" : "%p", "A" : "%p", "a" : "%p"
    }

    def translate(match):
        text = match.group(0)
        if text in tokens:
            return tokens[text]
        if text == "%":
            return "%%"
        literal = match.group(1)
        if literal is None:
            # unbalanced quote: drop it
            return ""
        if literal == "":
            return "'"
        return literal.replace("%", "%%")

    # A single left-to-right pass: replacing the tokens one after the other
    # would also hit the output of earlier replacements ("HH" -> "%H" ->
    # "%%H") and quoted text.
    return token_re.sub(translate, qtformat)


def revert_timeinfo(document):
    """ Revert time info insets to static text.

    Info insets of type "time", "fixtime" and "modtime" are replaced by the
    formatted time as a plain text line.  "fixtime" insets use the ISO time
    stored after the "@" in their argument, if there is a valid one; in all
    other cases the current time is used.
    """

    # FIXME This currently only considers the main language and uses the system locale
    # Ideally, it should honor context languages and switch the locale accordingly.
    # Also, the time object is "naive", i.e., it does not know of timezones (%Z will
    # be empty).

    # The time formats for each language using strftime syntax:
    # long, short
    timeformats = {
        "afrikaans" : ["%H:%M:%S %Z", "%H:%M"],
        "albanian" : ["%I:%M:%S %p, %Z", "%I:%M %p"],
        "american" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "amharic" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ancientgreek" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "arabic_arabi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "arabic_arabtex" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "armenian" : ["%H:%M:%S %Z", "%H:%M"],
        "asturian" : ["%H:%M:%S %Z", "%H:%M"],
        "australian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "austrian" : ["%H:%M:%S %Z", "%H:%M"],
        "bahasa" : ["%H.%M.%S %Z", "%H.%M"],
        "bahasam" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "basque" : ["%H:%M:%S (%Z)", "%H:%M"],
        "belarusian" : ["%H:%M:%S, %Z", "%H:%M"],
        "bosnian" : ["%H:%M:%S %Z", "%H:%M"],
        "brazilian" : ["%H:%M:%S %Z", "%H:%M"],
        "breton" : ["%H:%M:%S %Z", "%H:%M"],
        "british" : ["%H:%M:%S %Z", "%H:%M"],
        "bulgarian" : ["%H:%M:%S %Z", "%H:%M"],
        "canadian" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "canadien" : ["%H:%M:%S %Z", "%H h %M"],
        "catalan" : ["%H:%M:%S %Z", "%H:%M"],
        "chinese-simplified" : ["%Z %p%I:%M:%S", "%p%I:%M"],
        "chinese-traditional" : ["%p%I:%M:%S [%Z]", "%p%I:%M"],
        "coptic" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "croatian" : ["%H:%M:%S (%Z)", "%H:%M"],
        "czech" : ["%H:%M:%S %Z", "%H:%M"],
        "danish" : ["%H.%M.%S %Z", "%H.%M"],
        "divehi" : ["%H:%M:%S %Z", "%H:%M"],
        "dutch" : ["%H:%M:%S %Z", "%H:%M"],
        "english" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "esperanto" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "estonian" : ["%H:%M:%S %Z", "%H:%M"],
        "farsi" : ["%H:%M:%S (%Z)", "%H:%M"],
        "finnish" : ["%H.%M.%S %Z", "%H.%M"],
        "french" : ["%H:%M:%S %Z", "%H:%M"],
        "friulan" : ["%H:%M:%S %Z", "%H:%M"],
        "galician" : ["%H:%M:%S %Z", "%H:%M"],
        "georgian" : ["%H:%M:%S %Z", "%H:%M"],
        "german" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch" : ["%H:%M:%S %Z", "%H:%M"],
        "german-ch-old" : ["%H:%M:%S %Z", "%H:%M"],
        "greek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "hebrew" : ["%H:%M:%S %Z", "%H:%M"],
        "hindi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "icelandic" : ["%H:%M:%S %Z", "%H:%M"],
        "interlingua" : ["%H:%M:%S %Z", "%H:%M"],
        "irish" : ["%H:%M:%S %Z", "%H:%M"],
        "italian" : ["%H:%M:%S %Z", "%H:%M"],
        "japanese" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "japanese-cjk" : ["%H時%M分%S秒 %Z", "%H:%M"],
        "kannada" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "kazakh" : ["%H:%M:%S %Z", "%H:%M"],
        "khmer" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "korean" : ["%p %I시%M분 %S초 %Z", "%p %I:%M"],
        "kurmanji" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "lao" : ["%H ໂມງ%M ນາທີ  %S ວິນາທີ %Z", "%H:%M"],
        "latin" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "latvian" : ["%H:%M:%S %Z", "%H:%M"],
        "lithuanian" : ["%H:%M:%S %Z", "%H:%M"],
        "lowersorbian" : ["%H:%M:%S %Z", "%H:%M"],
        "macedonian" : ["%H:%M:%S %Z", "%H:%M"],
        "magyar" : ["%H:%M:%S %Z", "%H:%M"],
        "malayalam" : ["%p %I:%M:%S %Z", "%p %I:%M"],
        "marathi" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "mongolian" : ["%H:%M:%S %Z", "%H:%M"],
        "naustrian" : ["%H:%M:%S %Z", "%H:%M"],
        "newzealand" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "ngerman" : ["%H:%M:%S %Z", "%H:%M"],
        "norsk" : ["%H:%M:%S %Z", "%H:%M"],
        "nynorsk" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "occitan" : ["%H:%M:%S %Z", "%H:%M"],
        "piedmontese" : ["%H:%M:%S %Z", "%H:%M:%S"],
        "polish" : ["%H:%M:%S %Z", "%H:%M"],
        "polutonikogreek" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "portuguese" : ["%H:%M:%S %Z", "%H:%M"],
        "romanian" : ["%H:%M:%S %Z", "%H:%M"],
        "romansh" : ["%H:%M:%S %Z", "%H:%M"],
        "russian" : ["%H:%M:%S %Z", "%H:%M"],
        "samin" : ["%H:%M:%S %Z", "%H:%M"],
        "sanskrit" : ["%H:%M:%S %Z", "%H:%M"],
        "scottish" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian" : ["%H:%M:%S %Z", "%H:%M"],
        "serbian-latin" : ["%H:%M:%S %Z", "%H:%M"],
        "slovak" : ["%H:%M:%S %Z", "%H:%M"],
        "slovene" : ["%H:%M:%S %Z", "%H:%M"],
        "spanish" : ["%H:%M:%S (%Z)", "%H:%M"],
        "spanish-mexico" : ["%H:%M:%S %Z", "%H:%M"],
        "swedish" : ["kl. %H:%M:%S %Z", "%H:%M"],
        "syriac" : ["%H:%M:%S %Z", "%H:%M"],
        "tamil" : ["%p %I:%M:%S %Z", "%p %I:%M"],
        "telugu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "thai" : ["%H นาฬิกา %M นาที  %S วินาที %Z", "%H:%M"],
        "tibetan" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "turkish" : ["%H:%M:%S %Z", "%H:%M"],
        "turkmen" : ["%H:%M:%S %Z", "%H:%M"],
        "ukrainian" : ["%H:%M:%S %Z", "%H:%M"],
        "uppersorbian" : ["%H:%M:%S %Z", "%H:%M hodź."],
        "urdu" : ["%I:%M:%S %p %Z", "%I:%M %p"],
        "vietnamese" : ["%H:%M:%S %Z", "%H:%M"],
        "welsh" : ["%H:%M:%S %Z", "%H:%M"]
    }

    # Position of the named formats in the lists above
    named_formats = {"long" : 0, "short" : 1}

    types = ["time", "fixtime", "modtime" ]
    i = find_token(document.header, "\\language", 0)
    if i == -1:
        # this should not happen
        document.warning("Malformed LyX document! No \\language header found!")
        return
    lang = get_value(document.header, "\\language", i)
    formats = timeformats.get(lang)

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i+1)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        # Look for the parameters inside of this inset only
        tpv = get_quoted_value(document.body, "type", i, j)
        if tpv not in types:
            continue
        argv = get_quoted_value(document.body, "arg", i, j)
        isotime = ""
        tme = datetime.now().time()
        if tpv == "fixtime":
            # The argument has the form <format>@<ISO time>
            timecomps = argv.split('@')
            if len(timecomps) > 1:
                argv = timecomps[0]
                isotime = timecomps[1]
                # The seconds are optional
                m = re.search(r'(\d\d):(\d\d):(\d\d)', isotime)
                if not m:
                    m = re.search(r'(\d\d):(\d\d)', isotime)
                if m:
                    try:
                        tme = time(*[int(comp) for comp in m.groups()])
                    except ValueError:
                        document.warning("Invalid time '%s' in Info inset, using the current time." % isotime)
        # FIXME if we had the path to the original document (not the one in the tmp dir),
        #       we could use the mtime for the type "modtime".
        result = ""
        if argv == "ISO":
            result = tme.isoformat()
        elif argv in named_formats:
            if formats is None:
                document.warning("No time formats known for language '%s', using the English ones." % lang)
                formats = timeformats["english"]
            result = tme.strftime(formats[named_formats[argv]])
        else:
            # Custom format in Qt syntax
            result = tme.strftime(_qt_time_format_to_strftime(argv))
        if sys.version_info < (3,0):
            # In Python 2, datetime module works with binary strings,
            # our timeformat strings are utf8-encoded:
            result = result.decode('utf-8')
        # The replacement has to be a list: a bare string would be spliced
        # into the body character by character.
        document.body[i : j+1] = [result]
1354
1355
def revert_namenoextinfo(document):
    """Merge buffer Info inset type name-noext to name.

    Every Info inset of type "buffer" whose argument is "name-noext"
    gets its argument line rewritten to "name".
    """

    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\begin_inset Info", pos + 1)
        if pos == -1:
            return
        inset_end = find_end_of_inset(body, pos + 1)
        if inset_end == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        # Only "buffer" insets are of interest here
        type_line = find_token(body, 'type', pos, inset_end)
        if get_quoted_value(body, "type", type_line) != "buffer":
            continue
        arg_line = find_token(body, 'arg', pos, inset_end)
        if get_quoted_value(body, "arg", arg_line) == "name-noext":
            body[arg_line] = "arg \"name\""
1377
1378
def revert_l7ninfo(document):
    """Revert l7n Info insets to text.

    Every Info inset of type "l7n" is replaced by its argument, with
    trailing colons, the menu accelerator ("|...") and the Qt accelerator
    marker ("&") removed. A literal " & " is kept.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i+1)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "l7n":
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        # remove trailing colons, menu accelerator (|...) and qt accelerator (&), while keeping literal " & "
        argv = argv.rstrip(':').split('|')[0].replace(" & ", "</amp;>").replace("&", "").replace("</amp;>", " & ")
        # Replace the inset by ONE body line. Assigning the bare string to
        # the slice would splice it in character by character, i.e. one
        # body line per character.
        document.body[i : j+1] = [argv]
1400
1401
def revert_listpargs(document):
    """Revert listpreamble arguments to TeX-code.

    Each "Argument listpreamble:" inset is removed and the content of its
    Plain Layout is re-inserted, wrapped in braces inside an ERT inset, at
    the position returned as element 3 of get_containing_layout()
    (presumably the start of the paragraph content -- confirm against
    parser_tools).
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Argument listpreamble:", i+1)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Without the end we cannot delete the inset; skipping it also
            # avoids re-finding the same inset after the insertion below.
            document.warning("Malformed LyX document: Can't find end of listpreamble Argument inset")
            continue
        # Find containing paragraph layout
        parent = get_containing_layout(document.body, i)
        if not parent:
            document.warning("Malformed LyX document: Can't find parent paragraph layout")
            continue
        parbeg = parent[3]
        # Restrict the search to this inset so that we never pick up the
        # Plain Layout of a later inset.
        beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i, j)
        endPlain = -1
        if beginPlain != -1:
            endPlain = find_end_of_layout(document.body, beginPlain)
        if beginPlain == -1 or endPlain == -1:
            document.warning("Malformed LyX document: Can't find Plain Layout of listpreamble Argument inset")
            continue
        content = document.body[beginPlain + 1 : endPlain]
        del document.body[i:j+1]
        subst = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout",
                 "{"] + content + ["}", "\\end_layout", "", "\\end_inset", ""]
        document.body[parbeg : parbeg] = subst
1423
1424
def revert_lformatinfo(document):
    """Revert layout format Info insets to text.

    Every Info inset of type "lyxinfo" with argument "layoutformat" is
    replaced by the (hardcoded) layout format number.
    """

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Info", i+1)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            continue
        tp = find_token(document.body, 'type', i, j)
        tpv = get_quoted_value(document.body, "type", tp)
        if tpv != "lyxinfo":
            continue
        arg = find_token(document.body, 'arg', i, j)
        argv = get_quoted_value(document.body, "arg", arg)
        if argv != "layoutformat":
            continue
        # hardcoded for now
        # Insert ONE body line. Assigning the bare string "69" to the slice
        # would produce the two separate lines "6" and "9".
        document.body[i : j+1] = ["69"]
1447
1448
def convert_hebrew_parentheses(document):
    """ Swap opening/closing parentheses in Hebrew text.

    Up to LyX 2.4, "(" was used as closing parenthesis and
    ")" as opening parenthesis for Hebrew in the LyX source.

    The current language is tracked with a stack: every \\begin_layout
    inherits the language of its surroundings, a \\lang line changes the
    language of the innermost open layout, and \\end_layout restores the
    language that was active before the layout started.
    """
    lang_prefix = '\\lang '
    current_languages = [document.language]
    for i, line in enumerate(document.body):
        if line.startswith(lang_prefix):
            # Cut off the prefix by length. str.lstrip() takes a *set of
            # characters* and would also eat leading letters of the language
            # name that happen to occur in the prefix.
            current_languages[-1] = line[len(lang_prefix):].strip()
        elif line.startswith('\\begin_layout'):
            current_languages.append(current_languages[-1])
        elif line.startswith('\\end_layout'):
            # Never drop the document-level entry, so that an unbalanced
            # \end_layout cannot lead to an IndexError further down.
            if len(current_languages) > 1:
                current_languages.pop()
        elif current_languages[-1] == 'hebrew' and not line.startswith('\\'):
            # Swap via a placeholder that cannot occur in the text
            document.body[i] = line.replace('(','\x00').replace(')','(').replace('\x00',')')
1467
1468
def revert_hebrew_parentheses(document):
    """Store parentheses in Hebrew text reversed.

    Simply calls convert_hebrew_parentheses(document): the same swap of
    "(" and ")" is applied in both directions.
    """
    # This only exists to keep the convert/revert naming convention
    convert_hebrew_parentheses(document)
1473
1474
def revert_malayalam(document):
    """Set the document language to English but assure Malayalam output.

    All the work is done by revert_language() from lyx2lyx_tools.
    """

    # NOTE(review): the arguments after the LyX language name are presumably
    # the babel name (empty here) and the polyglossia name -- confirm against
    # revert_language() in lyx2lyx_tools.
    revert_language(document, "malayalam", "", "malayalam")
1479
1480
def revert_soul(document):
    """Revert soul module flex insets to ERT.

    Loads the soul package if any of the flex insets is used (plus color
    if Highlight is used) and then converts each flex inset type to its
    LaTeX command.
    """

    # (flex inset name, LaTeX command), in the order they are reverted
    soul_commands = (
        ("Spaceletters", "\\so"),
        ("Strikethrough", "\\st"),
        ("Underline", "\\ul"),
        ("Highlight", "\\hl"),
        ("Capitalize", "\\caps"),
    )

    uses_soul = any(
        find_token(document.body, "\\begin_inset Flex %s" % name, 0) != -1
        for name, _ in soul_commands)
    if uses_soul:
        add_to_preamble(document, ["\\usepackage{soul}"])
    if find_token(document.body, "\\begin_inset Flex Highlight", 0) != -1:
        add_to_preamble(document, ["\\usepackage{color}"])

    for name, command in soul_commands:
        revert_flex_inset(document.body, name, command)
1500
1501
def revert_tablestyle(document):
    """Remove tablestyle params.

    Deletes the first \\tablestyle line from the document header, if any.
    """

    pos = find_token(document.header, "\\tablestyle")
    if pos == -1:
        return
    del document.header[pos]
1509
1510
def revert_bibfileencodings(document):
    """Revert individual Biblatex bibliography encodings.

    For every bibtex inset carrying a "file_encodings" parameter, that
    parameter is removed. If the cite engine is biblatex, one
    \\addbibresource line per bib file is added to the preamble (with a
    bibencoding option for files that have an individual encoding), an ERT
    \\printbibliography is inserted and the original inset is wrapped
    into a Note.

    As read by the code below, the "file_encodings" value is a
    tab-separated list of "<bibfile> <encoding>" entries.
    """

    # Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # Check if biblatex
    biblatex = engine in ["biblatex", "biblatex-natbib"]

    # Map lyx to latex encoding names
    encodings = {
        "utf8" : "utf8",
        "utf8x" : "utf8x",
        "armscii8" : "armscii8",
        "iso8859-1" : "latin1",
        "iso8859-2" : "latin2",
        "iso8859-3" : "latin3",
        "iso8859-4" : "latin4",
        "iso8859-5" : "iso88595",
        "iso8859-6" : "8859-6",
        "iso8859-7" : "iso-8859-7",
        "iso8859-8" : "8859-8",
        "iso8859-9" : "latin5",
        "iso8859-13" : "latin7",
        "iso8859-15" : "latin9",
        "iso8859-16" : "latin10",
        "applemac" : "applemac",
        "cp437" : "cp437",
        "cp437de" : "cp437de",
        "cp850" : "cp850",
        "cp852" : "cp852",
        "cp855" : "cp855",
        "cp858" : "cp858",
        "cp862" : "cp862",
        "cp865" : "cp865",
        "cp866" : "cp866",
        "cp1250" : "cp1250",
        "cp1251" : "cp1251",
        "cp1252" : "cp1252",
        "cp1255" : "cp1255",
        "cp1256" : "cp1256",
        "cp1257" : "cp1257",
        "koi8-r" : "koi8-r",
        "koi8-u" : "koi8-u",
        "pt154" : "pt154",
        "utf8-platex" : "utf8",
        "ascii" : "ascii"
    }

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i+1)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
            continue
        # Keep the inset's value in its own variable: it must not shadow the
        # lyx -> latex name map above, which is needed further down.
        file_encodings = get_quoted_value(document.body, "file_encodings", i, j)
        if not file_encodings:
            i = j
            continue
        # "".split(",") yields [""], so drop empty names explicitly
        bibfiles = [bib for bib in
                    get_quoted_value(document.body, "bibfiles", i, j).split(",")
                    if bib]
        opts = get_quoted_value(document.body, "biblatexopts", i, j)
        if not bibfiles:
            document.warning("Bibtex inset at line %d does not have a bibfile!" %(i))
        # remove encoding line
        k = find_token(document.body, "file_encodings", i, j)
        if k != -1:
            del document.body[k]
        # Re-find inset end line
        j = find_end_of_inset(document.body, i)
        if biblatex:
            encmap = dict()
            for pp in file_encodings.split("\t"):
                ppp = pp.split(" ", 1)
                if len(ppp) != 2:
                    document.warning("Ignoring malformed file encoding entry '%s' "
                                     "in bibtex inset at line %d" % (pp, i))
                    continue
                # Translate to the LaTeX name; unknown names are passed on as is
                encmap[ppp[0]] = encodings.get(ppp[1], ppp[1])
            for bib in bibfiles:
                pr = "\\addbibresource"
                if bib in encmap:
                    pr += "[bibencoding=" + encmap[bib] + "]"
                pr += "{" + bib + "}"
                add_to_preamble(document, [pr])
            # Insert ERT \\printbibliography and wrap bibtex inset to a Note
            pcmd = "printbibliography"
            if opts:
                pcmd += "[" + opts + "]"
            repl = ["\\begin_inset ERT", "status open", "", "\\begin_layout Plain Layout",\
                    "", "", "\\backslash", pcmd, "\\end_layout", "", "\\end_inset", "", "",\
                    "\\end_layout", "", "\\begin_layout Standard", "\\begin_inset Note Note",\
                    "status open", "", "\\begin_layout Plain Layout" ]
            repl += document.body[i:j+1]
            repl += ["", "\\end_layout", "", "\\end_inset", "", ""]
            document.body[i:j+1] = repl
            # Continue right after the inserted lines (computed from the
            # replacement itself instead of a hand-counted constant)
            j = i + len(repl) - 1

        i = j
1617
1618
def revert_cmidruletrimming(document):
    """Remove \\cmidrule trimming.

    Strips the (top|bottom)line[lr]trim="true" options from all table
    cell tags in the document body.
    """

    # FIXME: Revert to TeX code?
    trim_option = re.compile(r' (bottom|top)line[lr]trim="true"')
    body = document.body
    row = 0
    while True:
        row = find_token(body, '<cell ', row + 1)
        if row == -1:
            return
        # first, let's find out if we need to do anything
        if 'trim="' in body[row]:
            # remove trim option
            body[row] = trim_option.sub('', body[row])
1635
1636
# Local layout definition of the Flex:Ruby inset, one list item per line.
# convert_ruby_module() passes it to document.del_local_layout() and
# revert_ruby_module() to document.append_local_layout().
# NOTE(review): del_local_layout() presumably compares these lines literally
# with what is stored in the document, so do not reformat them -- confirm.
ruby_inset_def = [
    r'### Inserted by lyx2lyx (ruby inset) ###',
    r'InsetLayout Flex:Ruby',
    r'  LyxType       charstyle',
    r'  LatexType     command',
    r'  LatexName     ruby',
    r'  HTMLTag       ruby',
    r'  HTMLAttr      ""',
    r'  HTMLInnerTag  rb',
    r'  HTMLInnerAttr ""',
    r'  BgColor       none',
    r'  LabelString   "Ruby"',
    r'  Decoration    Conglomerate',
    r'  Preamble',
    r'    \ifdefined\kanjiskip',
    r'      \IfFileExists{okumacro.sty}{\usepackage{okumacro}}{}',
    r'    \else \ifdefined\luatexversion',
    r'      \usepackage{luatexja-ruby}',
    r'    \else \ifdefined\XeTeXversion',
    r'      \usepackage{ruby}%',
    r'    \fi\fi\fi',
    r'    \providecommand{\ruby}[2]{\shortstack{\tiny #2\\#1}}',
    r'  EndPreamble',
    r'  Argument  post:1',
    r'    LabelString  "ruby text"',
    r'    MenuString  "Ruby Text|R"',
    r'    Tooltip    "Reading aid (ruby, furigana) for Chinese characters."',
    r'    Decoration  Conglomerate',
    r'    Font',
    r'      Size    tiny',
    r'    EndFont',
    r'    LabelFont',
    r'      Size    tiny',
    r'    EndFont',
    r'    Mandatory  1',
    r'  EndArgument',
    r'End',
]
1675
def convert_ruby_module(document):
    """Use ruby module instead of local module definition.

    The module is only added if the local layout definition was present
    (i.e. del_local_layout() reports success).
    """
    if not document.del_local_layout(ruby_inset_def):
        return
    document.add_module("ruby")
1680
def revert_ruby_module(document):
    """Replace ruby module with local module definition.

    The local layout is only appended if the module was actually in use
    (i.e. del_module() reports success).
    """
    if not document.del_module("ruby"):
        return
    document.append_local_layout(ruby_inset_def)
1685
1686
def convert_utf8_japanese(document):
    """Use generic utf8 with Japanese documents.

    The language specific encodings "utf8-platex" (japanese) and
    "utf8-cjk" (japanese-cjk) are replaced by plain "utf8".
    """
    language = get_value(document.header, "\\language")
    if not language.startswith("japanese"):
        return
    encoding = get_value(document.header, "\\inputencoding")
    # (language, input encoding) combinations that get converted
    legacy_variants = (("japanese", "utf8-platex"),
                       ("japanese-cjk", "utf8-cjk"))
    if (language, encoding) in legacy_variants:
        document.set_parameter("inputencoding", "utf8")
1696
def revert_utf8_japanese(document):
    """Use Japanese utf8 variants with Japanese documents.

    Only documents with input encoding "utf8" are touched: "japanese"
    gets "utf8-platex", "japanese-cjk" gets "utf8-cjk".
    """
    if get_value(document.header, "\\inputencoding") != "utf8":
        return
    # document language -> language specific utf8 variant
    variants = {"japanese": "utf8-platex",
                "japanese-cjk": "utf8-cjk"}
    language = get_value(document.header, "\\language")
    if language in variants:
        document.set_parameter("inputencoding", variants[language])
1707
1708
def revert_lineno(document):
    """Replace lineno setting with user-preamble code.

    Both header settings are requested with delete=True, whether or not
    line numbering is enabled; the preamble code is only added if it is.
    """

    header = document.header
    pkg_options = get_quoted_value(header, "\\lineno_options", delete=True)
    enabled = get_bool_value(header, "\\use_lineno", delete=True)
    if not enabled:
        return
    if pkg_options:
        pkg_options = "[" + pkg_options + "]"
    preamble_code = ["\\usepackage%s{lineno}" % pkg_options,
                     "\\linenumbers"]
    add_to_preamble(document, preamble_code)
1720
def convert_lineno(document):
    """Replace user-preamble code with native lineno support.

    A "\\linenumbers" line directly preceded by "\\usepackage[...]{lineno}"
    in the preamble is removed and turned into header settings, which are
    inserted in front of the "\\index " header line.
    """
    use_lineno = 0
    options = ""
    preamble = document.preamble
    i = find_token(preamble, "\\linenumbers", 1)
    if i > -1:
        usepkg = re.match(r"\\usepackage(.*){lineno}", preamble[i-1])
        if usepkg:
            use_lineno = 1
            options = usepkg.group(1).strip("[]")
            del preamble[i-1:i+1]
            del_token(preamble, "% Added by lyx2lyx", i-2, i-1)

    # \use_lineno is always written, \lineno_options only if there are any
    new_settings = ["\\use_lineno %d" % use_lineno]
    if options != "":
        new_settings.append("\\lineno_options %s" % options)
    k = find_token(document.header, "\\index ")
    document.header[k:k] = new_settings
1740
1741
def revert_new_languages(document):
    """Emulate support for Azerbaijani, Bengali, Church Slavonic, Korean,
    and Russian (Petrine orthography).

    Collects all new languages used as document language or in \\lang
    switches in the body and hands each of them to revert_language().
    Korean is only reverted for documents using non-TeX fonts together with
    the "default" or "auto" language package.
    """

    #                lyxname:          (babelname, polyglossianame)
    new_languages = {"azerbaijani":    ("azerbaijani", ""),
                     "bengali":        ("", "bengali"),
                     "churchslavonic": ("", "churchslavonic"),
                     "oldrussian":     ("", "russian"),
                     "korean":         ("", "korean"),
                    }
    used_languages = set()
    if document.language in new_languages:
        used_languages.add(document.language)
    i = 0
    while True:
        i = find_token(document.body, "\\lang", i+1)
        if i == -1:
            break
        # The language name follows the 6 characters of "\lang "
        lang = document.body[i][6:].strip()
        if lang in new_languages:
            # Record the language found here, not the document language
            used_languages.add(lang)

    # Korean is already supported via CJK, so leave as-is for Babel
    if ("korean" in used_languages
        and get_bool_value(document.header, "\\use_non_tex_fonts")
        and get_value(document.header, "\\language_package") in ("default", "auto")):
        revert_language(document, "korean", "", "korean")
    used_languages.discard("korean")

    # Sorted, so that the result does not depend on the set's iteration order
    for lang in sorted(used_languages):
        revert_language(document, lang, *new_languages[lang])
1773
1774
# Local layout definition of the old Flex:Glosse inset, one list item per
# line. convert_linggloss() appends it to the local layout of documents
# using "Flex Glosse" insets; revert_linggloss() passes it to
# document.del_local_layout().
# NOTE(review): del_local_layout() presumably compares these lines literally,
# so do not reformat them -- confirm.
gloss_inset_def = [
    r'### Inserted by lyx2lyx (deprecated ling glosses) ###',
    r'InsetLayout Flex:Glosse',
    r'  LyXType               custom',
    r'  LabelString           "Gloss (old version)"',
    r'  MenuString            "Gloss (old version)"',
    r'  LatexType             environment',
    r'  LatexName             linggloss',
    r'  Decoration            minimalistic',
    r'  LabelFont',
    r'    Size                Small',
    r'  EndFont',
    r'  MultiPar              true',
    r'  CustomPars            false',
    r'  ForcePlain            true',
    r'  ParbreakIsNewline     true',
    r'  FreeSpacing           true',
    r'  Requires              covington',
    r'  Preamble',
    r'          \def\glosstr{}',
    r'          \@ifundefined{linggloss}{%',
    r'          \newenvironment{linggloss}[2][]{',
    r'             \def\glosstr{\glt #1}%',
    r'             \gll #2}',
    r'          {\glosstr\glend}}{}',
    r'  EndPreamble',
    r'  InToc                 true',
    r'  ResetsFont            true',
    r'  Argument 1',
    r'          Decoration    conglomerate',
    r'          LabelString   "Translation"',
    r'          MenuString    "Glosse Translation|s"',
    r'          Tooltip       "Add a translation for the glosse"',
    r'  EndArgument',
    r'End'
]
1811
# Local layout definition of the old Flex:Tri-Glosse inset, one list item
# per line. convert_linggloss() appends it to the local layout of documents
# using "Flex Tri-Glosse" insets; revert_linggloss() passes it to
# document.del_local_layout().
# NOTE(review): del_local_layout() presumably compares these lines literally,
# so do not reformat them -- confirm.
glosss_inset_def = [
    r'### Inserted by lyx2lyx (deprecated ling glosses) ###',
    r'InsetLayout Flex:Tri-Glosse',
    r'  LyXType               custom',
    r'  LabelString           "Tri-Gloss (old version)"',
    r'  MenuString            "Tri-Gloss (old version)"',
    r'  LatexType             environment',
    r'  LatexName             lingglosss',
    r'  Decoration            minimalistic',
    r'  LabelFont',
    r'    Size                Small',
    r'  EndFont',
    r'  MultiPar              true',
    r'  CustomPars            false',
    r'  ForcePlain            true',
    r'  ParbreakIsNewline     true',
    r'  FreeSpacing           true',
    r'  InToc                 true',
    r'  Requires              covington',
    r'  Preamble',
    r'          \def\glosstr{}',
    r'          \@ifundefined{lingglosss}{%',
    r'          \newenvironment{lingglosss}[2][]{',
    r'              \def\glosstr{\glt #1}%',
    r'              \glll #2}',
    r'          {\glosstr\glend}}{}',
    r'  EndPreamble',
    r'  ResetsFont            true',
    r'  Argument 1',
    r'          Decoration    conglomerate',
    r'          LabelString   "Translation"',
    r'          MenuString    "Glosse Translation|s"',
    r'          Tooltip       "Add a translation for the glosse"',
    r'  EndArgument',
    r'End'
]
1848
def convert_linggloss(document):
    """Move old ling glosses to local layout.

    For each of the two old gloss flex insets found in the document body,
    the matching inset definition is appended to the local layout.
    """
    # (inset start token, local layout definition)
    old_glosses = (
        ('\\begin_inset Flex Glosse', gloss_inset_def),
        ('\\begin_inset Flex Tri-Glosse', glosss_inset_def),
    )
    for token, layout_def in old_glosses:
        if find_token(document.body, token, 0) != -1:
            document.append_local_layout(layout_def)
1855
def _pop_gloss_argument(document, token, label, start, end):
    """Cut one argument inset out of a gloss inset and return its content.

    document -- the document whose body is modified in place
    token    -- start line of the argument inset to look for
    label    -- name of the argument used in the warning message
    start, end -- range of body lines to search

    Returns the content lines of the argument ([] if there is no such
    argument) or None if the argument inset is malformed.
    """
    body = document.body
    arg = find_token(body, token, start, end)
    if arg == -1:
        return []
    endarg = find_end_of_inset(body, arg)
    argbeginPlain = find_token(body, "\\begin_layout Plain Layout", arg, endarg)
    if argbeginPlain == -1:
        document.warning("Malformed LyX document: Can't find %s plain Layout" % label)
        return None
    argendPlain = find_end_of_inset(body, argbeginPlain)
    content = body[argbeginPlain + 1 : argendPlain - 2]

    # remove Arg insets and paragraph, if it only contains this inset
    if body[arg - 1] == "\\begin_layout Plain Layout" and find_end_of_layout(body, arg - 1) == endarg + 3:
        del body[arg - 1 : endarg + 4]
    else:
        del body[arg : endarg + 1]
    return content


def revert_linggloss(document):
    """Revert to old ling gloss definitions.

    Only documents using the "linguistics" module are touched. The local
    layout definitions of the old gloss insets are removed, and every
    "Interlinear Gloss (2 Lines)" / "(3 Lines)" flex inset is replaced by
    ERT calling \\digloss / \\trigloss, with the inset's arguments passed
    on as optional and mandatory arguments.
    """
    if not "linguistics" in document.get_module_list():
        return
    document.del_local_layout(gloss_inset_def)
    document.del_local_layout(glosss_inset_def)

    # (argument inset start, name used in warnings), in processing order
    arg_specs = [("\\begin_inset Argument 1", "optarg"),
                 ("\\begin_inset Argument post:1", "arg 1"),
                 ("\\begin_inset Argument post:2", "arg 2"),
                 ("\\begin_inset Argument post:3", "arg 3")]

    cov_req = False
    glosses = ["\\begin_inset Flex Interlinear Gloss (2 Lines)", "\\begin_inset Flex Interlinear Gloss (3 Lines)"]
    for glosse in glosses:
        i = 0
        while True:
            i = find_token(document.body, glosse, i+1)
            if i == -1:
                break
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of Gloss inset")
                continue

            contents = []
            for token, label in arg_specs:
                content = _pop_gloss_argument(document, token, label, i, j)
                if content is None:
                    break
                contents.append(content)
                # Removing an argument shortens the inset. Re-find its end
                # so that the next search cannot reach past the inset.
                j = find_end_of_inset(document.body, i)
            if len(contents) != len(arg_specs):
                # malformed argument inset (already reported)
                continue
            optargcontent, marg1content, marg2content, marg3content = contents

            cmd = "\\digloss"
            if glosse == "\\begin_inset Flex Interlinear Gloss (3 Lines)":
                cmd = "\\trigloss"

            beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i)
            endInset = find_end_of_inset(document.body, i)
            endPlain = find_token_backwards(document.body, "\\end_layout", endInset)
            precontent = put_cmd_in_ert(cmd)
            if len(optargcontent) > 0:
                precontent += put_cmd_in_ert("[") + optargcontent + put_cmd_in_ert("]")
            precontent += put_cmd_in_ert("{")

            postcontent = put_cmd_in_ert("}{") + marg1content + put_cmd_in_ert("}{") + marg2content
            if cmd == "\\trigloss":
                postcontent += put_cmd_in_ert("}{") + marg3content
            postcontent += put_cmd_in_ert("}")

            # Work from the end towards the start, so that the indices
            # still to be used stay valid.
            document.body[endPlain:endInset + 1] = postcontent
            document.body[beginPlain + 1:beginPlain] = precontent
            del document.body[i : beginPlain + 1]
            if not cov_req:
                document.append_local_layout("Requires covington")
                cov_req = True
            i = beginPlain
1968
1969
def revert_subexarg(document):
    """Revert linguistic subexamples with argument to ERT.

    Only documents using the "linguistics" module are touched. For every
    run of consecutive Subexample layouts that contains an "Argument 1"
    inset, the argument is removed and the run is rewritten as Standard
    layouts: an ERT \\begin{subexamples}[<argument>] in front, an ERT
    \\item at the start of each former Subexample paragraph and an ERT
    \\end{subexamples} at the end.
    """

    if not "linguistics" in document.get_module_list():
        return

    # set once "Requires covington" has been added to the local layout
    cov_req = False
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout Subexample", i+1)
        if i == -1:
            break
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Subexample layout")
            continue
        # Extend j to the end of the last directly following Subexample layout
        while True:
            # check for consecutive layouts
            k = find_token(document.body, "\\begin_layout", j)
            if k == -1 or document.body[k] != "\\begin_layout Subexample":
                break
            j = find_end_of_layout(document.body, k)
            if j == -1:
                 # NOTE(review): this `continue` re-enters the inner loop with
                 # j == -1 instead of leaving it -- confirm that is intended.
                 document.warning("Malformed LyX document: Can't find end of Subexample layout")
                 continue

        # Nothing to do if there is no optional argument in the whole run
        arg = find_token(document.body, "\\begin_inset Argument 1", i, j)
        if arg == -1:
            continue

        endarg = find_end_of_inset(document.body, arg)
        optargcontent = ""
        argbeginPlain = find_token(document.body, "\\begin_layout Plain Layout", arg, endarg)
        if argbeginPlain == -1:
            document.warning("Malformed LyX document: Can't find optarg plain Layout")
            continue
        argendPlain = find_end_of_inset(document.body, argbeginPlain)
        # The argument content is converted with lyx2latex(), since it is
        # inserted into the ERT command built below
        optargcontent = lyx2latex(document, document.body[argbeginPlain + 1 : argendPlain - 2])

        # remove Arg insets and paragraph, if it only contains this inset
        if document.body[arg - 1] == "\\begin_layout Plain Layout" and find_end_of_layout(document.body, arg - 1) == endarg + 3:
            del document.body[arg - 1 : endarg + 4]
        else:
            del document.body[arg : endarg + 1]

        cmd = put_cmd_in_ert("\\begin{subexamples}[" + optargcontent + "]")

        # re-find end of layout
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Subexample layout")
            continue
        # Second pass over the run: turn each following Subexample layout
        # into a Standard layout starting with an ERT \item
        while True:
            # check for consecutive layouts
            k = find_token(document.body, "\\begin_layout", j)
            if k == -1 or document.body[k] != "\\begin_layout Subexample":
                break
            document.body[k : k + 1] = ["\\begin_layout Standard"] + put_cmd_in_ert("\\item ")
            j = find_end_of_layout(document.body, k)
            if j == -1:
                 # NOTE(review): same inner-loop `continue` with j == -1 as
                 # in the first pass -- confirm that is intended.
                 document.warning("Malformed LyX document: Can't find end of Subexample layout")
                 continue

        endev = put_cmd_in_ert("\\end{subexamples}")

        # Insert at the end (j) first: the later replacement at i then
        # cannot invalidate j.
        document.body[j : j] = ["\\end_layout", "", "\\begin_layout Standard"] + endev
        # The first Subexample layout becomes a Standard paragraph holding the
        # \begin{subexamples} ERT, followed by a new Standard paragraph that
        # starts with an ERT \item
        document.body[i : i + 1] = ["\\begin_layout Standard"] + cmd \
                + ["\\end_layout", "", "\\begin_layout Standard"] + put_cmd_in_ert("\\item ")
        if not cov_req:
            document.append_local_layout("Requires covington")
            cov_req = True
2041
2042
def _cut_drs_argument(document, i, j, argname):
    """Remove one Argument inset from a DRS inset and return its content.

    Looks for ``\\begin_inset Argument <argname>`` in document.body[i:j].

    Returns:
        [] if the inset has no such argument,
        the body lines of the argument's Plain Layout if it was found
        (the argument inset is deleted from document.body),
        None if the argument is malformed (a warning is issued and
        document.body is left untouched).
    """
    arg = find_token(document.body, "\\begin_inset Argument " + argname, i, j)
    if arg == -1:
        return []
    endarg = find_end_of_inset(document.body, arg)
    argbeginPlain = find_token(document.body, "\\begin_layout Plain Layout", arg, endarg)
    if argbeginPlain == -1:
        document.warning("Malformed LyX document: Can't find Argument " + argname + " plain Layout")
        return None
    # Started from the layout line, this yields the end of the enclosing
    # Argument inset; the last two lines before it are not content.
    argendPlain = find_end_of_inset(document.body, argbeginPlain)
    content = document.body[argbeginPlain + 1 : argendPlain - 2]

    # remove Arg insets and paragraph, if it only contains this inset
    if document.body[arg - 1] == "\\begin_layout Plain Layout" and find_end_of_layout(document.body, arg - 1) == endarg + 3:
        del document.body[arg - 1 : endarg + 4]
    else:
        del document.body[arg : endarg + 1]
    return content


def revert_drs(document):
    """Revert DRS insets (linguistics) to ERT.

    Each Flex inset of the DRS family is replaced by the corresponding
    LaTeX command in ERT, with the inset's Argument insets turned into
    the command's braced arguments around the inset content.  On the
    first reverted inset the packages drs and covington are loaded in
    the preamble and "Provides covington 1" is added to the local layout.
    """

    if "linguistics" not in document.get_module_list():
        return

    cov_req = False
    # (inset token, LaTeX command) in the order the insets are processed
    drses = [("\\begin_inset Flex DRS", "\\drs"),
             ("\\begin_inset Flex DRS*", "\\drs*"),
             ("\\begin_inset Flex IfThen-DRS", "\\ifdrs"),
             ("\\begin_inset Flex Cond-DRS", "\\condrs"),
             ("\\begin_inset Flex QDRS", "\\qdrs"),
             ("\\begin_inset Flex NegDRS", "\\negdrs"),
             ("\\begin_inset Flex SDRS", "\\sdrs")]
    argnames = ("1", "2", "post:1", "post:2", "post:3", "post:4")
    for drs, cmd in drses:
        i = 0
        while True:
            # Whole-word match: a plain prefix match on "Flex DRS" would
            # also catch "Flex DRS*" and revert it with the wrong command.
            i = find_token_exact(document.body, drs, i + 1)
            if i == -1:
                break

            # Collect (and remove) the arguments, re-finding the inset end
            # each time since removing an argument shifts it.
            args = {}
            for argname in argnames:
                j = find_end_of_inset(document.body, i)
                if j == -1:
                    document.warning("Malformed LyX document: Can't find end of DRS inset")
                    break
                content = _cut_drs_argument(document, i, j, argname)
                if content is None:
                    break
                args[argname] = content
            if len(args) != len(argnames):
                # malformed inset, already reported
                continue

            endInset = find_end_of_inset(document.body, i)
            if endInset == -1:
                document.warning("Malformed LyX document: Can't find end of DRS inset")
                continue
            # Only look inside this inset; a hit elsewhere would make the
            # slice operations below destroy unrelated content.
            beginPlain = find_token(document.body, "\\begin_layout Plain Layout", i, endInset)
            endPlain = find_token_backwards(document.body, "\\end_layout", endInset)
            if beginPlain == -1 or endPlain < beginPlain:
                document.warning("Malformed LyX document: Can't find content of DRS inset")
                continue

            precontent = put_cmd_in_ert(cmd)
            precontent += put_cmd_in_ert("{") + args["1"] + put_cmd_in_ert("}")
            if cmd == "\\sdrs":
                precontent += put_cmd_in_ert("{") + args["2"] + put_cmd_in_ert("}")
            precontent += put_cmd_in_ert("{")

            if cmd in ("\\qdrs", "\\condrs", "\\ifdrs"):
                postcontent = put_cmd_in_ert("}{") + args["post:1"] + put_cmd_in_ert("}{") + args["post:2"] + put_cmd_in_ert("}")
                if cmd in ("\\condrs", "\\qdrs"):
                    postcontent += put_cmd_in_ert("{") + args["post:3"] + put_cmd_in_ert("}")
                if cmd == "\\qdrs":
                    postcontent += put_cmd_in_ert("{") + args["post:4"] + put_cmd_in_ert("}")
            else:
                postcontent = put_cmd_in_ert("}")

            # Work back to front so the earlier indices stay valid.
            document.body[endPlain:endInset + 1] = postcontent
            document.body[beginPlain + 1:beginPlain] = precontent
            del document.body[i : beginPlain + 1]
            if not cov_req:
                document.append_local_layout("Provides covington 1")
                add_to_preamble(document, ["\\usepackage{drs,covington}"])
                cov_req = True
            i = beginPlain
2240
2241
2242
def revert_babelfont(document):
    """Reverts the use of \\babelfont to user preamble.

    Only acts if non-TeX fonts are used and the language package is babel.
    The non-TeX font names and scale factors are reset to their defaults
    in the header and the equivalent \\babelfont commands are inserted
    into the preamble.
    """

    i = find_token(document.header, '\\use_non_tex_fonts', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    if not str2bool(get_value(document.header, "\\use_non_tex_fonts", i)):
        return
    i = find_token(document.header, '\\language_package', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\language_package.")
        return
    if get_value(document.header, "\\language_package", 0) != "babel":
        return

    # check font settings
    # defaults
    fonts = {"\\font_roman": "default", "\\font_sans": "default",
             "\\font_typewriter": "default"}
    scales = {"\\font_sf_scale": 100.0, "\\font_tt_scale": 100.0}
    osf = False

    # We need to use this regex since split() does not handle quote protection
    quoted = re.compile(r'[^"\s]\S*|".+?"')
    for token in ("\\font_roman", "\\font_sans", "\\font_typewriter"):
        j = find_token(document.header, token, 0)
        if j == -1:
            document.warning("Malformed LyX document: Missing " + token + ".")
            continue
        fields = quoted.findall(document.header[j])
        # the third field holds the non-TeX font name
        fonts[token] = fields[2].strip('"')
        fields[2] = '"default"'
        document.header[j] = " ".join(fields)

    i = find_token(document.header, "\\font_osf", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\font_osf.")
    else:
        osf = str2bool(get_value(document.header, "\\font_osf", i))

    for token in ("\\font_sf_scale", "\\font_tt_scale"):
        j = find_token(document.header, token, 0)
        if j == -1:
            document.warning("Malformed LyX document: Missing " + token + ".")
            continue
        fields = document.header[j].split()
        val = fields[2]
        fields[2] = "100"
        document.header[j] = " ".join(fields)
        try:
            scales[token] = float(val)
        except ValueError:
            # keep the default scale if the value is not a number
            document.warning("Invalid " + token[1:] + " value: " + val)

    roman = fonts["\\font_roman"]
    sans = fonts["\\font_sans"]
    typew = fonts["\\font_typewriter"]
    sf_scale = scales["\\font_sf_scale"]
    tt_scale = scales["\\font_tt_scale"]

    # set preamble stuff
    pretext = ['%% This document must be processed with xelatex or lualatex!']
    pretext.append('\\AtBeginDocument{%')
    if roman != "default":
        pretext.append('\\babelfont{rm}[Mapping=tex-text]{' + roman + '}')
    if sans != "default":
        sf = '\\babelfont{sf}['
        if sf_scale != 100.0:
            sf += 'Scale=' + str(sf_scale / 100.0) + ','
        sf += 'Mapping=tex-text]{' + sans + '}'
        pretext.append(sf)
    if typew != "default":
        tw = '\\babelfont{tt}'
        if tt_scale != 100.0:
            tw += '[Scale=' + str(tt_scale / 100.0) + ']'
        tw += '{' + typew + '}'
        pretext.append(tw)
    if osf:
        pretext.append('\\defaultfontfeatures{Numbers=OldStyle}')
    pretext.append('}')
    insert_to_preamble(document, pretext)
2350
2351
def revert_minionpro(document):
    """Revert native MinionPro font definition (with extra options) to LaTeX.

    Applies to TeX fonts only, and only if a \\font_roman_opts header
    exists and the roman font is minionpro.  The font is reset to default,
    the options line is removed and \\usepackage[...]{MinionPro} is added
    to the preamble.
    """

    nontex_line = find_token(document.header, '\\use_non_tex_fonts', 0)
    if nontex_line == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    if str2bool(get_value(document.header, "\\use_non_tex_fonts", nontex_line)):
        return

    opts_line = find_re(document.header, re.compile(r'(\\font_roman_opts)'), 0)
    if opts_line == -1:
        return

    # split() cannot be used here as it does not honour quote protection
    quoted_fields = r'[^"\s]\S*|".+?"'
    extra_opts = re.findall(quoted_fields, document.header[opts_line])[1].strip('"')

    roman_line = find_token(document.header, "\\font_roman", 0)
    if roman_line == -1:
        document.warning("Malformed LyX document: Missing \\font_roman.")
        return

    fields = re.findall(quoted_fields, document.header[roman_line])
    if fields[1].strip('"') != "minionpro":
        return
    fields[1] = '"default"'
    document.header[roman_line] = " ".join(fields)

    # Old style figures are the package default; lining figures need "lf".
    osf_line = find_token(document.header, "\\font_osf true", 0)
    if osf_line != -1:
        document.header[osf_line] = "\\font_osf false"
        figures = ""
    else:
        figures = "lf,"

    add_to_preamble(document, ["\\usepackage[" + figures + extra_opts + "]{MinionPro}"])
    del document.header[opts_line]
2396
2397
def _revert_family_font_opts(document, opts_re, font_re, scale_token,
                             babel_cmd, fontspec_cmd, NonTeXFonts, Babel):
    """Revert the font options header of one font family.

    The options line matching `opts_re` is always removed.  With non-TeX
    fonts, the non-TeX font on the line matching `font_re` is reset to
    default and, unless it already was default, a font selection command
    (`babel_cmd` with babel, `fontspec_cmd` otherwise) carrying the
    options is added to the preamble.  `scale_token` names the header
    holding the scale factor, or is None if the family has none.
    """
    # We need to use this regex since split() does not handle quote protection
    quoted = r'[^"\s]\S*|".+?"'

    i = find_re(document.header, re.compile(opts_re), 0)
    if i == -1:
        return
    opts = re.findall(quoted, document.header[i])[1].strip('"')
    del document.header[i]
    if not NonTeXFonts:
        return

    scale = 100.0
    if scale_token is not None:
        i = find_re(document.header, re.compile('(' + re.escape(scale_token) + ')'), 0)
        if i != -1:
            # the second value is the scale of the non-TeX font
            values = get_value(document.header, scale_token, i).split()
            try:
                scale = float(values[1])
            except (IndexError, ValueError):
                document.warning("Invalid " + scale_token[1:] + " value: " + " ".join(values))

    i = find_re(document.header, re.compile(font_re), 0)
    if i == -1:
        return
    fields = re.findall(quoted, document.header[i])
    font = fields[2].strip('"')
    fields[2] = '"default"'
    document.header[i] = " ".join(fields)
    if font == "default":
        return

    preamble = babel_cmd if Babel else fontspec_cmd
    preamble += opts
    preamble += ","
    if scale != 100.0:
        # The header stores a percentage, fontspec wants a factor.
        preamble += "Scale=" + str(scale / 100.0) + ","
    preamble += "Mapping=tex-text]{"
    preamble += font
    preamble += "}"
    add_to_preamble(document, [preamble])


def revert_font_opts(document):
    """Revert font options by outputting \\setxxxfont or \\babelfont to the preamble.

    The \\font_roman_opts, \\font_sans_opts and \\font_typewriter_opts
    headers are removed.  If non-TeX fonts are used, the respective font
    is set via the preamble instead (using \\babelfont if the language
    package is babel).
    """

    i = find_token(document.header, '\\use_non_tex_fonts', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    NonTeXFonts = str2bool(get_value(document.header, "\\use_non_tex_fonts", i))
    i = find_token(document.header, '\\language_package', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\language_package.")
        return
    Babel = (get_value(document.header, "\\language_package", 0) == "babel")

    # 1. Roman
    _revert_family_font_opts(document, r'(\\font_roman_opts)', r'(\\font_roman)',
                             None, "\\babelfont{rm}[", "\\setmainfont[",
                             NonTeXFonts, Babel)

    # 2. Sans
    _revert_family_font_opts(document, r'(\\font_sans_opts)', r'(\\font_sans)',
                             "\\font_sf_scale", "\\babelfont{sf}[", "\\setsansfont[",
                             NonTeXFonts, Babel)

    # 3. Typewriter
    _revert_family_font_opts(document, r'(\\font_typewriter_opts)', r'(\\font_typewriter)',
                             "\\font_tt_scale", "\\babelfont{tt}[", "\\setmonofont[",
                             NonTeXFonts, Babel)
2516
2517
def revert_plainNotoFonts_xopts(document):
    """Revert native (straight) Noto font definition (with extra options) to LaTeX.

    Applies to TeX fonts only, when the roman font is NotoSerif-TLF, the
    sans and typewriter fonts are default, and either roman font options
    or old style figures are set.  The font is then loaded through
    \\usepackage[...]{noto} in the preamble.
    """

    nontex_line = find_token(document.header, '\\use_non_tex_fonts', 0)
    if nontex_line == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    if str2bool(get_value(document.header, "\\use_non_tex_fonts", nontex_line)):
        return

    osf_line = find_token(document.header, "\\font_osf true", 0)
    use_osf = osf_line != -1

    opts_line = find_re(document.header, re.compile(r'(\\font_roman_opts)'), 0)
    if not use_osf and opts_line == -1:
        # nothing that would need the package options
        return

    # split() cannot be used on header lines as it does not honour
    # quote protection
    quoted_fields = r'[^"\s]\S*|".+?"'

    pkg_opts = ""
    if opts_line != -1:
        pkg_opts = re.findall(quoted_fields, document.header[opts_line])[1].strip('"')
    if use_osf:
        pkg_opts = pkg_opts + ", osf" if pkg_opts else "osf"

    roman_line = find_token(document.header, "\\font_roman", 0)
    if roman_line == -1:
        return
    roman_fields = re.findall(quoted_fields, document.header[roman_line])
    if roman_fields[1].strip('"') != "NotoSerif-TLF":
        return

    # sans and typewriter both have to be left at their default
    for family in ("\\font_sans", "\\font_typewriter"):
        family_line = find_token(document.header, family, 0)
        if family_line == -1:
            return
        if re.findall(quoted_fields, document.header[family_line])[1].strip('"') != "default":
            return

    # So we have noto as "complete font"
    roman_fields[1] = '"default"'
    document.header[roman_line] = " ".join(roman_fields)

    add_to_preamble(document, ["\\usepackage[" + pkg_opts + "]{noto}"])
    if use_osf:
        document.header[osf_line] = "\\font_osf false"
    if opts_line != -1:
        del document.header[opts_line]
2590
2591
def revert_notoFonts_xopts(document):
    """Revert native (extended) Noto font definition (with extra options) to LaTeX.

    Does nothing when non-TeX fonts are in use.
    """

    nontex_line = find_token(document.header, '\\use_non_tex_fonts', 0)
    if nontex_line == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    uses_nontex = str2bool(get_value(document.header, "\\use_non_tex_fonts", nontex_line))
    if uses_nontex:
        return

    reverted = dict()
    mapping = createFontMapping(['Noto'])
    if not revert_fonts(document, mapping, reverted, True):
        return
    add_preamble_fonts(document, reverted)
2606
2607
def revert_IBMFonts_xopts(document):
    """Revert native IBM font definition (with extra options) to LaTeX.

    Does nothing when non-TeX fonts are in use.  Otherwise the IBM fonts
    are reverted via revert_fonts() and, if that reports success, the
    collected packages are added to the preamble.
    """

    i = find_token(document.header, '\\use_non_tex_fonts', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    if str2bool(get_value(document.header, "\\use_non_tex_fonts", i)):
        return

    fontmap = dict()
    fm = createFontMapping(['IBM'])
    if revert_fonts(document, fm, fontmap, True):
        add_preamble_fonts(document, fontmap)
2623
2624
def revert_AdobeFonts_xopts(document):
    """Revert native Adobe font definition (with extra options) to LaTeX.

    Does nothing when non-TeX fonts are in use.  Otherwise the Adobe fonts
    are reverted via revert_fonts() and, if that reports success, the
    collected packages are added to the preamble.
    """

    i = find_token(document.header, '\\use_non_tex_fonts', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    if str2bool(get_value(document.header, "\\use_non_tex_fonts", i)):
        return

    fontmap = dict()
    fm = createFontMapping(['Adobe'])
    if revert_fonts(document, fm, fontmap, True):
        add_preamble_fonts(document, fontmap)
2640
2641
def convert_osf(document):
    """Convert \\font_osf param to new format.

    \\font_osf is renamed to \\font_roman_osf, and \\font_sans_osf and
    \\font_typewriter_osf are added.  The new params are true only if
    \\font_osf was true, TeX fonts are used and the respective font is
    one of those listed below; in all other cases (including a missing
    font header) they are false.
    """

    NonTeXFonts = False
    i = find_token(document.header, '\\use_non_tex_fonts', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
    else:
        NonTeXFonts = str2bool(get_value(document.header, "\\use_non_tex_fonts", i))

    i = find_token(document.header, '\\font_osf', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\font_osf.")
        return

    osfsf = ["biolinum", "ADOBESourceSansPro", "NotoSansRegular", "NotoSansMedium", "NotoSansThin", "NotoSansLight", "NotoSansExtralight" ]
    osftt = ["ADOBESourceCodePro", "NotoMonoRegular" ]

    osfval = str2bool(get_value(document.header, "\\font_osf", i))
    document.header[i] = document.header[i].replace("\\font_osf", "\\font_roman_osf")

    sans_osf = False
    tt_osf = False
    if osfval and not NonTeXFonts:
        # We need to use this regex since split() does not handle quote protection
        quoted = r'[^"\s]\S*|".+?"'

        x = find_token(document.header, "\\font_sans", 0)
        if x == -1:
            document.warning("Malformed LyX document: Missing \\font_sans.")
        else:
            sffont = re.findall(quoted, document.header[x])
            sans_osf = sffont[1].strip('"') in osfsf

        x = find_token(document.header, "\\font_typewriter", 0)
        if x == -1:
            document.warning("Malformed LyX document: Missing \\font_typewriter.")
        else:
            ttfont = re.findall(quoted, document.header[x])
            tt_osf = ttfont[1].strip('"') in osftt

    # Always write both params so that the header is complete even if
    # one of the font headers was missing.
    document.header.insert(i, "\\font_sans_osf " + ("true" if sans_osf else "false"))
    document.header.insert(i + 1, "\\font_typewriter_osf " + ("true" if tt_osf else "false"))
2696
2697
def revert_osf(document):
    """Revert \\font_*_osf params.

    \\font_roman_osf is renamed back to \\font_osf, and \\font_sans_osf
    and \\font_typewriter_osf are removed.  If either of the removed
    params was true, \\font_osf is set to true.
    """

    if find_token(document.header, '\\use_non_tex_fonts', 0) == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")

    i = find_token(document.header, '\\font_roman_osf', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\font_roman_osf.")
        return
    document.header[i] = document.header[i].replace("\\font_roman_osf", "\\font_osf")

    # Handle both params independently, so that a missing one does not
    # leave the other behind in the reverted header.
    osfval = False
    for token in ('\\font_sans_osf', '\\font_typewriter_osf'):
        i = find_token(document.header, token, 0)
        if i == -1:
            document.warning("Malformed LyX document: Missing " + token + ".")
            continue
        if str2bool(get_value(document.header, token, i)):
            osfval = True
        del document.header[i]

    if osfval:
        i = find_token(document.header, '\\font_osf', 0)
        if i == -1:
            document.warning("Malformed LyX document: Missing \\font_osf.")
            return
        document.header[i] = "\\font_osf true"
2738
2739
def revert_texfontopts(document):
    """Revert native TeX font definitions (with extra options) to LaTeX.

    Nothing is done when \\use_non_tex_fonts is missing (a warning is
    issued) or true.

    Sans serif: if a \\font_sans_opts line exists and the first value of
    \\font_sans is "biolinum", that value is reset to "default", the first
    value of \\font_sf_scale is reset to 100, a "\\font_sans_osf true" line
    is set to false, and a matching \\usepackage[...]{biolinum} line is
    added to the preamble. The \\font_sans_opts line is then removed.

    Roman: if a \\font_roman_opts line exists and the first value of
    \\font_roman is one of the fonts in rmfonts, that value is reset to
    "default", "\\font_roman_osf true" and "\\font_sc true" lines are set
    to false, and a matching \\usepackage[...]{<package>} line is added to
    the preamble. The \\font_roman_opts line is then removed.
    """

    i = find_token(document.header, '\\use_non_tex_fonts', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_non_tex_fonts.")
        return
    if str2bool(get_value(document.header, "\\use_non_tex_fonts", i)):
        return

    rmfonts = ["ccfonts", "cochineal", "utopia", "garamondx", "libertine", "lmodern", "palatino", "times", "xcharter" ]

    # We need to use this regex since split() does not handle quote protection
    tokens = re.compile(r'[^"\s]\S*|".+?"')

    # First the sf (biolinum only)
    regexp = re.compile(r'(\\font_sans_opts)')
    x = find_re(document.header, regexp, 0)
    if x != -1:
        sfopts = tokens.findall(document.header[x])
        opts = sfopts[1].strip('"')
        i = find_token(document.header, "\\font_sans", 0)
        if i == -1:
            document.warning("Malformed LyX document: Missing \\font_sans.")
        else:
            sffont = tokens.findall(document.header[i])
            sans = sffont[1].strip('"')
            if sans == "biolinum":
                sf_scale = 100.0
                sffont[1] = '"default"'
                document.header[i] = " ".join(sffont)
                j = find_token(document.header, "\\font_sans_osf true", 0)
                osf = j != -1
                k = find_token(document.header, "\\font_sf_scale", 0)
                if k == -1:
                    document.warning("Malformed LyX document: Missing \\font_sf_scale.")
                else:
                    sfscale = document.header[k].split()
                    val = sfscale[1]
                    # The header value is reset even if it turns out
                    # to be unparsable below.
                    sfscale[1] = "100"
                    document.header[k] = " ".join(sfscale)
                    try:
                        sf_scale = float(val)
                    except ValueError:
                        # Keep the default scale of 100.0
                        document.warning("Invalid font_sf_scale value: " + val)
                preamble = "\\usepackage["
                if osf:
                    document.header[j] = "\\font_sans_osf false"
                    preamble += "osf,"
                if sf_scale != 100.0:
                    preamble += 'scaled=' + str(sf_scale / 100.0) + ','
                preamble += opts
                preamble += "]{biolinum}"
                add_to_preamble(document, [preamble])
                del document.header[x]

    # Now the rm
    regexp = re.compile(r'(\\font_roman_opts)')
    x = find_re(document.header, regexp, 0)
    if x == -1:
        return

    romanopts = tokens.findall(document.header[x])
    opts = romanopts[1].strip('"')

    i = find_token(document.header, "\\font_roman", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\font_roman.")
        return

    romanfont = tokens.findall(document.header[i])
    roman = romanfont[1].strip('"')
    if roman not in rmfonts:
        return
    romanfont[1] = '"default"'
    document.header[i] = " ".join(romanfont)

    # Fonts whose package name differs from the font name used in the header
    packages = {
        "utopia": "fourier",
        "palatino": "mathpazo",
        "times": "mathptmx",
        "xcharter": "XCharter",
    }
    package = packages.get(roman, roman)

    # Package option(s) emitted per font when "\font_roman_osf true" is set;
    # fonts not listed here get no option.
    osf_options = {
        "cochineal": "proportional,osf,",
        "utopia": "oldstyle,",
        "garamondx": "osfI,",
        "libertine": "osf,",
        "palatino": "osf,",
        "xcharter": "osf,",
    }
    osf = ""
    j = find_token(document.header, "\\font_roman_osf true", 0)
    if j != -1:
        osf = osf_options.get(roman, "")
        document.header[j] = "\\font_roman_osf false"

    k = find_token(document.header, "\\font_sc true", 0)
    if k != -1:
        if roman == "utopia":
            osf += "expert,"
        # For palatino, "sc" is only emitted when no osf option was set.
        if roman == "palatino" and osf == "":
            osf = "sc,"
        document.header[k] = "\\font_sc false"

    preamble = "\\usepackage[" + osf + opts + "]{" + package + "}"
    add_to_preamble(document, [preamble])
    del document.header[x]
2857
2858
def convert_CantarellFont(document):
    """Handle Cantarell font definition to LaTeX.

    Acts only on documents whose header contains
    "\\use_non_tex_fonts false"; the 'Cantarell' font mapping is then
    handed to convert_fonts() together with the "oldstyle" argument.
    """

    # Leave documents without "\use_non_tex_fonts false" untouched.
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    convert_fonts(document, createFontMapping(['Cantarell']), "oldstyle")
2865
def revert_CantarellFont(document):
    """Revert native Cantarell font definition to LaTeX.

    Acts only on documents whose header contains
    "\\use_non_tex_fonts false". If revert_fonts() reports success for the
    'Cantarell' mapping, the fonts it collected are passed on to
    add_preamble_fonts().
    """

    # Leave documents without "\use_non_tex_fonts false" untouched.
    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return

    collected = {}
    mapping = createFontMapping(['Cantarell'])
    if not revert_fonts(document, mapping, collected, False, True):
        return
    add_preamble_fonts(document, collected)
2874
2875
##
# Conversion hub
#
# Each table entry is a [file format number, [routines]] pair.
# The order of the routines within an entry matters (see format 579
# in the revert table).
#

supported_versions = ["2.4.0", "2.4"]

convert = [
    [545, [convert_lst_literalparam]],
    [546, []],
    [547, []],
    [548, []],
    [549, []],
    [550, [convert_fontenc]],
    [551, []],
    [552, []],
    [553, []],
    [554, []],
    [555, []],
    [556, []],
    [557, [convert_vcsinfo]],
    [558, [removeFrontMatterStyles]],
    [559, []],
    [560, []],
    # Handle dejavu, ibmplex fonts in GUI
    [561, [convert_latexFonts]],
    [562, []],
    [563, []],
    [564, []],
    # Handle adobe fonts in GUI
    [565, [convert_AdobeFonts]],
    [566, [convert_hebrew_parentheses]],
    [567, []],
    [568, []],
    [569, []],
    [570, []],
    [571, []],
    # Added options thin, light, extralight for Noto
    [572, [convert_notoFonts]],
    [573, [convert_inputencoding_namechange]],
    [574, [convert_ruby_module, convert_utf8_japanese]],
    [575, [convert_lineno]],
    [576, []],
    [577, [convert_linggloss]],
    [578, []],
    [579, []],
    [580, []],
    [581, [convert_osf]],
    [582, [convert_CantarellFont]],
]

revert = [
    [581, [revert_CantarellFont]],
    [580, [revert_texfontopts, revert_osf]],
    # keep revert_font_opts last!
    [579, [revert_minionpro,
           revert_plainNotoFonts_xopts,
           revert_notoFonts_xopts,
           revert_IBMFonts_xopts,
           revert_AdobeFonts_xopts,
           revert_font_opts]],
    [578, [revert_babelfont]],
    [577, [revert_drs]],
    [576, [revert_linggloss, revert_subexarg]],
    [575, [revert_new_languages]],
    [574, [revert_lineno]],
    [573, [revert_ruby_module, revert_utf8_japanese]],
    [572, [revert_inputencoding_namechange]],
    [571, [revert_notoFonts]],
    [570, [revert_cmidruletrimming]],
    [569, [revert_bibfileencodings]],
    [568, [revert_tablestyle]],
    [567, [revert_soul]],
    [566, [revert_malayalam]],
    [565, [revert_hebrew_parentheses]],
    [564, [revert_AdobeFonts]],
    [563, [revert_lformatinfo]],
    [562, [revert_listpargs]],
    [561, [revert_l7ninfo]],
    # Handle dejavu, ibmplex fonts in user preamble
    [560, [revert_latexFonts]],
    [559, [revert_timeinfo, revert_namenoextinfo]],
    [558, [revert_dateinfo]],
    [557, [addFrontMatterStyles]],
    [556, [revert_vcsinfo]],
    [555, [revert_bibencoding]],
    [554, [revert_vcolumns]],
    [553, [revert_stretchcolumn]],
    [552, [revert_tuftecite]],
    [551, [revert_floatpclass, revert_floatalignment]],
    [550, [revert_nospellcheck]],
    [549, [revert_fontenc]],
    # dummy format change
    [548, []],
    [547, [revert_lscape]],
    [546, [revert_xcharter]],
    [545, [revert_paratype]],
    [544, [revert_lst_literalparam]],
]


if __name__ == "__main__":
    pass