lib/lyx2lyx/lyx2lyx_tools.py

   1 # This file is part of lyx2lyx
   2 # Copyright (C) 2011 The LyX team
   3 #
   4 # This program is free software; you can redistribute it and/or
   5 # modify it under the terms of the GNU General Public License
   6 # as published by the Free Software Foundation; either version 2
   7 # of the License, or (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License
  15 # along with this program; if not, write to the Free Software
  16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  17
  18 """
  19 This module offers several free functions to help with lyx2lyx'ing.
  20 More documentation is below, but here is a quick guide to what
  21 they do. Optional arguments are marked by brackets.
  22
  23 add_to_preamble(document, text):
  24   Here, text can be either a single line or a list of lines. It
  25   is bad practice to pass something with embedded newlines, but
  26   we will handle that properly.
  27   The routine checks to see whether the provided material is
  28   already in the preamble. If not, it adds it.
  29   Prepends a comment "% Added by lyx2lyx" to text.
  30
  31 insert_to_preamble(document, text[, index]):
  32   Here, text can be either a single line or a list of lines. It
  33   is bad practice to pass something with embedded newlines, but
  34   we will handle that properly.
  35   The routine inserts text at document.preamble[index], where by
  36   default index is 0, so the material is inserted at the beginning.
  37   Prepends a comment "% Added by lyx2lyx" to text.
  38
  39 put_cmd_in_ert(cmd):
  40   Here cmd should be a list of strings (lines), which we want to
  41   wrap in ERT. Returns a list of strings so wrapped.
  42   A call to this routine will often go something like this:
  43     i = find_token('\\begin_inset FunkyInset', ...)
  44     j = find_end_of_inset(document.body, i)
  45     content = lyx2latex(document, document.body[i:j + 1])
  46     ert = put_cmd_in_ert(content)
  47     document.body[i:j+1] = ert
  48
  49 get_ert(lines, i[, verbatim]):
  50   Here, lines is a list of lines of LyX material containing an ERT inset,
  51   whose content we want to convert to LaTeX. The ERT starts at index i.
  52   If the optional (by default: False) bool verbatim is True, the content
  53   of the ERT is returned verbatim, that is in LyX syntax (not LaTeX syntax)
  54   for the use in verbatim insets.
  55
  56 lyx2latex(document, lines):
  57   Here, lines is a list of lines of LyX material we want to convert
  58   to LaTeX. We do the best we can and return a string containing
  59   the translated material.
  60
  61 lyx2verbatim(document, lines):
  62   Here, lines is a list of lines of LyX material we want to convert
  63   to verbatim material (used in ERT and the like). We do the best we
  64   can and return a string containing the translated material.
  65
  66 latex_length(slen):
  67   Convert lengths (in LyX form) to their LaTeX representation. Returns
  68   (bool, length), where the bool tells us if it was a percentage, and
  69   the length is the LaTeX representation.
  70
  71 convert_info_insets(document, type, func):
  72   Applies func to the argument of all info insets matching certain types
  73   type : the type to match. This can be a regular expression.
  74   func : function from string to string to apply to the "arg" field of
  75          the info insets.
  76
  77 is_document_option(document, option):
  78   Find if _option_ is a document option (\\options in the header).
  79
  80 insert_document_option(document, option):
  81   Insert _option_ as a document option.
  82
  83 remove_document_option(document, option):
  84   Remove _option_ as a document option.
  85
  86 revert_language(document, lyxname, babelname="", polyglossianame=""):
  87   Reverts native language support to ERT
  88   If babelname or polyglossianame is empty, it is assumed
  89   this language package is not supported for the given language.
  90 """
  91
  92 import re
  93 import sys
  94 from parser_tools import (
  95     find_token,
  96     find_end_of_inset,
  97     get_containing_layout,
  98     get_containing_inset,
  99     get_value,
 100     get_bool_value,
 101 )
 102 from unicode_symbols import unicode_reps
 103
 104
 105 # This will accept either a list of lines or a single line.
 106 # It is bad practice to pass something with embedded newlines,
 107 # though we will handle that.
 108 def add_to_preamble(document, text):
 109     "Add text to the preamble if it is not already there."
 110
 111     if not type(text) is list:
 112         # split on \n just in case
 113         # it'll give us the one element list we want
 114         # if there's no \n, too
 115         text = text.split("\n")
 116
 117     i = 0
 118     prelen = len(document.preamble)
 119     while True:
 120         i = find_token(document.preamble, text[0], i)
 121         if i == -1:
 122             break
 123         # we need a perfect match
 124         matched = True
 125         for line in text:
 126             if i >= prelen or line != document.preamble[i]:
 127                 matched = False
 128                 break
 129             i += 1
 130         if matched:
 131             return
 132
 133     document.preamble.extend(["% Added by lyx2lyx"])
 134     document.preamble.extend(text)
 135
 136
 137 # Note that text can be either a list of lines or a single line.
 138 # It should really be a list.
 139 def insert_to_preamble(document, text, index=0):
 140     """Insert text to the preamble at a given line"""
 141
 142     if not type(text) is list:
 143         # split on \n just in case
 144         # it'll give us the one element list we want
 145         # if there's no \n, too
 146         text = text.split("\n")
 147
 148     text.insert(0, "% Added by lyx2lyx")
 149     document.preamble[index:index] = text
 150
 151
 152 # A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method
 153 # Created from the reversed list to keep the first of alternative definitions.
 154 licr_table = {ord(ch): cmd for cmd, ch in unicode_reps[::-1]}
 155
 156
 157 def put_cmd_in_ert(cmd, is_open=False, as_paragraph=False):
 158     """
 159     Return ERT inset wrapping `cmd` as a list of strings.
 160
 161     `cmd` can be a string or list of lines. Non-ASCII characters are converted
 162     to the respective LICR macros if defined in unicodesymbols,
 163     `is_open` is a boolean setting the inset status to "open",
 164     `as_paragraph` wraps the ERT inset in a Standard paragraph.
 165     """
 166
 167     status = {False: "collapsed", True: "open"}
 168     ert_inset = [
 169         "\\begin_inset ERT",
 170         "status %s" % status[is_open],
 171         "",
 172         "\\begin_layout Plain Layout",
 173         "",
 174         # content here ([5:5])
 175         "\\end_layout",
 176         "",
 177         "\\end_inset",
 178     ]
 179
 180     paragraph = [
 181         "\\begin_layout Standard",
 182         # content here ([1:1])
 183         "",
 184         "",
 185         "\\end_layout",
 186         "",
 187     ]
 188     # ensure cmd is an unicode instance and make it "LyX safe".
 189     if isinstance(cmd, list):
 190         cmd = "\n".join(cmd)
 191     cmd = cmd.translate(licr_table)
 192     cmd = cmd.replace("\\", "\n\\backslash\n")
 193
 194     ert_inset[5:5] = cmd.splitlines()
 195     if not as_paragraph:
 196         return ert_inset
 197     paragraph[1:1] = ert_inset
 198     return paragraph
 199
 200
 201 def get_ert(lines, i, verbatim=False):
 202     "Convert an ERT inset into LaTeX."
 203     if not lines[i].startswith("\\begin_inset ERT"):
 204         return ""
 205     j = find_end_of_inset(lines, i)
 206     if j == -1:
 207         return ""
 208     while i < j and not lines[i].startswith("status"):
 209         i = i + 1
 210     i = i + 1
 211     ret = ""
 212     first = True
 213     while i < j:
 214         if lines[i] == "\\begin_layout Plain Layout":
 215             if first:
 216                 first = False
 217             else:
 218                 ret = ret + "\n"
 219             while i + 1 < j and lines[i + 1] == "":
 220                 i = i + 1
 221         elif lines[i] == "\\end_layout":
 222             while i + 1 < j and lines[i + 1] == "":
 223                 i = i + 1
 224         elif lines[i] == "\\backslash":
 225             if verbatim:
 226                 ret = ret + "\n" + lines[i] + "\n"
 227             else:
 228                 ret = ret + "\\"
 229         else:
 230             ret = ret + lines[i]
 231         i = i + 1
 232     return ret
 233
 234
 235 def lyx2latex(document, lines):
 236     "Convert some LyX stuff into corresponding LaTeX stuff, as best we can."
 237
 238     content = ""
 239     ert_end = 0
 240     note_end = 0
 241     hspace = ""
 242
 243     for curline in range(len(lines)):
 244         line = lines[curline]
 245         if line.startswith("\\begin_inset Note Note"):
 246             # We want to skip LyX notes, so remember where the inset ends
 247             note_end = find_end_of_inset(lines, curline + 1)
 248             continue
 249         elif note_end >= curline:
 250             # Skip LyX notes
 251             continue
 252         elif line.startswith("\\begin_inset ERT"):
 253             # We don't want to replace things inside ERT, so figure out
 254             # where the end of the inset is.
 255             ert_end = find_end_of_inset(lines, curline + 1)
 256             continue
 257         elif line.startswith("\\begin_inset Formula"):
 258             line = line[20:]
 259         elif line.startswith("\\begin_inset Quotes"):
 260             # For now, we do a very basic reversion. Someone who understands
 261             # quotes is welcome to fix it up.
 262             qtype = line[20:].strip()
 263             # lang = qtype[0]
 264             side = qtype[1]
 265             dbls = qtype[2]
 266             if side == "l":
 267                 if dbls == "d":
 268                     line = "``"
 269                 else:
 270                     line = "`"
 271             else:
 272                 if dbls == "d":
 273                     line = "''"
 274                 else:
 275                     line = "'"
 276         elif line.startswith("\\begin_inset Newline newline"):
 277             line = "\\\\ "
 278         elif line.startswith("\\noindent"):
 279             line = "\\noindent "  # we need the space behind the command
 280         elif line.startswith("\\begin_inset space"):
 281             line = line[18:].strip()
 282             if line.startswith("\\hspace"):
 283                 # Account for both \hspace and \hspace*
 284                 hspace = line[:-2]
 285                 continue
 286             elif line == "\\space{}":
 287                 line = "\\ "
 288             elif line == "\\thinspace{}":
 289                 line = "\\,"
 290         elif hspace != "":
 291             # The LyX length is in line[8:], after the \length keyword
 292             length = latex_length(line[8:])[1]
 293             line = hspace + "{" + length + "}"
 294             hspace = ""
 295         elif (
 296             line.isspace()
 297             or line.startswith("\\begin_layout")
 298             or line.startswith("\\end_layout")
 299             or line.startswith("\\begin_inset")
 300             or line.startswith("\\end_inset")
 301             or line.startswith("\\lang")
 302             or line.strip() == "status collapsed"
 303             or line.strip() == "status open"
 304         ):
 305             # skip all that stuff
 306             continue
 307
 308         # this needs to be added to the preamble because of cases like
 309         # \textmu, \textbackslash, etc.
 310         add_to_preamble(
 311             document,
 312             [
 313                 "% added by lyx2lyx for converted index entries",
 314                 "\\@ifundefined{textmu}",
 315                 " {\\usepackage{textcomp}}{}",
 316             ],
 317         )
 318         # a lossless reversion is not possible
 319         # try at least to handle some common insets and settings
 320         if ert_end >= curline:
 321             line = line.replace(r"\backslash", "\\")
 322         else:
 323             # No need to add "{}" after single-nonletter macros
 324             line = line.replace("&", "\\&")
 325             line = line.replace("#", "\\#")
 326             line = line.replace("^", "\\textasciicircum{}")
 327             line = line.replace("%", "\\%")
 328             line = line.replace("_", "\\_")
 329             line = line.replace("$", "\\$")
 330
 331             # Do the LyX text --> LaTeX conversion
 332             for rep in unicode_reps:
 333                 line = line.replace(rep[1], rep[0])
 334             line = line.replace(r"\backslash", r"\textbackslash{}")
 335             line = line.replace(r"\series bold", r"\bfseries{}").replace(
 336                 r"\series default", r"\mdseries{}"
 337             )
 338             line = line.replace(r"\shape italic", r"\itshape{}").replace(
 339                 r"\shape smallcaps", r"\scshape{}"
 340             )
 341             line = line.replace(r"\shape slanted", r"\slshape{}").replace(
 342                 r"\shape default", r"\upshape{}"
 343             )
 344             line = line.replace(r"\emph on", r"\em{}").replace(r"\emph default", r"\em{}")
 345             line = line.replace(r"\noun on", r"\scshape{}").replace(
 346                 r"\noun default", r"\upshape{}"
 347             )
 348             line = line.replace(r"\bar under", r"\underbar{").replace(r"\bar default", r"}")
 349             line = line.replace(r"\family sans", r"\sffamily{}").replace(
 350                 r"\family default", r"\normalfont{}"
 351             )
 352             line = line.replace(r"\family typewriter", r"\ttfamily{}").replace(
 353                 r"\family roman", r"\rmfamily{}"
 354             )
 355             line = line.replace(r"\InsetSpace ", r"").replace(r"\SpecialChar ", r"")
 356         content += line
 357     return content
 358
 359
 360 def lyx2verbatim(document, lines):
 361     "Convert some LyX stuff into corresponding verbatim stuff, as best we can."
 362
 363     content = lyx2latex(document, lines)
 364     content = re.sub(r"\\(?!backslash)", r"\n\\backslash\n", content)
 365
 366     return content
 367
 368
 369 def latex_length(slen):
 370     """
 371     Convert lengths to their LaTeX representation. Returns (bool, length),
 372     where the bool tells us if it was a percentage, and the length is the
 373     LaTeX representation.
 374     """
 375     i = 0
 376     percent = False
 377     # the slen has the form
 378     # ValueUnit+ValueUnit-ValueUnit or
 379     # ValueUnit+-ValueUnit
 380     # the + and - (glue lengths) are optional
 381     # the + always precedes the -
 382
 383     # Convert relative lengths to LaTeX units
 384     units = {
 385         "col%": "\\columnwidth",
 386         "text%": "\\textwidth",
 387         "page%": "\\paperwidth",
 388         "line%": "\\linewidth",
 389         "theight%": "\\textheight",
 390         "pheight%": "\\paperheight",
 391         "baselineskip%": "\\baselineskip",
 392     }
 393     for unit in list(units.keys()):
 394         i = slen.find(unit)
 395         if i == -1:
 396             continue
 397         percent = True
 398         minus = slen.rfind("-", 1, i)
 399         plus = slen.rfind("+", 0, i)
 400         latex_unit = units[unit]
 401         if plus == -1 and minus == -1:
 402             value = slen[:i]
 403             value = str(float(value) / 100)
 404             end = slen[i + len(unit) :]
 405             slen = value + latex_unit + end
 406         if plus > minus:
 407             value = slen[plus + 1 : i]
 408             value = str(float(value) / 100)
 409             begin = slen[: plus + 1]
 410             end = slen[i + len(unit) :]
 411             slen = begin + value + latex_unit + end
 412         if plus < minus:
 413             value = slen[minus + 1 : i]
 414             value = str(float(value) / 100)
 415             begin = slen[: minus + 1]
 416             slen = begin + value + latex_unit
 417
 418     # replace + and -, but only if the - is not the first character
 419     slen = slen[0] + slen[1:].replace("+", " plus ").replace("-", " minus ")
 420     # handle the case where "+-1mm" was used, because LaTeX only understands
 421     # "plus 1mm minus 1mm"
 422     if slen.find("plus  minus"):
 423         lastvaluepos = slen.rfind(" ")
 424         lastvalue = slen[lastvaluepos:]
 425         slen = slen.replace("  ", lastvalue + " ")
 426     return (percent, slen)
 427
 428
 429 def length_in_bp(length):
 430     "Convert a length in LyX format to its value in bp units"
 431
 432     em_width = 10.0 / 72.27  # assume 10pt font size
 433     text_width = 8.27 / 1.7  # assume A4 with default margins
 434     # scale factors are taken from Length::inInch()
 435     scales = {
 436         "bp": 1.0,
 437         "cc": (72.0 / (72.27 / (12.0 * 0.376 * 2.845))),
 438         "cm": (72.0 / 2.54),
 439         "dd": (72.0 / (72.27 / (0.376 * 2.845))),
 440         "em": (72.0 * em_width),
 441         "ex": (72.0 * em_width * 0.4305),
 442         "in": 72.0,
 443         "mm": (72.0 / 25.4),
 444         "mu": (72.0 * em_width / 18.0),
 445         "pc": (72.0 / (72.27 / 12.0)),
 446         "pt": (72.0 / (72.27)),
 447         "sp": (72.0 / (72.27 * 65536.0)),
 448         "text%": (72.0 * text_width / 100.0),
 449         "col%": (72.0 * text_width / 100.0),  # assume 1 column
 450         "page%": (72.0 * text_width * 1.7 / 100.0),
 451         "line%": (72.0 * text_width / 100.0),
 452         "theight%": (72.0 * text_width * 1.787 / 100.0),
 453         "pheight%": (72.0 * text_width * 2.2 / 100.0),
 454     }
 455
 456     rx = re.compile(r"^\s*([^a-zA-Z%]+)([a-zA-Z%]+)\s*$")
 457     m = rx.match(length)
 458     if not m:
 459         document.warning("Invalid length value: " + length + ".")
 460         return 0
 461     value = m.group(1)
 462     unit = m.group(2)
 463     if not unit in scales.keys():
 464         document.warning("Unknown length unit: " + unit + ".")
 465         return value
 466     return "%g" % (float(value) * scales[unit])
 467
 468
 469 def revert_flex_inset(lines, name, LaTeXname):
 470     "Convert flex insets to TeX code"
 471     i = 0
 472     while True:
 473         i = find_token(lines, "\\begin_inset Flex " + name, i)
 474         if i == -1:
 475             return
 476         z = find_end_of_inset(lines, i)
 477         if z == -1:
 478             document.warning("Can't find end of Flex " + name + " inset.")
 479             i += 1
 480             continue
 481         # remove the \end_inset
 482         lines[z - 2 : z + 1] = put_cmd_in_ert("}")
 483         # we need to reset character layouts if necessary
 484         j = find_token(lines, "\\emph on", i, z)
 485         k = find_token(lines, "\\noun on", i, z)
 486         l = find_token(lines, "\\series", i, z)
 487         m = find_token(lines, "\\family", i, z)
 488         n = find_token(lines, "\\shape", i, z)
 489         o = find_token(lines, "\\color", i, z)
 490         p = find_token(lines, "\\size", i, z)
 491         q = find_token(lines, "\\bar under", i, z)
 492         r = find_token(lines, "\\uuline on", i, z)
 493         s = find_token(lines, "\\uwave on", i, z)
 494         t = find_token(lines, "\\strikeout on", i, z)
 495         if j != -1:
 496             lines.insert(z - 2, "\\emph default")
 497         if k != -1:
 498             lines.insert(z - 2, "\\noun default")
 499         if l != -1:
 500             lines.insert(z - 2, "\\series default")
 501         if m != -1:
 502             lines.insert(z - 2, "\\family default")
 503         if n != -1:
 504             lines.insert(z - 2, "\\shape default")
 505         if o != -1:
 506             lines.insert(z - 2, "\\color inherit")
 507         if p != -1:
 508             lines.insert(z - 2, "\\size default")
 509         if q != -1:
 510             lines.insert(z - 2, "\\bar default")
 511         if r != -1:
 512             lines.insert(z - 2, "\\uuline default")
 513         if s != -1:
 514             lines.insert(z - 2, "\\uwave default")
 515         if t != -1:
 516             lines.insert(z - 2, "\\strikeout default")
 517         lines[i : i + 4] = put_cmd_in_ert(LaTeXname + "{")
 518         i += 1
 519
 520
 521 def revert_font_attrs(lines, name, LaTeXname):
 522     "Reverts font changes to TeX code"
 523     i = 0
 524     changed = False
 525     while True:
 526         i = find_token(lines, name + " on", i)
 527         if i == -1:
 528             break
 529         j = find_token(lines, name + " default", i)
 530         k = find_token(lines, name + " on", i + 1)
 531         # if there is no default set, the style ends with the layout
 532         # assure hereby that we found the correct layout end
 533         if j != -1 and (j < k or k == -1):
 534             lines[j : j + 1] = put_cmd_in_ert("}")
 535         else:
 536             j = find_token(lines, "\\end_layout", i)
 537             lines[j:j] = put_cmd_in_ert("}")
 538         lines[i : i + 1] = put_cmd_in_ert(LaTeXname + "{")
 539         changed = True
 540         i += 1
 541
 542     # now delete all remaining lines that manipulate this attribute
 543     i = 0
 544     while True:
 545         i = find_token(lines, name, i)
 546         if i == -1:
 547             break
 548         del lines[i]
 549
 550     return changed
 551
 552
 553 def revert_layout_command(lines, name, LaTeXname):
 554     "Reverts a command from a layout to TeX code"
 555     i = 0
 556     while True:
 557         i = find_token(lines, "\\begin_layout " + name, i)
 558         if i == -1:
 559             return
 560         k = -1
 561         # find the next layout
 562         j = i + 1
 563         while k == -1:
 564             j = find_token(lines, "\\begin_layout", j)
 565             l = len(lines)
 566             # if nothing was found it was the last layout of the document
 567             if j == -1:
 568                 lines[l - 4 : l - 4] = put_cmd_in_ert("}")
 569                 k = 0
 570             # exclude plain layout because this can be TeX code or another inset
 571             elif lines[j] != "\\begin_layout Plain Layout":
 572                 lines[j - 2 : j - 2] = put_cmd_in_ert("}")
 573                 k = 0
 574             else:
 575                 j += 1
 576         lines[i] = "\\begin_layout Standard"
 577         lines[i + 1 : i + 1] = put_cmd_in_ert(LaTeXname + "{")
 578         i += 1
 579
 580
 581 def hex2ratio(s):
 582     "Converts an RRGGBB-type hexadecimal string to a float in [0.0,1.0]"
 583     try:
 584         val = int(s, 16)
 585     except:
 586         val = 0
 587     if val != 0:
 588         val += 1
 589     return str(val / 256.0)
 590
 591
 592 def str2bool(s):
 593     "'true' goes to True, case-insensitively, and we strip whitespace."
 594     s = s.strip().lower()
 595     return s == "true"
 596
 597
 598 def convert_info_insets(document, type, func):
 599     "Convert info insets matching type using func."
 600     i = 0
 601     type_re = re.compile(r'^type\s+"(%s)"$' % type)
 602     arg_re = re.compile(r'^arg\s+"(.*)"$')
 603     while True:
 604         i = find_token(document.body, "\\begin_inset Info", i)
 605         if i == -1:
 606             return
 607         t = type_re.match(document.body[i + 1])
 608         if t:
 609             arg = arg_re.match(document.body[i + 2])
 610             if arg:
 611                 new_arg = func(arg.group(1))
 612                 document.body[i + 2] = 'arg   "%s"' % new_arg
 613         i += 3
 614
 615
 616 def insert_document_option(document, option):
 617     "Insert _option_ as a document option."
 618
 619     # Find \options in the header
 620     i = find_token(document.header, "\\options", 0)
 621     # if the options does not exists add it after the textclass
 622     if i == -1:
 623         i = find_token(document.header, "\\textclass", 0) + 1
 624         document.header.insert(i, r"\options %s" % option)
 625         return
 626     # otherwise append to options
 627     if not is_document_option(document, option):
 628         document.header[i] += ",%s" % option
 629
 630
 631 def remove_document_option(document, option):
 632     """Remove _option_ as a document option."""
 633
 634     i = find_token(document.header, "\\options")
 635     options = get_value(document.header, "\\options", i)
 636     options = [op.strip() for op in options.split(",")]
 637
 638     # Remove `option` from \options
 639     options = [op for op in options if op != option]
 640
 641     if options:
 642         document.header[i] = "\\options " + ",".join(options)
 643     else:
 644         del document.header[i]
 645
 646
 647 def is_document_option(document, option):
 648     "Find if _option_ is a document option"
 649
 650     options = get_value(document.header, "\\options")
 651     options = [op.strip() for op in options.split(",")]
 652     return option in options
 653
 654
 655 singlepar_insets = [
 656     s.strip()
 657     for s in "Argument, Caption Above, Caption Below, Caption Bicaption,"
 658     "Caption Centered, Caption FigCaption, Caption Standard, Caption Table,"
 659     "Flex Chemistry, Flex Fixme_Note, Flex Latin, Flex ListOfSlides,"
 660     "Flex Missing_Figure, Flex PDF-Annotation, Flex PDF-Comment-Setup,"
 661     "Flex Reflectbox, Flex S/R expression, Flex Sweave Input File,"
 662     "Flex Sweave Options, Flex Thanks_Reference, Flex URL, Foot InTitle,"
 663     "IPADeco, Index, Info, Phantom, Script".split(",")
 664 ]
 665 # print(singlepar_insets)
 666
 667
def revert_language(document, lyxname, babelname="", polyglossianame=""):
    """Revert native language support for `lyxname` to ERT.

    document        : the document to change (header, body and preamble are
                      read and modified in place).
    lyxname         : name of the language as used in "\\lang" lines and in
                      the header's "\\language" setting.
    babelname       : name of the language for babel; if empty, the language
                      is taken to be unsupported by babel.
    polyglossianame : name of the language for polyglossia; if empty, the
                      language is taken to be unsupported by polyglossia.

    If `lyxname` is the document language, the document language is switched
    to english and the original language is restored by an ERT inset put
    near the start of the body. Language switches in the body are replaced
    by ERT insets holding the matching LaTeX command or environment.
    Returns None.
    """

    # Does the document use polyglossia?
    # (only assumed for non-TeX fonts with language package "default" or "auto")
    use_polyglossia = False
    if get_bool_value(document.header, "\\use_non_tex_fonts"):
        i = find_token(document.header, "\\language_package")
        if i == -1:
            document.warning("Malformed document! Missing \\language_package")
        else:
            pack = get_value(document.header, "\\language_package", i)
            if pack in ("default", "auto"):
                use_polyglossia = True

    # Do we use this language with polyglossia?
    with_polyglossia = use_polyglossia and polyglossianame != ""
    # Do we use this language with babel?
    with_babel = with_polyglossia == False and babelname != ""

    # Are we dealing with a primary or secondary language?
    primary = document.language == lyxname
    secondary = False

    # Main language first
    orig_doc_language = document.language
    if primary:
        # Change LyX document language to English (we will tell LaTeX
        # to use the original language at the end of this function):
        document.language = "english"
        i = find_token(document.header, "\\language %s" % lyxname, 0)
        if i != -1:
            document.header[i] = "\\language english"

    # Now look for occurrences in the body
    # (the search starts at i + 1, i.e. the first body line is never examined)
    i = 0
    while True:
        i = find_token(document.body, "\\lang", i + 1)
        if i == -1:
            break
        if document.body[i].startswith("\\lang %s" % lyxname):
            secondary = True
            # polyglossia name if the document uses polyglossia and a name
            # was given, babel name otherwise
            texname = use_polyglossia and polyglossianame or babelname
        elif primary and document.body[i].startswith("\\lang english"):
            # Since we switched the main language manually, English parts need to be marked
            texname = "english"
        else:
            continue

        parent = get_containing_layout(document.body, i)
        i_e = parent[2]  # end line no,
        # print(i, texname, parent, document.body[i+1], file=sys.stderr)

        # Move leading space to the previous line:
        # (the inserted line shifts the language switch down by one, so the
        # next search, starting at i + 1, finds the same switch again)
        if document.body[i + 1].startswith(" "):
            document.body[i + 1] = document.body[i + 1][1:]
            document.body.insert(i, " ")
            continue

        # TODO: handle nesting issues with font attributes, e.g.
        # \begin_layout Standard
        #
        # \emph on
        # \lang macedonian
        # Македонски јазик
        # \emph default
        #  — јужнословенски јазик, дел од групата на словенски јазици од јазичното
        #  семејство на индоевропски јазици.
        #  Македонскиот е службен и национален јазик во Македонија.
        # \end_layout

        # Ensure correct handling of list labels
        # (only if there is no space character between the index parent[3]
        # and the language switch)
        if parent[0] in ["Labeling", "Description"] and not " " in "\n".join(
            document.body[parent[3] : i]
        ):
            # line `i+1` is first line of a list item,
            # part before a space character is the label
            # TODO: insets or language change before first space character
            labelline = document.body[i + 1].split(" ", 1)
            if len(labelline) > 1:
                # Insert a space in the (original) document language
                # between label and remainder.
                # print("  Label:", labelline, file=sys.stderr)
                lines = [
                    labelline[0],
                    "\\lang %s" % orig_doc_language,
                    " ",
                    "\\lang %s" % (primary and "english" or lyxname),
                    labelline[1],
                ]
                document.body[i + 1 : i + 2] = lines
                # one line was replaced by five, so the end moves by four
                i_e += 4

        # Find out where to end the language change.
        # (at the next "\lang" line before i_e that is not nested in an inset
        # of this paragraph; otherwise i_e is kept)
        langswitch = i
        while True:
            langswitch = find_token(document.body, "\\lang", langswitch + 1, i_e)
            if langswitch == -1:
                break
            # print("  ", langswitch, document.body[langswitch], file=sys.stderr)
            # skip insets
            i_a = parent[3]  # paragraph start line
            # the lookup works on a slice, so indices are relative to i_a
            container = get_containing_inset(document.body[i_a:i_e], langswitch - i_a)
            if (
                container
                and container[1] < langswitch - i_a
                and container[2] > langswitch - i_a
            ):
                # print("  inset", container, file=sys.stderr)
                continue
            i_e = langswitch
            break

        # use function or environment?
        singlepar = i_e - i < 3
        if not singlepar and parent[0] == "Plain Layout":
            # environment not allowed in some insets
            container = get_containing_inset(document.body, i)
            # NOTE(review): the result is indexed without the truthiness
            # check used above -- confirm it cannot be falsy here.
            singlepar = container[0] in singlepar_insets

        # Delete empty language switches:
        if not "".join(document.body[i + 1 : i_e]):
            del document.body[i:i_e]
            i -= 1
            continue

        # begin_cmd and end_cmd are only assigned if the language is used
        # with polyglossia or with babel.
        if singlepar:
            if with_polyglossia:
                begin_cmd = "\\text%s{" % texname
            elif with_babel:
                begin_cmd = "\\foreignlanguage{%s}{" % texname
            end_cmd = "}"
        else:
            if with_polyglossia:
                begin_cmd = "\\begin{%s}" % texname
                end_cmd = "\\end{%s}" % texname
            elif with_babel:
                begin_cmd = "\\begin{otherlanguage}{%s}" % texname
                end_cmd = "\\end{otherlanguage}"

        if not primary or texname == "english":
            try:
                # the end is inserted first so that index i stays valid
                document.body[i_e:i_e] = put_cmd_in_ert(end_cmd)
                document.body[i + 1 : i + 1] = put_cmd_in_ert(begin_cmd)
            except UnboundLocalError:
                # neither polyglossia nor babel is used for this language,
                # so the command names were never assigned: insert nothing
                pass
        # the language switch itself is removed in any case
        del document.body[i]

    if not (primary or secondary):
        return

    # Make the language known to Babel/Polyglossia and ensure the correct
    # document language:
    doc_lang_switch = ""
    if with_babel:
        # add as global option
        insert_document_option(document, babelname)
        # Since user options are appended to the document options,
        # Babel will treat `babelname` as primary language.
        if not primary:
            doc_lang_switch = "\\selectlanguage{%s}" % orig_doc_language
    if with_polyglossia:
        # Define language in the user preamble
        # (don't use \AtBeginDocument, this fails with some languages).
        add_to_preamble(
            document,
            ["\\usepackage{polyglossia}", "\\setotherlanguage{%s}" % polyglossianame],
        )
        if primary:
            # Changing the main language must be done in the document body.
            doc_lang_switch = "\\resetdefaultlanguage{%s}" % polyglossianame

    # Reset LaTeX main language if required and not already done
    # (put_cmd_in_ert() puts the backslash on a line of its own, hence the
    # comparison without the first character; only body lines 8 to 19 are
    # checked for an earlier insertion)
    if doc_lang_switch and doc_lang_switch[1:] not in document.body[8:20]:
        document.body[2:2] = put_cmd_in_ert(doc_lang_switch, is_open=True, as_paragraph=True)