lib/lyx2lyx/lyx_1_5.py

   1 # This file is part of lyx2lyx
   2 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
   3 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19 """Convert files to the file format generated by lyx 1.5"""
  20
  21 import re
  22 import unicodedata
  23 import sys, os
  24
  25 from parser_tools import (
  26     find_re,
  27     find_token,
  28     find_token_backwards,
  29     find_token_exact,
  30     find_tokens,
  31     find_end_of,
  32     get_value,
  33     find_beginning_of,
  34     find_nonempty_line,
  35 )
  36 from lyx2lyx_tools import insert_document_option
  37 from LyX import get_encoding
  38
  39 ####################################################################
  40 # Private helper functions
  41
  42
def find_end_of_inset(lines, i):
    """Find end of inset, where lines[i] is included.

    Thin wrapper that calls find_end_of() on `lines`, starting at index `i`,
    with "\\begin_inset" as the opening token and "\\end_inset" as the
    closing token, and returns its result unchanged.  Callers in this file
    treat a result of -1 as "no matching end found".
    """
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
  46
  47
def find_end_of_layout(lines, i):
    """Find end of layout, where lines[i] is included.

    Thin wrapper that calls find_end_of() on `lines`, starting at index `i`,
    with "\\begin_layout" as the opening token and "\\end_layout" as the
    closing token, and returns its result unchanged.
    NOTE(review): presumably -1 means "not found", as for the inset variant;
    confirm against parser_tools.find_end_of.
    """
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
  51
  52
def find_beginning_of_layout(lines, i):
    """Find beginning of layout, where lines[i] is included.

    Thin wrapper that calls find_beginning_of() on `lines`, starting at index
    `i`, with "\\begin_layout" as the opening token and "\\end_layout" as the
    closing token, and returns its result unchanged.
    NOTE(review): presumably -1 means "not found"; confirm against
    parser_tools.find_beginning_of.
    """
    return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
  56
  57
  58 # End of helper functions
  59 ####################################################################
  60
  61
  62 ##
  63 #  Notes: Framed/Shaded
  64 #
  65
  66
  67 def revert_framed(document):
  68     "Revert framed notes."
  69     i = 0
  70     while True:
  71         i = find_tokens(
  72             document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i
  73         )
  74
  75         if i == -1:
  76             return
  77         document.body[i] = "\\begin_inset Note"
  78         i = i + 1
  79
  80
  81 ##
  82 #  Fonts
  83 #
  84
  85 roman_fonts = {
  86     "default": "default",
  87     "ae": "ae",
  88     "times": "times",
  89     "palatino": "palatino",
  90     "helvet": "default",
  91     "avant": "default",
  92     "newcent": "newcent",
  93     "bookman": "bookman",
  94     "pslatex": "times",
  95 }
  96 sans_fonts = {
  97     "default": "default",
  98     "ae": "default",
  99     "times": "default",
 100     "palatino": "default",
 101     "helvet": "helvet",
 102     "avant": "avant",
 103     "newcent": "default",
 104     "bookman": "default",
 105     "pslatex": "helvet",
 106 }
 107 typewriter_fonts = {
 108     "default": "default",
 109     "ae": "default",
 110     "times": "default",
 111     "palatino": "default",
 112     "helvet": "default",
 113     "avant": "default",
 114     "newcent": "default",
 115     "bookman": "default",
 116     "pslatex": "courier",
 117 }
 118
 119
 120 def convert_font_settings(document):
 121     "Convert font settings."
 122     i = 0
 123     i = find_token_exact(document.header, "\\fontscheme", i)
 124     if i == -1:
 125         document.warning("Malformed LyX document: Missing `\\fontscheme'.")
 126         return
 127     font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
 128     if font_scheme == "":
 129         document.warning("Malformed LyX document: Empty `\\fontscheme'.")
 130         font_scheme = "default"
 131     if not font_scheme in list(roman_fonts.keys()):
 132         document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
 133         font_scheme = "default"
 134     document.header[i : i + 1] = [
 135         "\\font_roman %s" % roman_fonts[font_scheme],
 136         "\\font_sans %s" % sans_fonts[font_scheme],
 137         "\\font_typewriter %s" % typewriter_fonts[font_scheme],
 138         "\\font_default_family default",
 139         "\\font_sc false",
 140         "\\font_osf false",
 141         "\\font_sf_scale 100",
 142         "\\font_tt_scale 100",
 143     ]
 144
 145
 146 def revert_font_settings(document):
 147     "Revert font settings."
 148     i = 0
 149     insert_line = -1
 150     fonts = {"roman": "default", "sans": "default", "typewriter": "default"}
 151     for family in "roman", "sans", "typewriter":
 152         name = "\\font_%s" % family
 153         i = find_token_exact(document.header, name, i)
 154         if i == -1:
 155             document.warning("Malformed LyX document: Missing `%s'." % name)
 156             i = 0
 157         else:
 158             if insert_line < 0:
 159                 insert_line = i
 160             fonts[family] = get_value(document.header, name, i, i + 1)
 161             del document.header[i]
 162     i = find_token_exact(document.header, "\\font_default_family", i)
 163     if i == -1:
 164         document.warning("Malformed LyX document: Missing `\\font_default_family'.")
 165         font_default_family = "default"
 166     else:
 167         font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
 168         del document.header[i]
 169     i = find_token_exact(document.header, "\\font_sc", i)
 170     if i == -1:
 171         document.warning("Malformed LyX document: Missing `\\font_sc'.")
 172         font_sc = "false"
 173     else:
 174         font_sc = get_value(document.header, "\\font_sc", i, i + 1)
 175         del document.header[i]
 176     if font_sc != "false":
 177         document.warning("Conversion of '\\font_sc' not yet implemented.")
 178     i = find_token_exact(document.header, "\\font_osf", i)
 179     if i == -1:
 180         document.warning("Malformed LyX document: Missing `\\font_osf'.")
 181         font_osf = "false"
 182     else:
 183         font_osf = get_value(document.header, "\\font_osf", i, i + 1)
 184         del document.header[i]
 185     i = find_token_exact(document.header, "\\font_sf_scale", i)
 186     if i == -1:
 187         document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
 188         font_sf_scale = "100"
 189     else:
 190         font_sf_scale = get_value(document.header, "\\font_sf_scale", i, i + 1)
 191         del document.header[i]
 192     if font_sf_scale != "100":
 193         document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
 194     i = find_token_exact(document.header, "\\font_tt_scale", i)
 195     if i == -1:
 196         document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
 197         font_tt_scale = "100"
 198     else:
 199         font_tt_scale = get_value(document.header, "\\font_tt_scale", i, i + 1)
 200         del document.header[i]
 201     if font_tt_scale != "100":
 202         document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
 203     for font_scheme in list(roman_fonts.keys()):
 204         if (
 205             roman_fonts[font_scheme] == fonts["roman"]
 206             and sans_fonts[font_scheme] == fonts["sans"]
 207             and typewriter_fonts[font_scheme] == fonts["typewriter"]
 208         ):
 209             document.header.insert(insert_line, "\\fontscheme %s" % font_scheme)
 210             if font_default_family != "default":
 211                 document.preamble.append(
 212                     "\\renewcommand{\\familydefault}{\\%s}" % font_default_family
 213                 )
 214             if font_osf == "true":
 215                 document.warning("Ignoring `\\font_osf = true'")
 216             return
 217     font_scheme = "default"
 218     document.header.insert(insert_line, "\\fontscheme %s" % font_scheme)
 219     if fonts["roman"] == "cmr":
 220         document.preamble.append("\\renewcommand{\\rmdefault}{cmr}")
 221         if font_osf == "true":
 222             document.preamble.append("\\usepackage{eco}")
 223             font_osf = "false"
 224     for font in "lmodern", "charter", "utopia", "beraserif", "ccfonts", "chancery":
 225         if fonts["roman"] == font:
 226             document.preamble.append("\\usepackage{%s}" % font)
 227     for font in "cmss", "lmss", "cmbr":
 228         if fonts["sans"] == font:
 229             document.preamble.append("\\renewcommand{\\sfdefault}{%s}" % font)
 230     for font in "berasans":
 231         if fonts["sans"] == font:
 232             document.preamble.append("\\usepackage{%s}" % font)
 233     for font in "cmtt", "lmtt", "cmtl":
 234         if fonts["typewriter"] == font:
 235             document.preamble.append("\\renewcommand{\\ttdefault}{%s}" % font)
 236     for font in "courier", "beramono", "luximono":
 237         if fonts["typewriter"] == font:
 238             document.preamble.append("\\usepackage{%s}" % font)
 239     if font_default_family != "default":
 240         document.preamble.append("\\renewcommand{\\familydefault}{\\%s}" % font_default_family)
 241     if font_osf == "true":
 242         document.warning("Ignoring `\\font_osf = true'")
 243
 244
 245 def revert_booktabs(document):
 246     "We remove the booktabs flag or everything else will become a mess."
 247     re_row = re.compile(r'^<row.*space="[^"]+".*>$')
 248     re_tspace = re.compile(r'\s+topspace="[^"]+"')
 249     re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
 250     re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
 251     i = 0
 252     while True:
 253         i = find_token(document.body, "\\begin_inset Tabular", i)
 254         if i == -1:
 255             return
 256         j = find_end_of_inset(document.body, i + 1)
 257         if j == -1:
 258             document.warning("Malformed LyX document: Could not find end of tabular.")
 259             continue
 260         for k in range(i, j):
 261             if re.search('^<features.* booktabs="true".*>$', document.body[k]):
 262                 document.warning("Converting 'booktabs' table to normal table.")
 263                 document.body[k] = document.body[k].replace(' booktabs="true"', "")
 264             if re.search(re_row, document.body[k]):
 265                 document.warning("Removing extra row space.")
 266                 document.body[k] = re_tspace.sub("", document.body[k])
 267                 document.body[k] = re_bspace.sub("", document.body[k])
 268                 document.body[k] = re_ispace.sub("", document.body[k])
 269         i = i + 1
 270
 271
 272 def convert_multiencoding(document, forward):
 273     """Fix files with multiple encodings.
 274     Files with an inputencoding of "auto" or "default" and multiple languages
 275     where at least two languages have different default encodings are encoded
 276     in multiple encodings for file formats < 249. These files are incorrectly
 277     read and written (as if the whole file was in the encoding of the main
 278     language).
 279     This is not true for files written by CJK-LyX, they are always in the locale
 280     encoding.
 281
 282     This function
 283     - converts from fake unicode values to true unicode if forward is true, and
 284     - converts from true unicode values to fake unicode if forward is false.
 285     document.encoding must be set to the old value (format 248) in both cases.
 286
 287     We do this here and not in LyX.py because it is far easier to do the
 288     necessary parsing in modern formats than in ancient ones.
 289     """
 290     inset_types = ["Foot", "Note"]
 291     if document.cjk_encoding != "":
 292         return
 293     encoding_stack = [document.encoding]
 294     insets = []
 295     lang_re = re.compile(r"^\\lang\s(\S+)")
 296     inset_re = re.compile(r"^\\begin_inset\s(\S+)")
 297     if not forward:  # no need to read file unless we are reverting
 298         spec_chars = read_unicodesymbols()
 299
 300     if document.inputencoding == "auto" or document.inputencoding == "default":
 301         i = 0
 302         while i < len(document.body):
 303             result = lang_re.match(document.body[i])
 304             if result:
 305                 language = result.group(1)
 306                 if language == "default":
 307                     document.warning(
 308                         f"Resetting encoding from {encoding_stack[-1]} to {document.encoding}.",
 309                         3,
 310                     )
 311                     encoding_stack[-1] = document.encoding
 312                 else:
 313                     from lyx2lyx_lang import lang
 314
 315                     document.warning(
 316                         f"Setting encoding from {encoding_stack[-1]} to {lang[language][3]}.",
 317                         3,
 318                     )
 319                     encoding_stack[-1] = lang[language][3]
 320             elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
 321                 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
 322                 if len(insets) > 0 and insets[-1] in inset_types:
 323                     from lyx2lyx_lang import lang
 324
 325                     encoding_stack.append(lang[document.language][3])
 326                 else:
 327                     encoding_stack.append(encoding_stack[-1])
 328             elif find_token(document.body, "\\end_layout", i, i + 1) == i:
 329                 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
 330                 if len(encoding_stack) == 1:
 331                     # Don't remove the document encoding from the stack
 332                     document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
 333                 else:
 334                     del encoding_stack[-1]
 335             elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
 336                 inset_result = inset_re.match(document.body[i])
 337                 if inset_result:
 338                     insets.append(inset_result.group(1))
 339                 else:
 340                     insets.append("")
 341             elif find_token(document.body, "\\end_inset", i, i + 1) == i:
 342                 del insets[-1]
 343             if encoding_stack[-1] != document.encoding:
 344                 if forward:
 345                     # This line has been incorrectly interpreted as if it was
 346                     # encoded in 'encoding'.
 347                     # Convert back to the 8bit string that was in the file.
 348                     orig = document.body[i].encode(document.encoding)
 349                     # Convert the 8bit string that was in the file to unicode
 350                     # with the correct encoding.
 351                     document.body[i] = orig.decode(encoding_stack[-1])
 352                 else:
 353                     try:
 354                         # Convert unicode to the 8bit string that will be written
 355                         # to the file with the correct encoding.
 356                         orig = document.body[i].encode(encoding_stack[-1])
 357                         # Convert the 8bit string that will be written to the
 358                         # file to fake unicode with the encoding that will later
 359                         # be used when writing to the file.
 360                         document.body[i] = orig.decode(document.encoding)
 361                     except:
 362                         mod_line = revert_unicode_line(document, i, insets, spec_chars)
 363                         document.body[i : i + 1] = mod_line.split("\n")
 364                         i += len(mod_line.split("\n")) - 1
 365             i += 1
 366
 367
def convert_utf8(document):
    """Set document encoding to UTF-8.

    convert_multiencoding() is run in the forward direction first, while
    document.encoding still holds the old value; only afterwards is
    document.encoding switched to "utf8".
    """
    convert_multiencoding(document, True)
    document.encoding = "utf8"
 372
 373
 374 def revert_utf8(document):
 375     "Set document encoding to the value corresponding to inputencoding."
 376     i = find_token(document.header, "\\inputencoding", 0)
 377     if i == -1:
 378         document.header.append("\\inputencoding auto")
 379     elif get_value(document.header, "\\inputencoding", i) == "utf8":
 380         document.header[i] = "\\inputencoding auto"
 381     document.inputencoding = get_value(document.header, "\\inputencoding", 0)
 382     document.encoding = get_encoding(
 383         document.language, document.inputencoding, 248, document.cjk_encoding
 384     )
 385     convert_multiencoding(document, False)
 386
 387
 388 # FIXME: Use the version in unicode_symbols.py which has some bug fixes
 389 def read_unicodesymbols():
 390     "Read the unicodesymbols list of unicode characters and corresponding commands."
 391     pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
 392     fp = open(os.path.join(pathname.strip("lyx2lyx"), "unicodesymbols"))
 393     spec_chars = {}
 394     for line in fp.readlines():
 395         if line[0] != "#":
 396             line = line.replace(' "', " ")  # remove all quotation marks with spaces before
 397             line = line.replace('" ', " ")  # remove all quotation marks with spaces after
 398             line = line.replace(r"\"", '"')  # replace \" by " (for characters with diaeresis)
 399             try:
 400                 # flag1 and flag2 are preamble and other flags
 401                 [ucs4, command, flag1, flag2] = line.split(None, 3)
 402                 spec_chars[chr(eval(ucs4))] = [command, flag1, flag2]
 403             except:
 404                 pass
 405     fp.close()
 406     return spec_chars
 407
 408
 409 def revert_unicode_line(document, i, insets, spec_chars, replacement_character="???"):
 410     # Define strings to start and end ERT and math insets
 411     ert_intro = (
 412         "\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s" % document.default_layout
 413     )
 414     ert_outro = "\n\\end_layout\n\n\\end_inset\n"
 415     math_intro = "\n\\begin_inset Formula $"
 416     math_outro = "$\n\\end_inset"
 417
 418     mod_line = ""
 419     if i and not is_inset_line(document, i - 1):
 420         last_char = document.body[i - 1][-1:]
 421     else:
 422         last_char = ""
 423
 424     line = document.body[i]
 425     for character in line:
 426         try:
 427             # Try to write the character
 428             dummy = character.encode(document.encoding)
 429             mod_line += character
 430             last_char = character
 431         except:
 432             # Try to replace with ERT/math inset
 433             if character in spec_chars:
 434                 command = spec_chars[character][0]  # the command to replace unicode
 435                 flag1 = spec_chars[character][1]
 436                 flag2 = spec_chars[character][2]
 437                 if flag1.find("combining") > -1 or flag2.find("combining") > -1:
 438                     # We have a character that should be combined with the previous
 439                     command += "{" + last_char + "}"
 440                     # Remove the last character. Ignore if it is whitespace
 441                     if len(last_char.rstrip()):
 442                         # last_char was found and is not whitespace
 443                         if mod_line:
 444                             mod_line = mod_line[:-1]
 445                         else:  # last_char belongs to the last line
 446                             document.body[i - 1] = document.body[i - 1][:-1]
 447                     else:
 448                         # The last character was replaced by a command. For now it is
 449                         # ignored. This could be handled better.
 450                         pass
 451                 if command[0:2] == "\\\\":
 452                     if command[2:12] == "ensuremath":
 453                         if insets and insets[-1] == "ERT":
 454                             # math in ERT
 455                             command = command.replace("\\\\ensuremath{\\\\", "$\n\\backslash\n")
 456                             command = command.replace("}", "$\n")
 457                         elif not insets or insets[-1] != "Formula":
 458                             # add a math inset with the replacement character
 459                             command = command.replace("\\\\ensuremath{\\", math_intro)
 460                             command = command.replace("}", math_outro)
 461                         else:
 462                             # we are already in a math inset
 463                             command = command.replace("\\\\ensuremath{\\", "")
 464                             command = command.replace("}", "")
 465                     else:
 466                         if insets and insets[-1] == "Formula":
 467                             # avoid putting an ERT in a math; instead put command as text
 468                             command = command.replace("\\\\", r"\mathrm{")
 469                             command = command + "}"
 470                         elif not insets or insets[-1] != "ERT":
 471                             # add an ERT inset with the replacement character
 472                             command = command.replace("\\\\", "\n\\backslash\n")
 473                             command = ert_intro + command + ert_outro
 474                         else:
 475                             command = command.replace("\\\\", "\n\\backslash\n")
 476                     last_char = ""  # indicate that the character should not be removed
 477                 mod_line += command
 478             else:
 479                 # Replace with replacement string
 480                 mod_line += replacement_character
 481     return mod_line
 482
 483
 484 def revert_unicode(document):
 485     """Transform unicode characters that can not be written using the
 486     document encoding to commands according to the unicodesymbols
 487     file. Characters that can not be replaced by commands are replaced by
 488     an replacement string.  Flags other than 'combined' are currently not
 489     implemented."""
 490     spec_chars = read_unicodesymbols()
 491     insets = []  # list of active insets
 492
 493     # Go through the document to capture all combining characters
 494     i = 0
 495     while i < len(document.body):
 496         line = document.body[i]
 497         # Check for insets
 498         if line.find("\\begin_inset") > -1:
 499             insets.append(line[13:].split()[0])
 500         if line.find("\\end_inset") > -1:
 501             del insets[-1]
 502
 503         # Try to write the line
 504         try:
 505             # If all goes well the line is written here
 506             dummy = line.encode(document.encoding)
 507             i += 1
 508         except:
 509             # Error, some character(s) in the line need to be replaced
 510             mod_line = revert_unicode_line(document, i, insets, spec_chars)
 511             document.body[i : i + 1] = mod_line.split("\n")
 512             i += len(mod_line.split("\n"))
 513
 514
 515 def revert_cs_label(document):
 516     "Remove status flag of charstyle label."
 517     i = 0
 518     while True:
 519         i = find_token(document.body, "\\begin_inset CharStyle", i)
 520         if i == -1:
 521             return
 522         # Seach for a line starting 'show_label'
 523         # If it is not there, break with a warning message
 524         i = i + 1
 525         while True:
 526             if document.body[i][:10] == "show_label":
 527                 del document.body[i]
 528                 break
 529             elif document.body[i][:13] == "\\begin_layout":
 530                 document.warning("Malformed LyX document: Missing 'show_label'.")
 531                 break
 532             i = i + 1
 533
 534         i = i + 1
 535
 536
 537 def convert_bibitem(document):
 538     r"""Convert
 539     \bibitem [option]{argument}
 540
 541     to
 542
 543     \begin_inset LatexCommand bibitem
 544     label "option"
 545     key "argument"
 546
 547     \end_inset
 548
 549     This must be called after convert_commandparams.
 550     """
 551     i = 0
 552     while True:
 553         i = find_token(document.body, "\\bibitem", i)
 554         if i == -1:
 555             break
 556         j = document.body[i].find("[") + 1
 557         k = document.body[i].rfind("]")
 558         if j == 0:  # No optional argument found
 559             option = None
 560         else:
 561             option = document.body[i][j:k]
 562         j = document.body[i].rfind("{") + 1
 563         k = document.body[i].rfind("}")
 564         argument = document.body[i][j:k]
 565         lines = ["\\begin_inset LatexCommand bibitem"]
 566         if option != None:
 567             lines.append('label "%s"' % option.replace('"', '\\"'))
 568         lines.append('key "%s"' % argument.replace('"', '\\"'))
 569         lines.append("")
 570         lines.append("\\end_inset")
 571         document.body[i : i + 1] = lines
 572         i = i + 1
 573
 574
# Parameter names used for the old positional arguments of each LatexCommand
# inset.  For every command the list gives, in this order, the name of the
# first optional argument, the second optional argument and the mandatory
# argument.  An empty string means the command does not take that argument;
# convert_commandparams() warns about and drops a value given for it.
commandparams_info = {
    # command : [option1, option2, argument]
    "bibitem": ["label", "", "key"],
    "bibtex": ["options", "btprint", "bibfiles"],
    "cite": ["after", "before", "key"],
    "citet": ["after", "before", "key"],
    "citep": ["after", "before", "key"],
    "citealt": ["after", "before", "key"],
    "citealp": ["after", "before", "key"],
    "citeauthor": ["after", "before", "key"],
    "citeyear": ["after", "before", "key"],
    "citeyearpar": ["after", "before", "key"],
    "citet*": ["after", "before", "key"],
    "citep*": ["after", "before", "key"],
    "citealt*": ["after", "before", "key"],
    "citealp*": ["after", "before", "key"],
    "citeauthor*": ["after", "before", "key"],
    "Citet": ["after", "before", "key"],
    "Citep": ["after", "before", "key"],
    "Citealt": ["after", "before", "key"],
    "Citealp": ["after", "before", "key"],
    "Citeauthor": ["after", "before", "key"],
    "Citet*": ["after", "before", "key"],
    "Citep*": ["after", "before", "key"],
    "Citealt*": ["after", "before", "key"],
    "Citealp*": ["after", "before", "key"],
    "Citeauthor*": ["after", "before", "key"],
    "citefield": ["after", "before", "key"],
    "citetitle": ["after", "before", "key"],
    "cite*": ["after", "before", "key"],
    "hfill": ["", "", ""],
    "index": ["", "", "name"],
    "printindex": ["", "", "name"],
    "label": ["", "", "name"],
    "eqref": ["name", "", "reference"],
    "pageref": ["name", "", "reference"],
    "prettyref": ["name", "", "reference"],
    "ref": ["name", "", "reference"],
    "vpageref": ["name", "", "reference"],
    "vref": ["name", "", "reference"],
    "tableofcontents": ["", "", "type"],
    "htmlurl": ["name", "", "target"],
    "url": ["name", "", "target"],
}
 619
 620
 621 def convert_commandparams(document):
 622     """Convert
 623
 624     \\begin_inset LatexCommand \\cmdname[opt1][opt2]{arg}
 625     \\end_inset
 626
 627     to
 628
 629     \\begin_inset LatexCommand cmdname
 630     name1 "opt1"
 631     name2 "opt2"
 632     name3 "arg"
 633     \\end_inset
 634
 635     name1, name2 and name3 can be different for each command.
 636     """
 637     # \begin_inset LatexCommand bibitem was not the official version (see
 638     # convert_bibitem()), but could be read in, so we convert it here, too.
 639
 640     i = 0
 641     while True:
 642         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 643         if i == -1:
 644             break
 645         command = document.body[i][26:].strip()
 646         if command == "":
 647             document.warning("Malformed LyX document: Missing LatexCommand name.")
 648             i = i + 1
 649             continue
 650
 651         j = find_token(document.body, "\\end_inset", i + 1)
 652         if j == -1:
 653             document.warning("Malformed document")
 654         else:
 655             command += "".join(document.body[i + 1 : j])
 656             document.body[i + 1 : j] = []
 657
 658         # The following parser is taken from the original InsetCommandParams::scanCommand
 659         name = ""
 660         option1 = ""
 661         option2 = ""
 662         argument = ""
 663         state = "WS"
 664         # Used to handle things like \command[foo[bar]]{foo{bar}}
 665         nestdepth = 0
 666         b = 0
 667         for c in command:
 668             if (
 669                 (state == "CMDNAME" and c == " ")
 670                 or (state == "CMDNAME" and c == "[")
 671                 or (state == "CMDNAME" and c == "{")
 672             ):
 673                 state = "WS"
 674             if (
 675                 (state == "OPTION" and c == "]")
 676                 or (state == "SECOPTION" and c == "]")
 677                 or (state == "CONTENT" and c == "}")
 678             ):
 679                 if nestdepth == 0:
 680                     state = "WS"
 681                 else:
 682                     nestdepth = nestdepth - 1
 683             if (
 684                 (state == "OPTION" and c == "[")
 685                 or (state == "SECOPTION" and c == "[")
 686                 or (state == "CONTENT" and c == "{")
 687             ):
 688                 nestdepth = nestdepth + 1
 689             if state == "CMDNAME":
 690                 name += c
 691             elif state == "OPTION":
 692                 option1 += c
 693             elif state == "SECOPTION":
 694                 option2 += c
 695             elif state == "CONTENT":
 696                 argument += c
 697             elif state == "WS":
 698                 if c == "\\":
 699                     state = "CMDNAME"
 700                 elif c == "[" and b != "]":
 701                     state = "OPTION"
 702                     nestdepth = 0  # Just to be sure
 703                 elif c == "[" and b == "]":
 704                     state = "SECOPTION"
 705                     nestdepth = 0  # Just to be sure
 706                 elif c == "{":
 707                     state = "CONTENT"
 708                     nestdepth = 0  # Just to be sure
 709             b = c
 710
 711         # Now we have parsed the command, output the parameters
 712         lines = ["\\begin_inset LatexCommand %s" % name]
 713         if option1 != "":
 714             if commandparams_info[name][0] == "":
 715                 document.warning(f"Ignoring invalid option `{option1}' of command `{name}'.")
 716             else:
 717                 lines.append(
 718                     '{} "{}"'.format(
 719                         commandparams_info[name][0],
 720                         option1.replace("\\", "\\\\").replace('"', '\\"'),
 721                     )
 722                 )
 723         if option2 != "":
 724             if commandparams_info[name][1] == "":
 725                 document.warning(
 726                     f"Ignoring invalid second option `{option2}' of command `{name}'."
 727                 )
 728             else:
 729                 lines.append(
 730                     '{} "{}"'.format(
 731                         commandparams_info[name][1],
 732                         option2.replace("\\", "\\\\").replace('"', '\\"'),
 733                     )
 734                 )
 735         if argument != "":
 736             if commandparams_info[name][2] == "":
 737                 document.warning(f"Ignoring invalid argument `{argument}' of command `{name}'.")
 738             else:
 739                 lines.append(
 740                     '{} "{}"'.format(
 741                         commandparams_info[name][2],
 742                         argument.replace("\\", "\\\\").replace('"', '\\"'),
 743                     )
 744                 )
 745         document.body[i : i + 1] = lines
 746         i = i + 1
 747
 748
 749 def revert_commandparams(document):
 750     regex = re.compile(r"(\S+)\s+(.+)")
 751     i = 0
 752     while True:
 753         i = find_token(document.body, "\\begin_inset LatexCommand", i)
 754         if i == -1:
 755             break
 756         name = document.body[i].split()[2]
 757         j = find_end_of_inset(document.body, i)
 758         preview_line = ""
 759         option1 = ""
 760         option2 = ""
 761         argument = ""
 762         for k in range(i + 1, j):
 763             match = re.match(regex, document.body[k])
 764             if match:
 765                 pname = match.group(1)
 766                 pvalue = match.group(2)
 767                 if pname == "preview":
 768                     preview_line = document.body[k]
 769                 elif commandparams_info[name][0] != "" and pname == commandparams_info[name][0]:
 770                     option1 = pvalue.strip('"').replace('\\"', '"').replace("\\\\", "\\")
 771                 elif commandparams_info[name][1] != "" and pname == commandparams_info[name][1]:
 772                     option2 = pvalue.strip('"').replace('\\"', '"').replace("\\\\", "\\")
 773                 elif commandparams_info[name][2] != "" and pname == commandparams_info[name][2]:
 774                     argument = pvalue.strip('"').replace('\\"', '"').replace("\\\\", "\\")
 775             elif document.body[k].strip() != "":
 776                 document.warning(
 777                     f"Ignoring unknown contents `{document.body[k]}' in command inset {name}."
 778                 )
 779         if name == "bibitem":
 780             if option1 == "":
 781                 lines = ["\\bibitem {%s}" % argument]
 782             else:
 783                 lines = [f"\\bibitem [{option1}]{{{argument}}}"]
 784         else:
 785             if option1 == "":
 786                 if option2 == "":
 787                     lines = [f"\\begin_inset LatexCommand \\{name}{{{argument}}}"]
 788                 else:
 789                     lines = [f"\\begin_inset LatexCommand \\{name}[][{option2}]{{{argument}}}"]
 790             else:
 791                 if option2 == "":
 792                     lines = [f"\\begin_inset LatexCommand \\{name}[{option1}]{{{argument}}}"]
 793                 else:
 794                     lines = [
 795                         f"\\begin_inset LatexCommand \\{name}[{option1}][{option2}]{{{argument}}}"
 796                     ]
 797         if name != "bibitem":
 798             if preview_line != "":
 799                 lines.append(preview_line)
 800             lines.append("")
 801             lines.append("\\end_inset")
 802         document.body[i : j + 1] = lines
 803         i += len(lines) + 1
 804
 805
 806 def revert_nomenclature(document):
 807     "Convert nomenclature entry to ERT."
 808     regex = re.compile(r"(\S+)\s+(.+)")
 809     i = 0
 810     use_nomencl = 0
 811     while True:
 812         i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
 813         if i == -1:
 814             break
 815         use_nomencl = 1
 816         j = find_end_of_inset(document.body, i + 1)
 817         preview_line = ""
 818         symbol = ""
 819         description = ""
 820         prefix = ""
 821         for k in range(i + 1, j):
 822             match = re.match(regex, document.body[k])
 823             if match:
 824                 name = match.group(1)
 825                 value = match.group(2)
 826                 if name == "preview":
 827                     preview_line = document.body[k]
 828                 elif name == "symbol":
 829                     symbol = value.strip('"').replace('\\"', '"')
 830                 elif name == "description":
 831                     description = value.strip('"').replace('\\"', '"')
 832                 elif name == "prefix":
 833                     prefix = value.strip('"').replace('\\"', '"')
 834             elif document.body[k].strip() != "":
 835                 document.warning(
 836                     "Ignoring unknown contents `%s' in nomenclature inset." % document.body[k]
 837                 )
 838         if prefix == "":
 839             command = f"nomenclature{{{symbol}}}{{{description}}}"
 840         else:
 841             command = f"nomenclature[{prefix}]{{{symbol}}}{{{description}}}"
 842         document.body[i : j + 1] = [
 843             "\\begin_inset ERT",
 844             "status collapsed",
 845             "",
 846             "\\begin_layout %s" % document.default_layout,
 847             "",
 848             "",
 849             "\\backslash",
 850             command,
 851             "\\end_layout",
 852             "",
 853             "\\end_inset",
 854         ]
 855         i = i + 11
 856     if (
 857         use_nomencl
 858         and find_token(document.preamble, "\\usepackage{nomencl}[2005/09/22]", 0) == -1
 859     ):
 860         document.preamble.append("\\usepackage{nomencl}[2005/09/22]")
 861         document.preamble.append("\\makenomenclature")
 862
 863
 864 def revert_printnomenclature(document):
 865     "Convert printnomenclature to ERT."
 866     regex = re.compile(r"(\S+)\s+(.+)")
 867     i = 0
 868     use_nomencl = 0
 869     while True:
 870         i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
 871         if i == -1:
 872             break
 873         use_nomencl = 1
 874         j = find_end_of_inset(document.body, i + 1)
 875         preview_line = ""
 876         labelwidth = ""
 877         for k in range(i + 1, j):
 878             match = re.match(regex, document.body[k])
 879             if match:
 880                 name = match.group(1)
 881                 value = match.group(2)
 882                 if name == "preview":
 883                     preview_line = document.body[k]
 884                 elif name == "labelwidth":
 885                     labelwidth = value.strip('"').replace('\\"', '"')
 886             elif document.body[k].strip() != "":
 887                 document.warning(
 888                     "Ignoring unknown contents `%s' in printnomenclature inset."
 889                     % document.body[k]
 890                 )
 891         if labelwidth == "":
 892             command = "nomenclature{}"
 893         else:
 894             command = "nomenclature[%s]" % labelwidth
 895         document.body[i : j + 1] = [
 896             "\\begin_inset ERT",
 897             "status collapsed",
 898             "",
 899             "\\begin_layout %s" % document.default_layout,
 900             "",
 901             "",
 902             "\\backslash",
 903             command,
 904             "\\end_layout",
 905             "",
 906             "\\end_inset",
 907         ]
 908         i = i + 11
 909     if (
 910         use_nomencl
 911         and find_token(document.preamble, "\\usepackage{nomencl}[2005/09/22]", 0) == -1
 912     ):
 913         document.preamble.append("\\usepackage{nomencl}[2005/09/22]")
 914         document.preamble.append("\\makenomenclature")
 915
 916
 917 def convert_esint(document):
 918     "Add \\use_esint setting to header."
 919     i = find_token(document.header, "\\cite_engine", 0)
 920     if i == -1:
 921         document.warning("Malformed LyX document: Missing `\\cite_engine'.")
 922         return
 923     # 0 is off, 1 is auto, 2 is on.
 924     document.header.insert(i, "\\use_esint 0")
 925
 926
 927 def revert_esint(document):
 928     "Remove \\use_esint setting from header."
 929     i = find_token(document.header, "\\use_esint", 0)
 930     if i == -1:
 931         document.warning("Malformed LyX document: Missing `\\use_esint'.")
 932         return
 933     use_esint = document.header[i].split()[1]
 934     del document.header[i]
 935     # 0 is off, 1 is auto, 2 is on.
 936     if use_esint == 2:
 937         document.preamble.append("\\usepackage{esint}")
 938
 939
 940 def revert_clearpage(document):
 941     "clearpage -> ERT"
 942     i = 0
 943     while True:
 944         i = find_token(document.body, "\\clearpage", i)
 945         if i == -1:
 946             break
 947         document.body[i : i + 1] = [
 948             "\\begin_inset ERT",
 949             "status collapsed",
 950             "",
 951             "\\begin_layout %s" % document.default_layout,
 952             "",
 953             "",
 954             "\\backslash",
 955             "clearpage",
 956             "\\end_layout",
 957             "",
 958             "\\end_inset",
 959         ]
 960     i = i + 1
 961
 962
 963 def revert_cleardoublepage(document):
 964     "cleardoublepage -> ERT"
 965     i = 0
 966     while True:
 967         i = find_token(document.body, "\\cleardoublepage", i)
 968         if i == -1:
 969             break
 970         document.body[i : i + 1] = [
 971             "\\begin_inset ERT",
 972             "status collapsed",
 973             "",
 974             "\\begin_layout %s" % document.default_layout,
 975             "",
 976             "",
 977             "\\backslash",
 978             "cleardoublepage",
 979             "\\end_layout",
 980             "",
 981             "\\end_inset",
 982         ]
 983     i = i + 1
 984
 985
 986 def convert_lyxline(document):
 987     r"remove fontsize commands for \lyxline"
 988     # The problematic is: The old \lyxline definition doesn't handle the fontsize
 989     # to change the line thickness. The new definiton does this so that imported
 990     # \lyxlines would have a different line thickness. The eventual fontsize command
 991     # before \lyxline is therefore removed to get the same output.
 992     fontsizes = [
 993         "tiny",
 994         "scriptsize",
 995         "footnotesize",
 996         "small",
 997         "normalsize",
 998         "large",
 999         "Large",
1000         "LARGE",
1001         "huge",
1002         "Huge",
1003     ]
1004     for n in range(0, len(fontsizes)):
1005         i = 0
1006         k = 0
1007         while i < len(document.body):
1008             i = find_token(document.body, "\\size " + fontsizes[n], i)
1009             k = find_token(document.body, "\\lyxline", i)
1010             # the corresponding fontsize command is always 2 lines before the \lyxline
1011             if i != -1 and k == i + 2:
1012                 document.body[i : i + 1] = []
1013             else:
1014                 break
1015         i = i + 1
1016
1017
def revert_encodings(document):
    """Set the input encoding to auto if it is one of the new encodings
    (or if the header has no input encoding at all), and update
    document.inputencoding from the header."""
    new_encodings = (
        "8859-6",
        "8859-8",
        "cp437",
        "cp437de",
        "cp850",
        "cp852",
        "cp855",
        "cp858",
        "cp862",
        "cp865",
        "cp866",
        "cp1250",
        "cp1252",
        "cp1256",
        "cp1257",
        "latin10",
        "pt254",
        "tis620-0",
    )
    pos = find_token(document.header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(document.header, "\\inputencoding", pos) in new_encodings:
            document.header[pos] = "\\inputencoding auto"
    else:
        document.header.append("\\inputencoding auto")
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1048
1049
def convert_caption(document):
    """Convert caption layouts to caption insets.

    The contents of each Caption paragraph are wrapped in a Caption inset
    which is put in a paragraph of the default layout.
    """
    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\begin_layout Caption", pos)
        if pos == -1:
            return
        end = find_end_of_layout(body, pos)
        if end == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return

        # Close the inner paragraph and the inset first: inserting behind
        # pos does not invalidate pos.
        body[end:end] = ["\\end_layout", "", "\\end_inset", "", ""]
        default_begin = "\\begin_layout %s" % document.default_layout
        body[pos : pos + 1] = [
            default_begin,
            "\\begin_inset Caption",
            "",
            default_begin,
        ]
        pos += 1
1070
1071
def revert_caption(document):
    "Convert caption insets to caption layouts."
    " This assumes that the text class has a caption style. "
    # NOTE: only the first string above is the docstring; the second one is
    # a separate expression statement without any effect.
    #
    # Index names used below (all refer to document.body):
    #   i: line of "\begin_inset Caption" (becomes "\begin_layout Caption")
    #   j: first "\begin_layout" at or after i (the paragraph in the inset)
    #   k: "\end_inset" matching the caption inset
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        # Remember the layout line: it is needed again if the paragraph has
        # to be restarted behind the caption.
        layout_line = document.body[layout_before]
        # The inset is the first thing in the paragraph if there are only
        # empty lines between the \begin_layout line and the inset.
        del_layout_before = True
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            # Remove the \begin_layout line and the empty lines; the inset
            # line moves up to layout_before.
            del document.body[layout_before:i]
            i = layout_before
        else:
            # End the paragraph in front of the inset; the inset line moves
            # down by the two inserted lines.
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # The inset is the last thing in the paragraph if there are only
        # empty lines between the \end_inset line and the \end_layout line.
        del_layout_after = True
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        # Both modifications happen behind k, so i, j and k stay valid.
        if del_layout_after:
            del document.body[k + 1 : layout_after + 1]
        else:
            document.body[k + 1 : k + 1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        # The deletions are done from back to front (k, then j, then behind
        # i), so the smaller indices are still valid when they are used.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i + 1] == "":
            del document.body[i + 1]
        i = i + 1
1144
1145
# Accents of InsetLaTeXAccent: accent command name (without backslash)
# -> unicode combining character
accent_map = {
    "`": "\N{COMBINING GRAVE ACCENT}",
    "'": "\N{COMBINING ACUTE ACCENT}",
    "^": "\N{COMBINING CIRCUMFLEX ACCENT}",
    "~": "\N{COMBINING TILDE}",
    "=": "\N{COMBINING MACRON}",
    "u": "\N{COMBINING BREVE}",
    ".": "\N{COMBINING DOT ABOVE}",
    '"': "\N{COMBINING DIAERESIS}",
    "r": "\N{COMBINING RING ABOVE}",
    "H": "\N{COMBINING DOUBLE ACUTE ACCENT}",
    "v": "\N{COMBINING CARON}",
    "b": "\N{COMBINING MINUS SIGN BELOW}",
    "d": "\N{COMBINING DOT BELOW}",
    "c": "\N{COMBINING CEDILLA}",
    "k": "\N{COMBINING OGONEK}",
    # The tie is special: It spans two characters, but only one is given
    # as argument, so we don't need to treat it differently.
    "t": "\N{COMBINING DOUBLE INVERTED BREVE}",
}
1167
1168
# special accents of InsetLaTeXAccent without argument:
# command name (without backslash) -> unicode character
special_accent_map = {
    "i": "\N{LATIN SMALL LETTER DOTLESS I}",
    "j": "\N{LATIN SMALL LETTER DOTLESS J}",
    "l": "\N{LATIN SMALL LETTER L WITH STROKE}",
    "L": "\N{LATIN CAPITAL LETTER L WITH STROKE}",
}
1176
1177
# special accent arguments of InsetLaTeXAccent:
# argument as written in the inset -> unicode character
accented_map = {
    "\\i": "\N{LATIN SMALL LETTER DOTLESS I}",
    "\\j": "\N{LATIN SMALL LETTER DOTLESS J}",
}
1183
1184
def _convert_accent(accent, accented_char):
    """Return the unicode representation of an InsetLaTeXAccent.

    accent is the accent command without the backslash, accented_char the
    argument of the command (may be empty). The result is the NFC
    normalized combination of both, or the empty string if the accent
    cannot be converted.
    """
    kind = accent
    base = accented_char
    if base == "":
        if kind in special_accent_map:
            return special_accent_map[kind]
        # a missing char is treated as space by LyX
        base = " "
    elif kind == "q" and base in ("t", "d", "l", "L"):
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
        kind = "v"
    elif base in accented_map:
        base = accented_map[base]
    elif len(base) > 1:
        # We can only convert accents on a single char
        return ""
    combining = accent_map.get(kind)
    if not combining:
        return ""
    return unicodedata.normalize("NFC", f"{base}{combining}")
1210
1211
def convert_ertbackslash(body, i, ert, default_layout):
    r"""Append the text ert as valid ERT code to body[i].

    Each backslash is written as '\backslash ' followed by a line break, and
    each '\n' starts a new paragraph of layout default_layout. All other
    characters are appended unchanged to the current line.
    Returns the (maybe incremented) index of the line that was written last.
    """
    pos = i
    for ch in ert:
        if ch == "\\":
            body[pos] += "\\backslash "
            # The text after the backslash goes to a fresh line
            pos += 1
            body.insert(pos, "")
            continue
        if ch == "\n":
            paragraph_break = [
                "\\end_layout",
                "",
                "\\begin_layout %s" % default_layout,
                "",
            ]
            body[pos + 1 : pos + 1] = paragraph_break
            # Continue on the empty line that ends the inserted lines
            pos += 4
            continue
        body[pos] += ch
    return pos
1233
1234
def convert_accent(document):
    r"""Convert InsetLaTeXAccent (``\i ...``) to unicode characters.

    Accents that _convert_accent() can handle are replaced by the
    corresponding unicode character. All others are converted to ERT, and
    a warning is issued.
    """
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a'  (also accepted)
    # '\i \"'    (also accepted)
    re_wholeinset = re.compile(r"^(.*)(\\i\s+)(.*)$")
    re_contents = re.compile(r"^([^\s{]+)(.*)$")
    re_accentedcontents = re.compile(r"^\s*{?([^{}]*)}?\s*$")
    i = 0
    while True:
        i = find_re(document.body, re_wholeinset, i)
        if i == -1:
            return
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        if match:
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            if match:
                accented_char = match.group(1)
                converted = _convert_accent(accent, accented_char)
                if converted != "":
                    document.body[i] = f"{prefix}{converted}"
                    i += 1
                    continue
                # Normalize contents
                contents = f"{accent}{{{accented_char}}}"
            else:
                # The argument is not of the supported forms (e.g. nested
                # braces). Keep it verbatim for the ERT inset; only the
                # leading backslash is dropped since it is added again below.
                contents = contents[1:]
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i + 1 : i + 1] = [
            "\\begin_inset ERT",
            "status collapsed",
            "",
            "\\begin_layout %s" % document.default_layout,
            "",
            "",
            "",
        ]
        # The ERT text is appended to the last (empty) line inserted above
        i = convert_ertbackslash(
            document.body, i + 7, "\\%s" % contents, document.default_layout
        )
        document.body[i + 1 : i + 1] = ["\\end_layout", "", "\\end_inset"]
        i += 3
1284
1285
def is_inset_line(document, i):
    """Return True if line i of the body starts with a backslash or has a
    backslash in its last two whitespace separated tokens."""
    line = document.body[i]
    if line.startswith("\\"):
        return True
    trailing_tokens = line.split()[-2:]
    return "\\" in "".join(trailing_tokens)
1292
1293
# A wrapper around normalize that handles special cases (cf. bug 3313)
def normalize(form, text):
    """Return text normalized to the unicode normal form `form`, but leave
    the characters OHM SIGN and ANGSTROM SIGN untouched."""
    # do not normalize OHM, ANGSTROM
    protected = (0x2126, 0x212B)
    pieces = []
    pending = []
    for char in text:
        if ord(char) in protected:
            # Normalize what has been collected so far, then copy the
            # protected character verbatim.
            if pending:
                pieces.append(unicodedata.normalize(form, "".join(pending)))
                pending = []
            pieces.append(char)
        else:
            pending.append(char)
    if pending:
        pieces.append(unicodedata.normalize(form, "".join(pending)))
    return "".join(pieces)
1311
1312
def revert_accent(document):
    """Replace accented characters by InsetLaTeXAccent.

    Only characters that cannot be represented in the encoding in effect
    are replaced. The body is decomposed (NFD) for the search and composed
    (NFC) again afterwards; OHM SIGN and ANGSTROM SIGN are excluded from the
    normalization (see normalize()).
    """
    inverse_accent_map = {value: key for key, value in accent_map.items()}
    inverse_special_accent_map = {value: key for key, value in special_accent_map.items()}
    inverse_accented_map = {value: key for key, value in accented_map.items()}

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    for i in range(len(document.body) - 1):
        if document.body[i] == "" or document.body[i + 1] == "" or document.body[i][-1] == " ":
            continue
        if document.body[i + 1][0] in inverse_accent_map and not is_inset_line(document, i):
            # the last character of this line and the first of the next line
            # probably form a combining sequence (base character followed by
            # a combining accent), inline insets are excluded (second part
            # of the test). Move the rest of the word to this line.
            while len(document.body[i + 1]) > 0 and document.body[i + 1][0] != " ":
                document.body[i] += document.body[i + 1][0]
                document.body[i + 1] = document.body[i + 1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(len(document.body)):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        try:
            document.body[i] = normalize("NFD", document.body[i])
        except TypeError:
            document.body[i] = normalize("NFD", str(document.body[i], "utf-8"))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [
        get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
    ]
    lang_re = re.compile(r"^\\lang\s(\S+)")

    i = 0
    while i < len(document.body):
        if (
            document.inputencoding == "auto" or document.inputencoding == "default"
        ) and document.cjk_encoding != "":
            # Track the encoding of the current line.
            # These lines contain no text, so go on with the next line. The
            # line index must be advanced explicitly before each continue,
            # otherwise the same line would be examined forever.
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang

                    encoding_stack[-1] = lang[language][3]
                i += 1
                continue
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                encoding_stack.append(encoding_stack[-1])
                i += 1
                continue
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
                i += 1
                continue

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if document.body[i][j] in inverse_special_accent_map and (
                j == len(document.body[i]) - 1
                or document.body[i][j + 1] not in inverse_accent_map
            ):
                accent = document.body[i][j]
                try:
                    # Only the possible UnicodeEncodeError is of interest
                    accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i + 1, document.body[i][j + 1 :])
                    # Delete the accented character
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    # The rest of the line is handled as line i + 1
                    break
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j - 1]
                if accented_char == " ":
                    # Conform to LyX output
                    accented_char = ""
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                try:
                    # Only the possible UnicodeEncodeError is of interest
                    normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i + 1, document.body[i][j + 1 :])
                    # Delete the accented characters
                    document.body[i] = document.body[i][: j - 1]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += f"\\i \\{inverse_accent_map[accent]}{{{accented_char}}}"
                    # The rest of the line is handled as line i + 1
                    break
        i = i + 1

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(len(document.body)):
        document.body[i] = normalize("NFC", document.body[i])
1424
1425
def normalize_font_whitespace_259(document):
    """Transfer whitespace at the beginning or end of a font change outside
    of the font change.

    Before format 259 the font changes were ignored if a whitespace was the
    first or last character in the sequence. This handles the series, emph,
    color, shape, bar and family properties."""

    return normalize_font_whitespace(
        document,
        {
            "\\series": "default",
            "\\emph": "default",
            "\\color": "none",
            "\\shape": "default",
            "\\bar": "default",
            "\\family": "default",
        },
    )
1440
1441
def normalize_font_whitespace_274(document):
    """Transfer whitespace at the beginning or end of a language change
    outside of the language change.

    Before format 259 (sic) the font changes were ignored if a whitespace
    was the first or last character in the sequence. This was corrected for
    most font properties in format 259, but the language was forgotten
    then, so the same conversion is applied here for font language
    changes."""

    language_default = {"\\lang": "default"}
    return normalize_font_whitespace(document, language_default)
1452
1453
def get_paragraph_language(document, i):
    """Return the language of the paragraph that contains line i of the
    document body: the argument of a \\lang command if that is the first
    thing in the paragraph, the document's language otherwise."""

    body = document.body
    layout_start = find_beginning_of_layout(body, i)
    first_content = find_nonempty_line(body, layout_start + 1)
    tokens = body[first_content].split()

    if len(tokens) > 1 and tokens[0] == "\\lang":
        return tokens[1]
    return document.language
1470
1471
def _font_reset_lines(changes, char_properties, current):
    "Build the lines isolating a blank from every active font change except `current`."
    # The most recently recorded change comes first on both sides of the
    # blank: resets of the other properties, the blank itself, then the
    # same properties switched back on.
    others = [k for k in reversed(list(changes)) if k != current]
    resets = [f"{k} {char_properties[k]}" for k in others]
    reapplied = [f"{k} {changes[k]}" for k in others]
    return resets + [" "] + reapplied


def normalize_font_whitespace(document, char_properties):
    """Move whitespace that touches a font change outside of that change.

    Before format 259 the font changes were ignored if a whitespace was the
    first or last character in the sequence; this function transfers the
    whitespace outside. Only a change in one of the properties in the
    provided char_properties is handled by this function.

    document        -- object providing `backend` and the mutable line list
                       `body`; nothing is done unless backend is "latex".
    char_properties -- dict mapping a property token (e.g. "\\series") to its
                       default value. If it has a "\\lang" key, that entry is
                       overwritten with the paragraph language at every
                       \\begin_layout.

    document.body is edited in place; nothing is returned.
    """

    if document.backend != "latex":
        return

    lines = document.body

    # Properties currently set to a non-default value in this paragraph.
    changes = {}

    i = 0
    while i < len(lines):
        words = lines[i].split()

        if words and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            changes.clear()
            # also reset the default language to be the paragraph's language
            if "\\lang" in char_properties:
                char_properties["\\lang"] = get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in char_properties:
            # we have a font change
            prop, value = words[0], words[1]
            default = char_properties[prop]
            if default == value:
                # property gets reset
                changes.pop(prop, None)
                defaultproperty = True
            else:
                # property gets set
                changes[prop] = value
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.

            # Guard both neighbours: without `i > 0` the index -1 would look
            # at (and edit) the last body line, and without the upper bound a
            # font change on the final line would raise IndexError.
            space_before = i > 0 and lines[i - 1].endswith(" ")
            space_after = i + 1 < len(lines) and lines[i + 1].startswith(" ")

            if space_before:
                # a space before the font change
                lines[i - 1] = lines[i - 1][:-1]
                added_lines = _font_reset_lines(changes, char_properties, prop)
                if defaultproperty:
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i + 1 : i + 1] = added_lines
                else:
                    # Reset property for the space
                    added_lines.insert(0, f"{prop} {default}")
                    lines[i:i] = added_lines
                i += len(added_lines)

            elif space_after and (changes or not defaultproperty):
                # a space after the font change
                if lines[i + 1] == " " and i + 2 < len(lines) and lines[i + 2]:
                    next_words = lines[i + 2].split()
                    if next_words and next_words[0] == prop:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        i += 2
                        continue
                lines[i + 1] = lines[i + 1][1:]
                added_lines = _font_reset_lines(changes, char_properties, prop)
                # Reset property for the space
                added_lines.insert(0, f"{prop} {default}")
                lines[i:i] = added_lines
                i += len(added_lines)

        i += 1
1564
1565
def revert_utf8x(document):
    """Replace the utf8x input encoding by utf8.

    A header without any \\inputencoding line gets "\\inputencoding auto"
    appended. document.inputencoding is refreshed from the header afterwards.
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) == "utf8x":
            header[pos] = "\\inputencoding utf8"
    else:
        header.append("\\inputencoding auto")
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1576
1577
def revert_utf8plain(document):
    """Replace the utf8-plain input encoding by utf8.

    A header without any \\inputencoding line gets "\\inputencoding auto"
    appended. document.inputencoding is refreshed from the header afterwards.
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) == "utf8-plain":
            header[pos] = "\\inputencoding utf8"
    else:
        header.append("\\inputencoding auto")
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1588
1589
def revert_beamer_alert(document):
    """Revert beamer's \\alert inset back to ERT.

    Every "\\begin_inset CharStyle Alert" line becomes "\\begin_inset ERT",
    and the line following the inset's first \\begin_layout is wrapped in
    "\\alert{...}". document.body is edited in place.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        i += 1
        # Walk to the inset's first \begin_layout, but stop at the end of the
        # body so a truncated inset cannot raise IndexError.
        while i < len(body) and body[i][:13] != "\\begin_layout":
            i += 1
        if i + 1 >= len(body):
            document.warning("Malformed LyX document: Could not find contents of Alert inset.")
            return
        # Insert the \alert command
        body[i + 1] = "\\alert{" + body[i + 1] + "}"
        i += 1
1607
1608
def revert_beamer_structure(document):
    """Revert beamer's \\structure inset back to ERT.

    Every "\\begin_inset CharStyle Structure" line becomes "\\begin_inset ERT",
    and the line following the inset's first \\begin_layout is wrapped in
    "\\structure{...}". document.body is edited in place.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        body[i] = "\\begin_inset ERT"
        i += 1
        # Walk to the inset's first \begin_layout, but stop at the end of the
        # body so a truncated inset cannot raise IndexError.
        while i < len(body) and body[i][:13] != "\\begin_layout":
            i += 1
        if i + 1 >= len(body):
            document.warning(
                "Malformed LyX document: Could not find contents of Structure inset."
            )
            return
        # Insert the \structure command
        body[i + 1] = "\\structure{" + body[i + 1] + "}"
        i += 1
1625
1626
def convert_changes(document):
    """Switch output_changes off if tracking_changes is off.

    Warns and leaves the header untouched when either \\tracking_changes or
    \\output_changes is missing.
    """
    header = document.header

    track_pos = find_token(header, "\\tracking_changes", 0)
    if track_pos == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return

    output_pos = find_token(header, "\\output_changes", 0)
    if output_pos == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return

    tracking_off = get_value(header, "\\tracking_changes", track_pos) == "false"
    output_on = get_value(header, "\\output_changes", output_pos) == "true"
    if tracking_off and output_on:
        header[output_pos] = "\\output_changes false"
1641
1642
def revert_ascii(document):
    """Replace the ascii input encoding by auto.

    A header without any \\inputencoding line gets "\\inputencoding auto"
    appended. document.inputencoding is refreshed from the header afterwards.
    """
    header = document.header
    pos = find_token(header, "\\inputencoding", 0)
    if pos != -1:
        if get_value(header, "\\inputencoding", pos) == "ascii":
            header[pos] = "\\inputencoding auto"
    else:
        header.append("\\inputencoding auto")
    document.inputencoding = get_value(header, "\\inputencoding", 0)
1653
1654
def normalize_language_name(document):
    """Rename the document languages brazil and portuges.

    "brazil" becomes "brazilian" and "portuges" becomes "portuguese", both in
    document.language and on the header's \\language line. Any other language
    is left alone.
    """
    renamed = {"brazil": "brazilian", "portuges": "portuguese"}

    if document.language not in renamed:
        return

    document.language = renamed[document.language]
    i = find_token(document.header, "\\language", 0)
    # find_token reports a missing line as -1; writing to header[-1] would
    # silently clobber the last header line instead.
    if i != -1:
        document.header[i] = f"\\language {document.language}"
1662
1663
def revert_language_name(document):
    """Restore the old names of the languages brazilian and portuguese.

    "brazilian" becomes "brazil" and "portuguese" becomes "portuges", both in
    document.language and on the header's \\language line. Any other language
    is left alone.
    """
    renamed = {"brazilian": "brazil", "portuguese": "portuges"}

    if document.language not in renamed:
        return

    document.language = renamed[document.language]
    i = find_token(document.header, "\\language", 0)
    # find_token reports a missing line as -1; writing to header[-1] would
    # silently clobber the last header line instead.
    if i != -1:
        document.header[i] = f"\\language {document.language}"
1671
1672
1673 #
1674 #  \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    "Rename the text class cv to simplecv."
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
1678
1679
def revert_cv_textclass(document):
    "Rename the text class simplecv back to cv."
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
1683
1684
1685 #
1686 # add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    """Add the scaleBeforeRotation graphics parameter.

    For every Graphics inset that has a rotateAngle line together with a
    width, height or scale line, a "scaleBeforeRotation" line is inserted
    just before the end of the inset. document.body is edited in place.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Without a known end there is no inset range to inspect, so move
            # on instead of searching with an end index of -1.
            i += 1
            continue
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        k = find_token(body, "\trotateAngle", i + 1, j)
        size = find_tokens(body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if k != -1 and size != -1:
            # NOTE(review): inserted without the leading tab that
            # revert_graphics_rotation searches for ("\tscaleBeforeRotation");
            # left unchanged here - confirm whether that is intended.
            body.insert(j, "scaleBeforeRotation")
        i += 1
1705
1706
1707 #
1708 # remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    """Remove the scaleBeforeRotation graphics parameter.

    For every Graphics inset:
    - if it has a scaleBeforeRotation line, that line is deleted;
    - otherwise, if it has a rotateAngle value together with a width, height
      or scale line, the angle is moved into the "special" parameter
      (prepended as "angle=<value>," to an existing one, or added as a new
      "\\tspecial angle=<value>" line) and the rotateAngle line is deleted.

    document.body is edited in place.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # No usable inset range: skip this inset.
            i += 1
            continue

        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(body, "\tscaleBeforeRotation", i + 1, j)
        if k != -1:
            del body[k]
            i += 1
            continue

        # if not, and if we have rotateAngle and width or height or scale,
        # we have to put the rotateAngle value to special
        rotateAngle = get_value(body, "rotateAngle", i + 1, j)
        special = get_value(body, "special", i + 1, j)
        # A rotated graphic without any size parameter needs no change. It
        # must only skip this inset: leaving the loop here would keep every
        # later graphics inset from being reverted.
        if (
            rotateAngle != ""
            and find_tokens(body, ["\twidth", "\theight", "\tscale"], i + 1, j) != -1
        ):
            if special == "":
                body.insert(j - 1, "\tspecial angle=%s" % rotateAngle)
            else:
                special_pos = find_token(body, "\tspecial", i + 1, j)
                body[special_pos] = body[special_pos].replace(
                    special, f"angle={rotateAngle},{special}"
                )
            k = find_token(body, "\trotateAngle", i + 1, j)
            if k != -1:
                del body[k]
        i += 1
1744
1745
def convert_tableborder(document):
    """Drop the "|" in front of ">{" on lines that set leftline="true".

    LyX doubles the table cell border as it ignores the "|" character in the
    cell arguments. A fix takes care of this and therefore the "|" has to be
    removed. Only the first "|>{" of a line is touched, and only when the
    same line also contains leftline="true".
    """
    for index, row in enumerate(document.body):
        # the two tokens have to be in one line
        if 'leftline="true"' not in row:
            continue
        bar = row.find("|>{")
        if bar == -1:
            continue
        # delete the "|"
        document.body[index] = row[:bar] + row[bar + 1 :]
1760
1761
def revert_tableborder(document):
    """Put a "|" in front of the first ">{" on lines that set leftline="true".

    Lines lacking either token are left unchanged.
    """
    for index, row in enumerate(document.body):
        # the two tokens have to be in one line
        if 'leftline="true"' not in row:
            continue
        brace = row.find(">{")
        if brace == -1:
            continue
        # add the "|"
        document.body[index] = "|".join((row[:brace], row[brace:]))
1772
1773
def revert_armenian(document):
    """Revert armenian support.

    - An armscii8 input encoding is replaced by auto in the header.
    - For an armenian document, \\usepackage{armtex} is added to the preamble
      and the document language is switched to english.
    """
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        enc_pos = find_token(document.header, "\\inputencoding", 0)
        if enc_pos != -1:
            document.header[enc_pos] = "\\inputencoding auto"

    if document.language != "armenian":
        return

    # The preamble counts as existing when any of its lines holds a
    # backslash or a percent sign.
    preamble_exists = any("\\" in entry or "%" in entry for entry in document.preamble)
    if preamble_exists:
        # set the armtex entry as the first preamble line
        document.preamble.insert(0, "\\usepackage{armtex}")
    else:
        # create the preamble when it doesn't exist
        document.preamble.append("\\usepackage{armtex}")

    # Set document language from armenian to english
    document.language = "english"
    lang_pos = find_token(document.header, "\\language", 0)
    if lang_pos != -1:
        document.header[lang_pos] = "\\language english"
1803
1804
def revert_CJK(document):
    """Set CJK encodings to default and CJK languages to english.

    A header without any \\inputencoding line gets "\\inputencoding auto"
    appended. document.inputencoding is refreshed from the header afterwards.
    The languages chinese-simplified, chinese-traditional, japanese and
    korean are replaced by english.
    """
    cjk_encodings = (
        "Bg5",
        "Bg5+",
        "GB",
        "GBt",
        "GBK",
        "JIS",
        "KS",
        "SJIS",
        "UTF8",
        "EUC-TW",
        "EUC-JP",
    )
    cjk_languages = (
        "chinese-simplified",
        "chinese-traditional",
        "japanese",
        "korean",
    )

    header = document.header
    enc_pos = find_token(header, "\\inputencoding", 0)
    if enc_pos == -1:
        header.append("\\inputencoding auto")
    elif get_value(header, "\\inputencoding", enc_pos) in cjk_encodings:
        header[enc_pos] = "\\inputencoding default"
    document.inputencoding = get_value(header, "\\inputencoding", 0)

    if document.language not in cjk_languages:
        return

    document.language = "english"
    lang_pos = find_token(header, "\\language", 0)
    if lang_pos != -1:
        header[lang_pos] = "\\language english"
1839
1840
def revert_preamble_listings_params(document):
    r"""Revert the header option \listings_params.

    The header line is removed and replaced by two preamble lines,
    \usepackage{listings} and \lstset{<value>}, where <value> is the second
    whitespace-separated word of the header line with surrounding double
    quotes stripped.
    """
    pos = find_token(document.header, "\\listings_params", 0)
    if pos == -1:
        return
    document.preamble.append("\\usepackage{listings}")
    value = document.header[pos].split()[1].strip('"')
    document.preamble.append("\\lstset{" + value + "}")
    del document.header[pos]
1848
1849
def revert_listings_inset(document):
    r"""Revert listings inset to \lstinline or \begin, \end lstlisting.

    Every inset found with the token "\begin_inset listings" is replaced in
    document.body by an ERT inset, and \usepackage{listings} is appended to
    document.preamble if it is not already there. Nothing is returned.

    Translate FROM

    \begin_inset listings
    lstparams "language=Delphi"
    inline true
    status open

    \begin_layout Standard
    var i = 10;
    \end_layout

    \end_inset

    TO

    \begin_inset ERT
    status open
    \begin_layout Standard


    \backslash
    lstinline[language=Delphi]{var i = 10;}
    \end_layout

    \end_inset

    There can be a caption inset in this inset:

    \begin_layout Standard
    \begin_inset Caption

    \begin_layout Standard
    before label
    \begin_inset LatexCommand label
    name "lst:caption"

    \end_inset

    after label
    \end_layout

    \end_inset


    \end_layout

    The caption text and the label name are then appended to the listing
    parameters as caption={...} and label={...}.
    """
    i = 0
    # There is no explicit increment of i: each pass rewrites the inset line
    # at i to "\begin_inset ERT", so the next search from i moves on.
    while True:
        i = find_token(document.body, "\\begin_inset listings", i)
        if i == -1:
            break
        else:
            if not "\\usepackage{listings}" in document.preamble:
                document.preamble.append("\\usepackage{listings}")
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            break
        # Defaults used when the corresponding parameter line is absent.
        inline = "false"
        params = ""
        status = "open"
        # first three lines
        # Each value is the second whitespace-separated word of its line, so
        # a value containing blanks is cut at the first blank.
        for line in range(i + 1, i + 4):
            if document.body[line].startswith("inline"):
                inline = document.body[line].split()[1]
            if document.body[line].startswith("lstparams"):
                params = document.body[line].split()[1].strip('"')
            if document.body[line].startswith("status"):
                status = document.body[line].split()[1].strip()
                # k marks where the listing content starts (used by the
                # non-inline branch below).
                # NOTE(review): k stays unset if none of the three lines
                # starts with "status" and no caption is found - confirm that
                # this cannot occur.
                k = line + 1
        # caption?
        caption = ""
        label = ""
        # NOTE(review): this search starts at i and is given no end index, so
        # it may match a caption located after this inset's end j - confirm
        # against find_token's semantics.
        cap = find_token(document.body, "\\begin_inset Caption", i)
        if cap != -1:
            cap_end = find_end_of_inset(document.body, cap + 1)
            if cap_end == -1:
                # this should not happen
                break
            # label?
            # NOTE(review): likewise not limited to cap_end.
            lbl = find_token(document.body, "\\begin_inset LatexCommand label", cap + 1)
            if lbl != -1:
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                if lbl_end == -1:
                    # this should not happen
                    break
            else:
                # No label: collapse the label range onto the caption's end
                # so the slices below skip nothing but that closing line.
                lbl = cap_end
                lbl_end = cap_end
            # The label name comes from the first line starting with "name ".
            for line in document.body[lbl : lbl_end + 1]:
                if line.startswith("name "):
                    label = line.split()[1].strip('"')
                    break
            # Caption text: every line around the label inset that does not
            # start with a backslash, stripped and concatenated without any
            # separator.
            for line in document.body[cap:lbl] + document.body[lbl_end + 1 : cap_end + 1]:
                if not line.startswith("\\"):
                    caption += line.strip()
            # The listing content now starts after the caption inset.
            k = cap_end + 1
        inlinecode = ""
        # looking for the oneline code for lstinline
        # This is the line just before the end of the first paragraph in the
        # default layout found from i + 1 on.
        inlinecode = document.body[
            find_end_of_layout(
                document.body,
                find_token(document.body, "\\begin_layout %s" % document.default_layout, i + 1)
                + 1,
            )
            - 1
        ]
        # Append caption and label to the comma-separated parameter list.
        if len(caption) > 0:
            if len(params) == 0:
                params = "caption={%s}" % caption
            else:
                params += ",caption={%s}" % caption
        if len(label) > 0:
            if len(params) == 0:
                params = "label={%s}" % label
            else:
                params += ",label={%s}" % label
        if len(params) > 0:
            params = "[%s]" % params
            # Inside ERT a backslash is written as "\backslash" followed by a
            # line break.
            params = params.replace("\\", "\\backslash\n")
        if inline == "true":
            # Inline listing: one ERT paragraph holding \lstinline[...]{code}.
            document.body[i : (j + 1)] = [
                r"\begin_inset ERT",
                "status %s" % status,
                r"\begin_layout %s" % document.default_layout,
                "",
                "",
                r"\backslash",
                f"lstinline{params}{{{inlinecode}}}",
                r"\end_layout",
                "",
                r"\end_inset",
            ]
        else:
            # Display listing: the original lines k .. j - 2 are kept between
            # a \begin{lstlisting}[...] paragraph and an \end{lstlisting}
            # paragraph.
            document.body[i : j + 1] = (
                [
                    r"\begin_inset ERT",
                    "status %s" % status,
                    "",
                    r"\begin_layout %s" % document.default_layout,
                    "",
                    "",
                    r"\backslash",
                    r"begin{lstlisting}%s" % params,
                    r"\end_layout",
                    "",
                    r"\begin_layout %s" % document.default_layout,
                ]
                + document.body[k : j - 1]
                + [
                    "",
                    r"\begin_layout %s" % document.default_layout,
                    "",
                    r"\backslash",
                    "end{lstlisting}",
                    r"\end_layout",
                    "",
                    r"\end_inset",
                ]
            )
2013
2014
def revert_include_listings(document):
    r"""Revert the lstinputlisting Include inset to ERT.

    An inset such as

    \begin_inset Include \lstinputlisting{file}[opt]
    preview false

    \end_inset

    becomes

    \begin_inset ERT
    status open

    \begin_layout Standard


    \backslash
    lstinputlisting[opt]{file}
    \end_layout

    \end_inset

    \usepackage{listings} is appended to the preamble if it is missing.
    """
    command = re.compile(r"\\(lstinputlisting){([.\w]*)}(.*)")
    body = document.body

    start = 0
    while True:
        start = find_token(body, r"\begin_inset Include \lstinputlisting", start)
        if start == -1:
            break
        if "\\usepackage{listings}" not in document.preamble:
            document.preamble.append("\\usepackage{listings}")
        stop = find_end_of_inset(body, start + 1)
        if stop == -1:
            # this should not happen
            break
        # The third word of the inset line is the command,
        # lstinputlisting{file}[options]; all parts stay empty when that
        # word does not have the expected form.
        found = command.match(body[start].split()[2])
        cmd, file, option = found.groups() if found else ("", "", "")
        option = option.replace("\\", "\\backslash\n")
        ert = [
            r"\begin_inset ERT",
            "status open",
            "",
            r"\begin_layout %s" % document.default_layout,
            "",
            "",
            r"\backslash",
            f"{cmd}{option}{{{file}}}",
            r"\end_layout",
            "",
            r"\end_inset",
        ]
        body[start : stop + 1] = ert
2069
2070
def revert_ext_font_sizes(document):
    """Turn the font sizes 10, 11 and 12 of ext* classes into class options.

    Only applies to the latex backend and to text classes whose name starts
    with "ext". The header's \\paperfontsize is set to default and the size
    is handed to insert_document_option as "10pt", "11pt" or "12pt".
    """
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return

    size = get_value(document.header, "\\paperfontsize", 0)
    if size not in ("10", "11", "12"):
        return

    pos = find_token(document.header, "\\paperfontsize", 0)
    document.header[pos] = "\\paperfontsize default"
    insert_document_option(document, size + "pt")
2085
2086
def convert_ext_font_sizes(document):
    """Turn a 10pt/11pt/12pt class option of ext* classes into \\paperfontsize.

    Only applies to the latex backend, to text classes whose name starts with
    "ext", and when \\paperfontsize is "default". The first entry of the
    comma-separated \\options value that is exactly "10pt", "11pt" or "12pt"
    is removed and becomes the \\paperfontsize value (without "pt"). An
    \\options line left without entries is deleted.
    """
    if document.backend != "latex" or not document.textclass.startswith("ext"):
        return

    if get_value(document.header, "\\paperfontsize", 0) != "default":
        return

    opt_pos = find_token(document.header, "\\options", 0)
    if opt_pos == -1:
        return

    known_sizes = ("10pt", "11pt", "12pt")
    entries = get_value(document.header, "\\options", opt_pos).split(",")
    # Position of the first option that is exactly one of the known sizes.
    hit = next((n for n, entry in enumerate(entries) if entry in known_sizes), None)
    if hit is None:
        return
    fontsize = entries.pop(hit)[:-2]

    # Update \paperfontsize before the \options line is possibly deleted, so
    # its index is still valid.
    size_pos = find_token(document.header, "\\paperfontsize", 0)
    document.header[size_pos] = "\\paperfontsize %s" % fontsize

    if entries:
        document.header[opt_pos] = "\\options %s" % ",".join(entries)
    else:
        del document.header[opt_pos]
2126
2127
def revert_separator_layout(document):
    r"""Revert the --Separator-- layout to a LyX note.

    A paragraph

    \begin_layout --Separator--
    something
    \end_layout

    becomes

    \begin_layout Standard
    \begin_inset Note Note
    status open

    \begin_layout Standard
    Separate Environment
    \end_layout

    \end_inset
    something

    \end_layout

    where "Standard" stands for document.default_layout.
    """
    body = document.body
    start = 0
    while True:
        start = find_token(body, r"\begin_layout --Separator--", start)
        if start == -1:
            break
        stop = find_end_of_layout(body, start + 1)
        if stop == -1:
            # this should not happen
            break
        layout_line = r"\begin_layout %s" % document.default_layout
        note = [
            layout_line,
            r"\begin_inset Note Note",
            "status open",
            "",
            layout_line,
            "Separate Environment",
            r"\end_layout",
            "",
            r"\end_inset",
        ]
        # Keep the former paragraph content after the note.
        content = body[start + 1 : stop]
        body[start : stop + 1] = note + content + ["", r"\end_layout"]
2177
2178
def convert_arabic(document):
    r"""Rename the language arabic to arabic_arabtex.

    The document language and its header line are updated, and every body
    line containing "\lang arabic" is replaced by "\lang arabic_arabtex".
    """
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        pos = find_token(document.header, "\\language", 0)
        if pos != -1:
            document.header[pos] = "\\language arabic_arabtex"

    for index, text in enumerate(document.body):
        if r"\lang arabic" in text:
            # change the language name
            document.body[index] = r"\lang arabic_arabtex"
2192
2193
def revert_arabic(document):
    r"""Rename the language arabic_arabtex back to arabic.

    The document language and its header line are updated, and every body
    line containing "\lang arabic_arabtex" is replaced by "\lang arabic".
    """
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        pos = find_token(document.header, "\\language", 0)
        if pos != -1:
            document.header[pos] = "\\language arabic"

    for index, text in enumerate(document.body):
        if r"\lang arabic_arabtex" in text:
            # change the language name
            document.body[index] = r"\lang arabic"
2207
2208
2209 ##
2210 # Conversion hub
2211 #
2212
# LyX release strings associated with this module.
supported_versions = ["1.5.0", "1.5"]

# Conversion table. Every entry is [number, [functions]]; the numbers run
# upwards from 246 to 276 and the functions are listed in the order given.
# NOTE(review): the number is presumably the file format reached once the
# listed functions have run - the code consuming this table is not in this
# file; confirm there.
convert = [
    [246, []],
    [247, [convert_font_settings]],
    [248, []],
    [249, [convert_utf8]],
    [250, []],
    [251, []],
    [252, [convert_commandparams, convert_bibitem]],
    [253, []],
    [254, [convert_esint]],
    [255, []],
    [256, []],
    [257, [convert_caption]],
    [258, [convert_lyxline]],
    [259, [convert_accent, normalize_font_whitespace_259]],
    [260, []],
    [261, [convert_changes]],
    [262, []],
    [263, [normalize_language_name]],
    [264, [convert_cv_textclass]],
    [265, [convert_tableborder]],
    [266, []],
    [267, []],
    [268, []],
    [269, []],
    [270, []],
    [271, [convert_ext_font_sizes]],
    [272, []],
    [273, []],
    [274, [normalize_font_whitespace_274]],
    [275, [convert_graphics_rotation]],
    [276, [convert_arabic]],
]

# Reversion table, same entry shape as `convert`; the numbers run downwards
# from 275 to 245.
# NOTE(review): the number is presumably the file format reached once the
# listed functions have run - confirm in the code consuming this table.
revert = [
    [275, [revert_arabic]],
    [274, [revert_graphics_rotation]],
    [273, []],
    [272, [revert_separator_layout]],
    # The same three listings reverters are registered again under 268.
    [
        271,
        [
            revert_preamble_listings_params,
            revert_listings_inset,
            revert_include_listings,
        ],
    ],
    [270, [revert_ext_font_sizes]],
    [269, [revert_beamer_alert, revert_beamer_structure]],
    [
        268,
        [
            revert_preamble_listings_params,
            revert_listings_inset,
            revert_include_listings,
        ],
    ],
    [267, [revert_CJK]],
    [266, [revert_utf8plain]],
    [265, [revert_armenian]],
    [264, [revert_tableborder]],
    [263, [revert_cv_textclass]],
    [262, [revert_language_name]],
    [261, [revert_ascii]],
    [260, []],
    [259, [revert_utf8x]],
    [258, []],
    [257, []],
    [256, [revert_caption]],
    [255, [revert_encodings]],
    [254, [revert_clearpage, revert_cleardoublepage]],
    [253, [revert_esint]],
    [252, [revert_nomenclature, revert_printnomenclature]],
    [251, [revert_commandparams]],
    [250, [revert_cs_label]],
    [249, []],
    [248, [revert_accent, revert_utf8, revert_unicode]],
    [247, [revert_booktabs]],
    [246, [revert_font_settings]],
    [245, [revert_framed]],
]


# Running this module directly does nothing.
if __name__ == "__main__":
    pass