1 # This file is part of lyx2lyx
2 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
3 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 """Convert files to the file format generated by lyx 1.5"""
26 from LyX import get_encoding
27 from lyx2lyx_tools import insert_document_option
28 from parser_tools import (
40 ####################################################################
41 # Private helper functions
def find_end_of_inset(lines, i):
    "Find end of inset, where lines[i] is included."
    start_marker = "\\begin_inset"
    end_marker = "\\end_inset"
    return find_end_of(lines, i, start_marker, end_marker)
def find_end_of_layout(lines, i):
    "Find end of layout, where lines[i] is included."
    start_marker = "\\begin_layout"
    end_marker = "\\end_layout"
    return find_end_of(lines, i, start_marker, end_marker)
def find_beginning_of_layout(lines, i):
    "Find beginning of layout, where lines[i] is included."
    start_marker = "\\begin_layout"
    end_marker = "\\end_layout"
    return find_beginning_of(lines, i, start_marker, end_marker)
59 # End of helper functions
60 ####################################################################
64 # Notes: Framed/Shaded
def revert_framed(document):
    "Revert framed notes."
    # NOTE(review): fragmentary paste — the scan loop and the find_tokens(...)
    # call wrapping the argument list below are not visible in this chunk;
    # presumably each Framed/Shaded note inset is downgraded to a plain Note.
    document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i
    document.body[i] = "\\begin_inset Note"
# NOTE(review): fragmentary paste — the lines below are scattered entries from
# the roman_fonts / sans_fonts / typewriter_fonts mappings consumed by
# convert_font_settings()/revert_font_settings(); the dict headers, closing
# braces and most entries are not visible in this chunk.
    "palatino": "palatino",
    "palatino": "default",
    "newcent": "default",
    "bookman": "default",
    "default": "default",
    "palatino": "default",
    "newcent": "default",
    "bookman": "default",
    "pslatex": "courier",
def convert_font_settings(document):
    """Convert the single '\\fontscheme' header setting to individual
    \\font_* settings.

    The named scheme is looked up in the roman_fonts / sans_fonts /
    typewriter_fonts tables; an empty or unknown scheme falls back to
    'default' with a warning.
    """
    i = 0
    i = find_token_exact(document.header, "\\fontscheme", i)
    if i == -1:
        # Guard restored: without it the header slice below would run
        # with i == -1 and clobber the last header line.
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        return
    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
    if font_scheme == "":
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        font_scheme = "default"
    # Direct membership test — no need to materialize the key list.
    if font_scheme not in roman_fonts:
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
        font_scheme = "default"
    # Replace the one \fontscheme line with the new per-family settings.
    # \font_sc / \font_osf are included because revert_font_settings()
    # reads them back.
    document.header[i : i + 1] = [
        "\\font_roman %s" % roman_fonts[font_scheme],
        "\\font_sans %s" % sans_fonts[font_scheme],
        "\\font_typewriter %s" % typewriter_fonts[font_scheme],
        "\\font_default_family default",
        "\\font_sc false",
        "\\font_osf false",
        "\\font_sf_scale 100",
        "\\font_tt_scale 100",
    ]
def revert_font_settings(document):
    "Revert font settings."
    # NOTE(review): fragmentary paste — loop initialisation (`i`,
    # `insert_line`), the `if i == -1:` guards, `else:` branches and the
    # multi-line condition wrapper around the scheme comparison are missing
    # from this chunk; the indentation below is a best-effort reconstruction
    # and must be checked against the full file.
    fonts = {"roman": "default", "sans": "default", "typewriter": "default"}
    # Collect the three \font_* header values and strip them from the header.
    for family in "roman", "sans", "typewriter":
        name = "\\font_%s" % family
        i = find_token_exact(document.header, name, i)
        document.warning("Malformed LyX document: Missing `%s'." % name)
        fonts[family] = get_value(document.header, name, i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, "\\font_default_family", i)
    document.warning("Malformed LyX document: Missing `\\font_default_family'.")
    font_default_family = "default"
    font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
    del document.header[i]
    i = find_token_exact(document.header, "\\font_sc", i)
    document.warning("Malformed LyX document: Missing `\\font_sc'.")
    font_sc = get_value(document.header, "\\font_sc", i, i + 1)
    del document.header[i]
    if font_sc != "false":
        document.warning("Conversion of '\\font_sc' not yet implemented.")
    i = find_token_exact(document.header, "\\font_osf", i)
    document.warning("Malformed LyX document: Missing `\\font_osf'.")
    font_osf = get_value(document.header, "\\font_osf", i, i + 1)
    del document.header[i]
    i = find_token_exact(document.header, "\\font_sf_scale", i)
    document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
    font_sf_scale = "100"
    font_sf_scale = get_value(document.header, "\\font_sf_scale", i, i + 1)
    del document.header[i]
    if font_sf_scale != "100":
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
    i = find_token_exact(document.header, "\\font_tt_scale", i)
    document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
    font_tt_scale = "100"
    font_tt_scale = get_value(document.header, "\\font_tt_scale", i, i + 1)
    del document.header[i]
    if font_tt_scale != "100":
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
    # If all three families match a known scheme, emit a single \fontscheme.
    for font_scheme in list(roman_fonts.keys()):
        roman_fonts[font_scheme] == fonts["roman"]
        and sans_fonts[font_scheme] == fonts["sans"]
        and typewriter_fonts[font_scheme] == fonts["typewriter"]
        document.header.insert(insert_line, "\\fontscheme %s" % font_scheme)
        if font_default_family != "default":
            document.preamble.append(
                "\\renewcommand{\\familydefault}{\\%s}" % font_default_family
        if font_osf == "true":
            document.warning("Ignoring `\\font_osf = true'")
    # No scheme matched: fall back to 'default' plus explicit preamble code.
    font_scheme = "default"
    document.header.insert(insert_line, "\\fontscheme %s" % font_scheme)
    if fonts["roman"] == "cmr":
        document.preamble.append("\\renewcommand{\\rmdefault}{cmr}")
        if font_osf == "true":
            document.preamble.append("\\usepackage{eco}")
    for font in "lmodern", "charter", "utopia", "beraserif", "ccfonts", "chancery":
        if fonts["roman"] == font:
            document.preamble.append("\\usepackage{%s}" % font)
    for font in "cmss", "lmss", "cmbr":
        if fonts["sans"] == font:
            document.preamble.append("\\renewcommand{\\sfdefault}{%s}" % font)
    # NOTE(review): iterating the string "berasans" yields single characters,
    # so this comparison can never match the full name — looks like it should
    # be a one-element tuple ("berasans",). Verify against the full file.
    for font in "berasans":
        if fonts["sans"] == font:
            document.preamble.append("\\usepackage{%s}" % font)
    for font in "cmtt", "lmtt", "cmtl":
        if fonts["typewriter"] == font:
            document.preamble.append("\\renewcommand{\\ttdefault}{%s}" % font)
    for font in "courier", "beramono", "luximono":
        if fonts["typewriter"] == font:
            document.preamble.append("\\usepackage{%s}" % font)
    if font_default_family != "default":
        document.preamble.append("\\renewcommand{\\familydefault}{\\%s}" % font_default_family)
    if font_osf == "true":
        document.warning("Ignoring `\\font_osf = true'")
def revert_booktabs(document):
    "We remove the booktabs flag or everything else will become a mess."
    # NOTE(review): fragmentary paste — the scan-loop header, the
    # `if i == -1:` / `break` handling and the index advance are missing
    # from this chunk.
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    i = find_token(document.body, "\\begin_inset Tabular", i)
    j = find_end_of_inset(document.body, i + 1)
    document.warning("Malformed LyX document: Could not find end of tabular.")
    for k in range(i, j):
        # Strip the booktabs feature flag from the <features> tag.
        if re.search('^<features.* booktabs="true".*>$', document.body[k]):
            document.warning("Converting 'booktabs' table to normal table.")
            document.body[k] = document.body[k].replace(' booktabs="true"', "")
        # Extra row spacing attributes are dropped as well.
        if re.search(re_row, document.body[k]):
            document.warning("Removing extra row space.")
            document.body[k] = re_tspace.sub("", document.body[k])
            document.body[k] = re_bspace.sub("", document.body[k])
            document.body[k] = re_ispace.sub("", document.body[k])
def convert_multiencoding(document, forward):
    """Fix files with multiple encodings.
    Files with an inputencoding of "auto" or "default" and multiple languages
    where at least two languages have different default encodings are encoded
    in multiple encodings for file formats < 249. These files are incorrectly
    read and written (as if the whole file was in the encoding of the main
    This is not true for files written by CJK-LyX, they are always in the locale
    - converts from fake unicode values to true unicode if forward is true, and
    - converts from true unicode values to fake unicode if forward is false.
    document.encoding must be set to the old value (format 248) in both cases.
    We do this here and not in LyX.py because it is far easier to do the
    necessary parsing in modern formats than in ancient ones.
    """
    # NOTE(review): fragmentary paste — several docstring lines, the loop
    # initialisation, the CJK early-exit body, the document.warning(...) call
    # wrappers around the f-strings, and the index increments are missing
    # from this chunk; indentation is a best-effort reconstruction.
    inset_types = ["Foot", "Note"]
    if document.cjk_encoding != "":
    encoding_stack = [document.encoding]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    inset_re = re.compile(r"^\\begin_inset\s(\S+)")
    if not forward:  # no need to read file unless we are reverting
        spec_chars = read_unicodesymbols()
    if document.inputencoding == "auto" or document.inputencoding == "default":
        while i < len(document.body):
            # A \lang command switches the effective 8-bit encoding.
            result = lang_re.match(document.body[i])
            language = result.group(1)
            if language == "default":
                f"Resetting encoding from {encoding_stack[-1]} to {document.encoding}.",
                encoding_stack[-1] = document.encoding
                from lyx2lyx_lang import lang
                f"Setting encoding from {encoding_stack[-1]} to {lang[language][3]}.",
                encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                # Footnotes/notes restart from the document language encoding.
                if len(insets) > 0 and insets[-1] in inset_types:
                    from lyx2lyx_lang import lang
                    encoding_stack.append(lang[document.language][3])
                    encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                if len(encoding_stack) == 1:
                    # Don't remove the document encoding from the stack
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                    del encoding_stack[-1]
            elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
                inset_result = inset_re.match(document.body[i])
                insets.append(inset_result.group(1))
            elif find_token(document.body, "\\end_inset", i, i + 1) == i:
                # Body text in a non-document encoding must be re-decoded.
                if encoding_stack[-1] != document.encoding:
                    # This line has been incorrectly interpreted as if it was
                    # encoded in 'encoding'.
                    # Convert back to the 8bit string that was in the file.
                    orig = document.body[i].encode(document.encoding)
                    # Convert the 8bit string that was in the file to unicode
                    # with the correct encoding.
                    document.body[i] = orig.decode(encoding_stack[-1])
                    # Convert unicode to the 8bit string that will be written
                    # to the file with the correct encoding.
                    orig = document.body[i].encode(encoding_stack[-1])
                    # Convert the 8bit string that will be written to the
                    # file to fake unicode with the encoding that will later
                    # be used when writing to the file.
                    document.body[i] = orig.decode(document.encoding)
                    mod_line = revert_unicode_line(document, i, insets, spec_chars)
                    document.body[i : i + 1] = mod_line.split("\n")
                    i += len(mod_line.split("\n")) - 1
def convert_utf8(document):
    "Set document encoding to UTF-8."
    # Order matters: convert_multiencoding() must run while
    # document.encoding still holds the old (format-248) value.
    convert_multiencoding(document, True)
    document.encoding = "utf8"
def revert_utf8(document):
    """Set document encoding to the value corresponding to inputencoding.

    Restores the missing-header guard: if no \\inputencoding line exists,
    one is appended as 'auto'; an explicit 'utf8' is rewritten to 'auto'
    before the body is re-encoded via convert_multiencoding().
    """
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # Guard restored: append a default instead of indexing with -1.
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
    # 248 is the last format whose encoding rules get_encoding must apply.
    document.encoding = get_encoding(
        document.language, document.inputencoding, 248, document.cjk_encoding
    )
    convert_multiencoding(document, False)
# FIXME: Use the version in unicode_symbols.py which has some bug fixes
def read_unicodesymbols():
    """Read the unicodesymbols list of unicode characters and corresponding commands.

    Returns a dict mapping each unicode character to
    [command, flag1, flag2].  Lines that do not parse are skipped.
    """
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    spec_chars = {}
    # 'with' guarantees the handle is closed — the original leaked it.
    # NOTE(review): str.strip("lyx2lyx") strips *characters*, not the
    # suffix — kept as-is to preserve behavior; verify against caller.
    with open(os.path.join(pathname.strip("lyx2lyx"), "unicodesymbols")) as fp:
        for line in fp.readlines():
            if line[0] != "#" and len(line.strip()) != 0:
                line = line.replace(' "', " ")  # remove all quotation marks with spaces before
                line = line.replace('" ', " ")  # remove all quotation marks with spaces after
                line = line.replace(r"\"", '"')  # replace \" by " (for characters with diaeresis)
                try:
                    # flag1 and flag2 are preamble and other flags
                    [ucs4, command, flag1, flag2] = line.split(None, 3)
                    # NOTE: eval() parses the codepoint field; the file ships
                    # with LyX (trusted), but int(ucs4, 0) would be safer.
                    spec_chars[chr(eval(ucs4))] = [command, flag1, flag2]
                except Exception:
                    # Malformed table lines are deliberately ignored.
                    pass
    return spec_chars
def revert_unicode_line(document, i, insets, spec_chars, replacement_character="???"):
    """Replace characters in body line i that the document encoding cannot
    represent with ERT/math commands from spec_chars; returns the modified
    line text (possibly spanning several lines joined by '\\n')."""
    # NOTE(review): fragmentary paste — the `ert_intro =` assignment wrapper,
    # `mod_line`/`last_char` initialisation, the try/except around the encode
    # probe and several else-branches are missing from this chunk.
    # Define strings to start and end ERT and math insets
        "\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s" % document.default_layout
    ert_outro = "\n\\end_layout\n\n\\end_inset\n"
    math_intro = "\n\\begin_inset Formula $"
    math_outro = "$\n\\end_inset"
    if i and not is_inset_line(document, i - 1):
        last_char = document.body[i - 1][-1:]
    line = document.body[i]
    for character in line:
        # Try to write the character
        dummy = character.encode(document.encoding)
        mod_line += character
        last_char = character
        # Try to replace with ERT/math inset
        if character in spec_chars:
            command = spec_chars[character][0]  # the command to replace unicode
            flag1 = spec_chars[character][1]
            flag2 = spec_chars[character][2]
            if flag1.find("combining") > -1 or flag2.find("combining") > -1:
                # We have a character that should be combined with the previous
                command += "{" + last_char + "}"
                # Remove the last character. Ignore if it is whitespace
                if len(last_char.rstrip()):
                    # last_char was found and is not whitespace
                    mod_line = mod_line[:-1]
                else:  # last_char belongs to the last line
                    document.body[i - 1] = document.body[i - 1][:-1]
                # The last character was replaced by a command. For now it is
                # ignored. This could be handled better.
            if command[0:2] == "\\\\":
                if command[2:12] == "ensuremath":
                    if insets and insets[-1] == "ERT":
                        command = command.replace("\\\\ensuremath{\\\\", "$\n\\backslash\n")
                        command = command.replace("}", "$\n")
                    elif not insets or insets[-1] != "Formula":
                        # add a math inset with the replacement character
                        command = command.replace("\\\\ensuremath{\\", math_intro)
                        command = command.replace("}", math_outro)
                    # we are already in a math inset
                        command = command.replace("\\\\ensuremath{\\", "")
                        command = command.replace("}", "")
                if insets and insets[-1] == "Formula":
                    # avoid putting an ERT in a math; instead put command as text
                    command = command.replace("\\\\", r"\mathrm{")
                    command = command + "}"
                elif not insets or insets[-1] != "ERT":
                    # add an ERT inset with the replacement character
                    command = command.replace("\\\\", "\n\\backslash\n")
                    command = ert_intro + command + ert_outro
                    command = command.replace("\\\\", "\n\\backslash\n")
            last_char = ""  # indicate that the character should not be removed
        # Replace with replacement string
        mod_line += replacement_character
def revert_unicode(document):
    """Transform unicode characters that can not be written using the
    document encoding to commands according to the unicodesymbols
    file. Characters that can not be replaced by commands are replaced by
    an replacement string. Flags other than 'combined' are currently not
    implemented."""
    # NOTE(review): fragmentary paste — loop counter initialisation, the
    # insets pop on \end_inset, the try/except around the encode probe and
    # the success-path increment are missing from this chunk.
    spec_chars = read_unicodesymbols()
    insets = []  # list of active insets
    # Go through the document to capture all combining characters
    while i < len(document.body):
        line = document.body[i]
        # Track the inset nesting so ERT/math replacement works correctly.
        if line.find("\\begin_inset") > -1:
            insets.append(line[13:].split()[0])
        if line.find("\\end_inset") > -1:
        # Try to write the line
            # If all goes well the line is written here
            dummy = line.encode(document.encoding)
        # Error, some character(s) in the line need to be replaced
            mod_line = revert_unicode_line(document, i, insets, spec_chars)
            document.body[i : i + 1] = mod_line.split("\n")
            i += len(mod_line.split("\n"))
def revert_cs_label(document):
    "Remove status flag of charstyle label."
    # NOTE(review): fragmentary paste — the scan loop, the deletion of the
    # matched 'show_label' line and the loop advances are missing here.
    i = find_token(document.body, "\\begin_inset CharStyle", i)
    # Search for a line starting 'show_label'
    # If it is not there, break with a warning message
    if document.body[i][:10] == "show_label":
    elif document.body[i][:13] == "\\begin_layout":
        document.warning("Malformed LyX document: Missing 'show_label'.")
def convert_bibitem(document):
    r"""Convert

    \bibitem [option]{argument}

    to

    \begin_inset LatexCommand bibitem
    ...

    This must be called after convert_commandparams.
    """
    # NOTE(review): fragmentary paste — the scan loop, the `if i == -1: break`
    # guard and the no-option branch body are missing from this chunk.
    i = find_token(document.body, "\\bibitem", i)
    j = document.body[i].find("[") + 1
    k = document.body[i].rfind("]")
    if j == 0:  # No optional argument found
    option = document.body[i][j:k]
    j = document.body[i].rfind("{") + 1
    k = document.body[i].rfind("}")
    argument = document.body[i][j:k]
    lines = ["\\begin_inset LatexCommand bibitem"]
    # Escape embedded quotes so the inset parameters stay well-formed.
    lines.append('label "%s"' % option.replace('"', '\\"'))
    lines.append('key "%s"' % argument.replace('"', '\\"'))
    lines.append("\\end_inset")
    document.body[i : i + 1] = lines
# Parameter-name table used by convert_commandparams()/revert_commandparams():
# maps each LatexCommand inset name to the inset parameter names used for its
# first optional argument, second optional argument and mandatory argument
# (an empty string means the command does not accept that slot).
# NOTE(review): the closing brace of this dict lies outside this chunk.
commandparams_info = {
    # command : [option1, option2, argument]
    "bibitem": ["label", "", "key"],
    "bibtex": ["options", "btprint", "bibfiles"],
    "cite": ["after", "before", "key"],
    "citet": ["after", "before", "key"],
    "citep": ["after", "before", "key"],
    "citealt": ["after", "before", "key"],
    "citealp": ["after", "before", "key"],
    "citeauthor": ["after", "before", "key"],
    "citeyear": ["after", "before", "key"],
    "citeyearpar": ["after", "before", "key"],
    "citet*": ["after", "before", "key"],
    "citep*": ["after", "before", "key"],
    "citealt*": ["after", "before", "key"],
    "citealp*": ["after", "before", "key"],
    "citeauthor*": ["after", "before", "key"],
    "Citet": ["after", "before", "key"],
    "Citep": ["after", "before", "key"],
    "Citealt": ["after", "before", "key"],
    "Citealp": ["after", "before", "key"],
    "Citeauthor": ["after", "before", "key"],
    "Citet*": ["after", "before", "key"],
    "Citep*": ["after", "before", "key"],
    "Citealt*": ["after", "before", "key"],
    "Citealp*": ["after", "before", "key"],
    "Citeauthor*": ["after", "before", "key"],
    "citefield": ["after", "before", "key"],
    "citetitle": ["after", "before", "key"],
    "cite*": ["after", "before", "key"],
    "hfill": ["", "", ""],
    "index": ["", "", "name"],
    "printindex": ["", "", "name"],
    "label": ["", "", "name"],
    "eqref": ["name", "", "reference"],
    "pageref": ["name", "", "reference"],
    "prettyref": ["name", "", "reference"],
    "ref": ["name", "", "reference"],
    "vpageref": ["name", "", "reference"],
    "vref": ["name", "", "reference"],
    "tableofcontents": ["", "", "type"],
    "htmlurl": ["name", "", "target"],
    "url": ["name", "", "target"],
def convert_commandparams(document):
    r"""Convert

    \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}

    to the format-246 parameter form

    \begin_inset LatexCommand cmdname
    ...

    name1, name2 and name3 can be different for each command.
    """
    # NOTE(review): fragmentary paste — the scan loop, the `if`/`elif`
    # scaffolding of the character-by-character parser (state transitions,
    # accumulator assignments), the `document.warning(` / `lines.append(`
    # call wrappers and the loop advances are missing from this chunk.
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.
    i = find_token(document.body, "\\begin_inset LatexCommand", i)
    command = document.body[i][26:].strip()
    document.warning("Malformed LyX document: Missing LatexCommand name.")
    j = find_token(document.body, "\\end_inset", i + 1)
    document.warning("Malformed document")
    # Fold any continuation lines into the single command string.
    command += "".join(document.body[i + 1 : j])
    document.body[i + 1 : j] = []
    # The following parser is taken from the original InsetCommandParams::scanCommand
    # Used to handle things like \command[foo[bar]]{foo{bar}}
    (state == "CMDNAME" and c == " ")
    or (state == "CMDNAME" and c == "[")
    or (state == "CMDNAME" and c == "{")
    (state == "OPTION" and c == "]")
    or (state == "SECOPTION" and c == "]")
    or (state == "CONTENT" and c == "}")
    nestdepth = nestdepth - 1
    (state == "OPTION" and c == "[")
    or (state == "SECOPTION" and c == "[")
    or (state == "CONTENT" and c == "{")
    nestdepth = nestdepth + 1
    if state == "CMDNAME":
    elif state == "OPTION":
    elif state == "SECOPTION":
    elif state == "CONTENT":
    elif c == "[" and b != "]":
        nestdepth = 0  # Just to be sure
    elif c == "[" and b == "]":
        nestdepth = 0  # Just to be sure
        nestdepth = 0  # Just to be sure
    # Now we have parsed the command, output the parameters
    lines = ["\\begin_inset LatexCommand %s" % name]
    if commandparams_info[name][0] == "":
        document.warning(f"Ignoring invalid option `{option1}' of command `{name}'.")
        commandparams_info[name][0],
        option1.replace("\\", "\\\\").replace('"', '\\"'),
    if commandparams_info[name][1] == "":
        document.warning(
            f"Ignoring invalid second option `{option2}' of command `{name}'."
        commandparams_info[name][1],
        option2.replace("\\", "\\\\").replace('"', '\\"'),
    if commandparams_info[name][2] == "":
        document.warning(f"Ignoring invalid argument `{argument}' of command `{name}'.")
        commandparams_info[name][2],
        argument.replace("\\", "\\\\").replace('"', '\\"'),
    document.body[i : i + 1] = lines
def revert_commandparams(document):
    """Revert format-246 parameter-style LatexCommand insets back to the old
    inline \\cmdname[opt1][opt2]{arg} form."""
    # NOTE(review): fragmentary paste — the scan loop, default
    # initialisations of preview_line/option1/option2/argument, the
    # match guard, the option-combination `elif` chain selecting between
    # the `lines = [...]` alternatives, and the warning-call wrappers
    # are missing from this chunk.
    regex = re.compile(r"(\S+)\s+(.+)")
    i = find_token(document.body, "\\begin_inset LatexCommand", i)
    name = document.body[i].split()[2]
    j = find_end_of_inset(document.body, i)
    for k in range(i + 1, j):
        match = re.match(regex, document.body[k])
        pname = match.group(1)
        pvalue = match.group(2)
        if pname == "preview":
            preview_line = document.body[k]
        elif commandparams_info[name][0] != "" and pname == commandparams_info[name][0]:
            option1 = pvalue.strip('"').replace('\\"', '"').replace("\\\\", "\\")
        elif commandparams_info[name][1] != "" and pname == commandparams_info[name][1]:
            option2 = pvalue.strip('"').replace('\\"', '"').replace("\\\\", "\\")
        elif commandparams_info[name][2] != "" and pname == commandparams_info[name][2]:
            argument = pvalue.strip('"').replace('\\"', '"').replace("\\\\", "\\")
        elif document.body[k].strip() != "":
            f"Ignoring unknown contents `{document.body[k]}' in command inset {name}."
    # bibitem reverts to a bare \bibitem line, everything else to an inset.
    if name == "bibitem":
        lines = ["\\bibitem {%s}" % argument]
        lines = [f"\\bibitem [{option1}]{{{argument}}}"]
        lines = [f"\\begin_inset LatexCommand \\{name}{{{argument}}}"]
        lines = [f"\\begin_inset LatexCommand \\{name}[][{option2}]{{{argument}}}"]
        lines = [f"\\begin_inset LatexCommand \\{name}[{option1}]{{{argument}}}"]
        f"\\begin_inset LatexCommand \\{name}[{option1}][{option2}]{{{argument}}}"
    if name != "bibitem":
        if preview_line != "":
            lines.append(preview_line)
        lines.append("\\end_inset")
    document.body[i : j + 1] = lines
def revert_nomenclature(document):
    "Convert nomenclature entry to ERT."
    # NOTE(review): fragmentary paste — the scan loop, variable defaults
    # (symbol/description/prefix/preview_line), the prefix branch selecting
    # between the two `command =` forms, the ERT replacement list tail and
    # the warning-call wrapper are missing from this chunk.
    regex = re.compile(r"(\S+)\s+(.+)")
    i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
    j = find_end_of_inset(document.body, i + 1)
    for k in range(i + 1, j):
        match = re.match(regex, document.body[k])
        name = match.group(1)
        value = match.group(2)
        if name == "preview":
            preview_line = document.body[k]
        elif name == "symbol":
            symbol = value.strip('"').replace('\\"', '"')
        elif name == "description":
            description = value.strip('"').replace('\\"', '"')
        elif name == "prefix":
            prefix = value.strip('"').replace('\\"', '"')
        elif document.body[k].strip() != "":
            "Ignoring unknown contents `%s' in nomenclature inset." % document.body[k]
    command = f"nomenclature{{{symbol}}}{{{description}}}"
    command = f"nomenclature[{prefix}]{{{symbol}}}{{{description}}}"
    document.body[i : j + 1] = [
        "\\begin_layout %s" % document.default_layout,
    # Ensure the nomencl package (with \makenomenclature) is loaded once.
    and find_token(document.preamble, "\\usepackage{nomencl}[2005/09/22]", 0) == -1
    document.preamble.append("\\usepackage{nomencl}[2005/09/22]")
    document.preamble.append("\\makenomenclature")
def revert_printnomenclature(document):
    "Convert printnomenclature to ERT."
    # NOTE(review): fragmentary paste — the scan loop, labelwidth/preview
    # defaults, the labelwidth branch choosing between the two `command =`
    # forms, the ERT replacement tail and the warning-call wrapper are
    # missing from this chunk.
    regex = re.compile(r"(\S+)\s+(.+)")
    i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
    j = find_end_of_inset(document.body, i + 1)
    for k in range(i + 1, j):
        match = re.match(regex, document.body[k])
        name = match.group(1)
        value = match.group(2)
        if name == "preview":
            preview_line = document.body[k]
        elif name == "labelwidth":
            labelwidth = value.strip('"').replace('\\"', '"')
        elif document.body[k].strip() != "":
            "Ignoring unknown contents `%s' in printnomenclature inset."
    command = "nomenclature{}"
    command = "nomenclature[%s]" % labelwidth
    document.body[i : j + 1] = [
        "\\begin_layout %s" % document.default_layout,
    # Ensure the nomencl package (with \makenomenclature) is loaded once.
    and find_token(document.preamble, "\\usepackage{nomencl}[2005/09/22]", 0) == -1
    document.preamble.append("\\usepackage{nomencl}[2005/09/22]")
    document.preamble.append("\\makenomenclature")
def convert_esint(document):
    """Add \\use_esint setting to header.

    The setting is inserted just before \\cite_engine; if that anchor is
    missing the document is malformed and left untouched.
    """
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        # Guard restored: without it insert(i, ...) would run with i == -1
        # and put the setting in the wrong place.
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, "\\use_esint 0")
def revert_esint(document):
    """Remove \\use_esint setting from header.

    If the setting was 'on' (2), load the esint package explicitly so the
    document still compiles without the header flag.
    """
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        # Guard restored: otherwise the split/del below would hit the
        # wrong header line when the setting is absent.
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    if use_esint == "2":
        document.preamble.append("\\usepackage{esint}")
def revert_clearpage(document):
    "clearpage -> ERT"
    # NOTE(review): fragmentary paste — the docstring/loop header, the
    # `if i == -1: break` guard and the remainder of the ERT replacement
    # list are missing from this chunk.
    i = find_token(document.body, "\\clearpage", i)
    document.body[i : i + 1] = [
        "\\begin_layout %s" % document.default_layout,
def revert_cleardoublepage(document):
    "cleardoublepage -> ERT"
    # NOTE(review): fragmentary paste — the loop header, the
    # `if i == -1: break` guard and the remainder of the ERT replacement
    # list are missing from this chunk.
    i = find_token(document.body, "\\cleardoublepage", i)
    document.body[i : i + 1] = [
        "\\begin_layout %s" % document.default_layout,
def convert_lyxline(document):
    r"remove fontsize commands for \lyxline"
    # The problem is: The old \lyxline definition doesn't handle the fontsize
    # to change the line thickness. The new definition does this so that imported
    # \lyxlines would have a different line thickness. The eventual fontsize command
    # before \lyxline is therefore removed to get the same output.
    # NOTE(review): fragmentary paste — the `fontsizes` list definition, the
    # `if i == -1: break` guard and the loop advances are missing here.
    for n in range(0, len(fontsizes)):
        while i < len(document.body):
            i = find_token(document.body, "\\size " + fontsizes[n], i)
            k = find_token(document.body, "\\lyxline", i)
            # the corresponding fontsize command is always 2 lines before the \lyxline
            if i != -1 and k == i + 2:
                document.body[i : i + 1] = []
def revert_encodings(document):
    "Set new encodings to auto."
    # NOTE(review): fragmentary paste — the `encodings` list of new encoding
    # names and the `if i == -1:` / `else:` structure around the two branches
    # below are missing from this chunk.
    i = find_token(document.header, "\\inputencoding", 0)
    document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc in encodings:
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def convert_caption(document):
    "Convert caption layouts to caption insets."
    # NOTE(review): fragmentary paste — the scan loop, the `if i/j == -1`
    # guards and the tail of the replacement list are missing from this
    # chunk. The visible logic wraps each Caption layout in a Caption inset
    # nested inside a default layout.
    i = find_token(document.body, "\\begin_layout Caption", i)
    j = find_end_of_layout(document.body, i)
    document.warning("Malformed LyX document: Missing `\\end_layout'.")
    document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
    document.body[i : i + 1] = [
        "\\begin_layout %s" % document.default_layout,
        "\\begin_inset Caption",
        "\\begin_layout %s" % document.default_layout,
def revert_caption(document):
    "Convert caption insets to caption layouts."
    " This assumes that the text class has a caption style. "
    # NOTE(review): fragmentary paste — the scan loop, several `if ... == -1`
    # early-exit bodies, `else:` branches and loop advances are missing from
    # this chunk; indentation is a best-effort reconstruction.
    i = find_token(document.body, "\\begin_inset Caption", i)
    # We either need to delete the previous \begin_layout line, or we
    # need to end the previous layout if this inset is not in the first
    # position of the paragraph.
    layout_before = find_token_backwards(document.body, "\\begin_layout", i)
    if layout_before == -1:
        document.warning("Malformed LyX document: Missing `\\begin_layout'.")
    layout_line = document.body[layout_before]
    del_layout_before = True
    l = layout_before + 1
    if document.body[l] != "":
        del_layout_before = False
    if del_layout_before:
        del document.body[layout_before:i]
        document.body[i:i] = ["\\end_layout", ""]
    # Find start of layout in the inset and end of inset
    j = find_token(document.body, "\\begin_layout", i)
    document.warning("Malformed LyX document: Missing `\\begin_layout'.")
    k = find_end_of_inset(document.body, i)
    document.warning("Malformed LyX document: Missing `\\end_inset'.")
    # We either need to delete the following \end_layout line, or we need
    # to restart the old layout if this inset is not at the paragraph end.
    layout_after = find_token(document.body, "\\end_layout", k)
    if layout_after == -1:
        document.warning("Malformed LyX document: Missing `\\end_layout'.")
    del_layout_after = True
    while l < layout_after:
        if document.body[l] != "":
            del_layout_after = False
    if del_layout_after:
        del document.body[k + 1 : layout_after + 1]
        document.body[k + 1 : k + 1] = [layout_line, ""]
    # delete \begin_layout and \end_inset and replace \begin_inset with
    # "\begin_layout Caption". This works because we can only have one
    # paragraph in the caption inset: The old \end_layout will be recycled.
    del document.body[k]
    if document.body[k] == "":
        del document.body[k]
    del document.body[j]
    if document.body[j] == "":
        del document.body[j]
    document.body[i] = "\\begin_layout Caption"
    if document.body[i + 1] == "":
        del document.body[i + 1]
# Accents of InsetLaTeXAccent
# NOTE(review): fragmentary paste — the `accent_map = {` opener and the
# closing brace are outside this view; these entries map one-letter LaTeX
# accent commands to their Unicode combining diacritics (used by
# _convert_accent via accent_map.get).
    "`": "\u0300",  # grave
    "'": "\u0301",  # acute
    "^": "\u0302",  # circumflex
    "~": "\u0303",  # tilde
    "=": "\u0304",  # macron
    "u": "\u0306",  # breve
    ".": "\u0307",  # dot above
    '"': "\u0308",  # diaeresis
    "r": "\u030a",  # ring above
    "H": "\u030b",  # double acute
    "v": "\u030c",  # caron
    "b": "\u0320",  # minus sign below
    "d": "\u0323",  # dot below
    "c": "\u0327",  # cedilla
    "k": "\u0328",  # ogonek
    "t": "\u0361",  # tie. This is special: It spans two characters, but
    # only one is given as argument, so we don't need to
    # treat it differently.
# special accents of InsetLaTeXAccent without argument
# NOTE(review): the closing brace of this dict is outside this view.
special_accent_map = {
    "i": "\u0131",  # dotless i
    "j": "\u0237",  # dotless j
    "l": "\u0142",  # l with stroke
    "L": "\u0141",  # L with stroke
# special accent arguments of InsetLaTeXAccent
# NOTE(review): fragmentary paste — the dict opener (presumably
# `accented_map = {`, referenced by _convert_accent) and its closing brace
# are outside this view.
    "\\i": "\u0131",  # dotless i
    "\\j": "\u0237",  # dotless j
def _convert_accent(accent, accented_char):
    # NOTE(review): fragmentary paste — the line binding `type` (presumably
    # `type = accent`), the empty-char branch header, several returns and
    # the not-found handling for `a` are missing from this chunk.
    char = accented_char
    if type in special_accent_map:
        return special_accent_map[type]
    # a missing char is treated as space by LyX
    elif type == "q" and char in ["t", "d", "l", "L"]:
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
    elif char in accented_map:
        char = accented_map[char]
    # We can only convert accents on a single char
    a = accent_map.get(type)
    # Compose char + combining mark into a single precomposed character.
    return unicodedata.normalize("NFC", f"{char}{a}")
def convert_ertbackslash(body, i, ert, default_layout):
    r"""-------------------------------------------------------------------------------------------
    Convert backslashes and '\n' into valid ERT code, append the converted
    text to body[i] and return the (maybe incremented) line index i"""
            # a literal backslash becomes the \backslash pseudo-token
            body[i] = body[i] + "\\backslash "
            body[i + 1 : i + 1] = [
                "\\begin_layout %s" % default_layout,
            # any other character is appended verbatim
            body[i] = body[i] + c
def convert_accent(document):
    """Convert InsetLaTeXAccent ('\\i ...') constructs to plain Unicode
    characters; unknown combinations are turned into ERT instead."""
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a' (also accepted)
    # '\i \"' (also accepted)
    re_wholeinset = re.compile(r"^(.*)(\\i\s+)(.*)$")
    re_contents = re.compile(r"^([^\s{]+)(.*)$")
    re_accentedcontents = re.compile(r"^\s*{?([^{}]*)}?\s*$")
        i = find_re(document.body, re_wholeinset, i)
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            accented_char = match.group(1)
            converted = _convert_accent(accent, accented_char)
                # Normalize contents
                # NOTE(review): the trailing comma makes this a 1-tuple, not a
                # string -- looks like an f-string conversion artifact; confirm
                # whether this value is ever used after this assignment.
                contents = (f"{accent}{{{accented_char}}}",)
                document.body[i] = f"{prefix}{converted}"
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i + 1 : i + 1] = [
            "\\begin_inset ERT",
            "\\begin_layout %s" % document.default_layout,
        i = convert_ertbackslash(
            document.body, i + 7, "\\%s" % contents, document.default_layout
        document.body[i + 1 : i + 1] = ["\\end_layout", "", "\\end_inset"]
def is_inset_line(document, i):
    """Line i of body has an inset"""
    # a line starting with a backslash is itself a LyX command
    if document.body[i][:1] == "\\":
    # otherwise look for an inset token among the last two words of the line
    last_tokens = "".join(document.body[i].split()[-2:])
    return last_tokens.find("\\") != -1
# A wrapper around normalize that handles special cases (cf. bug 3313)
def normalize(form, text):
    """Like unicodedata.normalize(form, text), but keep OHM SIGN and
    ANGSTROM SIGN untouched (they would otherwise be folded into their
    canonical equivalents)."""
    # do not normalize OHM, ANGSTROM
    keep_characters = [0x2126, 0x212B]
        if ord(i) in keep_characters:
            # flush the pending run before copying the kept character verbatim
            if len(convert) > 0:
                result = result + unicodedata.normalize(form, convert)
            convert = convert + i
    # flush the final pending run
    if len(convert) > 0:
        result = result + unicodedata.normalize(form, convert)
def revert_accent(document):
    """Replace accented characters that are not representable in the target
    encoding by InsetLaTeXAccent ('\\i \\<accent>{<char>}') constructs."""
    # build the reverse lookup tables of the three accent maps
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k
    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    for i in range(len(document.body) - 1):
        if document.body[i] == "" or document.body[i + 1] == "" or document.body[i][-1] == " ":
        if document.body[i + 1][0] in inverse_accent_map and not is_inset_line(document, i):
            # the last character of this line and the first of the next line
            # form probably a surrogate pair, inline insets are excluded (second part of the test)
            while len(document.body[i + 1]) > 0 and document.body[i + 1][0] != " ":
                document.body[i] += document.body[i + 1][0]
                document.body[i + 1] = document.body[i + 1][1:]
    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(len(document.body)):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
            document.body[i] = normalize("NFD", document.body[i])
            document.body[i] = normalize("NFD", str(document.body[i], "utf-8"))
    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
        get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
    lang_re = re.compile(r"^\\lang\s(\S+)")
    while i < len(document.body):
            document.inputencoding == "auto" or document.inputencoding == "default"
        ) and document.cjk_encoding != "":
            # Track the encoding of the current line
            result = lang_re.match(document.body[i])
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                    from lyx2lyx_lang import lang
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if document.body[i][j] in inverse_special_accent_map and (
                j == len(document.body[i]) - 1
                or document.body[i][j + 1] not in inverse_accent_map
                accent = document.body[i][j]
                    # probe: if the character encodes cleanly, leave it alone
                    dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i + 1, document.body[i][j + 1 :])
                    # Delete the accented character
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j - 1]
                if accented_char == " ":
                    # Conform to LyX output
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                    # probe: if the composed pair encodes cleanly, leave it alone
                    dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i + 1, document.body[i][j + 1 :])
                    # Delete the accented characters
                    document.body[i] = document.body[i][: j - 1]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += f"\\i \\{inverse_accent_map[accent]}{{{accented_char}}}"
    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(len(document.body)):
        document.body[i] = normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
    """Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside."""
    # map of each handled font property to its default (reset) value
        "\\series": "default",
        "\\emph": "default",
        "\\shape": "default",
        "\\family": "default",
    return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """Move leading/trailing whitespace out of \\lang font-change sequences.

    Format 259 already transferred such whitespace outside the sequence for
    most character properties, but font *language* changes were forgotten at
    the time; apply exactly the same normalization to them here."""
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's langauge; otherwise, the paragraph's language is the
    document's language."""
    lines = document.body
    # first non-empty line after the paragraph's \begin_layout
    first_nonempty_line = find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)
    words = lines[first_nonempty_line].split()
    if len(words) > 1 and words[0] == "\\lang":
    # no explicit \lang: fall back to the document language
    return document.language
def normalize_font_whitespace(document, char_properties):
    """Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided char_properties is handled by this function."""
    # char_properties maps each handled property to its default (reset) value
    if document.backend != "latex":
    lines = document.body
    while i < len(lines):
        words = lines[i].split()
        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            # also reset the default language to be the paragraph's language
            if "\\lang" in list(char_properties.keys()):
                char_properties["\\lang"] = get_paragraph_language(document, i + 1)
        elif len(words) > 1 and words[0] in list(char_properties.keys()):
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in list(changes.keys()):
                    del changes[words[0]]
                defaultproperty = True
                changes[words[0]] = words[1]
                defaultproperty = False
            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.
            if lines[i - 1] and lines[i - 1][-1] == " ":
                lines[i - 1] = lines[i - 1][:-1]
                # a space before the font change
                for k in list(changes.keys()):
                    # exclude property k because that is already in lines[i]
                        added_lines[1:1] = [f"{k} {changes[k]}"]
                for k in list(changes.keys()):
                    # exclude property k because that must be added below anyway
                        added_lines[0:0] = [f"{k} {char_properties[k]}"]
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i + 1 : i + 1] = added_lines
                    # Reset property for the space
                    added_lines[0:0] = [f"{words[0]} {char_properties[words[0]]}"]
                    lines[i:i] = added_lines
                    i = i + len(added_lines)
                and lines[i + 1][0] == " "
                and (len(changes) > 0 or not defaultproperty)
                # a space after the font change
                if lines[i + 1] == " " and lines[i + 2]:
                    next_words = lines[i + 2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                lines[i + 1] = lines[i + 1][1:]
                for k in list(changes.keys()):
                    # exclude property k because that is already in lines[i]
                        added_lines[1:1] = [f"{k} {changes[k]}"]
                for k in list(changes.keys()):
                    # exclude property k because that must be added below anyway
                        added_lines[0:0] = [f"{k} {char_properties[k]}"]
                # Reset property for the space
                added_lines[0:0] = [f"{words[0]} {char_properties[words[0]]}"]
                lines[i:i] = added_lines
                i = i + len(added_lines)
def revert_utf8x(document):
    "Set utf8x encoding to utf8."
    i = find_token(document.header, "\\inputencoding", 0)
        # no \inputencoding header found: add a default one
        document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc == "utf8x":
        document.header[i] = "\\inputencoding utf8"
    # keep the in-memory attribute in sync with the header
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    "Set utf8plain encoding to utf8."
    i = find_token(document.header, "\\inputencoding", 0)
        # no \inputencoding header found: add a default one
        document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc == "utf8-plain":
        document.header[i] = "\\inputencoding utf8"
    # keep the in-memory attribute in sync with the header
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_beamer_alert(document):
    "Revert beamer's \\alert inset back to ERT."
        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
        # turn the CharStyle inset into an ERT inset
        document.body[i] = "\\begin_inset ERT"
        if document.body[i][:13] == "\\begin_layout":
            # Insert the \alert command
            document.body[i + 1] = "\\alert{" + document.body[i + 1] + "}"
def revert_beamer_structure(document):
    "Revert beamer's \\structure inset back to ERT."
        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
        # turn the CharStyle inset into an ERT inset
        document.body[i] = "\\begin_inset ERT"
        if document.body[i][:13] == "\\begin_layout":
            # wrap the inset content in the \structure command
            document.body[i + 1] = "\\structure{" + document.body[i + 1] + "}"
def convert_changes(document):
    "Switch output_changes off if tracking_changes is off."
    i = find_token(document.header, "\\tracking_changes", 0)
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
    j = find_token(document.header, "\\output_changes", 0)
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    # output_changes without tracking_changes makes no sense: turn it off
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    "Set ascii encoding to auto."
    i = find_token(document.header, "\\inputencoding", 0)
        # no \inputencoding header found: add a default one
        document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc == "ascii":
        document.header[i] = "\\inputencoding auto"
    # keep the in-memory attribute in sync with the header
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    """Replace the obsolete language names brazil/portuges by their
    current spellings and update the \\language header line."""
    replacements = {"brazil": "brazilian", "portuges": "portuguese"}
    if document.language not in replacements:
        return
    document.language = replacements[document.language]
    pos = find_token(document.header, "\\language", 0)
    document.header[pos] = "\\language %s" % document.language
def revert_language_name(document):
    """Rename the languages brazilian/portuguese back to their old names
    and update the \\language header line."""
    old_names = {"brazilian": "brazil", "portuguese": "portuges"}
    if document.language not in old_names:
        return
    document.language = old_names[document.language]
    pos = find_token(document.header, "\\language", 0)
    document.header[pos] = "\\language %s" % document.language
# \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    """Rename the obsolete 'cv' text class to its new name 'simplecv'."""
    renames = {"cv": "simplecv"}
    document.textclass = renames.get(document.textclass, document.textclass)
def revert_cv_textclass(document):
    """Rename the 'simplecv' text class back to its old name 'cv'."""
    renames = {"simplecv": "cv"}
    document.textclass = renames.get(document.textclass, document.textclass)
# add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    "add scaleBeforeRotation graphics parameter."
        i = find_token(document.body, "\\begin_inset Graphics", i)
        j = find_end_of_inset(document.body, i + 1)
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        # (the "\t" is a literal tab: graphics params are tab-indented)
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if k != -1 and l != -1:
            document.body.insert(j, "scaleBeforeRotation")
# remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    "remove scaleBeforeRotation graphics parameter."
        i = find_token(document.body, "\\begin_inset Graphics", i)
        j = find_end_of_inset(document.body, i + 1)
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # If there's a scaleBeforeRotation param, just remove that
        # (the "\t" is a literal tab: graphics params are tab-indented)
        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
            del document.body[k]
        # if not, and if we have rotateAngle and width or height or scale,
        # we have to put the rotateAngle value to special
        rotateAngle = get_value(document.body, "rotateAngle", i + 1, j)
        special = get_value(document.body, "special", i + 1, j)
        if rotateAngle != "":
            k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
                document.body.insert(j - 1, "\tspecial angle=%s" % rotateAngle)
                # prepend the angle to the existing special value
                l = find_token(document.body, "\tspecial", i + 1, j)
                document.body[l] = document.body[l].replace(
                    special, f"angle={rotateAngle},{special}"
            k = find_token(document.body, "\trotateAngle", i + 1, j)
                del document.body[k]
def convert_tableborder(document):
    """Remove the duplicated "|" from table cell arguments."""
    # The problem is: LyX doubles the table cell border as it ignores the "|" character in
    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
    while i < len(document.body):
        h = document.body[i].find('leftline="true"', 0, len(document.body[i]))
        k = document.body[i].find("|>{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if h != -1 and k != -1:
            # remove the "|" character at position k
            document.body[i] = (
                document.body[i][:k] + document.body[i][k + 1 : len(document.body[i])]
def revert_tableborder(document):
    """Re-insert the "|" into table cell arguments (inverse of convert_tableborder)."""
    while i < len(document.body):
        h = document.body[i].find('leftline="true"', 0, len(document.body[i]))
        k = document.body[i].find(">{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if h != -1 and k != -1:
            # put the "|" back in front of ">{"
            document.body[i] = document.body[i][:k] + "|" + document.body[i][k:]
def revert_armenian(document):
    """Revert Armenian support: switch armscii8 encoding back to auto,
    load armtex in the preamble and set the language to english."""
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        i = find_token(document.header, "\\inputencoding", 0)
            document.header[i] = "\\inputencoding auto"
    # check if preamble exists, if not k is set to -1
    while i < len(document.preamble):
            k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
            k = document.preamble[i].find("%", 0, len(document.preamble[i]))
    # add the entry \usepackage{armtex} to the document preamble
    if document.language == "armenian":
        # set the armtex entry as the first preamble line
            document.preamble[0:0] = ["\\usepackage{armtex}"]
        # create the preamble when it doesn't exist
            document.preamble.append("\\usepackage{armtex}")
        # Set document language from armenian to english
        if document.language == "armenian":
            document.language = "english"
            i = find_token(document.header, "\\language", 0)
                document.header[i] = "\\language english"
def revert_CJK(document):
    "Set CJK encodings to default and languages chinese, japanese and korean to english."
    i = find_token(document.header, "\\inputencoding", 0)
        # no \inputencoding header found: add a default one
        document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc in encodings:
        document.header[i] = "\\inputencoding default"
    # keep the in-memory attribute in sync with the header
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
        document.language == "chinese-simplified"
        or document.language == "chinese-traditional"
        or document.language == "japanese"
        or document.language == "korean"
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
            document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    r"Revert preamble option \listings_params"
    i = find_token(document.header, "\\listings_params", 0)
        # move the value into an explicit \lstset in the preamble
        document.preamble.append("\\usepackage{listings}")
        document.preamble.append("\\lstset{%s}" % document.header[i].split()[1].strip('"'))
        document.header.pop(i)
def revert_listings_inset(document):
    r"""Revert listings inset to \lstinline or \begin, \end lstlisting, translate
    lstparams "language=Delphi"
    \begin_layout Standard
    \begin_layout Standard
    lstinline[language=Delphi]{var i = 10;}
    There can be an caption inset in this inset
    \begin_layout Standard
    \begin_inset Caption
    \begin_layout Standard
    \begin_inset LatexCommand label
        i = find_token(document.body, "\\begin_inset listings", i)
        # make sure the listings package is loaded
        if "\\usepackage{listings}" not in document.preamble:
            document.preamble.append("\\usepackage{listings}")
        j = find_end_of_inset(document.body, i + 1)
            # this should not happen
        # read the inset's own parameter lines (inline, lstparams, status)
        for line in range(i + 1, i + 4):
            if document.body[line].startswith("inline"):
                inline = document.body[line].split()[1]
            if document.body[line].startswith("lstparams"):
                params = document.body[line].split()[1].strip('"')
            if document.body[line].startswith("status"):
                status = document.body[line].split()[1].strip()
        # extract an optional caption inset (with optional label) from the inset
        cap = find_token(document.body, "\\begin_inset Caption", i)
            cap_end = find_end_of_inset(document.body, cap + 1)
                # this should not happen
            lbl = find_token(document.body, "\\begin_inset LatexCommand label", cap + 1)
                lbl_end = find_end_of_inset(document.body, lbl + 1)
                    # this should not happen
            for line in document.body[lbl : lbl_end + 1]:
                if line.startswith("name "):
                    label = line.split()[1].strip('"')
            for line in document.body[cap:lbl] + document.body[lbl_end + 1 : cap_end + 1]:
                if not line.startswith("\\"):
                    caption += line.strip()
            # looking for the oneline code for lstinline
            inlinecode = document.body[
                find_token(document.body, "\\begin_layout %s" % document.default_layout, i + 1)
        # fold caption and label into the lstset parameter list
        if len(caption) > 0:
            if len(params) == 0:
                params = "caption={%s}" % caption
                params += ",caption={%s}" % caption
            if len(params) == 0:
                params = "label={%s}" % label
                params += ",label={%s}" % label
            params = "[%s]" % params
            # escape backslashes for ERT output
            params = params.replace("\\", "\\backslash\n")
        if inline == "true":
            document.body[i : (j + 1)] = [
                r"\begin_inset ERT",
                "status %s" % status,
                r"\begin_layout %s" % document.default_layout,
                f"lstinline{params}{{{inlinecode}}}",
            document.body[i : j + 1] = (
                    r"\begin_inset ERT",
                    "status %s" % status,
                    r"\begin_layout %s" % document.default_layout,
                    r"begin{lstlisting}%s" % params,
                    r"\begin_layout %s" % document.default_layout,
                + document.body[k : j - 1]
                    r"\begin_layout %s" % document.default_layout,
def revert_include_listings(document):
    r"""Revert lstinputlisting Include option , translate
    \begin_inset Include \lstinputlisting{file}[opt]
    \begin_layout Standard
    lstinputlisting{file}[opt]
        i = find_token(document.body, r"\begin_inset Include \lstinputlisting", i)
        # make sure the listings package is loaded
        if "\\usepackage{listings}" not in document.preamble:
            document.preamble.append("\\usepackage{listings}")
        j = find_end_of_inset(document.body, i + 1)
            # this should not happen
        # find command line lstinputlisting{file}[options]
        cmd, file, option = "", "", ""
        if re.match(r"\\(lstinputlisting){([.\w]*)}(.*)", document.body[i].split()[2]):
            cmd, file, option = re.match(
                r"\\(lstinputlisting){([.\w]*)}(.*)", document.body[i].split()[2]
        # escape backslashes for ERT output
        option = option.replace("\\", "\\backslash\n")
        document.body[i : j + 1] = [
            r"\begin_inset ERT",
            r"\begin_layout %s" % document.default_layout,
            f"{cmd}{option}{{{file}}}",
def revert_ext_font_sizes(document):
    """For ext* classes, move an explicit 10/11/12 \\paperfontsize into the
    document options (the old way of specifying the font size)."""
    if document.backend != "latex":
    if not document.textclass.startswith("ext"):
    fontsize = get_value(document.header, "\\paperfontsize", 0)
    if fontsize not in ("10", "11", "12"):
    i = find_token(document.header, "\\paperfontsize", 0)
    document.header[i] = "\\paperfontsize default"
    insert_document_option(document, fontsize)
def convert_ext_font_sizes(document):
    """For ext* classes, extract a 10pt/11pt/12pt document option into the
    \\paperfontsize header (inverse of revert_ext_font_sizes)."""
    if document.backend != "latex":
    if not document.textclass.startswith("ext"):
    fontsize = get_value(document.header, "\\paperfontsize", 0)
    if fontsize != "default":
    i = find_token(document.header, "\\options", 0)
    options = get_value(document.header, "\\options", i)
    fontsizes = "10pt", "11pt", "12pt"
    for fs in fontsizes:
        if options.find(fs) != -1:
    else:  # this else will only be attained if the for cycle had no match
    # remove the font-size option from the options list
    options = options.split(",")
    for j, opt in enumerate(options):
        if opt in fontsizes:
    k = find_token(document.header, "\\paperfontsize", 0)
    document.header[k] = "\\paperfontsize %s" % fontsize
        document.header[i] = "\\options %s" % ",".join(options)
        # no options left: drop the \options header altogether
        del document.header[i]
def revert_separator_layout(document):
    r"""Revert --Separator-- to a lyx note
    \begin_layout --Separator--
    \begin_layout Standard
    \begin_inset Note Note
    \begin_layout Standard
        i = find_token(document.body, r"\begin_layout --Separator--", i)
        j = find_end_of_layout(document.body, i + 1)
            # this should not happen
        # wrap the separator content in a note inside a standard layout
        document.body[i : j + 1] = (
                r"\begin_layout %s" % document.default_layout,
                r"\begin_inset Note Note",
                r"\begin_layout %s" % document.default_layout,
                "Separate Environment",
            + document.body[i + 1 : j]
            + ["", r"\end_layout"]
def convert_arabic(document):
    """Rename the language 'arabic' to 'arabic_arabtex' in header and body."""
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
            document.header[i] = "\\language arabic_arabtex"
    while i < len(document.body):
        h = document.body[i].find(r"\lang arabic", 0, len(document.body[i]))
            # change the language name
            document.body[i] = r"\lang arabic_arabtex"
def revert_arabic(document):
    """Rename the language 'arabic_arabtex' back to 'arabic' in header and body."""
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
            document.header[i] = "\\language arabic"
    while i < len(document.body):
        h = document.body[i].find(r"\lang arabic_arabtex", 0, len(document.body[i]))
            # change the language name
            document.body[i] = r"\lang arabic"
supported_versions = ["1.5.0", "1.5"]
# Conversion chain: each entry is [target format number, [functions applied
# in order to reach that format]].
    [247, [convert_font_settings]],
    [249, [convert_utf8]],
    [252, [convert_commandparams, convert_bibitem]],
    [254, [convert_esint]],
    [257, [convert_caption]],
    [258, [convert_lyxline]],
    [259, [convert_accent, normalize_font_whitespace_259]],
    [261, [convert_changes]],
    [263, [normalize_language_name]],
    [264, [convert_cv_textclass]],
    [265, [convert_tableborder]],
    [271, [convert_ext_font_sizes]],
    [274, [normalize_font_whitespace_274]],
    [275, [convert_graphics_rotation]],
    [276, [convert_arabic]],
# Reversion chain: mirror image of the conversions, run when downgrading.
    [275, [revert_arabic]],
    [274, [revert_graphics_rotation]],
    [272, [revert_separator_layout]],
            revert_preamble_listings_params,
            revert_listings_inset,
            revert_include_listings,
    [270, [revert_ext_font_sizes]],
    [269, [revert_beamer_alert, revert_beamer_structure]],
            revert_preamble_listings_params,
            revert_listings_inset,
            revert_include_listings,
    [267, [revert_CJK]],
    [266, [revert_utf8plain]],
    [265, [revert_armenian]],
    [264, [revert_tableborder]],
    [263, [revert_cv_textclass]],
    [262, [revert_language_name]],
    [261, [revert_ascii]],
    [259, [revert_utf8x]],
    [256, [revert_caption]],
    [255, [revert_encodings]],
    [254, [revert_clearpage, revert_cleardoublepage]],
    [253, [revert_esint]],
    [252, [revert_nomenclature, revert_printnomenclature]],
    [251, [revert_commandparams]],
    [250, [revert_cs_label]],
    [248, [revert_accent, revert_utf8, revert_unicode]],
    [247, [revert_booktabs]],
    [246, [revert_font_settings]],
    [245, [revert_framed]],
2298 if __name__ == "__main__":