1 # This file is part of lyx2lyx
2 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
3 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 """Convert files to the file format generated by lyx 1.5"""
25 from parser_tools import (
36 from lyx2lyx_tools import insert_document_option
37 from LyX import get_encoding
39 ####################################################################
40 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the "\\end_inset" closing the inset at lines[i]."""
    begin_tag = "\\begin_inset"
    end_tag = "\\end_inset"
    return find_end_of(lines, i, begin_tag, end_tag)
def find_end_of_layout(lines, i):
    """Return the index of the "\\end_layout" closing the layout at lines[i]."""
    begin_tag = "\\begin_layout"
    end_tag = "\\end_layout"
    return find_end_of(lines, i, begin_tag, end_tag)
def find_beginning_of_layout(lines, i):
    """Return the index of the "\\begin_layout" opening the layout containing lines[i]."""
    begin_tag = "\\begin_layout"
    end_tag = "\\end_layout"
    return find_beginning_of(lines, i, begin_tag, end_tag)
58 # End of helper functions
59 ####################################################################
63 # Notes: Framed/Shaded
# NOTE(review): numbered listing with gaps — original lines 69-71 and 73-76
# are missing here; only fragments of revert_framed are visible.
67 def revert_framed(document):
68 "Revert framed notes."
# Fragment of a find_tokens(...) call matching both framed and shaded notes.
72 document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i
# The matched inset line is rewritten as a plain Note inset.
77 document.body[i] = "\\begin_inset Note"
# NOTE(review): fragments of the three font-scheme lookup tables
# (roman_fonts, sans_fonts, typewriter_fonts — names grounded by their use
# in convert_font_settings below). The dict headers, most entries, and the
# closing braces are missing from this numbered listing.
89 "palatino": "palatino",
100 "palatino": "default",
103 "newcent": "default",
104 "bookman": "default",
108 "default": "default",
111 "palatino": "default",
114 "newcent": "default",
115 "bookman": "default",
116 "pslatex": "courier",
# NOTE(review): numbered listing with gaps (orig. lines 122, 124, 126,
# 139-140, 143+ missing). Converts the old "\fontscheme" header token into
# the newer per-family "\font_*" header lines.
120 def convert_font_settings(document):
121 "Convert font settings."
123 i = find_token_exact(document.header, "\\fontscheme", i)
125 document.warning("Malformed LyX document: Missing `\\fontscheme'.")
127 font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
128 if font_scheme == "":
129 document.warning("Malformed LyX document: Empty `\\fontscheme'.")
130 font_scheme = "default"
# Unknown schemes fall back to "default" with a warning.
131 if not font_scheme in list(roman_fonts.keys()):
132 document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
133 font_scheme = "default"
# Replace the single \fontscheme line with the expanded \font_* lines.
134 document.header[i : i + 1] = [
135 "\\font_roman %s" % roman_fonts[font_scheme],
136 "\\font_sans %s" % sans_fonts[font_scheme],
137 "\\font_typewriter %s" % typewriter_fonts[font_scheme],
138 "\\font_default_family default",
141 "\\font_sf_scale 100",
142 "\\font_tt_scale 100",
# NOTE(review): numbered listing with gaps (e.g. orig. lines 148-149, 154,
# 156-159, 163, 166, 204, 208, 213, 216, 223 missing). Reverts the per-family
# "\font_*" header lines back to a single "\fontscheme" line, emitting LaTeX
# preamble code for settings the old format cannot express.
146 def revert_font_settings(document):
147 "Revert font settings."
# Collect the three \font_<family> values, deleting each header line.
150 fonts = {"roman": "default", "sans": "default", "typewriter": "default"}
151 for family in "roman", "sans", "typewriter":
152 name = "\\font_%s" % family
153 i = find_token_exact(document.header, name, i)
155 document.warning("Malformed LyX document: Missing `%s'." % name)
160 fonts[family] = get_value(document.header, name, i, i + 1)
161 del document.header[i]
162 i = find_token_exact(document.header, "\\font_default_family", i)
164 document.warning("Malformed LyX document: Missing `\\font_default_family'.")
165 font_default_family = "default"
167 font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
168 del document.header[i]
# \font_sc and \font_osf are read and removed; their conversion is
# only partially supported (see the warnings below).
169 i = find_token_exact(document.header, "\\font_sc", i)
171 document.warning("Malformed LyX document: Missing `\\font_sc'.")
174 font_sc = get_value(document.header, "\\font_sc", i, i + 1)
175 del document.header[i]
176 if font_sc != "false":
177 document.warning("Conversion of '\\font_sc' not yet implemented.")
178 i = find_token_exact(document.header, "\\font_osf", i)
180 document.warning("Malformed LyX document: Missing `\\font_osf'.")
183 font_osf = get_value(document.header, "\\font_osf", i, i + 1)
184 del document.header[i]
185 i = find_token_exact(document.header, "\\font_sf_scale", i)
187 document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
188 font_sf_scale = "100"
190 font_sf_scale = get_value(document.header, "\\font_sf_scale", i, i + 1)
191 del document.header[i]
192 if font_sf_scale != "100":
193 document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
194 i = find_token_exact(document.header, "\\font_tt_scale", i)
196 document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
197 font_tt_scale = "100"
199 font_tt_scale = get_value(document.header, "\\font_tt_scale", i, i + 1)
200 del document.header[i]
201 if font_tt_scale != "100":
202 document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
# If the three families match a known scheme exactly, emit that scheme.
203 for font_scheme in list(roman_fonts.keys()):
205 roman_fonts[font_scheme] == fonts["roman"]
206 and sans_fonts[font_scheme] == fonts["sans"]
207 and typewriter_fonts[font_scheme] == fonts["typewriter"]
209 document.header.insert(insert_line, "\\fontscheme %s" % font_scheme)
210 if font_default_family != "default":
211 document.preamble.append(
212 "\\renewcommand{\\familydefault}{\\%s}" % font_default_family
214 if font_osf == "true":
215 document.warning("Ignoring `\\font_osf = true'")
# Otherwise fall back to "default" and reproduce each family via preamble code.
217 font_scheme = "default"
218 document.header.insert(insert_line, "\\fontscheme %s" % font_scheme)
219 if fonts["roman"] == "cmr":
220 document.preamble.append("\\renewcommand{\\rmdefault}{cmr}")
221 if font_osf == "true":
222 document.preamble.append("\\usepackage{eco}")
224 for font in "lmodern", "charter", "utopia", "beraserif", "ccfonts", "chancery":
225 if fonts["roman"] == font:
226 document.preamble.append("\\usepackage{%s}" % font)
227 for font in "cmss", "lmss", "cmbr":
228 if fonts["sans"] == font:
229 document.preamble.append("\\renewcommand{\\sfdefault}{%s}" % font)
230 for font in "berasans":
231 if fonts["sans"] == font:
232 document.preamble.append("\\usepackage{%s}" % font)
233 for font in "cmtt", "lmtt", "cmtl":
234 if fonts["typewriter"] == font:
235 document.preamble.append("\\renewcommand{\\ttdefault}{%s}" % font)
236 for font in "courier", "beramono", "luximono":
237 if fonts["typewriter"] == font:
238 document.preamble.append("\\usepackage{%s}" % font)
239 if font_default_family != "default":
240 document.preamble.append("\\renewcommand{\\familydefault}{\\%s}" % font_default_family)
241 if font_osf == "true":
242 document.warning("Ignoring `\\font_osf = true'")
# NOTE(review): numbered listing with gaps (orig. lines 251-252, 254-255,
# 257, 259, 269+ missing). Strips booktabs attributes from Tabular insets.
245 def revert_booktabs(document):
246 "We remove the booktabs flag or everything else will become a mess."
# Rows carrying any extra-space attribute, and the individual space attrs.
247 re_row = re.compile(r'^<row.*space="[^"]+".*>$')
248 re_tspace = re.compile(r'\s+topspace="[^"]+"')
249 re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
250 re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
253 i = find_token(document.body, "\\begin_inset Tabular", i)
256 j = find_end_of_inset(document.body, i + 1)
258 document.warning("Malformed LyX document: Could not find end of tabular.")
# Scan every line of the inset, removing the flag and the row spaces.
260 for k in range(i, j):
261 if re.search('^<features.* booktabs="true".*>$', document.body[k]):
262 document.warning("Converting 'booktabs' table to normal table.")
263 document.body[k] = document.body[k].replace(' booktabs="true"', "")
264 if re.search(re_row, document.body[k]):
265 document.warning("Removing extra row space.")
266 document.body[k] = re_tspace.sub("", document.body[k])
267 document.body[k] = re_bspace.sub("", document.body[k])
268 document.body[k] = re_ispace.sub("", document.body[k])
# NOTE(review): numbered listing with gaps (orig. lines 278, 280-282, 286,
# 289, 292, 294, 299, 301, 304, 307, 309-310, 312, 314-315, 317-318, 324,
# 326, 333, 337, 339-340, 342, 344, 352-353, 361, 365+ missing). The control
# flow below is therefore only partially visible; code kept byte-identical.
272 def convert_multiencoding(document, forward):
273 """Fix files with multiple encodings.
274 Files with an inputencoding of "auto" or "default" and multiple languages
275 where at least two languages have different default encodings are encoded
276 in multiple encodings for file formats < 249. These files are incorrectly
277 read and written (as if the whole file was in the encoding of the main
279 This is not true for files written by CJK-LyX, they are always in the locale
283 - converts from fake unicode values to true unicode if forward is true, and
284 - converts from true unicode values to fake unicode if forward is false.
285 document.encoding must be set to the old value (format 248) in both cases.
287 We do this here and not in LyX.py because it is far easier to do the
288 necessary parsing in modern formats than in ancient ones.
# Insets whose content uses the document language's encoding, not the
# surrounding language's.
290 inset_types = ["Foot", "Note"]
291 if document.cjk_encoding != "":
# A stack of encodings tracks nesting of \lang / layouts / insets.
293 encoding_stack = [document.encoding]
295 lang_re = re.compile(r"^\\lang\s(\S+)")
296 inset_re = re.compile(r"^\\begin_inset\s(\S+)")
297 if not forward: # no need to read file unless we are reverting
298 spec_chars = read_unicodesymbols()
300 if document.inputencoding == "auto" or document.inputencoding == "default":
302 while i < len(document.body):
303 result = lang_re.match(document.body[i])
305 language = result.group(1)
306 if language == "default":
308 f"Resetting encoding from {encoding_stack[-1]} to {document.encoding}.",
311 encoding_stack[-1] = document.encoding
# lang[language][3] is the language's default encoding (lyx2lyx_lang table).
313 from lyx2lyx_lang import lang
316 f"Setting encoding from {encoding_stack[-1]} to {lang[language][3]}.",
319 encoding_stack[-1] = lang[language][3]
320 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
321 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
322 if len(insets) > 0 and insets[-1] in inset_types:
323 from lyx2lyx_lang import lang
325 encoding_stack.append(lang[document.language][3])
327 encoding_stack.append(encoding_stack[-1])
328 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
329 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
330 if len(encoding_stack) == 1:
331 # Don't remove the document encoding from the stack
332 document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
334 del encoding_stack[-1]
335 elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
336 inset_result = inset_re.match(document.body[i])
338 insets.append(inset_result.group(1))
341 elif find_token(document.body, "\\end_inset", i, i + 1) == i:
343 if encoding_stack[-1] != document.encoding:
345 # This line has been incorrectly interpreted as if it was
346 # encoded in 'encoding'.
347 # Convert back to the 8bit string that was in the file.
348 orig = document.body[i].encode(document.encoding)
349 # Convert the 8bit string that was in the file to unicode
350 # with the correct encoding.
351 document.body[i] = orig.decode(encoding_stack[-1])
354 # Convert unicode to the 8bit string that will be written
355 # to the file with the correct encoding.
356 orig = document.body[i].encode(encoding_stack[-1])
357 # Convert the 8bit string that will be written to the
358 # file to fake unicode with the encoding that will later
359 # be used when writing to the file.
360 document.body[i] = orig.decode(document.encoding)
# On failure, the line is rewritten via revert_unicode_line (ERT/math
# replacements) and may expand into several lines.
362 mod_line = revert_unicode_line(document, i, insets, spec_chars)
363 document.body[i : i + 1] = mod_line.split("\n")
364 i += len(mod_line.split("\n")) - 1
def convert_utf8(document):
    """Switch the document over to the UTF-8 encoding.

    The body is re-encoded first (forward multi-encoding conversion),
    after which the document's encoding attribute records the change.
    """
    forward = True
    convert_multiencoding(document, forward)
    document.encoding = "utf8"
# NOTE(review): numbered listing with gaps (orig. lines 377 and 384 missing,
# including the branch line between 376 and 378).
374 def revert_utf8(document):
375 "Set document encoding to the value corresponding to inputencoding."
376 i = find_token(document.header, "\\inputencoding", 0)
# A missing header entry is re-added as "auto"; an explicit utf8 entry is
# rewritten to "auto".
378 document.header.append("\\inputencoding auto")
379 elif get_value(document.header, "\\inputencoding", i) == "utf8":
380 document.header[i] = "\\inputencoding auto"
381 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# 248 is the old file-format number the encoding is computed for.
382 document.encoding = get_encoding(
383 document.language, document.inputencoding, 248, document.cjk_encoding
385 convert_multiencoding(document, False)
388 # FIXME: Use the version in unicode_symbols.py which has some bug fixes
# NOTE(review): numbered listing with gaps (orig. lines 393, 395, 399, 403+
# missing, including the return statement).
389 def read_unicodesymbols():
390 "Read the unicodesymbols list of unicode characters and corresponding commands."
391 pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
# NOTE(review): str.strip("lyx2lyx") strips any of the characters
# l,y,x,2 from both ends of the path, not the substring "lyx2lyx" —
# this is the known bug the FIXME above refers to.
392 fp = open(os.path.join(pathname.strip("lyx2lyx"), "unicodesymbols"))
394 for line in fp.readlines():
396 line = line.replace(' "', " ") # remove all quotation marks with spaces before
397 line = line.replace('" ', " ") # remove all quotation marks with spaces after
398 line = line.replace(r"\"", '"') # replace \" by " (for characters with diaeresis)
400 # flag1 and flag2 are preamble and other flags
401 [ucs4, command, flag1, flag2] = line.split(None, 3)
# NOTE(review): eval() on file content — acceptable only because the
# unicodesymbols file ships with LyX itself; do not point this at
# untrusted input.
402 spec_chars[chr(eval(ucs4))] = [command, flag1, flag2]
# NOTE(review): numbered listing with gaps (orig. lines 411, 413, 417-418,
# 421-423, 426, 431, 443, 447, 450, 454, 461, 465, 474, 477-478, 481+
# missing, including the assignment target of the ert_intro string and the
# return). Replaces characters unwritable in document.encoding with
# ERT/math commands from spec_chars, or with replacement_character.
409 def revert_unicode_line(document, i, insets, spec_chars, replacement_character="???"):
410 # Define strings to start and end ERT and math insets
412 "\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s" % document.default_layout
414 ert_outro = "\n\\end_layout\n\n\\end_inset\n"
415 math_intro = "\n\\begin_inset Formula $"
416 math_outro = "$\n\\end_inset"
# Remember the previous line's last character for combining accents.
419 if i and not is_inset_line(document, i - 1):
420 last_char = document.body[i - 1][-1:]
424 line = document.body[i]
425 for character in line:
427 # Try to write the character
428 dummy = character.encode(document.encoding)
429 mod_line += character
430 last_char = character
432 # Try to replace with ERT/math inset
433 if character in spec_chars:
434 command = spec_chars[character][0] # the command to replace unicode
435 flag1 = spec_chars[character][1]
436 flag2 = spec_chars[character][2]
437 if flag1.find("combining") > -1 or flag2.find("combining") > -1:
438 # We have a character that should be combined with the previous
439 command += "{" + last_char + "}"
440 # Remove the last character. Ignore if it is whitespace
441 if len(last_char.rstrip()):
442 # last_char was found and is not whitespace
444 mod_line = mod_line[:-1]
445 else: # last_char belongs to the last line
446 document.body[i - 1] = document.body[i - 1][:-1]
448 # The last character was replaced by a command. For now it is
449 # ignored. This could be handled better.
451 if command[0:2] == "\\\\":
452 if command[2:12] == "ensuremath":
453 if insets and insets[-1] == "ERT":
455 command = command.replace("\\\\ensuremath{\\\\", "$\n\\backslash\n")
456 command = command.replace("}", "$\n")
457 elif not insets or insets[-1] != "Formula":
458 # add a math inset with the replacement character
459 command = command.replace("\\\\ensuremath{\\", math_intro)
460 command = command.replace("}", math_outro)
462 # we are already in a math inset
463 command = command.replace("\\\\ensuremath{\\", "")
464 command = command.replace("}", "")
466 if insets and insets[-1] == "Formula":
467 # avoid putting an ERT in a math; instead put command as text
468 command = command.replace("\\\\", r"\mathrm{")
469 command = command + "}"
470 elif not insets or insets[-1] != "ERT":
471 # add an ERT inset with the replacement character
472 command = command.replace("\\\\", "\n\\backslash\n")
473 command = ert_intro + command + ert_outro
475 command = command.replace("\\\\", "\n\\backslash\n")
476 last_char = "" # indicate that the character should not be removed
479 # Replace with replacement string
480 mod_line += replacement_character
# NOTE(review): numbered listing with gaps (orig. lines 489, 492, 494, 497,
# 501-502, 504, 507-508, 513+ missing).
484 def revert_unicode(document):
485 """Transform unicode characters that can not be written using the
486 document encoding to commands according to the unicodesymbols
487 file. Characters that can not be replaced by commands are replaced by
488 an replacement string. Flags other than 'combined' are currently not
490 spec_chars = read_unicodesymbols()
491 insets = [] # list of active insets
493 # Go through the document to capture all combining characters
495 while i < len(document.body):
496 line = document.body[i]
# Track inset nesting; line[13:] is what follows "\begin_inset ".
498 if line.find("\\begin_inset") > -1:
499 insets.append(line[13:].split()[0])
500 if line.find("\\end_inset") > -1:
503 # Try to write the line
505 # If all goes well the line is written here
506 dummy = line.encode(document.encoding)
509 # Error, some character(s) in the line need to be replaced
510 mod_line = revert_unicode_line(document, i, insets, spec_chars)
511 document.body[i : i + 1] = mod_line.split("\n")
512 i += len(mod_line.split("\n"))
# NOTE(review): numbered listing with gaps (orig. lines 517-518, 520-521,
# 524-525, 527-528, 531+ missing).
515 def revert_cs_label(document):
516 "Remove status flag of charstyle label."
519 i = find_token(document.body, "\\begin_inset CharStyle", i)
522 # Search for a line starting 'show_label'
523 # If it is not there, break with a warning message
526 if document.body[i][:10] == "show_label":
529 elif document.body[i][:13] == "\\begin_layout":
530 document.warning("Malformed LyX document: Missing 'show_label'.")
# NOTE(review): numbered listing with gaps (orig. lines 538, 540-542,
# 544-548, 550-552, 554-555, 559-560, 566, 569, 572+ missing, including the
# docstring delimiters).
537 def convert_bibitem(document):
539 \bibitem [option]{argument}
543 \begin_inset LatexCommand bibitem
549 This must be called after convert_commandparams.
553 i = find_token(document.body, "\\bibitem", i)
# Extract the optional [option] part, if any.
556 j = document.body[i].find("[") + 1
557 k = document.body[i].rfind("]")
558 if j == 0: # No optional argument found
561 option = document.body[i][j:k]
# Extract the mandatory {argument} part.
562 j = document.body[i].rfind("{") + 1
563 k = document.body[i].rfind("}")
564 argument = document.body[i][j:k]
# Build the replacement inset, escaping embedded double quotes.
565 lines = ["\\begin_inset LatexCommand bibitem"]
567 lines.append('label "%s"' % option.replace('"', '\\"'))
568 lines.append('key "%s"' % argument.replace('"', '\\"'))
570 lines.append("\\end_inset")
571 document.body[i : i + 1] = lines
# Parameter-name table for LatexCommand insets.
# command : [option1, option2, argument] — the key names used when a
# "\begin_inset LatexCommand \cmd[opt1][opt2]{arg}" inset is converted to
# key/value lines (convert_commandparams) and back (revert_commandparams).
# An empty string means the command has no such slot, and a value supplied
# for it is ignored with a warning.
# NOTE(review): the numbered listing dropped the closing brace (orig. line
# 618); it is restored here. All 41 entries are visible and unchanged.
commandparams_info = {
    "bibitem": ["label", "", "key"],
    "bibtex": ["options", "btprint", "bibfiles"],
    "cite": ["after", "before", "key"],
    "citet": ["after", "before", "key"],
    "citep": ["after", "before", "key"],
    "citealt": ["after", "before", "key"],
    "citealp": ["after", "before", "key"],
    "citeauthor": ["after", "before", "key"],
    "citeyear": ["after", "before", "key"],
    "citeyearpar": ["after", "before", "key"],
    "citet*": ["after", "before", "key"],
    "citep*": ["after", "before", "key"],
    "citealt*": ["after", "before", "key"],
    "citealp*": ["after", "before", "key"],
    "citeauthor*": ["after", "before", "key"],
    "Citet": ["after", "before", "key"],
    "Citep": ["after", "before", "key"],
    "Citealt": ["after", "before", "key"],
    "Citealp": ["after", "before", "key"],
    "Citeauthor": ["after", "before", "key"],
    "Citet*": ["after", "before", "key"],
    "Citep*": ["after", "before", "key"],
    "Citealt*": ["after", "before", "key"],
    "Citealp*": ["after", "before", "key"],
    "Citeauthor*": ["after", "before", "key"],
    "citefield": ["after", "before", "key"],
    "citetitle": ["after", "before", "key"],
    "cite*": ["after", "before", "key"],
    "hfill": ["", "", ""],
    "index": ["", "", "name"],
    "printindex": ["", "", "name"],
    "label": ["", "", "name"],
    "eqref": ["name", "", "reference"],
    "pageref": ["name", "", "reference"],
    "prettyref": ["name", "", "reference"],
    "ref": ["name", "", "reference"],
    "vpageref": ["name", "", "reference"],
    "vref": ["name", "", "reference"],
    "tableofcontents": ["", "", "type"],
    "htmlurl": ["name", "", "target"],
    "url": ["name", "", "target"],
}
# NOTE(review): numbered listing with large gaps (orig. lines 622-623,
# 625-628, 630-634, 636, 639-641, 643-644, 646, 648-650, 652, 654, 657,
# 659-663, 665-668, 672-674, 678-681, 683, 687, 690, 692, 694, 696-699,
# 701, 704, 706-707, 709-710, 713, 716-718, 721-723, 725, 727-730,
# 733-735, 738-740, 743-744, 746+ missing — most of the character-by-
# character parser state machine is not visible). Code kept byte-identical.
621 def convert_commandparams(document):
624 \\begin_inset LatexCommand \\cmdname[opt1][opt2]{arg}
629 \\begin_inset LatexCommand cmdname
635 name1, name2 and name3 can be different for each command.
637 # \begin_inset LatexCommand bibitem was not the official version (see
638 # convert_bibitem()), but could be read in, so we convert it here, too.
642 i = find_token(document.body, "\\begin_inset LatexCommand", i)
# [26:] skips the "\begin_inset LatexCommand " prefix.
645 command = document.body[i][26:].strip()
647 document.warning("Malformed LyX document: Missing LatexCommand name.")
651 j = find_token(document.body, "\\end_inset", i + 1)
653 document.warning("Malformed document")
# Join continuation lines into one command string, then drop them.
655 command += "".join(document.body[i + 1 : j])
656 document.body[i + 1 : j] = []
658 # The following parser is taken from the original InsetCommandParams::scanCommand
664 # Used to handle things like \command[foo[bar]]{foo{bar}}
# State machine over CMDNAME / OPTION / SECOPTION / CONTENT, tracking
# bracket nesting so brackets inside arguments are not terminators.
669 (state == "CMDNAME" and c == " ")
670 or (state == "CMDNAME" and c == "[")
671 or (state == "CMDNAME" and c == "{")
675 (state == "OPTION" and c == "]")
676 or (state == "SECOPTION" and c == "]")
677 or (state == "CONTENT" and c == "}")
682 nestdepth = nestdepth - 1
684 (state == "OPTION" and c == "[")
685 or (state == "SECOPTION" and c == "[")
686 or (state == "CONTENT" and c == "{")
688 nestdepth = nestdepth + 1
689 if state == "CMDNAME":
691 elif state == "OPTION":
693 elif state == "SECOPTION":
695 elif state == "CONTENT":
700 elif c == "[" and b != "]":
702 nestdepth = 0 # Just to be sure
703 elif c == "[" and b == "]":
705 nestdepth = 0 # Just to be sure
708 nestdepth = 0 # Just to be sure
711 # Now we have parsed the command, output the parameters
712 lines = ["\\begin_inset LatexCommand %s" % name]
# Emit each parsed slot under its per-command key name (commandparams_info),
# escaping backslashes and double quotes for the key/value syntax.
714 if commandparams_info[name][0] == "":
715 document.warning(f"Ignoring invalid option `{option1}' of command `{name}'.")
719 commandparams_info[name][0],
720 option1.replace("\\", "\\\\").replace('"', '\\"'),
724 if commandparams_info[name][1] == "":
726 f"Ignoring invalid second option `{option2}' of command `{name}'."
731 commandparams_info[name][1],
732 option2.replace("\\", "\\\\").replace('"', '\\"'),
736 if commandparams_info[name][2] == "":
737 document.warning(f"Ignoring invalid argument `{argument}' of command `{name}'.")
741 commandparams_info[name][2],
742 argument.replace("\\", "\\\\").replace('"', '\\"'),
745 document.body[i : i + 1] = lines
# NOTE(review): numbered listing with gaps (orig. lines 751-752, 754-755,
# 758-761, 764, 776, 778, 780, 782, 784-786, 788, 790-791, 793-794, 796,
# 800, 803+ missing, including the branch conditions around lines 781-795).
749 def revert_commandparams(document):
# Matches one 'key "value"' parameter line inside the inset.
750 regex = re.compile(r"(\S+)\s+(.+)")
753 i = find_token(document.body, "\\begin_inset LatexCommand", i)
756 name = document.body[i].split()[2]
757 j = find_end_of_inset(document.body, i)
762 for k in range(i + 1, j):
763 match = re.match(regex, document.body[k])
765 pname = match.group(1)
766 pvalue = match.group(2)
767 if pname == "preview":
768 preview_line = document.body[k]
# Un-escape the stored values (reverse of convert_commandparams).
769 elif commandparams_info[name][0] != "" and pname == commandparams_info[name][0]:
770 option1 = pvalue.strip('"').replace('\\"', '"').replace("\\\\", "\\")
771 elif commandparams_info[name][1] != "" and pname == commandparams_info[name][1]:
772 option2 = pvalue.strip('"').replace('\\"', '"').replace("\\\\", "\\")
773 elif commandparams_info[name][2] != "" and pname == commandparams_info[name][2]:
774 argument = pvalue.strip('"').replace('\\"', '"').replace("\\\\", "\\")
775 elif document.body[k].strip() != "":
777 f"Ignoring unknown contents `{document.body[k]}' in command inset {name}."
# bibitem reverts to a plain \bibitem line; everything else keeps the
# inset form with inline [opt][opt]{arg} syntax.
779 if name == "bibitem":
781 lines = ["\\bibitem {%s}" % argument]
783 lines = [f"\\bibitem [{option1}]{{{argument}}}"]
787 lines = [f"\\begin_inset LatexCommand \\{name}{{{argument}}}"]
789 lines = [f"\\begin_inset LatexCommand \\{name}[][{option2}]{{{argument}}}"]
792 lines = [f"\\begin_inset LatexCommand \\{name}[{option1}]{{{argument}}}"]
795 f"\\begin_inset LatexCommand \\{name}[{option1}][{option2}]{{{argument}}}"
797 if name != "bibitem":
798 if preview_line != "":
799 lines.append(preview_line)
801 lines.append("\\end_inset")
802 document.body[i : j + 1] = lines
# NOTE(review): numbered listing with gaps (orig. lines 809-811, 813-815,
# 817-820, 823, 835, 837-838, 840, 843-845, 847-857, 859, 862+ missing,
# including most of the replacement ERT body).
806 def revert_nomenclature(document):
807 "Convert nomenclature entry to ERT."
808 regex = re.compile(r"(\S+)\s+(.+)")
812 i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
816 j = find_end_of_inset(document.body, i + 1)
# Collect the inset's key/value parameters.
821 for k in range(i + 1, j):
822 match = re.match(regex, document.body[k])
824 name = match.group(1)
825 value = match.group(2)
826 if name == "preview":
827 preview_line = document.body[k]
828 elif name == "symbol":
829 symbol = value.strip('"').replace('\\"', '"')
830 elif name == "description":
831 description = value.strip('"').replace('\\"', '"')
832 elif name == "prefix":
833 prefix = value.strip('"').replace('\\"', '"')
834 elif document.body[k].strip() != "":
836 "Ignoring unknown contents `%s' in nomenclature inset." % document.body[k]
# Rebuild the LaTeX \nomenclature command, with or without [prefix].
839 command = f"nomenclature{{{symbol}}}{{{description}}}"
841 command = f"nomenclature[{prefix}]{{{symbol}}}{{{description}}}"
842 document.body[i : j + 1] = [
846 "\\begin_layout %s" % document.default_layout,
# Add the nomencl package to the preamble once.
858 and find_token(document.preamble, "\\usepackage{nomencl}[2005/09/22]", 0) == -1
860 document.preamble.append("\\usepackage{nomencl}[2005/09/22]")
861 document.preamble.append("\\makenomenclature")
# NOTE(review): numbered listing with gaps (orig. lines 867-869, 871-873,
# 875-876, 879, 887, 889-891, 893, 896-898, 900-910, 912, 915+ missing,
# including most of the replacement ERT body).
864 def revert_printnomenclature(document):
865 "Convert printnomenclature to ERT."
866 regex = re.compile(r"(\S+)\s+(.+)")
870 i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
874 j = find_end_of_inset(document.body, i + 1)
877 for k in range(i + 1, j):
878 match = re.match(regex, document.body[k])
880 name = match.group(1)
881 value = match.group(2)
882 if name == "preview":
883 preview_line = document.body[k]
884 elif name == "labelwidth":
885 labelwidth = value.strip('"').replace('\\"', '"')
886 elif document.body[k].strip() != "":
888 "Ignoring unknown contents `%s' in printnomenclature inset."
# Rebuild the LaTeX command, with or without the [labelwidth] option.
892 command = "nomenclature{}"
894 command = "nomenclature[%s]" % labelwidth
895 document.body[i : j + 1] = [
899 "\\begin_layout %s" % document.default_layout,
# Add the nomencl package to the preamble once.
911 and find_token(document.preamble, "\\usepackage{nomencl}[2005/09/22]", 0) == -1
913 document.preamble.append("\\usepackage{nomencl}[2005/09/22]")
914 document.preamble.append("\\makenomenclature")
# NOTE(review): numbered listing with gaps (orig. lines 920 and 922 missing,
# i.e. the branch between the find and the warning).
917 def convert_esint(document):
918 "Add \\use_esint setting to header."
# The new token is inserted next to \cite_engine in the header.
919 i = find_token(document.header, "\\cite_engine", 0)
921 document.warning("Malformed LyX document: Missing `\\cite_engine'.")
923 # 0 is off, 1 is auto, 2 is on.
924 document.header.insert(i, "\\use_esint 0")
# NOTE(review): numbered listing with gaps (orig. lines 930, 932, 936
# missing, including the condition guarding the preamble append).
927 def revert_esint(document):
928 "Remove \\use_esint setting from header."
929 i = find_token(document.header, "\\use_esint", 0)
931 document.warning("Malformed LyX document: Missing `\\use_esint'.")
933 use_esint = document.header[i].split()[1]
934 del document.header[i]
935 # 0 is off, 1 is auto, 2 is on.
937 document.preamble.append("\\usepackage{esint}")
# NOTE(review): numbered listing with large gaps (orig. lines 941-943,
# 945-946, 948-950, 952-962 missing, including the docstring and the bulk of
# the replacement ERT lines). Replaces \clearpage with an ERT inset.
940 def revert_clearpage(document):
944 i = find_token(document.body, "\\clearpage", i)
947 document.body[i : i + 1] = [
951 "\\begin_layout %s" % document.default_layout,
# NOTE(review): numbered listing with large gaps (orig. lines 965-966,
# 968-969, 971-973, 975-985 missing — the bulk of the replacement ERT
# lines). Replaces \cleardoublepage with an ERT inset.
963 def revert_cleardoublepage(document):
964 "cleardoublepage -> ERT"
967 i = find_token(document.body, "\\cleardoublepage", i)
970 document.body[i : i + 1] = [
974 "\\begin_layout %s" % document.default_layout,
# NOTE(review): numbered listing with gaps (orig. lines 992-1003, 1005-1006,
# 1013+ missing, including the fontsizes list definition).
986 def convert_lyxline(document):
987 r"remove fontsize commands for \lyxline"
988 # The problem is: The old \lyxline definition doesn't handle the fontsize
989 # to change the line thickness. The new definition does this so that imported
990 # \lyxlines would have a different line thickness. The eventual fontsize command
991 # before \lyxline is therefore removed to get the same output.
1004 for n in range(0, len(fontsizes)):
1007 while i < len(document.body):
1008 i = find_token(document.body, "\\size " + fontsizes[n], i)
1009 k = find_token(document.body, "\\lyxline", i)
1010 # the corresponding fontsize command is always 2 lines before the \lyxline
1011 if i != -1 and k == i + 2:
# Drop the \size line; the \lyxline itself is kept.
1012 document.body[i : i + 1] = []
# NOTE(review): numbered listing with large gaps (orig. lines 1020-1039,
# 1041, 1043 missing — including the `encodings` list this function tests
# against).
1018 def revert_encodings(document):
1019 "Set new encodings to auto."
1040 i = find_token(document.header, "\\inputencoding", 0)
1042 document.header.append("\\inputencoding auto")
# Encodings the old format does not know are reset to "auto".
1044 inputenc = get_value(document.header, "\\inputencoding", i)
1045 if inputenc in encodings:
1046 document.header[i] = "\\inputencoding auto"
1047 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# NOTE(review): numbered listing with gaps (orig. lines 1052-1053,
# 1055-1056, 1058, 1060-1061, 1066, 1068+ missing). Wraps the contents of a
# Caption layout into a Caption inset inside a default layout.
1050 def convert_caption(document):
1051 "Convert caption layouts to caption insets."
1054 i = find_token(document.body, "\\begin_layout Caption", i)
1057 j = find_end_of_layout(document.body, i)
1059 document.warning("Malformed LyX document: Missing `\\end_layout'.")
# Close the new inner layout and the inset before the old \end_layout...
1062 document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
# ...and open the inset in place of the Caption layout line.
1063 document.body[i : i + 1] = [
1064 "\\begin_layout %s" % document.default_layout,
1065 "\\begin_inset Caption",
1067 "\\begin_layout %s" % document.default_layout,
# NOTE(review): numbered listing with gaps (orig. lines 1075-1076,
# 1078-1080, 1087, 1091, 1094-1095, 1098-1099, 1101-1102, 1105, 1107, 1109,
# 1111-1112, 1118, 1120, 1124-1125, 1128, 1130, 1143+ missing — several
# branch/loop lines are not visible). Code kept byte-identical.
1072 def revert_caption(document):
1073 "Convert caption insets to caption layouts."
1074 " This assumes that the text class has a caption style. "
1077 i = find_token(document.body, "\\begin_inset Caption", i)
1081 # We either need to delete the previous \begin_layout line, or we
1082 # need to end the previous layout if this inset is not in the first
1083 # position of the paragraph.
1084 layout_before = find_token_backwards(document.body, "\\begin_layout", i)
1085 if layout_before == -1:
1086 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
1088 layout_line = document.body[layout_before]
1089 del_layout_before = True
1090 l = layout_before + 1
1092 if document.body[l] != "":
1093 del_layout_before = False
1096 if del_layout_before:
1097 del document.body[layout_before:i]
1100 document.body[i:i] = ["\\end_layout", ""]
1103 # Find start of layout in the inset and end of inset
1104 j = find_token(document.body, "\\begin_layout", i)
1106 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
1108 k = find_end_of_inset(document.body, i)
1110 document.warning("Malformed LyX document: Missing `\\end_inset'.")
1113 # We either need to delete the following \end_layout line, or we need
1114 # to restart the old layout if this inset is not at the paragraph end.
1115 layout_after = find_token(document.body, "\\end_layout", k)
1116 if layout_after == -1:
1117 document.warning("Malformed LyX document: Missing `\\end_layout'.")
1119 del_layout_after = True
1121 while l < layout_after:
1122 if document.body[l] != "":
1123 del_layout_after = False
1126 if del_layout_after:
1127 del document.body[k + 1 : layout_after + 1]
1129 document.body[k + 1 : k + 1] = [layout_line, ""]
1131 # delete \begin_layout and \end_inset and replace \begin_inset with
1132 # "\begin_layout Caption". This works because we can only have one
1133 # paragraph in the caption inset: The old \end_layout will be recycled.
1134 del document.body[k]
1135 if document.body[k] == "":
1136 del document.body[k]
1137 del document.body[j]
1138 if document.body[j] == "":
1139 del document.body[j]
1140 document.body[i] = "\\begin_layout Caption"
1141 if document.body[i + 1] == "":
1142 del document.body[i + 1]
# Accents of InsetLaTeXAccent: accent letter -> Unicode combining character.
# NOTE(review): the numbered listing dropped the dict header (orig. line
# 1147) and the closing brace (orig. line 1166); both are restored here.
# The name `accent_map` is grounded by its use in _convert_accent below.
accent_map = {
    "`": "\u0300",  # grave
    "'": "\u0301",  # acute
    "^": "\u0302",  # circumflex
    "~": "\u0303",  # tilde
    "=": "\u0304",  # macron
    "u": "\u0306",  # breve
    ".": "\u0307",  # dot above
    '"': "\u0308",  # diaeresis
    "r": "\u030a",  # ring above
    "H": "\u030b",  # double acute
    "v": "\u030c",  # caron
    "b": "\u0320",  # minus sign below
    "d": "\u0323",  # dot below
    "c": "\u0327",  # cedilla
    "k": "\u0328",  # ogonek
    "t": "\u0361",  # tie. This is special: It spans two characters, but
    # only one is given as argument, so we don't need to
    # treat it differently.
}
# special accents of InsetLaTeXAccent without argument:
# accent letter -> the precomposed Unicode character it produces.
# NOTE(review): the numbered listing dropped the closing brace (orig. line
# 1175); it is restored here.
special_accent_map = {
    "i": "\u0131",  # dotless i
    "j": "\u0237",  # dotless j
    "l": "\u0142",  # l with stroke
    "L": "\u0141",  # L with stroke
}
# special accent arguments of InsetLaTeXAccent:
# LaTeX macro argument -> the Unicode character it stands for.
# NOTE(review): the numbered listing dropped the dict header (orig. line
# 1179) and closing brace; both are restored here. The name `accented_map`
# is grounded by its use in _convert_accent below (`char in accented_map`).
accented_map = {
    "\\i": "\u0131",  # dotless i
    "\\j": "\u0237",  # dotless j
}
# NOTE(review): numbered listing with gaps (orig. lines 1186, 1188, 1192,
# 1200, 1203, 1205, 1207, 1209+ missing — including the line that binds the
# local `type` used below, presumably from `accent`; confirm against the
# full source).
1185 def _convert_accent(accent, accented_char):
1187 char = accented_char
# Accents like \i, \l produce a fixed character with no argument.
1189 if type in special_accent_map:
1190 return special_accent_map[type]
1191 # a missing char is treated as space by LyX
1193 elif type == "q" and char in ["t", "d", "l", "L"]:
1194 # Special caron, only used with t, d, l and L.
1195 # It is not in the map because we convert it to the same unicode
1196 # character as the normal caron: \q{} is only defined if babel with
1197 # the czech or slovak language is used, and the normal caron
1198 # produces the correct output if the T1 font encoding is used.
1199 # For the same reason we never convert to \q{} in the other direction.
1201 elif char in accented_map:
1202 char = accented_map[char]
1204 # We can only convert accents on a single char
1206 a = accent_map.get(type)
# Compose base char + combining accent and normalize to a single NFC char.
1208 return unicodedata.normalize("NFC", f"{char}{a}")
1212 def convert_ertbackslash(body, i, ert, default_layout):
1213 r"""-------------------------------------------------------------------------------------------
1214 Convert backslashes and '\n' into valid ERT code, append the converted
1215 text to body[i] and return the (maybe incremented) line index i"""
1219 body[i] = body[i] + "\\backslash "
1223 body[i + 1 : i + 1] = [
1226 "\\begin_layout %s" % default_layout,
1231 body[i] = body[i] + c
1235 def convert_accent(document):
1236 # The following forms are supported by LyX:
1237 # '\i \"{a}' (standard form, as written by LyX)
1238 # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
1239 # '\i \"{ }' (also accepted if the accented char is a space)
1240 # '\i \" a' (also accepted)
1241 # '\i \"' (also accepted)
1242 re_wholeinset = re.compile(r"^(.*)(\\i\s+)(.*)$")
1243 re_contents = re.compile(r"^([^\s{]+)(.*)$")
1244 re_accentedcontents = re.compile(r"^\s*{?([^{}]*)}?\s*$")
1247 i = find_re(document.body, re_wholeinset, i)
1250 match = re_wholeinset.match(document.body[i])
1251 prefix = match.group(1)
1252 contents = match.group(3).strip()
1253 match = re_contents.match(contents)
1255 # Strip first char (always \)
1256 accent = match.group(1)[1:]
1257 accented_contents = match.group(2).strip()
1258 match = re_accentedcontents.match(accented_contents)
1259 accented_char = match.group(1)
1260 converted = _convert_accent(accent, accented_char)
1262 # Normalize contents
1263 contents = (f"{accent}{{{accented_char}}}",)
1265 document.body[i] = f"{prefix}{converted}"
1268 document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
1269 document.body[i] = prefix
1270 document.body[i + 1 : i + 1] = [
1271 "\\begin_inset ERT",
1274 "\\begin_layout %s" % document.default_layout,
1279 i = convert_ertbackslash(
1280 document.body, i + 7, "\\%s" % contents, document.default_layout
1282 document.body[i + 1 : i + 1] = ["\\end_layout", "", "\\end_inset"]
1286 def is_inset_line(document, i):
1287 """Line i of body has an inset"""
1288 if document.body[i][:1] == "\\":
1290 last_tokens = "".join(document.body[i].split()[-2:])
1291 return last_tokens.find("\\") != -1
1294 # A wrapper around normalize that handles special cases (cf. bug 3313)
1295 def normalize(form, text):
1296 # do not normalize OHM, ANGSTROM
1297 keep_characters = [0x2126, 0x212B]
1301 if ord(i) in keep_characters:
1302 if len(convert) > 0:
1303 result = result + unicodedata.normalize(form, convert)
1307 convert = convert + i
1308 if len(convert) > 0:
1309 result = result + unicodedata.normalize(form, convert)
1313 def revert_accent(document):
1314 inverse_accent_map = {}
1315 for k in accent_map:
1316 inverse_accent_map[accent_map[k]] = k
1317 inverse_special_accent_map = {}
1318 for k in special_accent_map:
1319 inverse_special_accent_map[special_accent_map[k]] = k
1320 inverse_accented_map = {}
1321 for k in accented_map:
1322 inverse_accented_map[accented_map[k]] = k
1324 # Since LyX may insert a line break within a word we must combine all
1325 # words before unicode normalization.
1326 # We do this only if the next line starts with an accent, otherwise we
1327 # would create things like '\begin_inset ERTstatus'.
1328 for i in range(len(document.body) - 1):
1329 if document.body[i] == "" or document.body[i + 1] == "" or document.body[i][-1] == " ":
1331 if document.body[i + 1][0] in inverse_accent_map and not is_inset_line(document, i):
1332 # the last character of this line and the first of the next line
1333 # form probably a surrogate pair, inline insets are excluded (second part of the test)
1334 while len(document.body[i + 1]) > 0 and document.body[i + 1][0] != " ":
1335 document.body[i] += document.body[i + 1][0]
1336 document.body[i + 1] = document.body[i + 1][1:]
1338 # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1339 # This is needed to catch all accented characters.
1340 for i in range(len(document.body)):
1341 # Unfortunately we have a mixture of unicode strings and plain strings,
1342 # because we never use u'xxx' for string literals, but 'xxx'.
1343 # Therefore we may have to try two times to normalize the data.
1345 document.body[i] = normalize("NFD", document.body[i])
1347 document.body[i] = normalize("NFD", str(document.body[i], "utf-8"))
1349 # Replace accented characters with InsetLaTeXAccent
1350 # Do not convert characters that can be represented in the chosen
1353 get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
1355 lang_re = re.compile(r"^\\lang\s(\S+)")
1358 while i < len(document.body):
1360 document.inputencoding == "auto" or document.inputencoding == "default"
1361 ) and document.cjk_encoding != "":
1362 # Track the encoding of the current line
1363 result = lang_re.match(document.body[i])
1365 language = result.group(1)
1366 if language == "default":
1367 encoding_stack[-1] = document.encoding
1369 from lyx2lyx_lang import lang
1371 encoding_stack[-1] = lang[language][3]
1373 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1374 encoding_stack.append(encoding_stack[-1])
1376 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1377 del encoding_stack[-1]
1380 for j in range(len(document.body[i])):
1381 # dotless i and dotless j are both in special_accent_map and can
1382 # occur as an accented character, so we need to test that the
1383 # following character is no accent
1384 if document.body[i][j] in inverse_special_accent_map and (
1385 j == len(document.body[i]) - 1
1386 or document.body[i][j + 1] not in inverse_accent_map
1388 accent = document.body[i][j]
1390 dummy = accent.encode(encoding_stack[-1])
1391 except UnicodeEncodeError:
1392 # Insert the rest of the line as new line
1393 if j < len(document.body[i]) - 1:
1394 document.body.insert(i + 1, document.body[i][j + 1 :])
1395 # Delete the accented character
1396 document.body[i] = document.body[i][:j]
1397 # Finally add the InsetLaTeXAccent
1398 document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1400 elif j > 0 and document.body[i][j] in inverse_accent_map:
1401 accented_char = document.body[i][j - 1]
1402 if accented_char == " ":
1403 # Conform to LyX output
1405 elif accented_char in inverse_accented_map:
1406 accented_char = inverse_accented_map[accented_char]
1407 accent = document.body[i][j]
1409 dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
1410 except UnicodeEncodeError:
1411 # Insert the rest of the line as new line
1412 if j < len(document.body[i]) - 1:
1413 document.body.insert(i + 1, document.body[i][j + 1 :])
1414 # Delete the accented characters
1415 document.body[i] = document.body[i][: j - 1]
1416 # Finally add the InsetLaTeXAccent
1417 document.body[i] += f"\\i \\{inverse_accent_map[accent]}{{{accented_char}}}"
1421 # Normalize to "Normal form C" (NFC, pre-composed characters) again
1422 for i in range(len(document.body)):
1423 document.body[i] = normalize("NFC", document.body[i])
1426 def normalize_font_whitespace_259(document):
1427 """Before format 259 the font changes were ignored if a
1428 whitespace was the first or last character in the sequence, this function
1429 transfers the whitespace outside."""
1432 "\\series": "default",
1433 "\\emph": "default",
1435 "\\shape": "default",
1437 "\\family": "default",
1439 return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """Move whitespace out of font *language* change sequences.

    Format 259 already transferred leading/trailing whitespace outside
    of most font-change sequences, but the \\lang property was forgotten
    at the time.  This applies the identical normalization, restricted
    to font language changes only.
    """
    return normalize_font_whitespace(document, {"\\lang": "default"})
1454 def get_paragraph_language(document, i):
1455 """Return the language of the paragraph in which line i of the document
1456 body is. If the first thing in the paragraph is a \\lang command, that
1457 is the paragraph's langauge; otherwise, the paragraph's language is the
1458 document's language."""
1460 lines = document.body
1462 first_nonempty_line = find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)
1464 words = lines[first_nonempty_line].split()
1466 if len(words) > 1 and words[0] == "\\lang":
1469 return document.language
1472 def normalize_font_whitespace(document, char_properties):
1473 """Before format 259 the font changes were ignored if a
1474 whitespace was the first or last character in the sequence, this function
1475 transfers the whitespace outside. Only a change in one of the properties
1476 in the provided char_properties is handled by this function."""
1478 if document.backend != "latex":
1481 lines = document.body
1486 while i < len(lines):
1487 words = lines[i].split()
1489 if len(words) > 0 and words[0] == "\\begin_layout":
1490 # a new paragraph resets all font changes
1492 # also reset the default language to be the paragraph's language
1493 if "\\lang" in list(char_properties.keys()):
1494 char_properties["\\lang"] = get_paragraph_language(document, i + 1)
1496 elif len(words) > 1 and words[0] in list(char_properties.keys()):
1497 # we have a font change
1498 if char_properties[words[0]] == words[1]:
1499 # property gets reset
1500 if words[0] in list(changes.keys()):
1501 del changes[words[0]]
1502 defaultproperty = True
1505 changes[words[0]] = words[1]
1506 defaultproperty = False
1508 # We need to explicitly reset all changed properties if we find
1509 # a space below, because LyX 1.4 would output the space after
1510 # closing the previous change and before starting the new one,
1511 # and closing a font change means to close all properties, not
1512 # just the changed one.
1514 if lines[i - 1] and lines[i - 1][-1] == " ":
1515 lines[i - 1] = lines[i - 1][:-1]
1516 # a space before the font change
1518 for k in list(changes.keys()):
1519 # exclude property k because that is already in lines[i]
1521 added_lines[1:1] = [f"{k} {changes[k]}"]
1522 for k in list(changes.keys()):
1523 # exclude property k because that must be added below anyway
1525 added_lines[0:0] = [f"{k} {char_properties[k]}"]
1527 # Property is reset in lines[i], so add the new stuff afterwards
1528 lines[i + 1 : i + 1] = added_lines
1530 # Reset property for the space
1531 added_lines[0:0] = [f"{words[0]} {char_properties[words[0]]}"]
1532 lines[i:i] = added_lines
1533 i = i + len(added_lines)
1537 and lines[i + 1][0] == " "
1538 and (len(changes) > 0 or not defaultproperty)
1540 # a space after the font change
1541 if lines[i + 1] == " " and lines[i + 2]:
1542 next_words = lines[i + 2].split()
1543 if len(next_words) > 0 and next_words[0] == words[0]:
1544 # a single blank with a property different from the
1545 # previous and the next line must not be changed
1548 lines[i + 1] = lines[i + 1][1:]
1550 for k in list(changes.keys()):
1551 # exclude property k because that is already in lines[i]
1553 added_lines[1:1] = [f"{k} {changes[k]}"]
1554 for k in list(changes.keys()):
1555 # exclude property k because that must be added below anyway
1557 added_lines[0:0] = [f"{k} {char_properties[k]}"]
1558 # Reset property for the space
1559 added_lines[0:0] = [f"{words[0]} {char_properties[words[0]]}"]
1560 lines[i:i] = added_lines
1561 i = i + len(added_lines)
1566 def revert_utf8x(document):
1567 "Set utf8x encoding to utf8."
1568 i = find_token(document.header, "\\inputencoding", 0)
1570 document.header.append("\\inputencoding auto")
1572 inputenc = get_value(document.header, "\\inputencoding", i)
1573 if inputenc == "utf8x":
1574 document.header[i] = "\\inputencoding utf8"
1575 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1578 def revert_utf8plain(document):
1579 "Set utf8plain encoding to utf8."
1580 i = find_token(document.header, "\\inputencoding", 0)
1582 document.header.append("\\inputencoding auto")
1584 inputenc = get_value(document.header, "\\inputencoding", i)
1585 if inputenc == "utf8-plain":
1586 document.header[i] = "\\inputencoding utf8"
1587 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1590 def revert_beamer_alert(document):
1591 "Revert beamer's \\alert inset back to ERT."
1594 i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
1597 document.body[i] = "\\begin_inset ERT"
1600 if document.body[i][:13] == "\\begin_layout":
1601 # Insert the \alert command
1602 document.body[i + 1] = "\\alert{" + document.body[i + 1] + "}"
1609 def revert_beamer_structure(document):
1610 "Revert beamer's \\structure inset back to ERT."
1613 i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
1616 document.body[i] = "\\begin_inset ERT"
1619 if document.body[i][:13] == "\\begin_layout":
1620 document.body[i + 1] = "\\structure{" + document.body[i + 1] + "}"
1627 def convert_changes(document):
1628 "Switch output_changes off if tracking_changes is off."
1629 i = find_token(document.header, "\\tracking_changes", 0)
1631 document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
1633 j = find_token(document.header, "\\output_changes", 0)
1635 document.warning("Malformed lyx document: Missing '\\output_changes'.")
1637 tracking_changes = get_value(document.header, "\\tracking_changes", i)
1638 output_changes = get_value(document.header, "\\output_changes", j)
1639 if tracking_changes == "false" and output_changes == "true":
1640 document.header[j] = "\\output_changes false"
1643 def revert_ascii(document):
1644 "Set ascii encoding to auto."
1645 i = find_token(document.header, "\\inputencoding", 0)
1647 document.header.append("\\inputencoding auto")
1649 inputenc = get_value(document.header, "\\inputencoding", i)
1650 if inputenc == "ascii":
1651 document.header[i] = "\\inputencoding auto"
1652 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    """Rename obsolete language names to their modern equivalents.

    "brazil" becomes "brazilian" and "portuges" becomes "portuguese",
    both in document.language and in the \\language header line.
    """
    lang = {"brazil": "brazilian", "portuges": "portuguese"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # Guard against a malformed header: without this check, i == -1
        # would silently overwrite the *last* header line instead.
        if i != -1:
            document.header[i] = "\\language %s" % document.language
def revert_language_name(document):
    """Rename modern language names back to their obsolete forms.

    "brazilian" becomes "brazil" and "portuguese" becomes "portuges",
    both in document.language and in the \\language header line.
    """
    lang = {"brazilian": "brazil", "portuguese": "portuges"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # Guard against a malformed header: without this check, i == -1
        # would silently overwrite the *last* header line instead.
        if i != -1:
            document.header[i] = "\\language %s" % document.language
1674 # \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    """Switch the document's text class from ``cv`` to ``simplecv``."""
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
def revert_cv_textclass(document):
    """Switch the document's text class from ``simplecv`` back to ``cv``."""
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
1686 # add scaleBeforeRotation graphics param
1687 def convert_graphics_rotation(document):
1688 "add scaleBeforeRotation graphics parameter."
1691 i = find_token(document.body, "\\begin_inset Graphics", i)
1694 j = find_end_of_inset(document.body, i + 1)
1697 document.warning("Malformed LyX document: Could not find end of graphics inset.")
1698 # Seach for rotateAngle and width or height or scale
1699 # If these params are not there, nothing needs to be done.
1700 k = find_token(document.body, "\trotateAngle", i + 1, j)
1701 l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
1702 if k != -1 and l != -1:
1703 document.body.insert(j, "scaleBeforeRotation")
1708 # remove scaleBeforeRotation graphics param
1709 def revert_graphics_rotation(document):
1710 "remove scaleBeforeRotation graphics parameter."
1713 i = find_token(document.body, "\\begin_inset Graphics", i)
1716 j = find_end_of_inset(document.body, i + 1)
1719 document.warning("Malformed LyX document: Could not find end of graphics inset.")
1720 # If there's a scaleBeforeRotation param, just remove that
1721 k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
1723 del document.body[k]
1725 # if not, and if we have rotateAngle and width or height or scale,
1726 # we have to put the rotateAngle value to special
1727 rotateAngle = get_value(document.body, "rotateAngle", i + 1, j)
1728 special = get_value(document.body, "special", i + 1, j)
1729 if rotateAngle != "":
1730 k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
1734 document.body.insert(j - 1, "\tspecial angle=%s" % rotateAngle)
1736 l = find_token(document.body, "\tspecial", i + 1, j)
1737 document.body[l] = document.body[l].replace(
1738 special, f"angle={rotateAngle},{special}"
1740 k = find_token(document.body, "\trotateAngle", i + 1, j)
1742 del document.body[k]
1746 def convert_tableborder(document):
1747 # The problem is: LyX doubles the table cell border as it ignores the "|" character in
1748 # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
1750 while i < len(document.body):
1751 h = document.body[i].find('leftline="true"', 0, len(document.body[i]))
1752 k = document.body[i].find("|>{", 0, len(document.body[i]))
1753 # the two tokens have to be in one line
1754 if h != -1 and k != -1:
1756 document.body[i] = (
1757 document.body[i][:k] + document.body[i][k + 1 : len(document.body[i])]
1762 def revert_tableborder(document):
1764 while i < len(document.body):
1765 h = document.body[i].find('leftline="true"', 0, len(document.body[i]))
1766 k = document.body[i].find(">{", 0, len(document.body[i]))
1767 # the two tokens have to be in one line
1768 if h != -1 and k != -1:
1770 document.body[i] = document.body[i][:k] + "|" + document.body[i][k:]
1774 def revert_armenian(document):
1775 # set inputencoding from armscii8 to auto
1776 if document.inputencoding == "armscii8":
1777 i = find_token(document.header, "\\inputencoding", 0)
1779 document.header[i] = "\\inputencoding auto"
1780 # check if preamble exists, if not k is set to -1
1783 while i < len(document.preamble):
1785 k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
1787 k = document.preamble[i].find("%", 0, len(document.preamble[i]))
1789 # add the entry \usepackage{armtex} to the document preamble
1790 if document.language == "armenian":
1791 # set the armtex entry as the first preamble line
1793 document.preamble[0:0] = ["\\usepackage{armtex}"]
1794 # create the preamble when it doesn't exist
1796 document.preamble.append("\\usepackage{armtex}")
1797 # Set document language from armenian to english
1798 if document.language == "armenian":
1799 document.language = "english"
1800 i = find_token(document.header, "\\language", 0)
1802 document.header[i] = "\\language english"
1805 def revert_CJK(document):
1806 "Set CJK encodings to default and languages chinese, japanese and korean to english."
1820 i = find_token(document.header, "\\inputencoding", 0)
1822 document.header.append("\\inputencoding auto")
1824 inputenc = get_value(document.header, "\\inputencoding", i)
1825 if inputenc in encodings:
1826 document.header[i] = "\\inputencoding default"
1827 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1830 document.language == "chinese-simplified"
1831 or document.language == "chinese-traditional"
1832 or document.language == "japanese"
1833 or document.language == "korean"
1835 document.language = "english"
1836 i = find_token(document.header, "\\language", 0)
1838 document.header[i] = "\\language english"
1841 def revert_preamble_listings_params(document):
1842 r"Revert preamble option \listings_params"
1843 i = find_token(document.header, "\\listings_params", 0)
1845 document.preamble.append("\\usepackage{listings}")
1846 document.preamble.append("\\lstset{%s}" % document.header[i].split()[1].strip('"'))
1847 document.header.pop(i)
1850 def revert_listings_inset(document):
1851 r"""Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1855 lstparams "language=Delphi"
1859 \begin_layout Standard
1869 \begin_layout Standard
1873 lstinline[language=Delphi]{var i = 10;}
1878 There can be an caption inset in this inset
1880 \begin_layout Standard
1881 \begin_inset Caption
1883 \begin_layout Standard
1885 \begin_inset LatexCommand label
1901 i = find_token(document.body, "\\begin_inset listings", i)
1905 if not "\\usepackage{listings}" in document.preamble:
1906 document.preamble.append("\\usepackage{listings}")
1907 j = find_end_of_inset(document.body, i + 1)
1909 # this should not happen
1915 for line in range(i + 1, i + 4):
1916 if document.body[line].startswith("inline"):
1917 inline = document.body[line].split()[1]
1918 if document.body[line].startswith("lstparams"):
1919 params = document.body[line].split()[1].strip('"')
1920 if document.body[line].startswith("status"):
1921 status = document.body[line].split()[1].strip()
1926 cap = find_token(document.body, "\\begin_inset Caption", i)
1928 cap_end = find_end_of_inset(document.body, cap + 1)
1930 # this should not happen
1933 lbl = find_token(document.body, "\\begin_inset LatexCommand label", cap + 1)
1935 lbl_end = find_end_of_inset(document.body, lbl + 1)
1937 # this should not happen
1942 for line in document.body[lbl : lbl_end + 1]:
1943 if line.startswith("name "):
1944 label = line.split()[1].strip('"')
1946 for line in document.body[cap:lbl] + document.body[lbl_end + 1 : cap_end + 1]:
1947 if not line.startswith("\\"):
1948 caption += line.strip()
1951 # looking for the oneline code for lstinline
1952 inlinecode = document.body[
1955 find_token(document.body, "\\begin_layout %s" % document.default_layout, i + 1)
1960 if len(caption) > 0:
1961 if len(params) == 0:
1962 params = "caption={%s}" % caption
1964 params += ",caption={%s}" % caption
1966 if len(params) == 0:
1967 params = "label={%s}" % label
1969 params += ",label={%s}" % label
1971 params = "[%s]" % params
1972 params = params.replace("\\", "\\backslash\n")
1973 if inline == "true":
1974 document.body[i : (j + 1)] = [
1975 r"\begin_inset ERT",
1976 "status %s" % status,
1977 r"\begin_layout %s" % document.default_layout,
1981 f"lstinline{params}{{{inlinecode}}}",
1987 document.body[i : j + 1] = (
1989 r"\begin_inset ERT",
1990 "status %s" % status,
1992 r"\begin_layout %s" % document.default_layout,
1996 r"begin{lstlisting}%s" % params,
1999 r"\begin_layout %s" % document.default_layout,
2001 + document.body[k : j - 1]
2004 r"\begin_layout %s" % document.default_layout,
2015 def revert_include_listings(document):
2016 r"""Revert lstinputlisting Include option , translate
2017 \begin_inset Include \lstinputlisting{file}[opt]
2027 \begin_layout Standard
2031 lstinputlisting{file}[opt]
2039 i = find_token(document.body, r"\begin_inset Include \lstinputlisting", i)
2043 if not "\\usepackage{listings}" in document.preamble:
2044 document.preamble.append("\\usepackage{listings}")
2045 j = find_end_of_inset(document.body, i + 1)
2047 # this should not happen
2049 # find command line lstinputlisting{file}[options]
2050 cmd, file, option = "", "", ""
2051 if re.match(r"\\(lstinputlisting){([.\w]*)}(.*)", document.body[i].split()[2]):
2052 cmd, file, option = re.match(
2053 r"\\(lstinputlisting){([.\w]*)}(.*)", document.body[i].split()[2]
2055 option = option.replace("\\", "\\backslash\n")
2056 document.body[i : j + 1] = [
2057 r"\begin_inset ERT",
2060 r"\begin_layout %s" % document.default_layout,
2064 f"{cmd}{option}{{{file}}}",
2071 def revert_ext_font_sizes(document):
2072 if document.backend != "latex":
2074 if not document.textclass.startswith("ext"):
2077 fontsize = get_value(document.header, "\\paperfontsize", 0)
2078 if fontsize not in ("10", "11", "12"):
2082 i = find_token(document.header, "\\paperfontsize", 0)
2083 document.header[i] = "\\paperfontsize default"
2084 insert_document_option(document, fontsize)
2087 def convert_ext_font_sizes(document):
2088 if document.backend != "latex":
2090 if not document.textclass.startswith("ext"):
2093 fontsize = get_value(document.header, "\\paperfontsize", 0)
2094 if fontsize != "default":
2097 i = find_token(document.header, "\\options", 0)
2101 options = get_value(document.header, "\\options", i)
2103 fontsizes = "10pt", "11pt", "12pt"
2104 for fs in fontsizes:
2105 if options.find(fs) != -1:
2107 else: # this else will only be attained if the for cycle had no match
2110 options = options.split(",")
2111 for j, opt in enumerate(options):
2112 if opt in fontsizes:
2119 k = find_token(document.header, "\\paperfontsize", 0)
2120 document.header[k] = "\\paperfontsize %s" % fontsize
2123 document.header[i] = "\\options %s" % ",".join(options)
2125 del document.header[i]
2128 def revert_separator_layout(document):
2129 r"""Revert --Separator-- to a lyx note
2132 \begin_layout --Separator--
2138 \begin_layout Standard
2139 \begin_inset Note Note
2142 \begin_layout Standard
2155 i = find_token(document.body, r"\begin_layout --Separator--", i)
2158 j = find_end_of_layout(document.body, i + 1)
2160 # this should not happen
2162 document.body[i : j + 1] = (
2164 r"\begin_layout %s" % document.default_layout,
2165 r"\begin_inset Note Note",
2168 r"\begin_layout %s" % document.default_layout,
2169 "Separate Environment",
2174 + document.body[i + 1 : j]
2175 + ["", r"\end_layout"]
2179 def convert_arabic(document):
2180 if document.language == "arabic":
2181 document.language = "arabic_arabtex"
2182 i = find_token(document.header, "\\language", 0)
2184 document.header[i] = "\\language arabic_arabtex"
2186 while i < len(document.body):
2187 h = document.body[i].find(r"\lang arabic", 0, len(document.body[i]))
2189 # change the language name
2190 document.body[i] = r"\lang arabic_arabtex"
2194 def revert_arabic(document):
2195 if document.language == "arabic_arabtex":
2196 document.language = "arabic"
2197 i = find_token(document.header, "\\language", 0)
2199 document.header[i] = "\\language arabic"
2201 while i < len(document.body):
2202 h = document.body[i].find(r"\lang arabic_arabtex", 0, len(document.body[i]))
2204 # change the language name
2205 document.body[i] = r"\lang arabic"
2213 supported_versions = ["1.5.0", "1.5"]
2216 [247, [convert_font_settings]],
2218 [249, [convert_utf8]],
2221 [252, [convert_commandparams, convert_bibitem]],
2223 [254, [convert_esint]],
2226 [257, [convert_caption]],
2227 [258, [convert_lyxline]],
2228 [259, [convert_accent, normalize_font_whitespace_259]],
2230 [261, [convert_changes]],
2232 [263, [normalize_language_name]],
2233 [264, [convert_cv_textclass]],
2234 [265, [convert_tableborder]],
2240 [271, [convert_ext_font_sizes]],
2243 [274, [normalize_font_whitespace_274]],
2244 [275, [convert_graphics_rotation]],
2245 [276, [convert_arabic]],
2249 [275, [revert_arabic]],
2250 [274, [revert_graphics_rotation]],
2252 [272, [revert_separator_layout]],
2256 revert_preamble_listings_params,
2257 revert_listings_inset,
2258 revert_include_listings,
2261 [270, [revert_ext_font_sizes]],
2262 [269, [revert_beamer_alert, revert_beamer_structure]],
2266 revert_preamble_listings_params,
2267 revert_listings_inset,
2268 revert_include_listings,
2271 [267, [revert_CJK]],
2272 [266, [revert_utf8plain]],
2273 [265, [revert_armenian]],
2274 [264, [revert_tableborder]],
2275 [263, [revert_cv_textclass]],
2276 [262, [revert_language_name]],
2277 [261, [revert_ascii]],
2279 [259, [revert_utf8x]],
2282 [256, [revert_caption]],
2283 [255, [revert_encodings]],
2284 [254, [revert_clearpage, revert_cleardoublepage]],
2285 [253, [revert_esint]],
2286 [252, [revert_nomenclature, revert_printnomenclature]],
2287 [251, [revert_commandparams]],
2288 [250, [revert_cs_label]],
2290 [248, [revert_accent, revert_utf8, revert_unicode]],
2291 [247, [revert_booktabs]],
2292 [246, [revert_font_settings]],
2293 [245, [revert_framed]],
2297 if __name__ == "__main__":