1 # -*- coding: utf-8 -*-
2 # This file is part of lyx2lyx
3 # -*- coding: utf-8 -*-
4 # Copyright (C) 2011 The LyX team
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 """ Convert files to the file format generated by lyx 2.1"""
26 # Uncomment only what you need to import, please.
28 from parser_tools import del_token, find_token, find_end_of, find_end_of_inset, \
29 find_end_of_layout, find_re, get_option_value, get_value, get_quoted_value, \
32 #from parser_tools import find_token, find_end_of, find_tokens, \
33 #find_token_exact, find_end_of_inset, find_end_of_layout, \
34 #find_token_backwards, is_in_inset, get_value, get_quoted_value, \
35 #del_token, check_token
37 from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, get_ert
39 #from lyx2lyx_tools import insert_to_preamble, \
40 # lyx2latex, latex_length, revert_flex_inset, \
41 # revert_font_attrs, hex2ratio, str2bool
43 ####################################################################
44 # Private helper functions
46 #def remove_option(lines, m, option):
47 #''' removes option from line m. returns whether we did anything '''
48 #l = lines[m].find(option)
51 #val = lines[m][l:].split('"')[1]
52 #lines[m] = lines[m][:l - 1] + lines[m][l+len(option + '="' + val + '"'):]
56 ###############################################################################
58 ### Conversion and reversion routines
60 ###############################################################################
def revert_visible_space(document):
    """Revert visible-space insets into their ERT counterpart.

    Every space inset in ``document.body`` that carries
    ``\\textvisiblespace{}`` is replaced by the lines returned from
    ``put_cmd_in_ert`` for the same command.
    """
    token = "\\begin_inset space \\textvisiblespace{}"
    i = 0
    while True:
        i = find_token(document.body, token, i)
        if i == -1:
            return
        end = find_end_of_inset(document.body, i)
        if end == -1:
            # With end == -1 the slice below would be body[i:0]: the ERT
            # would be inserted without removing the inset, and the same
            # inset would be found again forever.
            document.warning("Malformed LyX document: Can't find end of space inset at line " + str(i))
            i += 1
            continue
        document.body[i:end + 1] = put_cmd_in_ert("\\textvisiblespace{}")
def convert_undertilde(document):
    """Add a ``\\use_undertilde`` setting to the header.

    The setting is inserted after the first of ``\\use_mathdots``,
    ``\\use_mhchem`` or ``\\use_esint`` that is present.  It is written as
    2 when the preamble loads undertilde itself (that preamble line is
    then removed), and as 0 otherwise.
    """
    anchor = -1
    for token in ("\\use_mathdots", "\\use_mhchem", "\\use_esint"):
        anchor = find_token(document.header, token, 0)
        if anchor != -1:
            break
    if anchor == -1:
        document.warning("Malformed LyX document: Can't find \\use_mathdots.")
        return

    loaded = find_token(document.preamble, "\\usepackage{undertilde}", 0)
    if loaded == -1:
        document.header.insert(anchor + 1, "\\use_undertilde 0")
        return
    document.header.insert(anchor + 1, "\\use_undertilde 2")
    del document.preamble[loaded]
def revert_undertilde(document):
    """Remove ``\\use_undertilde`` and load undertilde in the preamble if needed.

    Value 0 loads nothing and value 2 always loads the package.  Any
    other value, an unparsable value or a missing header line is treated
    as "auto": the package is loaded only when a Formula inset contains
    ``\\utilde``.
    """
    undertilde = find_token(document.header, "\\use_undertilde", 0)
    if undertilde == -1:
        document.warning("No \\use_undertilde line. Assuming auto.")
    else:
        val = get_value(document.header, "\\use_undertilde", undertilde)
        del document.header[undertilde]
        try:
            usetilde = int(val)
        except (TypeError, ValueError):
            # Only a failed conversion means "invalid value"; a bare
            # except would also swallow KeyboardInterrupt and real bugs.
            document.warning("Invalid \\use_undertilde value: " + str(val) + ". Assuming auto.")
            usetilde = 1

        if usetilde == 0:
            # never load
            return
        if usetilde == 2:
            # always load
            add_to_preamble(document, ["\\usepackage{undertilde}"])
            return

    # Auto case: load undertilde only if \utilde is used in some formula.
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Formula', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            i += 1
            continue
        code = "\n".join(document.body[i:j])
        if "\\utilde" in code:
            add_to_preamble(document, ["\\@ifundefined{utilde}{\\usepackage{undertilde}}"])
            return
        i = j
def revert_negative_space(document):
    """Revert negmedspace and negthickspace insets into ERT.

    Each kind is handled in its own pass over ``document.body``, so an
    inset of one kind is still reverted when the other kind is absent.
    If anything was reverted and the header does not force amsmath
    (``\\use_amsmath 2``), a guarded amsmath load is added to the
    preamble.
    """
    reverted = False
    for cmd in ("\\negmedspace{}", "\\negthickspace{}"):
        token = "\\begin_inset space " + cmd
        i = 0
        while True:
            i = find_token(document.body, token, i)
            if i == -1:
                break
            end = find_end_of_inset(document.body, i)
            if end == -1:
                # body[i:0] would insert without removing the inset.
                document.warning("Malformed LyX document: Can't find end of space inset at line " + str(i))
                i += 1
                continue
            document.body[i:end + 1] = put_cmd_in_ert(cmd)
            reverted = True

    # load amsmath in the preamble if not already loaded
    if reverted and find_token(document.header, "\\use_amsmath 2", 0) == -1:
        add_to_preamble(document, ["\\@ifundefined{negthickspace}{\\usepackage{amsmath}}"])
def revert_math_spaces(document):
    """Revert formulas containing a protected hspace to TeX code.

    A Formula inset whose opening line contains ``\\hspace*`` is replaced
    by an ERT inset holding the text that follows the inset header on
    that line.
    """
    # Length of "\begin_inset Formula " including the trailing blank (21).
    skip = len("\\begin_inset Formula ")
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Formula", i)
        if i == -1:
            return
        if "\\hspace*" in document.body[i]:
            end = find_end_of_inset(document.body, i)
            if end == -1:
                # body[i:0] would insert the ERT and keep the formula.
                document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
            else:
                document.body[i:end + 1] = put_cmd_in_ert(document.body[i][skip:])
        i += 1
def convert_japanese_encodings(document):
    """Rename the Japanese input encodings to the names understood by platex.

    Only the first ``\\inputencoding`` header line is inspected; any other
    encoding value is left untouched.
    """
    renames = {
        "EUC-JP-pLaTeX": "euc",
        "JIS-pLaTeX": "jis",
        "SJIS-pLaTeX": "sjis",
    }
    line = find_token(document.header, "\\inputencoding", 0)
    if line == -1:
        return
    current = get_value(document.header, "\\inputencoding", line)
    if current in renames:
        document.header[line] = "\\inputencoding %s" % renames[current]
def revert_japanese_encodings(document):
    """Restore the long names of the Japanese input encodings.

    Only the first ``\\inputencoding`` header line is inspected; any other
    encoding value is left untouched.
    """
    renames = {
        "euc": "EUC-JP-pLaTeX",
        "jis": "JIS-pLaTeX",
        "sjis": "SJIS-pLaTeX",
    }
    line = find_token(document.header, "\\inputencoding", 0)
    if line == -1:
        return
    current = get_value(document.header, "\\inputencoding", line)
    if current in renames:
        document.header[line] = "\\inputencoding %s" % renames[current]
def revert_justification(document):
    """Remove the ``\\justification`` buffer parameter from the header.

    A warning is issued when ``del_token`` reports that nothing was removed.
    """
    removed = del_token(document.header, '\\justification', 0)
    if removed:
        return
    document.warning("Malformed LyX document: Missing \\justification.")
def revert_australian(document):
    """Set the English variants Australian and Newzealand to English.

    The document language and its ``\\language`` header line are
    rewritten, as is every ``\\lang australian`` / ``\\lang newzealand``
    switch in the body.
    """
    if document.language in ("australian", "newzealand"):
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"

    # One linear pass per variant, instead of restarting the second search
    # from the top of the body on every iteration.
    for variant in ("\\lang australian", "\\lang newzealand"):
        j = 0
        while True:
            j = find_token(document.body, variant, j)
            if j == -1:
                break
            document.body[j] = document.body[j].replace(variant, "\\lang english")
            j += 1
def convert_biblio_style(document):
    """Add a default ``\\biblio_style`` based on the citation engine.

    The style is chosen from the part of the ``\\cite_engine`` value
    before the first underscore and inserted right after that header
    line.  Nothing happens when there is no ``\\cite_engine`` line.
    """
    line = find_token(document.header, "\\cite_engine", 0)
    if line == -1:
        return
    defaults = {"basic": "plain", "natbib": "plainnat", "jurabib": "jurabib"}
    family = get_value(document.header, "\\cite_engine", line).split("_")[0]
    document.header.insert(line + 1, "\\biblio_style " + defaults[family])
def revert_biblio_style(document):
    """Replace the "default" bibtex option by the ``\\biblio_style`` value.

    The ``\\biblio_style`` header line is removed.  In every bibtex
    CommandInset, an option that is exactly ``default`` in the
    comma-separated ``options`` value is replaced by the removed style.
    """
    i = find_token(document.header, "\\biblio_style", 0)
    if i == -1:
        document.warning("No \\biblio_style line. Nothing to do.")
        return

    default_style = get_value(document.header, "\\biblio_style", i)
    del document.header[i]

    # We are looking for bibtex insets having the default option
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Can't find end of bibtex inset at line " + str(i))
            i += 1
            continue
        k = find_token(document.body, "options", i, j)
        if k != -1:
            options = get_quoted_value(document.body, "options", k).split(",")
            if "default" in options:
                # Swap whole options only; str.replace would also rewrite
                # "default" when it is part of another option's name.
                for pos, option in enumerate(options):
                    if option == "default":
                        options[pos] = default_style
                document.body[k] = 'options "%s"' % ",".join(options)
        i = j
def handle_longtable_captions(document, forward):
    """Adjust the head/foot flags of caption rows in longtables.

    document -- the document whose body is edited in place
    forward  -- True: a caption row with none of the head/foot flags set
                gets endfirsthead="true" appended to its caption option.
                False: every head/foot flag that is "true" on a caption
                row is set to "false".

    Tables whose features line does not mention islongtable are skipped.
    """
    flags = ('endfirsthead', 'endhead', 'endfoot', 'endlastfoot')
    begin_table = 0
    while True:
        begin_table = find_token(document.body, '<lyxtabular version=', begin_table)
        if begin_table == -1:
            break
        end_table = find_end_of(document.body, begin_table, '<lyxtabular', '</lyxtabular>')
        if end_table == -1:
            document.warning("Malformed LyX document: Could not find end of table.")
            begin_table += 1
            continue
        fline = find_token(document.body, "<features", begin_table, end_table)
        if fline == -1:
            document.warning("Can't find features for inset at line " + str(begin_table))
            begin_table += 1
            continue
        if "islongtable" not in document.body[fline]:
            # no longtable
            begin_table += 1
            continue
        numrows = get_option_value(document.body[begin_table], "rows")
        try:
            numrows = int(numrows)
        except (TypeError, ValueError):
            # Only a failed conversion means "unknown row count".
            document.warning(document.body[begin_table])
            document.warning("Unable to determine rows!")
            begin_table = end_table
            continue
        begin_row = begin_table
        for row in range(numrows):
            begin_row = find_token(document.body, '<row', begin_row, end_table)
            if begin_row == -1:
                document.warning("Can't find row " + str(row + 1))
                break
            end_row = find_end_of(document.body, begin_row, '<row', '</row>')
            if end_row == -1:
                document.warning("Can't find end of row " + str(row + 1))
                break
            if get_option_value(document.body[begin_row], 'caption') == 'true':
                if forward:
                    unflagged = True
                    for flag in flags:
                        if get_option_value(document.body[begin_row], flag) == 'true':
                            unflagged = False
                            break
                    if unflagged:
                        # The value deliberately closes the caption quote
                        # and opens a second attribute.
                        document.body[begin_row] = set_option_value(document.body[begin_row], 'caption', 'true", endfirsthead="true')
                else:
                    for flag in flags:
                        if get_option_value(document.body[begin_row], flag) == 'true':
                            document.body[begin_row] = set_option_value(document.body[begin_row], flag, 'false')
            begin_row = end_row
        # since there could be a tabular inside this one, we
        # cannot jump to end.
        begin_table += 1
def convert_longtable_captions(document):
    """Add a firsthead flag to longtable caption rows.

    Delegates to handle_longtable_captions in forward mode.
    """
    forward = True
    handle_longtable_captions(document, forward)
def revert_longtable_captions(document):
    """Remove the head/foot flags from longtable caption rows.

    Delegates to handle_longtable_captions in backward mode.
    """
    forward = False
    handle_longtable_captions(document, forward)
def convert_use_packages(document):
    """Rewrite header lines ``use_xxx yyy`` as ``use_package xxx yyy``.

    Handles amsmath, esint, mathdots, mhchem and undertilde; a package
    without a header line is skipped.
    """
    for name in ("amsmath", "esint", "mathdots", "mhchem", "undertilde"):
        token = "\\use_%s" % name
        line = find_token(document.header, token, 0)
        if line == -1:
            continue
        setting = get_value(document.header, token, line)
        document.header[line] = "\\use_package %s %s" % (name, setting)
def revert_use_packages(document):
    """Rewrite header lines ``use_package xxx yyy`` as ``use_xxx yyy``.

    The order of the use_package lines is arbitrary and not every
    package needs to be present.  The reverted lines are always written
    as a complete list in a fixed order, each package keeping its own
    value; a package without a line gets "1".
    """
    # A list, not a dict: the output order must be deterministic.
    packages = ["amsmath", "esint", "mathdots", "mhchem", "undertilde"]
    values = {}
    j = -1
    for p in packages:
        regexp = re.compile(r'(\\use_package\s+%s)' % p)
        i = find_re(document.header, regexp, 0)
        if i == -1:
            values[p] = "1"
            continue
        fields = get_value(document.header, "\\use_package", i).split()
        # Remember this package's own value instead of reusing whichever
        # value happened to be read last.
        if len(fields) > 1:
            values[p] = fields[1]
        else:
            values[p] = "1"
        del document.header[i]
        j = i
    if j == -1:
        # No use_package line at all: append at the end of the header.
        # NOTE(review): confirm this position suits older LyX versions.
        j = len(document.header)
    for p in packages:
        document.header.insert(j, "\\use_%s %s" % (p, values[p]))
        j += 1
def convert_use_mathtools(document):
    """Insert a ``\\use_package mathtools`` line into the header.

    The line goes after the first ``\\use_package`` line.  Its value is 2
    when the preamble loads mathtools itself (that preamble line is then
    removed), and 0 otherwise.
    """
    anchor = find_token(document.header, "\\use_package", 0)
    if anchor == -1:
        document.warning("Malformed LyX document: Can't find \\use_package.")
        return
    loaded = find_token(document.preamble, "\\usepackage{mathtools}", 0)
    if loaded == -1:
        document.header.insert(anchor + 1, "\\use_package mathtools 0")
        return
    document.header.insert(anchor + 1, "\\use_package mathtools 2")
    del document.preamble[loaded]
def revert_use_mathtools(document):
    """Remove ``\\use_package mathtools`` and load the package if needed.

    Value "2" always loads mathtools.  Value "1", which is also assumed
    when the header line is missing, loads it only if a Formula inset
    uses one of the mathtools commands listed below.  Any other value
    loads nothing.
    """
    setting = "1"  # default is auto
    line = find_re(document.header, re.compile(r'(\\use_package\s+mathtools)'), 0)
    if line != -1:
        setting = get_value(document.header, "\\use_package", line).split()[1]
        del document.header[line]

    if setting == "2":  # on
        add_to_preamble(document, ["\\usepackage{mathtools}"])
        return
    if setting != "1":  # neither on nor auto
        return

    macros = ["\\%s" % name for name in (
        "mathclap", "mathllap", "mathrlap",
        "lgathered", "rgathered", "vcentcolon", "dblcolon",
        "coloneqq", "Coloneqq", "coloneq", "Coloneq", "eqqcolon",
        "Eqqcolon", "eqcolon", "Eqcolon", "colonapprox",
        "Colonapprox", "colonsim", "Colonsim")]
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Formula', pos)
        if pos == -1:
            return
        stop = find_end_of_inset(document.body, pos)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(pos))
            pos += 1
            continue
        formula = "\n".join(document.body[pos:stop])
        for macro in macros:
            if macro in formula:
                add_to_preamble(document, ["\\usepackage{mathtools}"])
                return
        pos = stop
def convert_cite_engine_type(document):
    """Split the citation engine into ``\\cite_engine`` and ``\\cite_engine_type``.

    A value of the form ``engine_type`` is split at the underscore.
    Otherwise the type is looked up from the engine name (basic or
    jurabib).  The type line is inserted right after the engine line.
    """
    line = find_token(document.header, "\\cite_engine", 0)
    if line == -1:
        return
    engine = get_value(document.header, "\\cite_engine", line)
    if "_" in engine:
        engine, engine_type = engine.split("_")
    else:
        fallback = {"basic": "numerical", "jurabib": "authoryear"}
        engine_type = fallback[engine]
    document.header[line] = "\\cite_engine " + engine
    document.header.insert(line + 1, "\\cite_engine_type " + engine_type)
def revert_cite_engine_type(document):
    """Fold ``\\cite_engine_type`` back into the natbib engine name.

    The type line is removed from the header ("numerical" is assumed if
    it is missing) and appended, after an underscore, to a
    ``\\cite_engine natbib`` line.
    """
    type_line = find_token(document.header, "\\cite_engine_type", 0)
    if type_line != -1:
        suffix = get_value(document.header, "\\cite_engine_type", type_line)
        del document.header[type_line]
    else:
        document.warning("No \\cite_engine_type line. Assuming numerical.")
        suffix = "numerical"

    # Only the natbib engine had the type appended.
    natbib_line = find_token(document.header, "\\cite_engine natbib", 0)
    if natbib_line != -1:
        document.header[natbib_line] = "\\cite_engine natbib_" + suffix
def revert_cancel(document):
    """Add the cancel package to the preamble if a formula needs it.

    The package is loaded once, as soon as a Formula inset containing one
    of the cancel commands is found.
    """
    macros = ["\\%s" % name for name in ("cancelto", "cancel", "bcancel", "xcancel")]
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Formula', pos)
        if pos == -1:
            return
        stop = find_end_of_inset(document.body, pos)
        if stop == -1:
            document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(pos))
            pos += 1
            continue
        formula = "\n".join(document.body[pos:stop])
        for macro in macros:
            if macro in formula:
                add_to_preamble(document, ["\\usepackage{cancel}"])
                return
        pos = stop
def revert_verbatim(document):
    """Revert Verbatim environments completely to TeX code.

    Each Verbatim layout becomes an ERT inset that opens with
    \\begin{verbatim} and closes with \\end{verbatim}.  Newline insets
    inside the layout become paragraph breaks of the ERT.  Directly
    consecutive Verbatim layouts are merged into one ERT.
    """
    # Closes one Plain Layout paragraph of the ERT and opens the next.
    par_break = ['\\end_layout', '', '\\begin_layout Plain Layout']
    subst_end = ['\\end_layout', '', '\\begin_layout Plain Layout',
                 '\\end_layout', '',
                 '\\begin_layout Plain Layout', '', '',
                 '\\backslash', '',
                 'end{verbatim}',
                 '\\end_layout', '', '\\end_inset',
                 '', '', '\\end_layout']
    subst_begin = ['\\begin_layout Standard', '\\noindent',
                   '\\begin_inset ERT', 'status collapsed', '',
                   '\\begin_layout Plain Layout', '', '', '\\backslash',
                   'begin{verbatim}',
                   '\\end_layout', '', '\\begin_layout Plain Layout', '']
    i = 0
    consecutive = False
    while True:
        i = find_token(document.body, "\\begin_layout Verbatim", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Can't find end of Verbatim layout")
            i += 1
            continue
        # delete all line breaks insets (there are no other insets)
        l = i
        while True:
            # Bound the search by the layout end so that newline insets of
            # later paragraphs are left alone.
            n = find_token(document.body, "\\begin_inset Newline newline", l, j)
            if n == -1:
                n = find_token(document.body, "\\begin_inset Newline linebreak", l, j)
                if n == -1:
                    break
            m = find_end_of_inset(document.body, n)
            del document.body[m:m + 1]
            document.body[n:n + 1] = par_break
            l += 1
            # one line removed, one line replaced by three
            j += 1
        # consecutive verbatim environments need to be connected
        k = find_token(document.body, "\\begin_layout Verbatim", j)
        followed = (k == j + 2)
        # Edit the end (index j) first so that index i stays valid.
        if followed:
            document.body[j:j + 1] = par_break
        else:
            document.body[j:j + 1] = subst_end
            # the next paragraph must not be indented
            document.body[j + 19:j + 19] = ['\\noindent']
        if consecutive:
            # continuation of an already opened ERT: drop the layout header
            del document.body[i:i + 1]
        else:
            document.body[i:i + 1] = subst_begin
        consecutive = followed
def revert_tipa(document):
    """Revert native TIPA insets to mathed or ERT.

    An IPA inset with more than one embedded layout, or more than one
    content line, is wrapped in ERT \\begin{IPA} ... \\end{IPA} and
    tipa/tipx are loaded in the preamble.  Otherwise the inset becomes a
    Formula inset using \\textipa.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset IPA", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Can't find end of IPA inset")
            i += 1
            continue
        n = find_token(document.body, "\\begin_layout", i, j)
        if n == -1:
            document.warning("Malformed lyx document: IPA inset has no embedded layout")
            i += 1
            continue
        m = find_end_of_layout(document.body, n)
        if m == -1:
            document.warning("Malformed lyx document: Can't find end of embedded layout")
            i += 1
            continue
        content = document.body[n + 1:m]
        p = find_token(document.body, "\\begin_layout", m, j)
        if p != -1 or len(content) > 1:
            content = document.body[i + 1:j]
            # IPA insets with multiple pars need to be wrapped by \begin{IPA}...\end{IPA}
            subst = ['\\end_layout', '', '\\begin_layout Standard'] + put_cmd_in_ert("\\begin{IPA}") + ['\\end_layout'] + content + ['\\begin_layout Standard'] + put_cmd_in_ert("\\end{IPA}")
            add_to_preamble(document, ["\\usepackage{tipa,tipx}"])
        else:
            # single-par IPA insets can be reverted to mathed
            if content:
                text = content[0]
            else:
                # an empty layout has no content line to index
                text = ""
            subst = ["\\begin_inset Formula $\\text{\\textipa{" + text + "}}$", "\\end_inset"]
        document.body[i:j + 1] = subst
        # Never jump past the end of the replacement: when it is shorter
        # than the old inset, the following lines have moved up.
        i = min(j, i + len(subst))
def revert_cell_rotation(document):
    """Revert cell rotation angles to the old boolean form or to TeX code.

    rotate="0" is removed and rotate="90" becomes rotate="true".  For any
    other value the option is removed and the cell content is wrapped in
    an ERT turn environment; the rotating package is then loaded.
    """
    rgx = re.compile(r' rotate="[^"]+?"')
    load_rotating = False
    i = 0
    try:
        while True:
            # first, let's find out if we need to do anything
            i = find_token(document.body, '<cell ', i)
            if i == -1:
                return
            j = document.body[i].find('rotate="')
            if j != -1:
                k = document.body[i].find('"', j + 8)
                value = document.body[i][j + 8 : k]
                if value == "0":
                    # remove rotate option
                    document.body[i] = rgx.sub('', document.body[i])
                elif value == "90":
                    # The pattern consumes the blank before the attribute,
                    # so the replacement must put it back.
                    document.body[i] = rgx.sub(' rotate="true"', document.body[i])
                else:
                    load_rotating = True
                    # remove rotate option
                    document.body[i] = rgx.sub('', document.body[i])
                    # write ERT; the fixed offsets presumably rely on the
                    # usual single-paragraph cell layout (TODO confirm)
                    document.body[i + 5 : i + 5] = \
                        put_cmd_in_ert("\\end{turn}")
                    document.body[i + 4 : i + 4] = \
                        put_cmd_in_ert("\\begin{turn}{" + value + "}")
            i += 1
    finally:
        # runs on every exit path, including the return above
        if load_rotating:
            # "\\usepackage": a bare "\u" is an invalid escape on Python 3
            add_to_preamble(document, ["\\@ifundefined{turnbox}{\\usepackage{rotating}}{}"])
def convert_cell_rotation(document):
    """Convert cell rotation statements from "true" to "90".

    Only ``<cell`` lines that contain rotate="true" are changed.
    """
    rotate_attr = re.compile(r'rotate="[^"]+?"')
    pos = find_token(document.body, '<cell ', 0)
    while pos != -1:
        if 'rotate="true"' in document.body[pos]:
            # convert "true" to "90"
            document.body[pos] = rotate_attr.sub('rotate="90"', document.body[pos])
        pos = find_token(document.body, '<cell ', pos + 1)
def revert_table_rotation(document):
    """Revert table rotation angles to the old boolean form or to TeX code.

    rotate="0" is removed and rotate="90" becomes rotate="true".  For any
    other value the option is removed and the table is wrapped in an ERT
    turn environment; the rotating package is then loaded.
    """
    load_rotating = False
    i = 0
    try:
        while True:
            # first, let's find out if we need to do anything
            i = find_token(document.body, '<features ', i)
            if i == -1:
                return
            j = document.body[i].find('rotate="')
            if j != -1:
                k = document.body[i].find('"', j + 8)
                value = document.body[i][j + 8 : k]
                if value == "0":
                    rgx = re.compile(r' rotate="[^"]+?"')
                    # remove rotate option
                    document.body[i] = rgx.sub('', document.body[i])
                elif value == "90":
                    rgx = re.compile(r'rotate="[^"]+?"')
                    document.body[i] = rgx.sub('rotate="true"', document.body[i])
                else:
                    # Search from the features LINE i; j is only a column
                    # offset inside that line.
                    # NOTE(review): a nested tabular ends first; consider
                    # find_end_of if nesting must be supported.
                    end_table = find_token(document.body, '</lyxtabular>', i)
                    if end_table == -1:
                        document.warning("Malformed LyX document: Could not find end of table.")
                        i += 1
                        continue
                    rgx = re.compile(r' rotate="[^"]+?"')
                    load_rotating = True
                    # remove rotate option
                    document.body[i] = rgx.sub('', document.body[i])
                    # write ERT; the end is inserted first so that the
                    # start index stays valid
                    document.body[end_table + 3 : end_table + 3] = \
                        put_cmd_in_ert("\\end{turn}")
                    document.body[i - 2 : i - 2] = \
                        put_cmd_in_ert("\\begin{turn}{" + value + "}")
            i += 1
    finally:
        # runs on every exit path, including the return above
        if load_rotating:
            # "\\usepackage": a bare "\u" is an invalid escape on Python 3
            add_to_preamble(document, ["\\@ifundefined{turnbox}{\\usepackage{rotating}}{}"])
def convert_table_rotation(document):
    """Convert table rotation statements from "true" to "90".

    Only ``<features`` lines that contain rotate="true" are changed.
    """
    rotate_attr = re.compile(r'rotate="[^"]+?"')
    pos = find_token(document.body, '<features ', 0)
    while pos != -1:
        if 'rotate="true"' in document.body[pos]:
            # convert "true" to "90"
            document.body[pos] = rotate_attr.sub('rotate="90"', document.body[pos])
        pos = find_token(document.body, '<features ', pos + 1)
def convert_listoflistings(document):
    """Convert an ERT \\lstlistoflistings to a TOC lstlistoflistings inset.

    Only ERT insets whose content is exactly the command are converted.
    The round trip is possible because the command is so simple.
    """
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset ERT", pos)
        if pos == -1:
            return
        stop = find_end_of_inset(document.body, pos)
        if stop == -1:
            document.warning("Malformed lyx document: Can't find end of ERT inset")
            pos += 1
            continue
        if get_ert(document.body, pos) != "\\lstlistoflistings{}":
            pos = stop + 1
            continue
        # The \end_inset line at index stop is kept for the new inset.
        document.body[pos:stop] = ["\\begin_inset CommandInset toc", "LatexCommand lstlistoflistings", ""]
        pos += 4
def revert_listoflistings(document):
    """Convert a TOC lstlistoflistings inset to an ERT \\lstlistoflistings.

    Every converted inset also adds the listings package to the preamble.
    Other TOC insets are left alone.
    """
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset CommandInset toc", pos)
        if pos == -1:
            return
        if document.body[pos + 1] != "LatexCommand lstlistoflistings":
            pos += 1
            continue
        stop = find_end_of_inset(document.body, pos)
        if stop == -1:
            document.warning("Malformed lyx document: Can't find end of TOC inset")
            pos += 1
            continue
        document.body[pos:stop + 1] = put_cmd_in_ert("\\lstlistoflistings{}")
        add_to_preamble(document, ["\\usepackage{listings}"])
        pos += 1
supported_versions = ["2.1.0","2.1"]

# Each entry is [target file format, [routines to run for that step]].
# A format number without a routine keeps an empty list so that the
# sequence of format numbers has no holes.
# NOTE(review): presumably consumed step by step by the lyx2lyx driver;
# confirm against the caller.
convert = [
           [414, []],
           [415, [convert_undertilde]],
           [416, []],
           [417, [convert_japanese_encodings]],
           [418, []],
           [419, []],
           [420, [convert_biblio_style]],
           [421, [convert_longtable_captions]],
           [422, [convert_use_packages]],
           [423, [convert_use_mathtools]],
           [424, [convert_cite_engine_type]],
           [425, []],
           [426, []],
           [427, []],
           [428, [convert_cell_rotation]],
           [429, [convert_table_rotation]],
           [430, [convert_listoflistings]],
          ]

# Same layout as above, in descending format order.
revert =  [
           [429, [revert_listoflistings]],
           [428, [revert_table_rotation]],
           [427, [revert_cell_rotation]],
           [426, [revert_tipa]],
           [425, [revert_verbatim]],
           [424, [revert_cancel]],
           [423, [revert_cite_engine_type]],
           [422, [revert_use_mathtools]],
           [421, [revert_use_packages]],
           [420, [revert_longtable_captions]],
           [419, [revert_biblio_style]],
           [418, [revert_australian]],
           [417, [revert_justification]],
           [416, [revert_japanese_encodings]],
           [415, [revert_negative_space, revert_math_spaces]],
           [414, [revert_undertilde]],
           [413, [revert_visible_space]]
          ]
779 if __name__ == "__main__":