1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2007 José Matos <jamatos@lyx.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 """ Convert files to the file format generated by lyx 1.6"""
25 from parser_tools import find_token, find_end_of, find_tokens, get_value
27 ####################################################################
28 # Private helper functions
def find_end_of_inset(lines, i):
    """Find end of inset, where lines[i] is included.

    Thin wrapper around find_end_of() using the inset begin/end markers;
    whatever find_end_of() returns is passed straight back to the caller.
    """
    opening = "\\begin_inset"
    closing = "\\end_inset"
    return find_end_of(lines, i, opening, closing)
35 ####################################################################
def fix_wrong_tables(document):
    """Strip misplaced multicolumn="2" flags from table cells.

    Every "\\begin_inset Tabular" in document.body is visited and its cells
    are walked row by row.  A cell whose first quoted attribute value is 2
    and that either starts a row or follows a cell without an active
    multicolumn flag gets characters 5..20 of its "<cell" line removed
    (the place where the multicolumn attribute is expected to sit).

    document -- object exposing a mutable list ``body`` and ``warning(msg)``.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return

        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Step past this inset; retrying from the same index would find
            # it again and never terminate.
            i += 1
            continue

        # The row and column counts are the 2nd and 3rd quoted values on the
        # line that follows the inset header.
        header_fields = document.body[i + 1].split('"')
        nrows = int(header_fields[3])
        ncols = int(header_fields[5])

        m = i + 1
        prev_multicolumn = 0
        for n in range(nrows * ncols):
            k = n % ncols
            if k == 0:
                # A new row starts: nothing precedes its first cell.
                prev_multicolumn = 0

            m = find_token(document.body, '<cell', m)
            if m == -1:
                # Fewer cells than announced; do not touch body[-1].
                break

            cell = document.body[m]
            if cell.find('multicolumn') != -1:
                multicol_cont = int(cell.split('"')[1])
                if multicol_cont == 2 and (k == 0 or prev_multicolumn == 0):
                    document.body[m] = cell[:5] + cell[21:]
                    prev_multicolumn = 0
                else:
                    prev_multicolumn = multicol_cont
            else:
                prev_multicolumn = 0

            # Continue after this cell so the next search finds the next one.
            m += 1

        i = j + 1
def close_begin_deeper(document):
    """Balance unclosed "\\begin_deeper" lines in document.body.

    Counts begin/end markers over the whole body and inserts one
    "\\end_deeper" per surplus begin just before the last two body lines.
    """
    markers = ["\\begin_deeper", "\\end_deeper"]
    depth = 0
    pos = find_tokens(document.body, markers, 0)
    while pos != -1:
        if document.body[pos][:13] == "\\begin_deeper":
            depth += 1
        else:
            depth -= 1
        pos = find_tokens(document.body, markers, pos + 1)

    # A non-positive depth yields an empty list, i.e. nothing is inserted.
    document.body[-2:-2] = ['\\end_deeper'] * depth
def long_charstyle_names(document):
    """Prefix the name of every CharStyle inset with "CharStyle:"."""
    token = "\\begin_inset CharStyle"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        old_line = document.body[pos]
        document.body[pos] = old_line.replace("CharStyle ", "CharStyle CharStyle:")
        pos = find_token(document.body, token, pos + 1)
def revert_long_charstyle_names(document):
    """Remove the "CharStyle:" prefix from the name of every CharStyle inset.

    Inverse of long_charstyle_names(): "CharStyle CharStyle:Foo" becomes
    "CharStyle Foo" again.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # The replacement keeps the blank between the inset type and the
        # style name; dropping it would glue both into "CharStyleFoo".
        document.body[i] = document.body[i].replace("CharStyle CharStyle:", "CharStyle ")
        i += 1
def axe_show_label(document):
    """Replace the show_label line of CharStyle insets by a status line.

    "show_label true" becomes "status open", "show_label false" becomes
    "status collapsed"; in both cases the line that followed is deleted.
    Anything else only produces a warning.
    """
    token = "\\begin_inset CharStyle"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        flag_line = document.body[pos + 1]
        if flag_line.find("show_label") == -1:
            document.warning("Malformed LyX document: show_label missing in CharStyle.")
        elif flag_line.find("true") != -1:
            document.body[pos + 1] = "status open"
            del document.body[ pos + 2]
        elif flag_line.find("false") != -1:
            document.body[pos + 1] = "status collapsed"
            del document.body[ pos + 2]
        else:
            document.warning("Malformed LyX document: show_label neither false nor true.")
        pos = find_token(document.body, token, pos + 1)
def revert_show_label(document):
    """Re-insert a show_label line in front of the status line of CharStyle insets.

    "status open" gets "show_label true", "status collapsed" gets
    "show_label false"; any other line only produces a warning.
    """
    token = "\\begin_inset CharStyle"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        status_line = document.body[pos + 1]
        if status_line.find("status open") != -1:
            document.body.insert(pos + 1, "show_label true")
        elif status_line.find("status collapsed") != -1:
            document.body.insert(pos + 1, "show_label false")
        else:
            document.warning("Malformed LyX document: no legal status line in CharStyle.")
        pos = find_token(document.body, token, pos + 1)
def revert_begin_modules(document):
    """Delete every \\begin_modules ... \\end_modules section from the header."""
    start = 0
    while True:
        start = find_token(document.header, "\\begin_modules", start)
        if start == -1:
            return
        stop = find_end_of(document.header, start, "\\begin_modules", "\\end_modules")
        if stop == -1:
            # this should not happen
            return
        del document.header[start : stop + 1]
def convert_flex(document):
    """Convert CharStyle to Flex.

    Rewrites the header line of every CharStyle inset in document.body.
    """
    old_header = '\\begin_inset CharStyle'
    new_header = '\\begin_inset Flex'
    pos = 0
    while True:
        # The search restarts on the line just rewritten.
        pos = find_token(document.body, "\\begin_inset CharStyle", pos)
        if pos == -1:
            return
        document.body[pos] = document.body[pos].replace(old_header, new_header)
def revert_flex(document):
    """Convert Flex to CharStyle.

    Rewrites the header line of every Flex inset in document.body.
    """
    old_header = '\\begin_inset Flex'
    new_header = '\\begin_inset CharStyle'
    pos = 0
    while True:
        # The search restarts on the line just rewritten.
        pos = find_token(document.body, "\\begin_inset Flex", pos)
        if pos == -1:
            return
        document.body[pos] = document.body[pos].replace(old_header, new_header)
177 # Discard PDF options for hyperref
def revert_pdf_options(document):
    """Revert PDF options for hyperref.

    Deletes, for each hyperref/PDF header setting listed below, the first
    header line found for it.  Every setting is looked up from the top of
    the header, so the result does not depend on the order of the lines
    nor on whether an earlier setting was present.
    """
    # A name that is the beginning of another one is listed after it, so a
    # search for the short name cannot remove the longer setting by mistake
    # (NOTE(review): assumes find_token matches on the start of the line --
    # confirm against parser_tools).
    tokens = (
        "\\use_hyperref",
        "\\pdf_store_options",
        "\\pdf_title",
        "\\pdf_author",
        "\\pdf_subject",
        "\\pdf_keywords",
        "\\pdf_bookmarksopenlevel",
        "\\pdf_bookmarksopen",
        "\\pdf_bookmarksnumbered",
        "\\pdf_bookmarks",
        "\\pdf_breaklinks",
        "\\pdf_pdfborder",
        "\\pdf_colorlinks",
        "\\pdf_backref",
        "\\pdf_pagebackref",
        "\\pdf_pagemode",
        "\\pdf_quoted_options",
    )
    for token in tokens:
        i = find_token(document.header, token, 0)
        if i != -1:
            del document.header[i]
def remove_inzip_options(document):
    """Remove inzipName and embed options from the Graphics inset.

    For every Graphics inset in document.body the inzipName line is
    deleted, together with the embed line that directly follows it.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            i += 1
            continue
        # If there's a inzip param, just remove that
        k = find_token(document.body, "\tinzipName", i + 1, j)
        if k != -1:
            del document.body[k]
            # embed option must follow the inzipName option; after the
            # deletion above it has moved up into slot k, not k + 1.
            if k < len(document.body) and document.body[k].strip().startswith("embed"):
                del document.body[k]
        i = i + 1
def convert_inset_command(document):
    r"""Convert old-style command insets to CommandInset form.

    Convert:
        \begin_inset LatexCommand cmd
    to
        \begin_inset CommandInset InsetType
        LatexCommand cmd

    The inset type is derived from the command name; a header line that
    carries no command name is reported and left unchanged.
    """
    header_re = re.compile(r'\\begin_inset LatexCommand (.*)$')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            return
        line = document.body[i]
        m = header_re.match(line)
        if m is None:
            document.warning("Malformed LyX document: Missing LatexCommand in " + line + ".")
            i += 1
            continue
        cmdName = m.group(1)
        #this is adapted from factory.cpp
        if cmdName[0:4].lower() == "cite":
            insetName = "citation"
        elif cmdName == "url" or cmdName == "htmlurl":
            insetName = "url"
        elif cmdName[-3:] == "ref":
            insetName = "ref"
        elif cmdName == "tableofcontents":
            insetName = "toc"
        elif cmdName == "printnomenclature":
            insetName = "nomencl_print"
        elif cmdName == "printindex":
            insetName = "index_print"
        else:
            insetName = cmdName
        insertion = ["\\begin_inset CommandInset " + insetName, "LatexCommand " + cmdName]
        document.body[i : i+1] = insertion
        # Skip the two lines that were just written.
        i += 2
def revert_inset_command(document):
    r"""Convert CommandInset insets back to old-style command insets.

    Convert:
        \begin_inset CommandInset InsetType
        LatexCommand cmd
    to
        \begin_inset LatexCommand cmd
    Some insets may end up being converted to insets earlier versions of LyX
    will not be able to recognize. Not sure what to do about that.
    """
    command_re = re.compile(r'LatexCommand\s+(.*)$')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset", i)
        if i == -1:
            return
        nextline = document.body[i+1]
        m = command_re.match(nextline)
        if not m:
            document.warning("Malformed LyX document: Missing LatexCommand in " + document.body[i] + ".")
            # Move on; searching again from i would hit the same inset forever.
            i += 1
            continue
        cmdName = m.group(1)
        insertion = ["\\begin_inset LatexCommand " + cmdName]
        document.body[i : i+2] = insertion
        i += 1
def convert_wrapfig_options(document):
    """Convert optional options for wrap floats (wrapfig).

    Adds the tokens "lines", "placement" and "overhang" after the header of
    every "Wrap figure" inset; an existing placement line is kept.
    """
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Wrap figure", pos)
        if pos == -1:
            return
        document.body.insert(pos + 1, "lines 0")
        # placement can be already set or not; if not, set it
        if find_token(document.body, "placement", pos) != pos + 2:
            document.body.insert(pos + 2, "placement o")
        document.body.insert(pos + 3, "overhang 0col%")
        pos = pos + 1
def revert_wrapfig_options(document):
    """Revert optional options for wrap floats (wrapfig).

    Removes a "lines" line together with the "overhang" line two lines
    below it.  When the overhang line is somewhere else the match is
    reported and left untouched, so unrelated body lines are never deleted.
    """
    i = 0
    while True:
        i = find_token(document.body, "lines", i)
        if i == -1:
            return
        j = find_token(document.body, "overhang", i+1)
        if j == -1:
            # No overhang line below this point: nothing left to revert.
            return
        if j != i + 2:
            document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float.")
            i = i + 1
            continue
        # Delete the later line first so index i stays valid.
        del document.body[j]
        del document.body[i]
        i = i + 1
def convert_latexcommand_index(document):
    """Convert from LatexCommand form to collapsable form.

    Each "CommandInset index" inset whose command is "index" is replaced by
    an "Index" inset holding the former name argument as its text.  Other
    insets found by the search (e.g. index_print) are skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index", i)
        if i == -1:
            return
        if document.body[i + 1] != "LatexCommand index": # Might also be index_print
            # Leave this inset alone but keep converting the ones after it.
            i = i + 1
            continue
        fullcommand = document.body[i + 2]
        # Three old lines (header, command, name) become five new ones.
        document.body[i : i + 3] = [
            "\\begin_inset Index",
            "status collapsed",
            "\\begin_layout standard",
            fullcommand[6:].strip('"'),
            "\\end_layout",
        ]
        i = i + 5
def revert_latexcommand_index(document):
    """Revert from collapsable form to LatexCommand form.

    Each "Index" inset is turned back into a "CommandInset index" inset
    whose name argument is the single text line of the collapsable inset.
    An inset without a matching end is reported and skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # Without this check the deletions below would hit body[-2]
            # and body[-3], i.e. lines at the very end of the document.
            document.warning("Malformed LyX document: Could not find end of Index inset.")
            i = i + 1
            continue
        del document.body[j - 1]
        del document.body[j - 2] # \end_layout
        document.body[i] = "\\begin_inset CommandInset index"
        document.body[i + 1] = "LatexCommand index"
        document.body[i + 3] = "name " + '"' + document.body[i + 3] + '"'
        document.body.insert(i + 4, "")
        del document.body[i + 2] # \begin_layout standard
        i = i + 3
def revert_wraptable(document):
    """Revert wrap table to wrap figure.

    Rewrites the header line of every "Wrap table" inset in document.body.
    """
    token = "\\begin_inset Wrap table"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        header = document.body[pos]
        document.body[pos] = header.replace('\\begin_inset Wrap table', '\\begin_inset Wrap figure')
        pos = find_token(document.body, token, pos + 1)
def revert_vietnamese(document):
    """Set language Vietnamese to English.

    Switches the document language (attribute and header line) and every
    "\\lang vietnamese" occurrence found in the body.
    """
    # Set document language from Vietnamese to English
    if document.language == "vietnamese":
        document.language = "english"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language english"

    token = "\\lang vietnamese"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace("\\lang vietnamese", "\\lang english")
        pos = find_token(document.body, token, pos + 1)
def revert_japanese(document):
    """Set language japanese-plain to japanese.

    Switches the document language (attribute and header line) and every
    "\\lang japanese-plain" occurrence found in the body.
    """
    # Set document language from japanese-plain to japanese
    if document.language == "japanese-plain":
        document.language = "japanese"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language japanese"

    token = "\\lang japanese-plain"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace("\\lang japanese-plain", "\\lang japanese")
        pos = find_token(document.body, token, pos + 1)
def revert_japanese_encoding(document):
    """Set input encoding from EUC-JP-plain to EUC-JP etc.

    Rewrites the first header line found for each of the "-plain" Japanese
    input encodings.
    """
    replacements = (
        ("\\inputencoding EUC-JP-plain", "\\inputencoding EUC-JP"),
        ("\\inputencoding JIS-plain", "\\inputencoding JIS"),
        # convert to UTF8 since there is currently no SJIS encoding
        ("\\inputencoding SJIS-plain", "\\inputencoding UTF8"),
    )
    for old_line, new_line in replacements:
        pos = find_token(document.header, old_line, 0)
        if pos != -1:
            document.header[pos] = new_line
def revert_inset_info(document):
    """Replace info inset with its content.

    Each Info inset in document.body is replaced by the single line
    "<type>:<arg>" built from its "type" and "arg" lines ("unknown" and ""
    when missing).  An empty line right after the inset is removed too.
    An inset without a matching end is reported and skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, '\\begin_inset Info', i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            # Without an end there is no range to replace; skip the inset.
            i += 1
            continue
        info_type = 'unknown'
        arg = ''
        for line in document.body[i : j + 1]:
            if line.startswith("arg"):
                arg = line[3:].strip().strip('"')
            if line.startswith("type"):
                info_type = line[4:].strip().strip('"')
        # I think there is a newline after \\end_inset, which should be removed.
        end = j + 1
        if end < len(document.body) and document.body[end].strip() == "":
            end += 1
        document.body[i : end] = [info_type + ':' + arg]
def convert_pdf_options(document):
    """Update the hyperref header settings.

    Set the pdfusetitle tag, delete the pdf_store_options, and remove the
    quotes from the bookmarksopenlevel line.
    """
    hyperref_on = get_value(document.header, "\\use_hyperref", 0, default = "0")
    if hyperref_on == "1":
        anchor = find_token(document.header, "\\use_hyperref", 0)
        document.header.insert(anchor + 1, "\\pdf_pdfusetitle true")

    k = find_token(document.header, "\\pdf_store_options", 0)
    if k != -1:
        del document.header[k]

    level_pos = find_token(document.header, "\\pdf_bookmarksopenlevel", k)
    if level_pos == -1:
        return
    document.header[level_pos] = document.header[level_pos].replace('"', '')
def revert_pdf_options_2(document):
    """Undo convert_pdf_options() on the header.

    Reset the pdfusetitle tag and put quotes around the value of
    bookmarksopenlevel.
    """
    start = find_token(document.header, "\\use_hyperref", 0)

    title_pos = find_token(document.header, "\\pdf_pdfusetitle", start)
    if title_pos != -1:
        del document.header[title_pos]

    level_pos = find_token(document.header, "\\pdf_bookmarksopenlevel", start)
    if level_pos == -1:
        return
    values = document.header[level_pos].split()
    # The blank in front of the quote separates the value from the keyword
    # once the pieces are glued together again.
    values[1] = ' "' + values[1] + '"'
    document.header[level_pos] = ''.join(values)
def convert_htmlurl(document):
    """Convert "htmlurl" to "href" insets for docbook.

    Does nothing unless document.backend is "docbook".
    """
    if document.backend != "docbook":
        return
    token = "\\begin_inset CommandInset url"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        document.body[pos : pos + 2] = [
            "\\begin_inset CommandInset href",
            "LatexCommand href",
        ]
        pos = find_token(document.body, token, pos + 1)
def convert_url(document):
    """Convert url insets to url charstyles.

    Does nothing when document.backend is "docbook".  A name argument found
    two lines below the inset header is written as plain text in front of
    the new "Flex URL" inset; the target becomes the inset's content.
    """
    if document.backend == "docbook":
        return
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset CommandInset url", pos)
        if pos == -1:
            break
        name_pos = find_token(document.body, "name", pos)
        if name_pos == pos + 2:
            # place the URL name in typewriter before the new URL insert
            # grab the name 'bla' from the e.g. the line 'name "bla"',
            # therefore start with the 6th character
            label = document.body[name_pos][6:-1]
            document.body[pos:pos] = [label + " "]
            pos = pos + 1
        target_pos = find_token(document.body, "target", pos)
        if target_pos == -1:
            document.warning("Malformed LyX document: Can't find target for url inset")
            pos = pos + 1
            continue
        target = document.body[target_pos][8:-1]
        end_pos = find_token(document.body, "\\end_inset", target_pos)
        if end_pos == -1:
            document.warning("Malformed LyX document: Can't find end of url inset")
            return
        replacement = []
        replacement.append("\\begin_inset Flex URL")
        replacement.extend(["status collapsed", ""])
        replacement.append("\\begin_layout Standard")
        replacement.extend(["", target])
        replacement.extend(["\\end_layout", ""])
        document.body[pos:end_pos] = replacement
        pos = end_pos
def revert_href(document):
    """Reverts hyperlink insets (href) to url insets (url).

    Replaces the header and command line of every href CommandInset.
    """
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset CommandInset href", pos)
        if pos == -1:
            return
        url_lines = ["\\begin_inset CommandInset url", "LatexCommand url"]
        document.body[pos : pos + 2] = url_lines
        pos = pos + 2
def convert_include(document):
    """Converts include insets to new format.

    The old one-line header carrying command, file name and optional
    bracketed parameters is split into a CommandInset header followed by
    LatexCommand, the former second line, filename and, if parameters were
    given, lstparams.
    """
    header_re = re.compile(r'\\begin_inset Include\s+\\([^{]+){([^}]*)}(?:\[(.*)\])?')
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset Include", pos)
        if pos == -1:
            return
        header = document.body[pos]
        previewline = document.body[pos + 1]
        match = header_re.match(header)
        if match is None:
            document.warning("Unable to match line " + str(pos) + " of body!")
            pos += 1
            continue
        cmd, fn, opt = match.groups()
        replacement = [
            "\\begin_inset CommandInset include",
            "LatexCommand " + cmd,
            previewline,
            "filename \"" + fn + "\"",
        ]
        step = 2
        if opt:
            replacement.append("lstparams " + '"' + opt + '"')
            step = 3
        document.body[pos : pos + 2] = replacement
        pos += step
def revert_include(document):
    """Reverts include insets to old format.

    Inverse of convert_include(): the parameter lines that follow the
    CommandInset header (LatexCommand, filename, lstparams and one other
    line, taken to be the preview setting) are folded back into the
    one-line header "\\begin_inset Include \\cmd{file}[params]", followed by
    the preview line when there is one.  The parameter lines may come in
    any order; file name and parameters are written without their quotes.
    """
    command_re = re.compile('LatexCommand (.+)')
    filename_re = re.compile('filename (.+)')
    # "options" is still accepted for files written with that keyword.
    params_re = re.compile('(?:lstparams|options) (.*)')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset include", i)
        if i == -1:
            return

        cmd = None
        fn = None
        options = ""
        previewline = None
        # Parameter lines run up to the first empty line or the inset end.
        stop = i + 1
        while stop < len(document.body):
            line = document.body[stop]
            if line.strip() == "" or line.startswith("\\end_inset"):
                break
            m = command_re.match(line)
            if m is not None:
                cmd = m.group(1)
            else:
                m = filename_re.match(line)
                if m is not None:
                    fn = m.group(1).strip('"')
                else:
                    m = params_re.match(line)
                    if m is not None:
                        options = m.group(1).strip('"')
                    elif previewline is None:
                        previewline = line
            stop += 1

        if cmd is None:
            document.warning("Malformed LyX document: No LatexCommand line for `" +
                document.body[i] + "' on line " + str(i) + ".")
            i += 1
            continue
        if fn is None:
            document.warning("Malformed LyX document: No filename line for `" + \
                document.body[i] + "' on line " + str(i) + ".")
            i += 1
            continue

        newline = "\\begin_inset Include \\" + cmd + "{" + fn + "}"
        # Only listings take bracketed parameters in the old format.
        if cmd == "lstinputlisting" and options:
            newline += ("[" + options + "]")
        insertion = [newline]
        if previewline is not None:
            insertion.append(previewline)
        document.body[i : stop] = insertion
        i += len(insertion)
def revert_albanian(document):
    """Set language Albanian to English.

    Switches the document language (attribute and header line) and every
    "\\lang albanian" occurrence found in the body.
    """
    # Set document language from Albanian to English
    if document.language == "albanian":
        document.language = "english"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language english"

    token = "\\lang albanian"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace("\\lang albanian", "\\lang english")
        pos = find_token(document.body, token, pos + 1)
def revert_lowersorbian(document):
    """Set language lower Sorbian to English.

    Switches the document language (attribute and header line) and every
    "\\lang lowersorbian" occurrence found in the body.
    """
    # Set document language from lower Sorbian to English
    if document.language == "lowersorbian":
        document.language = "english"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language english"

    token = "\\lang lowersorbian"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace("\\lang lowersorbian", "\\lang english")
        pos = find_token(document.body, token, pos + 1)
def revert_uppersorbian(document):
    """Set language uppersorbian to usorbian as this was used in LyX 1.5.

    Switches the document language (attribute and header line) and every
    "\\lang uppersorbian" occurrence found in the body.
    """
    # Set document language from uppersorbian to usorbian
    if document.language == "uppersorbian":
        document.language = "usorbian"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language usorbian"

    token = "\\lang uppersorbian"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace("\\lang uppersorbian", "\\lang usorbian")
        pos = find_token(document.body, token, pos + 1)
def convert_usorbian(document):
    """Set language usorbian (the name used in LyX 1.5) to uppersorbian.

    Switches the document language (attribute and header line) and every
    "\\lang usorbian" occurrence found in the body.
    """
    # Set document language from usorbian to uppersorbian
    if document.language == "usorbian":
        document.language = "uppersorbian"
        header_pos = find_token(document.header, "\\language", 0)
        if header_pos != -1:
            document.header[header_pos] = "\\language uppersorbian"

    token = "\\lang usorbian"
    pos = find_token(document.body, token, 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace("\\lang usorbian", "\\lang uppersorbian")
        pos = find_token(document.body, token, pos + 1)
def revert_macro_optional_params(document):
    """Convert macro definitions with optional parameters into ERTs.

    Stub: the conversion of macro definitions with one or more optional
    parameters into uninterpreted ERT insets is not implemented, so the
    document is left untouched.
    """
    return None
def revert_hyperlinktype(document):
    """Reverts hyperlink type.

    Deletes a "type" line that directly follows a "target" line in
    document.body.  Processing stops at the first "target" line after
    which no "type" line exists anywhere.
    """
    pos = find_token(document.body, "target", 0)
    while pos != -1:
        type_pos = find_token(document.body, "type", pos)
        if type_pos == -1:
            return
        if type_pos == pos + 1:
            del document.body[type_pos]
        pos = find_token(document.body, "target", pos + 1)
# LyX releases whose file format this module produces.
supported_versions = ["1.6.0","1.6"]

# One entry per file format: [format reached, functions run to reach it].
# Formats that need no change to the document have an empty function list.
convert = [
    [277, [fix_wrong_tables]],
    [278, [close_begin_deeper]],
    [279, [long_charstyle_names]],
    [280, [axe_show_label]],
    [281, []],
    [282, []],
    [283, [convert_flex]],
    [284, []],
    [285, []],
    [286, []],
    [287, [convert_wrapfig_options]],
    [288, [convert_inset_command]],
    [289, [convert_latexcommand_index]],
    [290, []],
    [291, []],
    [292, []],
    [293, []],
    [294, [convert_pdf_options]],
    [295, [convert_htmlurl, convert_url]],
    [296, [convert_include]],
    [297, [convert_usorbian]],
    [298, []],
    [299, []],
]

# Same layout, newest format first: [format reached, functions run to get
# back to it from the next higher format].
revert = [
    [298, [revert_hyperlinktype]],
    [297, [revert_macro_optional_params]],
    [296, [revert_albanian, revert_lowersorbian, revert_uppersorbian]],
    [295, [revert_include]],
    [294, [revert_href]],
    [293, [revert_pdf_options_2]],
    [292, [revert_inset_info]],
    [291, [revert_japanese, revert_japanese_encoding]],
    [290, [revert_vietnamese]],
    [289, [revert_wraptable]],
    [288, [revert_latexcommand_index]],
    [287, [revert_inset_command]],
    [286, [revert_wrapfig_options]],
    [285, [revert_pdf_options]],
    [284, [remove_inzip_options]],
    [283, []],
    [282, [revert_flex]],
    [281, []],
    [280, [revert_begin_modules]],
    [279, [revert_show_label]],
    [278, [revert_long_charstyle_names]],
    [277, []],
    [276, []],
]
791 if __name__ == "__main__":