1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2007 José Matos <jamatos@lyx.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 """ Convert files to the file format generated by lyx 1.6"""
25 from parser_tools import find_token, find_end_of, find_tokens, get_value
27 ####################################################################
28 # Private helper functions
def find_end_of_inset(lines, i):
    """Find the end of an inset, where lines[i] is included.

    Thin wrapper around find_end_of() that supplies the begin_inset /
    end_inset token pair; the result of find_end_of() is returned as is.
    """
    begin_marker = "\\begin_inset"
    end_marker = "\\end_inset"
    return find_end_of(lines, i, begin_marker, end_marker)
35 ####################################################################
def fix_wrong_tables(document):
    """Strip a misplaced multicolumn value of 2 from table cells.

    For every Tabular inset in document.body the row and column counts are
    read from the line following the inset start (4th and 6th fields when
    that line is split on double quotes).  The cells are then visited row
    by row.  When a '<cell' line mentions 'multicolumn', its first quoted
    value is 2, and the cell is either the first of its row or follows a
    cell without a multicolumn value, characters 5..20 of the line are
    removed (presumably the ' multicolumn="2"' attribute right after
    '<cell' -- TODO confirm against the file format).
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Step past this inset: searching again from the same index
            # would find it again and never terminate.
            i += 1
            continue

        features = body[i + 1].split('"')
        nrows = int(features[3])
        ncols = int(features[5])

        m = i + 1
        for _row in range(nrows):
            prev_multicolumn = 0
            for k in range(ncols):
                # Stay inside this inset; fewer cells than announced must
                # not make us wander into the rest of the document.
                m = find_token(body, '<cell', m, j)
                if m == -1:
                    break

                if body[m].find('multicolumn') != -1:
                    multicol_cont = int(body[m].split('"')[1])
                    if multicol_cont == 2 and (k == 0 or prev_multicolumn == 0):
                        body[m] = body[m][:5] + body[m][21:]
                        prev_multicolumn = 0
                    else:
                        prev_multicolumn = multicol_cont
                else:
                    prev_multicolumn = 0

                # Move on, otherwise the next search returns this same cell.
                m += 1
            if m == -1:
                break

        i = j + 1
def close_begin_deeper(document):
    """Balance begin_deeper / end_deeper tokens in the document body.

    Counts every line starting with begin_deeper (+1) or end_deeper (-1)
    and inserts that many end_deeper lines just before the last two lines
    of the body.  Nothing is inserted when the count is zero or negative.
    """
    body = document.body
    depth = 0
    i = 0
    while True:
        i = find_tokens(body, ["\\begin_deeper", "\\end_deeper"], i)
        if i == -1:
            break

        if body[i][:13] == "\\begin_deeper":
            depth += 1
        else:
            depth -= 1

        i += 1

    # A non-positive repeat count yields an empty list, like range() did.
    body[-2:-2] = ['\\end_deeper'] * depth
def long_charstyle_names(document):
    """Prefix the name of every CharStyle inset with 'CharStyle:'.

    Each body line starting with the CharStyle inset token has
    'CharStyle ' replaced by 'CharStyle CharStyle:'.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        body[i] = body[i].replace("CharStyle ", "CharStyle CharStyle:")
        # The rewritten line still matches the token, so step past it.
        i += 1
def revert_long_charstyle_names(document):
    """Remove the 'CharStyle:' prefix from the name of CharStyle insets.

    Inverse of long_charstyle_names(): 'CharStyle CharStyle:' becomes
    'CharStyle ' again on every CharStyle inset line.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Keep the separating space; without it the inset type and the
        # style name would be fused into a single word ("CharStyleFoo").
        body[i] = body[i].replace("CharStyle CharStyle:", "CharStyle ")
        i += 1
def axe_show_label(document):
    """Replace the show_label line of CharStyle insets by a status line.

    The line after each CharStyle inset start is inspected:
      * contains 'show_label' and 'true'  -> becomes 'status open'
      * contains 'show_label' and 'false' -> becomes 'status collapsed'
    In both cases the line that followed it is deleted.  Any other content
    only produces a warning and leaves the body untouched.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle", i)
        if i == -1:
            return

        param = body[i + 1]
        if param.find("show_label") == -1:
            document.warning("Malformed LyX document: show_label missing in CharStyle.")
        elif param.find("true") != -1:
            body[i + 1] = "status open"
            del body[i + 2]
        elif param.find("false") != -1:
            body[i + 1] = "status collapsed"
            del body[i + 2]
        else:
            document.warning("Malformed LyX document: show_label neither false nor true.")

        i += 1
def revert_show_label(document):
    """Re-insert a show_label line into every CharStyle inset.

    The line after the inset start decides what is inserted before it:
    'status open' gives 'show_label true', 'status collapsed' gives
    'show_label false'.  Anything else only triggers a warning.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle", i)
        if i == -1:
            return

        status = body[i + 1]
        if status.find("status open") != -1:
            body.insert(i + 1, "show_label true")
        elif status.find("status collapsed") != -1:
            body.insert(i + 1, "show_label false")
        else:
            document.warning("Malformed LyX document: no legal status line in CharStyle.")

        i += 1
def revert_begin_modules(document):
    """Delete every begin_modules ... end_modules section of the header.

    Stops silently as soon as a begin_modules line has no matching
    end_modules line.
    """
    header = document.header
    i = 0
    while True:
        i = find_token(header, "\\begin_modules", i)
        if i == -1:
            return
        j = find_end_of(header, i, "\\begin_modules", "\\end_modules")
        if j == -1:
            # this should not happen
            break
        # The section is removed, so the next search restarts at index i.
        del header[i : j + 1]
def convert_flex(document):
    """Convert CharStyle insets to Flex insets."""
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # After the rename the line no longer matches the search token,
        # so no explicit increment is needed.
        body[i] = body[i].replace('\\begin_inset CharStyle', '\\begin_inset Flex')
def revert_flex(document):
    """Convert Flex insets back to CharStyle insets."""
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Flex", i)
        if i == -1:
            return
        # After the rename the line no longer matches the search token,
        # so no explicit increment is needed.
        body[i] = body[i].replace('\\begin_inset Flex', '\\begin_inset CharStyle')
177 # Discard PDF options for hyperref
def revert_pdf_options(document):
    """Revert PDF options for hyperref.

    Removes, for each hyperref-related header token, the first header line
    starting with that token.  Tokens that are missing are skipped.
    """
    # Several tokens are prefixes of others (\pdf_bookmarks vs.
    # \pdf_bookmarksopen ...).  The lookups below match line starts (the
    # original already relied on that for \language etc.), so the longer
    # names are handled first to make sure each lookup hits its own line.
    tokens = (
        "\\use_hyperref",
        "\\pdf_store_options",
        "\\pdf_title",
        "\\pdf_author",
        "\\pdf_subject",
        "\\pdf_keywords",
        "\\pdf_bookmarksopenlevel",
        "\\pdf_bookmarksopen",
        "\\pdf_bookmarksnumbered",
        "\\pdf_bookmarks",
        "\\pdf_breaklinks",
        "\\pdf_pdfborder",
        "\\pdf_colorlinks",
        "\\pdf_backref",
        "\\pdf_pagebackref",
        "\\pdf_pagemode",
        "\\pdf_quoted_options",
    )
    header = document.header
    for token in tokens:
        # Always search from the top: reusing the previous result as the
        # start index would pass -1 along after a failed lookup.
        i = find_token(header, token, 0)
        if i != -1:
            del header[i]
def remove_inzip_options(document):
    """Remove inzipName and embed options from the Graphics inset.

    For each Graphics inset the tab-prefixed inzipName line found inside
    the inset is deleted, together with the embed line directly after it.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            i += 1
            continue
        # If there's a inzip param, just remove that
        k = find_token(body, "\tinzipName", i + 1, j)
        if k != -1:
            del body[k]
            # embed option must follow the inzipName option.  After the
            # deletion above it has moved up to index k (not k + 1); only
            # drop it when it really is the embed line.
            if k < len(body) and body[k].lstrip().startswith("embed"):
                del body[k]
        i += 1
def convert_inset_command(document):
    r"""Convert
            \begin_inset LatexCommand cmd
        to
            \begin_inset CommandInset InsetType
            LatexCommand cmd

    InsetType is derived from cmd (see the mapping below); commands
    without a special case use their own name as inset type.
    """
    # Compiled once; the pattern does not change between iterations.
    r = re.compile(r'\\begin_inset LatexCommand (.*)$')
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset LatexCommand", i)
        if i == -1:
            return
        m = r.match(body[i])
        if m is None:
            # Line starts with the token but carries no command name.
            document.warning("Malformed LyX document: Missing command name in " + body[i] + ".")
            i += 1
            continue
        cmdName = m.group(1)
        #this is adapted from factory.cpp
        # NOTE(review): the "url", "ref" and "toc" inset names were restored
        # from the upstream sources -- confirm against factory.cpp.
        if cmdName[0:4].lower() == "cite":
            insetName = "citation"
        elif cmdName == "url" or cmdName == "htmlurl":
            insetName = "url"
        elif cmdName[-3:] == "ref":
            insetName = "ref"
        elif cmdName == "tableofcontents":
            insetName = "toc"
        elif cmdName == "printnomenclature":
            insetName = "nomencl_print"
        elif cmdName == "printindex":
            insetName = "index_print"
        else:
            insetName = cmdName
        # The replaced line no longer matches the search token, so the
        # next search can safely restart at i.
        body[i : i + 1] = ["\\begin_inset CommandInset " + insetName,
                           "LatexCommand " + cmdName]
def revert_inset_command(document):
    r"""Convert
            \begin_inset CommandInset InsetType
            LatexCommand cmd
        to
            \begin_inset LatexCommand cmd

    Some insets may end up being converted to insets earlier versions of
    LyX will not be able to recognize. Not sure what to do about that.
    """
    # Compiled once; the pattern does not change between iterations.
    r = re.compile(r'LatexCommand\s+(.*)$')
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CommandInset", i)
        if i == -1:
            return
        m = r.match(body[i + 1])
        if not m:
            document.warning("Malformed LyX document: Missing LatexCommand in " + body[i] + ".")
            # Skip the offending inset; retrying from the same index would
            # find it again and never terminate.
            i += 1
            continue
        cmdName = m.group(1)
        # Two lines collapse into one that no longer matches the token.
        body[i : i + 2] = ["\\begin_inset LatexCommand " + cmdName]
def convert_wrapfig_options(document):
    """Convert optional options for wrap floats (wrapfig).

    Adds the tokens "lines", "placement", and "overhang" after every
    'Wrap figure' inset start.  An already present placement line (found
    right after the new "lines" line) is kept.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Wrap figure", i)
        if i == -1:
            return
        body.insert(i + 1, "lines 0")
        # placement can be already set or not; if not, set it
        if find_token(body, "placement", i) != i + 2:
            body.insert(i + 2, "placement o")
        body.insert(i + 3, "overhang 0col%")
        i += 1
def revert_wrapfig_options(document):
    """Revert optional options for wrap floats (wrapfig).

    Deletes each "lines" line together with the "overhang" line two lines
    below it.  Processing stops when no further "overhang" line exists.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "lines", i)
        if i == -1:
            return
        j = find_token(body, "overhang", i + 1)
        if j == -1:
            return
        if j != i + 2:
            document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float.")
            # This "lines" line is not followed by its overhang parameter
            # (it may be ordinary text); deleting here would remove
            # unrelated lines, so leave it alone and keep searching.
            i += 1
            continue
        # Remove the later line first so index i stays valid.
        del body[j]
        del body[i]
        i += 1
def convert_latexcommand_index(document):
    """Convert index insets from LatexCommand form to collapsable form.

    The three lines (inset start, 'LatexCommand index', name line) become
    an Index inset with status line, layout start, the name text with the
    first six characters and surrounding double quotes removed, and layout
    end.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CommandInset index", i)
        if i == -1:
            return
        if body[i + 1] != "LatexCommand index":  # Might also be index_print
            # Skip only this inset; returning here would leave every
            # later index inset unconverted.
            i += 1
            continue
        fullcommand = body[i + 2]
        body[i] = "\\begin_inset Index"
        body[i + 1] = "status collapsed"
        body[i + 2] = "\\begin_layout standard"
        body.insert(i + 3, fullcommand[6:].strip('"'))
        body.insert(i + 4, "\\end_layout")
        i += 5
def revert_latexcommand_index(document):
    """Revert index insets from collapsable form to LatexCommand form.

    Expects the layout written by convert_latexcommand_index(): inset
    start, status line, layout start, one text line, layout end and one
    more line before the inset end.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Index", i)
        if i == -1:
            return
        # Start after the opening line, like every other caller of
        # find_end_of_inset() in this module does.
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            # Negative indices below would delete lines at the end of
            # the document, so bail out for this inset.
            document.warning("Malformed LyX document: Could not find end of Index inset.")
            i += 1
            continue
        del body[j - 1]
        del body[j - 2]  # \end_layout
        body[i] = "\\begin_inset CommandInset index"
        body[i + 1] = "LatexCommand index"
        body[i + 3] = "name " + '"' + body[i + 3] + '"'
        body.insert(i + 4, "")
        del body[i + 2]  # \begin_layout standard
        i += 5
def revert_wraptable(document):
    """Revert wrap table to wrap figure."""
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Wrap table", i)
        if i == -1:
            return
        body[i] = body[i].replace('\\begin_inset Wrap table', '\\begin_inset Wrap figure')
        i += 1
def revert_vietnamese(document):
    """Set language Vietnamese to English.

    Changes document.language and the header language line when the
    document language is Vietnamese, then rewrites every Vietnamese
    language switch in the body.
    """
    # Set document language from Vietnamese to English
    if document.language == "vietnamese":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"

    body = document.body
    j = 0
    while True:
        j = find_token(body, "\\lang vietnamese", j)
        if j == -1:
            return
        body[j] = body[j].replace("\\lang vietnamese", "\\lang english")
        j += 1
def revert_japanese(document):
    """Set language japanese-plain to japanese.

    Changes document.language and the header language line when the
    document language is japanese-plain, then rewrites every
    japanese-plain language switch in the body.
    """
    # Set document language from japanese-plain to japanese
    if document.language == "japanese-plain":
        document.language = "japanese"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language japanese"

    body = document.body
    j = 0
    while True:
        j = find_token(body, "\\lang japanese-plain", j)
        if j == -1:
            return
        body[j] = body[j].replace("\\lang japanese-plain", "\\lang japanese")
        j += 1
def revert_japanese_encoding(document):
    """Set input encoding from EUC-JP-plain to EUC-JP etc.

    Rewrites the header input encoding line: EUC-JP-plain -> EUC-JP,
    JIS-plain -> JIS and SJIS-plain -> UTF8.
    """
    replacements = (
        ("\\inputencoding EUC-JP-plain", "\\inputencoding EUC-JP"),
        ("\\inputencoding JIS-plain", "\\inputencoding JIS"),
        # convert to UTF8 since there is currently no SJIS encoding
        ("\\inputencoding SJIS-plain", "\\inputencoding UTF8"),
    )
    header = document.header
    for old, new in replacements:
        i = find_token(header, old, 0)
        if i != -1:
            header[i] = new
def revert_inset_info(document):
    """Replace info inset with its content.

    Every Info inset is replaced by a single line 'type:arg', built from
    the inset's lines starting with 'type' and 'arg' (quotes stripped).
    An empty line directly after the inset is removed as well.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, '\\begin_inset Info', i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            # Without an end there is nothing sensible to replace; skip
            # the inset instead of rewriting an arbitrary slice.
            i += 1
            continue
        # NOTE(review): these defaults were restored from the upstream
        # sources -- confirm.
        info_type = 'unknown'
        arg = ''
        for line in body[i : j + 1]:
            if line.startswith("arg"):
                arg = line[3:].strip().strip('"')
            if line.startswith("type"):
                info_type = line[4:].strip().strip('"')
        # I think there is a newline after \end_inset, which should be removed.
        end = j + 1
        if end < len(body) and body[end].strip() == "":
            end += 1
        body[i:end] = [info_type + ':' + arg]
def convert_pdf_options(document):
    """Update the hyperref header options.

    Sets the pdfusetitle tag when hyperref is in use, deletes the
    pdf_store_options line and strips the double quotes from the
    pdf_bookmarksopenlevel line.
    """
    header = document.header
    has_hr = get_value(header, "\\use_hyperref", 0, default = "0")
    # NOTE(review): the comparison value "1" was restored from the
    # upstream sources -- confirm.
    if has_hr == "1":
        k = find_token(header, "\\use_hyperref", 0)
        header.insert(k + 1, "\\pdf_pdfusetitle true")
    k = find_token(header, "\\pdf_store_options", 0)
    if k != -1:
        del header[k]
    # Search from the top: k is -1 when pdf_store_options was absent and
    # must not be used as a start index.
    i = find_token(header, "\\pdf_bookmarksopenlevel", 0)
    if i == -1:
        return
    header[i] = header[i].replace('"', '')
def revert_pdf_options_2(document):
    """Undo convert_pdf_options() as far as possible.

    Removes the pdf_pdfusetitle line and puts the value of the
    pdf_bookmarksopenlevel line back into double quotes.
    """
    header = document.header
    k = find_token(header, "\\use_hyperref", 0)
    # A missing use_hyperref line yields -1, which is no valid start index.
    start = max(k, 0)
    i = find_token(header, "\\pdf_pdfusetitle", start)
    if i != -1:
        del header[i]
    i = find_token(header, "\\pdf_bookmarksopenlevel", start)
    if i == -1:
        return
    values = header[i].split()
    if len(values) < 2:
        # No value present, nothing to quote.
        return
    values[1] = ' "' + values[1] + '"'
    header[i] = ''.join(values)
def convert_htmlurl(document):
    """Convert "htmlurl" to "href" insets for docbook.

    Does nothing unless document.backend is "docbook".  Otherwise every
    url CommandInset start line and the line after it are overwritten
    with the href inset start and its LatexCommand line.
    """
    if document.backend != "docbook":
        return
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CommandInset url", i)
        if i == -1:
            return
        body[i] = "\\begin_inset CommandInset href"
        body[i + 1] = "LatexCommand href"
        i += 1
def convert_url(document):
    """Convert url insets to url charstyles.

    Skipped for the docbook backend.  Each url CommandInset is replaced
    (up to, but not including, its end_inset line) by a 'Flex URL' inset
    holding the target text.  When at least one inset was converted, the
    URL module is registered in the header's modules section, which is
    created after the textclass line if it does not exist yet.
    """
    if document.backend == "docbook":
        return
    r = re.compile(r'target\s+"(.*)"')
    body = document.body
    didone = False
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CommandInset url", i)
        if i == -1:
            break
        k = find_token(body, "\\end_inset", i)
        if k == -1:
            document.warning("Malformed LyX document: Can't find end of url inset")
            # No end_inset after this point: later insets cannot be
            # complete either.
            break
        # Only look for the target inside this inset.
        j = find_token(body, "target", i, k)
        m = None
        if j != -1:
            m = r.match(body[j])
        if m is None:
            document.warning("Malformed LyX document: Can't find target for url inset")
            i = k + 1
            continue
        target = m.group(1)
        # NOTE(review): the tail of this list (blank line, target, end of
        # layout, blank line) was restored from the upstream sources.
        newstuff = ["\\begin_inset Flex URL",
                    "status collapsed", "",
                    "\\begin_layout Standard",
                    "",
                    target,
                    "\\end_layout",
                    ""]
        body[i:k] = newstuff
        # Continue right behind the inserted lines; the old index k is
        # stale after the slice replacement.
        i += len(newstuff)
        didone = True

    if not didone:
        return

    #If we did one, we need to add URL to the modules
    header = document.header
    i = find_token(header, "\\begin_modules", 0)
    if i == -1:
        #No modules yet included
        i = find_token(header, "\\textclass", 0)
        if i == -1:
            document.warning("Malformed LyX document: No \\textclass!!")
            return
        modinfo = ["\\begin_modules", "URL", "\\end_modules"]
        header[i + 1: i + 1] = modinfo
        return
    j = find_token(header, "\\end_modules", i)
    if j == -1:
        document.warning("Malformed LyX document: No \\end_modules.")
        return
    k = find_token(header, "URL", i)
    if k != -1 and k < j:
        # Module already listed.
        return
    header.insert(i + 1, "URL")
def revert_href(document):
    """Reverts hyperlink insets (href) to url insets (url).

    The href inset start line and the line after it are replaced by the
    url inset start and its LatexCommand line.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CommandInset href", i)
        if i == -1:
            return
        body[i : i + 2] = ["\\begin_inset CommandInset url",
                           "LatexCommand url"]
        i += 2
def convert_include(document):
    r"""Converts include insets to new format.

    A line of the form
        \begin_inset Include \cmd{filename}[options]
    followed by one more line (kept as is) becomes
        \begin_inset CommandInset include
        LatexCommand cmd
        <the kept line>
        filename "filename"
        lstparams "options"      (only when options are present)
    Lines that do not match this form are reported and skipped.
    """
    r = re.compile(r'\\begin_inset Include\s+\\([^{]+){([^}]*)}(?:\[(.*)\])?')
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Include", i)
        if i == -1:
            return
        line = body[i]
        previewline = body[i + 1]
        m = r.match(line)
        if m is None:
            document.warning("Unable to match line " + str(i) + " of body!")
            i += 1
            continue
        cmd = m.group(1)
        fn = m.group(2)
        opt = m.group(3)
        insertion = ["\\begin_inset CommandInset include",
                     "LatexCommand " + cmd, previewline,
                     "filename \"" + fn + "\""]
        newlines = 2
        if opt:
            insertion.append("lstparams " + '"' + opt + '"')
            newlines += 1
        body[i : i + 2] = insertion
        i += newlines
def revert_include(document):
    r"""Reverts include insets to old format.

    Accepts the layout written by convert_include()
        \begin_inset CommandInset include
        LatexCommand cmd
        <kept line>
        filename "fn"
        lstparams "options"
    as well as the layout this function expected originally, with the
    kept line before the LatexCommand line and unquoted values.  The
    result is
        \begin_inset Include \cmd{fn}[options]
        <kept line>
    """
    r_cmd = re.compile(r'LatexCommand (.+)')
    # Values may or may not be quoted; convert_include() writes quotes.
    r_fn = re.compile(r'filename "?(.+?)"?$')
    r_opt = re.compile(r'(?:lstparams|options) "?(.*?)"?$')
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CommandInset include", i)
        if i == -1:
            return
        # LatexCommand is either the first or the second line of the inset;
        # the other one of the two is carried over unchanged.
        m = r_cmd.match(body[i + 1])
        if m is not None:
            previewline = body[i + 2]
        else:
            m = r_cmd.match(body[i + 2])
            previewline = body[i + 1]
        if m is None:
            document.warning("Malformed LyX document: No LatexCommand line for `" +
                             body[i] + "' on line " + str(i) + ".")
            i += 1
            continue
        cmd = m.group(1)
        m = r_fn.match(body[i + 3])
        if m is None:
            document.warning("Malformed LyX document: No filename line for `" +
                             body[i] + "' on line " + str(i) + ".")
            i += 2
            continue
        fn = m.group(1)
        options = ""
        numlines = 4
        if cmd == "lstinputlisting":
            m = r_opt.match(body[i + 4])
            if m is not None:
                options = m.group(1)
                numlines = 5
        newline = "\\begin_inset Include \\" + cmd + "{" + fn + "}"
        if options:
            newline += "[" + options + "]"
        body[i : i + numlines] = [newline, previewline]
        i += 2
# Versions of LyX that write the file formats handled by this module.
supported_versions = ["1.6.0","1.6"]

# Conversion chain: one entry per file format number, holding the functions
# to run to reach that format.  Formats without an entry in the extracted
# source get an empty list so that the sequence of format numbers stays
# contiguous.
# NOTE(review): the empty entries were restored from the gaps in the format
# numbers -- confirm against the upstream table.
convert = [[277, [fix_wrong_tables]],
           [278, [close_begin_deeper]],
           [279, [long_charstyle_names]],
           [280, [axe_show_label]],
           [281, []],
           [282, []],
           [283, [convert_flex]],
           [284, []],
           [285, []],
           [286, []],
           [287, [convert_wrapfig_options]],
           [288, [convert_inset_command]],
           [289, [convert_latexcommand_index]],
           [290, []],
           [291, []],
           [292, []],
           [293, []],
           [294, [convert_pdf_options]],
           [295, [convert_htmlurl, convert_url]],
           [296, [convert_include]]
          ]

# Reversion chain, highest format first: each entry names the format that
# is reached after running its functions.
revert =  [[295, [revert_include]],
           [294, [revert_href]],
           [293, [revert_pdf_options_2]],
           [292, [revert_inset_info]],
           [291, [revert_japanese, revert_japanese_encoding]],
           [290, [revert_vietnamese]],
           [289, [revert_wraptable]],
           [288, [revert_latexcommand_index]],
           [287, [revert_inset_command]],
           [286, [revert_wrapfig_options]],
           [285, [revert_pdf_options]],
           [284, [remove_inzip_options]],
           [283, []],
           [282, [revert_flex]],
           [281, []],
           [280, [revert_begin_modules]],
           [279, [revert_show_label]],
           [278, [revert_long_charstyle_names]],
           [277, []],
           [276, []]
          ]


if __name__ == "__main__":
    pass