1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2007 José Matos <jamatos@lyx.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 """ Convert files to the file format generated by lyx 1.6"""
25 from parser_tools import find_token, find_end_of, find_tokens, get_value
27 ####################################################################
28 # Private helper functions
def find_end_of_inset(lines, i):
    """Find the end of the inset that is searched for starting at lines[i].

    Delegates to find_end_of() with the inset delimiters; lines[i] itself is
    part of the searched range. Returns whatever find_end_of() returns
    (presumably -1 when no matching end exists -- confirm in parser_tools).
    """
    opening = "\\begin_inset"
    closing = "\\end_inset"
    return find_end_of(lines, i, opening, closing)
35 ####################################################################
def fix_wrong_tables(document):
    """Remove bogus multicolumn markers from Tabular cells.

    For each "\\begin_inset Tabular" in the body, the row and column counts
    are taken from the 4th and 6th quote-delimited fields of the line that
    follows the inset start. The cells are then walked row by row and any
    continuation cell that cannot continue anything has its multicolumn
    attribute removed.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Step past this inset; searching again from the same index
            # would find the same line forever.
            i += 1
            continue

        attrs = body[i + 1].split('"')
        nrows = int(attrs[3])
        ncols = int(attrs[5])
        _fix_multicolumn_cells(body, i + 1, j, nrows, ncols)
        i = j + 1


def _fix_multicolumn_cells(body, start, end, nrows, ncols):
    "Walk the '<cell' lines in body[start:end] and fix orphaned multicolumn continuations."
    m = start
    for _row in range(nrows):
        prev_multicolumn = 0
        for k in range(ncols):
            m = find_token(body, '<cell', m, end)
            if m == -1:
                # Fewer cells than rows*columns announced: nothing left to fix.
                return

            if body[m].find('multicolumn') != -1:
                # First quoted value on the cell line is read as the multicolumn state.
                multicol_cont = int(body[m].split('"')[1])

                if multicol_cont == 2 and (k == 0 or prev_multicolumn == 0):
                    # A continuation (2) at the start of a row, or after a cell
                    # that is not part of a span, is invalid: cut characters
                    # 5..20 out of the line (presumably ' multicolumn="2"').
                    body[m] = body[m][:5] + body[m][21:]
                    prev_multicolumn = 0
                else:
                    prev_multicolumn = multicol_cont
            else:
                prev_multicolumn = 0

            # Move past this cell so the next search finds the following one.
            m += 1
def close_begin_deeper(document):
    """Balance unmatched "\\begin_deeper" lines in the body.

    Counts +1 for every line starting with "\\begin_deeper" and -1 for every
    other match of find_tokens() (i.e. "\\end_deeper"), then inserts that many
    "\\end_deeper" lines just before the last two body lines.
    """
    markers = ["\\begin_deeper", "\\end_deeper"]
    unclosed = 0
    pos = find_tokens(document.body, markers, 0)
    while pos != -1:
        if document.body[pos][:13] == "\\begin_deeper":
            unclosed += 1
        else:
            unclosed -= 1
        pos = find_tokens(document.body, markers, pos + 1)

    # A non-positive count yields an empty list, so nothing is inserted.
    document.body[-2:-2] = ['\\end_deeper'] * unclosed
def long_charstyle_names(document):
    'Prefix the name of every CharStyle inset with "CharStyle:".'
    body = document.body
    token = "\\begin_inset CharStyle"
    pos = find_token(body, token, 0)
    while pos != -1:
        body[pos] = body[pos].replace("CharStyle ", "CharStyle CharStyle:")
        pos = find_token(body, token, pos + 1)
def revert_long_charstyle_names(document):
    """Strip the "CharStyle:" prefix from the name of every CharStyle inset.

    Exact inverse of long_charstyle_names(): "CharStyle CharStyle:Name"
    becomes "CharStyle Name".
    """
    body = document.body
    token = "\\begin_inset CharStyle"
    i = 0
    while True:
        i = find_token(body, token, i)
        if i == -1:
            return
        # The replacement must keep the separating blank; without it the
        # inset name is glued onto the keyword ("CharStyleName").
        body[i] = body[i].replace("CharStyle CharStyle:", "CharStyle ")
        i += 1
def axe_show_label(document):
    """Replace the show_label line of CharStyle insets by a status line.

    The line after each "\\begin_inset CharStyle" is inspected: a show_label
    line containing "true" becomes "status open", one containing "false"
    becomes "status collapsed"; in both cases the line following it is
    deleted. Anything else only triggers a warning.
    """
    body = document.body
    token = "\\begin_inset CharStyle"
    pos = find_token(body, token, 0)
    while pos != -1:
        label_line = body[pos + 1]
        if "show_label" not in label_line:
            document.warning("Malformed LyX document: show_label missing in CharStyle.")
        elif "true" in label_line:
            body[pos + 1] = "status open"
            del body[pos + 2]
        elif "false" in label_line:
            body[pos + 1] = "status collapsed"
            del body[pos + 2]
        else:
            document.warning("Malformed LyX document: show_label neither false nor true.")
        pos = find_token(body, token, pos + 1)
def revert_show_label(document):
    """Re-insert a show_label line into CharStyle insets.

    "show_label true" is inserted when the line after the inset start
    contains "status open", "show_label false" when it contains
    "status collapsed"; otherwise a warning is issued.
    """
    body = document.body
    token = "\\begin_inset CharStyle"
    pos = find_token(body, token, 0)
    while pos != -1:
        status_line = body[pos + 1]
        if "status open" in status_line:
            body.insert(pos + 1, "show_label true")
        elif "status collapsed" in status_line:
            body.insert(pos + 1, "show_label false")
        else:
            document.warning("Malformed LyX document: no legal status line in CharStyle.")
        pos = find_token(body, token, pos + 1)
def revert_begin_modules(document):
    'Delete every "\\begin_modules" ... "\\end_modules" section from the header.'
    header = document.header
    start = find_token(header, "\\begin_modules", 0)
    while start != -1:
        stop = find_end_of(header, start, "\\begin_modules", "\\end_modules")
        if stop == -1:
            # this should not happen
            return
        del header[start:stop + 1]
        # The section is gone, so the next search resumes at the same index.
        start = find_token(header, "\\begin_modules", start)
def convert_flex(document):
    "Convert CharStyle to Flex"
    body = document.body
    old_start = '\\begin_inset CharStyle'
    new_start = '\\begin_inset Flex'
    pos = find_token(body, old_start, 0)
    while pos != -1:
        # Once rewritten the line no longer matches, so searching again from
        # the same index moves on to the next inset.
        body[pos] = body[pos].replace(old_start, new_start)
        pos = find_token(body, old_start, pos)
def revert_flex(document):
    "Convert Flex to CharStyle"
    body = document.body
    old_start = '\\begin_inset Flex'
    new_start = '\\begin_inset CharStyle'
    pos = find_token(body, old_start, 0)
    while pos != -1:
        # The rewritten line no longer matches the search token.
        body[pos] = body[pos].replace(old_start, new_start)
        pos = find_token(body, old_start, pos)
def remove_manifest(document):
    """Remove the manifest section.

    Only resets the ``manifest`` attribute of ``document`` to None; header
    and body are not touched.
    """
    document.manifest = None
182 # Discard PDF options for hyperref
def revert_pdf_options(document):
    """Revert PDF options for hyperref.

    Removes the hyperref-related settings from the header, wherever they
    appear. A header line is dropped when its first word is one of the
    option tokens listed below; the header list is modified in place.
    """
    pdf_tokens = frozenset([
        "\\use_hyperref",
        "\\pdf_store_options",
        "\\pdf_title",
        "\\pdf_author",
        "\\pdf_subject",
        "\\pdf_keywords",
        "\\pdf_bookmarks",
        "\\pdf_bookmarksnumbered",
        "\\pdf_bookmarksopen",
        "\\pdf_bookmarksopenlevel",
        "\\pdf_breaklinks",
        "\\pdf_pdfborder",
        "\\pdf_colorlinks",
        "\\pdf_backref",
        "\\pdf_pagebackref",
        "\\pdf_pagemode",
        "\\pdf_quoted_options",
    ])

    def is_pdf_option(line):
        # Compare the whole first word, so that e.g. "\pdf_bookmarks" is not
        # confused with "\pdf_bookmarksnumbered" and the order of the options
        # in the header does not matter.
        fields = line.split(None, 1)
        return bool(fields) and line.startswith("\\") and fields[0] in pdf_tokens

    # Slice assignment keeps the identity of the header list.
    document.header[:] = [line for line in document.header
                          if not is_pdf_option(line)]
def remove_inzip_options(document):
    "Remove inzipName and embed options from the Graphics inset"
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
            # Without an end there is no range to search; go on with the next inset.
            i += 1
            continue
        # If there's a inzip param, just remove that
        k = find_token(body, "\tinzipName", i + 1, j)
        if k != -1:
            del body[k]
            # embed option must follow the inzipName option; after the
            # deletion above it has moved up to index k.
            # NOTE(review): assumes the option line reads "embed ..." after
            # its leading whitespace -- confirm against the file format.
            if k < len(body) and body[k].lstrip().startswith("embed"):
                del body[k]
        i += 1
def convert_inset_command(document):
    r"""
        Convert:
            \begin_inset LatexCommand cmd
        to
            \begin_inset CommandInset InsetType
            LatexCommand cmd

        InsetType is derived from cmd; commands without a dedicated inset
        type use the command name itself.
    """
    # Compiled once; the pattern does not change between insets.
    start_re = re.compile(r'\\begin_inset LatexCommand (.*)$')
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset LatexCommand", i)
        if i == -1:
            return
        m = start_re.match(body[i])
        if m is None:
            # Inset start without a command name: nothing to convert.
            document.warning("Malformed LyX document: Missing command name in " + body[i] + ".")
            i += 1
            continue
        cmdName = m.group(1)
        #this is adapted from factory.cpp
        if cmdName[0:4].lower() == "cite":
            insetName = "citation"
        elif cmdName == "url" or cmdName == "htmlurl":
            insetName = "url"
        elif cmdName[-3:] == "ref":
            insetName = "ref"
        elif cmdName == "tableofcontents":
            insetName = "toc"
        elif cmdName == "printnomenclature":
            insetName = "nomencl_print"
        elif cmdName == "printindex":
            insetName = "index_print"
        else:
            insetName = cmdName
        body[i : i+1] = ["\\begin_inset CommandInset " + insetName,
                         "LatexCommand " + cmdName]
        # Skip the two lines just written.
        i += 2
def revert_inset_command(document):
    r"""
        Convert:
            \begin_inset CommandInset InsetType
            LatexCommand cmd
        to
            \begin_inset LatexCommand cmd
        Some insets may end up being converted to insets earlier versions of LyX
        will not be able to recognize. Not sure what to do about that.
    """
    # Compiled once; the pattern does not change between insets.
    command_re = re.compile(r'LatexCommand\s+(.*)$')
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CommandInset", i)
        if i == -1:
            return
        m = None
        if i + 1 < len(body):
            m = command_re.match(body[i + 1])
        if not m:
            document.warning("Malformed LyX document: Missing LatexCommand in " + body[i] + ".")
            # Advance, otherwise the same inset would be found again forever.
            i += 1
            continue
        cmdName = m.group(1)
        # Two lines collapse into one; the result no longer matches the
        # search token, so the next search continues behind it.
        body[i : i+2] = ["\\begin_inset LatexCommand " + cmdName]
        i += 1
def convert_wrapfig_options(document):
    """Convert optional options for wrap floats (wrapfig).

    Adds the tokens "lines", "placement", and "overhang" directly after
    each "\\begin_inset Wrap figure" line.
    """
    body = document.body
    token = "\\begin_inset Wrap figure"
    pos = find_token(body, token, 0)
    while pos != -1:
        body.insert(pos + 1, "lines 0")
        placement = find_token(body, "placement", pos)
        # placement can be already set or not; if not, set it. An existing
        # placement line sits right after the freshly inserted "lines" line.
        if placement != pos + 2:
            body.insert(pos + 2, "placement o")
        body.insert(pos + 3, "overhang 0col%")
        pos = find_token(body, token, pos + 1)
def revert_wrapfig_options(document):
    """Revert optional options for wrap floats (wrapfig).

    Deletes each "lines" line together with the "overhang" line found two
    lines below it; the line in between is kept. Processing stops as soon
    as no further "overhang" line exists.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "lines", i)
        if i == -1:
            return
        j = find_token(body, "overhang", i + 1)
        if j == -1:
            return
        if j != i + 2:
            document.warning("Malformed LyX document: Couldn't find overhang parameter of wrap float.")
            # Not the expected lines/placement/overhang triple: leave the
            # text alone instead of deleting unrelated lines.
            i += 1
            continue
        # Delete the later line first so that index i stays valid.
        del body[j]
        del body[i]
        i += 1
def convert_latexcommand_index(document):
    """Convert from LatexCommand form to collapsable form.

    An inset of the form

        \\begin_inset CommandInset index
        LatexCommand index
        name "entry"

    is rewritten into an "\\begin_inset Index" inset whose single standard
    layout holds the entry text.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CommandInset index", i)
        if i == -1:
            return
        if body[i + 1] != "LatexCommand index": # Might also be index_print
            # Skip this inset only; index insets further down still need converting.
            i += 1
            continue
        fullcommand = body[i + 2]
        body[i] = "\\begin_inset Index"
        body[i + 1] = "status collapsed"
        body[i + 2] = "\\begin_layout standard"
        # Drop the leading 'name "' and any remaining quote characters.
        body.insert(i + 3, fullcommand[6:].strip('"'))
        body.insert(i + 4, "\\end_layout")
        i = i + 2
def revert_latexcommand_index(document):
    """Revert from collapsable form to LatexCommand form.

    Turns an "\\begin_inset Index" inset back into a
    "\\begin_inset CommandInset index" inset with a quoted name line.
    The statement order below matters: every index is relative to the
    lines as left by the preceding deletions.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Index", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i)
        if j == -1:
            # Deleting relative to -1 would remove lines at the end of the body.
            document.warning("Malformed LyX document: Could not find end of Index inset.")
            i += 1
            continue
        del body[j - 1]
        del body[j - 2] # \end_layout
        body[i] = "\\begin_inset CommandInset index"
        body[i + 1] = "LatexCommand index"
        body[i + 3] = "name " + '"' + body[i + 3] + '"'
        body.insert(i + 4, "")
        del body[i + 2] # \begin_layout standard
        i = i + 5
def revert_wraptable(document):
    "Revert wrap table to wrap figure."
    body = document.body
    table_start = '\\begin_inset Wrap table'
    figure_start = '\\begin_inset Wrap figure'
    pos = find_token(body, table_start, 0)
    while pos != -1:
        body[pos] = body[pos].replace(table_start, figure_start)
        pos = find_token(body, table_start, pos + 1)
def revert_vietnamese(document):
    """Set language Vietnamese to English.

    Switches the document language (attribute and "\\language" header line)
    when it is "vietnamese", then rewrites every "\\lang vietnamese" line in
    the body.
    """
    # Document-wide language
    if document.language == "vietnamese":
        document.language = "english"
        lang_line = find_token(document.header, "\\language", 0)
        if lang_line != -1:
            document.header[lang_line] = "\\language english"

    # Language switches inside the text
    body = document.body
    pos = find_token(body, "\\lang vietnamese", 0)
    while pos != -1:
        body[pos] = body[pos].replace("\\lang vietnamese", "\\lang english")
        pos = find_token(body, "\\lang vietnamese", pos + 1)
def revert_japanese(document):
    """Set language japanese-plain to japanese.

    Switches the document language (attribute and "\\language" header line)
    when it is "japanese-plain", then rewrites every "\\lang japanese-plain"
    line in the body.
    """
    # Document-wide language
    if document.language == "japanese-plain":
        document.language = "japanese"
        lang_line = find_token(document.header, "\\language", 0)
        if lang_line != -1:
            document.header[lang_line] = "\\language japanese"

    # Language switches inside the text
    body = document.body
    pos = find_token(body, "\\lang japanese-plain", 0)
    while pos != -1:
        body[pos] = body[pos].replace("\\lang japanese-plain", "\\lang japanese")
        pos = find_token(body, "\\lang japanese-plain", pos + 1)
def revert_japanese_encoding(document):
    "Set input encoding from EUC-JP-plain to EUC-JP etc."
    header = document.header
    replacements = (
        ("\\inputencoding EUC-JP-plain", "\\inputencoding EUC-JP"),
        ("\\inputencoding JIS-plain", "\\inputencoding JIS"),
        # convert to UTF8 since there is currently no SJIS encoding
        ("\\inputencoding SJIS-plain", "\\inputencoding UTF8"),
    )
    for old_line, new_line in replacements:
        pos = find_token(header, old_line, 0)
        if pos != -1:
            header[pos] = new_line
def revert_inset_info(document):
    """Replace info inset with its content.

    Every "\\begin_inset Info" inset is replaced by the single text line
    "<type>:<arg>", built from the inset's "type" and "arg" lines with the
    surrounding quotes removed.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, '\\begin_inset Info', i)
        if i == -1:
            return
        j = find_end_of_inset(body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of Info inset.")
            # No usable range; replacing body[i:0] would only insert text in
            # front of the inset and find it again on the next pass.
            i += 1
            continue
        # NOTE(review): fallback values for insets lacking a type/arg line.
        info_type = 'unknown'
        arg = ''
        for k in range(i, j + 1):
            if body[k].startswith("arg"):
                arg = body[k][3:].strip().strip('"')
            if body[k].startswith("type"):
                info_type = body[k][4:].strip().strip('"')
        # I think there is a newline after \end_inset, which should be removed.
        end = j + 1
        if end < len(body) and body[end].strip() == "":
            end += 1
        body[i:end] = [info_type + ':' + arg]
def convert_pdf_options(document):
    """Update the hyperref header options.

    Adds "\\pdf_pdfusetitle true" after "\\use_hyperref" when hyperref is
    enabled, deletes the "\\pdf_store_options" line and strips the quotes
    from the "\\pdf_bookmarksopenlevel" line.
    """
    header = document.header
    has_hr = get_value(header, "\\use_hyperref", 0, default = "0")
    # NOTE(review): "1" is taken to be the enabled value (the default passed
    # above is "0") -- confirm against the header format.
    if has_hr == "1":
        k = find_token(header, "\\use_hyperref", 0)
        header.insert(k + 1, "\\pdf_pdfusetitle true")
    k = find_token(header, "\\pdf_store_options", 0)
    if k != -1:
        del header[k]
    # Search the whole header: k may be -1 here, or lie behind the option.
    i = find_token(header, "\\pdf_bookmarksopenlevel", 0)
    if i == -1:
        return
    header[i] = header[i].replace('"', '')
def revert_pdf_options_2(document):
    """Undo convert_pdf_options().

    Deletes the "\\pdf_pdfusetitle" line and puts the value of
    "\\pdf_bookmarksopenlevel" back into double quotes.
    """
    header = document.header
    # Search the whole header; anchoring at "\use_hyperref" would start from
    # index -1 whenever that line is absent.
    i = find_token(header, "\\pdf_pdfusetitle", 0)
    if i != -1:
        del header[i]
    i = find_token(header, "\\pdf_bookmarksopenlevel", 0)
    if i == -1:
        return
    values = header[i].split()
    if len(values) < 2:
        # No value on the line, hence nothing to quote.
        return
    values[1] = ' "' + values[1] + '"'
    header[i] = ''.join(values)
def convert_htmlurl(document):
    """Convert "htmlurl" to "href" insets for docbook.

    Does nothing unless the document backend is "docbook". Otherwise each
    "\\begin_inset CommandInset url" line and the line after it are
    overwritten with the href variants.
    """
    if document.backend != "docbook":
        return
    body = document.body
    token = "\\begin_inset CommandInset url"
    pos = find_token(body, token, 0)
    while pos != -1:
        body[pos] = "\\begin_inset CommandInset href"
        body[pos + 1] = "LatexCommand href"
        pos = find_token(body, token, pos + 1)
def convert_url(document):
    """Convert url insets to url charstyles.

    Skipped for the "docbook" backend. Each "\\begin_inset CommandInset url"
    inset is replaced by a "Flex URL" inset containing the quoted target.
    When at least one inset was converted, the "URL" module is added to the
    header's module list.
    """
    if document.backend == "docbook":
        return
    target_re = re.compile(r'target\s+"(.*)"')
    body = document.body
    didone = False
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CommandInset url", i)
        if i == -1:
            break
        j = find_token(body, "target", i)
        m = None
        if j != -1:
            m = target_re.match(body[j])
        if m is None:
            document.warning("Malformed LyX document: Can't find target for url inset")
            # Step over this inset; restarting from a "not found" index
            # would search the same lines again.
            i += 1
            continue
        target = m.group(1)
        k = find_token(body, "\\end_inset", j)
        if k == -1:
            document.warning("Malformed LyX document: Can't find end of url inset")
            i += 1
            continue
        # NOTE(review): the tail of this list (blank line, target,
        # \end_layout, blank line) was not fully visible -- confirm.
        newstuff = ["\\begin_inset Flex URL",
                    "status collapsed", "",
                    "\\begin_layout Standard",
                    "",
                    target,
                    "\\end_layout",
                    ""]
        # The original \end_inset line (index k) is kept and closes the new inset.
        body[i:k] = newstuff
        didone = True
        i += len(newstuff)

    #If we did one, we need to add URL to the modules
    if didone:
        _add_url_module(document)


def _add_url_module(document):
    'Make sure the header lists "URL" between \\begin_modules and \\end_modules.'
    header = document.header
    i = find_token(header, "\\begin_modules", 0)
    if i == -1:
        #No modules yet included
        i = find_token(header, "\\textclass", 0)
        if i == -1:
            document.warning("Malformed LyX document: No \\textclass!!")
            return
        modinfo = ["\\begin_modules", "URL", "\\end_modules"]
        header[i + 1: i + 1] = modinfo
        return
    j = find_token(header, "\\end_modules", i)
    if j == -1:
        document.warning("Malformed LyX document: No \\end_modules.")
        return
    k = find_token(header, "URL", i)
    if k != -1 and k < j:
        # Already listed.
        return
    header.insert(i + 1, "URL")
def revert_href(document):
    """Reverts hyperlink insets (href) to url insets (url).

    The "\\begin_inset CommandInset href" line and the line following it are
    replaced by their url counterparts.
    """
    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset CommandInset href", i)
        if i == -1:
            return
        # Kept on one logical line: a bare line break after "=" is a syntax error.
        body[i : i + 2] = ["\\begin_inset CommandInset url",
                           "LatexCommand url"]
        i = i + 2
# LyX releases whose file format is produced by this module.
supported_versions = ["1.6.0","1.6"]

# One entry per file format number, in ascending order:
# [format reached, [functions applied to get there]].
# Steps with an empty list need no change to the document text.
convert = [[277, [fix_wrong_tables]],
           [278, [close_begin_deeper]],
           [279, [long_charstyle_names]],
           [280, [axe_show_label]],
           [281, []],
           [282, []],
           [283, [convert_flex]],
           [284, []],
           [285, []], # an empty manifest is automatically added
           [286, []],
           [287, [convert_wrapfig_options]],
           [288, [convert_inset_command]],
           [289, [convert_latexcommand_index]],
           [290, []],
           [291, []],
           [292, []],
           [293, []],
           [294, [convert_pdf_options]],
           [295, [convert_htmlurl, convert_url]]
          ]

# Same layout for the way back, in descending order:
# [format reached, [functions applied to get there]].
revert =  [[294, [revert_href]],
           [293, [revert_pdf_options_2]],
           [292, [revert_inset_info]],
           [291, [revert_japanese, revert_japanese_encoding]],
           [290, [revert_vietnamese]],
           [289, [revert_wraptable]],
           [288, [revert_latexcommand_index]],
           [287, [revert_inset_command]],
           [286, [revert_wrapfig_options]],
           [285, [revert_pdf_options]],
           [284, [remove_manifest, remove_inzip_options]],
           [283, []],
           [282, [revert_flex]],
           [281, []],
           [280, [revert_begin_modules]],
           [279, [revert_show_label]],
           [278, [revert_long_charstyle_names]],
           [277, []],
           [276, []]
          ]


if __name__ == "__main__":
    # Nothing to do when run directly.
    pass