1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2008 José Matos <jamatos@lyx.org>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 """ Convert files to the file format generated by lyx 2.0"""
import os
import re
import string
import sys

from parser_tools import find_token, find_end_of, find_tokens, get_value, get_value_string
27 ####################################################################
28 # Private helper functions
def find_end_of_inset(lines, i):
    """Find the end of the inset that starts at ``lines[i]``.

    The search starts at index *i* (that line is included) and is
    delegated to ``find_end_of`` with the begin_inset / end_inset token
    pair.  The result is whatever ``find_end_of`` returns; callers in this
    file treat -1 as "not found".
    """
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def add_to_preamble(document, text):
    """Append the lines in *text* to the preamble if not already there.

    Only the first line of *text* is looked up in ``document.preamble``;
    when it is found, nothing is added.  An empty *text* is a no-op.

    document -- object with a ``preamble`` list of lines
    text     -- list of preamble lines to append
    """
    if not text:
        return

    if find_token(document.preamble, text[0], 0) != -1:
        # already present: do not add it a second time
        return

    document.preamble.extend(text)
def insert_to_preamble(index, document, text):
    """Insert *text* into the preamble at position *index*.

    index    -- list index in ``document.preamble`` to insert at
    document -- object with a ``preamble`` list of lines
    text     -- a string, inserted as one single preamble entry (it may
                itself contain newlines), or a list/tuple of lines, which
                are spliced in individually at *index*
    """
    if isinstance(text, (list, tuple)):
        document.preamble[index:index] = list(text)
    else:
        document.preamble.insert(index, text)
def read_unicodesymbols():
    """Read the unicodesymbols list of characters and corresponding commands.

    The file ``unicodesymbols`` is looked up relative to the directory of
    the running script (``sys.argv[0]``): in its parent when that
    directory is named ``lyx2lyx``, otherwise in the directory itself.

    Returns a list of ``[command, character]`` pairs.  Commands are kept
    as written in the file (backslashes doubled).  For accent commands of
    the form ``\\\\X{c}`` two extra spellings are added: ``X`` followed
    directly by ``c``, and ``X`` followed by a space and ``c``.

    Lines that are comments, blank, have fewer than three fields, whose
    command does not start with a backslash, or whose code point cannot be
    parsed are skipped.
    """
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # str.strip('lyx2lyx') removes any of the characters l, y, x, 2 from
    # BOTH ends of the path, which only by accident drops a trailing
    # "lyx2lyx" component.  Go up one level explicitly instead.
    if os.path.basename(pathname) == 'lyx2lyx':
        pathname = os.path.dirname(pathname)

    try:
        to_char = unichr        # Python 2
    except NameError:
        to_char = chr           # Python 3

    spec_chars = []
    # Two backslashes, followed by some non-word character, and then a character
    # in brackets. The idea is to check for constructs like: \"{u}, which is how
    # they are written in the unicodesymbols file; but they can also be written
    # as: \"u or even \" u.
    accent_re = re.compile(r'\\\\(\W)\{(\w)\}')

    with open(os.path.join(pathname, 'unicodesymbols')) as fp:
        for line in fp:
            if line[0] == '#' or line.strip() == "":
                continue
            line = line.replace(' "', ' ')   # remove all quotation marks with spaces before
            line = line.replace('" ', ' ')   # remove all quotation marks with spaces after
            line = line.replace(r'\"', '"')  # replace \" by " (for characters with diaeresis)
            try:
                ucs4, command, dead = line.split(None, 2)
                if command[0:1] != "\\":
                    continue
                # base 0 accepts the "0x...." notation without using eval()
                char = to_char(int(ucs4, 0))
            except (ValueError, OverflowError):
                # too few fields, or not a usable code point
                continue
            spec_chars.append([command, char])

            m = accent_re.match(command)
            if m is not None:
                command = "\\\\"
                # If the character is a double-quote, then we need to escape it, too,
                # since it is done that way in the LyX file.
                if m.group(1) == "\"":
                    command += "\\"
                commandbl = command
                command += m.group(1) + m.group(2)
                commandbl += m.group(1) + ' ' + m.group(2)
                spec_chars.append([command, char])
                spec_chars.append([commandbl, char])

    return spec_chars
# [command, character] pairs from the unicodesymbols file, read once when
# this module is loaded.
unicode_reps = read_unicodesymbols()


def put_cmd_in_ert(string):
    """Wrap the LaTeX code in *string* into a collapsed ERT inset.

    Every character listed in ``unicode_reps`` is replaced by its command
    (with doubled backslashes reduced to single ones), then every
    backslash is written as a backslash token followed by a newline, and
    the result is enclosed in the ERT inset header and footer.

    Returns the inset as one string containing embedded newlines.
    """
    for rep in unicode_reps:
        string = string.replace(rep[1], rep[0].replace('\\\\', '\\'))
    string = string.replace('\\', "\\backslash\n")
    return "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n" \
        + string + "\n\\end_layout\n\\end_inset"
def lyx2latex(document, lines):
    """Convert some LyX stuff into corresponding LaTeX stuff, as best we can.

    document -- the document; its preamble gets the textcomp loader added
                as soon as at least one line is actually converted
    lines    -- the LyX lines to convert

    Returns the converted text of all lines concatenated into one string.
    A lossless reversion is not possible; only some common insets and
    font settings are handled.
    """
    # Lines that only carry LyX structure and contribute no text.
    structural = ("\\begin_layout", "\\end_layout", "\\begin_inset",
                  "\\end_inset", "\\lang")
    pieces = []
    ert_end = 0

    for curline, line in enumerate(lines):
        if line.startswith("\\begin_inset ERT"):
            # We don't want to replace things inside ERT, so figure out
            # where the end of the inset is.
            ert_end = find_end_of_inset(lines, curline + 1)
            continue
        elif line.startswith("\\begin_inset Formula"):
            # keep only what follows the inset header
            line = line[20:]
        elif line.startswith("\\begin_inset Quotes"):
            # For now, we do a very basic reversion. Someone who understands
            # quotes is welcome to fix it up.
            qtype = line[20:].strip()
            # 2nd character: side ("l" = opening), 3rd: "d" = double quote
            side = qtype[1]
            dbls = qtype[2]
            if side == "l":
                line = "``" if dbls == "d" else "`"
            else:
                line = "''" if dbls == "d" else "'"
        elif (line.isspace() or line.startswith(structural) or
              line.strip() in ("status collapsed", "status open")):
            # skip all that stuff
            continue

        # this needs to be added to the preamble because of cases like
        # \textmu, \textbackslash, etc.
        add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
                                   '\\@ifundefined{textmu}',
                                   ' {\\usepackage{textcomp}}{}'])
        # a lossless reversion is not possible
        # try at least to handle some common insets and settings
        if ert_end >= curline:
            # Inside ERT the text already is LaTeX: one backslash token
            # stands for exactly ONE backslash (it used to be expanded to
            # two, which turned every command into a line break).
            line = line.replace(r'\backslash', '\\')
        else:
            line = line.replace('&', '\\&{}')
            line = line.replace('#', '\\#{}')
            line = line.replace('^', '\\^{}')
            line = line.replace('%', '\\%{}')
            line = line.replace('_', '\\_{}')
            line = line.replace('$', '\\${}')

            # Do the LyX text --> LaTeX conversion
            for rep in unicode_reps:
                line = line.replace(rep[1], rep[0] + "{}")
            line = line.replace(r'\backslash', r'\textbackslash{}')
            line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
            line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
            line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
            line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
            line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
            line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
            line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
            line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
            line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
        pieces.append(line)

    return "".join(pieces)
178 ####################################################################
def revert_swiss(document):
    """Set language german-ch to ngerman.

    Updates ``document.language`` and the language header line when the
    document language is german-ch, and rewrites every german-ch language
    switch in the body.
    """
    if document.language == "german-ch":
        document.language = "ngerman"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language ngerman"

    j = 0
    while True:
        j = find_token(document.body, "\\lang german-ch", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman")
        j += 1
def revert_tabularvalign(document):
    """Revert the tabular valign option.

    For longtables the tabularvalignment attribute is only removed from
    the features line.  Any other tabular whose features line requests
    top or bottom alignment is wrapped into a frameless Box inset with the
    matching position; tabulars without such a request are left alone.
    """
    valign_re = re.compile(r'<features tabularvalignment="(top|bottom)">')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Continuing from j (-1) would restart the scan; skip this inset.
            i += 1
            continue

        # don't set a box for longtables, only delete tabularvalignment
        # (search bounded to this inset so a later table is not picked up)
        p = find_token(document.body, "<features islongtable=", i, j)
        if p != -1:
            # remove just this attribute and keep whatever follows it
            document.body[p] = re.sub(r'\s*tabularvalignment="[^"]*"', '',
                                      document.body[p])
            i += 1
            continue

        # which valignment is specified?
        k = find_token(document.body, "<features tabularvalignment=", i, j)
        m = None
        if k != -1:
            m = valign_re.match(document.body[k])
        if m is None:
            # nothing to wrap
            i += 1
            continue
        tabularvalignment = m.group(1)

        subst = ['\\end_layout', '\\end_inset']
        document.body[j+1:j+1] = subst  # just inserts those lines
        subst = ['\\begin_inset Box Frameless',
                 'position "' + tabularvalignment[0] + '"',
                 'hor_pos "c"',
                 'has_inner_box 1',
                 'inner_pos "c"',
                 'use_parbox 0',
                 'width "0col%"',
                 'special "none"',
                 'height "1in"',
                 'height_special "totalheight"',
                 'status open',
                 '',
                 '\\begin_layout Plain Layout']
        document.body[i:i] = subst  # this just inserts the array at i
        i += len(subst) + 2  # adjust i to save a few cycles
def _revert_phantom_kind(document, kind, command):
    """Replace every Phantom inset of type *kind* by ERT around its content.

    kind    -- inset type as written after "Phantom" in the inset header
    command -- name of the LaTeX command (without backslash) to emit

    The four lines starting at the inset header are replaced by an ERT
    inset holding the command and an opening brace; the four lines
    starting at the next end_layout line are replaced by a size reset and
    an ERT inset holding the closing brace.
    NOTE(review): the first end_layout after the header is taken as the
    end of the content -- presumably the inset holds one paragraph only.
    """
    token = '\\begin_inset Phantom ' + kind
    opening = ('\\begin_inset ERT\nstatus collapsed\n\n'
               '\\begin_layout Plain Layout\n\n\n\\backslash\n'
               + command + '{\n\\end_layout\n\n\\end_inset\n')
    closing = ('\\size default\n\n\\begin_inset ERT\nstatus collapsed\n\n'
               '\\begin_layout Plain Layout\n\n'
               '}\n\\end_layout\n\n\\end_inset\n')
    i = 0
    while True:
        i = find_token(document.body, token, i)
        if i == -1:
            return
        substi = document.body[i].replace(token, opening).split('\n')
        document.body[i : i+4] = substi
        i += len(substi)
        j = find_token(document.body, "\\end_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of "
                             + kind + " inset.")
            return
        substj = document.body[j].replace('\\end_layout', closing).split('\n')
        document.body[j : j+4] = substj
        i += len(substj)


def revert_phantom(document):
    """Reverts phantom to ERT."""
    _revert_phantom_kind(document, "Phantom", "phantom")


def revert_hphantom(document):
    """Reverts hphantom to ERT."""
    _revert_phantom_kind(document, "HPhantom", "hphantom")


def revert_vphantom(document):
    """Reverts vphantom to ERT."""
    _revert_phantom_kind(document, "VPhantom", "vphantom")
def _xetex_set_header(document, token, value):
    """Overwrite the header line starting with *token* by "token value".

    Warns and leaves the header untouched when no such line exists
    (writing to index -1 would clobber the last header line).
    """
    idx = find_token(document.header, token, 0)
    if idx == -1:
        document.warning("Malformed LyX document: Missing " + token + ".")
        return
    document.header[idx] = token + " " + value


def _xetex_font_name(document, token):
    """Return the rest of the *token* header line, or "default".

    "default" is returned (after a warning) when the line is missing, and
    also when the line carries no value.
    """
    idx = find_token(document.header, token, 0)
    if idx == -1:
        document.warning("Malformed LyX document: Missing " + token + ".")
        return "default"
    # take everything after the first space, as the font name
    m = re.match(re.escape(token) + r' (.*)$', document.header[idx])
    if m is None:
        return "default"
    return m.group(1)


def _xetex_font_scale(document, token):
    """Return the numeric value of the *token* header line (100.0 if unusable)."""
    try:
        return float(get_value(document.header, token, 0))
    except ValueError:
        return 100.0


def revert_xetex(document):
    """Reverts documents that use XeTeX.

    The use_xetex header line is always removed.  If XeTeX was switched
    on, the input encoding is set to utf8-plain, the font settings are
    turned into fontspec preamble code (inserted as one entry at the top
    of the preamble) and the font header lines are reset to defaults.
    """
    i = find_token(document.header, '\\use_xetex', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_xetex.")
        return
    uses_xetex = get_value(document.header, "\\use_xetex", i) != 'false'
    del document.header[i]
    if not uses_xetex:
        return

    # 1.) set doc encoding to utf8-plain
    _xetex_set_header(document, "\\inputencoding", "utf8-plain")

    # 2.) check font settings
    roman = _xetex_font_name(document, "\\font_roman")
    sans = _xetex_font_name(document, "\\font_sans")
    typewriter = _xetex_font_name(document, "\\font_typewriter")
    osf = get_value(document.header, '\\font_osf', 0) == "true"
    sf_scale = _xetex_font_scale(document, '\\font_sf_scale')
    tt_scale = _xetex_font_scale(document, '\\font_tt_scale')

    # 3.) set preamble stuff
    pretext = '%% This document must be processed with xelatex!\n'
    pretext += '\\usepackage{fontspec}\n'
    if roman != "default":
        pretext += '\\setmainfont[Mapping=tex-text]{' + roman + '}\n'
    if sans != "default":
        pretext += '\\setsansfont['
        if sf_scale != 100:
            pretext += 'Scale=' + str(sf_scale / 100) + ','
        pretext += 'Mapping=tex-text]{' + sans + '}\n'
    if typewriter != "default":
        pretext += '\\setmonofont'
        if tt_scale != 100:
            pretext += '[Scale=' + str(tt_scale / 100) + ']'
        pretext += '{' + typewriter + '}\n'
    if osf:
        pretext += '\\defaultfontfeatures{Numbers=OldStyle}\n'
    # the backslash must be doubled: a bare backslash-u is an (invalid)
    # unicode escape in Python 3 string literals
    pretext += '\\usepackage{xunicode}\n'
    pretext += '\\usepackage{xltxtra}\n'
    insert_to_preamble(0, document, pretext)

    # 4.) reset font settings
    for token, value in (("\\font_roman", "default"),
                         ("\\font_sans", "default"),
                         ("\\font_typewriter", "default"),
                         ("\\font_osf", "false"),
                         ("\\font_sc", "false"),
                         ("\\font_sf_scale", "100"),
                         ("\\font_tt_scale", "100")):
        _xetex_set_header(document, token, value)
def revert_outputformat(document):
    """Remove default output format param from the header.

    Warns and changes nothing when the parameter is missing.
    """
    i = find_token(document.header, '\\default_output_format', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\default_output_format.")
        return
    del document.header[i]
def _color_fraction(hex_pair):
    """Convert a two-digit hex colour component into a fraction of 256."""
    value = int(hex_pair, 16)
    # we want the output "0.5" for the value "127" therefore add here
    if value != 0:
        value += 1
    return float(value) / 256


def revert_backgroundcolor(document):
    """Reverts background color to preamble code.

    Every backgroundcolor header line is removed.  Unless the colour is
    white (#ffffff), a colour definition plus a pagecolor command is
    inserted as one entry at the top of the preamble.  A value that is not
    of the form #rrggbb is dropped with a warning.
    """
    i = 0
    while True:
        i = find_token(document.header, "\\backgroundcolor", i)
        if i == -1:
            return
        colorcode = get_value(document.header, '\\backgroundcolor', i)
        del document.header[i]
        # don't clutter the preamble if backgroundcolor is not set
        if colorcode == "#ffffff":
            continue
        # the color code is in the form #rrggbb where every character denotes a hex number
        try:
            redout = _color_fraction(colorcode[1:3])
            greenout = _color_fraction(colorcode[3:5])
            blueout = _color_fraction(colorcode[5:7])
        except ValueError:
            document.warning("Malformed LyX document: Invalid \\backgroundcolor value.")
            continue
        insert_to_preamble(0, document,
                           '% Commands inserted by lyx2lyx to set the background color\n'
                           + '\\@ifundefined{definecolor}{\\usepackage{color}}{}\n'
                           + '\\definecolor{page_backgroundcolor}{rgb}{'
                           + str(redout) + ', ' + str(greenout)
                           + ', ' + str(blueout) + '}\n'
                           + '\\pagecolor{page_backgroundcolor}\n')
def revert_splitindex(document):
    """Reverts splitindex-aware documents.

    1. Removes the use_indices header line and all index definitions from
       the header; when multiple indices were in use, the splitidx package
       and one newindex command per definition with a shortcut go to the
       top of the preamble.
    2. Index insets of type idx (or all of them, when multiple indices
       were off) lose their type; the others become an sindex command in
       ERT.
    3. index_print insets of type idx lose their type line; the others are
       deleted (multiple indices off) or replaced by a printindex command
       in ERT.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    preamble = ""
    if indices == "true":
        preamble += "\\usepackage{splitidx}\n"
    del document.header[i]

    # 1.) index definitions in the header
    index_re = re.compile(r'\\index (.*)$')
    i = 0
    while True:
        i = find_token(document.header, "\\index", i)
        if i == -1:
            break
        k = find_token(document.header, "\\end_index", i)
        if k == -1:
            document.warning("Malformed LyX document: Missing \\end_index.")
            return
        m = index_re.match(document.header[i])
        ishortcut = get_value(document.header, '\\shortcut', i, k)
        if m is not None and ishortcut != "" and indices == "true":
            preamble += "\\newindex[" + m.group(1) + "]{" + ishortcut + "}\n"
        # the definition is gone, so the next one (if any) is now at i
        del document.header[i:k+1]
    if preamble != "":
        insert_to_preamble(0, document, preamble)

    # 2.) index insets
    inset_re = re.compile(r'\\begin_inset Index (.*)$')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            break
        m = inset_re.match(document.body[i])
        if m is None:
            # no type given: nothing to revert
            i += 1
            continue
        itype = m.group(1)
        if itype == "idx" or indices == "false":
            document.body[i] = "\\begin_inset Index"
        else:
            k = find_end_of_inset(document.body, i)
            if k == -1:
                return
            content = lyx2latex(document, document.body[i:k])
            # escape quotes
            content = content.replace('"', r'\"')
            subst = [put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")]
            document.body[i:k+1] = subst
        i += 1

    # 3.) printindex insets
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            return
        ptype = get_value(document.body, 'type', i, k).strip('"')
        if ptype == "idx":
            j = find_token(document.body, "type", i, k)
            if j != -1:
                del document.body[j]
        elif indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\printindex[" + ptype + "]{}")]
            document.body[i:k+1] = subst
        i += 1
def convert_splitindex(document):
    """Converts index and printindex insets to splitindex-aware format.

    Every Index inset gets the type idx, and every index_print inset gets
    a type "idx" line after its LatexCommand line.  Conversion of the
    printindex insets stops with a warning at the first inset whose second
    line is not the printindex command.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            break
        document.body[i] = document.body[i].replace("\\begin_inset Index",
                                                    "\\begin_inset Index idx")
        i += 1

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        # the inset header may be the very last line of a truncated body
        if i + 1 >= len(document.body) or \
           document.body[i + 1].find('LatexCommand printindex') == -1:
            document.warning("Malformed LyX document: Incomplete printindex inset.")
            return
        subst = ["LatexCommand printindex",
                 "type \"idx\""]
        document.body[i + 1:i + 2] = subst
        i += 1
def revert_subindex(document):
    """Reverts printsubindex CommandInset types.

    index_print insets whose LatexCommand is printsubindex are deleted
    when multiple indices are off, and otherwise replaced by the
    corresponding command in ERT.  Other index_print insets are skipped.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            return
        ctype = get_value(document.body, 'LatexCommand', i, k)
        if ctype != "printsubindex":
            i += 1
            continue
        ptype = get_value(document.body, 'type', i, k).strip('"')
        if indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")]
            document.body[i:k+1] = subst
        i += 1
def revert_printindexall(document):
    """Reverts the starred print[sub]index CommandInset types.

    index_print insets whose LatexCommand is printindex* or printsubindex*
    are deleted when multiple indices are off, and otherwise replaced by
    the command itself in ERT.  Other index_print insets are skipped.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            return
        ctype = get_value(document.body, 'LatexCommand', i, k)
        if ctype not in ("printindex*", "printsubindex*"):
            i += 1
            continue
        if indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\" + ctype + "{}")]
            document.body[i:k+1] = subst
        i += 1
def revert_strikeout(document):
    """Reverts the strikeout character style.

    Deletes every body line that starts with the strikeout token.
    """
    i = 0
    while True:
        # resume at i: lines before it were already checked
        i = find_token(document.body, '\\strikeout', i)
        if i == -1:
            return
        del document.body[i]
def revert_uulinewave(document):
    """Reverts the uuline and uwave character styles.

    Deletes every body line that starts with one of the two tokens.
    """
    for token in ('\\uuline', '\\uwave'):
        i = 0
        while True:
            # resume at i: lines before it were already checked
            i = find_token(document.body, token, i)
            if i == -1:
                break
            del document.body[i]
def revert_ulinelatex(document):
    """Reverts the uline character style.

    When the body uses underlining at least once, preamble code is
    inserted (as one entry at the top) that loads ulem and redefines
    underbar on top of uline.  Documents without underlining are left
    untouched.
    """
    i = find_token(document.body, '\\bar under', 0)
    if i == -1:
        return
    insert_to_preamble(0, document,
                       '% Commands inserted by lyx2lyx for proper underlining\n'
                       + '\\PassOptionsToPackage{normalem}{ulem}\n'
                       + '\\usepackage{ulem}\n'
                       + '\\let\\cite@rig\\cite\n'
                       + '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}\n'
                       + ' \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}\n'
                       + '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}\n')
def revert_custom_processors(document):
    """Remove bibtex_command and index_command params from the header.

    A missing parameter is reported with a warning; the other one is
    still processed.
    """
    for token in ('\\bibtex_command', '\\index_command'):
        i = find_token(document.header, token, 0)
        if i == -1:
            document.warning("Malformed LyX document: Missing " + token + ".")
        else:
            del document.header[i]
def convert_nomencl_width(document):
    """Add set_width param to nomencl_print.

    The line set_width "none" is inserted two lines below the header of
    every nomencl_print CommandInset.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        document.body.insert(i + 2, "set_width \"none\"")
        i += 1
def revert_nomencl_width(document):
    """Remove set_width param from nomencl_print.

    Insets without the parameter are reported with a warning and skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # continuing from -1 would find this same inset again, forever
            document.warning("Malformed LyX document: Could not find end of nomencl_print inset.")
            i += 1
            continue
        l = find_token(document.body, "set_width", i, j)
        if l == -1:
            document.warning("Can't find set_width option for nomencl_print!")
            i = j
            continue
        del document.body[l]
        i += 1
def revert_nomencl_cwidth(document):
    """Remove width param from nomencl_print.

    The width is moved into the preamble as a setlength command for
    nomlabelwidth.  Insets without the parameter are reported with a
    warning and skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            # continuing from -1 would find this same inset again, forever
            document.warning("Malformed LyX document: Could not find end of nomencl_print inset.")
            i += 1
            continue
        l = find_token(document.body, "width", i, j)
        if l == -1:
            document.warning("Can't find width option for nomencl_print!")
            i = j
            continue
        # read the value before its line is removed
        width = get_value(document.body, "width", i, j).strip('"')
        del document.body[l]
        add_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
        i += 1
def revert_applemac(document):
    """Revert applemac encoding to auto.

    Updates ``document.encoding`` and, when present, the encoding line in
    the header.  Documents with another encoding are left untouched.
    """
    if document.encoding != "applemac":
        return
    document.encoding = "auto"
    i = find_token(document.header, "\\encoding", 0)
    if i != -1:
        document.header[i] = "\\encoding auto"
def revert_longtable_align(document):
    """Remove longtable alignment setting.

    The longtabularalignment attribute is removed from the line two below
    each tabular inset header.  Tabulars without the attribute are
    skipped; the scan continues with the next tabular.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            break
        # the alignment is 2 lines below \\begin_inset Tabular
        if i + 2 < len(document.body) and \
           document.body[i+2].find("longtabularalignment") != -1:
            # remove just this attribute and keep whatever follows it
            document.body[i+2] = re.sub(r'\s*longtabularalignment="[^"]*"', '',
                                        document.body[i+2])
        i += 1
# LyX release names this module is responsible for.
supported_versions = ["2.0.0","2.0"]

# Each entry is [file format number, [functions]].  ``convert`` is listed
# in ascending and ``revert`` in descending format order; formats without
# an entry function need no change to the document text.
# NOTE(review): presumably the lyx2lyx driver applies the listed functions
# when moving to that format number -- confirm against the caller.
convert = [[346, []],
           [347, []],
           [348, []],
           [349, []],
           [350, []],
           [351, []],
           [352, [convert_splitindex]],
           [353, []],
           [354, []],
           [355, []],
           [356, []],
           [357, []],
           [358, []],
           [359, [convert_nomencl_width]],
           [360, []],
           [361, []],
           [362, []],
          ]

revert = [[362, [revert_longtable_align]],
          [361, [revert_applemac]],
          [360, []],
          [359, [revert_nomencl_cwidth]],
          [358, [revert_nomencl_width]],
          [357, [revert_custom_processors]],
          [356, [revert_ulinelatex]],
          [355, [revert_uulinewave]],
          [354, [revert_strikeout]],
          [353, [revert_printindexall]],
          [352, [revert_subindex]],
          [351, [revert_splitindex]],
          [350, [revert_backgroundcolor]],
          [349, [revert_outputformat]],
          [348, [revert_xetex]],
          [347, [revert_phantom, revert_hphantom, revert_vphantom]],
          [346, [revert_tabularvalign]],
          [345, [revert_swiss]]
         ]


if __name__ == "__main__":
    pass