# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2008 José Matos <jamatos@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

""" Convert files to the file format generated by lyx 2.0"""
import os
import re
import string
import sys

from parser_tools import find_token, find_end_of, find_tokens, get_value, get_value_string
####################################################################
# Private helper functions
def find_end_of_inset(lines, i):
    """Find the end of the inset that starts at lines[i].

    Delegates to find_end_of() with the "\\begin_inset" / "\\end_inset"
    token pair; lines[i] itself is included in the search.  The result is
    whatever find_end_of() returns (callers in this file treat -1 as
    "not found").
    """
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def add_to_preamble(document, text):
    """Append the lines in text to the preamble if not already there.

    text is a list of preamble lines.  Only its first line is looked up
    (with find_token) to decide whether the snippet is already present.
    """
    if not text:
        # Nothing to add; also avoids indexing an empty list below.
        return

    if find_token(document.preamble, text[0], 0) != -1:
        return

    document.preamble.extend(text)
def insert_to_preamble(index, document, text):
    """Insert text into the preamble as one entry at position index.

    text is stored as-is (a single list element, even when it contains
    embedded newlines) and no duplicate check is performed.
    """
    document.preamble.insert(index, text)
def read_unicodesymbols():
    """Read the unicodesymbols list of unicode characters and commands.

    Returns a list of [command, character] pairs.  For commands of the
    form \\<non-word char>{<char>} two extra spellings are added: one
    without the braces and one with a blank instead of the braces.
    """
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # NOTE(review): str.strip() removes any of the characters l, y, x, 2
    # from both ends of the path, not the literal suffix 'lyx2lyx'.  Kept
    # unchanged because callers depend on the resulting location.
    filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')
    spec_chars = []
    # Two backslashes, followed by some non-word character, and then a character
    # in brackets. The idea is to check for constructs like: \"{u}, which is how
    # they are written in the unicodesymbols file; but they can also be written
    # as: \"u or even \" u.
    r = re.compile(r'\\\\(\W)\{(\w)\}')
    fp = open(filename)
    try:
        for line in fp.readlines():
            if line[0] == '#' or line.strip() == "":
                continue
            line = line.replace(' "', ' ')  # remove all quotation marks with spaces before
            line = line.replace('" ', ' ')  # remove all quotation marks with spaces after
            line = line.replace(r'\"', '"')  # replace \" by " (for characters with diaeresis)
            try:
                [ucs4, command, dead] = line.split(None, 2)
                if command[0:1] != "\\":
                    continue
                # NOTE(review): eval() on file content; acceptable only
                # because the table ships with the program itself.
                char = unichr(eval(ucs4))
                spec_chars.append([command, char])
            except Exception:
                # Malformed entry: skip it rather than abort the whole table.
                continue
            m = r.match(command)
            if m is not None:
                command = "\\\\"
                # If the character is a double-quote, then we need to escape it, too,
                # since it is done that way in the LyX file.
                if m.group(1) == "\"":
                    command += "\\"
                commandbl = command
                command += m.group(1) + m.group(2)
                commandbl += m.group(1) + ' ' + m.group(2)
                spec_chars.append([command, char])
                spec_chars.append([commandbl, char])
    finally:
        # Close the table even if a line raises something unexpected.
        fp.close()
    return spec_chars


# Replacement table used by put_cmd_in_ert() and lyx2latex(); read once
# when the module is imported.
unicode_reps = read_unicodesymbols()
def put_cmd_in_ert(string):
    """Return string wrapped in a collapsed ERT inset.

    Characters listed in unicode_reps are first replaced by their LaTeX
    commands, then every backslash is written the way LyX stores it
    ("\\backslash" followed by a newline).  The result is one string with
    embedded newlines.
    """
    for rep in unicode_reps:
        string = string.replace(rep[1], rep[0].replace('\\\\', '\\'))
    string = string.replace('\\', "\\backslash\n")
    string = "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n" \
      + string + "\n\\end_layout\n\\end_inset"
    return string
def lyx2latex(document, lines):
    """Convert some LyX stuff into corresponding LaTeX stuff, as best we can.

    lines is a slice of the document body.  Structural lines (layouts,
    insets, language switches, inset status) are dropped, quote insets
    become plain quote characters, and the remaining text is escaped and
    concatenated into a single string, which is returned.  As a side
    effect a textcomp snippet is added to the document preamble as soon
    as at least one line of content is converted.
    """
    # clean up multiline stuff
    parts = []
    ert_end = 0
    preamble_done = False

    for curline, line in enumerate(lines):
        if line.startswith("\\begin_inset ERT"):
            # We don't want to replace things inside ERT, so figure out
            # where the end of the inset is.
            ert_end = find_end_of_inset(lines, curline + 1)
            continue
        elif line.startswith("\\begin_inset Formula"):
            line = line[20:]
        elif line.startswith("\\begin_inset Quotes"):
            # For now, we do a very basic reversion. Someone who understands
            # quotes is welcome to fix it up.
            qtype = line[20:].strip()
            # qtype[0] is the language, which is ignored here
            side = qtype[1]
            dbls = qtype[2]
            if side == "l":
                if dbls == "d":
                    line = "``"
                else:
                    line = "`"
            else:
                if dbls == "d":
                    line = "''"
                else:
                    line = "'"
        elif (line.isspace() or
              line.startswith("\\begin_layout") or
              line.startswith("\\end_layout") or
              line.startswith("\\begin_inset") or
              line.startswith("\\end_inset") or
              line.startswith("\\lang") or
              line.strip() == "status collapsed" or
              line.strip() == "status open"):
            # skip all that stuff
            continue

        if not preamble_done:
            # this needs to be added to the preamble because of cases like
            # \textmu, \textbackslash, etc.  add_to_preamble() ignores
            # repeats, so one call per conversion is enough.
            add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
                                       '\\@ifundefined{textmu}',
                                       ' {\\usepackage{textcomp}}{}'])
            preamble_done = True

        # a lossless reversion is not possible
        # try at least to handle some common insets and settings
        if ert_end >= curline:
            # still inside ERT: only undo LyX's backslash encoding
            line = line.replace(r'\backslash', r'\\')
        else:
            # NOTE(review): everything down to the \InsetSpace replacement
            # is placed in this branch so that ERT content stays untouched,
            # as the ERT comment above requires -- confirm against upstream.
            line = line.replace('&', '\\&{}')
            line = line.replace('#', '\\#{}')
            line = line.replace('^', '\\^{}')
            line = line.replace('%', '\\%{}')
            line = line.replace('_', '\\_{}')
            line = line.replace('$', '\\${}')

            # Do the LyX text --> LaTeX conversion
            for rep in unicode_reps:
                line = line.replace(rep[1], rep[0] + "{}")
            line = line.replace(r'\backslash', r'\textbackslash{}')
            line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
            line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
            line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
            line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
            line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
            line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
            line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
            line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
            line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
        parts.append(line)
    return "".join(parts)
####################################################################
def revert_swiss(document):
    """Set language german-ch to ngerman.

    Rewrites the document language (attribute and "\\language" header
    line) and every "\\lang german-ch" switch in the body.
    """
    if document.language == "german-ch":
        document.language = "ngerman"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language ngerman"
    j = 0
    while True:
        j = find_token(document.body, "\\lang german-ch", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman")
        j = j + 1
def _cut_tabularvalignment(line):
    """Cut line just before 'tabularvalignment' and re-close it with '>'."""
    q = line.find("tabularvalignment")
    if q > -1:
        line = line[:q-1] + '>'
    return line


def revert_tabularvalign(document):
    """Revert the tabular valign option.

    The tabularvalignment attribute is removed from every tabular.  For
    tabulars that are not longtables and are aligned top or bottom, the
    inset is additionally wrapped in a frameless Box with the matching
    position.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Step past this inset; continuing from j (-1) would restart
            # the search at a negative index.
            i += 1
            continue
        # the alignment is 2 lines below \\begin_inset Tabular
        features = document.body[i+2]

        # don't set a box for longtables, only delete tabularvalignment
        if features.find("islongtable") > -1:
            document.body[i+2] = _cut_tabularvalignment(features)
            i = i + 1
            continue

        # when no longtable
        tabularvalignment = 'c'
        # which valignment is specified?
        if features.find('tabularvalignment="top"') > -1:
            tabularvalignment = 't'
        if features.find('tabularvalignment="bottom"') > -1:
            tabularvalignment = 'b'
        # delete tabularvalignment
        document.body[i+2] = _cut_tabularvalignment(features)

        # don't add a box when centered
        if tabularvalignment == 'c':
            i = j
            continue
        subst = ['\\end_layout', '\\end_inset']
        document.body[j+1:j+1] = subst # just inserts those lines
        # NOTE(review): the short Box parameter lines (hor_pos ... status
        # open) were restored from the gaps in the listing; verify them
        # against the upstream file.
        subst = ['\\begin_inset Box Frameless',
            'position "' + tabularvalignment +'"',
            'hor_pos "c"',
            'has_inner_box 1',
            'inner_pos "c"',
            'use_parbox 0',
            # we don't know the width, assume 50%
            'width "50col%"',
            'special "none"',
            'height "1in"',
            'height_special "totalheight"',
            'status open',
            '',
            '\\begin_layout Plain Layout']
        document.body[i:i] = subst # this just inserts the array at i
        i += len(subst) + 2 # adjust i to save a few cycles
def _revert_phantom_inset(document, inset, command):
    """Replace every '\\begin_inset Phantom <inset>' by ERT calling \\<command>{...}.

    The inset opening line becomes an ERT inset containing the command
    and an opening brace; the next "\\end_layout" after it becomes an ERT
    inset containing the closing brace.
    """
    token = '\\begin_inset Phantom ' + inset
    opening = ('\\begin_inset ERT\nstatus collapsed\n\n'
               '\\begin_layout Plain Layout\n\n\n\\backslash\n'
               + command + '{\n\\end_layout\n\n\\end_inset\n')
    closing = ('\\size default\n\n\\begin_inset ERT\nstatus collapsed\n\n'
               '\\begin_layout Plain Layout\n\n'
               '}\n\\end_layout\n\n\\end_inset\n')
    i = 0
    while True:
        i = find_token(document.body, token, i)
        if i == -1:
            return
        substi = document.body[i].replace(token, opening).split('\n')
        document.body[i : i+4] = substi
        i += len(substi)
        j = find_token(document.body, "\\end_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of "
                             + inset + " inset.")
            return
        substj = document.body[j].replace('\\end_layout', closing).split('\n')
        document.body[j : j+4] = substj
        i += len(substj)


def revert_phantom(document):
    """Reverts phantom to ERT."""
    _revert_phantom_inset(document, 'Phantom', 'phantom')


def revert_hphantom(document):
    """Reverts hphantom to ERT."""
    _revert_phantom_inset(document, 'HPhantom', 'hphantom')


def revert_vphantom(document):
    """Reverts vphantom to ERT."""
    _revert_phantom_inset(document, 'VPhantom', 'vphantom')
def _xetex_header_value(document, token):
    """Return the text after token in the header, or "default" if absent.

    A missing token is reported through document.warning().
    """
    pos = find_token(document.header, token, 0)
    if pos == -1:
        document.warning("Malformed LyX document: Missing " + token + ".")
        return "default"
    m = re.match(re.escape(token) + r' (.*)$', document.header[pos])
    if m is None:
        return "default"
    return m.group(1)


def _xetex_set_header(document, token, value):
    """Set header line token to value; warn and do nothing when absent."""
    pos = find_token(document.header, token, 0)
    if pos == -1:
        document.warning("Malformed LyX document: Missing " + token + ".")
        return
    document.header[pos] = token + " " + value


def revert_xetex(document):
    """Reverts documents that use XeTeX.

    The "\\use_xetex" header line is always removed.  When it was not
    'false', the input encoding is set to utf8-plain, the font choices are
    turned into fontspec preamble code, and the font header settings are
    reset to their defaults.
    """
    i = find_token(document.header, '\\use_xetex', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_xetex.")
        return
    use_xetex = get_value(document.header, "\\use_xetex", i)
    del document.header[i]
    if use_xetex == 'false':
        return
    # 1.) set doc encoding to utf8-plain
    _xetex_set_header(document, "\\inputencoding", "utf8-plain")
    # 2.) check font settings
    roman = _xetex_header_value(document, "\\font_roman")
    sans = _xetex_header_value(document, "\\font_sans")
    typewriter = _xetex_header_value(document, "\\font_typewriter")
    osf = get_value(document.header, '\\font_osf', 0) == "true"
    sf_scale = float(get_value(document.header, '\\font_sf_scale', 0))
    tt_scale = float(get_value(document.header, '\\font_tt_scale', 0))
    # 3.) set preamble stuff
    pretext = '%% This document must be processed with xelatex!\n'
    pretext += '\\usepackage{fontspec}\n'
    if roman != "default":
        pretext += '\\setmainfont[Mapping=tex-text]{' + roman + '}\n'
    if sans != "default":
        pretext += '\\setsansfont['
        if sf_scale != 100:
            pretext += 'Scale=' + str(sf_scale / 100) + ','
        pretext += 'Mapping=tex-text]{' + sans + '}\n'
    if typewriter != "default":
        pretext += '\\setmonofont'
        if tt_scale != 100:
            pretext += '[Scale=' + str(tt_scale / 100) + ']'
        pretext += '{' + typewriter + '}\n'
    if osf:
        pretext += '\\defaultfontfeatures{Numbers=OldStyle}\n'
    # Backslashes are doubled: a bare '\u' is an invalid escape in
    # unicode string literals.
    pretext += '\\usepackage{xunicode}\n'
    pretext += '\\usepackage{xltxtra}\n'
    insert_to_preamble(0, document, pretext)
    # 4.) reset font settings
    _xetex_set_header(document, "\\font_roman", "default")
    _xetex_set_header(document, "\\font_sans", "default")
    _xetex_set_header(document, "\\font_typewriter", "default")
    _xetex_set_header(document, "\\font_osf", "false")
    _xetex_set_header(document, "\\font_sc", "false")
    _xetex_set_header(document, "\\font_sf_scale", "100")
    _xetex_set_header(document, "\\font_tt_scale", "100")
def revert_outputformat(document):
    """Remove default output format param.

    Warns when the "\\default_output_format" header line is missing.
    """
    i = find_token(document.header, '\\default_output_format', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\default_output_format.")
        return
    del document.header[i]
def revert_backgroundcolor(document):
    """Reverts background color to preamble code.

    Every "\\backgroundcolor" header line is removed.  Unless the color is
    "#ffffff", a \\definecolor / \\pagecolor snippet with the equivalent
    rgb fractions is inserted at the top of the preamble.
    """
    i = 0
    while True:
        i = find_token(document.header, "\\backgroundcolor", i)
        if i == -1:
            return
        colorcode = get_value(document.header, '\\backgroundcolor', i)
        del document.header[i]
        # don't clutter the preamble if backgroundcolor is not set
        if colorcode == "#ffffff":
            continue
        # the color code is in the form #rrggbb where every character denotes a hex number
        fractions = []
        for start in (1, 3, 5):
            # convert the string to an int
            value = int(colorcode[start:start + 2], 16)
            # we want the output "0.5" for the value "127" therefore add here
            if value != 0:
                value = value + 1
            fractions.append(str(float(value) / 256))
        # write the preamble
        insert_to_preamble(0, document,
                           '% Commands inserted by lyx2lyx to set the background color\n'
                           + '\\@ifundefined{definecolor}{\\usepackage{color}}{}\n'
                           + '\\definecolor{page_backgroundcolor}{rgb}{'
                           + fractions[0] + ', ' + fractions[1]
                           + ', ' + fractions[2] + '}\n'
                           + '\\pagecolor{page_backgroundcolor}\n')
def revert_splitindex(document):
    """Reverts splitindex-aware documents.

    Three passes: (1) the "\\use_indices" setting and all "\\index"
    definitions are removed from the header and, when indices were in use,
    turned into splitidx preamble code; (2) Index insets lose their type,
    or become \\sindex ERT when they belong to a non-default index;
    (3) index_print insets lose their type, are removed, or become
    \\printindex ERT.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    preamble = ""
    if indices == "true":
        preamble += "\\usepackage{splitidx}\n"
    del document.header[i]

    # pass 1: index definitions in the header
    header_re = re.compile(r'\\index (.*)$')
    i = 0
    while True:
        i = find_token(document.header, "\\index", i)
        if i == -1:
            break
        k = find_token(document.header, "\\end_index", i)
        if k == -1:
            document.warning("Malformed LyX document: Missing \\end_index.")
            return
        m = header_re.match(document.header[i])
        if m is None:
            # Not an "\index <name>" line; leave it alone and look further.
            i = i + 1
            continue
        iname = m.group(1)
        ishortcut = get_value(document.header, '\\shortcut', i, k)
        if ishortcut != "" and indices == "true":
            preamble += "\\newindex[" + iname + "]{" + ishortcut + "}\n"
        del document.header[i:k+1]
        i = 0
    if preamble != "":
        insert_to_preamble(0, document, preamble)

    # pass 2: index entries in the body
    inset_re = re.compile(r'\\begin_inset Index (.*)$')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            break
        m = inset_re.match(document.body[i])
        if m is None:
            # Inset without an index type: already in the old format.
            i = i + 1
            continue
        itype = m.group(1)
        if itype == "idx" or indices == "false":
            document.body[i] = "\\begin_inset Index"
        else:
            k = find_end_of_inset(document.body, i)
            if k == -1:
                return
            content = lyx2latex(document, document.body[i:k])
            # escape quotes
            content = content.replace('"', r'\"')
            subst = [put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")]
            document.body[i:k+1] = subst
        i = i + 1

    # pass 3: printindex insets
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        ptype = get_value(document.body, 'type', i, k).strip('"')
        if ptype == "idx":
            j = find_token(document.body, "type", i, k)
            del document.body[j]
        elif indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\printindex[" + ptype + "]{}")]
            document.body[i:k+1] = subst
        i = i + 1
def convert_splitindex(document):
    """Converts index and printindex insets to splitindex-aware format.

    Every Index inset gets the type "idx"; every index_print inset gets a
    type parameter after its LatexCommand line.  Conversion stops with a
    warning at the first index_print inset whose second line is not a
    printindex command.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            break
        document.body[i] = document.body[i].replace("\\begin_inset Index",
            "\\begin_inset Index idx")
        i = i + 1
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        if document.body[i + 1].find('LatexCommand printindex') == -1:
            document.warning("Malformed LyX document: Incomplete printindex inset.")
            return
        # NOTE(review): the second element was restored from a gap in the
        # listing; it matches the 'type' value that revert_splitindex()
        # compares against "idx" -- verify against upstream.
        subst = ["LatexCommand printindex",
            "type \"idx\""]
        document.body[i + 1:i + 2] = subst
        i = i + 1
def revert_subindex(document):
    """Reverts \\printsubindex CommandInset types.

    index_print insets whose LatexCommand is printsubindex are deleted
    when "\\use_indices" is "false", and replaced by \\printsubindex ERT
    otherwise.  Other index_print insets are left untouched.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        ctype = get_value(document.body, 'LatexCommand', i, k)
        if ctype != "printsubindex":
            i = i + 1
            continue
        ptype = get_value(document.body, 'type', i, k).strip('"')
        if indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")]
            document.body[i:k+1] = subst
        i = i + 1
def revert_printindexall(document):
    """Reverts \\print[sub]index* CommandInset types.

    index_print insets whose LatexCommand is printindex* or
    printsubindex* are deleted when "\\use_indices" is "false", and
    replaced by ERT issuing the same command otherwise.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        ctype = get_value(document.body, 'LatexCommand', i, k)
        if ctype != "printindex*" and ctype != "printsubindex*":
            i = i + 1
            continue
        if indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\" + ctype + "{}")]
            document.body[i:k+1] = subst
        i = i + 1
def revert_strikeout(document):
    """Reverts \\strikeout character style.

    Deletes every body line that find_token() reports for "\\strikeout".
    """
    i = 0
    while True:
        # After a deletion the next match cannot lie before i, so the
        # search resumes there instead of rescanning from the top.
        i = find_token(document.body, '\\strikeout', i)
        if i == -1:
            return
        del document.body[i]
def revert_uulinewave(document):
    """Reverts \\uuline, and \\uwave character styles.

    Deletes every body line that find_token() reports for either token.
    """
    for token in ('\\uuline', '\\uwave'):
        i = 0
        while True:
            # Resume at the deletion point; earlier lines cannot match.
            i = find_token(document.body, token, i)
            if i == -1:
                break
            del document.body[i]
def revert_ulinelatex(document):
    """Reverts \\uline character style.

    When the body contains "\\bar under", preamble code is inserted that
    loads ulem and redefines \\underbar in terms of \\uline (with a
    \\cite wrapper that puts citations into an \\mbox).
    """
    i = find_token(document.body, '\\bar under', 0)
    if i == -1:
        return
    insert_to_preamble(0, document,
            '% Commands inserted by lyx2lyx for proper underlining\n'
            + '\\PassOptionsToPackage{normalem}{ulem}\n'
            + '\\usepackage{ulem}\n'
            + '\\let\\cite@rig\\cite\n'
            + '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}\n'
            + ' \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}\n'
            + '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}\n')
def revert_custom_processors(document):
    """Remove bibtex_command and index_command params.

    Each header line is deleted when present; a missing one only produces
    a warning, and the other one is still processed.
    """
    for token in ('\\bibtex_command', '\\index_command'):
        i = find_token(document.header, token, 0)
        if i == -1:
            document.warning("Malformed LyX document: Missing " + token + ".")
        else:
            del document.header[i]
def convert_nomencl_width(document):
    """Add set_width param to nomencl_print.

    The line 'set_width "none"' is inserted two lines below the start of
    every nomencl_print inset.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        document.body.insert(i + 2, "set_width \"none\"")
        i = i + 1
def revert_nomencl_width(document):
    """Remove set_width param from nomencl_print.

    Insets without a set_width line produce a warning and are skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        l = find_token(document.body, "set_width", i, j)
        if l == -1:
            document.warning("Can't find set_width option for nomencl_print!")
            # continue after this inset
            i = j
            continue
        del document.body[l]
        i = i + 1
def revert_nomencl_cwidth(document):
    """Remove width param from nomencl_print.

    The width value is moved into the preamble as a
    \\setlength{\\nomlabelwidth}{...} line.  Insets without a width line
    produce a warning and are skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        l = find_token(document.body, "width", i, j)
        if l == -1:
            document.warning("Can't find width option for nomencl_print!")
            # continue after this inset
            i = j
            continue
        # read the value before the line holding it is removed
        width = get_value(document.body, "width", i, j).strip('"')
        del document.body[l]
        add_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
        i = i + 1
def revert_applemac(document):
    """Revert applemac encoding to auto.

    Updates both document.encoding and, when present, the "\\encoding"
    header line.
    """
    if document.encoding == "applemac":
        document.encoding = "auto"
        i = find_token(document.header, "\\encoding", 0)
        if i != -1:
            document.header[i] = "\\encoding auto"
def revert_longtable_align(document):
    """Remove longtable alignment setting.

    For every tabular, the line two below the inset start is cut just
    before "longtabularalignment" and re-closed with '>'.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            break
        # the alignment is 2 lines below \\begin_inset Tabular
        j = document.body[i+2].find("longtabularalignment")
        if j != -1:
            document.body[i+2] = document.body[i+2][:j-1]
            document.body[i+2] = document.body[i+2] + '>'
        # A tabular without the attribute must not end the scan: later
        # tabulars may still carry it.
        i = i + 1
def revert_branch_filename(document):
    """Remove \\filename_suffix parameter from branches.

    Deletes every header line that find_token() reports for
    "\\filename_suffix".
    """
    i = 0
    while True:
        i = find_token(document.header, "\\filename_suffix", i)
        if i == -1:
            return
        del document.header[i]
# LyX versions whose file format this module produces.
supported_versions = ["2.0.0","2.0"]

# Each entry is [file format number, list of functions to run].
# NOTE(review): the entries with an empty function list were restored
# from the gaps in the listing (the format numbers are consecutive);
# verify against the upstream file.
convert = [[346, []],
           [347, []],
           [348, []],
           [349, []],
           [350, []],
           [351, []],
           [352, [convert_splitindex]],
           [353, []],
           [354, []],
           [355, []],
           [356, []],
           [357, []],
           [358, []],
           [359, [convert_nomencl_width]],
           [360, []],
           [361, []],
           [362, []],
           [363, []],
           [364, []]
          ]

revert =  [[363, [revert_branch_filename]],
           [362, [revert_longtable_align]],
           [361, [revert_applemac]],
           [360, []],
           [359, [revert_nomencl_cwidth]],
           [358, [revert_nomencl_width]],
           [357, [revert_custom_processors]],
           [356, [revert_ulinelatex]],
           [355, [revert_uulinewave]],
           [354, [revert_strikeout]],
           [353, [revert_printindexall]],
           [352, [revert_subindex]],
           [351, [revert_splitindex]],
           [350, [revert_backgroundcolor]],
           [349, [revert_outputformat]],
           [348, [revert_xetex]],
           [347, [revert_phantom, revert_hphantom, revert_vphantom]],
           [346, [revert_tabularvalign]],
           [345, [revert_swiss]]
          ]
811 if __name__ == "__main__":