# This file is part of lyx2lyx
# -*- coding: utf-8 -*-
# Copyright (C) 2008 José Matos <jamatos@lyx.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

""" Convert files to the file format generated by lyx 2.0"""
import os
import re
import string
import sys

from parser_tools import find_token, find_end_of, find_tokens, get_value, get_value_string

####################################################################
# Private helper functions

def find_end_of_inset(lines, i):
    """Find the end of the inset opened at lines[i] (that line is included).

    Delegates to find_end_of() with the begin_inset / end_inset token pair
    and returns whatever index that helper reports.
    """
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")


def add_to_preamble(document, text):
    """Append the lines of `text` to the preamble if not already there.

    `text` is a list of preamble lines. Only the first line is checked:
    when find_token() locates text[0] in document.preamble the whole list
    is considered present and nothing is added.
    """
    # Nothing to look up and nothing to add for an empty list.
    if not text:
        return

    if find_token(document.preamble, text[0], 0) != -1:
        return

    document.preamble.extend(text)


def insert_to_preamble(index, document, text):
    """Insert `text` into the preamble at position `index`.

    `text` is inserted as ONE list element, even when it contains embedded
    newlines; it is not split into separate preamble lines.
    """
    document.preamble.insert(index, text)


def read_unicodesymbols():
    """Read the unicodesymbols list of unicode characters and corresponding commands.

    The file 'unicodesymbols' is looked up relative to the directory of the
    running script (sys.argv[0]): in its parent when that directory is named
    'lyx2lyx', otherwise in the directory itself.

    Returns a list of [command, character] pairs. Entries whose command does
    not start with a backslash, and lines that cannot be parsed, are skipped.
    For accent commands written as \\X{c} two extra spellings (\\Xc and
    \\X c) are added for the same character.
    """
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # str.strip('lyx2lyx') would strip a *set of characters* from both ends,
    # not the directory name; drop the trailing path component explicitly.
    head, tail = os.path.split(pathname)
    if tail == 'lyx2lyx':
        pathname = head

    # unichr() only exists on Python 2; chr() is its Python 3 equivalent.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    spec_chars = []
    # Two backslashes, followed by some non-word character, and then a character
    # in brackets. The idea is to check for constructs like: \"{u}, which is how
    # they are written in the unicodesymbols file; but they can also be written
    # as: \"u or even \" u.
    r = re.compile(r'\\\\(\W)\{(\w)\}')
    with open(os.path.join(pathname, 'unicodesymbols')) as fp:
        for line in fp:
            if line[0] == '#' or line.strip() == "":
                continue
            line = line.replace(' "', ' ')   # remove all quotation marks with spaces before
            line = line.replace('" ', ' ')   # remove all quotation marks with spaces after
            line = line.replace(r'\"', '"')  # replace \" by " (for characters with diaeresis)
            try:
                ucs4, command, dead = line.split(None, 2)
                if command[0:1] != "\\":
                    continue
                # Parse the code point as an integer literal instead of eval().
                char = to_char(int(ucs4, 0))
            except (ValueError, OverflowError):
                # too few fields, unparsable code point or out-of-range value
                continue
            spec_chars.append([command, char])
            m = r.match(command)
            if m is not None:
                command = "\\\\"
                # If the character is a double-quote, then we need to escape it, too,
                # since it is done that way in the LyX file.
                if m.group(1) == "\"":
                    command += "\\"
                commandbl = command
                command += m.group(1) + m.group(2)
                commandbl += m.group(1) + ' ' + m.group(2)
                spec_chars.append([command, char])
                spec_chars.append([commandbl, char])
    return spec_chars


# Table of [command, character] pairs, loaded once when the module is imported
# and shared by put_cmd_in_ert() and lyx2latex().
unicode_reps = read_unicodesymbols()


def put_cmd_in_ert(string):
    """Wrap the LaTeX code in `string` in a collapsed ERT inset.

    Every character listed in unicode_reps is first replaced by its command,
    then each backslash is rewritten as a backslash-command line followed by
    a line break, and the result is enclosed in the inset/layout markup.
    Returns the complete inset as one string with embedded newlines.
    """
    for rep in unicode_reps:
        # the table stores doubled backslashes; ERT text needs single ones
        string = string.replace(rep[1], rep[0].replace('\\\\', '\\'))
    string = string.replace('\\', "\\backslash\n")
    string = "\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n" \
        + string + "\n\\end_layout\n\\end_inset"
    return string


def lyx2latex(document, lines):
    """Convert some LyX stuff into corresponding LaTeX stuff, as best we can.

    `lines` is a slice of document body lines. Structural lines (layout and
    inset markers, language switches, inset status lines) are dropped, the
    remaining text is converted line by line and concatenated without
    separators. Returns the resulting string. A lossless reversion is not
    possible; only some common insets and font settings are handled.

    Side effect: as soon as one line is converted, a textcomp loader is
    added to the preamble through add_to_preamble().
    """
    skipped_prefixes = ("\\begin_layout", "\\end_layout", "\\begin_inset",
                        "\\end_inset", "\\lang")
    skipped_status = ("status collapsed", "status open")

    # clean up multiline stuff
    pieces = []
    ert_end = 0

    for curline, line in enumerate(lines):
        if line.startswith("\\begin_inset ERT"):
            # We don't want to replace things inside ERT, so figure out
            # where the end of the inset is.
            ert_end = find_end_of_inset(lines, curline + 1)
            continue
        elif line.startswith("\\begin_inset Formula"):
            # keep only what follows the inset marker
            line = line[20:]
        elif line.startswith("\\begin_inset Quotes"):
            # For now, we do a very basic reversion. Someone who understands
            # quotes is welcome to fix it up.
            qtype = line[20:].strip()
            # qtype[0] (the language) is not used
            side = qtype[1]
            dbls = qtype[2]
            if side == "l":
                line = "``" if dbls == "d" else "`"
            else:
                line = "''" if dbls == "d" else "'"
        elif line.isspace() or line.startswith(skipped_prefixes) or \
                line.strip() in skipped_status:
            # skip all that stuff
            continue

        # this needs to be added to the preamble because of cases like
        # \textmu, \textbackslash, etc.
        add_to_preamble(document, ['% added by lyx2lyx for converted index entries',
                                   '\\@ifundefined{textmu}',
                                   ' {\\usepackage{textcomp}}{}'])
        # a lossless reversion is not possible
        # try at least to handle some common insets and settings
        if ert_end >= curline:
            # still inside ERT: only undo the backslash encoding
            line = line.replace(r'\backslash', r'\\')
        else:
            line = line.replace('&', '\\&{}')
            line = line.replace('#', '\\#{}')
            line = line.replace('^', '\\^{}')
            line = line.replace('%', '\\%{}')
            line = line.replace('_', '\\_{}')
            line = line.replace('$', '\\${}')

            # Do the LyX text --> LaTeX conversion
            for rep in unicode_reps:
                line = line.replace(rep[1], rep[0] + "{}")
            line = line.replace(r'\backslash', r'\textbackslash{}')
            line = line.replace(r'\series bold', r'\bfseries{}').replace(r'\series default', r'\mdseries{}')
            line = line.replace(r'\shape italic', r'\itshape{}').replace(r'\shape smallcaps', r'\scshape{}')
            line = line.replace(r'\shape slanted', r'\slshape{}').replace(r'\shape default', r'\upshape{}')
            line = line.replace(r'\emph on', r'\em{}').replace(r'\emph default', r'\em{}')
            line = line.replace(r'\noun on', r'\scshape{}').replace(r'\noun default', r'\upshape{}')
            line = line.replace(r'\bar under', r'\underbar{').replace(r'\bar default', r'}')
            line = line.replace(r'\family sans', r'\sffamily{}').replace(r'\family default', r'\normalfont{}')
            line = line.replace(r'\family typewriter', r'\ttfamily{}').replace(r'\family roman', r'\rmfamily{}')
            line = line.replace(r'\InsetSpace ', r'').replace(r'\SpecialChar ', r'')
        pieces.append(line)
    return "".join(pieces)


####################################################################


def revert_swiss(document):
    """Set language german-ch to ngerman.

    Updates document.language and the language header line when the document
    language is german-ch, then rewrites every german-ch language switch in
    the body.
    """
    if document.language == "german-ch":
        document.language = "ngerman"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language ngerman"
    j = 0
    while True:
        j = find_token(document.body, "\\lang german-ch", j)
        if j == -1:
            return
        document.body[j] = document.body[j].replace("\\lang german-ch", "\\lang ngerman")
        j += 1


def revert_tabularvalign(document):
    """Revert the tabular valign option.

    Every Tabular inset whose features line carries a top/bottom
    tabularvalignment is wrapped in a frameless Box inset that carries the
    alignment instead. Tables without such a line are left alone.
    """
    # which valignment is specified?
    tabularvalignment_re = re.compile(r'<features tabularvalignment="(top|bottom)">')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # step past the broken inset; a cursor of -1 would restart the scan
            i += 1
            continue

        # only look inside this table, not in tables further down the body
        k = find_token(document.body, "<features tabularvalignment=", i, j)
        if k == -1:
            i = j
            continue

        m = tabularvalignment_re.match(document.body[k])
        if not m:
            i = j
            continue

        tabularvalignment = m.group(1)

        subst = ['\\end_layout', '\\end_inset']
        document.body[j+1:j+1] = subst  # just inserts those lines
        # NOTE(review): the box parameters between 'position' and
        # 'height_special', and the trailing status/blank lines, were
        # reconstructed; verify the exact values against upstream.
        subst = ['\\begin_inset Box Frameless',
                 'position "' + tabularvalignment[0] + '"',
                 'hor_pos "c"',
                 'has_inner_box 1',
                 'inner_pos "c"',
                 'use_parbox 0',
                 'width "0pt"',
                 'special "none"',
                 'height "1in"',
                 'height_special "totalheight"',
                 'status open',
                 '',
                 '\\begin_layout Plain Layout']
        document.body[i:i] = subst  # this just inserts the array at i
        i += len(subst) + 2  # adjust i to save a few cycles


def _revert_phantom_inset(document, inset_name, command):
    """Turn every 'Phantom <inset_name>' inset in the body into ERT.

    The four lines starting at the inset marker are replaced by an ERT inset
    that opens the LaTeX `command` with a brace; the four lines starting at
    the next end_layout are replaced by an ERT inset holding the closing
    brace. Stops with a warning when no end_layout follows an inset.
    """
    begin_token = '\\begin_inset Phantom ' + inset_name
    opening = ('\\begin_inset ERT\nstatus collapsed\n\n'
               '\\begin_layout Plain Layout\n\n\n\\backslash\n'
               + command + '{\n\\end_layout\n\n\\end_inset\n')
    closing = ('\\size default\n\n\\begin_inset ERT\nstatus collapsed\n\n'
               '\\begin_layout Plain Layout\n\n'
               '}\n\\end_layout\n\n\\end_inset\n')
    i = 0
    while True:
        i = find_token(document.body, begin_token, i)
        if i == -1:
            return
        substi = document.body[i].replace(begin_token, opening).split('\n')
        document.body[i : i+4] = substi
        i += len(substi)
        j = find_token(document.body, "\\end_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of "
                             + inset_name + " inset.")
            return
        substj = document.body[j].replace('\\end_layout', closing).split('\n')
        document.body[j : j+4] = substj
        i += len(substj)


def revert_phantom(document):
    """Reverts phantom to ERT."""
    _revert_phantom_inset(document, "Phantom", "phantom")


def revert_hphantom(document):
    """Reverts hphantom to ERT."""
    _revert_phantom_inset(document, "HPhantom", "hphantom")


def revert_vphantom(document):
    """Reverts vphantom to ERT."""
    _revert_phantom_inset(document, "VPhantom", "vphantom")


def revert_xetex(document):
    """Reverts documents that use XeTeX.

    Removes the use_xetex header line. When XeTeX was enabled, additionally
    (1) sets the input encoding to utf8-plain, (2) reads the font settings,
    (3) inserts equivalent fontspec commands at the top of the preamble and
    (4) resets the font settings in the header to their defaults.
    A missing header line is reported through document.warning() and the
    corresponding step is skipped.
    """
    header = document.header

    def missing(token):
        document.warning("Malformed LyX document: Missing " + token + ".")

    def font_setting(token):
        "Return everything after `token` on its header line, or 'default'."
        pos = find_token(header, token, 0)
        if pos == -1:
            missing(token)
            return "default"
        line = header[pos]
        # the whole remainder is wanted: font names may contain spaces
        if not line.startswith(token + " "):
            return "default"
        return line[len(token) + 1:]

    def scale_setting(token):
        "Return the numeric value of a font scale header line (100 if unusable)."
        try:
            return float(get_value(header, token, 0))
        except ValueError:
            document.warning("Malformed LyX document: Invalid value for " + token + ".")
            return 100.0

    def set_header(token, value):
        "Overwrite the header line `token`; never touch another line."
        pos = find_token(header, token, 0)
        if pos == -1:
            missing(token)
            return
        header[pos] = token + " " + value

    i = find_token(header, '\\use_xetex', 0)
    if i == -1:
        missing('\\use_xetex')
        return
    use_xetex = get_value(header, "\\use_xetex", i)
    del header[i]
    if use_xetex == 'false':
        return

    # 1.) set doc encoding to utf8-plain
    set_header("\\inputencoding", "utf8-plain")

    # 2.) check font settings
    roman = font_setting("\\font_roman")
    sans = font_setting("\\font_sans")
    typewriter = font_setting("\\font_typewriter")
    osf = get_value(header, '\\font_osf', 0) == "true"
    sf_scale = scale_setting('\\font_sf_scale')
    tt_scale = scale_setting('\\font_tt_scale')

    # 3.) set preamble stuff
    pretext = '%% This document must be processed with xelatex!\n'
    pretext += '\\usepackage{fontspec}\n'
    if roman != "default":
        pretext += '\\setmainfont[Mapping=tex-text]{' + roman + '}\n'
    if sans != "default":
        pretext += '\\setsansfont['
        if sf_scale != 100:
            pretext += 'Scale=' + str(sf_scale / 100) + ','
        pretext += 'Mapping=tex-text]{' + sans + '}\n'
    if typewriter != "default":
        pretext += '\\setmonofont'
        if tt_scale != 100:
            pretext += '[Scale=' + str(tt_scale / 100) + ']'
        pretext += '{' + typewriter + '}\n'
    if osf:
        pretext += '\\defaultfontfeatures{Numbers=OldStyle}\n'
    # backslashes escaped explicitly: '\u...' is not a valid escape everywhere
    pretext += '\\usepackage{xunicode}\n'
    pretext += '\\usepackage{xltxtra}\n'
    insert_to_preamble(0, document, pretext)

    # 4.) reset font settings
    set_header("\\font_roman", "default")
    set_header("\\font_sans", "default")
    set_header("\\font_typewriter", "default")
    set_header("\\font_osf", "false")
    set_header("\\font_sc", "false")
    set_header("\\font_sf_scale", "100")
    set_header("\\font_tt_scale", "100")


def revert_outputformat(document):
    """Remove default output format param."""
    i = find_token(document.header, '\\default_output_format', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\default_output_format.")
        return
    del document.header[i]


def revert_backgroundcolor(document):
    """Reverts background color to preamble code.

    Each backgroundcolor header line is removed. Unless the colour is plain
    white, page colour commands with the same colour are inserted at the top
    of the preamble.
    """
    def channel(hexpair):
        "Convert a two-digit hex channel to a fraction of 256."
        value = int(hexpair, 16)
        # we want the output "0.5" for the value "127" therefore add here
        if value != 0:
            value += 1
        return float(value) / 256

    i = 0
    while True:
        i = find_token(document.header, "\\backgroundcolor", i)
        if i == -1:
            return
        # read the value from the line just found, then drop that line
        colorcode = get_value(document.header, '\\backgroundcolor', i)
        del document.header[i]
        # don't clutter the preamble if backgroundcolor is not set
        if colorcode == "#ffffff":
            continue
        # the color code is in the form #rrggbb where every character denotes a hex number
        try:
            redout = channel(colorcode[1:3])
            greenout = channel(colorcode[3:5])
            blueout = channel(colorcode[5:7])
        except ValueError:
            document.warning("Malformed LyX document: Invalid \\backgroundcolor value.")
            continue
        # write the preamble
        insert_to_preamble(0, document,
                           '% Commands inserted by lyx2lyx to set the background color\n'
                           + '\\@ifundefined{definecolor}{\\usepackage{color}}{}\n'
                           + '\\definecolor{page_backgroundcolor}{rgb}{'
                           + str(redout) + ', ' + str(greenout)
                           + ', ' + str(blueout) + '}\n'
                           + '\\pagecolor{page_backgroundcolor}\n')


def revert_splitindex(document):
    """Reverts splitindex-aware documents.

    Three passes: (1) the use_indices flag and all index definitions are
    removed from the header and, when multiple indices were enabled,
    replaced by splitidx preamble code; (2) Index insets lose their type, or
    become sindex ERT when they belong to a secondary index; (3)
    index_print insets lose their type, are deleted, or become printindex
    ERT.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    preamble = ""
    if indices == "true":
        preamble += "\\usepackage{splitidx}\n"
    del document.header[i]

    # 1.) index definitions in the header
    index_re = re.compile(r'\\index (.*)$')
    i = 0
    while True:
        i = find_token(document.header, "\\index", i)
        if i == -1:
            break
        m = index_re.match(document.header[i])
        if m is None:
            # some other header line that merely starts with the same token
            i += 1
            continue
        k = find_token(document.header, "\\end_index", i)
        if k == -1:
            document.warning("Malformed LyX document: Missing \\end_index.")
            return
        iname = m.group(1)
        ishortcut = get_value(document.header, '\\shortcut', i, k)
        if ishortcut != "" and indices == "true":
            preamble += "\\newindex[" + iname + "]{" + ishortcut + "}\n"
        del document.header[i:k+1]
        i = 0
    if preamble != "":
        insert_to_preamble(0, document, preamble)

    # 2.) index insets in the body
    inset_re = re.compile(r'\\begin_inset Index (.*)$')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            break
        m = inset_re.match(document.body[i])
        if m is None:
            # no index type on this line: nothing to revert
            i += 1
            continue
        itype = m.group(1)
        if itype == "idx" or indices == "false":
            document.body[i] = "\\begin_inset Index"
        else:
            k = find_end_of_inset(document.body, i)
            if k == -1:
                document.warning("Malformed LyX document: Could not find end of Index inset.")
                return
            content = lyx2latex(document, document.body[i:k])
            # escape quotes
            content = content.replace('"', r'\"')
            subst = [put_cmd_in_ert("\\sindex[" + itype + "]{" + content + "}")]
            document.body[i:k+1] = subst
        i += 1

    # 3.) printindex insets in the body
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            return
        ptype = get_value(document.body, 'type', i, k).strip('"')
        if ptype == "idx":
            j = find_token(document.body, "type", i, k)
            del document.body[j]
        elif indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\printindex[" + ptype + "]{}")]
            document.body[i:k+1] = subst
        i += 1


def convert_splitindex(document):
    """Converts index and printindex insets to splitindex-aware format.

    Every Index inset gets the type 'idx'; every index_print inset gets a
    'type "idx"' parameter line after its LatexCommand line.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Index", i)
        if i == -1:
            break
        document.body[i] = document.body[i].replace("\\begin_inset Index",
                                                    "\\begin_inset Index idx")
        i += 1
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        # the command line must directly follow the inset marker
        if i + 1 >= len(document.body) or \
                document.body[i + 1].find('LatexCommand printindex') == -1:
            document.warning("Malformed LyX document: Incomplete printindex inset.")
            return
        subst = ["LatexCommand printindex",
                 "type \"idx\""]
        document.body[i + 1:i + 2] = subst
        i += 1


def revert_subindex(document):
    """Reverts printsubindex CommandInset types.

    index_print insets whose LatexCommand is printsubindex are deleted when
    multiple indices are disabled, otherwise replaced by ERT holding the
    equivalent LaTeX command. Other index_print insets are left alone.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            return
        ctype = get_value(document.body, 'LatexCommand', i, k)
        if ctype != "printsubindex":
            i += 1
            continue
        ptype = get_value(document.body, 'type', i, k).strip('"')
        if indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\printsubindex[" + ptype + "]{}")]
            document.body[i:k+1] = subst
        i += 1


def revert_printindexall(document):
    """Reverts the starred print[sub]index CommandInset types.

    index_print insets whose LatexCommand is printindex* or printsubindex*
    are deleted when multiple indices are disabled, otherwise replaced by
    ERT holding the LaTeX command. Other index_print insets are left alone.
    """
    i = find_token(document.header, '\\use_indices', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\use_indices.")
        return
    indices = get_value(document.header, "\\use_indices", i)
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset index_print", i)
        if i == -1:
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Could not find end of index_print inset.")
            return
        ctype = get_value(document.body, 'LatexCommand', i, k)
        if ctype != "printindex*" and ctype != "printsubindex*":
            i += 1
            continue
        if indices == "false":
            del document.body[i:k+1]
        else:
            subst = [put_cmd_in_ert("\\" + ctype + "{}")]
            document.body[i:k+1] = subst
        i += 1


def revert_strikeout(document):
    """Reverts the strikeout character style by deleting its body lines."""
    i = 0
    while True:
        # After a deletion the next line slides into position i, so the
        # search can resume there instead of rescanning the whole body.
        i = find_token(document.body, '\\strikeout', i)
        if i == -1:
            return
        del document.body[i]


def revert_uulinewave(document):
    """Reverts the uuline and uwave character styles by deleting their body lines."""
    for token in ('\\uuline', '\\uwave'):
        i = 0
        while True:
            # resume at the deletion point rather than rescanning from 0
            i = find_token(document.body, token, i)
            if i == -1:
                break
            del document.body[i]


def revert_ulinelatex(document):
    """Reverts the uline character style.

    When the body contains at least one underline switch, preamble code is
    inserted that loads ulem and redefines the underbar command on top of
    its uline command.
    """
    i = find_token(document.body, '\\bar under', 0)
    if i == -1:
        return
    insert_to_preamble(0, document,
                       '% Commands inserted by lyx2lyx for proper underlining\n'
                       + '\\PassOptionsToPackage{normalem}{ulem}\n'
                       + '\\usepackage{ulem}\n'
                       + '\\let\\cite@rig\\cite\n'
                       + '\\newcommand{\\b@xcite}[2][\\%]{\\def\\def@pt{\\%}\\def\\pas@pt{#1}\n'
                       + ' \\mbox{\\ifx\\def@pt\\pas@pt\\cite@rig{#2}\\else\\cite@rig[#1]{#2}\\fi}}\n'
                       + '\\renewcommand{\\underbar}[1]{{\\let\\cite\\b@xcite\\uline{#1}}}\n')


def revert_custom_processors(document):
    """Remove bibtex_command and index_command params.

    The two header lines are handled independently: a missing one is
    reported and the other is still removed.
    """
    i = find_token(document.header, '\\bibtex_command', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\bibtex_command.")
    else:
        del document.header[i]
    i = find_token(document.header, '\\index_command', 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing \\index_command.")
    else:
        del document.header[i]


def convert_nomencl_width(document):
    """Add set_width param to nomencl_print.

    The parameter line is inserted two lines below each inset marker.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        document.body.insert(i + 2, "set_width \"none\"")
        i += 1


def revert_nomencl_width(document):
    """Remove set_width param from nomencl_print."""
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        l = find_token(document.body, "set_width", i, j)
        if l == -1:
            document.warning("Can't find set_width option for nomencl_print!")
            # skip the inset, but always move forward even if its end
            # could not be determined (j == -1)
            i = max(j, i + 1)
            continue
        del document.body[l]
        i += 1


def revert_nomencl_cwidth(document):
    """Remove width param from nomencl_print.

    The removed width is carried over to the preamble as a setlength command
    for the nomenclature label width.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset nomencl_print", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        l = find_token(document.body, "width", i, j)
        if l == -1:
            document.warning("Can't find width option for nomencl_print!")
            # skip the inset, but always move forward even if its end
            # could not be determined (j == -1)
            i = max(j, i + 1)
            continue
        width = get_value(document.body, "width", i, j).strip('"')
        del document.body[l]
        add_to_preamble(document, ["\\setlength{\\nomlabelwidth}{" + width + "}"])
        i += 1


# Conversion tables: each entry pairs a file format number with the list of
# functions to run in order to reach that format.
# NOTE(review): entries with an empty function list were reconstructed from
# the contiguous format chain (convert ascending, revert descending); the
# newest format number in particular must be verified against upstream.
supported_versions = ["2.0.0", "2.0"]
convert = [[346, []],
           [347, []],
           [348, []],
           [349, []],
           [350, []],
           [351, []],
           [352, [convert_splitindex]],
           [353, []],
           [354, []],
           [355, []],
           [356, []],
           [357, []],
           [358, []],
           [359, [convert_nomencl_width]],
           [360, []],
           [361, []]
          ]

revert =  [[360, []],
           [359, [revert_nomencl_cwidth]],
           [358, [revert_nomencl_width]],
           [357, [revert_custom_processors]],
           [356, [revert_ulinelatex]],
           [355, [revert_uulinewave]],
           [354, [revert_strikeout]],
           [353, [revert_printindexall]],
           [352, [revert_subindex]],
           [351, [revert_splitindex]],
           [350, [revert_backgroundcolor]],
           [349, [revert_outputformat]],
           [348, [revert_xetex]],
           [347, [revert_phantom, revert_hphantom, revert_vphantom]],
           [346, [revert_tabularvalign]],
           [345, [revert_swiss]]
          ]


if __name__ == "__main__":
    pass