1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
23 from parser_tools import find_token, find_token_exact, find_tokens, find_end_of, get_value
24 from LyX import get_encoding
27 ####################################################################
28 # Private helper functions
def find_end_of_inset(lines, i):
    """Find the end of an inset, where lines[i] is included in the search.

    Thin wrapper around find_end_of() using the begin_inset / end_inset
    token pair.

    lines -- list of document lines to scan.
    i     -- index at which the search starts.

    Returns whatever find_end_of() returns.  Callers in this module use the
    result as the index of the closing line of the inset (for example as the
    end of a slice that replaces the whole inset).
    NOTE(review): presumably -1 when no matching end is found -- confirm
    against parser_tools.find_end_of.
    """
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
34 # End of helper functions
35 ####################################################################
39 # Notes: Framed/Shaded
42 def revert_framed(document):
43 "Revert framed notes. "
46 i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
50 document.body[i] = "\\begin_inset Note"
58 roman_fonts = {'default' : 'default', 'ae' : 'ae',
59 'times' : 'times', 'palatino' : 'palatino',
60 'helvet' : 'default', 'avant' : 'default',
61 'newcent' : 'newcent', 'bookman' : 'bookman',
63 sans_fonts = {'default' : 'default', 'ae' : 'default',
64 'times' : 'default', 'palatino' : 'default',
65 'helvet' : 'helvet', 'avant' : 'avant',
66 'newcent' : 'default', 'bookman' : 'default',
# Typewriter font chosen for each old-style \fontscheme value.
# convert_font_settings() uses it to fill in \font_typewriter, and
# revert_font_settings() uses it to recognise a scheme again.
# Every scheme maps to 'default' except 'pslatex', which selects 'courier'.
typewriter_fonts = dict.fromkeys(
    ['default', 'ae', 'times', 'palatino',
     'helvet', 'avant', 'newcent', 'bookman'],
    'default')
typewriter_fonts['pslatex'] = 'courier'
74 def convert_font_settings(document):
75 " Convert font settings. "
77 i = find_token_exact(document.header, "\\fontscheme", i)
79 document.warning("Malformed LyX document: Missing `\\fontscheme'.")
81 font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
83 document.warning("Malformed LyX document: Empty `\\fontscheme'.")
84 font_scheme = 'default'
85 if not font_scheme in roman_fonts.keys():
86 document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
87 font_scheme = 'default'
88 document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
89 '\\font_sans %s' % sans_fonts[font_scheme],
90 '\\font_typewriter %s' % typewriter_fonts[font_scheme],
91 '\\font_default_family default',
94 '\\font_sf_scale 100',
95 '\\font_tt_scale 100']
98 def revert_font_settings(document):
99 " Revert font settings. "
102 fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
103 for family in 'roman', 'sans', 'typewriter':
104 name = '\\font_%s' % family
105 i = find_token_exact(document.header, name, i)
107 document.warning("Malformed LyX document: Missing `%s'." % name)
110 if (insert_line < 0):
112 fonts[family] = get_value(document.header, name, i, i + 1)
113 del document.header[i]
114 i = find_token_exact(document.header, '\\font_default_family', i)
116 document.warning("Malformed LyX document: Missing `\\font_default_family'.")
117 font_default_family = 'default'
119 font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
120 del document.header[i]
121 i = find_token_exact(document.header, '\\font_sc', i)
123 document.warning("Malformed LyX document: Missing `\\font_sc'.")
126 font_sc = get_value(document.header, '\\font_sc', i, i + 1)
127 del document.header[i]
128 if font_sc != 'false':
129 document.warning("Conversion of '\\font_sc' not yet implemented.")
130 i = find_token_exact(document.header, '\\font_osf', i)
132 document.warning("Malformed LyX document: Missing `\\font_osf'.")
135 font_osf = get_value(document.header, '\\font_osf', i, i + 1)
136 del document.header[i]
137 i = find_token_exact(document.header, '\\font_sf_scale', i)
139 document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
140 font_sf_scale = '100'
142 font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
143 del document.header[i]
144 if font_sf_scale != '100':
145 document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
146 i = find_token_exact(document.header, '\\font_tt_scale', i)
148 document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
149 font_tt_scale = '100'
151 font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
152 del document.header[i]
153 if font_tt_scale != '100':
154 document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
155 for font_scheme in roman_fonts.keys():
156 if (roman_fonts[font_scheme] == fonts['roman'] and
157 sans_fonts[font_scheme] == fonts['sans'] and
158 typewriter_fonts[font_scheme] == fonts['typewriter']):
159 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
160 if font_default_family != 'default':
161 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
162 if font_osf == 'true':
163 document.warning("Ignoring `\\font_osf = true'")
165 font_scheme = 'default'
166 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
167 if fonts['roman'] == 'cmr':
168 document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
169 if font_osf == 'true':
170 document.preamble.append('\\usepackage{eco}')
172 for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
173 if fonts['roman'] == font:
174 document.preamble.append('\\usepackage{%s}' % font)
175 for font in 'cmss', 'lmss', 'cmbr':
176 if fonts['sans'] == font:
177 document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
178 for font in 'berasans':
179 if fonts['sans'] == font:
180 document.preamble.append('\\usepackage{%s}' % font)
181 for font in 'cmtt', 'lmtt', 'cmtl':
182 if fonts['typewriter'] == font:
183 document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
184 for font in 'courier', 'beramono', 'luximono':
185 if fonts['typewriter'] == font:
186 document.preamble.append('\\usepackage{%s}' % font)
187 if font_default_family != 'default':
188 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
189 if font_osf == 'true':
190 document.warning("Ignoring `\\font_osf = true'")
193 def revert_booktabs(document):
194 " We remove the booktabs flag or everything else will become a mess. "
195 re_row = re.compile(r'^<row.*space="[^"]+".*>$')
196 re_tspace = re.compile(r'\s+topspace="[^"]+"')
197 re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
198 re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
201 i = find_token(document.body, "\\begin_inset Tabular", i)
204 j = find_end_of_inset(document.body, i + 1)
206 document.warning("Malformed LyX document: Could not find end of tabular.")
208 for k in range(i, j):
209 if re.search('^<features.* booktabs="true".*>$', document.body[k]):
210 document.warning("Converting 'booktabs' table to normal table.")
211 document.body[k] = document.body[k].replace(' booktabs="true"', '')
212 if re.search(re_row, document.body[k]):
213 document.warning("Removing extra row space.")
214 document.body[k] = re_tspace.sub('', document.body[k])
215 document.body[k] = re_bspace.sub('', document.body[k])
216 document.body[k] = re_ispace.sub('', document.body[k])
220 def convert_multiencoding(document, forward):
221 """ Fix files with multiple encodings.
222 Files with an inputencoding of "auto" and multiple languages where at least
223 two languages have different default encodings are encoded in multiple
224 encodings for file formats < 249. These files are incorrectly read and
225 written (as if the whole file was in the encoding of the main language).
228 - converts from fake unicode values to true unicode if forward is true, and
229 - converts from true unicode values to fake unicode if forward is false.
230 document.encoding must be set to the old value (format 248) in both cases.
232 We do this here and not in LyX.py because it is far easier to do the
233 necessary parsing in modern formats than in ancient ones.
235 encoding_stack = [document.encoding]
236 lang_re = re.compile(r"^\\lang\s(\S+)")
237 if document.inputencoding == "auto":
238 for i in range(len(document.body)):
239 result = lang_re.match(document.body[i])
241 language = result.group(1)
242 if language == "default":
243 document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding))
244 encoding_stack[-1] = document.encoding
246 from lyx2lyx_lang import lang
247 document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]))
248 encoding_stack[-1] = lang[language][3]
249 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
250 document.warning("Adding nested encoding %s." % encoding_stack[-1])
251 encoding_stack.append(encoding_stack[-1])
252 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
253 document.warning("Removing nested encoding %s." % encoding_stack[-1])
254 del encoding_stack[-1]
255 if encoding_stack[-1] != document.encoding:
257 # This line has been incorrectly interpreted as if it was
258 # encoded in 'encoding'.
259 # Convert back to the 8bit string that was in the file.
260 orig = document.body[i].encode(document.encoding)
261 # Convert the 8bit string that was in the file to unicode
262 # with the correct encoding.
263 document.body[i] = orig.decode(encoding_stack[-1])
265 # Convert unicode to the 8bit string that will be written
266 # to the file with the correct encoding.
267 orig = document.body[i].encode(encoding_stack[-1])
268 # Convert the 8bit string that will be written to the
269 # file to fake unicode with the encoding that will later
270 # be used when writing to the file.
271 document.body[i] = orig.decode(document.encoding)
def convert_utf8(document):
    """Switch the document encoding to UTF-8.

    The body is first run through convert_multiencoding() in the forward
    direction, then document.encoding is set to "utf8".
    """
    # Order matters: convert_multiencoding() requires document.encoding to
    # still hold the old value while it runs.
    convert_multiencoding(document, forward=True)
    document.encoding = "utf8"
280 def revert_utf8(document):
281 " Set document encoding to the value corresponding to inputencoding. "
282 i = find_token(document.header, "\\inputencoding", 0)
284 document.header.append("\\inputencoding auto")
285 elif get_value(document.header, "\\inputencoding", i) == "utf8":
286 document.header[i] = "\\inputencoding auto"
287 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
288 document.encoding = get_encoding(document.language, document.inputencoding, 248)
289 convert_multiencoding(document, False)
292 def revert_cs_label(document):
293 " Remove status flag of charstyle label. "
296 i = find_token(document.body, "\\begin_inset CharStyle", i)
299 # Search for a line starting 'show_label'
300 # If it is not there, break with a warning message
303 if (document.body[i][:10] == "show_label"):
306 elif (document.body[i][:13] == "\\begin_layout"):
307 document.warning("Malformed LyX document: Missing 'show_label'.")
314 def convert_bibitem(document):
316 \bibitem [option]{argument}
320 \begin_inset LatexCommand bibitem
326 This must be called after convert_commandparams.
328 regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
331 i = find_token(document.body, "\\bibitem", i)
334 match = re.match(regex, document.body[i])
335 option = match.group(1)
336 argument = match.group(2)
337 lines = ['\\begin_inset LatexCommand bibitem']
339 lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
340 lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
342 lines.append('\\end_inset')
343 document.body[i:i+1] = lines
# Parameter names of every known LatexCommand inset, keyed by command name.
#
# Each value is a three-element list [option1, option2, argument] holding
# the name under which the first optional argument, the second optional
# argument and the mandatory argument are written as `name "value"` lines.
# An empty string marks a slot the command does not have:
# convert_commandparams() warns about and drops a value found in such a
# slot, and revert_commandparams() never matches a parameter against it.
commandparams_info = {
    # Bibliography
    "bibitem": ["label", "", "key"],
    "bibtex": ["options", "btprint", "bibfiles"],
    # Citation commands
    "cite": ["after", "before", "key"],
    "citet": ["after", "before", "key"],
    "citep": ["after", "before", "key"],
    "citealt": ["after", "before", "key"],
    "citealp": ["after", "before", "key"],
    "citeauthor": ["after", "before", "key"],
    "citeyear": ["after", "before", "key"],
    "citeyearpar": ["after", "before", "key"],
    "citet*": ["after", "before", "key"],
    "citep*": ["after", "before", "key"],
    "citealt*": ["after", "before", "key"],
    "citealp*": ["after", "before", "key"],
    "citeauthor*": ["after", "before", "key"],
    "Citet": ["after", "before", "key"],
    "Citep": ["after", "before", "key"],
    "Citealt": ["after", "before", "key"],
    "Citealp": ["after", "before", "key"],
    "Citeauthor": ["after", "before", "key"],
    "Citet*": ["after", "before", "key"],
    "Citep*": ["after", "before", "key"],
    "Citealt*": ["after", "before", "key"],
    "Citealp*": ["after", "before", "key"],
    "Citeauthor*": ["after", "before", "key"],
    "citefield": ["after", "before", "key"],
    "citetitle": ["after", "before", "key"],
    "cite*": ["after", "before", "key"],
    # Miscellaneous commands
    "hfill": ["", "", ""],
    "index": ["", "", "name"],
    "printindex": ["", "", "name"],
    "label": ["", "", "name"],
    # Cross references
    "eqref": ["name", "", "reference"],
    "pageref": ["name", "", "reference"],
    "prettyref": ["name", "", "reference"],
    "ref": ["name", "", "reference"],
    "vpageref": ["name", "", "reference"],
    "vref": ["name", "", "reference"],
    # Table of contents and URLs
    "tableofcontents": ["", "", "type"],
    "htmlurl": ["name", "", "target"],
    "url": ["name", "", "target"],
}
392 def convert_commandparams(document):
395 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
400 \begin_inset LatexCommand cmdname
406 name1, name2 and name3 can be different for each command.
408 # \begin_inset LatexCommand bibitem was not the official version (see
409 # convert_bibitem()), but could be read in, so we convert it here, too.
413 i = find_token(document.body, "\\begin_inset LatexCommand", i)
416 command = document.body[i][26:].strip()
418 document.warning("Malformed LyX document: Missing LatexCommand name.")
422 # The following parser is taken from the original InsetCommandParams::scanCommand
428 # Used to handle things like \command[foo[bar]]{foo{bar}}
432 if ((state == "CMDNAME" and c == ' ') or
433 (state == "CMDNAME" and c == '[') or
434 (state == "CMDNAME" and c == '{')):
436 if ((state == "OPTION" and c == ']') or
437 (state == "SECOPTION" and c == ']') or
438 (state == "CONTENT" and c == '}')):
442 nestdepth = nestdepth - 1
443 if ((state == "OPTION" and c == '[') or
444 (state == "SECOPTION" and c == '[') or
445 (state == "CONTENT" and c == '{')):
446 nestdepth = nestdepth + 1
447 if state == "CMDNAME":
449 elif state == "OPTION":
451 elif state == "SECOPTION":
453 elif state == "CONTENT":
458 elif c == '[' and b != ']':
460 nestdepth = 0 # Just to be sure
461 elif c == '[' and b == ']':
463 nestdepth = 0 # Just to be sure
466 nestdepth = 0 # Just to be sure
469 # Now we have parsed the command, output the parameters
470 lines = ["\\begin_inset LatexCommand %s" % name]
472 if commandparams_info[name][0] == "":
473 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
475 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
477 if commandparams_info[name][1] == "":
478 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
480 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
482 if commandparams_info[name][2] == "":
483 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
485 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
486 document.body[i:i+1] = lines
490 def revert_commandparams(document):
491 regex = re.compile(r'(\S+)\s+(.+)')
494 i = find_token(document.body, "\\begin_inset LatexCommand", i)
497 name = document.body[i].split()[2]
498 j = find_end_of_inset(document.body, i + 1)
503 for k in range(i + 1, j):
504 match = re.match(regex, document.body[k])
506 pname = match.group(1)
507 pvalue = match.group(2)
508 if pname == "preview":
509 preview_line = document.body[k]
510 elif (commandparams_info[name][0] != "" and
511 pname == commandparams_info[name][0]):
512 option1 = pvalue.strip('"').replace('\\"', '"')
513 elif (commandparams_info[name][1] != "" and
514 pname == commandparams_info[name][1]):
515 option2 = pvalue.strip('"').replace('\\"', '"')
516 elif (commandparams_info[name][2] != "" and
517 pname == commandparams_info[name][2]):
518 argument = pvalue.strip('"').replace('\\"', '"')
519 elif document.body[k].strip() != "":
520 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
521 if name == "bibitem":
523 lines = ["\\bibitem {%s}" % argument]
525 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
529 lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
531 lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
534 lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
536 lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
537 if name != "bibitem":
538 if preview_line != "":
539 lines.append(preview_line)
541 lines.append('\\end_inset')
542 document.body[i:j+1] = lines
546 def revert_nomenclature(document):
547 " Convert nomenclature entry to ERT. "
548 regex = re.compile(r'(\S+)\s+(.+)')
552 i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
556 j = find_end_of_inset(document.body, i + 1)
561 for k in range(i + 1, j):
562 match = re.match(regex, document.body[k])
564 name = match.group(1)
565 value = match.group(2)
566 if name == "preview":
567 preview_line = document.body[k]
568 elif name == "symbol":
569 symbol = value.strip('"').replace('\\"', '"')
570 elif name == "description":
571 description = value.strip('"').replace('\\"', '"')
572 elif name == "prefix":
573 prefix = value.strip('"').replace('\\"', '"')
574 elif document.body[k].strip() != "":
575 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
577 command = 'nomenclature{%s}{%s}' % (symbol, description)
579 command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
580 document.body[i:j+1] = ['\\begin_inset ERT',
583 '\\begin_layout %s' % document.default_layout,
592 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
593 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
594 document.preamble.append('\\makenomenclature')
597 def revert_printnomenclature(document):
598 " Convert printnomenclature to ERT. "
599 regex = re.compile(r'(\S+)\s+(.+)')
603 i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
607 j = find_end_of_inset(document.body, i + 1)
610 for k in range(i + 1, j):
611 match = re.match(regex, document.body[k])
613 name = match.group(1)
614 value = match.group(2)
615 if name == "preview":
616 preview_line = document.body[k]
617 elif name == "labelwidth":
618 labelwidth = value.strip('"').replace('\\"', '"')
619 elif document.body[k].strip() != "":
620 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
622 command = 'nomenclature{}'
624 command = 'nomenclature[%s]' % labelwidth
625 document.body[i:j+1] = ['\\begin_inset ERT',
628 '\\begin_layout %s' % document.default_layout,
637 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
638 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
639 document.preamble.append('\\makenomenclature')
642 def convert_esint(document):
643 " Add \\use_esint setting to header. "
644 i = find_token(document.header, "\\cite_engine", 0)
646 document.warning("Malformed LyX document: Missing `\\cite_engine'.")
648 # 0 is off, 1 is auto, 2 is on.
649 document.header.insert(i, '\\use_esint 0')
652 def revert_esint(document):
653 " Remove \\use_esint setting from header. "
654 i = find_token(document.header, "\\use_esint", 0)
656 document.warning("Malformed LyX document: Missing `\\use_esint'.")
658 use_esint = document.header[i].split()[1]
659 del document.header[i]
660 # 0 is off, 1 is auto, 2 is on.
662 document.preamble.append('\\usepackage{esint}')
665 def revert_clearpage(document):
669 i = find_token(document.body, "\\clearpage", i)
672 document.body[i:i+1] = ['\\begin_inset ERT',
675 '\\begin_layout %s' % document.default_layout,
686 def revert_cleardoublepage(document):
687 " cleardoublepage -> ERT"
690 i = find_token(document.body, "\\cleardoublepage", i)
693 document.body[i:i+1] = ['\\begin_inset ERT',
696 '\\begin_layout %s' % document.default_layout,
707 def revert_encodings(document):
708 " Set new encodings to auto. "
709 encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
710 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
711 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
712 i = find_token(document.header, "\\inputencoding", 0)
714 document.header.append("\\inputencoding auto")
716 inputenc = get_value(document.header, "\\inputencoding", i)
717 if inputenc in encodings:
718 document.header[i] = "\\inputencoding auto"
719 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# LyX version strings associated with this conversion module.
# NOTE(review): presumably read by the lyx2lyx driver to map a release name
# to this module -- confirm against LyX.py.
supported_versions = ["1.5.0", "1.5"]
727 convert = [[246, []],
728 [247, [convert_font_settings]],
730 [249, [convert_utf8]],
733 [252, [convert_commandparams, convert_bibitem]],
735 [254, [convert_esint]],
739 revert = [[255, [revert_encodings]],
740 [254, [revert_clearpage, revert_cleardoublepage]],
741 [253, [revert_esint]],
742 [252, [revert_nomenclature, revert_printnomenclature]],
743 [251, [revert_commandparams]],
744 [250, [revert_cs_label]],
746 [248, [revert_utf8]],
747 [247, [revert_booktabs]],
748 [246, [revert_font_settings]],
749 [245, [revert_framed]]]
752 if __name__ == "__main__":