1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
23 from parser_tools import find_token, find_token_exact, find_tokens, find_end_of, get_value
24 from LyX import get_encoding
27 ####################################################################
28 # Private helper functions
def find_end_of_inset(lines, i):
    """Find the end of an inset, searching from lines[i] (inclusive).

    Thin wrapper around find_end_of() using the "\\begin_inset" /
    "\\end_inset" token pair.  The result is whatever find_end_of()
    returns; callers in this file use it as the index of the closing
    line of the inset.
    """
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
34 # End of helper functions
35 ####################################################################
39 # Notes: Framed/Shaded
42 def revert_framed(document):
43 "Revert framed notes. "
# NOTE(review): the statements that initialise i, drive the search loop and
# handle a failed search are not visible in this excerpt.
# Look for the next "Note Framed" or "Note Shaded" inset in the body,
# starting at index i.
46 i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
# Rewrite the matched line as a plain Note inset.
50 document.body[i] = "\\begin_inset Note"
# Roman font written as \font_roman for each \fontscheme value (looked up in
# convert_font_settings, matched in reverse in revert_font_settings).
# NOTE(review): the closing line of this literal is not visible in this excerpt.
58 roman_fonts = {'default' : 'default', 'ae' : 'ae',
59 'times' : 'times', 'palatino' : 'palatino',
60 'helvet' : 'default', 'avant' : 'default',
61 'newcent' : 'newcent', 'bookman' : 'bookman',
# Sans serif font written as \font_sans for each \fontscheme value.
# NOTE(review): the closing line of this literal is not visible in this excerpt.
63 sans_fonts = {'default' : 'default', 'ae' : 'default',
64 'times' : 'default', 'palatino' : 'default',
65 'helvet' : 'helvet', 'avant' : 'avant',
66 'newcent' : 'default', 'bookman' : 'default',
# Typewriter font written as \font_typewriter for each \fontscheme value.
# Every scheme keeps the default typewriter font except 'pslatex'.
typewriter_fonts = dict.fromkeys(
    ['default', 'ae', 'times', 'palatino', 'helvet', 'avant',
     'newcent', 'bookman'],
    'default')
typewriter_fonts['pslatex'] = 'courier'
74 def convert_font_settings(document):
75 " Convert font settings. "
# Replaces the single \fontscheme header line by separate per-family font
# settings looked up in roman_fonts, sans_fonts and typewriter_fonts.
# NOTE(review): the initialisation of i and the conditions guarding the two
# "Malformed" warnings below are not visible in this excerpt.
77 i = find_token_exact(document.header, "\\fontscheme", i)
79 document.warning("Malformed LyX document: Missing `\\fontscheme'.")
81 font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
83 document.warning("Malformed LyX document: Empty `\\fontscheme'.")
84 font_scheme = 'default'
# A scheme name that is not a key of roman_fonts is reported and treated
# as 'default'.
85 if not font_scheme in roman_fonts.keys():
86 document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
87 font_scheme = 'default'
# Swap the one \fontscheme line for the list of new header lines.
# NOTE(review): some entries of this list (between \font_default_family and
# \font_sf_scale) are not visible in this excerpt.
88 document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
89 '\\font_sans %s' % sans_fonts[font_scheme],
90 '\\font_typewriter %s' % typewriter_fonts[font_scheme],
91 '\\font_default_family default',
94 '\\font_sf_scale 100',
95 '\\font_tt_scale 100']
98 def revert_font_settings(document):
99 " Revert font settings. "
# Collapses the per-family font settings of the header back into a single
# \fontscheme line and appends preamble commands for settings that
# \fontscheme alone cannot express.
# NOTE(review): several guard/else lines of this function are not visible in
# this excerpt; the comments below describe the visible statements only.
# Font name per family; starts as 'default' and is overwritten from the header.
102 fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
# Read each \font_<family> value and delete its header line.
103 for family in 'roman', 'sans', 'typewriter':
104 name = '\\font_%s' % family
105 i = find_token_exact(document.header, name, i)
107 document.warning("Malformed LyX document: Missing `%s'." % name)
# NOTE(review): insert_line is assigned on lines not visible here; further
# down it is the index at which the \fontscheme line is inserted.
110 if (insert_line < 0):
112 fonts[family] = get_value(document.header, name, i, i + 1)
113 del document.header[i]
# \font_default_family: the warning path falls back to 'default'; the other
# visible path reads the value and removes the header line.
114 i = find_token_exact(document.header, '\\font_default_family', i)
116 document.warning("Malformed LyX document: Missing `\\font_default_family'.")
117 font_default_family = 'default'
119 font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
120 del document.header[i]
# \font_sc: read and removed; any value other than 'false' only produces a
# warning because its conversion is not implemented.
121 i = find_token_exact(document.header, '\\font_sc', i)
123 document.warning("Malformed LyX document: Missing `\\font_sc'.")
126 font_sc = get_value(document.header, '\\font_sc', i, i + 1)
127 del document.header[i]
128 if font_sc != 'false':
129 document.warning("Conversion of '\\font_sc' not yet implemented.")
# \font_osf: read and removed here, acted on further down.
130 i = find_token_exact(document.header, '\\font_osf', i)
132 document.warning("Malformed LyX document: Missing `\\font_osf'.")
135 font_osf = get_value(document.header, '\\font_osf', i, i + 1)
136 del document.header[i]
# \font_sf_scale and \font_tt_scale: values other than '100' only produce a
# warning because their conversion is not implemented.
137 i = find_token_exact(document.header, '\\font_sf_scale', i)
139 document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
140 font_sf_scale = '100'
142 font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
143 del document.header[i]
144 if font_sf_scale != '100':
145 document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
146 i = find_token_exact(document.header, '\\font_tt_scale', i)
148 document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
149 font_tt_scale = '100'
151 font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
152 del document.header[i]
153 if font_tt_scale != '100':
154 document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
# Look for a font scheme whose roman/sans/typewriter triple equals the fonts
# read above.
155 for font_scheme in roman_fonts.keys():
156 if (roman_fonts[font_scheme] == fonts['roman'] and
157 sans_fonts[font_scheme] == fonts['sans'] and
158 typewriter_fonts[font_scheme] == fonts['typewriter']):
# Matching scheme: write it as \fontscheme, carry a non-default family over
# through \familydefault in the preamble, and warn that \font_osf is ignored.
159 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
160 if font_default_family != 'default':
161 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
162 if font_osf == 'true':
163 document.warning("Ignoring `\\font_osf = true'")
# Fallback: \fontscheme default plus explicit preamble commands.
# NOTE(review): the statement separating the matching-scheme case from this
# fallback is not visible -- presumably an early exit; confirm.
165 font_scheme = 'default'
166 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
# cmr roman: set \rmdefault, and load the eco package when \font_osf is true.
167 if fonts['roman'] == 'cmr':
168 document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
169 if font_osf == 'true':
170 document.preamble.append('\\usepackage{eco}')
# Roman fonts that are loaded as a package of the same name.
172 for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
173 if fonts['roman'] == font:
174 document.preamble.append('\\usepackage{%s}' % font)
# Sans serif fonts selected through \sfdefault.
175 for font in 'cmss', 'lmss', 'cmbr':
176 if fonts['sans'] == font:
177 document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
# NOTE(review): 'berasans' is a plain string, not a tuple, so this loop
# iterates over its single characters and the comparison below can never
# equal the full font name; a trailing comma ('berasans',) looks intended.
178 for font in 'berasans':
179 if fonts['sans'] == font:
180 document.preamble.append('\\usepackage{%s}' % font)
# Typewriter fonts selected through \ttdefault.
181 for font in 'cmtt', 'lmtt', 'cmtl':
182 if fonts['typewriter'] == font:
183 document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
# Typewriter fonts that are loaded as a package of the same name.
184 for font in 'courier', 'beramono', 'luximono':
185 if fonts['typewriter'] == font:
186 document.preamble.append('\\usepackage{%s}' % font)
# Same \familydefault / \font_osf handling as in the matching-scheme case.
187 if font_default_family != 'default':
188 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
189 if font_osf == 'true':
190 document.warning("Ignoring `\\font_osf = true'")
193 def revert_booktabs(document):
194 " We remove the booktabs flag or everything else will become a mess. "
# A <row ...> line that carries at least one attribute ending in space="...".
195 re_row = re.compile(r'^<row.*space="[^"]+".*>$')
# The three row spacing attributes, each including its leading whitespace so
# that substituting '' removes the attribute cleanly.
196 re_tspace = re.compile(r'\s+topspace="[^"]+"')
197 re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
198 re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
# NOTE(review): the loop header, the initialisation of i and the checks on
# the search results are not visible in this excerpt.
# Locate the next Tabular inset and the end of that inset.
201 i = find_token(document.body, "\\begin_inset Tabular", i)
204 j = find_end_of_inset(document.body, i + 1)
206 document.warning("Malformed LyX document: Could not find end of tabular.")
# Walk over the lines i .. j-1 of the tabular.
208 for k in range(i, j):
# Drop the booktabs="true" attribute from the <features> line.
209 if re.search('^<features.* booktabs="true".*>$', document.body[k]):
210 document.warning("Converting 'booktabs' table to normal table.")
211 document.body[k] = document.body[k].replace(' booktabs="true"', '')
# Strip the top/bottom/interline spacing attributes from row lines.
212 if re.search(re_row, document.body[k]):
213 document.warning("Removing extra row space.")
214 document.body[k] = re_tspace.sub('', document.body[k])
215 document.body[k] = re_bspace.sub('', document.body[k])
216 document.body[k] = re_ispace.sub('', document.body[k])
220 def convert_multiencoding(document, forward):
221 """ Fix files with multiple encodings.
222 Files with an inputencoding of "auto" or "default" and multiple languages
223 where at least two languages have different default encodings are encoded
224 in multiple encodings for file formats < 249. These files are incorrectly
225 read and written (as if the whole file was in the encoding of the main
229 - converts from fake unicode values to true unicode if forward is true, and
230 - converts from true unicode values to fake unicode if forward is false.
231 document.encoding must be set to the old value (format 248) in both cases.
233 We do this here and not in LyX.py because it is far easier to do the
234 necessary parsing in modern formats than in ancient ones.
236 encoding_stack = [document.encoding]
# NOTE(review): the end of the docstring and several guard lines (including
# the ones that select between the two re-coding paths at the bottom) are
# not visible in this excerpt.
# encoding_stack (above) starts with the document encoding; a copy of the top
# entry is pushed at every \begin_layout and popped at every \end_layout.
# Matches a "\lang <name>" line and captures the language name.
237 lang_re = re.compile(r"^\\lang\s(\S+)")
# Only documents with inputencoding "auto" or "default" are processed.
238 if document.inputencoding == "auto" or document.inputencoding == "default":
239 for i in range(len(document.body)):
240 result = lang_re.match(document.body[i])
242 language = result.group(1)
# "\lang default" resets the current encoding to the document encoding.
243 if language == "default":
244 document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding))
245 encoding_stack[-1] = document.encoding
# Any other language takes its encoding from the lyx2lyx_lang table
# (presumably element 3 of each entry is the encoding name -- confirm).
247 from lyx2lyx_lang import lang
248 document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]))
249 encoding_stack[-1] = lang[language][3]
# The search is limited to the range i .. i+1, so the result equals i
# presumably only when line i itself carries the token.
250 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
251 document.warning("Adding nested encoding %s." % encoding_stack[-1])
252 encoding_stack.append(encoding_stack[-1])
253 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
254 document.warning("Removing nested encoding %s." % encoding_stack[-1])
255 del encoding_stack[-1]
# Only lines whose active encoding differs from the document encoding need
# to be re-coded.
256 if encoding_stack[-1] != document.encoding:
258 # This line has been incorrectly interpreted as if it was
259 # encoded in 'encoding'.
260 # Convert back to the 8bit string that was in the file.
261 orig = document.body[i].encode(document.encoding)
262 # Convert the 8bit string that was in the file to unicode
263 # with the correct encoding.
264 document.body[i] = orig.decode(encoding_stack[-1])
266 # Convert unicode to the 8bit string that will be written
267 # to the file with the correct encoding.
268 orig = document.body[i].encode(encoding_stack[-1])
269 # Convert the 8bit string that will be written to the
270 # file to fake unicode with the encoding that will later
271 # be used when writing to the file.
272 document.body[i] = orig.decode(document.encoding)
def convert_utf8(document):
    """Switch the document encoding to UTF-8.

    The body is first run through convert_multiencoding() in forward mode
    while document.encoding still holds the old value; only afterwards is
    document.encoding set to "utf8".
    """
    convert_multiencoding(document, forward=True)
    document.encoding = "utf8"
281 def revert_utf8(document):
282 " Set document encoding to the value corresponding to inputencoding. "
# Look up the \inputencoding line in the header.
283 i = find_token(document.header, "\\inputencoding", 0)
# NOTE(review): the `if` that precedes the `elif` below is not visible in
# this excerpt; its visible body appends an "auto" setting to the header.
285 document.header.append("\\inputencoding auto")
# An explicit utf8 input encoding is replaced by "auto".
286 elif get_value(document.header, "\\inputencoding", i) == "utf8":
287 document.header[i] = "\\inputencoding auto"
# Refresh the cached input encoding from the header, derive document.encoding
# from language and input encoding via get_encoding() (248 is passed as third
# argument -- presumably the target file format; confirm), then re-code the
# body with forward=False.
288 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
289 document.encoding = get_encoding(document.language, document.inputencoding, 248)
290 convert_multiencoding(document, False)
293 def revert_cs_label(document):
294 " Remove status flag of charstyle label. "
# NOTE(review): the loops, the index bookkeeping and the bodies of the
# branches below are not visible in this excerpt.
# Find the next CharStyle inset in the body.
297 i = find_token(document.body, "\\begin_inset CharStyle", i)
300 # Search for a line starting 'show_label'
301 # If it is not there, break with a warning message
# A line counts as the flag when its first 10 characters are "show_label".
304 if (document.body[i][:10] == "show_label"):
# A line starting with "\begin_layout" (13 characters) that is reached
# before any show_label line is reported as a malformed document.
307 elif (document.body[i][:13] == "\\begin_layout"):
308 document.warning("Malformed LyX document: Missing 'show_label'.")
315 def convert_bibitem(document):
317 \bibitem [option]{argument}
321 \begin_inset LatexCommand bibitem
327 This must be called after convert_commandparams.
329 regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
# The pattern above reads: a leading non-blank token, optional whitespace,
# an optional "[...]" group (group 1) and a "{...}" group (group 2).
# NOTE(review): the docstring delimiters, the loop and the guards (failed
# search, failed match, missing option) are not visible in this excerpt.
332 i = find_token(document.body, "\\bibitem", i)
335 match = re.match(regex, document.body[i])
336 option = match.group(1)
337 argument = match.group(2)
# Build the replacement inset line by line.
338 lines = ['\\begin_inset LatexCommand bibitem']
# [1:-1] drops the surrounding brackets/braces; embedded double quotes are
# escaped because the values are written inside double quotes.
340 lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
341 lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
343 lines.append('\\end_inset')
# Replace the single \bibitem line with the generated inset lines.
344 document.body[i:i+1] = lines
# command : [option1, option2, argument]
# Parameter names used for the two optional arguments and the mandatory
# argument of each LatexCommand inset.  An empty name marks a slot that the
# command does not accept.
commandparams_info = {
    "bibitem": ["label", "", "key"],
    "bibtex": ["options", "btprint", "bibfiles"],
}
# All citation commands share the same triple; each gets its own list object.
commandparams_info.update(
    (cite_cmd, ["after", "before", "key"])
    for cite_cmd in (
        "cite", "citet", "citep", "citealt", "citealp",
        "citeauthor", "citeyear", "citeyearpar",
        "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
        "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
        "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
        "citefield", "citetitle", "cite*",
    ))
commandparams_info.update((
    ("hfill", ["", "", ""]),
    ("index", ["", "", "name"]),
    ("printindex", ["", "", "name"]),
    ("label", ["", "", "name"]),
))
# Cross-reference commands.
commandparams_info.update(
    (ref_cmd, ["name", "", "reference"])
    for ref_cmd in (
        "eqref", "pageref", "prettyref", "ref", "vpageref", "vref",
    ))
commandparams_info.update((
    ("tableofcontents", ["", "", "type"]),
    ("htmlurl", ["name", "", "target"]),
    ("url", ["name", "", "target"]),
))
393 def convert_commandparams(document):
396 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
401 \begin_inset LatexCommand cmdname
407 name1, name2 and name3 can be different for each command.
409 # \begin_inset LatexCommand bibitem was not the official version (see
410 # convert_bibitem()), but could be read in, so we convert it here, too.
# NOTE(review): the docstring delimiters, the loops, the scanner's state
# initialisation and most of its branch bodies are not visible in this
# excerpt; the comments below describe the visible statements only.
414 i = find_token(document.body, "\\begin_inset LatexCommand", i)
# Text that follows the 25-character "\begin_inset LatexCommand" token and
# one separator character.
417 command = document.body[i][26:].strip()
419 document.warning("Malformed LyX document: Missing LatexCommand name.")
423 # The following parser is taken from the original InsetCommandParams::scanCommand
429 # Used to handle things like \command[foo[bar]]{foo{bar}}
# The visible scanner states are CMDNAME, OPTION, SECOPTION and CONTENT;
# c is the current character (b is compared against ']' further down).
# A space, '[' or '{' while reading the command name.
433 if ((state == "CMDNAME" and c == ' ') or
434 (state == "CMDNAME" and c == '[') or
435 (state == "CMDNAME" and c == '{')):
# A closing bracket/brace that belongs to the current state.
437 if ((state == "OPTION" and c == ']') or
438 (state == "SECOPTION" and c == ']') or
439 (state == "CONTENT" and c == '}')):
443 nestdepth = nestdepth - 1
# An opening bracket/brace inside an option or the content adds one level
# of nesting.
444 if ((state == "OPTION" and c == '[') or
445 (state == "SECOPTION" and c == '[') or
446 (state == "CONTENT" and c == '{')):
447 nestdepth = nestdepth + 1
448 if state == "CMDNAME":
450 elif state == "OPTION":
452 elif state == "SECOPTION":
454 elif state == "CONTENT":
459 elif c == '[' and b != ']':
461 nestdepth = 0 # Just to be sure
462 elif c == '[' and b == ']':
464 nestdepth = 0 # Just to be sure
467 nestdepth = 0 # Just to be sure
470 # Now we have parsed the command, output the parameters
471 lines = ["\\begin_inset LatexCommand %s" % name]
# For each of the three slots: an empty slot name in commandparams_info
# leads to the "Ignoring invalid ..." warning; the other visible statement
# appends `slotname "value"` with embedded double quotes escaped.
473 if commandparams_info[name][0] == "":
474 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
476 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
478 if commandparams_info[name][1] == "":
479 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
481 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
483 if commandparams_info[name][2] == "":
484 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
486 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
# Replace the one-line inset header with the generated lines.
487 document.body[i:i+1] = lines
491 def revert_commandparams(document):
# Turns the multi-line parameter form of LatexCommand insets back into the
# one-line \cmdname[opt1][opt2]{arg} form; bibitem becomes a bare \bibitem
# line instead.
# NOTE(review): the loop, the initialisation of option1/option2/argument/
# preview_line and the conditions choosing between the output forms are not
# visible in this excerpt.
# Parameter line: name (group 1), whitespace, value (group 2).
492 regex = re.compile(r'(\S+)\s+(.+)')
495 i = find_token(document.body, "\\begin_inset LatexCommand", i)
# The third whitespace-separated word of the inset line is the command name.
498 name = document.body[i].split()[2]
499 j = find_end_of_inset(document.body, i + 1)
# Classify every line between the inset header and its end.
504 for k in range(i + 1, j):
505 match = re.match(regex, document.body[k])
507 pname = match.group(1)
508 pvalue = match.group(2)
# A preview line is kept verbatim and re-emitted below.
509 if pname == "preview":
510 preview_line = document.body[k]
# A parameter is assigned to the slot whose (non-empty) name it carries; the
# surrounding double quotes are stripped and \" is unescaped.
# NOTE(review): strip('"') removes every leading/trailing quote character,
# so a value that ends in an escaped quote (\") loses that quote before the
# unescaping step runs.
511 elif (commandparams_info[name][0] != "" and
512 pname == commandparams_info[name][0]):
513 option1 = pvalue.strip('"').replace('\\"', '"')
514 elif (commandparams_info[name][1] != "" and
515 pname == commandparams_info[name][1]):
516 option2 = pvalue.strip('"').replace('\\"', '"')
517 elif (commandparams_info[name][2] != "" and
518 pname == commandparams_info[name][2]):
519 argument = pvalue.strip('"').replace('\\"', '"')
520 elif document.body[k].strip() != "":
521 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
# bibitem is written as a plain \bibitem line, with or without the option.
522 if name == "bibitem":
524 lines = ["\\bibitem {%s}" % argument]
526 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
# Other commands: the four visible forms cover no option, second option only
# (with an empty first "[]"), first option only, and both options.
530 lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
532 lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
535 lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
537 lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
# Only non-bibitem output keeps the preview line and the inset terminator.
538 if name != "bibitem":
539 if preview_line != "":
540 lines.append(preview_line)
542 lines.append('\\end_inset')
# Replace the whole inset (header through end line) with the generated lines.
543 document.body[i:j+1] = lines
547 def revert_nomenclature(document):
548 " Convert nomenclature entry to ERT. "
# NOTE(review): the loop, the initialisation of symbol/description/prefix/
# preview_line and use_nomencl, the condition choosing between the two
# command forms and most of the ERT line list are not visible in this excerpt.
# Parameter line: name (group 1), whitespace, value (group 2).
549 regex = re.compile(r'(\S+)\s+(.+)')
553 i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
557 j = find_end_of_inset(document.body, i + 1)
# Collect the inset parameters from the lines between header and end.
562 for k in range(i + 1, j):
563 match = re.match(regex, document.body[k])
565 name = match.group(1)
566 value = match.group(2)
567 if name == "preview":
568 preview_line = document.body[k]
# Values have their surrounding double quotes stripped and \" unescaped.
569 elif name == "symbol":
570 symbol = value.strip('"').replace('\\"', '"')
571 elif name == "description":
572 description = value.strip('"').replace('\\"', '"')
573 elif name == "prefix":
574 prefix = value.strip('"').replace('\\"', '"')
575 elif document.body[k].strip() != "":
576 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
# Command text without and with the optional [prefix] argument.
578 command = 'nomenclature{%s}{%s}' % (symbol, description)
580 command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
# The whole inset is replaced by an ERT inset using the document's default
# layout.
581 document.body[i:j+1] = ['\\begin_inset ERT',
584 '\\begin_layout %s' % document.default_layout,
# Add the nomencl package and \makenomenclature to the preamble when
# use_nomencl is set and that exact \usepackage line is not found there.
593 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
594 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
595 document.preamble.append('\\makenomenclature')
598 def revert_printnomenclature(document):
599 " Convert printnomenclature to ERT. "
# NOTE(review): the loop, the initialisation of labelwidth/preview_line and
# use_nomencl, the condition choosing between the two command forms and most
# of the ERT line list are not visible in this excerpt.
# Parameter line: name (group 1), whitespace, value (group 2).
600 regex = re.compile(r'(\S+)\s+(.+)')
604 i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
608 j = find_end_of_inset(document.body, i + 1)
# Collect the inset parameters from the lines between header and end.
611 for k in range(i + 1, j):
612 match = re.match(regex, document.body[k])
614 name = match.group(1)
615 value = match.group(2)
616 if name == "preview":
617 preview_line = document.body[k]
# The value has its surrounding double quotes stripped and \" unescaped.
618 elif name == "labelwidth":
619 labelwidth = value.strip('"').replace('\\"', '"')
620 elif document.body[k].strip() != "":
621 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
# NOTE(review): the command text built here starts with "nomenclature"
# although the inset being reverted is printnomenclature -- confirm that
# this is intended.
623 command = 'nomenclature{}'
625 command = 'nomenclature[%s]' % labelwidth
# The whole inset is replaced by an ERT inset using the document's default
# layout.
626 document.body[i:j+1] = ['\\begin_inset ERT',
629 '\\begin_layout %s' % document.default_layout,
# Add the nomencl package and \makenomenclature to the preamble when
# use_nomencl is set and that exact \usepackage line is not found there.
638 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
639 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
640 document.preamble.append('\\makenomenclature')
643 def convert_esint(document):
644 " Add \\use_esint setting to header. "
# The \cite_engine header line serves as the anchor for the new setting.
645 i = find_token(document.header, "\\cite_engine", 0)
# NOTE(review): the condition guarding this warning is not visible in this
# excerpt.
647 document.warning("Malformed LyX document: Missing `\\cite_engine'.")
649 # 0 is off, 1 is auto, 2 is on.
# Inserting at index i places the new line directly before \cite_engine.
650 document.header.insert(i, '\\use_esint 0')
653 def revert_esint(document):
654 " Remove \\use_esint setting from header. "
655 i = find_token(document.header, "\\use_esint", 0)
# NOTE(review): the condition guarding this warning is not visible in this
# excerpt.
657 document.warning("Malformed LyX document: Missing `\\use_esint'.")
# The second word of the header line is the value of the setting; the line
# itself is then removed.
659 use_esint = document.header[i].split()[1]
660 del document.header[i]
661 # 0 is off, 1 is auto, 2 is on.
# NOTE(review): the test on use_esint that decides whether the package is
# added to the preamble is not visible in this excerpt.
663 document.preamble.append('\\usepackage{esint}')
666 def revert_clearpage(document):
# NOTE(review): the docstring, the loop and most of the replacement list are
# not visible in this excerpt.
# Find the next \clearpage line in the body.
670 i = find_token(document.body, "\\clearpage", i)
# Replace that single line by an ERT inset that uses the document's default
# layout.
673 document.body[i:i+1] = ['\\begin_inset ERT',
676 '\\begin_layout %s' % document.default_layout,
687 def revert_cleardoublepage(document):
688 " cleardoublepage -> ERT"
# NOTE(review): the loop and most of the replacement list are not visible in
# this excerpt.
# Find the next \cleardoublepage line in the body.
691 i = find_token(document.body, "\\cleardoublepage", i)
# Replace that single line by an ERT inset that uses the document's default
# layout.
694 document.body[i:i+1] = ['\\begin_inset ERT',
697 '\\begin_layout %s' % document.default_layout,
708 def revert_encodings(document):
709 " Set new encodings to auto. "
# Input encoding names that are replaced by "auto" below.
710 encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
711 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
712 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
713 i = find_token(document.header, "\\inputencoding", 0)
# NOTE(review): the condition selecting between the append below and the
# lookup that follows it is not visible in this excerpt.
715 document.header.append("\\inputencoding auto")
717 inputenc = get_value(document.header, "\\inputencoding", i)
# A listed encoding is replaced by "auto"; other values are left untouched.
718 if inputenc in encodings:
719 document.header[i] = "\\inputencoding auto"
# Keep the cached attribute in sync with the (possibly rewritten) header.
720 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# LyX version strings associated with this module.
727 supported_versions = ["1.5.0","1.5"]
# Each entry of the two tables pairs a number (presumably the file-format
# number of that step -- confirm) with the list of functions to run for it;
# `convert` lists ascending numbers, `revert` descending ones.
# NOTE(review): several entries of both tables and the body of the __main__
# guard are not visible in this excerpt.
728 convert = [[246, []],
729 [247, [convert_font_settings]],
731 [249, [convert_utf8]],
734 [252, [convert_commandparams, convert_bibitem]],
736 [254, [convert_esint]],
740 revert = [[255, [revert_encodings]],
741 [254, [revert_clearpage, revert_cleardoublepage]],
742 [253, [revert_esint]],
743 [252, [revert_nomenclature, revert_printnomenclature]],
744 [251, [revert_commandparams]],
745 [250, [revert_cs_label]],
747 [248, [revert_utf8]],
748 [247, [revert_booktabs]],
749 [246, [revert_font_settings]],
750 [245, [revert_framed]]]
753 if __name__ == "__main__":