1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
25 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value
26 from LyX import get_encoding
29 ####################################################################
30 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the \\end_inset matching the inset at lines[i]."""
    begin_tag = "\\begin_inset"
    end_tag = "\\end_inset"
    return find_end_of(lines, i, begin_tag, end_tag)
def find_end_of_layout(lines, i):
    """Return the index of the \\end_layout matching the layout at lines[i]."""
    begin_tag = "\\begin_layout"
    end_tag = "\\end_layout"
    return find_end_of(lines, i, begin_tag, end_tag)
40 # End of helper functions
41 ####################################################################
45 # Notes: Framed/Shaded
def revert_framed(document):
    "Revert framed notes. "
        # Framed/Shaded note insets do not exist in the older format;
        # downgrade either variant to a plain Note inset (body kept as-is).
        i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
        document.body[i] = "\\begin_inset Note"
# Lookup tables mapping a LyX 1.4 \fontscheme name to the LyX 1.5
# \font_roman / \font_sans / \font_typewriter header values.
# Schemes that do not set a given family map to 'default'.
roman_fonts = {'default' : 'default', 'ae' : 'ae',
               'times' : 'times', 'palatino' : 'palatino',
               'helvet' : 'default', 'avant' : 'default',
               'newcent' : 'newcent', 'bookman' : 'bookman',
sans_fonts = {'default' : 'default', 'ae' : 'default',
              'times' : 'default', 'palatino' : 'default',
              'helvet' : 'helvet', 'avant' : 'avant',
              'newcent' : 'default', 'bookman' : 'default',
typewriter_fonts = {'default' : 'default', 'ae' : 'default',
                    'times' : 'default', 'palatino' : 'default',
                    'helvet' : 'default', 'avant' : 'default',
                    'newcent' : 'default', 'bookman' : 'default',
                    'pslatex' : 'courier'}
def convert_font_settings(document):
    " Convert font settings. "
    # Translate the single LyX 1.4 `\fontscheme' header line into the
    # per-family `\font_*' settings, using the lookup tables above.
    i = find_token_exact(document.header, "\\fontscheme", i)
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        font_scheme = 'default'
    if not font_scheme in roman_fonts.keys():
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
        font_scheme = 'default'
    # Replace the \fontscheme line in place with the new settings;
    # scales and default family get neutral values.
    document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
                              '\\font_sans %s' % sans_fonts[font_scheme],
                              '\\font_typewriter %s' % typewriter_fonts[font_scheme],
                              '\\font_default_family default',
                              '\\font_sf_scale 100',
                              '\\font_tt_scale 100']
def revert_font_settings(document):
    " Revert font settings. "
    # Collect the per-family \font_* values from the header (removing the
    # lines as we go), then either map them back to a \fontscheme or emit
    # equivalent LaTeX preamble code.
    fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
    for family in 'roman', 'sans', 'typewriter':
        name = '\\font_%s' % family
        i = find_token_exact(document.header, name, i)
            document.warning("Malformed LyX document: Missing `%s'." % name)
            # Remember where the first \font_* line was so the reverted
            # \fontscheme can be inserted at the same place.
            if (insert_line < 0):
            fonts[family] = get_value(document.header, name, i, i + 1)
            del document.header[i]
    i = find_token_exact(document.header, '\\font_default_family', i)
        document.warning("Malformed LyX document: Missing `\\font_default_family'.")
        font_default_family = 'default'
        font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_sc', i)
        document.warning("Malformed LyX document: Missing `\\font_sc'.")
        font_sc = get_value(document.header, '\\font_sc', i, i + 1)
        del document.header[i]
        if font_sc != 'false':
            document.warning("Conversion of '\\font_sc' not yet implemented.")
    i = find_token_exact(document.header, '\\font_osf', i)
        document.warning("Malformed LyX document: Missing `\\font_osf'.")
        font_osf = get_value(document.header, '\\font_osf', i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_sf_scale', i)
        document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
        font_sf_scale = '100'
        font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
        del document.header[i]
        if font_sf_scale != '100':
            document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
    i = find_token_exact(document.header, '\\font_tt_scale', i)
        document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
        font_tt_scale = '100'
        font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
        del document.header[i]
        if font_tt_scale != '100':
            document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
    # First try to find a \fontscheme whose three families match exactly.
    for font_scheme in roman_fonts.keys():
        if (roman_fonts[font_scheme] == fonts['roman'] and
            sans_fonts[font_scheme] == fonts['sans'] and
            typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
    # No matching scheme: fall back to 'default' and reproduce each
    # family selection via preamble code instead.
    font_scheme = 'default'
    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
    for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
        if fonts['roman'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmss', 'lmss', 'cmbr':
        if fonts['sans'] == font:
            document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
    for font in 'berasans':
        if fonts['sans'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmtt', 'lmtt', 'cmtl':
        if fonts['typewriter'] == font:
            document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
    for font in 'courier', 'beramono', 'luximono':
        if fonts['typewriter'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")
def revert_booktabs(document):
    " We remove the booktabs flag or everything else will become a mess. "
    # Patterns for the extra row-spacing attributes that only booktabs
    # tables carry; they must be stripped together with the flag.
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
        i = find_token(document.body, "\\begin_inset Tabular", i)
        j = find_end_of_inset(document.body, i + 1)
            document.warning("Malformed LyX document: Could not find end of tabular.")
        # Scan every line of the tabular inset for the flag and the
        # spacing attributes.
        for k in range(i, j):
            if re.search('^<features.* booktabs="true".*>$', document.body[k]):
                document.warning("Converting 'booktabs' table to normal table.")
                document.body[k] = document.body[k].replace(' booktabs="true"', '')
            if re.search(re_row, document.body[k]):
                document.warning("Removing extra row space.")
                document.body[k] = re_tspace.sub('', document.body[k])
                document.body[k] = re_bspace.sub('', document.body[k])
                document.body[k] = re_ispace.sub('', document.body[k])
def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.

    Files with an inputencoding of "auto" or "default" and multiple languages
    where at least two languages have different default encodings are encoded
    in multiple encodings for file formats < 249. These files are incorrectly
    read and written (as if the whole file was in the encoding of the main
    language).
    This is not true for files written by CJK-LyX, they are always in the
    locale encoding, so those are skipped here.

    This function
    - converts from fake unicode values to true unicode if forward is true, and
    - converts from true unicode values to fake unicode if forward is false.
    document.encoding must be set to the old value (format 248) in both cases.

    We do this here and not in LyX.py because it is far easier to do the
    necessary parsing in modern formats than in ancient ones.
    """
    if document.cjk_encoding != '':
    # Track the effective encoding per nesting level; the document
    # encoding is the outermost entry.
    encoding_stack = [document.encoding]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    if document.inputencoding == "auto" or document.inputencoding == "default":
        for i in range(len(document.body)):
            result = lang_re.match(document.body[i])
                language = result.group(1)
                if language == "default":
                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                    encoding_stack[-1] = document.encoding
                    # Look up the language's default encoding.
                    from lyx2lyx_lang import lang
                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                if len(encoding_stack) == 1:
                    # Don't remove the document encoding from the stack
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                    del encoding_stack[-1]
            if encoding_stack[-1] != document.encoding:
                    # This line has been incorrectly interpreted as if it was
                    # encoded in 'encoding'.
                    # Convert back to the 8bit string that was in the file.
                    orig = document.body[i].encode(document.encoding)
                    # Convert the 8bit string that was in the file to unicode
                    # with the correct encoding.
                    document.body[i] = orig.decode(encoding_stack[-1])
                    # Convert unicode to the 8bit string that will be written
                    # to the file with the correct encoding.
                    orig = document.body[i].encode(encoding_stack[-1])
                    # Convert the 8bit string that will be written to the
                    # file to fake unicode with the encoding that will later
                    # be used when writing to the file.
                    document.body[i] = orig.decode(document.encoding)
def convert_utf8(document):
    """Switch the document encoding to UTF-8.

    The body must be fixed up first: convert_multiencoding() relies on
    document.encoding still holding the old (format 248) value, so the
    header attribute is only updated afterwards.
    """
    convert_multiencoding(document, True)
    document.encoding = "utf8"
def revert_utf8(document):
    " Set document encoding to the value corresponding to inputencoding. "
    i = find_token(document.header, "\\inputencoding", 0)
        # No \inputencoding line at all: add one.
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8":
        # utf8 did not exist before; fall back to auto.
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
    # Recompute the 8-bit encoding for format 248, then re-fake the body.
    document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
    convert_multiencoding(document, False)
def revert_cs_label(document):
    " Remove status flag of charstyle label. "
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        # Search for a line starting 'show_label'.
        # If it is not there, break with a warning message.
            if (document.body[i][:10] == "show_label"):
            elif (document.body[i][:13] == "\\begin_layout"):
                document.warning("Malformed LyX document: Missing 'show_label'.")
def convert_bibitem(document):
    r""" Convert

    \bibitem [option]{argument}

    to

    \begin_inset LatexCommand bibitem
    label "option"
    key "argument"
    \end_inset

    This must be called after convert_commandparams.
    """
    # Optional [label] followed by mandatory {key}.
    regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
        i = find_token(document.body, "\\bibitem", i)
        match = re.match(regex, document.body[i])
        option = match.group(1)
        argument = match.group(2)
        lines = ['\\begin_inset LatexCommand bibitem']
            # Strip the surrounding brackets and escape embedded quotes.
            lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
        lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
        lines.append('\\end_inset')
        document.body[i:i+1] = lines
# command : [option1, option2, argument]
# Parameter names used by the format-249 command-inset representation.
# An empty string means the command does not accept that slot.
commandparams_info = {
    "bibitem" : ["label", "", "key"],
    "bibtex" : ["options", "btprint", "bibfiles"],
    "hfill" : ["", "", ""],
    "index" : ["", "", "name"],
    "printindex" : ["", "", "name"],
    "label" : ["", "", "name"],
    "eqref" : ["name", "", "reference"],
    "pageref" : ["name", "", "reference"],
    "prettyref" : ["name", "", "reference"],
    "ref" : ["name", "", "reference"],
    "vpageref" : ["name", "", "reference"],
    "vref" : ["name", "", "reference"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl" : ["name", "", "target"],
    "url" : ["name", "", "target"]}

# All citation-style commands share the same three parameter names.
for _cite_cmd in ("cite", "citet", "citep", "citealt", "citealp",
                  "citeauthor", "citeyear", "citeyearpar",
                  "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
                  "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
                  "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
                  "citefield", "citetitle", "cite*"):
    commandparams_info[_cite_cmd] = ["after", "before", "key"]
def convert_commandparams(document):
    r""" Convert

    \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}

    to

    \begin_inset LatexCommand cmdname
    name1 "opt1"
    name2 "opt2"
    name3 "arg"
    \end_inset

    name1, name2 and name3 can be different for each command.
    """
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        # Everything after "\begin_inset LatexCommand " (26 chars).
        command = document.body[i][26:].strip()
            document.warning("Malformed LyX document: Missing LatexCommand name.")
        # The following parser is taken from the original InsetCommandParams::scanCommand
        # Used to handle things like \command[foo[bar]]{foo{bar}}
            # State transitions out of the command-name state.
            if ((state == "CMDNAME" and c == ' ') or
                (state == "CMDNAME" and c == '[') or
                (state == "CMDNAME" and c == '{')):
            # Closing delimiter: leave the option/argument once the
            # nesting depth returns to zero.
            if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
                    nestdepth = nestdepth - 1
            # Opening delimiter nested inside an option/argument.
            if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
                nestdepth = nestdepth + 1
            # Accumulate the current character into the active slot.
            if state == "CMDNAME":
            elif state == "OPTION":
            elif state == "SECOPTION":
            elif state == "CONTENT":
            elif c == '[' and b != ']':
                nestdepth = 0 # Just to be sure
            elif c == '[' and b == ']':
                nestdepth = 0 # Just to be sure
                nestdepth = 0 # Just to be sure
    # Now we have parsed the command, output the parameters
    lines = ["\\begin_inset LatexCommand %s" % name]
        # Slots declared "" in commandparams_info are invalid for this
        # command; warn and drop the value instead of emitting it.
        if commandparams_info[name][0] == "":
            document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
            lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
        if commandparams_info[name][1] == "":
            document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
            lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
        if commandparams_info[name][2] == "":
            document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
            lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
    document.body[i:i+1] = lines
def revert_commandparams(document):
    " Revert named command-inset parameters back to bracket syntax. "
    # One parameter per line: a name followed by its (quoted) value.
    regex = re.compile(r'(\S+)\s+(.+)')
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        # Third token of '\begin_inset LatexCommand <name>'.
        name = document.body[i].split()[2]
        j = find_end_of_inset(document.body, i + 1)
        # Collect the parameter lines of the inset, matching each
        # parameter name against commandparams_info[name].
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                elif (commandparams_info[name][0] != "" and
                      pname == commandparams_info[name][0]):
                    # Unquote and unescape the stored value.
                    option1 = pvalue.strip('"').replace('\\"', '"')
                elif (commandparams_info[name][1] != "" and
                      pname == commandparams_info[name][1]):
                    option2 = pvalue.strip('"').replace('\\"', '"')
                elif (commandparams_info[name][2] != "" and
                      pname == commandparams_info[name][2]):
                    argument = pvalue.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
        # bibitem reverts to a plain \bibitem line, everything else to
        # the bracketed \begin_inset LatexCommand form.
        if name == "bibitem":
                lines = ["\\bibitem {%s}" % argument]
                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
                lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
                lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
                lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
                lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
        if name != "bibitem":
            if preview_line != "":
                lines.append(preview_line)
            lines.append('\\end_inset')
        document.body[i:j+1] = lines
def revert_nomenclature(document):
    " Convert nomenclature entry to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
        i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
        j = find_end_of_inset(document.body, i + 1)
        # Pull symbol/description/prefix parameters out of the inset.
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    preview_line = document.body[k]
                elif name == "symbol":
                    symbol = value.strip('"').replace('\\"', '"')
                elif name == "description":
                    description = value.strip('"').replace('\\"', '"')
                elif name == "prefix":
                    prefix = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
            # Rebuild the LaTeX command, with or without the optional prefix.
            command = 'nomenclature{%s}{%s}' % (symbol, description)
            command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
        document.body[i:j+1] = ['\\begin_inset ERT',
                                '\\begin_layout %s' % document.default_layout,
    # Make sure the preamble loads the nomencl package exactly once.
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def revert_printnomenclature(document):
    " Convert printnomenclature to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        j = find_end_of_inset(document.body, i + 1)
        # Extract the optional labelwidth parameter from the inset.
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    preview_line = document.body[k]
                elif name == "labelwidth":
                    labelwidth = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
            # Rebuild the LaTeX command, with or without label width.
            command = 'nomenclature{}'
            command = 'nomenclature[%s]' % labelwidth
        document.body[i:j+1] = ['\\begin_inset ERT',
                                '\\begin_layout %s' % document.default_layout,
    # Make sure the preamble loads the nomencl package exactly once.
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def convert_esint(document):
    " Add \\use_esint setting to header. "
    # The new setting is inserted right before \cite_engine.
    i = find_token(document.header, "\\cite_engine", 0)
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')
def revert_esint(document):
    " Remove \\use_esint setting from header. "
    i = find_token(document.header, "\\use_esint", 0)
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
    # Second token of '\use_esint <value>'.
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
        document.preamble.append('\\usepackage{esint}')
def revert_clearpage(document):
    " clearpage -> ERT "
        i = find_token(document.body, "\\clearpage", i)
        # Replace the \clearpage line with an ERT inset carrying the raw
        # LaTeX command.
        document.body[i:i+1] = ['\\begin_inset ERT',
                                '\\begin_layout %s' % document.default_layout,
def revert_cleardoublepage(document):
    " cleardoublepage -> ERT "
        i = find_token(document.body, "\\cleardoublepage", i)
        # Replace the \cleardoublepage line with an ERT inset carrying the
        # raw LaTeX command.
        document.body[i:i+1] = ['\\begin_inset ERT',
                                '\\begin_layout %s' % document.default_layout,
def convert_lyxline(document):
    " remove fontsize commands for \lyxline "
    # The problem is: The old \lyxline definition doesn't handle the fontsize
    # to change the line thickness. The new definition does this so that imported
    # \lyxlines would have a different line thickness. The eventual fontsize command
    # before \lyxline is therefore removed to get the same output.
    fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
                 "large", "Large", "LARGE", "huge", "Huge"]
    for n in range(0, len(fontsizes)):
        while i < len(document.body):
            i = find_token(document.body, "\\size " + fontsizes[n], i)
            k = find_token(document.body, "\\lyxline", i)
            # the corresponding fontsize command is always 2 lines before the \lyxline
            if (i != -1 and k == i+2):
                document.body[i:i+1] = []
def revert_encodings(document):
    " Set new encodings to auto. "
    # Encodings that were only introduced in the new format; older LyX
    # cannot read them, so they fall back to auto.
    encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
    i = find_token(document.header, "\\inputencoding", 0)
        document.header.append("\\inputencoding auto")
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def convert_caption(document):
    " Convert caption layouts to caption insets. "
        i = find_token(document.body, "\\begin_layout Caption", i)
        j = find_end_of_layout(document.body, i)
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
        # Close the inner layout and the new inset after the caption text,
        # then wrap the caption in a default-layout paragraph holding a
        # Caption inset.
        document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
        document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
                                "\\begin_inset Caption", "",
                                "\\begin_layout %s" % document.default_layout]
def revert_caption(document):
    " Convert caption insets to caption layouts. "
    " This assumes that the text class has a caption style. "
        i = find_token(document.body, "\\begin_inset Caption", i)
        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
        layout_line = document.body[layout_before]
        del_layout_before = True
        l = layout_before + 1
            # Any non-blank line between the layout and the inset means the
            # inset is not at the start of the paragraph.
            if document.body[l] != "":
                del_layout_before = False
        if del_layout_before:
            del document.body[layout_before:i]
            document.body[i:i] = ["\\end_layout", ""]
        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
        k = find_end_of_inset(document.body, i)
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
        del_layout_after = True
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
            del document.body[k+1:layout_after+1]
            # Reopen the surrounding paragraph with its original layout.
            document.body[k+1:k+1] = [layout_line, ""]
        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        if document.body[k] == "":
        if document.body[j] == "":
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
# Accents of InsetLaTeXAccent
# Maps the one-character accent code to the Unicode combining character.
    "`" : u'\u0300', # grave
    "'" : u'\u0301', # acute
    "^" : u'\u0302', # circumflex
    "~" : u'\u0303', # tilde
    "=" : u'\u0304', # macron
    "u" : u'\u0306', # breve
    "." : u'\u0307', # dot above
    "\"": u'\u0308', # diaresis
    "r" : u'\u030a', # ring above
    "H" : u'\u030b', # double acute
    "v" : u'\u030c', # caron
    "b" : u'\u0320', # minus sign below
    "d" : u'\u0323', # dot below
    "c" : u'\u0327', # cedilla
    "k" : u'\u0328', # ogonek
    "t" : u'\u0361' # tie. This is special: It spans two characters, but
                    # only one is given as argument, so we don't need to
                    # treat it differently.

# special accents of InsetLaTeXAccent without argument
special_accent_map = {
    'i' : u'\u0131', # dotless i
    'j' : u'\u0237', # dotless j
    'l' : u'\u0142', # l with stroke
    'L' : u'\u0141' # L with stroke

# special accent arguments of InsetLaTeXAccent
# Maps the LaTeX escape to the character it denotes.
    '\\i' : u'\u0131', # dotless i
    '\\j' : u'\u0237' # dotless j
def _convert_accent(accent, accented_char):
    """Return the Unicode equivalent of an InsetLaTeXAccent combination,
    or fall through (returning None implicitly) if it cannot be converted."""
    if type in special_accent_map:
        return special_accent_map[type]
    # a missing char is treated as space by LyX
    elif type == 'q' and char in ['t', 'd', 'l', 'L']:
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
    elif char in accented_map:
        char = accented_map[char]
    elif (len(char) > 1):
        # We can only convert accents on a single char
    # Combine base char and combining accent, then pre-compose.
    a = accent_map.get(type)
    return unicodedata.normalize("NFKC", "%s%s" % (char, a))
def convert_ertbackslash(body, i, ert, default_layout):
    r""" -------------------------------------------------------------------------------------------
    Convert backslashes and '\n' into valid ERT code, append the converted
    text to body[i] and return the (maybe incremented) line index i"""
    # A literal backslash becomes the \backslash token in ERT.
    body[i] = body[i] + '\\backslash '
    # A newline closes the current layout and opens a fresh one.
    body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
    # All other characters are copied through unchanged.
    body[i] = body[i] + c
def convert_accent(document):
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a' (also accepted)
    # '\i \"' (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
        i = find_re(document.body, re_wholeinset, i)
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            accented_char = match.group(1)
            converted = _convert_accent(accent, accented_char)
                # NOTE(review): the trailing comma makes `contents` a tuple,
                # not a string; it is later interpolated into the warning
                # below — confirm this is intended.
                contents = '%s{%s}' % (accent, accented_char),
            # Known accent: replace the inset with the converted character.
            document.body[i] = '%s%s' % (prefix, converted)
            # Unknown accent: keep the raw LaTeX in an ERT inset instead.
            document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
            document.body[i] = prefix
            document.body[i+1:i+1] = ['\\begin_inset ERT',
                                      '\\begin_layout %s' % document.default_layout,
            i = convert_ertbackslash(document.body, i + 7,
                                     document.default_layout)
            document.body[i+1:i+1] = ['\\end_layout',
def revert_accent(document):
    " Replace accented characters the target encoding lacks with InsetLaTeXAccent. "
    # Build the reverse lookups of the accent tables above.
    inverse_accent_map = {}
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    numberoflines = len(document.body)
    for i in range(numberoflines-1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
        if (document.body[i+1][0] in inverse_accent_map):
            # the last character of this line and the first of the next line
            # form probably a surrogate pair.
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(numberoflines):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
            document.body[i] = unicodedata.normalize("NFKD", document.body[i])
            document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    for i in range(len(document.body)):
        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line
            result = lang_re.match(document.body[i])
                language = result.group(1)
                if language == "default":
                    encoding_stack[-1] = document.encoding
                    from lyx2lyx_lang import lang
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                del encoding_stack[-1]
        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                    # Only revert if the char cannot be encoded as-is.
                    dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body[i+1:i+1] = document.body[i][j+1:]
                    # Delete the accented character
                        document.body[i] = document.body[i][:j-1]
                        document.body[i] = u''
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                    # Only revert if the composed char cannot be encoded.
                    dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body[i+1:i+1] = document.body[i][j+1:]
                    # Delete the accented characters
                        document.body[i] = document.body[i][:j-2]
                        document.body[i] = u''
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(numberoflines):
        document.body[i] = unicodedata.normalize("NFKC", document.body[i])
# NOTE(review): this span is an extraction-damaged rendering.  Each line
# carries an embedded original line number, and the gaps in that numbering
# (1101, 1103-1104, 1106, 1109, 1111, 1113-1115, ...) show that several
# source lines are missing -- apparently the blank lines plus statements
# such as 'changes = {}', 'i = 0', 'continue', and some 'if'/'return'
# lines.  The code is annotated in place only; it is NOT runnable as-is
# and should be restored from the upstream lyx2lyx sources before use.
1097 def normalize_font_whitespace(document):
1098 """ Before format 259 the font changes were ignored if a
1099 whitespace was the first or last character in the sequence, this function
1100 transfers the whitespace outside."""
# Only the LaTeX backend is affected; other backends return early
# (the 'return' line itself is among the missing lines -- confirm).
1102 if document.backend != "latex":
1105 lines = document.body
# Map of font-change commands to their default value; used both to detect
# a reset and to emit explicit resets around moved whitespace.
1107 char_properties = {"\\series": "default",
1108 "\\emph": "default",
1110 "\\shape": "default",
1112 "\\family": "default"}
# Main scan over the body; 'i' and 'changes' are initialised in lines
# missing from this rendering (presumably 'i = 0' and 'changes = {}').
1116 while i < len(lines):
1117 words = lines[i].split()
1119 if len(words) > 0 and words[0] == "\\begin_layout":
1120 # a new paragraph resets all font changes
1123 elif len(words) > 1 and words[0] in char_properties.keys():
1124 # we have a font change
1125 if char_properties[words[0]] == words[1]:
1126 # property gets reset
1127 if words[0] in changes.keys():
1128 del changes[words[0]]
1129 defaultproperty = True
1132 changes[words[0]] = words[1]
1133 defaultproperty = False
1135 # We need to explicitly reset all changed properties if we find
1136 # a space below, because LyX 1.4 would output the space after
1137 # closing the previous change and before starting the new one,
1138 # and closing a font change means to close all properties, not
1139 # just the changed one.
1141 if lines[i-1] and lines[i-1][-1] == " ":
1142 lines[i-1] = lines[i-1][:-1]
1143 # a space before the font change
# The next two loops rebuild 'added_lines' (initialisation line missing):
# first re-apply the still-active changes, then prepend resets to default.
1145 for k in changes.keys():
1146 # exclude property k because that is already in lines[i]
1148 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1149 for k in changes.keys():
1150 # exclude property k because that must be added below anyway
1152 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1154 # Property is reset in lines[i], so add the new stuff afterwards
1155 lines[i+1:i+1] = added_lines
1157 # Reset property for the space
1158 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1159 lines[i:i] = added_lines
1160 i = i + len(added_lines)
# Symmetric case: a space AFTER the font change is moved out as well.
1162 elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1163 # a space after the font change
1164 if (lines[i+1] == " " and lines[i+2]):
1165 next_words = lines[i+2].split()
1166 if len(next_words) > 0 and next_words[0] == words[0]:
1167 # a single blank with a property different from the
1168 # previous and the next line must not be changed
1171 lines[i+1] = lines[i+1][1:]
1173 for k in changes.keys():
1174 # exclude property k because that is already in lines[i]
1176 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1177 for k in changes.keys():
1178 # exclude property k because that must be added below anyway
1180 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1181 # Reset property for the space
1182 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1183 lines[i:i] = added_lines
1184 i = i + len(added_lines)
def revert_utf8x(document):
    """Revert the 'utf8x' input encoding to plain 'utf8'.

    LyX 1.5 supports the utf8x variant (ucs package); older formats only
    understand utf8.  If the header lacks an \\inputencoding line, append
    the default 'auto' instead.  Finally re-sync document.inputencoding
    with whatever the header now says.
    """
    # NOTE(review): the damaged source was missing the 'if i == -1:' /
    # 'else:' lines; restored here following the pattern of the sibling
    # revert_utf8plain/revert_ascii functions -- confirm against upstream.
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8x":
            document.header[i] = "\\inputencoding utf8"
    # Keep the cached attribute consistent with the header.
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    """Revert the 'utf8-plain' input encoding to plain 'utf8'.

    'utf8-plain' (utf8 without inputenc) is unknown to older formats.
    If the header lacks an \\inputencoding line, append the default
    'auto'.  Finally re-sync document.inputencoding with the header.
    """
    # NOTE(review): missing 'if i == -1:' / 'else:' lines restored from
    # the standard pattern used by the sibling revert_* functions.
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8-plain":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_beamer_alert(document):
    """Revert beamer's Alert character-style inset back to an ERT inset
    producing \\alert{...}."""
    # NOTE(review): the damaged source lost the outer search loop and the
    # inner scan for the first \begin_layout line; the shape below follows
    # the standard lyx2lyx inset-revert pattern -- confirm against upstream.
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        # Turn the CharStyle inset into an ERT inset.
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # Find the first layout inside the inset and wrap its content.
        while True:
            if document.body[i][:13] == "\\begin_layout":
                # Insert the \alert command
                document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
                break
            i = i + 1
        i = i + 1
def revert_beamer_structure(document):
    """Revert beamer's Structure character-style inset back to an ERT
    inset producing \\structure{...}."""
    # NOTE(review): reconstructed the lost loop scaffolding; mirrors
    # revert_beamer_alert -- confirm against upstream.
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # Wrap the content of the first layout line in \structure{...}.
        while True:
            if document.body[i][:13] == "\\begin_layout":
                document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
                break
            i = i + 1
        i = i + 1
def convert_changes(document):
    """Switch \\output_changes off if \\tracking_changes is off.

    Emits a warning and leaves the header untouched when either setting
    is missing (malformed document).
    """
    # NOTE(review): the 'if ... == -1: ... return' guard lines were
    # missing from the damaged source and have been restored -- confirm.
    i = find_token(document.header, '\\tracking_changes', 0)
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    j = find_token(document.header, '\\output_changes', 0)
    if j == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    # Outputting changes without tracking them is inconsistent: fix it.
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    """Set the 'ascii' input encoding back to 'auto'.

    Formats before 261 have no explicit ascii encoding.  If the header
    lacks an \\inputencoding line, append the default 'auto'.  Finally
    re-sync document.inputencoding with the header.
    """
    # NOTE(review): missing 'if i == -1:' / 'else:' lines restored from
    # the pattern shared with revert_utf8x/revert_utf8plain.
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "ascii":
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    """Rename pre-1.5 language names to their modern spellings.

    'brazil' becomes 'brazilian' and 'portuges' becomes 'portuguese';
    both the document attribute and the \\language header line are
    updated.  Other languages are left untouched.
    """
    renamed = {"brazil": "brazilian",
               "portuges": "portuguese"}
    new_name = renamed.get(document.language)
    if new_name is None:
        return
    document.language = new_name
    pos = find_token(document.header, "\\language", 0)
    document.header[pos] = "\\language %s" % new_name
def revert_language_name(document):
    """Rename modern language names back to their pre-1.5 spellings.

    Inverse of normalize_language_name: 'brazilian' becomes 'brazil'
    and 'portuguese' becomes 'portuges'; both the document attribute
    and the \\language header line are updated.
    """
    renamed = {"brazilian": "brazil",
               "portuguese": "portuges"}
    old_name = renamed.get(document.language)
    if old_name is None:
        return
    document.language = old_name
    pos = find_token(document.header, "\\language", 0)
    document.header[pos] = "\\language %s" % old_name
1298 # \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    """\\textclass cv -> \\textclass simplecv (the class was renamed)."""
    document.textclass = {"cv": "simplecv"}.get(document.textclass,
                                                document.textclass)
def revert_cv_textclass(document):
    """\\textclass simplecv -> \\textclass cv (the class's old name)."""
    document.textclass = {"simplecv": "cv"}.get(document.textclass,
                                                document.textclass)
def convert_tableborder(document):
    """Remove the extra "|" from table column arguments.

    The problem is: LyX doubled the table cell border because it ignored
    the "|" character in the cell arguments.  A fix takes care of this,
    and therefore the "|" has to be removed from every line that carries
    both leftline="true" and a "|>{...}" special argument.
    """
    # NOTE(review): the loop counter initialisation and increment were
    # missing from the damaged source and have been restored -- confirm.
    i = 0
    while i < len(document.body):
        line = document.body[i]
        h = line.find("leftline=\"true\"", 0, len(line))
        k = line.find("|>{", 0, len(line))
        # the two tokens have to be in one line
        if h != -1 and k != -1:
            # Delete the "|".
            # NOTE(review): the slice below also drops the LAST character
            # of the line (len(line)-1); behaviour preserved exactly as in
            # the original -- verify this is intentional upstream.
            document.body[i] = line[:k] + line[k+1:len(line)-1]
        i = i + 1
def revert_tableborder(document):
    """Reinsert the "|" before ">{...}" in table column arguments.

    Inverse of convert_tableborder for lines carrying leftline="true"
    (note the conversion also dropped the line's final character, which
    this revert does not restore).
    """
    # NOTE(review): the loop counter initialisation and increment were
    # missing from the damaged source and have been restored -- confirm.
    i = 0
    while i < len(document.body):
        line = document.body[i]
        h = line.find("leftline=\"true\"", 0, len(line))
        k = line.find(">{", 0, len(line))
        # the two tokens have to be in one line
        if h != -1 and k != -1:
            # Add the "|" back in front of the special argument.
            document.body[i] = line[:k] + '|' + line[k:]
        i = i + 1
# NOTE(review): extraction-damaged rendering -- the embedded line numbers
# jump (1336, 1340, 1343-1344, 1346, 1348, 1350, 1354, 1357, 1363 are
# missing), so the 'if' guards, the 'i'/'k' initialisation before the
# preamble scan, the 'else:' branch and the loop increment are lost.
# Annotated in place only; restore from upstream before use.
1335 def revert_armenian(document):
1337 # set inputencoding from armscii8 to auto
1338 if document.inputencoding == "armscii8":
1339 i = find_token(document.header, "\\inputencoding", 0)
1341 document.header[i] = "\\inputencoding auto"
1342 # check if preamble exists, if not k is set to -1
# Scan the preamble for a "\" or "%" character to decide whether any
# real preamble content exists ('i'/'k' initialisation lines missing).
1345 while i < len(document.preamble):
1347 k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
1349 k = document.preamble[i].find("%", 0, len(document.preamble[i]))
1351 # add the entry \usepackage{armtex} to the document preamble
1352 if document.language == "armenian":
1353 # set the armtex entry as the first preamble line
1355 document.preamble[0:0] = ["\\usepackage{armtex}"]
1356 # create the preamble when it doesn't exist
1358 document.preamble.append('\\usepackage{armtex}')
1359 # Set document language from armenian to english
1360 if document.language == "armenian":
1361 document.language = "english"
1362 i = find_token(document.header, "\\language", 0)
1364 document.header[i] = "\\language english"
def revert_CJK(document):
    """Set CJK encodings to default and the languages chinese, japanese
    and korean to english.

    Formats before 267 know neither the CJK inputencodings nor these
    languages, so both are mapped to safe fallbacks.
    """
    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    # NOTE(review): the 'if i == -1:' / 'else:' / 'if i != -1:' guard
    # lines were missing from the damaged source; restored following the
    # pattern of the sibling revert_* functions -- confirm upstream.
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding default"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language == "chinese-simplified" or \
       document.language == "chinese-traditional" or \
       document.language == "japanese" or document.language == "korean":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    """Revert the \\listings_params header option.

    Moves the listings parameters from the header into the preamble as
    \\lstset{...} together with \\usepackage{listings}, then removes the
    header line.
    """
    i = find_token(document.header, "\\listings_params", 0)
    # NOTE(review): this guard line was missing from the damaged source.
    if i != -1:
        document.preamble.append('\\usepackage{listings}')
        # Header line looks like: \listings_params "key=value,..."
        # NOTE(review): split()[1] keeps only the first whitespace-separated
        # token of the quoted value; behaviour preserved as-is -- verify
        # that parameter strings never contain spaces.
        document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
        document.header.pop(i)
# NOTE(review): extraction-damaged rendering -- large parts of the
# docstring example and of the body (the search loop header, 'i == -1'
# guards, several literal lines of the replacement lists, the closing
# lines and the loop increment) are missing, as the jumps in the embedded
# line numbers show.  Annotated in place only; restore from upstream.
1398 def revert_listings_inset(document):
1399 r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1403 lstparams "language=Delphi"
1407 \begin_layout Standard
1417 \begin_layout Standard
1421 lstinline[language=Delphi]{var i = 10;}
# Outer search loop (header lines missing): find each listings inset.
1428 i = find_token(document.body, '\\begin_inset listings', i)
1432 if not '\\usepackage{listings}' in document.preamble:
1433 document.preamble.append('\\usepackage{listings}')
1434 j = find_end_of_inset(document.body, i + 1)
1436 # this should not happen
# Parse the three inset option lines (inline / lstparams / status).
1443 for line in range(i + 1, i + 4):
1444 if document.body[line].startswith('inline'):
1445 inline = document.body[line].split()[1]
1446 if document.body[line].startswith('lstparams'):
1447 params = document.body[line].split()[1].strip('"')
1448 if document.body[line].startswith('status'):
1449 status = document.body[line].split()[1].strip()
1451 # looking for the oneline code for lstinline
1452 for line in range(i + 2, j + 1):
1453 if document.body[line].startswith(r'\end_layout'):
1454 inlinecode = document.body[line - 1]
1457 params = '[%s]' % params
# Inline listings become an ERT with \lstinline...; block listings
# become an ERT with \begin{lstlisting}...\end{lstlisting}.
1458 if inline == 'true':
1459 document.body[i:(j+1)] = [r'\begin_inset ERT',
1460 'status %s' % status,
1461 r'\begin_layout Standard',
1465 'lstinline%s{%s}' % (params, inlinecode),
1470 document.body[i: j+1] = [r'\begin_inset ERT',
1471 'status %s' % status,
1473 r'\begin_layout Standard',
1477 r'begin{lstlisting}%s' % params,
1479 ] + document.body[k : j - 1] + \
1481 r'\begin_layout Standard',
# NOTE(review): extraction-damaged rendering AND truncated -- the embedded
# line numbers jump from 1529 directly to 1543 (module-level data), so the
# tail of this function's replacement list is lost entirely.  Annotated in
# place only; restore from upstream before use.
1490 def revert_include_listings(document):
1491 r''' Revert lstinputlisting Include option , translate
1492 \begin_inset Include \lstinputlisting{file}[opt]
1502 \begin_layout Standard
1506 lstinputlisting{file}[opt]
# Outer search loop (header lines missing): find each such Include inset.
1514 i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
1518 if not '\\usepackage{listings}' in document.preamble:
1519 document.preamble.append('\\usepackage{listings}')
1520 j = find_end_of_inset(document.body, i + 1)
1522 # this should not happen
# The third token of the inset line is the \lstinputlisting{...}[...] command.
1525 cmd = document.body[i].split()[2]
# Replace the whole inset with an ERT reproducing the command (the rest
# of this replacement list is in the missing lines 1530-1542).
1526 document.body[i : j + 1] = [r'\begin_inset ERT',
1529 r'\begin_layout Standard',
# Conversion-chain tables: each entry pairs a target \lyxformat number with
# the list of functions that perform that step.  The functions referenced
# here are defined earlier in this file (some outside this excerpt).
# NOTE(review): extraction-damaged rendering -- the embedded line numbers
# jump (1546, 1548-1549, 1551, 1553-1554, 1558, 1560, 1564-1568, 1578,
# 1580-1581, 1589 missing), so several table rows are absent and the
# 'convert' list is not even closed here.  Restore from upstream.
1543 supported_versions = ["1.5.0","1.5"]
1544 convert = [[246, []],
1545 [247, [convert_font_settings]],
1547 [249, [convert_utf8]],
1550 [252, [convert_commandparams, convert_bibitem]],
1552 [254, [convert_esint]],
1555 [257, [convert_caption]],
1556 [258, [convert_lyxline]],
1557 [259, [convert_accent, normalize_font_whitespace]],
1559 [261, [convert_changes]],
1561 [263, [normalize_language_name]],
1562 [264, [convert_cv_textclass]],
1563 [265, [convert_tableborder]],
# Revert chain: applied in descending format order when writing older files.
1569 revert = [[269, [revert_beamer_alert, revert_beamer_structure]],
1570 [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
1571 [267, [revert_CJK]],
1572 [266, [revert_utf8plain]],
1573 [265, [revert_armenian]],
1574 [264, [revert_tableborder]],
1575 [263, [revert_cv_textclass]],
1576 [262, [revert_language_name]],
1577 [261, [revert_ascii]],
1579 [259, [revert_utf8x]],
1582 [256, [revert_caption]],
1583 [255, [revert_encodings]],
1584 [254, [revert_clearpage, revert_cleardoublepage]],
1585 [253, [revert_esint]],
1586 [252, [revert_nomenclature, revert_printnomenclature]],
1587 [251, [revert_commandparams]],
1588 [250, [revert_cs_label]],
1590 [248, [revert_accent, revert_utf8]],
1591 [247, [revert_booktabs]],
1592 [246, [revert_font_settings]],
1593 [245, [revert_framed]]]
1596 if __name__ == "__main__":