1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
25 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value
26 from LyX import get_encoding
29 ####################################################################
30 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the matching `\\end_inset` for the inset whose
    `\\begin_inset` line is at (or searched from) lines[i]; lines[i] itself
    is included in the search (delegates to parser_tools.find_end_of)."""
    # Defect fixed: the listing carried stray line-number prefixes and had
    # lost all indentation, making the definition non-executable Python.
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def find_end_of_layout(lines, i):
    """Return the index of the matching `\\end_layout` for the layout whose
    `\\begin_layout` line is at (or searched from) lines[i]; lines[i] itself
    is included in the search (delegates to parser_tools.find_end_of)."""
    # Defect fixed: the listing carried stray line-number prefixes and had
    # lost all indentation, making the definition non-executable Python.
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
40 # End of helper functions
41 ####################################################################
45 # Notes: Framed/Shaded
# revert_framed: turn the 1.5 "Note Framed"/"Note Shaded" insets back into
# plain "Note" insets so older readers can open the file.
# NOTE(review): the embedded numbering jumps (49 -> 52 -> 56), so the loop
# scaffolding around find_tokens (counter init, `i == -1` termination test,
# increment) is elided in this listing — restore from version control.
48 def revert_framed(document):
49 "Revert framed notes. "
52 i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
56 document.body[i] = "\\begin_inset Note"
# Translation tables: LyX 1.4 `\fontscheme` name -> LyX 1.5 per-family font
# name, one table per family (roman / sans-serif / typewriter).
# NOTE(review): numbering gaps (68, 73 missing) show that the 'pslatex'
# entries and closing braces of roman_fonts and sans_fonts are elided in
# this listing; only typewriter_fonts shows its closing line (78).
64 roman_fonts = {'default' : 'default', 'ae' : 'ae',
65 'times' : 'times', 'palatino' : 'palatino',
66 'helvet' : 'default', 'avant' : 'default',
67 'newcent' : 'newcent', 'bookman' : 'bookman',
69 sans_fonts = {'default' : 'default', 'ae' : 'default',
70 'times' : 'default', 'palatino' : 'default',
71 'helvet' : 'helvet', 'avant' : 'avant',
72 'newcent' : 'default', 'bookman' : 'default',
74 typewriter_fonts = {'default' : 'default', 'ae' : 'default',
75 'times' : 'default', 'palatino' : 'default',
76 'helvet' : 'default', 'avant' : 'default',
77 'newcent' : 'default', 'bookman' : 'default',
78 'pslatex' : 'courier'}
# convert_font_settings: replace the single 1.4 `\fontscheme` header line with
# the 1.5 per-family `\font_*` header lines, looked up in the tables above.
# Unknown or empty schemes fall back to 'default' with a warning.
# NOTE(review): numbering gaps (82, 84, 86, 88, 98-99) show elided lines —
# presumably the `i == -1` guards and the `\font_sc`/`\font_osf` entries of
# the replacement list. Confirm against version control.
80 def convert_font_settings(document):
81 " Convert font settings. "
83 i = find_token_exact(document.header, "\\fontscheme", i)
85 document.warning("Malformed LyX document: Missing `\\fontscheme'.")
87 font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
89 document.warning("Malformed LyX document: Empty `\\fontscheme'.")
90 font_scheme = 'default'
91 if not font_scheme in roman_fonts.keys():
92 document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
93 font_scheme = 'default'
94 document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
95 '\\font_sans %s' % sans_fonts[font_scheme],
96 '\\font_typewriter %s' % typewriter_fonts[font_scheme],
97 '\\font_default_family default',
100 '\\font_sf_scale 100',
101 '\\font_tt_scale 100']
# revert_font_settings: collapse the 1.5 `\font_*` header lines back into a
# single 1.4 `\fontscheme` line. Settings 1.4 cannot express are either
# emitted as LaTeX preamble code (\renewcommand / \usepackage) or dropped
# with a "not yet implemented" / "Ignoring" warning.
# NOTE(review): numbering gaps throughout (106-107, 112, 114-115, 117, 121,
# 124, 128, 130-131, ...) show elided guard/else/return lines; the visible
# lines are reproduced byte-identically below.
104 def revert_font_settings(document):
105 " Revert font settings. "
108 fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
109 for family in 'roman', 'sans', 'typewriter':
110 name = '\\font_%s' % family
111 i = find_token_exact(document.header, name, i)
113 document.warning("Malformed LyX document: Missing `%s'." % name)
116 if (insert_line < 0):
118 fonts[family] = get_value(document.header, name, i, i + 1)
119 del document.header[i]
120 i = find_token_exact(document.header, '\\font_default_family', i)
122 document.warning("Malformed LyX document: Missing `\\font_default_family'.")
123 font_default_family = 'default'
125 font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
126 del document.header[i]
127 i = find_token_exact(document.header, '\\font_sc', i)
129 document.warning("Malformed LyX document: Missing `\\font_sc'.")
132 font_sc = get_value(document.header, '\\font_sc', i, i + 1)
133 del document.header[i]
134 if font_sc != 'false':
135 document.warning("Conversion of '\\font_sc' not yet implemented.")
136 i = find_token_exact(document.header, '\\font_osf', i)
138 document.warning("Malformed LyX document: Missing `\\font_osf'.")
141 font_osf = get_value(document.header, '\\font_osf', i, i + 1)
142 del document.header[i]
143 i = find_token_exact(document.header, '\\font_sf_scale', i)
145 document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
146 font_sf_scale = '100'
148 font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
149 del document.header[i]
150 if font_sf_scale != '100':
151 document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
152 i = find_token_exact(document.header, '\\font_tt_scale', i)
154 document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
155 font_tt_scale = '100'
157 font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
158 del document.header[i]
159 if font_tt_scale != '100':
160 document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
# Try to find a 1.4 scheme that matches all three families exactly...
161 for font_scheme in roman_fonts.keys():
162 if (roman_fonts[font_scheme] == fonts['roman'] and
163 sans_fonts[font_scheme] == fonts['sans'] and
164 typewriter_fonts[font_scheme] == fonts['typewriter']):
165 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
166 if font_default_family != 'default':
167 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
168 if font_osf == 'true':
169 document.warning("Ignoring `\\font_osf = true'")
# ...otherwise fall back to 'default' and emit preamble code per family.
171 font_scheme = 'default'
172 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
173 if fonts['roman'] == 'cmr':
174 document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
175 if font_osf == 'true':
176 document.preamble.append('\\usepackage{eco}')
178 for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
179 if fonts['roman'] == font:
180 document.preamble.append('\\usepackage{%s}' % font)
181 for font in 'cmss', 'lmss', 'cmbr':
182 if fonts['sans'] == font:
183 document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
184 for font in 'berasans':
185 if fonts['sans'] == font:
186 document.preamble.append('\\usepackage{%s}' % font)
187 for font in 'cmtt', 'lmtt', 'cmtl':
188 if fonts['typewriter'] == font:
189 document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
190 for font in 'courier', 'beramono', 'luximono':
191 if fonts['typewriter'] == font:
192 document.preamble.append('\\usepackage{%s}' % font)
193 if font_default_family != 'default':
194 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
195 if font_osf == 'true':
196 document.warning("Ignoring `\\font_osf = true'")
# revert_booktabs: strip booktabs="true" from <features> and the extra
# top/bottom/interline row-space attributes from <row> tags inside every
# Tabular inset, warning about each removal.
# NOTE(review): numbering gaps (205-206, 208-209, 211, 213) show the loop
# scaffolding and `i`/`j == -1` guards are elided in this listing.
199 def revert_booktabs(document):
200 " We remove the booktabs flag or everything else will become a mess. "
201 re_row = re.compile(r'^<row.*space="[^"]+".*>$')
202 re_tspace = re.compile(r'\s+topspace="[^"]+"')
203 re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
204 re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
207 i = find_token(document.body, "\\begin_inset Tabular", i)
210 j = find_end_of_inset(document.body, i + 1)
212 document.warning("Malformed LyX document: Could not find end of tabular.")
214 for k in range(i, j):
215 if re.search('^<features.* booktabs="true".*>$', document.body[k]):
216 document.warning("Converting 'booktabs' table to normal table.")
217 document.body[k] = document.body[k].replace(' booktabs="true"', '')
218 if re.search(re_row, document.body[k]):
219 document.warning("Removing extra row space.")
220 document.body[k] = re_tspace.sub('', document.body[k])
221 document.body[k] = re_bspace.sub('', document.body[k])
222 document.body[k] = re_ispace.sub('', document.body[k])
# convert_multiencoding: re-encode body lines of pre-249 multi-encoding files.
# Tracks the current encoding with a stack updated on \lang /
# \begin_layout / \end_layout lines, then round-trips each affected line
# through encode/decode (true unicode forward, "fake" unicode backward).
# NOTE(review): numbering gaps (232, 234-236, 240, 243, 245, 251, 256, 267,
# 275) show elided docstring lines, the early `return` after the CJK check,
# and the forward/backward branch statements. Visible lines kept verbatim.
226 def convert_multiencoding(document, forward):
227 """ Fix files with multiple encodings.
228 Files with an inputencoding of "auto" or "default" and multiple languages
229 where at least two languages have different default encodings are encoded
230 in multiple encodings for file formats < 249. These files are incorrectly
231 read and written (as if the whole file was in the encoding of the main
233 This is not true for files written by CJK-LyX, they are always in the locale
237 - converts from fake unicode values to true unicode if forward is true, and
238 - converts from true unicode values to fake unicode if forward is false.
239 document.encoding must be set to the old value (format 248) in both cases.
241 We do this here and not in LyX.py because it is far easier to do the
242 necessary parsing in modern formats than in ancient ones.
244 if document.cjk_encoding != '':
246 encoding_stack = [document.encoding]
247 lang_re = re.compile(r"^\\lang\s(\S+)")
248 if document.inputencoding == "auto" or document.inputencoding == "default":
249 for i in range(len(document.body)):
250 result = lang_re.match(document.body[i])
252 language = result.group(1)
253 if language == "default":
254 document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding))
255 encoding_stack[-1] = document.encoding
257 from lyx2lyx_lang import lang
258 document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]))
259 encoding_stack[-1] = lang[language][3]
260 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
261 document.warning("Adding nested encoding %s." % encoding_stack[-1])
262 encoding_stack.append(encoding_stack[-1])
263 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
264 document.warning("Removing nested encoding %s." % encoding_stack[-1])
265 del encoding_stack[-1]
266 if encoding_stack[-1] != document.encoding:
268 # This line has been incorrectly interpreted as if it was
269 # encoded in 'encoding'.
270 # Convert back to the 8bit string that was in the file.
271 orig = document.body[i].encode(document.encoding)
272 # Convert the 8bit string that was in the file to unicode
273 # with the correct encoding.
274 document.body[i] = orig.decode(encoding_stack[-1])
276 # Convert unicode to the 8bit string that will be written
277 # to the file with the correct encoding.
278 orig = document.body[i].encode(encoding_stack[-1])
279 # Convert the 8bit string that will be written to the
280 # file to fake unicode with the encoding that will later
281 # be used when writing to the file.
282 document.body[i] = orig.decode(document.encoding)
def convert_utf8(document):
    """Set the document encoding to UTF-8.

    First re-encodes the body from per-language "fake unicode" values to
    true unicode (see convert_multiencoding with forward=True), then
    records the new encoding on the document.
    """
    # Defect fixed: the listing carried stray line-number prefixes and had
    # lost all indentation, making the definition non-executable Python.
    convert_multiencoding(document, True)
    document.encoding = "utf8"
# revert_utf8: restore the pre-UTF-8 document encoding. A missing
# \inputencoding header gets "auto"; an explicit "utf8" is rewritten to
# "auto"; then the 248-format encoding is recomputed and the body is
# converted back to the old per-language encodings.
# NOTE(review): line 294 is elided in this listing — presumably the
# `if i == -1:` guard preceding the append. Confirm against version control.
291 def revert_utf8(document):
292 " Set document encoding to the value corresponding to inputencoding. "
293 i = find_token(document.header, "\\inputencoding", 0)
295 document.header.append("\\inputencoding auto")
296 elif get_value(document.header, "\\inputencoding", i) == "utf8":
297 document.header[i] = "\\inputencoding auto"
298 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
299 document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
300 convert_multiencoding(document, False)
# revert_cs_label: drop the 'show_label' status line from CharStyle insets,
# which older formats do not understand.
# NOTE(review): numbering gaps (305-306, 308-309, 312-313, 315-316, 319-324)
# show the loop scaffolding, the deletion statement and the inner-loop
# advance are elided in this listing.
303 def revert_cs_label(document):
304 " Remove status flag of charstyle label. "
307 i = find_token(document.body, "\\begin_inset CharStyle", i)
310 # Search for a line starting 'show_label'
311 # If it is not there, break with a warning message
314 if (document.body[i][:10] == "show_label"):
317 elif (document.body[i][:13] == "\\begin_layout"):
318 document.warning("Malformed LyX document: Missing 'show_label'.")
# convert_bibitem: rewrite raw `\bibitem [option]{argument}` lines into
# `\begin_inset LatexCommand bibitem` insets with quoted label/key fields.
# Must run after convert_commandparams (per the visible docstring fragment).
# NOTE(review): numbering gaps (326, 328-330, 332-336, 338, 340-341, 343-344,
# 349, 352) show elided docstring lines, loop scaffolding, the
# `if option:` guard and `lines.append('')` separators in this listing.
325 def convert_bibitem(document):
327 \bibitem [option]{argument}
331 \begin_inset LatexCommand bibitem
337 This must be called after convert_commandparams.
339 regex = re.compile(r'\S+\s*(\[[^\[\{]*\])?(\{[^}]*\})')
342 i = find_token(document.body, "\\bibitem", i)
345 match = re.match(regex, document.body[i])
346 option = match.group(1)
347 argument = match.group(2)
348 lines = ['\\begin_inset LatexCommand bibitem']
350 lines.append('label "%s"' % option[1:-1].replace('"', '\\"'))
351 lines.append('key "%s"' % argument[1:-1].replace('"', '\\"'))
353 lines.append('\\end_inset')
354 document.body[i:i+1] = lines
# Parameter-name table for LatexCommand insets.
# command name -> [option1, option2, argument]: the field names under which
# the two optional parameters and the mandatory argument are written in the
# inset; an empty string means the command does not take that parameter.
# Defect fixed: the listing carried stray line-number prefixes and had lost
# all indentation, making the literal non-executable Python.
commandparams_info = {
    "bibitem" : ["label", "", "key"],
    "bibtex" : ["options", "btprint", "bibfiles"],
    "cite" : ["after", "before", "key"],
    "citet" : ["after", "before", "key"],
    "citep" : ["after", "before", "key"],
    "citealt" : ["after", "before", "key"],
    "citealp" : ["after", "before", "key"],
    "citeauthor" : ["after", "before", "key"],
    "citeyear" : ["after", "before", "key"],
    "citeyearpar" : ["after", "before", "key"],
    "citet*" : ["after", "before", "key"],
    "citep*" : ["after", "before", "key"],
    "citealt*" : ["after", "before", "key"],
    "citealp*" : ["after", "before", "key"],
    "citeauthor*" : ["after", "before", "key"],
    "Citet" : ["after", "before", "key"],
    "Citep" : ["after", "before", "key"],
    "Citealt" : ["after", "before", "key"],
    "Citealp" : ["after", "before", "key"],
    "Citeauthor" : ["after", "before", "key"],
    "Citet*" : ["after", "before", "key"],
    "Citep*" : ["after", "before", "key"],
    "Citealt*" : ["after", "before", "key"],
    "Citealp*" : ["after", "before", "key"],
    "Citeauthor*" : ["after", "before", "key"],
    "citefield" : ["after", "before", "key"],
    "citetitle" : ["after", "before", "key"],
    "cite*" : ["after", "before", "key"],
    "hfill" : ["", "", ""],
    "index" : ["", "", "name"],
    "printindex" : ["", "", "name"],
    "label" : ["", "", "name"],
    "eqref" : ["name", "", "reference"],
    "pageref" : ["name", "", "reference"],
    "prettyref" : ["name", "", "reference"],
    "ref" : ["name", "", "reference"],
    "vpageref" : ["name", "", "reference"],
    "vref" : ["name", "", "reference"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl" : ["name", "", "target"],
    "url" : ["name", "", "target"]}
# convert_commandparams: parse old-style inline command insets
#   `\begin_inset LatexCommand \cmdname[opt1][opt2]{arg}`
# with a character-by-character state machine (CMDNAME / OPTION / SECOPTION /
# CONTENT, with nestdepth handling nested brackets/braces), then emit the new
# field-per-line form using commandparams_info for the field names.
# NOTE(review): this listing is heavily elided (gaps at 404-405, 407-410,
# 412-416, 418, 421-423, 425-426, 428, 430-432, 434-438, 440-442, 446,
# 450-452, 459, 461, 463, 465-468, 470, 473, 475-476, 478-479, 482, 485, 487,
# 490, 492, 495): loop scaffolding, state-machine initialization/transitions
# and accumulation statements are missing. Visible lines kept verbatim.
403 def convert_commandparams(document):
406 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
411 \begin_inset LatexCommand cmdname
417 name1, name2 and name3 can be different for each command.
419 # \begin_inset LatexCommand bibitem was not the official version (see
420 # convert_bibitem()), but could be read in, so we convert it here, too.
424 i = find_token(document.body, "\\begin_inset LatexCommand", i)
427 command = document.body[i][26:].strip()
429 document.warning("Malformed LyX document: Missing LatexCommand name.")
433 # The following parser is taken from the original InsetCommandParams::scanCommand
439 # Used to handle things like \command[foo[bar]]{foo{bar}}
443 if ((state == "CMDNAME" and c == ' ') or
444 (state == "CMDNAME" and c == '[') or
445 (state == "CMDNAME" and c == '{')):
447 if ((state == "OPTION" and c == ']') or
448 (state == "SECOPTION" and c == ']') or
449 (state == "CONTENT" and c == '}')):
453 nestdepth = nestdepth - 1
454 if ((state == "OPTION" and c == '[') or
455 (state == "SECOPTION" and c == '[') or
456 (state == "CONTENT" and c == '{')):
457 nestdepth = nestdepth + 1
458 if state == "CMDNAME":
460 elif state == "OPTION":
462 elif state == "SECOPTION":
464 elif state == "CONTENT":
469 elif c == '[' and b != ']':
471 nestdepth = 0 # Just to be sure
472 elif c == '[' and b == ']':
474 nestdepth = 0 # Just to be sure
477 nestdepth = 0 # Just to be sure
480 # Now we have parsed the command, output the parameters
481 lines = ["\\begin_inset LatexCommand %s" % name]
483 if commandparams_info[name][0] == "":
484 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
486 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
488 if commandparams_info[name][1] == "":
489 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
491 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
493 if commandparams_info[name][2] == "":
494 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
496 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
497 document.body[i:i+1] = lines
# revert_commandparams: the inverse of convert_commandparams. Reads the
# field-per-line inset back into option1/option2/argument (unescaping \" to ")
# via commandparams_info, then rebuilds the old inline form; bibitem becomes a
# raw \bibitem line instead of an inset.
# NOTE(review): numbering gaps (503-504, 506-507, 510-513, 516, 533, 535,
# 537-539, 541, 543-544, 546, 551) show elided loop scaffolding, variable
# initialization and the option1/option2 branch conditions in this listing.
501 def revert_commandparams(document):
502 regex = re.compile(r'(\S+)\s+(.+)')
505 i = find_token(document.body, "\\begin_inset LatexCommand", i)
508 name = document.body[i].split()[2]
509 j = find_end_of_inset(document.body, i + 1)
514 for k in range(i + 1, j):
515 match = re.match(regex, document.body[k])
517 pname = match.group(1)
518 pvalue = match.group(2)
519 if pname == "preview":
520 preview_line = document.body[k]
521 elif (commandparams_info[name][0] != "" and
522 pname == commandparams_info[name][0]):
523 option1 = pvalue.strip('"').replace('\\"', '"')
524 elif (commandparams_info[name][1] != "" and
525 pname == commandparams_info[name][1]):
526 option2 = pvalue.strip('"').replace('\\"', '"')
527 elif (commandparams_info[name][2] != "" and
528 pname == commandparams_info[name][2]):
529 argument = pvalue.strip('"').replace('\\"', '"')
530 elif document.body[k].strip() != "":
531 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
532 if name == "bibitem":
534 lines = ["\\bibitem {%s}" % argument]
536 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
540 lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
542 lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
545 lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
547 lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
548 if name != "bibitem":
549 if preview_line != "":
550 lines.append(preview_line)
552 lines.append('\\end_inset')
553 document.body[i:j+1] = lines
# revert_nomenclature: replace each `LatexCommand nomenclature` inset with an
# ERT inset containing the raw \nomenclature[prefix]{symbol}{description}
# command, and add the nomencl package + \makenomenclature to the preamble
# when at least one entry was converted.
# NOTE(review): numbering gaps (560-562, 564-566, 568-571, 574, 587, 589,
# 592-593, 595-602) show elided loop scaffolding, variable initialization,
# the prefix branch and the ERT body lines in this listing.
557 def revert_nomenclature(document):
558 " Convert nomenclature entry to ERT. "
559 regex = re.compile(r'(\S+)\s+(.+)')
563 i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
567 j = find_end_of_inset(document.body, i + 1)
572 for k in range(i + 1, j):
573 match = re.match(regex, document.body[k])
575 name = match.group(1)
576 value = match.group(2)
577 if name == "preview":
578 preview_line = document.body[k]
579 elif name == "symbol":
580 symbol = value.strip('"').replace('\\"', '"')
581 elif name == "description":
582 description = value.strip('"').replace('\\"', '"')
583 elif name == "prefix":
584 prefix = value.strip('"').replace('\\"', '"')
585 elif document.body[k].strip() != "":
586 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
588 command = 'nomenclature{%s}{%s}' % (symbol, description)
590 command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
591 document.body[i:j+1] = ['\\begin_inset ERT',
594 '\\begin_layout %s' % document.default_layout,
603 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
604 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
605 document.preamble.append('\\makenomenclature')
# revert_printnomenclature: replace each `LatexCommand printnomenclature`
# inset with an ERT inset containing \printnomenclature (optionally with the
# labelwidth argument); mirrors revert_nomenclature, including the preamble
# additions.
# NOTE(review): numbering gaps (611-613, 615-617, 619-620, 623, 632, 634,
# 637-638, 640-647) show elided loop scaffolding, variable initialization and
# the ERT body lines in this listing. Note the visible command strings say
# 'nomenclature{}' / 'nomenclature[%s]' — verify against version control
# whether the 'print' prefix lives on an elided line.
608 def revert_printnomenclature(document):
609 " Convert printnomenclature to ERT. "
610 regex = re.compile(r'(\S+)\s+(.+)')
614 i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
618 j = find_end_of_inset(document.body, i + 1)
621 for k in range(i + 1, j):
622 match = re.match(regex, document.body[k])
624 name = match.group(1)
625 value = match.group(2)
626 if name == "preview":
627 preview_line = document.body[k]
628 elif name == "labelwidth":
629 labelwidth = value.strip('"').replace('\\"', '"')
630 elif document.body[k].strip() != "":
631 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
633 command = 'nomenclature{}'
635 command = 'nomenclature[%s]' % labelwidth
636 document.body[i:j+1] = ['\\begin_inset ERT',
639 '\\begin_layout %s' % document.default_layout,
648 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
649 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
650 document.preamble.append('\\makenomenclature')
# convert_esint: insert a `\use_esint 0` line into the header (placed before
# \cite_engine, which is used as the anchor).
# NOTE(review): lines 656 and 658 are elided — presumably the `if i == -1:`
# guard body and a `return`. Confirm against version control.
653 def convert_esint(document):
654 " Add \\use_esint setting to header. "
655 i = find_token(document.header, "\\cite_engine", 0)
657 document.warning("Malformed LyX document: Missing `\\cite_engine'.")
659 # 0 is off, 1 is auto, 2 is on.
660 document.header.insert(i, '\\use_esint 0')
# revert_esint: remove the `\use_esint` header line; when the setting was
# enabled, load the esint LaTeX package from the preamble instead.
# NOTE(review): lines 666, 668 and 672 are elided — presumably the
# `if i == -1:` guard/`return` and the `if use_esint != '0':` test guarding
# the preamble append. Confirm against version control.
663 def revert_esint(document):
664 " Remove \\use_esint setting from header. "
665 i = find_token(document.header, "\\use_esint", 0)
667 document.warning("Malformed LyX document: Missing `\\use_esint'.")
669 use_esint = document.header[i].split()[1]
670 del document.header[i]
671 # 0 is off, 1 is auto, 2 is on.
673 document.preamble.append('\\usepackage{esint}')
# revert_clearpage: replace each `\clearpage` body line with an ERT inset
# containing the raw LaTeX command.
# NOTE(review): heavily elided (gaps 677-679, 681-682, 684-685, 687-696):
# loop scaffolding, the `i == -1` guard and most of the ERT replacement list
# are missing from this listing.
676 def revert_clearpage(document):
680 i = find_token(document.body, "\\clearpage", i)
683 document.body[i:i+1] = ['\\begin_inset ERT',
686 '\\begin_layout %s' % document.default_layout,
# revert_cleardoublepage: replace each `\cleardoublepage` body line with an
# ERT inset containing the raw LaTeX command (parallel to revert_clearpage).
# NOTE(review): heavily elided (gaps 699-700, 702-703, 705-706, 708-717):
# loop scaffolding, the `i == -1` guard and most of the ERT replacement list
# are missing from this listing.
697 def revert_cleardoublepage(document):
698 " cleardoublepage -> ERT "
701 i = find_token(document.body, "\\cleardoublepage", i)
704 document.body[i:i+1] = ['\\begin_inset ERT',
707 '\\begin_layout %s' % document.default_layout,
# convert_lyxline: for every \size command sitting exactly two lines before a
# \lyxline, delete the \size line so old documents keep the old line
# thickness under the new \lyxline definition.
# NOTE(review): numbering gaps (727-728, 735-739) show elided lines — the
# inner-loop counter handling and loop-exit logic are missing in this listing.
718 def convert_lyxline(document):
719 " remove fontsize commands for \lyxline "
720 # The problem is: The old \lyxline definition doesn't handle the fontsize
721 # to change the line thickness. The new definition does this so that imported
722 # \lyxlines would have a different line thickness. The eventual fontsize command
723 # before \lyxline is therefore removed to get the same output.
724 fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
725 "large", "Large", "LARGE", "huge", "Huge"]
726 for n in range(0, len(fontsizes)):
729 while i < len(document.body):
730 i = find_token(document.body, "\\size " + fontsizes[n], i)
731 k = find_token(document.body, "\\lyxline",i)
732 # the corresponding fontsize command is always 2 lines before the \lyxline
733 if (i != -1 and k == i+2):
734 document.body[i:i+1] = []
# revert_encodings: encodings newly supported in 1.5 cannot be written to old
# formats; if \inputencoding names one of them, rewrite it to "auto" (adding
# the header line if it is missing entirely).
# NOTE(review): lines 746 and 748 are elided — presumably the `if i == -1:`
# guard and its `else:`. Confirm against version control.
740 def revert_encodings(document):
741 " Set new encodings to auto. "
742 encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
743 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
744 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
745 i = find_token(document.header, "\\inputencoding", 0)
747 document.header.append("\\inputencoding auto")
749 inputenc = get_value(document.header, "\\inputencoding", i)
750 if inputenc in encodings:
751 document.header[i] = "\\inputencoding auto"
752 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# convert_caption: wrap each `\begin_layout Caption` paragraph in a Caption
# inset inside a default-layout paragraph (1.5 represents captions as insets,
# not layouts).
# NOTE(review): numbering gaps (757-758, 760-761, 763, 765-766, 771-773) show
# elided loop scaffolding and `i`/`j == -1` guards in this listing.
755 def convert_caption(document):
756 " Convert caption layouts to caption insets. "
759 i = find_token(document.body, "\\begin_layout Caption", i)
762 j = find_end_of_layout(document.body, i)
764 document.warning("Malformed LyX document: Missing `\\end_layout'.")
767 document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
768 document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
769 "\\begin_inset Caption", "",
770 "\\begin_layout %s" % document.default_layout]
# revert_caption: the inverse of convert_caption — unwrap each Caption inset
# back into a `\begin_layout Caption` paragraph, deleting or splitting the
# enclosing layout depending on whether the inset sits at the start/end of
# its paragraph.
# NOTE(review): numbering gaps throughout (777-778, 780-782, 789, 793,
# 796-797, 800-801, 803-804, 807, 809, 811, 813-814, 820, 822, 826-828, 830,
# 832, 836, 838-839, 841, 845-847) show elided loop scaffolding, guards and
# index bookkeeping in this listing. Visible lines kept verbatim.
774 def revert_caption(document):
775 " Convert caption insets to caption layouts. "
776 " This assumes that the text class has a caption style. "
779 i = find_token(document.body, "\\begin_inset Caption", i)
783 # We either need to delete the previous \begin_layout line, or we
784 # need to end the previous layout if this inset is not in the first
785 # position of the paragraph.
786 layout_before = find_token_backwards(document.body, "\\begin_layout", i)
787 if layout_before == -1:
788 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
790 layout_line = document.body[layout_before]
791 del_layout_before = True
792 l = layout_before + 1
794 if document.body[l] != "":
795 del_layout_before = False
798 if del_layout_before:
799 del document.body[layout_before:i]
802 document.body[i:i] = ["\\end_layout", ""]
805 # Find start of layout in the inset and end of inset
806 j = find_token(document.body, "\\begin_layout", i)
808 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
810 k = find_end_of_inset(document.body, i)
812 document.warning("Malformed LyX document: Missing `\\end_inset'.")
815 # We either need to delete the following \end_layout line, or we need
816 # to restart the old layout if this inset is not at the paragraph end.
817 layout_after = find_token(document.body, "\\end_layout", k)
818 if layout_after == -1:
819 document.warning("Malformed LyX document: Missing `\\end_layout'.")
821 del_layout_after = True
823 while l < layout_after:
824 if document.body[l] != "":
825 del_layout_after = False
829 del document.body[k+1:layout_after+1]
831 document.body[k+1:k+1] = [layout_line, ""]
833 # delete \begin_layout and \end_inset and replace \begin_inset with
834 # "\begin_layout Caption". This works because we can only have one
835 # paragraph in the caption inset: The old \end_layout will be recycled.
837 if document.body[k] == "":
840 if document.body[j] == "":
842 document.body[i] = "\\begin_layout Caption"
843 if document.body[i+1] == "":
844 del document.body[i+1]
# Unicode tables for InsetLaTeXAccent conversion:
#  - accent_map: LaTeX accent letter -> combining character (U+03xx)
#  - special_accent_map: argument-less special accents -> precomposed char
#  - accented_map: special accent arguments (\i, \j) -> precomposed char
# NOTE(review): the listing elides the `accent_map = {` opener (line 849)
# and the closing braces of accent_map (868), special_accent_map (877) and
# accented_map (884), plus the `accented_map = {` opener (881). The entries
# below are reproduced verbatim.
848 # Accents of InsetLaTeXAccent
850 "`" : u'\u0300', # grave
851 "'" : u'\u0301', # acute
852 "^" : u'\u0302', # circumflex
853 "~" : u'\u0303', # tilde
854 "=" : u'\u0304', # macron
855 "u" : u'\u0306', # breve
856 "." : u'\u0307', # dot above
857 "\"": u'\u0308', # diaresis
858 "r" : u'\u030a', # ring above
859 "H" : u'\u030b', # double acute
860 "v" : u'\u030c', # caron
861 "b" : u'\u0320', # minus sign below
862 "d" : u'\u0323', # dot below
863 "c" : u'\u0327', # cedilla
864 "k" : u'\u0328', # ogonek
865 "t" : u'\u0361' # tie. This is special: It spans two characters, but
866 # only one is given as argument, so we don't need to
867 # treat it differently.
871 # special accents of InsetLaTeXAccent without argument
872 special_accent_map = {
873 'i' : u'\u0131', # dotless i
874 'j' : u'\u0237', # dotless j
875 'l' : u'\u0142', # l with stroke
876 'L' : u'\u0141' # L with stroke
880 # special accent arguments of InsetLaTeXAccent
882 '\\i' : u'\u0131', # dotless i
883 '\\j' : u'\u0237' # dotless j
# _convert_accent: map an InsetLaTeXAccent (accent letter + accented char)
# to the precomposed unicode character via NFKC normalization; special
# accents without argument come straight from special_accent_map.
# NOTE(review): numbering gaps (888-890, 894, 902, 907, 909) show elided
# lines — apparently the local `type`/`char` assignments (the visible code
# reads `type` and `char`, not the parameters directly), empty-char handling
# and failure `return`s. Confirm against version control.
887 def _convert_accent(accent, accented_char):
891 if type in special_accent_map:
892 return special_accent_map[type]
893 # a missing char is treated as space by LyX
895 elif type == 'q' and char in ['t', 'd', 'l', 'L']:
896 # Special caron, only used with t, d, l and L.
897 # It is not in the map because we convert it to the same unicode
898 # character as the normal caron: \q{} is only defined if babel with
899 # the czech or slovak language is used, and the normal caron
900 # produces the correct output if the T1 font encoding is used.
901 # For the same reason we never convert to \q{} in the other direction.
903 elif char in accented_map:
904 char = accented_map[char]
905 elif (len(char) > 1):
906 # We can only convert accents on a single char
908 a = accent_map.get(type)
910 return unicodedata.normalize("NFKC", "%s%s" % (char, a))
# convert_ertbackslash: append `ert` text to body[i] as valid ERT, turning
# backslashes into '\backslash ' and newlines into paragraph breaks
# (\end_layout / \begin_layout pairs); returns the possibly advanced index.
# NOTE(review): numbering gaps (918-920, 922-924, 926-927) show the loop over
# the characters of `ert` and the newline branch/index bookkeeping are elided
# in this listing.
914 def convert_ertbackslash(body, i, ert, default_layout):
915 r""" -------------------------------------------------------------------------------------------
916 Convert backslashes and '\n' into valid ERT code, append the converted
917 text to body[i] and return the (maybe incremented) line index i"""
921 body[i] = body[i] + '\\backslash '
925 body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
928 body[i] = body[i] + c
# convert_accent: replace InsetLaTeXAccent markup (`\i \"{a}` and variants)
# with the precomposed unicode character via _convert_accent; accents that
# cannot be converted are emitted as ERT with a warning.
# NOTE(review): numbering gaps (942-943, 945-946, 951, 958-959, 961, 963-964,
# 968-969, 971-973, 975, 978-982) show elided loop scaffolding, the
# success/failure branch statements and the ERT body lines in this listing.
932 def convert_accent(document):
933 # The following forms are supported by LyX:
934 # '\i \"{a}' (standard form, as written by LyX)
935 # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
936 # '\i \"{ }' (also accepted if the accented char is a space)
937 # '\i \" a' (also accepted)
938 # '\i \"' (also accepted)
939 re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
940 re_contents = re.compile(r'^([^\s{]+)(.*)$')
941 re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
944 i = find_re(document.body, re_wholeinset, i)
947 match = re_wholeinset.match(document.body[i])
948 prefix = match.group(1)
949 contents = match.group(3).strip()
950 match = re_contents.match(contents)
952 # Strip first char (always \)
953 accent = match.group(1)[1:]
954 accented_contents = match.group(2).strip()
955 match = re_accentedcontents.match(accented_contents)
956 accented_char = match.group(1)
957 converted = _convert_accent(accent, accented_char)
960 contents = '%s{%s}' % (accent, accented_char),
962 document.body[i] = '%s%s' % (prefix, converted)
965 document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
966 document.body[i] = prefix
967 document.body[i+1:i+1] = ['\\begin_inset ERT',
970 '\\begin_layout %s' % document.default_layout,
974 i = convert_ertbackslash(document.body, i + 7,
976 document.default_layout)
977 document.body[i+1:i+1] = ['\\end_layout',
# revert_accent: the inverse of convert_accent. Builds inverse lookup tables,
# re-joins words split across lines, normalizes the body to NFKD so accents
# become combining characters, and replaces every accented character that
# cannot be represented in the target 8-bit encoding (encode raises
# UnicodeEncodeError) with InsetLaTeXAccent markup; finally re-normalizes to
# NFKC.
# NOTE(review): numbering gaps (985, 993, 1001, 1008, 1015, 1017, 1019, 1022,
# 1026, 1030, 1034, 1037, 1040, 1043-1044, 1052, 1059, 1061, 1065, 1070,
# 1074, 1081, 1083, 1087, 1091-1092) show elided loop headers, try/except
# scaffolding and else branches in this listing. Visible lines kept verbatim.
983 def revert_accent(document):
984 inverse_accent_map = {}
986 inverse_accent_map[accent_map[k]] = k
987 inverse_special_accent_map = {}
988 for k in special_accent_map:
989 inverse_special_accent_map[special_accent_map[k]] = k
990 inverse_accented_map = {}
991 for k in accented_map:
992 inverse_accented_map[accented_map[k]] = k
994 # Since LyX may insert a line break within a word we must combine all
995 # words before unicode normalization.
996 # We do this only if the next line starts with an accent, otherwise we
997 # would create things like '\begin_inset ERTstatus'.
998 numberoflines = len(document.body)
999 for i in range(numberoflines-1):
1000 if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1002 if (document.body[i+1][0] in inverse_accent_map):
1003 # the last character of this line and the first of the next line
1004 # form probably a surrogate pair.
1005 while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1006 document.body[i] += document.body[i+1][0]
1007 document.body[i+1] = document.body[i+1][1:]
1009 # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1010 # This is needed to catch all accented characters.
1011 for i in range(numberoflines):
1012 # Unfortunately we have a mixture of unicode strings and plain strings,
1013 # because we never use u'xxx' for string literals, but 'xxx'.
1014 # Therefore we may have to try two times to normalize the data.
1016 document.body[i] = unicodedata.normalize("NFKD", document.body[i])
1018 document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
1020 # Replace accented characters with InsetLaTeXAccent
1021 # Do not convert characters that can be represented in the chosen
1023 encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1024 lang_re = re.compile(r"^\\lang\s(\S+)")
1025 for i in range(len(document.body)):
1027 if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1028 # Track the encoding of the current line
1029 result = lang_re.match(document.body[i])
1031 language = result.group(1)
1032 if language == "default":
1033 encoding_stack[-1] = document.encoding
1035 from lyx2lyx_lang import lang
1036 encoding_stack[-1] = lang[language][3]
1038 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1039 encoding_stack.append(encoding_stack[-1])
1041 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1042 del encoding_stack[-1]
1045 for j in range(len(document.body[i])):
1046 # dotless i and dotless j are both in special_accent_map and can
1047 # occur as an accented character, so we need to test that the
1048 # following character is no accent
1049 if (document.body[i][j] in inverse_special_accent_map and
1050 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1051 accent = document.body[i][j]
1053 dummy = accent.encode(encoding_stack[-1])
1054 except UnicodeEncodeError:
1055 # Insert the rest of the line as new line
1056 if j < len(document.body[i]) - 1:
1057 document.body[i+1:i+1] = document.body[i][j+1:]
1058 # Delete the accented character
1060 document.body[i] = document.body[i][:j-1]
1062 document.body[i] = u''
1063 # Finally add the InsetLaTeXAccent
1064 document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1066 elif j > 0 and document.body[i][j] in inverse_accent_map:
1067 accented_char = document.body[i][j-1]
1068 if accented_char == ' ':
1069 # Conform to LyX output
1071 elif accented_char in inverse_accented_map:
1072 accented_char = inverse_accented_map[accented_char]
1073 accent = document.body[i][j]
1075 dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
1076 except UnicodeEncodeError:
1077 # Insert the rest of the line as new line
1078 if j < len(document.body[i]) - 1:
1079 document.body[i+1:i+1] = document.body[i][j+1:]
1080 # Delete the accented characters
1082 document.body[i] = document.body[i][:j-2]
1084 document.body[i] = u''
1085 # Finally add the InsetLaTeXAccent
1086 document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1088 # Normalize to "Normal form C" (NFC, pre-composed characters) again
1089 for i in range(numberoflines):
1090 document.body[i] = unicodedata.normalize("NFKC", document.body[i])
def normalize_font_whitespace(document):
    """Move whitespace at the edges of a font change outside of it.

    Before file format 259 LyX ignored font changes (\\series, \\emph,
    \\shape, \\family, ...) if a whitespace was the first or last character
    in the changed sequence.  This function transfers such a space outside
    of the font change.  Because LyX 1.4 closed *all* active font
    properties around that space (closing a change closes every property,
    not just the changed one), every property currently in effect is
    explicitly reset before the space and re-established after it.

    Mutates document.body in place; only the latex backend is affected.
    """

    if document.backend != "latex":
        return

    lines = document.body

    # Default value of each tracked font property; a line "\series default"
    # resets the property, anything else sets it.
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       # NOTE(review): "\\color" and "\\bar" fell in a gap of
                       # this excerpt; restored from the upstream lyx2lyx
                       # sources -- confirm.
                       "\\color": "none",
                       "\\bar": "default",
                       "\\shape": "default",
                       "\\family": "default"}
    # Properties currently changed away from their default.
    changes = {}

    i = 0
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            changes.clear()

        elif len(words) > 1 and words[0] in char_properties.keys():
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in changes.keys():
                    del changes[words[0]]
                defaultproperty = True
            else:
                # property gets set
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.

            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                if defaultproperty:
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i+1:i+1] = added_lines
                else:
                    # Reset property for the space
                    added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                    lines[i:i] = added_lines
                i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        i = i + 1
                        continue
                lines[i+1] = lines[i+1][1:]
                added_lines = [" "]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    if k != words[0]:
                        added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    if k != words[0]:
                        added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                i = i + len(added_lines)

        i = i + 1
# Conversion tables for the lyx2lyx framework.
# Each entry in `convert` is [target_format, [conversion functions]]: the
# framework applies the chains in order to bring a document up to a given
# file format number; `revert` is the inverse chain, walked downwards.
supported_versions = ["1.5.0","1.5"]
convert = [[246, []],
           [247, [convert_font_settings]],
           # NOTE(review): this excerpt's convert chain skips formats
           # 248, 250, 251, 253, 255 and 256, all of which have revert
           # steps below -- verify against the upstream lyx2lyx sources
           # that no [N, [...]] entries were lost.
           [249, [convert_utf8]],
           [252, [convert_commandparams, convert_bibitem]],
           [254, [convert_esint]],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace]]]

revert = [[258, []],
          # NOTE(review): formats 257 and 249 are likewise absent here.
          [256, [revert_caption]],
          [255, [revert_encodings]],
          [254, [revert_clearpage, revert_cleardoublepage]],
          [253, [revert_esint]],
          [252, [revert_nomenclature, revert_printnomenclature]],
          [251, [revert_commandparams]],
          [250, [revert_cs_label]],
          [248, [revert_accent, revert_utf8]],
          [247, [revert_booktabs]],
          [246, [revert_font_settings]],
          [245, [revert_framed]]]
1220 if __name__ == "__main__":