1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
27 from LyX import get_encoding
30 ####################################################################
31 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the \\end_inset closing the inset at lines[i]."""
    opener, closer = "\\begin_inset", "\\end_inset"
    return find_end_of(lines, i, opener, closer)
def find_end_of_layout(lines, i):
    """Return the index of the \\end_layout closing the layout at lines[i]."""
    opener, closer = "\\begin_layout", "\\end_layout"
    return find_end_of(lines, i, opener, closer)
def find_beginning_of_layout(lines, i):
    """Return the index of the \\begin_layout opening the layout that contains lines[i]."""
    opener, closer = "\\begin_layout", "\\end_layout"
    return find_beginning_of(lines, i, opener, closer)
45 # End of helper functions
46 ####################################################################
50 # Notes: Framed/Shaded
def revert_framed(document):
    """Revert framed/shaded notes to plain notes.

    Older formats know only `\\begin_inset Note`; the Framed/Shaded
    variants are downgraded in place.
    NOTE(review): loop and guard lines lost in truncation were restored to
    the standard lyx2lyx pattern -- verify against upstream lyx_1_5.py.
    """
    i = 0
    while 1:
        i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset Note"
        i = i + 1
# Mapping tables from the old single \fontscheme header value to the new
# per-family \font_roman / \font_sans / \font_typewriter values.
# NOTE(review): the 'pslatex' rows and closing braces were lost in
# truncation and are restored -- verify against upstream lyx_1_5.py.
roman_fonts = {'default' : 'default', 'ae'       : 'ae',
               'times'   : 'times',   'palatino' : 'palatino',
               'helvet'  : 'default', 'avant'    : 'default',
               'newcent' : 'newcent', 'bookman'  : 'bookman',
               'pslatex' : 'times'}
sans_fonts = {'default' : 'default', 'ae'       : 'default',
              'times'   : 'default', 'palatino' : 'default',
              'helvet'  : 'helvet',  'avant'    : 'avant',
              'newcent' : 'default', 'bookman'  : 'default',
              'pslatex' : 'helvet'}
typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
                    'times'   : 'default', 'palatino' : 'default',
                    'helvet'  : 'default', 'avant'    : 'default',
                    'newcent' : 'default', 'bookman'  : 'default',
                    'pslatex' : 'courier'}
def convert_font_settings(document):
    """Convert the old \\fontscheme header line to the new per-family
    \\font_* header lines.

    NOTE(review): guard lines lost in truncation restored to the standard
    lyx2lyx pattern -- verify against upstream lyx_1_5.py.
    """
    i = 0
    i = find_token_exact(document.header, "\\fontscheme", i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        return
    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
    if font_scheme == '':
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        font_scheme = 'default'
    if font_scheme not in roman_fonts:
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
        font_scheme = 'default'
    # Replace the single \fontscheme line with the full new font block.
    document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
                              '\\font_sans %s' % sans_fonts[font_scheme],
                              '\\font_typewriter %s' % typewriter_fonts[font_scheme],
                              '\\font_default_family default',
                              '\\font_sc false',
                              '\\font_osf false',
                              '\\font_sf_scale 100',
                              '\\font_tt_scale 100']
def revert_font_settings(document):
    """Revert the per-family \\font_* header lines to one \\fontscheme line.

    If the three families match a known scheme that scheme is emitted;
    otherwise 'default' is used and the fonts are loaded via the preamble.
    NOTE(review): guard and else lines lost in truncation restored to the
    standard lyx2lyx pattern -- verify against upstream lyx_1_5.py.
    """
    i = 0
    insert_line = -1
    fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
    for family in 'roman', 'sans', 'typewriter':
        name = '\\font_%s' % family
        i = find_token_exact(document.header, name, i)
        if i == -1:
            document.warning("Malformed LyX document: Missing `%s'." % name)
            i = 0
        else:
            # Remember where the first \font_* line was, so the new
            # \fontscheme line goes in the same place.
            if (insert_line < 0):
                insert_line = i
            fonts[family] = get_value(document.header, name, i, i + 1)
            del document.header[i]
    i = find_token_exact(document.header, '\\font_default_family', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_default_family'.")
        font_default_family = 'default'
    else:
        font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_sc', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_sc'.")
        font_sc = 'false'
    else:
        font_sc = get_value(document.header, '\\font_sc', i, i + 1)
        del document.header[i]
    if font_sc != 'false':
        document.warning("Conversion of '\\font_sc' not yet implemented.")
    i = find_token_exact(document.header, '\\font_osf', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_osf'.")
        font_osf = 'false'
    else:
        font_osf = get_value(document.header, '\\font_osf', i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_sf_scale', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
        font_sf_scale = '100'
    else:
        font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
        del document.header[i]
    if font_sf_scale != '100':
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
    i = find_token_exact(document.header, '\\font_tt_scale', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
        font_tt_scale = '100'
    else:
        font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
        del document.header[i]
    if font_tt_scale != '100':
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
    # Exact match against a known scheme: emit it and we are done.
    for font_scheme in roman_fonts.keys():
        if (roman_fonts[font_scheme] == fonts['roman'] and
            sans_fonts[font_scheme] == fonts['sans'] and
            typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
            return
    # No scheme matched: fall back to 'default' and load fonts by hand.
    font_scheme = 'default'
    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
            font_osf = 'false'
    for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
        if fonts['roman'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmss', 'lmss', 'cmbr':
        if fonts['sans'] == font:
            document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
    for font in 'berasans':
        if fonts['sans'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmtt', 'lmtt', 'cmtl':
        if fonts['typewriter'] == font:
            document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
    for font in 'courier', 'beramono', 'luximono':
        if fonts['typewriter'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")
def revert_booktabs(document):
    " We remove the booktabs flag or everything else will become a mess. "
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    # NOTE(review): loop and guard lines lost in truncation restored to the
    # standard lyx2lyx pattern -- verify against upstream lyx_1_5.py.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            continue
        for k in range(i, j):
            if re.search('^<features.* booktabs="true".*>$', document.body[k]):
                document.warning("Converting 'booktabs' table to normal table.")
                document.body[k] = document.body[k].replace(' booktabs="true"', '')
            if re.search(re_row, document.body[k]):
                document.warning("Removing extra row space.")
                document.body[k] = re_tspace.sub('', document.body[k])
                document.body[k] = re_bspace.sub('', document.body[k])
                document.body[k] = re_ispace.sub('', document.body[k])
        i = j + 1
def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.
        Files with an inputencoding of "auto" or "default" and multiple languages
        where at least two languages have different default encodings are encoded
        in multiple encodings for file formats < 249. These files are incorrectly
        read and written (as if the whole file was in the encoding of the main
        language).
        This is not true for files written by CJK-LyX, they are always in the locale
        encoding.

        This function
        - converts from fake unicode values to true unicode if forward is true, and
        - converts from true unicode values to fake unicode if forward is false.
        document.encoding must be set to the old value (format 248) in both cases.

        We do this here and not in LyX.py because it is far easier to do the
        necessary parsing in modern formats than in ancient ones.

        NOTE(review): branch lines lost in truncation restored to the standard
        lyx2lyx pattern -- verify against upstream lyx_1_5.py.
    """
    if document.cjk_encoding != '':
        return
    encoding_stack = [document.encoding]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    if document.inputencoding == "auto" or document.inputencoding == "default":
        for i in range(len(document.body)):
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                if len(encoding_stack) == 1:
                    # Don't remove the document encoding from the stack
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                else:
                    del encoding_stack[-1]
            if encoding_stack[-1] != document.encoding:
                if forward:
                    # This line has been incorrectly interpreted as if it was
                    # encoded in 'encoding'.
                    # Convert back to the 8bit string that was in the file.
                    orig = document.body[i].encode(document.encoding)
                    # Convert the 8bit string that was in the file to unicode
                    # with the correct encoding.
                    document.body[i] = orig.decode(encoding_stack[-1])
                else:
                    # Convert unicode to the 8bit string that will be written
                    # to the file with the correct encoding.
                    orig = document.body[i].encode(encoding_stack[-1])
                    # Convert the 8bit string that will be written to the
                    # file to fake unicode with the encoding that will later
                    # be used when writing to the file.
                    document.body[i] = orig.decode(document.encoding)
def convert_utf8(document):
    """Switch the document to UTF-8: re-decode the body to true unicode,
    then record the new encoding on the document."""
    convert_multiencoding(document, True)
    document.encoding = "utf8"
def revert_utf8(document):
    """Set document encoding to the value corresponding to inputencoding.

    NOTE(review): the `if i == -1:` guard lost in truncation was restored --
    verify against upstream lyx_1_5.py.
    """
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
    document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
    convert_multiencoding(document, False)
def revert_cs_label(document):
    """Remove the status flag ('show_label') line of a charstyle inset.

    NOTE(review): loop lines lost in truncation restored to the standard
    lyx2lyx pattern -- verify against upstream lyx_1_5.py.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Seach for a line starting 'show_label'
        # If it is not there, break with a warning message
        i = i + 1
        while 1:
            if (document.body[i][:10] == "show_label"):
                del document.body[i]
                break
            elif (document.body[i][:13] == "\\begin_layout"):
                document.warning("Malformed LyX document: Missing 'show_label'.")
                break
            i = i + 1
        i = i + 1
def convert_bibitem(document):
    """ Convert
    \bibitem [option]{argument}

    to

    \begin_inset LatexCommand bibitem
    label "option"
    key "argument"

    \end_inset

    This must be called after convert_commandparams.
    """
    # NOTE(review): guard and branch lines lost in truncation restored to
    # the standard lyx2lyx pattern -- verify against upstream lyx_1_5.py.
    i = 0
    while 1:
        i = find_token(document.body, "\\bibitem", i)
        if i == -1:
            break
        j = document.body[i].find('[') + 1
        k = document.body[i].rfind(']')
        if j == 0: # No optional argument found
            option = None
        else:
            option = document.body[i][j:k]
        j = document.body[i].rfind('{') + 1
        k = document.body[i].rfind('}')
        argument = document.body[i][j:k]
        lines = ['\\begin_inset LatexCommand bibitem']
        if option != None:
            lines.append('label "%s"' % option.replace('"', '\\"'))
        lines.append('key "%s"' % argument.replace('"', '\\"'))
        lines.append('')
        lines.append('\\end_inset')
        document.body[i:i+1] = lines
        i = i + 1
# command : [option1, option2, argument]
# The many natbib/jurabib citation variants and the cross-reference
# commands share identical parameter names, so those groups are generated
# from name lists instead of being spelled out entry by entry.
commandparams_info = dict(
    [(cmd, ["after", "before", "key"]) for cmd in (
        "cite", "citet", "citep", "citealt", "citealp",
        "citeauthor", "citeyear", "citeyearpar",
        "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
        "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
        "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
        "citefield", "citetitle", "cite*")] +
    [(cmd, ["name", "", "reference"]) for cmd in (
        "eqref", "pageref", "prettyref", "ref", "vpageref", "vref")])
commandparams_info.update({
    "bibitem"         : ["label", "", "key"],
    "bibtex"          : ["options", "btprint", "bibfiles"],
    "hfill"           : ["", "", ""],
    "index"           : ["", "", "name"],
    "printindex"      : ["", "", "name"],
    "label"           : ["", "", "name"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl"         : ["name", "", "target"],
    "url"             : ["name", "", "target"]})
def convert_commandparams(document):
    """ Convert

    \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
    \end_inset

    to

    \begin_inset LatexCommand cmdname
    name1 "opt1"
    name2 "opt2"
    name3 "arg"
    \end_inset

    name1, name2 and name3 can be different for each command.
    """
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.
    # NOTE(review): state-machine lines lost in truncation restored to the
    # original InsetCommandParams::scanCommand port -- verify against
    # upstream lyx_1_5.py.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue

        # The command may spread over several lines; pull it onto one.
        j = find_token(document.body, "\\end_inset", i + 1)
        if j == -1:
            document.warning("Malformed document")
        else:
            command += "".join(document.body[i+1:j])
            document.body[i+1:j] = []

        # The following parser is taken from the original InsetCommandParams::scanCommand
        name = ""
        option1 = ""
        option2 = ""
        argument = ""
        state = "WS"
        # Used to handle things like \command[foo[bar]]{foo{bar}}
        nestdepth = 0
        b = 0
        for c in command:
            if ((state == "CMDNAME" and c == ' ') or
                (state == "CMDNAME" and c == '[') or
                (state == "CMDNAME" and c == '{')):
                state = "WS"
            if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
                if nestdepth == 0:
                    state = "WS"
                else:
                    nestdepth = nestdepth - 1
            if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
                nestdepth = nestdepth + 1
            if state == "CMDNAME":
                name += c
            elif state == "OPTION":
                option1 += c
            elif state == "SECOPTION":
                option2 += c
            elif state == "CONTENT":
                argument += c
            elif state == "WS":
                if c == '\\':
                    state = "CMDNAME"
                elif c == '[' and b != ']':
                    state = "OPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '[' and b == ']':
                    state = "SECOPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '{':
                    state = "CONTENT"
                    nestdepth = 0 # Just to be sure
            b = c

        # Now we have parsed the command, output the parameters
        lines = ["\\begin_inset LatexCommand %s" % name]
        if option1 != "":
            if commandparams_info[name][0] == "":
                document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
        if option2 != "":
            if commandparams_info[name][1] == "":
                document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
        if argument != "":
            if commandparams_info[name][2] == "":
                document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
        document.body[i:i+1] = lines
        i = i + 1
def revert_commandparams(document):
    """Revert named LatexCommand inset parameters back to the old
    \\cmdname[opt1][opt2]{arg} single-line form (bibitem becomes \\bibitem).

    NOTE(review): initialization and branch lines lost in truncation
    restored to the standard lyx2lyx pattern -- verify against upstream
    lyx_1_5.py.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        name = document.body[i].split()[2]
        j = find_end_of_inset(document.body, i + 1)
        preview_line = ""
        option1 = ""
        option2 = ""
        argument = ""
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
            if match:
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                elif (commandparams_info[name][0] != "" and
                      pname == commandparams_info[name][0]):
                    option1 = pvalue.strip('"').replace('\\"', '"')
                elif (commandparams_info[name][1] != "" and
                      pname == commandparams_info[name][1]):
                    option2 = pvalue.strip('"').replace('\\"', '"')
                elif (commandparams_info[name][2] != "" and
                      pname == commandparams_info[name][2]):
                    argument = pvalue.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
        if name == "bibitem":
            if option1 == "":
                lines = ["\\bibitem {%s}" % argument]
            else:
                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
        else:
            if option1 == "":
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
            else:
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
        if name != "bibitem":
            if preview_line != "":
                lines.append(preview_line)
            lines.append('')
            lines.append('\\end_inset')
        document.body[i:j+1] = lines
        i = i + 1
def revert_nomenclature(document):
    """Convert nomenclature entry to ERT.

    NOTE(review): loop guards and the ERT replacement list lost in
    truncation restored to the standard lyx2lyx ERT pattern -- verify
    against upstream lyx_1_5.py.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        preview_line = ""
        symbol = ""
        description = ""
        prefix = ""
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    preview_line = document.body[k]
                elif name == "symbol":
                    symbol = value.strip('"').replace('\\"', '"')
                elif name == "description":
                    description = value.strip('"').replace('\\"', '"')
                elif name == "prefix":
                    prefix = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
        if prefix == "":
            command = 'nomenclature{%s}{%s}' % (symbol, description)
        else:
            command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 11
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def revert_printnomenclature(document):
    """Convert printnomenclature to ERT.

    NOTE(review): loop guards and the ERT replacement list lost in
    truncation restored to the standard lyx2lyx ERT pattern; the emitted
    ERT line prepends 'print' to the 'nomenclature...' command string --
    verify against upstream lyx_1_5.py.
    """
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        preview_line = ""
        labelwidth = ""
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    preview_line = document.body[k]
                elif name == "labelwidth":
                    labelwidth = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
        if labelwidth == "":
            command = 'nomenclature{}'
        else:
            command = 'nomenclature[%s]' % labelwidth
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                'print%s' % command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 11
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def convert_esint(document):
    """Add \\use_esint setting to header.

    NOTE(review): the missing-token guard lost in truncation was restored --
    verify against upstream lyx_1_5.py.
    """
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')
def revert_esint(document):
    """Remove \\use_esint setting from header; load the esint package
    explicitly when it was forced on.

    NOTE(review): the guard and the `use_esint == '2'` condition lost in
    truncation were restored -- verify against upstream lyx_1_5.py.
    """
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    if use_esint == '2':
        document.preamble.append('\\usepackage{esint}')
def revert_clearpage(document):
    """\\clearpage -> ERT.

    NOTE(review): loop guards and the tail of the ERT replacement list lost
    in truncation restored to the standard lyx2lyx ERT pattern -- verify
    against upstream lyx_1_5.py.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\clearpage", i)
        if i == -1:
            break
        document.body[i:i+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                'clearpage',
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
def revert_cleardoublepage(document):
    " cleardoublepage -> ERT "
    # NOTE(review): loop guards and the tail of the ERT replacement list
    # lost in truncation restored to the standard lyx2lyx ERT pattern --
    # verify against upstream lyx_1_5.py.
    i = 0
    while 1:
        i = find_token(document.body, "\\cleardoublepage", i)
        if i == -1:
            break
        document.body[i:i+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                'cleardoublepage',
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
def convert_lyxline(document):
    " remove fontsize commands for \lyxline "
    # The problematic is: The old \lyxline definition doesn't handle the fontsize
    # to change the line thickness. The new definiton does this so that imported
    # \lyxlines would have a different line thickness. The eventual fontsize command
    # before \lyxline is therefore removed to get the same output.
    # NOTE(review): loop initialization and tail lines lost in truncation
    # restored -- verify against upstream lyx_1_5.py.
    fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
                 "large", "Large", "LARGE", "huge", "Huge"]
    for n in range(0, len(fontsizes)):
        i = 0
        k = 0
        while i < len(document.body):
            i = find_token(document.body, "\\size " + fontsizes[n], i)
            k = find_token(document.body, "\\lyxline", i)
            # the corresponding fontsize command is always 2 lines before the \lyxline
            if (i != -1 and k == i+2):
                document.body[i:i+1] = []
            else:
                break
            i = i + 1
def revert_encodings(document):
    """Set new encodings to auto.

    NOTE(review): the `if`/`else` lines lost in truncation were restored --
    verify against upstream lyx_1_5.py.
    """
    encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def convert_caption(document):
    """Convert caption layouts to caption insets.

    NOTE(review): loop and guard lines lost in truncation restored to the
    standard lyx2lyx pattern -- verify against upstream lyx_1_5.py.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_layout Caption", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # Wrap the caption paragraph in an inset inside a default layout.
        document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
        document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
                                "\\begin_inset Caption", "",
                                "\\begin_layout %s" % document.default_layout]
        i = i + 1
def revert_caption(document):
    """Convert caption insets to caption layouts.

    This assumes that the text class has a caption style.
    NOTE(review): loop, guard and `else` lines lost in truncation restored
    to the standard lyx2lyx pattern -- verify against upstream lyx_1_5.py.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return

        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        layout_line = document.body[layout_before]
        del_layout_before = True
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            del document.body[layout_before:i]
            i = layout_before
        else:
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        del_layout_after = True
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k+1:layout_after+1]
        else:
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
# Accents of InsetLaTeXAccent: accent letter -> combining unicode character.
# NOTE(review): the dict headers and closing braces were lost in truncation
# and are restored -- verify against upstream lyx_1_5.py.
accent_map = {
    "`" : u'\u0300', # grave
    "'" : u'\u0301', # acute
    "^" : u'\u0302', # circumflex
    "~" : u'\u0303', # tilde
    "=" : u'\u0304', # macron
    "u" : u'\u0306', # breve
    "." : u'\u0307', # dot above
    "\"": u'\u0308', # diaeresis
    "r" : u'\u030a', # ring above
    "H" : u'\u030b', # double acute
    "v" : u'\u030c', # caron
    "b" : u'\u0320', # minus sign below
    "d" : u'\u0323', # dot below
    "c" : u'\u0327', # cedilla
    "k" : u'\u0328', # ogonek
    "t" : u'\u0361' # tie. This is special: It spans two characters, but
                    # only one is given as argument, so we don't need to
                    # treat it differently.
}


# special accents of InsetLaTeXAccent without argument
special_accent_map = {
    'i' : u'\u0131', # dotless i
    'j' : u'\u0237', # dotless j
    'l' : u'\u0142', # l with stroke
    'L' : u'\u0141' # L with stroke
}


# special accent arguments of InsetLaTeXAccent
accented_map = {
    '\\i' : u'\u0131', # dotless i
    '\\j' : u'\u0237' # dotless j
}
def _convert_accent(accent, accented_char):
    """Return the precomposed unicode character for an InsetLaTeXAccent,
    or '' when the combination cannot be converted.

    NOTE(review): prologue and return lines lost in truncation restored --
    verify against upstream lyx_1_5.py.
    """
    type = accent
    char = accented_char
    if char == '':
        if type in special_accent_map:
            return special_accent_map[type]
        # a missing char is treated as space by LyX
        char = ' '
    elif type == 'q' and char in ['t', 'd', 'l', 'L']:
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
        type = 'v'
    elif char in accented_map:
        char = accented_map[char]
    elif (len(char) > 1):
        # We can only convert accents on a single char
        return ''
    a = accent_map.get(type)
    if a:
        return unicodedata.normalize("NFC", "%s%s" % (char, a))
    return ''
def convert_ertbackslash(body, i, ert, default_layout):
    r""" -------------------------------------------------------------------------------------------
    Convert backslashes and '\n' into valid ERT code, append the converted
    text to body[i] and return the (maybe incremented) line index i"""
    # NOTE(review): the branch lines lost in truncation (index advance after
    # '\\' and after '\n') were restored -- verify against upstream lyx_1_5.py.
    for c in ert:
        if c == '\\':
            body[i] = body[i] + '\\backslash '
            i = i + 1
            body[i] = ''
        elif c == '\n':
            # A newline ends the current ERT paragraph and opens a new one.
            body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
            i = i + 4
        else:
            body[i] = body[i] + c
    return i
def convert_accent(document):
    """Convert InsetLaTeXAccent to precomposed unicode characters, or to
    ERT when the accent combination is unknown.

    NOTE(review): branch lines and the ERT text argument lost in truncation
    were restored -- verify against upstream lyx_1_5.py. Also fixed: a
    stray trailing comma after the `contents = ...` normalization made
    `contents` a 1-tuple (harmless with a single %s format, but fragile).
    """
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a' (also accepted)
    # '\i \"' (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    i = 0
    while 1:
        i = find_re(document.body, re_wholeinset, i)
        if i == -1:
            return
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        if match:
            # Strip first char (always \)
            accent = match.group(1)[1:]
            accented_contents = match.group(2).strip()
            match = re_accentedcontents.match(accented_contents)
            accented_char = match.group(1)
            converted = _convert_accent(accent, accented_char)
            if converted == '':
                # Normalize contents for the ERT fallback below.
                contents = '%s{%s}' % (accent, accented_char)
            else:
                document.body[i] = '%s%s' % (prefix, converted)
                i = i + 1
                continue
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i+1:i+1] = ['\\begin_inset ERT',
                                  'status collapsed',
                                  '',
                                  '\\begin_layout %s' % document.default_layout,
                                  '',
                                  '',
                                  '']
        i = convert_ertbackslash(document.body, i + 7,
            '\\%s' % contents,
            document.default_layout)
        document.body[i+1:i+1] = ['\\end_layout',
                                  '',
                                  '\\end_inset']
        i = i + 3
1004 def revert_accent(document):
1005 inverse_accent_map = {}
1006 for k in accent_map:
1007 inverse_accent_map[accent_map[k]] = k
1008 inverse_special_accent_map = {}
1009 for k in special_accent_map:
1010 inverse_special_accent_map[special_accent_map[k]] = k
1011 inverse_accented_map = {}
1012 for k in accented_map:
1013 inverse_accented_map[accented_map[k]] = k
1015 # Since LyX may insert a line break within a word we must combine all
1016 # words before unicode normalization.
1017 # We do this only if the next line starts with an accent, otherwise we
1018 # would create things like '\begin_inset ERTstatus'.
1019 numberoflines = len(document.body)
1020 for i in range(numberoflines-1):
1021 if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1023 if (document.body[i+1][0] in inverse_accent_map):
1024 # the last character of this line and the first of the next line
1025 # form probably a surrogate pair.
1026 while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1027 document.body[i] += document.body[i+1][0]
1028 document.body[i+1] = document.body[i+1][1:]
1030 # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1031 # This is needed to catch all accented characters.
1032 for i in range(numberoflines):
1033 # Unfortunately we have a mixture of unicode strings and plain strings,
1034 # because we never use u'xxx' for string literals, but 'xxx'.
1035 # Therefore we may have to try two times to normalize the data.
1037 document.body[i] = unicodedata.normalize("NFD", document.body[i])
1039 document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
1041 # Replace accented characters with InsetLaTeXAccent
1042 # Do not convert characters that can be represented in the chosen
1044 encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1045 lang_re = re.compile(r"^\\lang\s(\S+)")
1046 for i in range(len(document.body)):
1048 if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1049 # Track the encoding of the current line
1050 result = lang_re.match(document.body[i])
1052 language = result.group(1)
1053 if language == "default":
1054 encoding_stack[-1] = document.encoding
1056 from lyx2lyx_lang import lang
1057 encoding_stack[-1] = lang[language][3]
1059 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1060 encoding_stack.append(encoding_stack[-1])
1062 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1063 del encoding_stack[-1]
1066 for j in range(len(document.body[i])):
1067 # dotless i and dotless j are both in special_accent_map and can
1068 # occur as an accented character, so we need to test that the
1069 # following character is no accent
1070 if (document.body[i][j] in inverse_special_accent_map and
1071 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1072 accent = document.body[i][j]
1074 dummy = accent.encode(encoding_stack[-1])
1075 except UnicodeEncodeError:
1076 # Insert the rest of the line as new line
1077 if j < len(document.body[i]) - 1:
1078 document.body[i+1:i+1] = document.body[i][j+1:]
1079 # Delete the accented character
1081 document.body[i] = document.body[i][:j-1]
1083 document.body[i] = u''
1084 # Finally add the InsetLaTeXAccent
1085 document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1087 elif j > 0 and document.body[i][j] in inverse_accent_map:
1088 accented_char = document.body[i][j-1]
1089 if accented_char == ' ':
1090 # Conform to LyX output
1092 elif accented_char in inverse_accented_map:
1093 accented_char = inverse_accented_map[accented_char]
1094 accent = document.body[i][j]
1096 dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
1097 except UnicodeEncodeError:
1098 # Insert the rest of the line as new line
1099 if j < len(document.body[i]) - 1:
1100 document.body[i+1:i+1] = document.body[i][j+1:]
1101 # Delete the accented characters
1103 document.body[i] = document.body[i][:j-2]
1105 document.body[i] = u''
1106 # Finally add the InsetLaTeXAccent
1107 document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1109 # Normalize to "Normal form C" (NFC, pre-composed characters) again
1110 for i in range(numberoflines):
1111 document.body[i] = unicodedata.normalize("NFC", document.body[i])
# NOTE(review): this copy is a partial listing -- the embedded original line
# numbers jump (1120 -> 1122 -> 1124), so two char_properties entries
# (presumably "\color" and "\bar" in the upstream lyx_1_5.py) appear to have
# been lost here.  Restore them from upstream before relying on this function.
1114 def normalize_font_whitespace_259(document):
1115 """ Before format 259 the font changes were ignored if a
1116 whitespace was the first or last character in the sequence, this function
1117 transfers the whitespace outside."""
1119 char_properties = {"\\series": "default",
1120 "\\emph": "default",
1122 "\\shape": "default",
1124 "\\family": "default"}
1125 return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """Transfer whitespace out of font language changes.

    Format 259 moved leading/trailing whitespace outside most font
    change sequences, but the \\lang property was overlooked at the
    time.  Apply the identical normalization for language changes.
    """
    return normalize_font_whitespace(document, {"\\lang": "default"})
# NOTE(review): lines are elided in this copy (embedded numbering jumps
# 1151 -> 1154): the branch that returned the \lang argument (presumably
# "return words[1]" plus its "else:") is missing, so as written this always
# falls through to the document language.  Restore from upstream lyx_1_5.py.
1138 def get_paragraph_language(document, i):
1139 """ Return the language of the paragraph in which line i of the document
1140 body is. If the first thing in the paragraph is a \\lang command, that
1141 is the paragraph's language; otherwise, the paragraph's language is the
1142 document's language."""
1144 lines = document.body
1146 first_nonempty_line = \
1147 find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)
1149 words = lines[first_nonempty_line].split()
1151 if len(words) > 1 and words[0] == "\\lang":
1154 return document.language
1156 def normalize_font_whitespace(document, char_properties):
1157 """ Before format 259 the font changes were ignored if a
1158 whitespace was the first or last character in the sequence, this function
1159 transfers the whitespace outside. Only a change in one of the properties
1160 in the provided char_properties is handled by this function."""
1162 if document.backend != "latex":
1165 lines = document.body
1170 while i < len(lines):
1171 words = lines[i].split()
1173 if len(words) > 0 and words[0] == "\\begin_layout":
1174 # a new paragraph resets all font changes
1176 # also reset the default language to be the paragraph's language
1177 if "\\lang" in char_properties.keys():
1178 char_properties["\\lang"] = \
1179 get_paragraph_language(document, i + 1)
1181 elif len(words) > 1 and words[0] in char_properties.keys():
1182 # we have a font change
1183 if char_properties[words[0]] == words[1]:
1184 # property gets reset
1185 if words[0] in changes.keys():
1186 del changes[words[0]]
1187 defaultproperty = True
1190 changes[words[0]] = words[1]
1191 defaultproperty = False
1193 # We need to explicitly reset all changed properties if we find
1194 # a space below, because LyX 1.4 would output the space after
1195 # closing the previous change and before starting the new one,
1196 # and closing a font change means to close all properties, not
1197 # just the changed one.
1199 if lines[i-1] and lines[i-1][-1] == " ":
1200 lines[i-1] = lines[i-1][:-1]
1201 # a space before the font change
1203 for k in changes.keys():
1204 # exclude property k because that is already in lines[i]
1206 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1207 for k in changes.keys():
1208 # exclude property k because that must be added below anyway
1210 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1212 # Property is reset in lines[i], so add the new stuff afterwards
1213 lines[i+1:i+1] = added_lines
1215 # Reset property for the space
1216 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1217 lines[i:i] = added_lines
1218 i = i + len(added_lines)
1220 elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1221 # a space after the font change
1222 if (lines[i+1] == " " and lines[i+2]):
1223 next_words = lines[i+2].split()
1224 if len(next_words) > 0 and next_words[0] == words[0]:
1225 # a single blank with a property different from the
1226 # previous and the next line must not be changed
1229 lines[i+1] = lines[i+1][1:]
1231 for k in changes.keys():
1232 # exclude property k because that is already in lines[i]
1234 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1235 for k in changes.keys():
1236 # exclude property k because that must be added below anyway
1238 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1239 # Reset property for the space
1240 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1241 lines[i:i] = added_lines
1242 i = i + len(added_lines)
# NOTE(review): guard lines are elided in this copy (embedded numbering jumps
# 1249 -> 1251 -> 1253): presumably "if i == -1:" / "else:" around the
# append vs. in-place update.  Restore from upstream before use.
1247 def revert_utf8x(document):
1248 " Set utf8x encoding to utf8. "
1249 i = find_token(document.header, "\\inputencoding", 0)
1251 document.header.append("\\inputencoding auto")
1253 inputenc = get_value(document.header, "\\inputencoding", i)
1254 if inputenc == "utf8x":
1255 document.header[i] = "\\inputencoding utf8"
1256 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# NOTE(review): same structure as revert_utf8x above; the "if i == -1:" /
# "else:" guard lines are elided in this copy (numbering jumps 1261 -> 1263
# -> 1265).  Restore from upstream before use.
1259 def revert_utf8plain(document):
1260 " Set utf8plain encoding to utf8. "
1261 i = find_token(document.header, "\\inputencoding", 0)
1263 document.header.append("\\inputencoding auto")
1265 inputenc = get_value(document.header, "\\inputencoding", i)
1266 if inputenc == "utf8-plain":
1267 document.header[i] = "\\inputencoding utf8"
1268 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# NOTE(review): the scanning loop is elided in this copy (numbering jumps
# 1272 -> 1275 -> 1278 -> 1281): missing are presumably the "i = 0" /
# "while True:" loop, the "if i == -1: break" guard, and the i increment.
# Restore from upstream lyx_1_5.py before use.
1271 def revert_beamer_alert(document):
1272 " Revert beamer's \\alert inset back to ERT. "
1275 i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
1278 document.body[i] = "\\begin_inset ERT"
1281 if (document.body[i][:13] == "\\begin_layout"):
1282 # Insert the \alert command
1283 document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
# NOTE(review): mirror of revert_beamer_alert for the Structure charstyle;
# the same loop/guard lines are elided in this copy (numbering jumps
# 1291 -> 1294 -> 1297 -> 1300).  Restore from upstream before use.
1290 def revert_beamer_structure(document):
1291 " Revert beamer's \\structure inset back to ERT. "
1294 i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
1297 document.body[i] = "\\begin_inset ERT"
1300 if (document.body[i][:13] == "\\begin_layout"):
1301 document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
# NOTE(review): the "if i == -1:" / "return" guards that belong with each
# warning are elided in this copy (numbering jumps 1310 -> 1312 -> 1314 ->
# 1316 -> 1318).  As shown, the warnings and the get_value calls are
# unconditionally sequential, which cannot be the intended control flow.
1308 def convert_changes(document):
1309 " Switch output_changes off if tracking_changes is off. "
1310 i = find_token(document.header, '\\tracking_changes', 0)
1312 document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
1314 j = find_token(document.header, '\\output_changes', 0)
1316 document.warning("Malformed lyx document: Missing '\\output_changes'.")
1318 tracking_changes = get_value(document.header, "\\tracking_changes", i)
1319 output_changes = get_value(document.header, "\\output_changes", j)
1320 if tracking_changes == "false" and output_changes == "true":
1321 document.header[j] = "\\output_changes false"
# NOTE(review): same pattern as revert_utf8x; the "if i == -1:" / "else:"
# guard lines are elided in this copy (numbering jumps 1326 -> 1328 -> 1330).
# Restore from upstream before use.
1324 def revert_ascii(document):
1325 " Set ascii encoding to auto. "
1326 i = find_token(document.header, "\\inputencoding", 0)
1328 document.header.append("\\inputencoding auto")
1330 inputenc = get_value(document.header, "\\inputencoding", i)
1331 if inputenc == "ascii":
1332 document.header[i] = "\\inputencoding auto"
1333 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    """Replace obsolete language names by their current equivalents."""
    renames = {"brazil": "brazilian", "portuges": "portuguese"}
    new_name = renames.get(document.language)
    if new_name is None:
        return
    document.language = new_name
    i = find_token(document.header, "\\language", 0)
    document.header[i] = "\\language %s" % new_name
def revert_language_name(document):
    """Map current language names back to the obsolete spellings."""
    renames = {"brazilian": "brazil", "portuguese": "portuges"}
    old_name = renames.get(document.language)
    if old_name is None:
        return
    document.language = old_name
    i = find_token(document.header, "\\language", 0)
    document.header[i] = "\\language %s" % old_name
1356 # \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    """\\textclass cv -> \\textclass simplecv."""
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
def revert_cv_textclass(document):
    """\\textclass simplecv -> \\textclass cv."""
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
1368 # add scaleBeforeRotation graphics param
# NOTE(review): the scanning loop is elided in this copy (numbering jumps
# 1370 -> 1373 -> 1376 -> 1379): missing are presumably "i = 0" / "while
# True:", the "if i == -1: break" guard, the end-of-inset guard before the
# warning, and the i increment at the bottom.  Restore from upstream.
1369 def convert_graphics_rotation(document):
1370 " add scaleBeforeRotation graphics parameter. "
1373 i = find_token(document.body, "\\begin_inset Graphics", i)
1376 j = find_end_of_inset(document.body, i+1)
1379 document.warning("Malformed LyX document: Could not find end of graphics inset.")
1380 # Search for rotateAngle and width or height or scale
1381 # If these params are not there, nothing needs to be done.
1382 k = find_token(document.body, "\trotateAngle", i + 1, j)
1383 l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
1384 if (k != -1 and l != -1):
1385 document.body.insert(j, 'scaleBeforeRotation')
1390 # remove scaleBeforeRotation graphics param
# NOTE(review): inverse of convert_graphics_rotation.  Several control-flow
# lines are elided in this copy (numbering jumps at 1392->1395, 1398->1401,
# 1403->1405, 1412->1416, 1416->1418, 1420->1422): presumably the scanning
# loop, "if k != -1:" / "else:" branches around the del and the
# special-handling, and "if special == '':" / "else:".  Restore from upstream.
1391 def revert_graphics_rotation(document):
1392 " remove scaleBeforeRotation graphics parameter. "
1395 i = find_token(document.body, "\\begin_inset Graphics", i)
1398 j = find_end_of_inset(document.body, i + 1)
1401 document.warning("Malformed LyX document: Could not find end of graphics inset.")
1402 # If there's a scaleBeforeRotation param, just remove that
1403 k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
1405 del document.body[k]
1407 # if not, and if we have rotateAngle and width or height or scale,
1408 # we have to put the rotateAngle value to special
1409 rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
1410 special = get_value(document.body, 'special', i + 1, j)
1411 if rotateAngle != "":
1412 k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
1416 document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
1418 l = find_token(document.body, "\tspecial", i + 1, j)
1419 document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
1420 k = find_token(document.body, "\trotateAngle", i + 1, j)
1422 del document.body[k]
# NOTE(review): lines are elided in this copy (numbering jumps 1429 -> 1431
# and 1435 -> 1437): missing are presumably the "i = 0" initialization before
# the while loop and the "i = i + 1" increment after the body, plus a comment
# line.  As shown, "i" is used uninitialized.  Restore from upstream.
1427 def convert_tableborder(document):
1428 # The problematic is: LyX double the table cell border as it ignores the "|" character in
1429 # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
1431 while i < len(document.body):
1432 h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1433 k = document.body[i].find("|>{", 0, len(document.body[i]))
1434 # the two tokens have to be in one line
1435 if (h != -1 and k != -1):
1437 document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
# NOTE(review): inverse of convert_tableborder (re-inserts the "|").  The
# "i = 0" initialization and "i = i + 1" increment are elided in this copy
# (numbering jumps 1441 -> 1443 and 1447 -> 1449); as shown, "i" is used
# uninitialized.  Restore from upstream.
1441 def revert_tableborder(document):
1443 while i < len(document.body):
1444 h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1445 k = document.body[i].find(">{", 0, len(document.body[i]))
1446 # the two tokens have to be in one line
1447 if (h != -1 and k != -1):
1449 document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
# NOTE(review): numerous lines are elided in this copy (numbering jumps at
# 1453->1455, 1457->1459, 1460->1463, 1463->1465, 1465->1467, 1467->1469,
# 1471->1473, 1474->1476, 1480->1482): presumably guards such as
# "if i != -1:", the preamble-scan loop bookkeeping that sets k, and the
# "if k != -1:" / "else:" split between prepending and appending armtex.
# Restore from upstream lyx_1_5.py before use.
1453 def revert_armenian(document):
1455 # set inputencoding from armscii8 to auto
1456 if document.inputencoding == "armscii8":
1457 i = find_token(document.header, "\\inputencoding", 0)
1459 document.header[i] = "\\inputencoding auto"
1460 # check if preamble exists, if not k is set to -1
1463 while i < len(document.preamble):
1465 k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
1467 k = document.preamble[i].find("%", 0, len(document.preamble[i]))
1469 # add the entry \usepackage{armtex} to the document preamble
1470 if document.language == "armenian":
1471 # set the armtex entry as the first preamble line
1473 document.preamble[0:0] = ["\\usepackage{armtex}"]
1474 # create the preamble when it doesn't exist
1476 document.preamble.append('\\usepackage{armtex}')
1477 # Set document language from armenian to english
1478 if document.language == "armenian":
1479 document.language = "english"
1480 i = find_token(document.header, "\\language", 0)
1482 document.header[i] = "\\language english"
# NOTE(review): guard lines are elided in this copy (numbering jumps at
# 1489->1491, 1491->1493, 1496->1498, 1502->1504): presumably "if i == -1:" /
# "else:" around the header append vs. in-place update, and an "if i != -1:"
# before the final language assignment.  Restore from upstream before use.
1485 def revert_CJK(document):
1486 " Set CJK encodings to default and languages chinese, japanese and korean to english. "
1487 encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
1488 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
1489 i = find_token(document.header, "\\inputencoding", 0)
1491 document.header.append("\\inputencoding auto")
1493 inputenc = get_value(document.header, "\\inputencoding", i)
1494 if inputenc in encodings:
1495 document.header[i] = "\\inputencoding default"
1496 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1498 if document.language == "chinese-simplified" or \
1499 document.language == "chinese-traditional" or \
1500 document.language == "japanese" or document.language == "korean":
1501 document.language = "english"
1502 i = find_token(document.header, "\\language", 0)
1504 document.header[i] = "\\language english"
# NOTE(review): the "if i != -1:" guard is elided in this copy (numbering
# jumps 1509 -> 1511); as shown, the preamble lines would be appended and
# header.pop(i) executed even when \listings_params is absent (popping the
# last header line via i == -1).  Restore the guard from upstream.
1507 def revert_preamble_listings_params(document):
1508 " Revert preamble option \listings_params "
1509 i = find_token(document.header, "\\listings_params", 0)
1511 document.preamble.append('\\usepackage{listings}')
1512 document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
1513 document.header.pop(i);
1516 def revert_listings_inset(document):
1517 r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1521 lstparams "language=Delphi"
1525 \begin_layout Standard
1535 \begin_layout Standard
1539 lstinline[language=Delphi]{var i = 10;}
1544 There can be an caption inset in this inset
1546 \begin_layout Standard
1547 \begin_inset Caption
1549 \begin_layout Standard
1551 \begin_inset LatexCommand label
1567 i = find_token(document.body, '\\begin_inset listings', i)
1571 if not '\\usepackage{listings}' in document.preamble:
1572 document.preamble.append('\\usepackage{listings}')
1573 j = find_end_of_inset(document.body, i + 1)
1575 # this should not happen
1581 for line in range(i + 1, i + 4):
1582 if document.body[line].startswith('inline'):
1583 inline = document.body[line].split()[1]
1584 if document.body[line].startswith('lstparams'):
1585 params = document.body[line].split()[1].strip('"')
1586 if document.body[line].startswith('status'):
1587 status = document.body[line].split()[1].strip()
1592 cap = find_token(document.body, '\\begin_inset Caption', i)
1594 cap_end = find_end_of_inset(document.body, cap + 1)
1596 # this should not happen
1599 lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
1601 lbl_end = find_end_of_inset(document.body, lbl + 1)
1603 # this should not happen
1608 for line in document.body[lbl : lbl_end + 1]:
1609 if line.startswith('name '):
1610 label = line.split()[1].strip('"')
1612 for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
1613 if not line.startswith('\\'):
1614 caption += line.strip()
1617 # looking for the oneline code for lstinline
1618 inlinecode = document.body[find_end_of_layout(document.body,
1619 find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
1620 if len(caption) > 0:
1621 if len(params) == 0:
1622 params = 'caption={%s}' % caption
1624 params += ',caption={%s}' % caption
1626 if len(params) == 0:
1627 params = 'label={%s}' % label
1629 params += ',label={%s}' % label
1631 params = '[%s]' % params
1632 params = params.replace('\\', '\\backslash\n')
1633 if inline == 'true':
1634 document.body[i:(j+1)] = [r'\begin_inset ERT',
1635 'status %s' % status,
1636 r'\begin_layout %s' % document.default_layout,
1640 'lstinline%s{%s}' % (params, inlinecode),
1645 document.body[i: j+1] = [r'\begin_inset ERT',
1646 'status %s' % status,
1648 r'\begin_layout %s' % document.default_layout,
1652 r'begin{lstlisting}%s' % params,
1654 ] + document.body[k : j - 1] + \
1656 r'\begin_layout %s' % document.default_layout,
# NOTE(review): heavily elided in this copy.  The raw docstring opened at
# embedded line 1666 never closes in the visible lines (its closing '''
# is among the elided lines), and the scanning loop ("while True:",
# "if i == -1: break"), end-of-inset guard, and the tail of the replacement
# list (status/layout/backslash lines and closing brackets) are missing.
# Also note: re.match is evaluated twice on the same input (lines 1701/1702)
# in the upstream code -- a candidate for hoisting when this is restored.
1665 def revert_include_listings(document):
1666 r''' Revert lstinputlisting Include option , translate
1667 \begin_inset Include \lstinputlisting{file}[opt]
1677 \begin_layout Standard
1681 lstinputlisting{file}[opt]
1689 i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
1693 if not '\\usepackage{listings}' in document.preamble:
1694 document.preamble.append('\\usepackage{listings}')
1695 j = find_end_of_inset(document.body, i + 1)
1697 # this should not happen
1699 # find command line lstinputlisting{file}[options]
1700 cmd, file, option = '', '', ''
1701 if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
1702 cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
1703 option = option.replace('\\', '\\backslash\n')
1704 document.body[i : j + 1] = [r'\begin_inset ERT',
1707 r'\begin_layout %s' % document.default_layout,
1711 '%s%s{%s}' % (cmd, option, file),
# NOTE(review): guard/branch lines are elided in this copy (numbering jumps
# at 1719->1721, 1722->1725, 1726->1728, 1728->1730, 1731->1733): presumably
# the "if i == -1:" / "else:" split between inserting a new \options line
# after \textclass and appending the size to an existing \options line.
# Restore from upstream before use.
1717 def revert_ext_font_sizes(document):
1718 if document.backend != "latex": return
1719 if not document.textclass.startswith("ext"): return
1721 fontsize = get_value(document.header, '\\paperfontsize', 0)
1722 if fontsize not in ('10', '11', '12'): return
1725 i = find_token(document.header, '\\paperfontsize', 0)
1726 document.header[i] = '\\paperfontsize default'
1728 i = find_token(document.header, '\\options', 0)
1730 i = find_token(document.header, '\\textclass', 0) + 1
1731 document.header[i:i] = ['\\options %s' % fontsize]
1733 document.header[i] += ',%s' % fontsize
# NOTE(review): control-flow lines are elided in this copy (numbering jumps
# at 1738->1740, 1741->1743, 1743->1746, 1750->1752, 1752->1755, 1757->1764,
# 1765->1768, 1768->1770): presumably "if i == -1: return", the for-loop's
# "fontsize = fs[:-2]; break", the early return in the for/else, the
# "del options[j]; break" inside the enumerate loop, and the "if options:" /
# "else:" split between rewriting and deleting the \options line.
# Restore from upstream lyx_1_5.py before use.
1736 def convert_ext_font_sizes(document):
1737 if document.backend != "latex": return
1738 if not document.textclass.startswith("ext"): return
1740 fontsize = get_value(document.header, '\\paperfontsize', 0)
1741 if fontsize != 'default': return
1743 i = find_token(document.header, '\\options', 0)
1746 options = get_value(document.header, '\\options', i)
1748 fontsizes = '10pt', '11pt', '12pt'
1749 for fs in fontsizes:
1750 if options.find(fs) != -1:
1752 else: # this else will only be attained if the for cycle had no match
1755 options = options.split(',')
1756 for j, opt in enumerate(options):
1757 if opt in fontsizes:
1764 k = find_token(document.header, '\\paperfontsize', 0)
1765 document.header[k] = '\\paperfontsize %s' % fontsize
1768 document.header[i] = '\\options %s' % ','.join(options)
1770 del document.header[i]
# NOTE(review): heavily elided in this copy.  The raw docstring opened at
# embedded line 1774 never closes among the visible lines, and the scanning
# loop ("while True:", "if i == -1: break"), the end-of-layout guard, and
# the tail of the replacement list (status line, \end_layout/\end_inset
# lines and closing brackets after 1816) are missing.  Restore from
# upstream lyx_1_5.py before use.
1773 def revert_separator_layout(document):
1774 r'''Revert --Separator-- to a lyx note
1777 \begin_layout --Separator--
1783 \begin_layout Standard
1784 \begin_inset Note Note
1787 \begin_layout Standard
1800 i = find_token(document.body, r'\begin_layout --Separator--', i)
1803 j = find_end_of_layout(document.body, i + 1)
1805 # this should not happen
1807 document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
1808 r'\begin_inset Note Note',
1811 r'\begin_layout %s' % document.default_layout,
1812 'Separate Environment',
1816 document.body[ i + 1 : j] + \
# NOTE(review): lines are elided in this copy (numbering jumps at 1825->1827,
# 1827->1829, 1830->1832, 1833->end): presumably "if i != -1:", the "i = 0"
# loop initialization, "if h != -1:", and the "i = i + 1" increment.  As
# shown, "i" is reused from the header lookup as the body index.  Also note
# the body strings use "\lang" in a non-raw literal; "\l" is not an escape
# in Python 2 so this works, but raw strings would be safer when restored.
1822 def convert_arabic (document):
1823 if document.language == "arabic":
1824 document.language = "arabic_arabtex"
1825 i = find_token(document.header, "\\language", 0)
1827 document.header[i] = "\\language arabic_arabtex"
1829 while i < len(document.body):
1830 h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
1832 # change the language name
1833 document.body[i] = '\lang arabic_arabtex'
# NOTE(review): exact mirror of convert_arabic above, with the same lines
# elided in this copy (numbering jumps at 1840->1842, 1842->1844, 1845->1847,
# 1848->end): presumably "if i != -1:", "i = 0", "if h != -1:", and
# "i = i + 1".  Restore from upstream lyx_1_5.py before use.
1837 def revert_arabic (document):
1838 if document.language == "arabic_arabtex":
1839 document.language = "arabic"
1840 i = find_token(document.header, "\\language", 0)
1842 document.header[i] = "\\language arabic"
1844 while i < len(document.body):
1845 h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
1847 # change the language name
1848 document.body[i] = '\lang arabic'
# NOTE(review): lines are elided in this copy (numbering jumps at 1855->1857,
# 1857->1859, 1861->1863, 1865->end): presumably the "spec_chars = {}"
# initialization, a comment-line filter on each line, and the trailing
# "fp.close()" / "return spec_chars".  Two upstream issues worth fixing when
# restored:
#  - pathname.strip('lyx2lyx') strips any of the characters l/y/x/2 from
#    BOTH ends of the path (str.strip takes a character set, not a suffix),
#    which only happens to work for the usual install layout;
#  - eval(ucs4) executes text read from the unicodesymbols data file --
#    int(ucs4, 16) (or similar) would be safer than eval.
# The file is also read without a context manager / explicit encoding.
1852 def read_unicodesymbols():
1853 " Read the unicodesymbols list of unicode characters and corresponding commands."
1854 pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
1855 fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
1857 for line in fp.readlines():
1859 line=line.replace(' "',' ') # remove all quotation marks with spaces before
1860 line=line.replace('" ',' ') # remove all quotation marks with spaces after
1861 line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
1863 # flag1 and flag2 are preamble and other flags
1864 [ucs4,command,flag1,flag2] =line.split(None,3)
1865 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
1873 def revert_unicode(document):
1874 '''Transform unicode characters that can not be written using the
1875 document encoding to commands according to the unicodesymbols
1876 file. Characters that can not be replaced by commands are replaced by
1877 an replacement string. Flags other than 'combined' are currently not
1880 replacement_character = '???'
1881 spec_chars = read_unicodesymbols()
1883 # Define strings to start and end ERT and math insets
1884 ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
1885 ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
1886 math_intro='\n\\begin_inset Formula $'
1887 math_outro='$\n\\end_inset\n'
1888 # Find unicode characters and replace them
1889 in_ert = False # flag set to 1 if in ERT inset
1890 in_math = False # flag set to 1 if in math inset
1891 temp_file = os.tmpfile()
1892 insets = [] # list of active insets
1893 mod_body = u'' # to store the modified document body
1895 # Go through the file to capture all combining characters
1896 last_char = '' # to store the previous character
1897 body_string = u'' # store the document temporarily as a string
1898 for line in document.body:
1899 body_string = body_string + line +'\n'
1900 [body_string, apa] = body_string.rsplit('\n',1)
1902 body = body_string.split('\n')
1905 if line.find('\\begin_inset') > -1:
1906 # check which inset to start
1907 if line.find('\\begin_inset ERT') > -1:
1909 insets.append('ert')
1910 elif line.find('\\begin_inset Formula') > -1:
1912 insets.append('math')
1914 insets.append('other')
1915 if line.find('\\end_inset') > -1:
1916 # check which inset to end
1918 cur_inset = insets.pop()
1919 if cur_inset == 'ert':
1921 elif cur_inset == 'math':
1924 pass # end of other inset
1926 pass # inset list was empty (for some reason)
1928 # Try to write the line
1930 # If all goes well the line is written here
1931 temp_file.write(line.encode(document.encoding) + '\n')
1932 mod_body = mod_body + line + '\n'
1933 last_char = line[-1]
1935 # Error, some character(s) in the line need to be replaced
1936 for character in line:
1938 # Try to write the character
1939 temp_file.write(character.encode(document.encoding))
1940 mod_body = mod_body + character
1941 last_char = character
1943 # Try to replace with ERT/math inset
1944 if spec_chars.has_key(character):
1945 command = spec_chars[character][0]; # the command to replace unicode
1946 flag1 = spec_chars[character][1]
1947 flag2 = spec_chars[character][2]
1948 if flag1.find('combining') > -1 or flag2.find('combining') > -1:
1949 # We have a character that should be combined with the previous
1950 command = command + '{' +last_char + '}'
1951 # Remove the last character. Ignore if it is whitespace
1952 if len(last_char.rstrip()) > 0:
1953 # last_char was found and is not whitespace
1954 [mod_body, apa] = mod_body.rsplit(last_char,1)
1956 # The last character was replaced by a command. For now it is
1957 # ignored. This could be handled better.
1959 if command[0:2] == '\\\\':
1960 if command[2:12]=='ensuremath':
1963 command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
1964 command = command.replace('}', '$\n')
1965 elif in_math == False:
1966 # add a math inset with the replacement character
1967 command = command.replace('\\\\ensuremath{\\', math_intro)
1968 command = command.replace('}', math_outro)
1970 # we are already in a math inset
1971 command = command.replace('\\\\ensuremath{\\', '')
1972 command = command.replace('}', '')
1975 # avoid putting an ERT in a math; instead put command as text
1976 command = command.replace('\\\\', '\mathrm{')
1977 command = command + '}'
1978 elif in_ert == False:
1979 # add an ERT inset with the replacement character
1980 command = command.replace('\\\\', ert_intro)
1981 command = command + ert_outro
1983 command = command.replace('\\\\', '\n\\backslash\n')
1984 last_char = '' # indicate that the character should not be removed
1985 mod_body = mod_body + command
1987 # Replace with replacement string
1988 mod_body = mod_body + replacement_character
1989 [mod_body, apa] = mod_body.rsplit('\n',1)
1990 document.body = mod_body.split('\n')
1998 supported_versions = ["1.5.0","1.5"]
1999 convert = [[246, []],
2000 [247, [convert_font_settings]],
2002 [249, [convert_utf8]],
2005 [252, [convert_commandparams, convert_bibitem]],
2007 [254, [convert_esint]],
2010 [257, [convert_caption]],
2011 [258, [convert_lyxline]],
2012 [259, [convert_accent, normalize_font_whitespace_259]],
2014 [261, [convert_changes]],
2016 [263, [normalize_language_name]],
2017 [264, [convert_cv_textclass]],
2018 [265, [convert_tableborder]],
2024 [271, [convert_ext_font_sizes]],
2027 [274, [normalize_font_whitespace_274]],
2028 [275, [convert_graphics_rotation]],
2029 [276, [convert_arabic]]
2033 [275, [revert_arabic]],
2034 [274, [revert_graphics_rotation]],
2036 [272, [revert_separator_layout]],
2037 [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2038 [270, [revert_ext_font_sizes]],
2039 [269, [revert_beamer_alert, revert_beamer_structure]],
2040 [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2041 [267, [revert_CJK]],
2042 [266, [revert_utf8plain]],
2043 [265, [revert_armenian]],
2044 [264, [revert_tableborder]],
2045 [263, [revert_cv_textclass]],
2046 [262, [revert_language_name]],
2047 [261, [revert_ascii]],
2049 [259, [revert_utf8x]],
2052 [256, [revert_caption]],
2053 [255, [revert_encodings]],
2054 [254, [revert_clearpage, revert_cleardoublepage]],
2055 [253, [revert_esint]],
2056 [252, [revert_nomenclature, revert_printnomenclature]],
2057 [251, [revert_commandparams]],
2058 [250, [revert_cs_label]],
2060 [248, [revert_accent, revert_utf8, revert_unicode]],
2061 [247, [revert_booktabs]],
2062 [246, [revert_font_settings]],
2063 [245, [revert_framed]]]
2066 if __name__ == "__main__":