1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 """ Convert files to the file format generated by lyx 1.5"""
26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
27 from lyx2lyx_tools import insert_document_option
28 from LyX import get_encoding
# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2  # True when running under Python 2
# NOTE(review): the actual py2/py3 shims (e.g. a unichr alias used by
# read_unicodesymbols below) appear to be elided here in this chunk.
# End of code to support for both python 2 and 3
39 ####################################################################
40 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the \\end_inset matching the inset at lines[i]."""
    begin_token, end_token = "\\begin_inset", "\\end_inset"
    return find_end_of(lines, i, begin_token, end_token)
def find_end_of_layout(lines, i):
    """Return the index of the \\end_layout closing the layout at lines[i]."""
    begin_token, end_token = "\\begin_layout", "\\end_layout"
    return find_end_of(lines, i, begin_token, end_token)
def find_beginning_of_layout(lines, i):
    """Return the index of the \\begin_layout opening the layout that contains lines[i]."""
    tokens = ("\\begin_layout", "\\end_layout")
    return find_beginning_of(lines, i, tokens[0], tokens[1])
54 # End of helper functions
55 ####################################################################
59 # Notes: Framed/Shaded
def revert_framed(document):
    "Revert framed notes. "
    # Older formats know no Framed/Shaded note variants, so every such
    # inset is downgraded to a plain Note.  The visible chunk handled only
    # one occurrence and did not guard against find_tokens returning -1;
    # restore the scan loop and the termination guard.
    i = 0
    while True:
        i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
        if i == -1:
            # No more framed/shaded notes in the body.
            return
        document.body[i] = "\\begin_inset Note"
        i += 1
# Mapping of a LyX 1.4 \fontscheme value to the LyX 1.5 font used for each
# family.  The three tables must share the same key set: revert_font_settings
# iterates roman_fonts.keys() and looks each scheme up in all three.
# Fix: the roman_fonts and sans_fonts literals were left unterminated and
# were missing the 'pslatex' entry that typewriter_fonts already has.
roman_fonts = {'default' : 'default', 'ae' : 'ae',
               'times' : 'times', 'palatino' : 'palatino',
               'helvet' : 'default', 'avant' : 'default',
               'newcent' : 'newcent', 'bookman' : 'bookman',
               'pslatex' : 'times'}
sans_fonts = {'default' : 'default', 'ae' : 'default',
              'times' : 'default', 'palatino' : 'default',
              'helvet' : 'helvet', 'avant' : 'avant',
              'newcent' : 'default', 'bookman' : 'default',
              'pslatex' : 'helvet'}
typewriter_fonts = {'default' : 'default', 'ae' : 'default',
                    'times' : 'default', 'palatino' : 'default',
                    'helvet' : 'default', 'avant' : 'default',
                    'newcent' : 'default', 'bookman' : 'default',
                    'pslatex' : 'courier'}
def convert_font_settings(document):
    " Convert font settings. "
    # Replace the single 1.4 \fontscheme header entry with the per-family
    # 1.5 \font_* entries.  Fixes vs the visible chunk: the "Missing
    # \fontscheme" warning fired unconditionally (the -1 guard was lost),
    # and the emitted list lacked the \font_sc/\font_osf entries that
    # revert_font_settings (below in this file) expects to read back.
    i = 0
    i = find_token_exact(document.header, "\\fontscheme", i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        # Fall back to a default scheme inserted at the top of the header.
        i = 0
        document.header.insert(i, '\\fontscheme default')
    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
    if font_scheme == '':
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        font_scheme = 'default'
    if not font_scheme in list(roman_fonts.keys()):
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
        font_scheme = 'default'
    # One 1.4 scheme expands to the full set of 1.5 font settings.
    document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
                              '\\font_sans %s' % sans_fonts[font_scheme],
                              '\\font_typewriter %s' % typewriter_fonts[font_scheme],
                              '\\font_default_family default',
                              '\\font_sc false',
                              '\\font_osf false',
                              '\\font_sf_scale 100',
                              '\\font_tt_scale 100']
def revert_font_settings(document):
    " Revert font settings. "
    # Fold the 1.5 per-family \font_* header entries back into the single
    # 1.4 \fontscheme entry, emitting raw preamble code where no scheme
    # reproduces the chosen fonts.
    # NOTE(review): several lines (the i/insert_line initialisation and a
    # number of if/else branch lines) appear to be elided from this chunk;
    # the indentation below is a best-effort reading — confirm against the
    # full file before relying on it.
    fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
    for family in 'roman', 'sans', 'typewriter':
        name = '\\font_%s' % family
        i = find_token_exact(document.header, name, i)
        # presumably guarded by an `if i == -1:` in the full source — confirm
        document.warning("Malformed LyX document: Missing `%s'." % name)
        # remember where the first \font_* entry sat, so \fontscheme can be
        # re-inserted at the same position later
        if (insert_line < 0):
        fonts[family] = get_value(document.header, name, i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_default_family', i)
    document.warning("Malformed LyX document: Missing `\\font_default_family'.")
    font_default_family = 'default'
    font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
    del document.header[i]
    i = find_token_exact(document.header, '\\font_sc', i)
    document.warning("Malformed LyX document: Missing `\\font_sc'.")
    font_sc = get_value(document.header, '\\font_sc', i, i + 1)
    del document.header[i]
    # Small-caps selection has no 1.4 equivalent.
    if font_sc != 'false':
        document.warning("Conversion of '\\font_sc' not yet implemented.")
    i = find_token_exact(document.header, '\\font_osf', i)
    document.warning("Malformed LyX document: Missing `\\font_osf'.")
    font_osf = get_value(document.header, '\\font_osf', i, i + 1)
    del document.header[i]
    i = find_token_exact(document.header, '\\font_sf_scale', i)
    document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
    font_sf_scale = '100'
    font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
    del document.header[i]
    if font_sf_scale != '100':
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
    i = find_token_exact(document.header, '\\font_tt_scale', i)
    document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
    font_tt_scale = '100'
    font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
    del document.header[i]
    if font_tt_scale != '100':
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
    # First try to find a 1.4 scheme that reproduces all three families.
    for font_scheme in list(roman_fonts.keys()):
        if (roman_fonts[font_scheme] == fonts['roman'] and
            sans_fonts[font_scheme] == fonts['sans'] and
            typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
    # No scheme matched: fall back to 'default' and emit preamble code.
    font_scheme = 'default'
    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
    for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
        if fonts['roman'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmss', 'lmss', 'cmbr':
        if fonts['sans'] == font:
            document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
    for font in 'berasans':
        if fonts['sans'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmtt', 'lmtt', 'cmtl':
        if fonts['typewriter'] == font:
            document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
    for font in 'courier', 'beramono', 'luximono':
        if fonts['typewriter'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")
def revert_booktabs(document):
    " We remove the booktabs flag or everything else will become a mess. "
    # Regexes matching rows that carry booktabs-only spacing attributes.
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    # NOTE(review): the loop counter initialisation and the -1 guards after
    # the two find calls appear to be elided from this chunk.
    i = find_token(document.body, "\\begin_inset Tabular", i)
    j = find_end_of_inset(document.body, i + 1)
    document.warning("Malformed LyX document: Could not find end of tabular.")
    for k in range(i, j):
        # Drop the booktabs attribute from the <features> tag.
        if re.search('^<features.* booktabs="true".*>$', document.body[k]):
            document.warning("Converting 'booktabs' table to normal table.")
            document.body[k] = document.body[k].replace(' booktabs="true"', '')
        # Strip the extra row-spacing attributes from <row> tags.
        if re.search(re_row, document.body[k]):
            document.warning("Removing extra row space.")
            document.body[k] = re_tspace.sub('', document.body[k])
            document.body[k] = re_bspace.sub('', document.body[k])
            document.body[k] = re_ispace.sub('', document.body[k])
def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.
    Files with an inputencoding of "auto" or "default" and multiple languages
    where at least two languages have different default encodings are encoded
    in multiple encodings for file formats < 249. These files are incorrectly
    read and written (as if the whole file was in the encoding of the main
    This is not true for files written by CJK-LyX, they are always in the locale

    - converts from fake unicode values to true unicode if forward is true, and
    - converts from true unicode values to fake unicode if forward is false.
    document.encoding must be set to the old value (format 248) in both cases.

    We do this here and not in LyX.py because it is far easier to do the
    necessary parsing in modern formats than in ancient ones.
    """
    # NOTE(review): this chunk has elided lines (loop counter and `insets`
    # initialisation, parts of the docstring, and some branch lines); the
    # indentation below is a best-effort reading.
    # Insets whose paragraphs use the document language's encoding.
    inset_types = ["Foot", "Note"]
    # CJK-LyX files are entirely in the locale encoding: nothing to do.
    if document.cjk_encoding != '':
    # Stack of encodings, one level per nested layout.
    encoding_stack = [document.encoding]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    inset_re = re.compile(r"^\\begin_inset\s(\S+)")
    if not forward: # no need to read file unless we are reverting
        spec_chars = read_unicodesymbols()
    if document.inputencoding == "auto" or document.inputencoding == "default":
        while i < len(document.body):
            result = lang_re.match(document.body[i])
            language = result.group(1)
            if language == "default":
                document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                encoding_stack[-1] = document.encoding
            # A \lang switch changes the active encoding to that
            # language's default (column 3 of the lang table).
            from lyx2lyx_lang import lang
            document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
            encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                # Footnote/Note paragraphs start in the document language's
                # default encoding, not the surrounding one.
                if len(insets) > 0 and insets[-1] in inset_types:
                    from lyx2lyx_lang import lang
                    encoding_stack.append(lang[document.language][3])
                encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                if len(encoding_stack) == 1:
                    # Don't remove the document encoding from the stack
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                del encoding_stack[-1]
            elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
                inset_result = inset_re.match(document.body[i])
                insets.append(inset_result.group(1))
            elif find_token(document.body, "\\end_inset", i, i + 1) == i:
            if encoding_stack[-1] != document.encoding:
                # This line has been incorrectly interpreted as if it was
                # encoded in 'encoding'.
                # Convert back to the 8bit string that was in the file.
                orig = document.body[i].encode(document.encoding)
                # Convert the 8bit string that was in the file to unicode
                # with the correct encoding.
                document.body[i] = orig.decode(encoding_stack[-1])
                # Convert unicode to the 8bit string that will be written
                # to the file with the correct encoding.
                orig = document.body[i].encode(encoding_stack[-1])
                # Convert the 8bit string that will be written to the
                # file to fake unicode with the encoding that will later
                # be used when writing to the file.
                document.body[i] = orig.decode(document.encoding)
                # Characters missing from the target encoding are replaced
                # via the unicodesymbols table; the line may become several.
                mod_line = revert_unicode_line(document, i, insets, spec_chars)
                document.body[i:i+1] = mod_line.split('\n')
                i += len(mod_line.split('\n')) - 1
def convert_utf8(document):
    """Switch the document over to UTF-8 encoding."""
    # Re-encode the body first (it still carries the old encoding), then
    # record the new encoding on the document.
    convert_multiencoding(document, True)
    document.encoding = "utf8"
def revert_utf8(document):
    " Set document encoding to the value corresponding to inputencoding. "
    # Fix: the visible chunk had an `elif` with no preceding `if` — the
    # `if i == -1:` guard for a missing \inputencoding entry was lost.
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # No entry at all: older default was auto.
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8":
        # utf8 did not exist in format 248; fall back to auto.
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
    document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
    # Re-encode the body into the (possibly multi-) legacy encoding.
    convert_multiencoding(document, False)
# FIXME: Use the version in unicode_symbols.py which has some bug fixes
def read_unicodesymbols():
    " Read the unicodesymbols list of unicode characters and corresponding commands."
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # NOTE(review): strip('lyx2lyx') strips a *character set*, not the
    # suffix string — one of the bugs the FIXME above refers to.
    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
    # NOTE(review): the spec_chars initialisation, the try/except around
    # the split, fp.close() and the return appear elided from this chunk.
    for line in fp.readlines():
        line=line.replace(' "',' ') # remove all quotation marks with spaces before
        line=line.replace('" ',' ') # remove all quotation marks with spaces after
        line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
        # flag1 and flag2 are preamble and other flags
        [ucs4,command,flag1,flag2] =line.split(None,3)
        # unichr is py2-only; presumably aliased for py3 in the elided
        # compatibility block near the top of the file — confirm.
        spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
    # Replace the characters of body line i that cannot be encoded in
    # document.encoding with ERT/math commands from spec_chars; characters
    # with no known command become replacement_character.
    # NOTE(review): the mod_line/last_char initialisation, the try/except
    # framing around the encode() probe, and the final return appear to be
    # elided from this chunk; indentation is a best-effort reading.
    # Define strings to start and end ERT and math insets
    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s' % document.default_layout
    ert_outro='\n\\end_layout\n\n\\end_inset\n'
    math_intro='\n\\begin_inset Formula $'
    math_outro='$\n\\end_inset'
    # Seed last_char from the previous line for combining accents.
    if i and not is_inset_line(document, i-1):
        last_char = document.body[i - 1][-1:]
    line = document.body[i]
    for character in line:
        # Try to write the character
        dummy = character.encode(document.encoding)
        mod_line += character
        last_char = character
        # Try to replace with ERT/math inset
        if character in spec_chars:
            command = spec_chars[character][0] # the command to replace unicode
            flag1 = spec_chars[character][1]
            flag2 = spec_chars[character][2]
            if flag1.find('combining') > -1 or flag2.find('combining') > -1:
                # We have a character that should be combined with the previous
                command += '{' + last_char + '}'
                # Remove the last character. Ignore if it is whitespace
                if len(last_char.rstrip()):
                    # last_char was found and is not whitespace
                    mod_line = mod_line[:-1]
                else: # last_char belongs to the last line
                    document.body[i-1] = document.body[i-1][:-1]
            # The last character was replaced by a command. For now it is
            # ignored. This could be handled better.
            if command[0:2] == '\\\\':
                if command[2:12]=='ensuremath':
                    if insets and insets[-1] == "ERT":
                        # in ERT: write math as $...$ with \backslash lines
                        command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
                        command = command.replace('}', '$\n')
                    elif not insets or insets[-1] != "Formula":
                        # add a math inset with the replacement character
                        command = command.replace('\\\\ensuremath{\\', math_intro)
                        command = command.replace('}', math_outro)
                    # we are already in a math inset
                    command = command.replace('\\\\ensuremath{\\', '')
                    command = command.replace('}', '')
                if insets and insets[-1] == "Formula":
                    # avoid putting an ERT in a math; instead put command as text
                    command = command.replace('\\\\', '\mathrm{')
                    command = command + '}'
                elif not insets or insets[-1] != "ERT":
                    # add an ERT inset with the replacement character
                    command = command.replace('\\\\', '\n\\backslash\n')
                    command = ert_intro + command + ert_outro
                command = command.replace('\\\\', '\n\\backslash\n')
                last_char = '' # indicate that the character should not be removed
        # Replace with replacement string
        mod_line += replacement_character
def revert_unicode(document):
    '''Transform unicode characters that can not be written using the
    document encoding to commands according to the unicodesymbols
    file. Characters that can not be replaced by commands are replaced by
    an replacement string. Flags other than 'combined' are currently not
    implemented.'''
    spec_chars = read_unicodesymbols()
    insets = [] # list of active insets
    # Go through the document to capture all combining characters
    # NOTE(review): the loop counter initialisation and the try/except
    # framing around the encode() probe appear elided from this chunk.
    while i < len(document.body):
        line = document.body[i]
        # Track which inset we are in, so the replacement knows whether
        # ERT or a math inset is legal at this point.
        if line.find('\\begin_inset') > -1:
            insets.append(line[13:].split()[0])
        if line.find('\\end_inset') > -1:
        # Try to write the line
        # If all goes well the line is written here
        dummy = line.encode(document.encoding)
        # Error, some character(s) in the line need to be replaced
        mod_line = revert_unicode_line(document, i, insets, spec_chars)
        document.body[i:i+1] = mod_line.split('\n')
        i += len(mod_line.split('\n'))
def revert_cs_label(document):
    " Remove status flag of charstyle label. "
    # NOTE(review): the loop scaffolding (counter init, -1 guard/break and
    # the line deletion for a found 'show_label') appears elided from this
    # chunk.
    i = find_token(document.body, "\\begin_inset CharStyle", i)
    # Seach for a line starting 'show_label'
    # If it is not there, break with a warning message
    if (document.body[i][:10] == "show_label"):
    elif (document.body[i][:13] == "\\begin_layout"):
        document.warning("Malformed LyX document: Missing 'show_label'.")
def convert_bibitem(document):
    r"""Convert

        \bibitem [option]{argument}

    to

        \begin_inset LatexCommand bibitem
        ...

    This must be called after convert_commandparams.
    """
    # NOTE(review): the loop scaffolding (counter init, -1 guard) and the
    # branch handling a missing optional argument appear elided from this
    # chunk.
    i = find_token(document.body, "\\bibitem", i)
    # Locate the optional [...] argument, if any.
    j = document.body[i].find('[') + 1
    k = document.body[i].rfind(']')
    if j == 0: # No optional argument found
    option = document.body[i][j:k]
    # Locate the mandatory {...} argument.
    j = document.body[i].rfind('{') + 1
    k = document.body[i].rfind('}')
    argument = document.body[i][j:k]
    lines = ['\\begin_inset LatexCommand bibitem']
    # Double quotes inside parameter values must be escaped.
    lines.append('label "%s"' % option.replace('"', '\\"'))
    lines.append('key "%s"' % argument.replace('"', '\\"'))
    lines.append('\\end_inset')
    document.body[i:i+1] = lines
# Parameter-name table for LatexCommand insets.
# command : [option1, option2, argument] — an empty string means the
# command takes no parameter in that slot.
_CITE_COMMANDS = (
    "cite", "citet", "citep", "citealt", "citealp", "citeauthor",
    "citeyear", "citeyearpar",
    "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
    "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
    "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
    "citefield", "citetitle", "cite*",
)
_REF_COMMANDS = ("eqref", "pageref", "prettyref", "ref", "vpageref", "vref")

commandparams_info = {cmd: ["after", "before", "key"] for cmd in _CITE_COMMANDS}
commandparams_info.update((cmd, ["name", "", "reference"]) for cmd in _REF_COMMANDS)
commandparams_info.update({
    "bibitem" : ["label", "", "key"],
    "bibtex" : ["options", "btprint", "bibfiles"],
    "hfill" : ["", "", ""],
    "index" : ["", "", "name"],
    "printindex" : ["", "", "name"],
    "label" : ["", "", "name"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl" : ["name", "", "target"],
    "url" : ["name", "", "target"],
})
def convert_commandparams(document):
    r"""Convert

        \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}

    to the parameter form

        \begin_inset LatexCommand cmdname
        name1 "opt1"
        ...

    name1, name2 and name3 can be different for each command.
    """
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.
    # NOTE(review): the loop scaffolding, -1 guards, and most of the
    # character-by-character state machine (state/name/option1/option2/
    # argument accumulators, the for-loop over `command`) are elided from
    # this chunk; indentation is a best-effort reading.
    i = find_token(document.body, "\\begin_inset LatexCommand", i)
    command = document.body[i][26:].strip()
    document.warning("Malformed LyX document: Missing LatexCommand name.")
    j = find_token(document.body, "\\end_inset", i + 1)
    document.warning("Malformed document")
    # The command may be spread over several lines: re-join it first.
    command += "".join(document.body[i+1:j])
    document.body[i+1:j] = []
    # The following parser is taken from the original InsetCommandParams::scanCommand
    # nestdepth tracks bracket/brace nesting inside the current parameter.
    # Used to handle things like \command[foo[bar]]{foo{bar}}
    if ((state == "CMDNAME" and c == ' ') or
        (state == "CMDNAME" and c == '[') or
        (state == "CMDNAME" and c == '{')):
    if ((state == "OPTION" and c == ']') or
        (state == "SECOPTION" and c == ']') or
        (state == "CONTENT" and c == '}')):
        nestdepth = nestdepth - 1
    if ((state == "OPTION" and c == '[') or
        (state == "SECOPTION" and c == '[') or
        (state == "CONTENT" and c == '{')):
        nestdepth = nestdepth + 1
    # Accumulate the current character into the active parameter.
    if state == "CMDNAME":
    elif state == "OPTION":
    elif state == "SECOPTION":
    elif state == "CONTENT":
    # The transition on '[' depends on whether an option was just closed
    # (b holds the previous character).
    elif c == '[' and b != ']':
        nestdepth = 0 # Just to be sure
    elif c == '[' and b == ']':
        nestdepth = 0 # Just to be sure
    nestdepth = 0 # Just to be sure
    # Now we have parsed the command, output the parameters
    lines = ["\\begin_inset LatexCommand %s" % name]
    # Slot names come from commandparams_info; an empty slot name means the
    # command takes no parameter there, so its value is dropped with a warning.
    if commandparams_info[name][0] == "":
        document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
    lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"')))
    if commandparams_info[name][1] == "":
        document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
    lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"')))
    if commandparams_info[name][2] == "":
        document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
    lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"')))
    document.body[i:i+1] = lines
def revert_commandparams(document):
    # Rebuild the old one-line \cmdname[opt1][opt2]{arg} form from the
    # "pname pvalue" parameter lines inside a LatexCommand inset.
    regex = re.compile(r'(\S+)\s+(.+)')
    # NOTE(review): the loop scaffolding, -1 guards, and the
    # option1/option2/argument/preview_line initialisers, as well as the
    # if/elif structure selecting between the `lines = [...]` variants,
    # appear elided from this chunk.
    i = find_token(document.body, "\\begin_inset LatexCommand", i)
    name = document.body[i].split()[2]
    j = find_end_of_inset(document.body, i)
    for k in range(i + 1, j):
        match = re.match(regex, document.body[k])
        pname = match.group(1)
        pvalue = match.group(2)
        if pname == "preview":
            preview_line = document.body[k]
        elif (commandparams_info[name][0] != "" and
              pname == commandparams_info[name][0]):
            # Undo the quoting applied by convert_commandparams.
            option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
        elif (commandparams_info[name][1] != "" and
              pname == commandparams_info[name][1]):
            option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
        elif (commandparams_info[name][2] != "" and
              pname == commandparams_info[name][2]):
            argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
        elif document.body[k].strip() != "":
            document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
    # bibitem reverts to a plain \bibitem line; other commands keep the
    # inset with the appropriate optional-argument combination.
    if name == "bibitem":
        lines = ["\\bibitem {%s}" % argument]
        lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
    lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
    lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
    lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
    lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
    if name != "bibitem":
        if preview_line != "":
            lines.append(preview_line)
        lines.append('\\end_inset')
    document.body[i:j+1] = lines
def revert_nomenclature(document):
    " Convert nomenclature entry to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
    # NOTE(review): the loop scaffolding, -1 guards, the symbol/description/
    # prefix/preview_line/use_nomencl initialisers and the tail of the ERT
    # replacement list appear elided from this chunk.
    i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
    j = find_end_of_inset(document.body, i + 1)
    for k in range(i + 1, j):
        match = re.match(regex, document.body[k])
        name = match.group(1)
        value = match.group(2)
        if name == "preview":
            preview_line = document.body[k]
        elif name == "symbol":
            symbol = value.strip('"').replace('\\"', '"')
        elif name == "description":
            description = value.strip('"').replace('\\"', '"')
        elif name == "prefix":
            prefix = value.strip('"').replace('\\"', '"')
        elif document.body[k].strip() != "":
            document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
    # Without a prefix the optional argument is omitted entirely.
    command = 'nomenclature{%s}{%s}' % (symbol, description)
    command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
    document.body[i:j+1] = ['\\begin_inset ERT',
                            '\\begin_layout %s' % document.default_layout,
    # Load nomencl in the preamble exactly once.
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def revert_printnomenclature(document):
    " Convert printnomenclature to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
    # NOTE(review): the loop scaffolding, -1 guards, the labelwidth/
    # preview_line/use_nomencl initialisers and the tail of the ERT
    # replacement list appear elided from this chunk.
    i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
    j = find_end_of_inset(document.body, i + 1)
    for k in range(i + 1, j):
        match = re.match(regex, document.body[k])
        name = match.group(1)
        value = match.group(2)
        if name == "preview":
            preview_line = document.body[k]
        elif name == "labelwidth":
            labelwidth = value.strip('"').replace('\\"', '"')
        elif document.body[k].strip() != "":
            document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
    # An empty labelwidth drops the optional argument.
    # NOTE(review): emitting 'nomenclature' here (not 'printnomenclature')
    # for a printnomenclature inset looks suspicious — confirm upstream.
    command = 'nomenclature{}'
    command = 'nomenclature[%s]' % labelwidth
    document.body[i:j+1] = ['\\begin_inset ERT',
                            '\\begin_layout %s' % document.default_layout,
    # Load nomencl in the preamble exactly once.
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def convert_esint(document):
    " Add \\use_esint setting to header. "
    # Fix: the visible chunk warned unconditionally and then inserted at a
    # possibly-invalid index; restore the -1 guard and bail out when the
    # anchor entry is missing.
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')
def revert_esint(document):
    " Remove \\use_esint setting from header. "
    # Fix: the visible chunk warned unconditionally and appended the esint
    # package regardless of the setting's value; restore the -1 guard and
    # the value check.
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    if use_esint == '2':
        document.preamble.append('\\usepackage{esint}')
def revert_clearpage(document):
    # \clearpage -> ERT (the older format has no clearpage setting).
    # NOTE(review): the docstring, loop scaffolding, -1 guard and the tail
    # of the ERT replacement list appear elided from this chunk.
    i = find_token(document.body, "\\clearpage", i)
    document.body[i:i+1] = ['\\begin_inset ERT',
                            '\\begin_layout %s' % document.default_layout,
def revert_cleardoublepage(document):
    " cleardoublepage -> ERT "
    # NOTE(review): the loop scaffolding, -1 guard and the tail of the ERT
    # replacement list appear elided from this chunk.
    i = find_token(document.body, "\\cleardoublepage", i)
    document.body[i:i+1] = ['\\begin_inset ERT',
                            '\\begin_layout %s' % document.default_layout,
def convert_lyxline(document):
    " remove fontsize commands for \lyxline "
    # The problematic is: The old \lyxline definition doesn't handle the fontsize
    # to change the line thickness. The new definiton does this so that imported
    # \lyxlines would have a different line thickness. The eventual fontsize command
    # before \lyxline is therefore removed to get the same output.
    fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
                 "large", "Large", "LARGE", "huge", "Huge"]
    # NOTE(review): the inner loop-counter initialisation and the loop tail
    # (advance/else-break) appear elided from this chunk.
    for n in range(0, len(fontsizes)):
        while i < len(document.body):
            i = find_token(document.body, "\\size " + fontsizes[n], i)
            k = find_token(document.body, "\\lyxline", i)
            # the corresponding fontsize command is always 2 lines before the \lyxline
            if (i != -1 and k == i+2):
                document.body[i:i+1] = []
def revert_encodings(document):
    " Set new encodings to auto. "
    # Input encodings that did not exist in the older format; documents
    # using one of them fall back to automatic encoding selection.
    encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
    # Fix: the visible chunk both appended a new entry and rewrote an
    # existing one unconditionally — restore the if/else around the result
    # of the header lookup.
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def convert_caption(document):
    " Convert caption layouts to caption insets. "
    # Each "Caption" paragraph becomes a default-layout paragraph holding a
    # Caption inset.  Fixes vs the visible chunk: the scan loop, the -1
    # guards for both finds and the loop advance were lost, so only an
    # unguarded single replacement remained.
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout Caption", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # Close the inner layout and the new inset after the old paragraph,
        # then open the wrapper paragraph and the Caption inset before it.
        document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
        document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
                                "\\begin_inset Caption", "",
                                "\\begin_layout %s" % document.default_layout]
        i += 1
def revert_caption(document):
    " Convert caption insets to caption layouts. "
    " This assumes that the text class has a caption style. "
    # NOTE(review): the scan-loop scaffolding and several guard/else/advance
    # lines appear elided from this chunk; the indentation below is a
    # best-effort reading.
    i = find_token(document.body, "\\begin_inset Caption", i)
    # We either need to delete the previous \begin_layout line, or we
    # need to end the previous layout if this inset is not in the first
    # position of the paragraph.
    layout_before = find_token_backwards(document.body, "\\begin_layout", i)
    if layout_before == -1:
        document.warning("Malformed LyX document: Missing `\\begin_layout'.")
    layout_line = document.body[layout_before]
    del_layout_before = True
    l = layout_before + 1
    # Any non-empty line between the layout start and the inset means the
    # inset is not at the start of the paragraph.
    if document.body[l] != "":
        del_layout_before = False
    if del_layout_before:
        del document.body[layout_before:i]
    document.body[i:i] = ["\\end_layout", ""]
    # Find start of layout in the inset and end of inset
    j = find_token(document.body, "\\begin_layout", i)
    document.warning("Malformed LyX document: Missing `\\begin_layout'.")
    k = find_end_of_inset(document.body, i)
    document.warning("Malformed LyX document: Missing `\\end_inset'.")
    # We either need to delete the following \end_layout line, or we need
    # to restart the old layout if this inset is not at the paragraph end.
    layout_after = find_token(document.body, "\\end_layout", k)
    if layout_after == -1:
        document.warning("Malformed LyX document: Missing `\\end_layout'.")
    del_layout_after = True
    while l < layout_after:
        if document.body[l] != "":
            del_layout_after = False
    if del_layout_after:
        del document.body[k+1:layout_after+1]
    document.body[k+1:k+1] = [layout_line, ""]
    # delete \begin_layout and \end_inset and replace \begin_inset with
    # "\begin_layout Caption". This works because we can only have one
    # paragraph in the caption inset: The old \end_layout will be recycled.
    del document.body[k]
    if document.body[k] == "":
        del document.body[k]
    del document.body[j]
    if document.body[j] == "":
        del document.body[j]
    document.body[i] = "\\begin_layout Caption"
    if document.body[i+1] == "":
        del document.body[i+1]
# Accents of InsetLaTeXAccent
# Maps the one-character accent code to the Unicode combining character.
# Fix: the `accent_map = {` and `accented_map = {` assignment openers and
# the closing braces were lost; the names are grounded by their uses in
# _convert_accent below.
accent_map = {
    "`" : u'\u0300', # grave
    "'" : u'\u0301', # acute
    "^" : u'\u0302', # circumflex
    "~" : u'\u0303', # tilde
    "=" : u'\u0304', # macron
    "u" : u'\u0306', # breve
    "." : u'\u0307', # dot above
    "\"": u'\u0308', # diaeresis
    "r" : u'\u030a', # ring above
    "H" : u'\u030b', # double acute
    "v" : u'\u030c', # caron
    "b" : u'\u0320', # minus sign below
    "d" : u'\u0323', # dot below
    "c" : u'\u0327', # cedilla
    "k" : u'\u0328', # ogonek
    "t" : u'\u0361' # tie. This is special: It spans two characters, but
                    # only one is given as argument, so we don't need to
                    # treat it differently.
}


# special accents of InsetLaTeXAccent without argument
special_accent_map = {
    'i' : u'\u0131', # dotless i
    'j' : u'\u0237', # dotless j
    'l' : u'\u0142', # l with stroke
    'L' : u'\u0141' # L with stroke
}


# special accent arguments of InsetLaTeXAccent
accented_map = {
    '\\i' : u'\u0131', # dotless i
    '\\j' : u'\u0237' # dotless j
}
1067 def _convert_accent(accent, accented_char):
1069 char = accented_char
1071 if type in special_accent_map:
1072 return special_accent_map[type]
1073 # a missing char is treated as space by LyX
1075 elif type == 'q' and char in ['t', 'd', 'l', 'L']:
1076 # Special caron, only used with t, d, l and L.
1077 # It is not in the map because we convert it to the same unicode
1078 # character as the normal caron: \q{} is only defined if babel with
1079 # the czech or slovak language is used, and the normal caron
1080 # produces the correct output if the T1 font encoding is used.
1081 # For the same reason we never convert to \q{} in the other direction.
1083 elif char in accented_map:
1084 char = accented_map[char]
1085 elif (len(char) > 1):
1086 # We can only convert accents on a single char
1088 a = accent_map.get(type)
1090 return unicodedata.normalize("NFC", "%s%s" % (char, a))
1094 def convert_ertbackslash(body, i, ert, default_layout):
1095 r""" -------------------------------------------------------------------------------------------
1096 Convert backslashes and '\n' into valid ERT code, append the converted
1097 text to body[i] and return the (maybe incremented) line index i"""
1101 body[i] = body[i] + '\\backslash '
1105 body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
1108 body[i] = body[i] + c
1112 def convert_accent(document):
1113 # The following forms are supported by LyX:
1114 # '\i \"{a}' (standard form, as written by LyX)
1115 # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
1116 # '\i \"{ }' (also accepted if the accented char is a space)
1117 # '\i \" a' (also accepted)
1118 # '\i \"' (also accepted)
1119 re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
1120 re_contents = re.compile(r'^([^\s{]+)(.*)$')
1121 re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
1124 i = find_re(document.body, re_wholeinset, i)
1127 match = re_wholeinset.match(document.body[i])
1128 prefix = match.group(1)
1129 contents = match.group(3).strip()
1130 match = re_contents.match(contents)
1132 # Strip first char (always \)
1133 accent = match.group(1)[1:]
1134 accented_contents = match.group(2).strip()
1135 match = re_accentedcontents.match(accented_contents)
1136 accented_char = match.group(1)
1137 converted = _convert_accent(accent, accented_char)
1139 # Normalize contents
1140 contents = '%s{%s}' % (accent, accented_char),
1142 document.body[i] = '%s%s' % (prefix, converted)
1145 document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
1146 document.body[i] = prefix
1147 document.body[i+1:i+1] = ['\\begin_inset ERT',
1150 '\\begin_layout %s' % document.default_layout,
1154 i = convert_ertbackslash(document.body, i + 7,
1156 document.default_layout)
1157 document.body[i+1:i+1] = ['\\end_layout',
def is_inset_line(document, i):
    """Return True if body line i starts an inset or carries one inline."""
    line = document.body[i]
    if line.startswith('\\'):
        return True
    # An inline inset shows up as a backslash inside the last two tokens.
    return '\\' in "".join(line.split()[-2:])
# A wrapper around unicodedata.normalize that handles special cases (cf. bug 3313)
def normalize(form, text):
    """Normalize `text` to `form`, but copy OHM and ANGSTROM through verbatim.

    Those two code points must not be folded to their canonical
    equivalents, so only the runs of characters between them are fed to
    unicodedata.normalize().
    """
    keep_characters = (0x2126, 0x212b)  # OHM SIGN, ANGSTROM SIGN
    pieces = []
    pending = []
    for ch in text:
        if ord(ch) in keep_characters:
            if pending:
                pieces.append(unicodedata.normalize(form, "".join(pending)))
                pending = []
            pieces.append(ch)
        else:
            pending.append(ch)
    if pending:
        pieces.append(unicodedata.normalize(form, "".join(pending)))
    return "".join(pieces)
1190 def revert_accent(document):
1191 inverse_accent_map = {}
1192 for k in accent_map:
1193 inverse_accent_map[accent_map[k]] = k
1194 inverse_special_accent_map = {}
1195 for k in special_accent_map:
1196 inverse_special_accent_map[special_accent_map[k]] = k
1197 inverse_accented_map = {}
1198 for k in accented_map:
1199 inverse_accented_map[accented_map[k]] = k
1201 # Since LyX may insert a line break within a word we must combine all
1202 # words before unicode normalization.
1203 # We do this only if the next line starts with an accent, otherwise we
1204 # would create things like '\begin_inset ERTstatus'.
1205 for i in range(len(document.body) - 1):
1206 if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1208 if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
1209 # the last character of this line and the first of the next line
1210 # form probably a surrogate pair, inline insets are excluded (second part of the test)
1211 while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1212 document.body[i] += document.body[i+1][0]
1213 document.body[i+1] = document.body[i+1][1:]
1215 # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1216 # This is needed to catch all accented characters.
1217 for i in range(len(document.body)):
1218 # Unfortunately we have a mixture of unicode strings and plain strings,
1219 # because we never use u'xxx' for string literals, but 'xxx'.
1220 # Therefore we may have to try two times to normalize the data.
1222 document.body[i] = normalize("NFD", document.body[i])
1224 document.body[i] = normalize("NFD", text_type(document.body[i], 'utf-8'))
1226 # Replace accented characters with InsetLaTeXAccent
1227 # Do not convert characters that can be represented in the chosen
1229 encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1230 lang_re = re.compile(r"^\\lang\s(\S+)")
1233 while i < len(document.body):
1234 if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1235 # Track the encoding of the current line
1236 result = lang_re.match(document.body[i])
1238 language = result.group(1)
1239 if language == "default":
1240 encoding_stack[-1] = document.encoding
1242 from lyx2lyx_lang import lang
1243 encoding_stack[-1] = lang[language][3]
1245 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1246 encoding_stack.append(encoding_stack[-1])
1248 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1249 del encoding_stack[-1]
1252 for j in range(len(document.body[i])):
1253 # dotless i and dotless j are both in special_accent_map and can
1254 # occur as an accented character, so we need to test that the
1255 # following character is no accent
1256 if (document.body[i][j] in inverse_special_accent_map and
1257 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1258 accent = document.body[i][j]
1260 dummy = accent.encode(encoding_stack[-1])
1261 except UnicodeEncodeError:
1262 # Insert the rest of the line as new line
1263 if j < len(document.body[i]) - 1:
1264 document.body.insert(i+1, document.body[i][j+1:])
1265 # Delete the accented character
1266 document.body[i] = document.body[i][:j]
1267 # Finally add the InsetLaTeXAccent
1268 document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1270 elif j > 0 and document.body[i][j] in inverse_accent_map:
1271 accented_char = document.body[i][j-1]
1272 if accented_char == ' ':
1273 # Conform to LyX output
1275 elif accented_char in inverse_accented_map:
1276 accented_char = inverse_accented_map[accented_char]
1277 accent = document.body[i][j]
1279 dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
1280 except UnicodeEncodeError:
1281 # Insert the rest of the line as new line
1282 if j < len(document.body[i]) - 1:
1283 document.body.insert(i+1, document.body[i][j+1:])
1284 # Delete the accented characters
1285 document.body[i] = document.body[i][:j-1]
1286 # Finally add the InsetLaTeXAccent
1287 document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1291 # Normalize to "Normal form C" (NFC, pre-composed characters) again
1292 for i in range(len(document.body)):
1293 document.body[i] = normalize("NFC", document.body[i])
1296 def normalize_font_whitespace_259(document):
1297 """ Before format 259 the font changes were ignored if a
1298 whitespace was the first or last character in the sequence, this function
1299 transfers the whitespace outside."""
1301 char_properties = {"\\series": "default",
1302 "\\emph": "default",
1304 "\\shape": "default",
1306 "\\family": "default"}
1307 return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """Move leading/trailing whitespace out of font language changes.

    Format 259 fixed the whitespace handling for most font properties,
    but the language property was forgotten then.  Apply the very same
    normalization, restricted to font language changes only.
    """
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """Return the language of the paragraph containing body line i.

    If the first thing in the paragraph is a \\lang command, its
    argument is the paragraph's language; otherwise the paragraph
    inherits the document's language.
    """
    body = document.body
    start = find_beginning_of_layout(body, i)
    first_line = find_nonempty_line(body, start + 1)
    tokens = body[first_line].split()
    if len(tokens) > 1 and tokens[0] == "\\lang":
        return tokens[1]
    return document.language
1338 def normalize_font_whitespace(document, char_properties):
1339 """ Before format 259 the font changes were ignored if a
1340 whitespace was the first or last character in the sequence, this function
1341 transfers the whitespace outside. Only a change in one of the properties
1342 in the provided char_properties is handled by this function."""
1344 if document.backend != "latex":
1347 lines = document.body
1352 while i < len(lines):
1353 words = lines[i].split()
1355 if len(words) > 0 and words[0] == "\\begin_layout":
1356 # a new paragraph resets all font changes
1358 # also reset the default language to be the paragraph's language
1359 if "\\lang" in list(char_properties.keys()):
1360 char_properties["\\lang"] = \
1361 get_paragraph_language(document, i + 1)
1363 elif len(words) > 1 and words[0] in list(char_properties.keys()):
1364 # we have a font change
1365 if char_properties[words[0]] == words[1]:
1366 # property gets reset
1367 if words[0] in list(changes.keys()):
1368 del changes[words[0]]
1369 defaultproperty = True
1372 changes[words[0]] = words[1]
1373 defaultproperty = False
1375 # We need to explicitly reset all changed properties if we find
1376 # a space below, because LyX 1.4 would output the space after
1377 # closing the previous change and before starting the new one,
1378 # and closing a font change means to close all properties, not
1379 # just the changed one.
1381 if lines[i-1] and lines[i-1][-1] == " ":
1382 lines[i-1] = lines[i-1][:-1]
1383 # a space before the font change
1385 for k in list(changes.keys()):
1386 # exclude property k because that is already in lines[i]
1388 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1389 for k in list(changes.keys()):
1390 # exclude property k because that must be added below anyway
1392 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1394 # Property is reset in lines[i], so add the new stuff afterwards
1395 lines[i+1:i+1] = added_lines
1397 # Reset property for the space
1398 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1399 lines[i:i] = added_lines
1400 i = i + len(added_lines)
1402 elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1403 # a space after the font change
1404 if (lines[i+1] == " " and lines[i+2]):
1405 next_words = lines[i+2].split()
1406 if len(next_words) > 0 and next_words[0] == words[0]:
1407 # a single blank with a property different from the
1408 # previous and the next line must not be changed
1411 lines[i+1] = lines[i+1][1:]
1413 for k in list(changes.keys()):
1414 # exclude property k because that is already in lines[i]
1416 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1417 for k in list(changes.keys()):
1418 # exclude property k because that must be added below anyway
1420 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1421 # Reset property for the space
1422 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1423 lines[i:i] = added_lines
1424 i = i + len(added_lines)
def revert_utf8x(document):
    """Replace the utf8x input encoding with plain utf8."""
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # No encoding header present: fall back to the default.
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8x":
        document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    """Replace the utf8-plain input encoding with plain utf8."""
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # No encoding header present: fall back to the default.
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8-plain":
        document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
1453 def revert_beamer_alert(document):
1454 " Revert beamer's \\alert inset back to ERT. "
1457 i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
1460 document.body[i] = "\\begin_inset ERT"
1463 if (document.body[i][:13] == "\\begin_layout"):
1464 # Insert the \alert command
1465 document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
1472 def revert_beamer_structure(document):
1473 " Revert beamer's \\structure inset back to ERT. "
1476 i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
1479 document.body[i] = "\\begin_inset ERT"
1482 if (document.body[i][:13] == "\\begin_layout"):
1483 document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
def convert_changes(document):
    """Switch \\output_changes off whenever \\tracking_changes is off."""
    i = find_token(document.header, '\\tracking_changes', 0)
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    j = find_token(document.header, '\\output_changes', 0)
    if j == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking = get_value(document.header, "\\tracking_changes", i)
    output = get_value(document.header, "\\output_changes", j)
    # Outputting changes without tracking them makes no sense.
    if tracking == "false" and output == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    """Map the ascii input encoding back to auto."""
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # No encoding header present: fall back to the default.
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "ascii":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    """Rename old-style language names to their modern equivalents."""
    renames = {"brazil": "brazilian",
               "portuges": "portuguese"}
    if document.language in renames:
        document.language = renames[document.language]
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language %s" % document.language
def revert_language_name(document):
    """Rename modern language names back to their old-style equivalents."""
    renames = {"brazilian": "brazil",
               "portuguese": "portuges"}
    if document.language in renames:
        document.language = renames[document.language]
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language %s" % document.language
# \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    """The cv document class was renamed to simplecv."""
    if document.textclass == "cv":
        document.textclass = "simplecv"
def revert_cv_textclass(document):
    """The simplecv document class was called cv before format 264."""
    if document.textclass == "simplecv":
        document.textclass = "cv"
1550 # add scaleBeforeRotation graphics param
1551 def convert_graphics_rotation(document):
1552 " add scaleBeforeRotation graphics parameter. "
1555 i = find_token(document.body, "\\begin_inset Graphics", i)
1558 j = find_end_of_inset(document.body, i+1)
1561 document.warning("Malformed LyX document: Could not find end of graphics inset.")
1562 # Seach for rotateAngle and width or height or scale
1563 # If these params are not there, nothing needs to be done.
1564 k = find_token(document.body, "\trotateAngle", i + 1, j)
1565 l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
1566 if (k != -1 and l != -1):
1567 document.body.insert(j, 'scaleBeforeRotation')
1572 # remove scaleBeforeRotation graphics param
1573 def revert_graphics_rotation(document):
1574 " remove scaleBeforeRotation graphics parameter. "
1577 i = find_token(document.body, "\\begin_inset Graphics", i)
1580 j = find_end_of_inset(document.body, i + 1)
1583 document.warning("Malformed LyX document: Could not find end of graphics inset.")
1584 # If there's a scaleBeforeRotation param, just remove that
1585 k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
1587 del document.body[k]
1589 # if not, and if we have rotateAngle and width or height or scale,
1590 # we have to put the rotateAngle value to special
1591 rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
1592 special = get_value(document.body, 'special', i + 1, j)
1593 if rotateAngle != "":
1594 k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
1598 document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
1600 l = find_token(document.body, "\tspecial", i + 1, j)
1601 document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
1602 k = find_token(document.body, "\trotateAngle", i + 1, j)
1604 del document.body[k]
def convert_tableborder(document):
    """Drop the "|" in front of ">{" in cells that also set leftline.

    LyX used to double the table cell border because it ignored the "|"
    character in the cell arguments.  A fix takes care of this, so the
    "|" has to be removed.
    """
    for n, line in enumerate(document.body):
        has_leftline = line.find("leftline=\"true\"") != -1
        bar = line.find("|>{")
        # the two tokens have to be in one line
        if has_leftline and bar != -1:
            document.body[n] = line[:bar] + line[bar + 1:]
def revert_tableborder(document):
    """Re-insert the "|" in front of ">{" in cells that set leftline."""
    for n, line in enumerate(document.body):
        has_leftline = line.find("leftline=\"true\"") != -1
        brace = line.find(">{")
        # the two tokens have to be in one line
        if has_leftline and brace != -1:
            document.body[n] = line[:brace] + '|' + line[brace:]
1635 def revert_armenian(document):
1637 # set inputencoding from armscii8 to auto
1638 if document.inputencoding == "armscii8":
1639 i = find_token(document.header, "\\inputencoding", 0)
1641 document.header[i] = "\\inputencoding auto"
1642 # check if preamble exists, if not k is set to -1
1645 while i < len(document.preamble):
1647 k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
1649 k = document.preamble[i].find("%", 0, len(document.preamble[i]))
1651 # add the entry \usepackage{armtex} to the document preamble
1652 if document.language == "armenian":
1653 # set the armtex entry as the first preamble line
1655 document.preamble[0:0] = ["\\usepackage{armtex}"]
1656 # create the preamble when it doesn't exist
1658 document.preamble.append('\\usepackage{armtex}')
1659 # Set document language from armenian to english
1660 if document.language == "armenian":
1661 document.language = "english"
1662 i = find_token(document.header, "\\language", 0)
1664 document.header[i] = "\\language english"
def revert_CJK(document):
    """Set CJK encodings to default; CJK languages fall back to english."""
    cjk_encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                     "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # No encoding header present: fall back to the default.
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) in cjk_encodings:
        document.header[i] = "\\inputencoding default"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language in ("chinese-simplified", "chinese-traditional",
                             "japanese", "korean"):
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    """Turn the \\listings_params header into preamble \\lstset code."""
    i = find_token(document.header, "\\listings_params", 0)
    if i == -1:
        return
    document.preamble.append('\\usepackage{listings}')
    document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
    document.header.pop(i)
1698 def revert_listings_inset(document):
1699 r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1703 lstparams "language=Delphi"
1707 \begin_layout Standard
1717 \begin_layout Standard
1721 lstinline[language=Delphi]{var i = 10;}
1726 There can be an caption inset in this inset
1728 \begin_layout Standard
1729 \begin_inset Caption
1731 \begin_layout Standard
1733 \begin_inset LatexCommand label
1749 i = find_token(document.body, '\\begin_inset listings', i)
1753 if not '\\usepackage{listings}' in document.preamble:
1754 document.preamble.append('\\usepackage{listings}')
1755 j = find_end_of_inset(document.body, i + 1)
1757 # this should not happen
1763 for line in range(i + 1, i + 4):
1764 if document.body[line].startswith('inline'):
1765 inline = document.body[line].split()[1]
1766 if document.body[line].startswith('lstparams'):
1767 params = document.body[line].split()[1].strip('"')
1768 if document.body[line].startswith('status'):
1769 status = document.body[line].split()[1].strip()
1774 cap = find_token(document.body, '\\begin_inset Caption', i)
1776 cap_end = find_end_of_inset(document.body, cap + 1)
1778 # this should not happen
1781 lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
1783 lbl_end = find_end_of_inset(document.body, lbl + 1)
1785 # this should not happen
1790 for line in document.body[lbl : lbl_end + 1]:
1791 if line.startswith('name '):
1792 label = line.split()[1].strip('"')
1794 for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
1795 if not line.startswith('\\'):
1796 caption += line.strip()
1799 # looking for the oneline code for lstinline
1800 inlinecode = document.body[find_end_of_layout(document.body,
1801 find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
1802 if len(caption) > 0:
1803 if len(params) == 0:
1804 params = 'caption={%s}' % caption
1806 params += ',caption={%s}' % caption
1808 if len(params) == 0:
1809 params = 'label={%s}' % label
1811 params += ',label={%s}' % label
1813 params = '[%s]' % params
1814 params = params.replace('\\', '\\backslash\n')
1815 if inline == 'true':
1816 document.body[i:(j+1)] = [r'\begin_inset ERT',
1817 'status %s' % status,
1818 r'\begin_layout %s' % document.default_layout,
1822 'lstinline%s{%s}' % (params, inlinecode),
1827 document.body[i: j+1] = [r'\begin_inset ERT',
1828 'status %s' % status,
1830 r'\begin_layout %s' % document.default_layout,
1834 r'begin{lstlisting}%s' % params,
1837 r'\begin_layout %s' % document.default_layout,
1838 ] + document.body[k : j - 1] + \
1840 r'\begin_layout %s' % document.default_layout,
1849 def revert_include_listings(document):
1850 r''' Revert lstinputlisting Include option , translate
1851 \begin_inset Include \lstinputlisting{file}[opt]
1861 \begin_layout Standard
1865 lstinputlisting{file}[opt]
1873 i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
1877 if not '\\usepackage{listings}' in document.preamble:
1878 document.preamble.append('\\usepackage{listings}')
1879 j = find_end_of_inset(document.body, i + 1)
1881 # this should not happen
1883 # find command line lstinputlisting{file}[options]
1884 cmd, file, option = '', '', ''
1885 if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
1886 cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
1887 option = option.replace('\\', '\\backslash\n')
1888 document.body[i : j + 1] = [r'\begin_inset ERT',
1891 r'\begin_layout %s' % document.default_layout,
1895 '%s%s{%s}' % (cmd, option, file),
1901 def revert_ext_font_sizes(document):
1902 if document.backend != "latex": return
1903 if not document.textclass.startswith("ext"): return
1905 fontsize = get_value(document.header, '\\paperfontsize', 0)
1906 if fontsize not in ('10', '11', '12'): return
1909 i = find_token(document.header, '\\paperfontsize', 0)
1910 document.header[i] = '\\paperfontsize default'
1911 insert_document_option(document, fontsize)
1914 def convert_ext_font_sizes(document):
1915 if document.backend != "latex": return
1916 if not document.textclass.startswith("ext"): return
1918 fontsize = get_value(document.header, '\\paperfontsize', 0)
1919 if fontsize != 'default': return
1921 i = find_token(document.header, '\\options', 0)
1924 options = get_value(document.header, '\\options', i)
1926 fontsizes = '10pt', '11pt', '12pt'
1927 for fs in fontsizes:
1928 if options.find(fs) != -1:
1930 else: # this else will only be attained if the for cycle had no match
1933 options = options.split(',')
1934 for j, opt in enumerate(options):
1935 if opt in fontsizes:
1942 k = find_token(document.header, '\\paperfontsize', 0)
1943 document.header[k] = '\\paperfontsize %s' % fontsize
1946 document.header[i] = '\\options %s' % ','.join(options)
1948 del document.header[i]
1951 def revert_separator_layout(document):
1952 r'''Revert --Separator-- to a lyx note
1955 \begin_layout --Separator--
1961 \begin_layout Standard
1962 \begin_inset Note Note
1965 \begin_layout Standard
1978 i = find_token(document.body, r'\begin_layout --Separator--', i)
1981 j = find_end_of_layout(document.body, i + 1)
1983 # this should not happen
1985 document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
1986 r'\begin_inset Note Note',
1989 r'\begin_layout %s' % document.default_layout,
1990 'Separate Environment',
1994 document.body[ i + 1 : j] + \
def convert_arabic(document):
    """Rename the arabic language (and \\lang markup) to arabic_arabtex."""
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    for n, line in enumerate(document.body):
        if line.find("\\lang arabic") != -1:
            # change the language name
            document.body[n] = '\\lang arabic_arabtex'
def revert_arabic(document):
    """Rename the arabic_arabtex language (and \\lang markup) back to arabic."""
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    for n, line in enumerate(document.body):
        if line.find("\\lang arabic_arabtex") != -1:
            # change the language name
            document.body[n] = '\\lang arabic'
2034 supported_versions = ["1.5.0","1.5"]
2035 convert = [[246, []],
2036 [247, [convert_font_settings]],
2038 [249, [convert_utf8]],
2041 [252, [convert_commandparams, convert_bibitem]],
2043 [254, [convert_esint]],
2046 [257, [convert_caption]],
2047 [258, [convert_lyxline]],
2048 [259, [convert_accent, normalize_font_whitespace_259]],
2050 [261, [convert_changes]],
2052 [263, [normalize_language_name]],
2053 [264, [convert_cv_textclass]],
2054 [265, [convert_tableborder]],
2060 [271, [convert_ext_font_sizes]],
2063 [274, [normalize_font_whitespace_274]],
2064 [275, [convert_graphics_rotation]],
2065 [276, [convert_arabic]]
2069 [275, [revert_arabic]],
2070 [274, [revert_graphics_rotation]],
2072 [272, [revert_separator_layout]],
2073 [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2074 [270, [revert_ext_font_sizes]],
2075 [269, [revert_beamer_alert, revert_beamer_structure]],
2076 [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2077 [267, [revert_CJK]],
2078 [266, [revert_utf8plain]],
2079 [265, [revert_armenian]],
2080 [264, [revert_tableborder]],
2081 [263, [revert_cv_textclass]],
2082 [262, [revert_language_name]],
2083 [261, [revert_ascii]],
2085 [259, [revert_utf8x]],
2088 [256, [revert_caption]],
2089 [255, [revert_encodings]],
2090 [254, [revert_clearpage, revert_cleardoublepage]],
2091 [253, [revert_esint]],
2092 [252, [revert_nomenclature, revert_printnomenclature]],
2093 [251, [revert_commandparams]],
2094 [250, [revert_cs_label]],
2096 [248, [revert_accent, revert_utf8, revert_unicode]],
2097 [247, [revert_booktabs]],
2098 [246, [revert_font_settings]],
2099 [245, [revert_framed]]]
2102 if __name__ == "__main__":