1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 """ Convert files to the file format generated by lyx 1.5"""
import os
import re
import sys
import unicodedata

from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
from LyX import get_encoding

# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2
if not PY2:
    # Python 3 has no unichr; chr is the equivalent.
    unichr = chr
# End of code to support for both python 2 and 3
38 ####################################################################
39 # Private helper functions
def find_end_of_inset(lines, i):
    " Find end of inset, where lines[i] is included."
    begin_tag = "\\begin_inset"
    end_tag = "\\end_inset"
    return find_end_of(lines, i, begin_tag, end_tag)
def find_end_of_layout(lines, i):
    " Find end of layout, where lines[i] is included."
    begin_tag = "\\begin_layout"
    end_tag = "\\end_layout"
    return find_end_of(lines, i, begin_tag, end_tag)
def find_beginning_of_layout(lines, i):
    "Find beginning of layout, where lines[i] is included."
    begin_tag = "\\begin_layout"
    end_tag = "\\end_layout"
    return find_beginning_of(lines, i, begin_tag, end_tag)
53 # End of helper functions
54 ####################################################################
58 # Notes: Framed/Shaded
def revert_framed(document):
    "Revert framed notes. "
    # Framed and Shaded notes do not exist before format 248; both fold
    # back into the plain Note inset.
    i = 0
    while 1:
        i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset Note"
        i = i + 1
# Maps from the old \fontscheme names to the new per-family font names.
# All three maps share the same key set; revert_font_settings relies on
# that when it searches for a scheme matching the three family values.
roman_fonts = {'default' : 'default', 'ae'       : 'ae',
               'times'   : 'times',   'palatino' : 'palatino',
               'helvet'  : 'default', 'avant'    : 'default',
               'newcent' : 'newcent', 'bookman'  : 'bookman',
               'pslatex' : 'times'}
sans_fonts = {'default' : 'default', 'ae'       : 'default',
              'times'   : 'default', 'palatino' : 'default',
              'helvet'  : 'helvet',  'avant'    : 'avant',
              'newcent' : 'default', 'bookman'  : 'default',
              'pslatex' : 'helvet'}
typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
                    'times'   : 'default', 'palatino' : 'default',
                    'helvet'  : 'default', 'avant'    : 'default',
                    'newcent' : 'default', 'bookman'  : 'default',
                    'pslatex' : 'courier'}
def convert_font_settings(document):
    " Convert \\fontscheme header setting to the individual \\font_* settings. "
    i = 0
    i = find_token_exact(document.header, "\\fontscheme", i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        # NOTE(review): without the token there is no sane insertion point;
        # give up on this (malformed) header.
        return
    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
    if font_scheme == '':
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        font_scheme = 'default'
    if not font_scheme in list(roman_fonts.keys()):
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
        font_scheme = 'default'
    # Replace the single \fontscheme line by the new per-family settings.
    document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
                              '\\font_sans %s' % sans_fonts[font_scheme],
                              '\\font_typewriter %s' % typewriter_fonts[font_scheme],
                              '\\font_default_family default',
                              '\\font_sc false',
                              '\\font_osf false',
                              '\\font_sf_scale 100',
                              '\\font_tt_scale 100']
def revert_font_settings(document):
    """ Revert the individual \\font_* header settings to \\fontscheme.

    Reads and removes \\font_roman/\\font_sans/\\font_typewriter,
    \\font_default_family, \\font_sc, \\font_osf, \\font_sf_scale and
    \\font_tt_scale, then inserts a matching \\fontscheme line; settings
    with no old equivalent are emulated via preamble code.
    """
    i = 0
    insert_line = -1
    fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
    for family in 'roman', 'sans', 'typewriter':
        name = '\\font_%s' % family
        i = find_token_exact(document.header, name, i)
        if i == -1:
            document.warning("Malformed LyX document: Missing `%s'." % name)
            i = 0
        else:
            # Remember where the first font setting was so that the
            # \fontscheme line can be inserted at the same place.
            if (insert_line < 0):
                insert_line = i
            fonts[family] = get_value(document.header, name, i, i + 1)
            del document.header[i]
    i = find_token_exact(document.header, '\\font_default_family', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_default_family'.")
        font_default_family = 'default'
    else:
        font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_sc', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_sc'.")
        font_sc = 'false'
    else:
        font_sc = get_value(document.header, '\\font_sc', i, i + 1)
        del document.header[i]
    if font_sc != 'false':
        document.warning("Conversion of '\\font_sc' not yet implemented.")
    i = find_token_exact(document.header, '\\font_osf', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_osf'.")
        font_osf = 'false'
    else:
        font_osf = get_value(document.header, '\\font_osf', i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_sf_scale', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
        font_sf_scale = '100'
    else:
        font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
        del document.header[i]
    if font_sf_scale != '100':
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
    i = find_token_exact(document.header, '\\font_tt_scale', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
        font_tt_scale = '100'
    else:
        font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
        del document.header[i]
    if font_tt_scale != '100':
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
    # Try to find a scheme that matches all three families exactly.
    for font_scheme in list(roman_fonts.keys()):
        if (roman_fonts[font_scheme] == fonts['roman'] and
            sans_fonts[font_scheme] == fonts['sans'] and
            typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
            return
    # No exact scheme: fall back to 'default' and emulate via the preamble.
    font_scheme = 'default'
    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
            font_osf = 'false'
    for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
        if fonts['roman'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmss', 'lmss', 'cmbr':
        if fonts['sans'] == font:
            document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
    for font in 'berasans':
        if fonts['sans'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmtt', 'lmtt', 'cmtl':
        if fonts['typewriter'] == font:
            document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
    for font in 'courier', 'beramono', 'luximono':
        if fonts['typewriter'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")
def revert_booktabs(document):
    " We remove the booktabs flag or everything else will become a mess. "
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    # Hoisted out of the loop; the pattern is constant.
    re_features = re.compile(r'^<features.* booktabs="true".*>$')
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Advance past the broken inset start so we cannot loop forever.
            i = i + 1
            continue
        for k in range(i, j):
            if re_features.search(document.body[k]):
                document.warning("Converting 'booktabs' table to normal table.")
                document.body[k] = document.body[k].replace(' booktabs="true"', '')
            if re.search(re_row, document.body[k]):
                document.warning("Removing extra row space.")
                document.body[k] = re_tspace.sub('', document.body[k])
                document.body[k] = re_bspace.sub('', document.body[k])
                document.body[k] = re_ispace.sub('', document.body[k])
        i = i + 1
def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.

    Files with an inputencoding of "auto" or "default" and multiple languages
    where at least two languages have different default encodings are encoded
    in multiple encodings for file formats < 249. These files are incorrectly
    read and written (as if the whole file was in the encoding of the main
    language).
    This is not true for files written by CJK-LyX, they are always in the locale
    encoding.

    This function
    - converts from fake unicode values to true unicode if forward is true, and
    - converts from true unicode values to fake unicode if forward is false.
    document.encoding must be set to the old value (format 248) in both cases.

    We do this here and not in LyX.py because it is far easier to do the
    necessary parsing in modern formats than in ancient ones.
    """
    inset_types = ["Foot", "Note"]
    if document.cjk_encoding != '':
        return
    encoding_stack = [document.encoding]
    insets = []
    lang_re = re.compile(r"^\\lang\s(\S+)")
    inset_re = re.compile(r"^\\begin_inset\s(\S+)")
    if not forward: # no need to read file unless we are reverting
        spec_chars = read_unicodesymbols()
    if document.inputencoding == "auto" or document.inputencoding == "default":
        i = 0
        while i < len(document.body):
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                # Footnotes and notes are encoded in the encoding of the
                # main document language, not of the surrounding text.
                if len(insets) > 0 and insets[-1] in inset_types:
                    from lyx2lyx_lang import lang
                    encoding_stack.append(lang[document.language][3])
                else:
                    encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                if len(encoding_stack) == 1:
                    # Don't remove the document encoding from the stack
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                else:
                    del encoding_stack[-1]
            elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
                inset_result = inset_re.match(document.body[i])
                if inset_result:
                    insets.append(inset_result.group(1))
                else:
                    insets.append("")
            elif find_token(document.body, "\\end_inset", i, i + 1) == i:
                del insets[-1]
            if encoding_stack[-1] != document.encoding:
                if forward:
                    # This line has been incorrectly interpreted as if it was
                    # encoded in 'encoding'.
                    # Convert back to the 8bit string that was in the file.
                    orig = document.body[i].encode(document.encoding)
                    # Convert the 8bit string that was in the file to unicode
                    # with the correct encoding.
                    document.body[i] = orig.decode(encoding_stack[-1])
                else:
                    try:
                        # Convert unicode to the 8bit string that will be written
                        # to the file with the correct encoding.
                        orig = document.body[i].encode(encoding_stack[-1])
                        # Convert the 8bit string that will be written to the
                        # file to fake unicode with the encoding that will later
                        # be used when writing to the file.
                        document.body[i] = orig.decode(document.encoding)
                    except:
                        # A character cannot be represented in the target
                        # encoding: replace it with ERT/math via the
                        # unicodesymbols table.
                        mod_line = revert_unicode_line(document, i, insets, spec_chars)
                        document.body[i:i+1] = mod_line.split('\n')
                        i += len(mod_line.split('\n')) - 1
            i += 1
def convert_utf8(document):
    " Set document encoding to UTF-8. "
    # First re-decode the body, then record the new encoding.
    forward = True
    convert_multiencoding(document, forward)
    document.encoding = "utf8"
def revert_utf8(document):
    " Set document encoding to the value corresponding to inputencoding. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
    # 248 is the target (old) file format for the encoding lookup.
    document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
    convert_multiencoding(document, False)
# FIXME: Use the version in unicode_symbols.py which has some bug fixes
def read_unicodesymbols():
    " Read the unicodesymbols list of unicode characters and corresponding commands."
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # NOTE(review): str.strip('lyx2lyx') strips *characters*, not the suffix;
    # kept for compatibility (see FIXME above).
    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
    spec_chars = {}
    for line in fp.readlines():
        if line != '' and line[0] != '#':
            line=line.replace(' "',' ') # remove all quotation marks with spaces before
            line=line.replace('" ',' ') # remove all quotation marks with spaces after
            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
            try:
                # flag1 and flag2 are preamble and other flags
                [ucs4,command,flag1,flag2] =line.split(None,3)
                # NOTE: eval() of trusted, LyX-shipped data only.
                spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
            except:
                # There are also normal comments and empty lines in the file.
                pass
    fp.close()
    return spec_chars
def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
    """ Rewrite body line i so that it can be written in document.encoding.

    Characters that cannot be encoded are replaced by an ERT or math inset
    (per the unicodesymbols table in spec_chars), or by
    replacement_character when no command is known. Returns the modified
    line as one '\\n'-joined string.
    """
    # Define strings to start and end ERT and math insets
    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s' % document.default_layout
    ert_outro='\n\\end_layout\n\n\\end_inset\n'
    math_intro='\n\\begin_inset Formula $'
    math_outro='$\n\\end_inset'

    mod_line = u''
    if i and not is_inset_line(document, i-1):
        last_char = document.body[i - 1][-1:]
    else:
        last_char = ''

    line = document.body[i]
    for character in line:
        try:
            # Try to write the character
            dummy = character.encode(document.encoding)
            mod_line += character
            last_char = character
        except:
            # Try to replace with ERT/math inset
            if character in spec_chars:
                command = spec_chars[character][0] # the command to replace unicode
                flag1 = spec_chars[character][1]
                flag2 = spec_chars[character][2]
                if flag1.find('combining') > -1 or flag2.find('combining') > -1:
                    # We have a character that should be combined with the previous
                    command += '{' + last_char + '}'
                    # Remove the last character. Ignore if it is whitespace
                    if len(last_char.rstrip()):
                        # last_char was found and is not whitespace
                        if mod_line:
                            mod_line = mod_line[:-1]
                        else: # last_char belongs to the last line
                            document.body[i-1] = document.body[i-1][:-1]
                    else:
                        # The last character was replaced by a command. For now it is
                        # ignored. This could be handled better.
                        pass
                if command[0:2] == '\\\\':
                    if command[2:12]=='ensuremath':
                        if insets and insets[-1] == "ERT":
                            # math in ERT
                            command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
                            command = command.replace('}', '$\n')
                        elif not insets or insets[-1] != "Formula":
                            # add a math inset with the replacement character
                            command = command.replace('\\\\ensuremath{\\', math_intro)
                            command = command.replace('}', math_outro)
                        else:
                            # we are already in a math inset
                            command = command.replace('\\\\ensuremath{\\', '')
                            command = command.replace('}', '')
                    else:
                        if insets and insets[-1] == "Formula":
                            # avoid putting an ERT in a math; instead put command as text
                            command = command.replace('\\\\', '\mathrm{')
                            command = command + '}'
                        elif not insets or insets[-1] != "ERT":
                            # add an ERT inset with the replacement character
                            command = command.replace('\\\\', '\n\\backslash\n')
                            command = ert_intro + command + ert_outro
                        else:
                            command = command.replace('\\\\', '\n\\backslash\n')
                    last_char = '' # indicate that the character should not be removed
                mod_line += command
            else:
                # Replace with replacement string
                mod_line += replacement_character
    return mod_line
def revert_unicode(document):
    '''Transform unicode characters that can not be written using the
    document encoding to commands according to the unicodesymbols
    file. Characters that can not be replaced by commands are replaced by
    an replacement string. Flags other than 'combined' are currently not
    implemented.'''
    spec_chars = read_unicodesymbols()
    insets = [] # list of active insets

    # Go through the document to capture all combining characters
    i = 0
    while i < len(document.body):
        line = document.body[i]
        # Track the inset nesting so replacements know their context.
        if line.find('\\begin_inset') > -1:
            insets.append(line[13:].split()[0])
        if line.find('\\end_inset') > -1:
            del insets[-1]
        # Try to write the line
        try:
            # If all goes well the line is written here
            dummy = line.encode(document.encoding)
            i += 1
        except:
            # Error, some character(s) in the line need to be replaced
            mod_line = revert_unicode_line(document, i, insets, spec_chars)
            document.body[i:i+1] = mod_line.split('\n')
            i += len(mod_line.split('\n'))
def revert_cs_label(document):
    " Remove status flag of charstyle label. "
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Seach for a line starting 'show_label'
        # If it is not there, break with a warning message
        i = i + 1
        while 1:
            if (document.body[i][:10] == "show_label"):
                del document.body[i]
                break
            elif (document.body[i][:13] == "\\begin_layout"):
                document.warning("Malformed LyX document: Missing 'show_label'.")
                break
            i = i + 1
        i = i + 1
def convert_bibitem(document):
    r""" Convert
    \bibitem [option]{argument}

    to

    \begin_inset LatexCommand bibitem
    label "option"
    key "argument"

    \end_inset

    This must be called after convert_commandparams.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\bibitem", i)
        if i == -1:
            break
        j = document.body[i].find('[') + 1
        k = document.body[i].rfind(']')
        if j == 0: # No optional argument found
            option = None
        else:
            option = document.body[i][j:k]
        j = document.body[i].rfind('{') + 1
        k = document.body[i].rfind('}')
        argument = document.body[i][j:k]
        lines = ['\\begin_inset LatexCommand bibitem']
        if option != None:
            lines.append('label "%s"' % option.replace('"', '\\"'))
        lines.append('key "%s"' % argument.replace('"', '\\"'))
        lines.append('')
        lines.append('\\end_inset')
        document.body[i:i+1] = lines
        i = i + 1
# command : [option1, option2, argument]
# All natbib-style citation commands share the same parameter names, so
# generate those entries instead of spelling each one out.
_cite_commands = (
    "cite", "citet", "citep", "citealt", "citealp", "citeauthor",
    "citeyear", "citeyearpar",
    "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
    "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
    "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
    "citefield", "citetitle", "cite*",
)

commandparams_info = {}
for _cmd in _cite_commands:
    commandparams_info[_cmd] = ["after", "before", "key"]
commandparams_info.update({
    "bibitem"         : ["label", "", "key"],
    "bibtex"          : ["options", "btprint", "bibfiles"],
    "hfill"           : ["", "", ""],
    "index"           : ["", "", "name"],
    "printindex"      : ["", "", "name"],
    "label"           : ["", "", "name"],
    "eqref"           : ["name", "", "reference"],
    "pageref"         : ["name", "", "reference"],
    "prettyref"       : ["name", "", "reference"],
    "ref"             : ["name", "", "reference"],
    "vpageref"        : ["name", "", "reference"],
    "vref"            : ["name", "", "reference"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl"         : ["name", "", "target"],
    "url"             : ["name", "", "target"]})
def convert_commandparams(document):
    r""" Convert

    \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
    \end_inset

    to

    \begin_inset LatexCommand cmdname
    name1 "opt1"
    name2 "opt2"
    name3 "arg"
    \end_inset

    name1, name2 and name3 can be different for each command.
    """
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        command = document.body[i][26:].strip()
        if command == "":
            document.warning("Malformed LyX document: Missing LatexCommand name.")
            i = i + 1
            continue
        # The command may continue on the following lines up to \end_inset.
        j = find_token(document.body, "\\end_inset", i + 1)
        if j == -1:
            document.warning("Malformed document")
        else:
            command += "".join(document.body[i+1:j])
            document.body[i+1:j] = []

        # The following parser is taken from the original InsetCommandParams::scanCommand
        name = ""
        option1 = ""
        option2 = ""
        argument = ""
        state = "WS"
        # Used to handle things like \command[foo[bar]]{foo{bar}}
        nestdepth = 0
        b = 0
        for c in command:
            if ((state == "CMDNAME" and c == ' ') or
                (state == "CMDNAME" and c == '[') or
                (state == "CMDNAME" and c == '{')):
                state = "WS"
            if ((state == "OPTION" and c == ']') or
                (state == "SECOPTION" and c == ']') or
                (state == "CONTENT" and c == '}')):
                if nestdepth == 0:
                    state = "WS"
                else:
                    nestdepth = nestdepth - 1
            if ((state == "OPTION" and c == '[') or
                (state == "SECOPTION" and c == '[') or
                (state == "CONTENT" and c == '{')):
                nestdepth = nestdepth + 1
            if state == "CMDNAME":
                name += c
            elif state == "OPTION":
                option1 += c
            elif state == "SECOPTION":
                option2 += c
            elif state == "CONTENT":
                argument += c
            elif state == "WS":
                if c == '\\':
                    state = "CMDNAME"
                elif c == '[' and b != ']':
                    state = "OPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '[' and b == ']':
                    state = "SECOPTION"
                    nestdepth = 0 # Just to be sure
                elif c == '{':
                    state = "CONTENT"
                    nestdepth = 0 # Just to be sure
            b = c

        # Now we have parsed the command, output the parameters
        lines = ["\\begin_inset LatexCommand %s" % name]
        if option1 != "":
            if commandparams_info[name][0] == "":
                document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('\\', '\\\\').replace('"', '\\"')))
        if option2 != "":
            if commandparams_info[name][1] == "":
                document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('\\', '\\\\').replace('"', '\\"')))
        if argument != "":
            if commandparams_info[name][2] == "":
                document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
            else:
                lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('\\', '\\\\').replace('"', '\\"')))
        document.body[i:i+1] = lines
        i = i + 1
def revert_commandparams(document):
    " Revert LatexCommand insets to the old one-line [opt1][opt2]{arg} form. "
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        name = document.body[i].split()[2]
        j = find_end_of_inset(document.body, i)
        preview_line = ""
        option1 = ""
        option2 = ""
        argument = ""
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
            if match:
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                elif (commandparams_info[name][0] != "" and
                      pname == commandparams_info[name][0]):
                    option1 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
                elif (commandparams_info[name][1] != "" and
                      pname == commandparams_info[name][1]):
                    option2 = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
                elif (commandparams_info[name][2] != "" and
                      pname == commandparams_info[name][2]):
                    argument = pvalue.strip('"').replace('\\"', '"').replace('\\\\', '\\')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
        if name == "bibitem":
            # bibitem reverts to a plain \bibitem line, not to an inset.
            if option1 == "":
                lines = ["\\bibitem {%s}" % argument]
            else:
                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
        else:
            if option1 == "":
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
            else:
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
        if name != "bibitem":
            if preview_line != "":
                lines.append(preview_line)
            lines.append('')
            lines.append('\\end_inset')
        document.body[i:j+1] = lines
        i = i + 1
def revert_nomenclature(document):
    " Convert nomenclature entry to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        preview_line = ""
        symbol = ""
        description = ""
        prefix = ""
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    preview_line = document.body[k]
                elif name == "symbol":
                    symbol = value.strip('"').replace('\\"', '"')
                elif name == "description":
                    description = value.strip('"').replace('\\"', '"')
                elif name == "prefix":
                    prefix = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
        if prefix == "":
            command = 'nomenclature{%s}{%s}' % (symbol, description)
        else:
            command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def revert_printnomenclature(document):
    " Convert printnomenclature to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    use_nomencl = 0
    while 1:
        i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
        if i == -1:
            break
        use_nomencl = 1
        j = find_end_of_inset(document.body, i + 1)
        preview_line = ""
        labelwidth = ""
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
            if match:
                name = match.group(1)
                value = match.group(2)
                if name == "preview":
                    preview_line = document.body[k]
                elif name == "labelwidth":
                    labelwidth = value.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
        # NOTE(review): these literals look like they should emit
        # 'printnomenclature'; kept as found — confirm against reference.
        if labelwidth == "":
            command = 'nomenclature{}'
        else:
            command = 'nomenclature[%s]' % labelwidth
        document.body[i:j+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                command,
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def convert_esint(document):
    " Add \\use_esint setting to header. "
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')
def revert_esint(document):
    " Remove \\use_esint setting from header. "
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.  Compare as a string: split() yields str.
    if use_esint == '2':
        document.preamble.append('\\usepackage{esint}')
def revert_clearpage(document):
    " clearpage -> ERT "
    i = 0
    while 1:
        i = find_token(document.body, "\\clearpage", i)
        if i == -1:
            break
        document.body[i:i+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                'clearpage',
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
def revert_cleardoublepage(document):
    " cleardoublepage -> ERT "
    i = 0
    while 1:
        i = find_token(document.body, "\\cleardoublepage", i)
        if i == -1:
            break
        document.body[i:i+1] = ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash ',
                                'cleardoublepage',
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
def convert_lyxline(document):
    r" remove fontsize commands for \lyxline "
    # The problematic is: The old \lyxline definition doesn't handle the fontsize
    # to change the line thickness. The new definiton does this so that imported
    # \lyxlines would have a different line thickness. The eventual fontsize command
    # before \lyxline is therefore removed to get the same output.
    fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
                 "large", "Large", "LARGE", "huge", "Huge"]
    for n in range(0, len(fontsizes)):
        i = 0
        while i < len(document.body):
            i = find_token(document.body, "\\size " + fontsizes[n], i)
            k = find_token(document.body, "\\lyxline", i)
            # the corresponding fontsize command is always 2 lines before the \lyxline
            if (i != -1 and k == i+2):
                document.body[i:i+1] = []
            else:
                break
            i = i + 1
def revert_encodings(document):
    " Set new encodings to auto. "
    # Encodings that did not exist before format 248.
    encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def convert_caption(document):
    " Convert caption layouts to caption insets. "
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_layout Caption", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # Close the inset before the old \end_layout, then wrap the start.
        document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
        document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
                                "\\begin_inset Caption", "",
                                "\\begin_layout %s" % document.default_layout]
        i = i + 1
def revert_caption(document):
    """ Convert caption insets to caption layouts.

    This assumes that the text class has a caption style.
    """
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Caption", i)
        if i == -1:
            return
        # We either need to delete the previous \begin_layout line, or we
        # need to end the previous layout if this inset is not in the first
        # position of the paragraph.
        layout_before = find_token_backwards(document.body, "\\begin_layout", i)
        if layout_before == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        layout_line = document.body[layout_before]
        del_layout_before = True
        l = layout_before + 1
        while l < i:
            if document.body[l] != "":
                del_layout_before = False
                break
            l = l + 1
        if del_layout_before:
            del document.body[layout_before:i]
            i = layout_before
        else:
            document.body[i:i] = ["\\end_layout", ""]
            i = i + 2

        # Find start of layout in the inset and end of inset
        j = find_token(document.body, "\\begin_layout", i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\begin_layout'.")
            return
        k = find_end_of_inset(document.body, i)
        if k == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            return

        # We either need to delete the following \end_layout line, or we need
        # to restart the old layout if this inset is not at the paragraph end.
        layout_after = find_token(document.body, "\\end_layout", k)
        if layout_after == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        del_layout_after = True
        l = k + 1
        while l < layout_after:
            if document.body[l] != "":
                del_layout_after = False
                break
            l = l + 1
        if del_layout_after:
            del document.body[k+1:layout_after+1]
        else:
            document.body[k+1:k+1] = [layout_line, ""]

        # delete \begin_layout and \end_inset and replace \begin_inset with
        # "\begin_layout Caption". This works because we can only have one
        # paragraph in the caption inset: The old \end_layout will be recycled.
        del document.body[k]
        if document.body[k] == "":
            del document.body[k]
        del document.body[j]
        if document.body[j] == "":
            del document.body[j]
        document.body[i] = "\\begin_layout Caption"
        if document.body[i+1] == "":
            del document.body[i+1]
        i = i + 1
1027 # Accents of InsetLaTeXAccent
1029 "`" : u'\u0300', # grave
1030 "'" : u'\u0301', # acute
1031 "^" : u'\u0302', # circumflex
1032 "~" : u'\u0303', # tilde
1033 "=" : u'\u0304', # macron
1034 "u" : u'\u0306', # breve
1035 "." : u'\u0307', # dot above
1036 "\"": u'\u0308', # diaeresis
1037 "r" : u'\u030a', # ring above
1038 "H" : u'\u030b', # double acute
1039 "v" : u'\u030c', # caron
1040 "b" : u'\u0320', # minus sign below
1041 "d" : u'\u0323', # dot below
1042 "c" : u'\u0327', # cedilla
1043 "k" : u'\u0328', # ogonek
1044 "t" : u'\u0361' # tie. This is special: It spans two characters, but
1045 # only one is given as argument, so we don't need to
1046 # treat it differently.
1050 # special accents of InsetLaTeXAccent without argument
1051 special_accent_map = {
1052 'i' : u'\u0131', # dotless i
1053 'j' : u'\u0237', # dotless j
1054 'l' : u'\u0142', # l with stroke
1055 'L' : u'\u0141' # L with stroke
1059 # special accent arguments of InsetLaTeXAccent
1061 '\\i' : u'\u0131', # dotless i
1062 '\\j' : u'\u0237' # dotless j
1066 def _convert_accent(accent, accented_char):
1068 char = accented_char
1070 if type in special_accent_map:
1071 return special_accent_map[type]
1072 # a missing char is treated as space by LyX
1074 elif type == 'q' and char in ['t', 'd', 'l', 'L']:
1075 # Special caron, only used with t, d, l and L.
1076 # It is not in the map because we convert it to the same unicode
1077 # character as the normal caron: \q{} is only defined if babel with
1078 # the czech or slovak language is used, and the normal caron
1079 # produces the correct output if the T1 font encoding is used.
1080 # For the same reason we never convert to \q{} in the other direction.
1082 elif char in accented_map:
1083 char = accented_map[char]
1084 elif (len(char) > 1):
1085 # We can only convert accents on a single char
1087 a = accent_map.get(type)
1089 return unicodedata.normalize("NFC", "%s%s" % (char, a))
def convert_ertbackslash(body, i, ert, default_layout):
    r"""Convert backslashes and '\n' in `ert` into valid ERT code.

    Appends the converted text to body[i] and returns the (possibly
    incremented) line index i.  A backslash becomes '\backslash ' and
    starts a fresh body line; a newline closes the current layout and
    opens a new `default_layout` paragraph.
    """
    for c in ert:
        if c == '\\':
            body[i] = body[i] + '\\backslash '
            i = i + 1
            body.insert(i, '')
        elif c == '\n':
            body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
            i = i + 4
        else:
            body[i] = body[i] + c
    return i
def convert_accent(document):
    # Convert InsetLaTeXAccent commands in the body to literal Unicode
    # characters, falling back to ERT for unknown accents.
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a' (also accepted)
    # '\i \"' (also accepted)
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    # NOTE(review): the scan-loop scaffolding ('i = 0' / 'while True:') is
    # missing from this copy — verify against upstream lyx_1_5.py.
    i = find_re(document.body, re_wholeinset, i)
    # NOTE(review): an 'if i == -1: return' guard is missing here.
    match = re_wholeinset.match(document.body[i])
    prefix = match.group(1)
    contents = match.group(3).strip()
    match = re_contents.match(contents)
    # NOTE(review): an 'if match:' guard appears to be missing here.
    # Strip first char (always \)
    accent = match.group(1)[1:]
    accented_contents = match.group(2).strip()
    match = re_accentedcontents.match(accented_contents)
    accented_char = match.group(1)
    converted = _convert_accent(accent, accented_char)
    # NOTE(review): an "if converted != '':" branch header is missing here.
    # Normalize contents
    # NOTE(review): the trailing comma makes `contents` a 1-tuple; it is only
    # used in the warning below — confirm this is intended.
    contents = '%s{%s}' % (accent, accented_char),
    document.body[i] = '%s%s' % (prefix, converted)
    # Fallback: keep the prefix and wrap the original command in ERT.
    document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
    document.body[i] = prefix
    document.body[i+1:i+1] = ['\\begin_inset ERT',
    # NOTE(review): the ERT boilerplate items of this list are missing here.
    '\\begin_layout %s' % document.default_layout,
    # i + 7 skips over the ERT boilerplate that was just inserted.
    i = convert_ertbackslash(document.body, i + 7,
    # NOTE(review): the ert-text argument line is missing from this copy.
    document.default_layout)
    document.body[i+1:i+1] = ['\\end_layout',
def is_inset_line(document, i):
    """Return True if line i of the document body contains an inset.

    A line beginning with a backslash is a LyX command line; otherwise an
    inline inset shows up as a backslash within the last two
    whitespace-separated tokens of the line.
    """
    if document.body[i][:1] == '\\':
        return True
    last_tokens = "".join(document.body[i].split()[-2:])
    return last_tokens.find('\\') != -1
# A wrapper around normalize that handles special cases (cf. bug 3313)
def normalize(form, text):
    """Unicode-normalize `text` to `form`, but keep OHM SIGN (U+2126) and
    ANGSTROM SIGN (U+212B) untouched (plain normalization would fold them
    onto the Greek/Latin letters)."""
    # do not normalize OHM, ANGSTROM
    keep_characters = [0x2126, 0x212b]
    result = ''
    convert = ''
    for i in text:
        if ord(i) in keep_characters:
            # flush the pending run, then copy the protected char verbatim
            if len(convert) > 0:
                result = result + unicodedata.normalize(form, convert)
                convert = ''
            result = result + i
        else:
            convert = convert + i
    if len(convert) > 0:
        result = result + unicodedata.normalize(form, convert)
    return result
def revert_accent(document):
    # Replace accented Unicode characters that the target encoding cannot
    # represent with InsetLaTeXAccent commands.
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    for i in range(len(document.body) - 1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
            # NOTE(review): this guard's body (presumably 'continue') is
            # missing from this copy.
        if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
            # the last character of this line and the first of the next line
            # form probably a surrogate pair, inline insets are excluded (second part of the test)
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(len(document.body)):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        # NOTE(review): the 'try:' line is missing from this copy.
        document.body[i] = normalize("NFD", document.body[i])
        # NOTE(review): the 'except ...:' line is missing from this copy.
        document.body[i] = normalize("NFD", text_type(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    # NOTE(review): an 'i = 0' initialisation is missing here in this copy.
    while i < len(document.body):
        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line
            result = lang_re.match(document.body[i])
            # NOTE(review): an 'if result:' guard appears to be missing here.
            language = result.group(1)
            if language == "default":
                encoding_stack[-1] = document.encoding
            # NOTE(review): an 'else:' branch header is missing here.
            from lyx2lyx_lang import lang
            encoding_stack[-1] = lang[language][3]
        elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
            encoding_stack.append(encoding_stack[-1])
        elif find_token(document.body, "\\end_layout", i, i + 1) == i:
            del encoding_stack[-1]

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                # NOTE(review): a 'try:' line is missing before the encode.
                dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented character
                    document.body[i] = document.body[i][:j]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    # NOTE(review): a 'break' appears to be missing here.
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                    # NOTE(review): this branch's body is missing from this copy.
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                # NOTE(review): a 'try:' line is missing before the encode.
                dummy = normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented characters
                    document.body[i] = document.body[i][:j-1]
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
        # NOTE(review): the outer loop's 'i = i + 1' is missing from this copy.

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(len(document.body)):
        document.body[i] = normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside."""

    # Every non-language font property handled in format 259, with its
    # default ("reset") value.
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       "\\color": "none",
                       "\\shape": "default",
                       "\\bar": "default",
                       "\\family": "default"}
    return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """ Before format 259 (sic) the font changes were ignored if a
    whitespace was the first or last character in the sequence. This was
    corrected for most font properties in format 259, but the language
    was forgotten then. This function applies the same conversion done
    there (namely, transfers the whitespace outside) for font language
    changes, as well."""
    # Only the \lang property needs fixing at format 274.
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """ Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's language; otherwise, the paragraph's language is the
    document's language."""

    lines = document.body

    first_nonempty_line = \
        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)

    words = lines[first_nonempty_line].split()

    if len(words) > 1 and words[0] == "\\lang":
        return words[1]
    else:
        return document.language
def normalize_font_whitespace(document, char_properties):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided char_properties is handled by this function."""

    if document.backend != "latex":
        # NOTE(review): this guard's body (presumably 'return') is missing
        # from this copy.

    lines = document.body

    # NOTE(review): initialisations are missing from this copy (presumably
    # 'changes = {}' and 'i = 0') — verify against upstream lyx_1_5.py.
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            # NOTE(review): the reset statement (e.g. 'changes.clear()') is
            # missing from this copy.
            # also reset the default language to be the paragraph's language
            if "\\lang" in list(char_properties.keys()):
                char_properties["\\lang"] = \
                    get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in list(char_properties.keys()):
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in list(changes.keys()):
                    del changes[words[0]]
                defaultproperty = True
            # NOTE(review): the 'else:' branch header is missing here.
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.
            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                # NOTE(review): the 'added_lines' initialisation is missing
                # from this copy.
                for k in list(changes.keys()):
                    # exclude property k because that is already in lines[i]
                    # NOTE(review): the 'if k != words[0]:' guard is missing.
                    added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in list(changes.keys()):
                    # exclude property k because that must be added below anyway
                    # NOTE(review): the 'if k != words[0]:' guard is missing.
                    added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # NOTE(review): an 'if defaultproperty:' guard is missing here.
                # Property is reset in lines[i], so add the new stuff afterwards
                lines[i+1:i+1] = added_lines
                # NOTE(review): the 'else:' branch header is missing here.
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        # NOTE(review): the skip statements (advance and
                        # 'continue') are missing from this copy.
                lines[i+1] = lines[i+1][1:]
                # NOTE(review): the 'added_lines' initialisation is missing.
                for k in list(changes.keys()):
                    # exclude property k because that is already in lines[i]
                    # NOTE(review): the 'if k != words[0]:' guard is missing.
                    added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in list(changes.keys()):
                    # exclude property k because that must be added below anyway
                    # NOTE(review): the 'if k != words[0]:' guard is missing.
                    added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                i = i + len(added_lines)
        # NOTE(review): the outer loop's 'i = i + 1' is missing from this copy.
def revert_utf8x(document):
    " Set utf8x encoding to utf8. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no explicit encoding in the header: record the default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8x":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    " Set utf8plain encoding to utf8. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no explicit encoding in the header: record the default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8-plain":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_beamer_alert(document):
    " Revert beamer's \\alert inset back to ERT. "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # scan forward to the inset's first layout line and wrap its
        # contents in the \alert command
        while True:
            if (document.body[i][:13] == "\\begin_layout"):
                # Insert the \alert command
                document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
                break
            i = i + 1
        i = i + 1
def revert_beamer_structure(document):
    " Revert beamer's \\structure inset back to ERT. "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # scan forward to the inset's first layout line and wrap its
        # contents in the \structure command
        while True:
            if (document.body[i][:13] == "\\begin_layout"):
                document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
                break
            i = i + 1
        i = i + 1
def convert_changes(document):
    " Switch output_changes off if tracking_changes is off. "
    i = find_token(document.header, '\\tracking_changes', 0)
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    j = find_token(document.header, '\\output_changes', 0)
    if j == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    # outputting changes while not tracking them is inconsistent: disable it
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    " Set ascii encoding to auto. "
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no explicit encoding in the header: record the default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "ascii":
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    " Rename the old language names brazil/portuges to their new spellings. "
    renames = {"brazil": "brazilian",
               "portuges": "portuguese"}
    new_name = renames.get(document.language)
    if new_name is not None:
        document.language = new_name
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language %s" % new_name
def revert_language_name(document):
    " Rename brazilian/portuguese back to their old names brazil/portuges. "
    renames = {"brazilian": "brazil",
               "portuguese": "portuges"}
    old_name = renames.get(document.language)
    if old_name is not None:
        document.language = old_name
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language %s" % old_name
# \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    " The 'cv' document class was renamed to 'simplecv'. "
    renames = {"cv": "simplecv"}
    if document.textclass in renames:
        document.textclass = renames[document.textclass]
def revert_cv_textclass(document):
    " Rename the 'simplecv' document class back to 'cv'. "
    renames = {"simplecv": "cv"}
    if document.textclass in renames:
        document.textclass = renames[document.textclass]
# add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    " add scaleBeforeRotation graphics parameter. "
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            # this should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if (k != -1 and l != -1):
            document.body.insert(j, 'scaleBeforeRotation')
        i = i + 1
# remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    " remove scaleBeforeRotation graphics parameter. "
    # NOTE(review): the scan-loop scaffolding ('i = 0' / 'while True:') is
    # missing from this copy — verify against upstream lyx_1_5.py.
    i = find_token(document.body, "\\begin_inset Graphics", i)
    # NOTE(review): an 'if i == -1: return' guard is missing here.
    j = find_end_of_inset(document.body, i + 1)
    # NOTE(review): an 'if j == -1:' guard is missing before the warning.
    document.warning("Malformed LyX document: Could not find end of graphics inset.")
    # If there's a scaleBeforeRotation param, just remove that
    k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
    # NOTE(review): an 'if k != -1:' guard is missing here.
    del document.body[k]
    # NOTE(review): the 'else:' branch header is missing here.
    # if not, and if we have rotateAngle and width or height or scale,
    # we have to put the rotateAngle value to special
    rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
    special = get_value(document.body, 'special', i + 1, j)
    if rotateAngle != "":
        k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        # NOTE(review): guard lines are missing here (presumably a check on k
        # and an 'if special == "":' branch header).
        document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
        # NOTE(review): the 'else:' branch header is missing here.
        l = find_token(document.body, "\tspecial", i + 1, j)
        document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        # NOTE(review): an 'if k != -1:' guard is missing here.
        del document.body[k]
def convert_tableborder(document):
    " Remove the duplicate '|' from table cell special arguments. "
    # The problem is: LyX doubles the table cell border as it ignores the "|" character in
    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find("|>{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # delete the "|"
            document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])]
        i = i + 1
def revert_tableborder(document):
    " Re-add the '|' to table cell special arguments (inverse of convert). "
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find(">{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # add the "|"
            document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
        i = i + 1
def revert_armenian(document):
    # Revert Armenian support: switch armscii8 back to auto, load armtex in
    # the preamble and fall back to English as document language.

    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        i = find_token(document.header, "\\inputencoding", 0)
        # NOTE(review): an 'if i != -1:' guard appears to be missing here.
        document.header[i] = "\\inputencoding auto"
    # check if preamble exists, if not k is set to -1
    # NOTE(review): initialisations are missing from this copy (presumably
    # 'i = 0' and 'k = -1') — verify against upstream lyx_1_5.py.
    while i < len(document.preamble):
        # NOTE(review): 'if k == -1:' guards around the two find calls are
        # missing from this copy.
        k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
        k = document.preamble[i].find("%", 0, len(document.preamble[i]))
        # NOTE(review): the loop's 'i = i + 1' is missing from this copy.
    # add the entry \usepackage{armtex} to the document preamble
    if document.language == "armenian":
        # set the armtex entry as the first preamble line
        # NOTE(review): an 'if k != -1:' guard appears to be missing here.
        document.preamble[0:0] = ["\\usepackage{armtex}"]
        # create the preamble when it doesn't exist
        # NOTE(review): the 'else:' branch header is missing here.
        document.preamble.append('\\usepackage{armtex}')
    # Set document language from armenian to english
    if document.language == "armenian":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        # NOTE(review): an 'if i != -1:' guard appears to be missing here.
        document.header[i] = "\\language english"
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no explicit encoding in the header: record the default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding default"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language == "chinese-simplified" or \
       document.language == "chinese-traditional" or \
       document.language == "japanese" or document.language == "korean":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    " Revert preamble option \\listings_params "
    i = find_token(document.header, "\\listings_params", 0)
    if i != -1:
        document.preamble.append('\\usepackage{listings}')
        # the header value is quoted: strip the quotes for \lstset
        document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
        document.header.pop(i)
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
(docstring example fragments; most of the example is missing from this copy)
lstparams "language=Delphi"
\begin_layout Standard
\begin_layout Standard
lstinline[language=Delphi]{var i = 10;}
There can be an caption inset in this inset
\begin_layout Standard
\begin_inset Caption
\begin_layout Standard
\begin_inset LatexCommand label
'''
    # NOTE(review): the docstring closer above was restored; the loop
    # scaffolding ('i = 0' / 'while True:') is missing from this copy.
    i = find_token(document.body, '\\begin_inset listings', i)
    # NOTE(review): an 'if i == -1: break' guard is missing here.
    if not '\\usepackage{listings}' in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    j = find_end_of_inset(document.body, i + 1)
    # this should not happen
    # NOTE(review): defaults for inline/params/status are missing here.
    for line in range(i + 1, i + 4):
        if document.body[line].startswith('inline'):
            inline = document.body[line].split()[1]
        if document.body[line].startswith('lstparams'):
            params = document.body[line].split()[1].strip('"')
        if document.body[line].startswith('status'):
            status = document.body[line].split()[1].strip()
    # NOTE(review): caption/label initialisations are missing here.
    cap = find_token(document.body, '\\begin_inset Caption', i)
    # NOTE(review): an 'if cap != -1:' guard appears to be missing here.
    cap_end = find_end_of_inset(document.body, cap + 1)
    # this should not happen
    lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
    # NOTE(review): an 'if lbl != -1:' guard appears to be missing here.
    lbl_end = find_end_of_inset(document.body, lbl + 1)
    # this should not happen
    for line in document.body[lbl : lbl_end + 1]:
        if line.startswith('name '):
            label = line.split()[1].strip('"')
    for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
        if not line.startswith('\\'):
            caption += line.strip()
    # NOTE(review): deletion of the caption inset lines is missing here.
    # looking for the oneline code for lstinline
    inlinecode = document.body[find_end_of_layout(document.body,
        find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
    if len(caption) > 0:
        if len(params) == 0:
            params = 'caption={%s}' % caption
        # NOTE(review): the 'else:' branch header is missing here.
            params += ',caption={%s}' % caption
    # NOTE(review): an 'if label:' guard appears to be missing here.
        if len(params) == 0:
            params = 'label={%s}' % label
        # NOTE(review): the 'else:' branch header is missing here.
            params += ',label={%s}' % label
    # NOTE(review): an 'if params:' guard appears to be missing here.
        params = '[%s]' % params
        params = params.replace('\\', '\\backslash\n')
    if inline == 'true':
        document.body[i:(j+1)] = [r'\begin_inset ERT',
                                  'status %s' % status,
                                  r'\begin_layout %s' % document.default_layout,
                                  # NOTE(review): ERT boilerplate items missing
                                  'lstinline%s{%s}' % (params, inlinecode),
    # NOTE(review): the 'else:' branch and a 'k = ...' line are missing here.
        document.body[i: j+1] = [r'\begin_inset ERT',
                                 'status %s' % status,
                                 r'\begin_layout %s' % document.default_layout,
                                 r'begin{lstlisting}%s' % params,
                                 r'\begin_layout %s' % document.default_layout,
                                 ] + document.body[k : j - 1] + \
                                 # NOTE(review): trailing list items missing
                                 r'\begin_layout %s' % document.default_layout,
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
\begin_layout Standard
lstinputlisting{file}[opt]
'''
    # NOTE(review): the docstring closer above was restored; the loop
    # scaffolding ('i = 0' / 'while True:') is missing from this copy.
    i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
    # NOTE(review): an 'if i == -1: break' guard is missing here.
    if not '\\usepackage{listings}' in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    j = find_end_of_inset(document.body, i + 1)
    # this should not happen
    # find command line lstinputlisting{file}[options]
    cmd, file, option = '', '', ''
    if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
        cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
    option = option.replace('\\', '\\backslash\n')
    document.body[i : j + 1] = [r'\begin_inset ERT',
                                # NOTE(review): ERT boilerplate items missing
                                r'\begin_layout %s' % document.default_layout,
                                '%s%s{%s}' % (cmd, option, file),
                                # NOTE(review): closing list items missing
def revert_ext_font_sizes(document):
    " Move an explicit 10/11/12 font size of an ext* class into \\options. "
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize not in ('10', '11', '12'): return
    fontsize += 'pt'

    i = find_token(document.header, '\\paperfontsize', 0)
    document.header[i] = '\\paperfontsize default'

    i = find_token(document.header, '\\options', 0)
    if i == -1:
        # no \options line yet: insert one right after \textclass
        i = find_token(document.header, '\\textclass', 0) + 1
        document.header[i:i] = ['\\options %s' % fontsize]
    else:
        document.header[i] += ',%s' % fontsize
def convert_ext_font_sizes(document):
    " Move a 10pt/11pt/12pt class option of an ext* class to \\paperfontsize. "
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize != 'default': return

    i = find_token(document.header, '\\options', 0)
    if i == -1: return

    options = get_value(document.header, '\\options', i)

    fontsizes = '10pt', '11pt', '12pt'
    for fs in fontsizes:
        if options.find(fs) != -1:
            break
    else: # this else will only be attained if the for cycle had no match
        return

    options = options.split(',')
    for j, opt in enumerate(options):
        if opt in fontsizes:
            # strip the trailing 'pt'
            fontsize = opt[:-2]
            break
    else:
        return

    k = find_token(document.header, '\\paperfontsize', 0)
    document.header[k] = '\\paperfontsize %s' % fontsize

    del options[j]
    if options:
        document.header[i] = '\\options %s' % ','.join(options)
    else:
        del document.header[i]
def revert_separator_layout(document):
    r'''Revert --Separator-- to a lyx note
(docstring example fragments; most of the example is missing from this copy)
\begin_layout --Separator--
\begin_layout Standard
\begin_inset Note Note
\begin_layout Standard
'''
    # NOTE(review): the docstring closer above was restored; the loop
    # scaffolding ('i = 0' / 'while True:') is missing from this copy.
    i = find_token(document.body, r'\begin_layout --Separator--', i)
    # NOTE(review): an 'if i == -1: break' guard is missing here.
    j = find_end_of_layout(document.body, i + 1)
    # this should not happen
    document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
                                r'\begin_inset Note Note',
                                # NOTE(review): note boilerplate items missing
                                r'\begin_layout %s' % document.default_layout,
                                'Separate Environment',
                                # NOTE(review): closing items missing here
                                ] # NOTE(review): list closer position unverified
    document.body[ i + 1 : j] + \
def convert_arabic (document):
    " Rename the arabic language to arabic_arabtex. "
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    i = 0
    while i < len(document.body):
        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
        if h != -1:
            # change the language name
            document.body[i] = '\lang arabic_arabtex'
        i = i + 1
def revert_arabic (document):
    " Rename the arabic_arabtex language back to arabic. "
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    i = 0
    while i < len(document.body):
        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
        if h != -1:
            # change the language name
            document.body[i] = '\lang arabic'
        i = i + 1
# Conversion hub: each row maps a target file-format number to the list of
# functions that convert a document to it (convert) or back from it (revert).
supported_versions = ["1.5.0","1.5"]
convert = [[246, []],
           [247, [convert_font_settings]],
           # NOTE(review): several rows of this table are missing from this
           # copy (the format numbers below are not consecutive).
           [249, [convert_utf8]],
           [252, [convert_commandparams, convert_bibitem]],
           [254, [convert_esint]],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [261, [convert_changes]],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [271, [convert_ext_font_sizes]],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]
           # NOTE(review): the convert-list closer and the 'revert = ['
           # opener are missing from this copy.
           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [272, [revert_separator_layout]],
           # NOTE(review): the 271 and 268 rows carry identical function
           # lists — possibly a transcription artifact; confirm upstream.
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [259, [revert_utf8x]],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]
2107 if __name__ == "__main__":