1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
27 from LyX import get_encoding
30 ####################################################################
31 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the line index of the \\end_inset matching the inset that
    contains lines[i] (lines[i] itself counts as part of the inset)."""
    begin_tag, end_tag = "\\begin_inset", "\\end_inset"
    return find_end_of(lines, i, begin_tag, end_tag)
def find_end_of_layout(lines, i):
    """Return the line index of the \\end_layout matching the layout that
    contains lines[i] (lines[i] itself counts as part of the layout)."""
    begin_tag, end_tag = "\\begin_layout", "\\end_layout"
    return find_end_of(lines, i, begin_tag, end_tag)
def find_beginning_of_layout(lines, i):
    """Return the line index of the \\begin_layout opening the layout that
    contains lines[i] (lines[i] itself counts as part of the layout)."""
    begin_tag, end_tag = "\\begin_layout", "\\end_layout"
    return find_beginning_of(lines, i, begin_tag, end_tag)
45 # End of helper functions
46 ####################################################################
50 # Notes: Framed/Shaded
def revert_framed(document):
    " Revert framed notes. "
    # The Framed/Shaded note variants do not exist in the older format:
    # downgrade any such inset back to a plain Note inset.
    # NOTE(review): the loop scaffolding around these lines is not visible
    # in this chunk; `i` is presumably initialised and advanced elsewhere.
    i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
    document.body[i] = "\\begin_inset Note"
# Font conversion tables: map an old monolithic \fontscheme value to the
# per-family fonts of the new format ('default' = scheme sets no font for
# that family).  Used by convert_font_settings / revert_font_settings.
roman_fonts = {'default' : 'default', 'ae' : 'ae',
               'times' : 'times', 'palatino' : 'palatino',
               'helvet' : 'default', 'avant' : 'default',
               'newcent' : 'newcent', 'bookman' : 'bookman',
# Sans serif font selected by each old font scheme.
sans_fonts = {'default' : 'default', 'ae' : 'default',
              'times' : 'default', 'palatino' : 'default',
              'helvet' : 'helvet', 'avant' : 'avant',
              'newcent' : 'default', 'bookman' : 'default',
# Typewriter font selected by each old \fontscheme: only 'pslatex' picks a
# concrete typewriter font (courier); every other scheme keeps the default.
typewriter_fonts = dict.fromkeys(
    ('default', 'ae', 'times', 'palatino',
     'helvet', 'avant', 'newcent', 'bookman'), 'default')
typewriter_fonts['pslatex'] = 'courier'
def convert_font_settings(document):
    " Convert font settings. "
    # Locate the old single \fontscheme header line.
    i = find_token_exact(document.header, "\\fontscheme", i)
    document.warning("Malformed LyX document: Missing `\\fontscheme'.")
    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
    document.warning("Malformed LyX document: Empty `\\fontscheme'.")
    font_scheme = 'default'
    # Unknown schemes fall back to 'default' with a warning.
    if not font_scheme in roman_fonts.keys():
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
        font_scheme = 'default'
    # Replace the \fontscheme line with the new per-family settings,
    # looked up in the module-level font tables.
    document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
                              '\\font_sans %s' % sans_fonts[font_scheme],
                              '\\font_typewriter %s' % typewriter_fonts[font_scheme],
                              '\\font_default_family default',
                              '\\font_sf_scale 100',
                              '\\font_tt_scale 100']
def revert_font_settings(document):
    " Revert font settings. "
    # Collect the per-family fonts from the header (deleting the new-style
    # lines as we go), then try to map the triple back to one \fontscheme.
    fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
    for family in 'roman', 'sans', 'typewriter':
        name = '\\font_%s' % family
        i = find_token_exact(document.header, name, i)
        document.warning("Malformed LyX document: Missing `%s'." % name)
        if (insert_line < 0):
        fonts[family] = get_value(document.header, name, i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_default_family', i)
    document.warning("Malformed LyX document: Missing `\\font_default_family'.")
    font_default_family = 'default'
    font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
    del document.header[i]
    # \font_sc / \font_osf / scale settings have no old-format equivalent;
    # non-default values can only be warned about.
    i = find_token_exact(document.header, '\\font_sc', i)
    document.warning("Malformed LyX document: Missing `\\font_sc'.")
    font_sc = get_value(document.header, '\\font_sc', i, i + 1)
    del document.header[i]
    if font_sc != 'false':
        document.warning("Conversion of '\\font_sc' not yet implemented.")
    i = find_token_exact(document.header, '\\font_osf', i)
    document.warning("Malformed LyX document: Missing `\\font_osf'.")
    font_osf = get_value(document.header, '\\font_osf', i, i + 1)
    del document.header[i]
    i = find_token_exact(document.header, '\\font_sf_scale', i)
    document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
    font_sf_scale = '100'
    font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
    del document.header[i]
    if font_sf_scale != '100':
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
    i = find_token_exact(document.header, '\\font_tt_scale', i)
    document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
    font_tt_scale = '100'
    font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
    del document.header[i]
    if font_tt_scale != '100':
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
    # If the font triple matches a known scheme, write that scheme back.
    for font_scheme in roman_fonts.keys():
        if (roman_fonts[font_scheme] == fonts['roman'] and
            sans_fonts[font_scheme] == fonts['sans'] and
            typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
    # No matching scheme: fall back to 'default' and emulate the chosen
    # fonts with explicit LaTeX preamble commands instead.
    font_scheme = 'default'
    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
    for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
        if fonts['roman'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmss', 'lmss', 'cmbr':
        if fonts['sans'] == font:
            document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
    # NOTE(review): iterating the *string* 'berasans' yields single
    # characters, so the comparison below can never equal 'berasans';
    # a one-element tuple ('berasans',) was probably intended — confirm.
    for font in 'berasans':
        if fonts['sans'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmtt', 'lmtt', 'cmtl':
        if fonts['typewriter'] == font:
            document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
    for font in 'courier', 'beramono', 'luximono':
        if fonts['typewriter'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")
def revert_booktabs(document):
    " We remove the booktabs flag or everything else will become a mess. "
    # Rows with extra vertical space only exist in booktabs tables; the
    # space attributes must go away together with the booktabs flag.
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    i = find_token(document.body, "\\begin_inset Tabular", i)
    j = find_end_of_inset(document.body, i + 1)
    document.warning("Malformed LyX document: Could not find end of tabular.")
    # Scan every line of the tabular inset.
    for k in range(i, j):
        if re.search('^<features.* booktabs="true".*>$', document.body[k]):
            document.warning("Converting 'booktabs' table to normal table.")
            document.body[k] = document.body[k].replace(' booktabs="true"', '')
        if re.search(re_row, document.body[k]):
            document.warning("Removing extra row space.")
            document.body[k] = re_tspace.sub('', document.body[k])
            document.body[k] = re_bspace.sub('', document.body[k])
            document.body[k] = re_ispace.sub('', document.body[k])
def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.
    Files with an inputencoding of "auto" or "default" and multiple languages
    where at least two languages have different default encodings are encoded
    in multiple encodings for file formats < 249. These files are incorrectly
    read and written (as if the whole file was in the encoding of the main
    language).
    This is not true for files written by CJK-LyX, they are always in the
    locale encoding.

    This function
    - converts from fake unicode values to true unicode if forward is true, and
    - converts from true unicode values to fake unicode if forward is false.
    document.encoding must be set to the old value (format 248) in both cases.

    We do this here and not in LyX.py because it is far easier to do the
    necessary parsing in modern formats than in ancient ones.
    """
    # Insets whose content is in the *document* language, not the
    # surrounding paragraph language.
    inset_types = ["Foot", "Note"]
    # CJK-LyX files are entirely in one locale encoding; nothing to fix.
    if document.cjk_encoding != '':
    # Stack of the encoding active at each nesting level.
    encoding_stack = [document.encoding]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    inset_re = re.compile(r"^\\begin_inset\s(\S+)")
    if document.inputencoding == "auto" or document.inputencoding == "default":
        for i in range(len(document.body)):
            # A \lang line switches the encoding at the current level.
            result = lang_re.match(document.body[i])
                language = result.group(1)
                if language == "default":
                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                    encoding_stack[-1] = document.encoding
                    from lyx2lyx_lang import lang
                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                # Footnotes/notes restart in the document language encoding.
                if len(insets) > 0 and insets[-1] in inset_types:
                    from lyx2lyx_lang import lang
                    encoding_stack.append(lang[document.language][3])
                    encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                if len(encoding_stack) == 1:
                    # Don't remove the document encoding from the stack
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                    del encoding_stack[-1]
            elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
                inset_result = inset_re.match(document.body[i])
                    insets.append(inset_result.group(1))
            elif find_token(document.body, "\\end_inset", i, i + 1) == i:
            if encoding_stack[-1] != document.encoding:
                    # This line has been incorrectly interpreted as if it was
                    # encoded in 'encoding'.
                    # Convert back to the 8bit string that was in the file.
                    orig = document.body[i].encode(document.encoding)
                    # Convert the 8bit string that was in the file to unicode
                    # with the correct encoding.
                    document.body[i] = orig.decode(encoding_stack[-1])
                    # Convert unicode to the 8bit string that will be written
                    # to the file with the correct encoding.
                    orig = document.body[i].encode(encoding_stack[-1])
                    # Convert the 8bit string that will be written to the
                    # file to fake unicode with the encoding that will later
                    # be used when writing to the file.
                    document.body[i] = orig.decode(document.encoding)
def convert_utf8(document):
    """Switch the document encoding to utf8.

    The body must be re-encoded first (while document.encoding still holds
    the old format-248 value), and only then is the attribute updated.
    """
    convert_multiencoding(document, forward=True)
    document.encoding = "utf8"
def revert_utf8(document):
    " Set document encoding to the value corresponding to inputencoding. "
    i = find_token(document.header, "\\inputencoding", 0)
    # No \inputencoding line: add one; an explicit utf8 becomes auto.
    document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
    # Recompute the 8bit encoding from language/inputencoding using the
    # old format-248 rules, then fake-encode the body accordingly.
    document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
    convert_multiencoding(document, False)
def read_unicodesymbols():
    " Read the unicodesymbols list of unicode characters and corresponding commands."
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # NOTE(review): str.strip('lyx2lyx') strips *characters* from both ends,
    # not the literal suffix 'lyx2lyx' — verify this path manipulation.
    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
    for line in fp.readlines():
        # Normalise the quoting used in the unicodesymbols file.
        line=line.replace(' "',' ') # remove all quotation marks with spaces before
        line=line.replace('" ',' ') # remove all quotation marks with spaces after
        line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
        # flag1 and flag2 are preamble and other flags
        [ucs4,command,flag1,flag2] =line.split(None,3)
        # NOTE(review): eval() on file contents — trusted data only.
        spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
def revert_unicode(document):
    '''Transform unicode characters that can not be written using the
    document encoding to commands according to the unicodesymbols
    file. Characters that can not be replaced by commands are replaced by
    an replacement string. Flags other than 'combined' are currently not
    implemented.'''
    replacement_character = '???'
    spec_chars = read_unicodesymbols()
    # Define strings to start and end ERT and math insets
    ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
    ert_outro='\n\\end_layout\n\n\\end_inset\n'
    math_intro='\n\\begin_inset Formula $'
    math_outro='$\n\\end_inset'
    # Find unicode characters and replace them
    in_ert = False # flag set to 1 if in ERT inset
    in_math = False # flag set to 1 if in math inset
    insets = [] # list of active insets
    # Go through the file to capture all combining characters
    last_char = '' # to store the previous character
    while i < len(document.body):
        line = document.body[i]
        # Maintain the stack of open insets so we know whether we are
        # currently inside an ERT or Formula inset.
        if line.find('\\begin_inset') > -1:
            insets.append(line[13:].split()[0])
        if line.find('\\end_inset') > -1:
        # Try to write the line
        # If all goes well the line is written here
        dummy = line.encode(document.encoding)
        # Error, some character(s) in the line need to be replaced
        for character in line:
            # Try to write the character
            dummy = character.encode(document.encoding)
            mod_line += character
            last_char = character
            # Try to replace with ERT/math inset
            if spec_chars.has_key(character):
                command = spec_chars[character][0] # the command to replace unicode
                flag1 = spec_chars[character][1]
                flag2 = spec_chars[character][2]
                if flag1.find('combining') > -1 or flag2.find('combining') > -1:
                    # We have a character that should be combined with the previous
                    command += '{' + last_char + '}'
                    # Remove the last character. Ignore if it is whitespace
                    if len(last_char.rstrip()):
                        # last_char was found and is not whitespace
                        mod_line = mod_line[:-1]
                    else: # last_char belongs to the last line
                        document.body[i-1] = document.body[i-1][:-1]
                # The last character was replaced by a command. For now it is
                # ignored. This could be handled better.
                if command[0:2] == '\\\\':
                    if command[2:12]=='ensuremath':
                        if insets[-1] == "ERT":
                            # math in ERT: open/close the math with $ signs
                            command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
                            command = command.replace('}', '$\n')
                        elif insets[-1] != "Formula":
                            # add a math inset with the replacement character
                            command = command.replace('\\\\ensuremath{\\', math_intro)
                            command = command.replace('}', math_outro)
                        # we are already in a math inset
                        command = command.replace('\\\\ensuremath{\\', '')
                        command = command.replace('}', '')
                    if insets[-1] == "Formula":
                        # avoid putting an ERT in a math; instead put command as text
                        command = command.replace('\\\\', '\mathrm{')
                        command = command + '}'
                    elif insets[-1] != "ERT":
                        # add an ERT inset with the replacement character
                        command = command.replace('\\\\', ert_intro)
                        command = command + ert_outro
                    command = command.replace('\\\\', '\n\\backslash\n')
                last_char = '' # indicate that the character should not be removed
            # Replace with replacement string
            mod_line += replacement_character
    # Splice the rewritten (possibly multi-line) text back into the body.
    document.body[i:i+1] = mod_line.split('\n')
    i += len(mod_line.split('\n'))
def revert_cs_label(document):
    " Remove status flag of charstyle label. "
    i = find_token(document.body, "\\begin_inset CharStyle", i)
    # Seach for a line starting 'show_label'
    # If it is not there, break with a warning message
    if (document.body[i][:10] == "show_label"):
    elif (document.body[i][:13] == "\\begin_layout"):
        document.warning("Malformed LyX document: Missing 'show_label'.")
def convert_bibitem(document):
    r"""Convert a raw bibitem line
    \bibitem [option]{argument}
    into the inset form
    \begin_inset LatexCommand bibitem
    ...
    \end_inset
    This must be called after convert_commandparams.
    """
    i = find_token(document.body, "\\bibitem", i)
    # Extract the optional [option] part, if present.
    j = document.body[i].find('[') + 1
    k = document.body[i].rfind(']')
    if j == 0: # No optional argument found
    option = document.body[i][j:k]
    # Extract the mandatory {argument} part.
    j = document.body[i].rfind('{') + 1
    k = document.body[i].rfind('}')
    argument = document.body[i][j:k]
    # Build the inset, escaping embedded double quotes.
    lines = ['\\begin_inset LatexCommand bibitem']
    lines.append('label "%s"' % option.replace('"', '\\"'))
    lines.append('key "%s"' % argument.replace('"', '\\"'))
    lines.append('\\end_inset')
    document.body[i:i+1] = lines
# command : [option1, option2, argument]
# Parameter names for each LatexCommand inset: the names under which the
# first optional argument, second optional argument, and mandatory argument
# are stored ('' = the command does not accept that slot).
commandparams_info = {
    "bibitem" : ["label", "", "key"],
    "bibtex" : ["options", "btprint", "bibfiles"],
    "hfill" : ["", "", ""],
    "index" : ["", "", "name"],
    "printindex" : ["", "", "name"],
    "label" : ["", "", "name"],
    "eqref" : ["name", "", "reference"],
    "pageref" : ["name", "", "reference"],
    "prettyref" : ["name", "", "reference"],
    "ref" : ["name", "", "reference"],
    "vpageref" : ["name", "", "reference"],
    "vref" : ["name", "", "reference"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl" : ["name", "", "target"],
    "url" : ["name", "", "target"]}
# All citation commands share the same parameter names; generate their
# entries instead of spelling out 26 identical lines.
for _cite_cmd in ("cite", "citet", "citep", "citealt", "citealp",
                  "citeauthor", "citeyear", "citeyearpar",
                  "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
                  "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
                  "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
                  "citefield", "citetitle", "cite*"):
    commandparams_info[_cite_cmd] = ["after", "before", "key"]
def convert_commandparams(document):
    r"""Convert insets of the form
    \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
    into the named-parameter form
    \begin_inset LatexCommand cmdname
    ...
    where the parameter names come from commandparams_info and
    name1, name2 and name3 can be different for each command.
    """
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.
    i = find_token(document.body, "\\begin_inset LatexCommand", i)
    # Everything after the inset token is the raw \cmd[...]{...} text.
    command = document.body[i][26:].strip()
    document.warning("Malformed LyX document: Missing LatexCommand name.")
    j = find_token(document.body, "\\end_inset", i + 1)
    document.warning("Malformed document")
    # The command text may be wrapped over several lines; rejoin it.
    command += "".join(document.body[i+1:j])
    document.body[i+1:j] = []
    # The following parser is taken from the original InsetCommandParams::scanCommand
    # Used to handle things like \command[foo[bar]]{foo{bar}}
    if ((state == "CMDNAME" and c == ' ') or
        (state == "CMDNAME" and c == '[') or
        (state == "CMDNAME" and c == '{')):
    if ((state == "OPTION" and c == ']') or
        (state == "SECOPTION" and c == ']') or
        (state == "CONTENT" and c == '}')):
        nestdepth = nestdepth - 1
    if ((state == "OPTION" and c == '[') or
        (state == "SECOPTION" and c == '[') or
        (state == "CONTENT" and c == '{')):
        nestdepth = nestdepth + 1
    # Accumulate the current character into the active slot.
    if state == "CMDNAME":
    elif state == "OPTION":
    elif state == "SECOPTION":
    elif state == "CONTENT":
    elif c == '[' and b != ']':
        nestdepth = 0 # Just to be sure
    elif c == '[' and b == ']':
        nestdepth = 0 # Just to be sure
        nestdepth = 0 # Just to be sure
    # Now we have parsed the command, output the parameters
    lines = ["\\begin_inset LatexCommand %s" % name]
    if commandparams_info[name][0] == "":
        document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
        lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
    if commandparams_info[name][1] == "":
        document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
        lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
    if commandparams_info[name][2] == "":
        document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
        lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
    document.body[i:i+1] = lines
def revert_commandparams(document):
    """Revert named-parameter LatexCommand insets back to the
    \\cmd[opt1][opt2]{arg} single-line form (convert_commandparams inverse)."""
    regex = re.compile(r'(\S+)\s+(.+)')
    i = find_token(document.body, "\\begin_inset LatexCommand", i)
    name = document.body[i].split()[2]
    j = find_end_of_inset(document.body, i + 1)
    # Collect the named parameters stored inside the inset.
    for k in range(i + 1, j):
        match = re.match(regex, document.body[k])
        pname = match.group(1)
        pvalue = match.group(2)
        if pname == "preview":
            preview_line = document.body[k]
        elif (commandparams_info[name][0] != "" and
              pname == commandparams_info[name][0]):
            option1 = pvalue.strip('"').replace('\\"', '"')
        elif (commandparams_info[name][1] != "" and
              pname == commandparams_info[name][1]):
            option2 = pvalue.strip('"').replace('\\"', '"')
        elif (commandparams_info[name][2] != "" and
              pname == commandparams_info[name][2]):
            argument = pvalue.strip('"').replace('\\"', '"')
        elif document.body[k].strip() != "":
            document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
    # bibitem reverts to a raw \bibitem line, everything else stays an inset.
    if name == "bibitem":
        lines = ["\\bibitem {%s}" % argument]
        lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
    # Emit the combination of options that were actually present.
    lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
    lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
    lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
    lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
    if name != "bibitem":
        if preview_line != "":
            lines.append(preview_line)
        lines.append('\\end_inset')
    document.body[i:j+1] = lines
def revert_nomenclature(document):
    " Convert nomenclature entry to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
    i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
    j = find_end_of_inset(document.body, i + 1)
    # Collect the inset parameters (symbol, description, optional prefix).
    for k in range(i + 1, j):
        match = re.match(regex, document.body[k])
        name = match.group(1)
        value = match.group(2)
        if name == "preview":
            preview_line = document.body[k]
        elif name == "symbol":
            symbol = value.strip('"').replace('\\"', '"')
        elif name == "description":
            description = value.strip('"').replace('\\"', '"')
        elif name == "prefix":
            prefix = value.strip('"').replace('\\"', '"')
        elif document.body[k].strip() != "":
            document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
    # Rebuild the raw LaTeX command, with or without the optional prefix.
    command = 'nomenclature{%s}{%s}' % (symbol, description)
    command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
    # Replace the inset by an ERT inset containing the command.
    document.body[i:j+1] = ['\\begin_inset ERT',
                            '\\begin_layout %s' % document.default_layout,
    # Make sure the nomencl package is loaded exactly once.
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def revert_printnomenclature(document):
    " Convert printnomenclature to ERT. "
    regex = re.compile(r'(\S+)\s+(.+)')
    i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
    j = find_end_of_inset(document.body, i + 1)
    # Collect the inset parameters (only labelwidth is meaningful here).
    for k in range(i + 1, j):
        match = re.match(regex, document.body[k])
        name = match.group(1)
        value = match.group(2)
        if name == "preview":
            preview_line = document.body[k]
        elif name == "labelwidth":
            labelwidth = value.strip('"').replace('\\"', '"')
        elif document.body[k].strip() != "":
            document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
    # Rebuild the raw LaTeX command, with or without the label width.
    command = 'nomenclature{}'
    command = 'nomenclature[%s]' % labelwidth
    # Replace the inset by an ERT inset containing the command.
    document.body[i:j+1] = ['\\begin_inset ERT',
                            '\\begin_layout %s' % document.default_layout,
    # Make sure the nomencl package is loaded exactly once.
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def convert_esint(document):
    " Add \\use_esint setting to header. "
    # Insert \use_esint right before \cite_engine in the header.
    i = find_token(document.header, "\\cite_engine", 0)
    document.warning("Malformed LyX document: Missing `\\cite_engine'.")
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')
def revert_esint(document):
    " Remove \\use_esint setting from header. "
    i = find_token(document.header, "\\use_esint", 0)
    document.warning("Malformed LyX document: Missing `\\use_esint'.")
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    # If esint was enabled, load the package explicitly in the preamble.
    document.preamble.append('\\usepackage{esint}')
def revert_clearpage(document):
    " \\clearpage -> ERT "
    i = find_token(document.body, "\\clearpage", i)
    # Replace the native \clearpage line with an ERT inset.
    document.body[i:i+1] = ['\\begin_inset ERT',
                            '\\begin_layout %s' % document.default_layout,
def revert_cleardoublepage(document):
    " cleardoublepage -> ERT "
    i = find_token(document.body, "\\cleardoublepage", i)
    # Replace the native \cleardoublepage line with an ERT inset.
    document.body[i:i+1] = ['\\begin_inset ERT',
                            '\\begin_layout %s' % document.default_layout,
def convert_lyxline(document):
    " remove fontsize commands for \lyxline "
    # The problematic is: The old \lyxline definition doesn't handle the fontsize
    # to change the line thickness. The new definiton does this so that imported
    # \lyxlines would have a different line thickness. The eventual fontsize command
    # before \lyxline is therefore removed to get the same output.
    fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
                 "large", "Large", "LARGE", "huge", "Huge"]
    for n in range(0, len(fontsizes)):
        while i < len(document.body):
            i = find_token(document.body, "\\size " + fontsizes[n], i)
            k = find_token(document.body, "\\lyxline", i)
            # the corresponding fontsize command is always 2 lines before the \lyxline
            if (i != -1 and k == i+2):
                document.body[i:i+1] = []
def revert_encodings(document):
    " Set new encodings to auto. "
    # Encodings that did not exist in the old format; they fall back to auto.
    encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
    i = find_token(document.header, "\\inputencoding", 0)
    document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc in encodings:
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def convert_caption(document):
    " Convert caption layouts to caption insets. "
    i = find_token(document.body, "\\begin_layout Caption", i)
    j = find_end_of_layout(document.body, i)
    document.warning("Malformed LyX document: Missing `\\end_layout'.")
    # Close the inset and outer layout at the end, then replace the Caption
    # layout line with a standard layout wrapping a Caption inset.
    document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
    document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
                            "\\begin_inset Caption", "",
                            "\\begin_layout %s" % document.default_layout]
def revert_caption(document):
    " Convert caption insets to caption layouts. "
    " This assumes that the text class has a caption style. "
    i = find_token(document.body, "\\begin_inset Caption", i)
    # We either need to delete the previous \begin_layout line, or we
    # need to end the previous layout if this inset is not in the first
    # position of the paragraph.
    layout_before = find_token_backwards(document.body, "\\begin_layout", i)
    if layout_before == -1:
        document.warning("Malformed LyX document: Missing `\\begin_layout'.")
    layout_line = document.body[layout_before]
    del_layout_before = True
    l = layout_before + 1
    # Only empty lines may sit between the layout line and the inset if the
    # layout line is to be deleted.
    if document.body[l] != "":
        del_layout_before = False
    if del_layout_before:
        del document.body[layout_before:i]
    document.body[i:i] = ["\\end_layout", ""]
    # Find start of layout in the inset and end of inset
    j = find_token(document.body, "\\begin_layout", i)
    document.warning("Malformed LyX document: Missing `\\begin_layout'.")
    k = find_end_of_inset(document.body, i)
    document.warning("Malformed LyX document: Missing `\\end_inset'.")
    # We either need to delete the following \end_layout line, or we need
    # to restart the old layout if this inset is not at the paragraph end.
    layout_after = find_token(document.body, "\\end_layout", k)
    if layout_after == -1:
        document.warning("Malformed LyX document: Missing `\\end_layout'.")
    del_layout_after = True
    while l < layout_after:
        if document.body[l] != "":
            del_layout_after = False
    del document.body[k+1:layout_after+1]
    document.body[k+1:k+1] = [layout_line, ""]
    # delete \begin_layout and \end_inset and replace \begin_inset with
    # "\begin_layout Caption". This works because we can only have one
    # paragraph in the caption inset: The old \end_layout will be recycled.
    if document.body[k] == "":
    if document.body[j] == "":
    document.body[i] = "\\begin_layout Caption"
    if document.body[i+1] == "":
        del document.body[i+1]
# Accents of InsetLaTeXAccent
# Maps the old accent letter to the corresponding Unicode combining
# character (appended after the base character, then NFC-normalised).
    "`" : u'\u0300', # grave
    "'" : u'\u0301', # acute
    "^" : u'\u0302', # circumflex
    "~" : u'\u0303', # tilde
    "=" : u'\u0304', # macron
    "u" : u'\u0306', # breve
    "." : u'\u0307', # dot above
    "\"": u'\u0308', # diaeresis
    "r" : u'\u030a', # ring above
    "H" : u'\u030b', # double acute
    "v" : u'\u030c', # caron
    "b" : u'\u0320', # minus sign below
    "d" : u'\u0323', # dot below
    "c" : u'\u0327', # cedilla
    "k" : u'\u0328', # ogonek
    "t" : u'\u0361' # tie. This is special: It spans two characters, but
                    # only one is given as argument, so we don't need to
                    # treat it differently.
# special accents of InsetLaTeXAccent without argument
special_accent_map = {
    'i' : u'\u0131', # dotless i
    'j' : u'\u0237', # dotless j
    'l' : u'\u0142', # l with stroke
    'L' : u'\u0141' # L with stroke
# special accent arguments of InsetLaTeXAccent
    '\\i' : u'\u0131', # dotless i
    '\\j' : u'\u0237' # dotless j
def _convert_accent(accent, accented_char):
    # Translate one old-style (accent, base char) pair to its precomposed
    # Unicode form; the lookup tables above drive the conversion.
    char = accented_char
    # Argument-less special accents (dotless i/j, stroked l/L).
    if type in special_accent_map:
        return special_accent_map[type]
    # a missing char is treated as space by LyX
    elif type == 'q' and char in ['t', 'd', 'l', 'L']:
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
    elif char in accented_map:
        char = accented_map[char]
    elif (len(char) > 1):
        # We can only convert accents on a single char
    a = accent_map.get(type)
    # Compose base char + combining accent and normalise to NFC.
    return unicodedata.normalize("NFC", "%s%s" % (char, a))
def convert_ertbackslash(body, i, ert, default_layout):
    r""" -------------------------------------------------------------------------------------------
    Convert backslashes and '\n' into valid ERT code, append the converted
    text to body[i] and return the (maybe incremented) line index i"""
    # NOTE(review): the loop over the characters of `ert` and its branch
    # conditions are elided in this chunk; only the branch bodies and the
    # final append are visible.
        # A backslash becomes the literal ERT token '\backslash '.
        body[i] = body[i] + '\\backslash '
        # (elided lines)
        # A newline closes the current layout and opens a fresh one.
        body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
        # (elided lines)
        # Any other character is appended verbatim.
        body[i] = body[i] + c
    # (elided -- presumably `return i`)
def convert_accent(document):
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a' (also accepted)
    # '\i \"' (also accepted)
    # NOTE(review): the scan loop and several guard lines are elided in this
    # chunk; '# (elided)' comments mark the visible gaps.
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    # (elided -- presumably `i = 0` and a scan loop)
        i = find_re(document.body, re_wholeinset, i)
        # (elided -- presumably the `i == -1` exit)
        match = re_wholeinset.match(document.body[i])
        prefix = match.group(1)
        contents = match.group(3).strip()
        match = re_contents.match(contents)
        # (elided -- presumably `if match:`)
        # Strip first char (always \)
        accent = match.group(1)[1:]
        accented_contents = match.group(2).strip()
        match = re_accentedcontents.match(accented_contents)
        accented_char = match.group(1)
        converted = _convert_accent(accent, accented_char)
        # (elided -- presumably a test whether conversion succeeded)
        # Normalize contents
        # NOTE(review): the trailing comma makes `contents` a 1-tuple, not a
        # string -- looks like a bug; verify against the warning below.
        contents = '%s{%s}' % (accent, accented_char),
        # (elided lines)
        document.body[i] = '%s%s' % (prefix, converted)
        # (elided lines -- fallback path converts the unknown inset to ERT)
        document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
        document.body[i] = prefix
        document.body[i+1:i+1] = ['\\begin_inset ERT',
        # (elided list items)
                                  '\\begin_layout %s' % document.default_layout,
        # (elided list items and lines)
        i = convert_ertbackslash(document.body, i + 7,
        # (elided argument line)
                                 document.default_layout)
        document.body[i+1:i+1] = ['\\end_layout',
        # (elided list items and loop increment)
def revert_accent(document):
    """Replace accented characters that the target encoding cannot encode
    with old-style InsetLaTeXAccent constructs ('\\i \\<accent>{<char>}').
    NOTE(review): numerous guard/branch lines are elided in this chunk;
    '# (elided)' comments mark the visible gaps."""
    # Build inverse lookup tables of the three module-level accent maps.
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    numberoflines = len(document.body)
    for i in range(numberoflines-1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
            # (elided -- presumably `continue`)
        if (document.body[i+1][0] in inverse_accent_map):
            # the last character of this line and the first of the next line
            # form probably a surrogate pair.
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(numberoflines):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
        # (elided -- presumably `try:`)
            document.body[i] = unicodedata.normalize("NFD", document.body[i])
        # (elided -- presumably the except clause; `unicode` is Python 2 only)
            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding. (elided lines here)
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    # (elided -- presumably `i = 0`)
    while i < len(document.body):
        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line
            result = lang_re.match(document.body[i])
            # (elided -- presumably `if result:`)
            language = result.group(1)
            if language == "default":
                encoding_stack[-1] = document.encoding
            # (elided -- presumably `else:`)
                from lyx2lyx_lang import lang
                encoding_stack[-1] = lang[language][3]
        # (elided lines)
        elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
            encoding_stack.append(encoding_stack[-1])
        # (elided lines)
        elif find_token(document.body, "\\end_layout", i, i + 1) == i:
            del encoding_stack[-1]
        # (elided lines)
        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                # (elided -- presumably `try:`)
                dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented character
                    # (elided -- presumably `if j > 0:`)
                    document.body[i] = document.body[i][:j-1]
                    # (elided -- presumably `else:`)
                    document.body[i] = u''
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
                    # (elided -- presumably `break`)
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                    # (elided -- presumably `accented_char = ''`)
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                # (elided -- presumably `try:`)
                dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented characters
                    # (elided -- presumably `if j > 1:`)
                    document.body[i] = document.body[i][:j-2]
                    # (elided -- presumably `else:`)
                    document.body[i] = u''
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
                    # (elided -- presumably `break`)
        # (elided -- presumably the loop increment)
    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(numberoflines):
        document.body[i] = unicodedata.normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside."""

    # Default (reset) value for each handled font property.
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       # (elided entries at the missing lines -- likely
                       #  further properties such as \bar / \noun; TODO confirm)
                       "\\shape": "default",
                       "\\family": "default"}
    return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """Strip leading/trailing whitespace out of font-language changes.

    Format 259 already moved whitespace outside of most font-property
    changes, but the \\lang property was forgotten back then.  Apply the
    very same normalization here, restricted to \\lang only.
    """
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """ Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's language; otherwise, the paragraph's language is the
    document's language."""

    lines = document.body

    # First non-empty line after the opening \begin_layout of the paragraph.
    first_nonempty_line = \
        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)

    words = lines[first_nonempty_line].split()

    if len(words) > 1 and words[0] == "\\lang":
        # FIX(review): this return was missing, which made the \lang test
        # a no-op and always fell through to the document language.
        return words[1]

    return document.language
def normalize_font_whitespace(document, char_properties):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided char_properties is handled by this function."""
    # NOTE(review): several control-flow lines of this function are elided in
    # this chunk; '# (elided)' comments mark the visible gaps.

    if document.backend != "latex":
        # (elided -- presumably `return`; only the latex backend is affected)

    lines = document.body
    # (elided -- presumably `changes = {}` and `i = 0` initializations)
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            # (elided -- presumably clears `changes`)
            # also reset the default language to be the paragraph's language
            if "\\lang" in char_properties.keys():
                char_properties["\\lang"] = \
                    get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in char_properties.keys():
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in changes.keys():
                    del changes[words[0]]
                defaultproperty = True
            # (elided -- presumably `else:` / the property gets set)
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.
            # (elided lines)
            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                # (elided -- presumably `added_lines = [" "]`)
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    # (elided -- presumably `if k != words[0]:`)
                    added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    # (elided -- presumably `if k != words[0]:`)
                    added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # (elided -- presumably `if defaultproperty:`)
                    # Property is reset in lines[i], so add the new stuff afterwards
                    lines[i+1:i+1] = added_lines
                # (elided -- presumably `else:`)
                    # Reset property for the space
                    added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                    lines[i:i] = added_lines
                    i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                        # (elided -- presumably advance and `continue`)
                lines[i+1] = lines[i+1][1:]
                # (elided -- presumably `added_lines = [" "]`)
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    # (elided -- presumably `if k != words[0]:`)
                    added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    # (elided -- presumably `if k != words[0]:`)
                    added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                i = i + len(added_lines)
        # (elided -- presumably the loop increment `i = i + 1`)
def revert_utf8x(document):
    """Replace the unsupported 'utf8x' input encoding with plain 'utf8'.

    If the header has no \\inputencoding line at all, append the default
    '\\inputencoding auto'.  Finally re-read the (possibly changed) value
    into document.inputencoding.
    """
    i = find_token(document.header, "\\inputencoding", 0)
    # FIX(review): the found/not-found branch lines were missing, which made
    # the fallback append unconditional; restore the guard structure.
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8x":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    """Replace the unsupported 'utf8-plain' input encoding with plain 'utf8'.

    If the header has no \\inputencoding line at all, append the default
    '\\inputencoding auto'.  Finally re-read the (possibly changed) value
    into document.inputencoding.
    """
    i = find_token(document.header, "\\inputencoding", 0)
    # FIX(review): restore the missing found/not-found guard structure.
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8-plain":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_beamer_alert(document):
    " Revert beamer's \\alert inset back to ERT. "
    # NOTE(review): the search loop and exit/advance lines are elided in
    # this chunk; '# (elided)' comments mark the visible gaps.
    # (elided -- presumably `i = 0` and a scan loop)
        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
        # (elided -- presumably the `i == -1` exit)
        # Turn the CharStyle inset into an ERT inset.
        document.body[i] = "\\begin_inset ERT"
        # (elided -- advance to the inset's first layout line)
        if (document.body[i][:13] == "\\begin_layout"):
            # Insert the \alert command
            document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
        # (elided -- loop increments)
def revert_beamer_structure(document):
    " Revert beamer's \\structure inset back to ERT. "
    # NOTE(review): the search loop and exit/advance lines are elided in
    # this chunk; '# (elided)' comments mark the visible gaps.
    # (elided -- presumably `i = 0` and a scan loop)
        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
        # (elided -- presumably the `i == -1` exit)
        # Turn the CharStyle inset into an ERT inset.
        document.body[i] = "\\begin_inset ERT"
        # (elided -- advance to the inset's first layout line)
        if (document.body[i][:13] == "\\begin_layout"):
            # Wrap the inset content in the \structure command.
            document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
        # (elided -- loop increments)
def convert_changes(document):
    " Switch output_changes off if tracking_changes is off. "
    i = find_token(document.header, '\\tracking_changes', 0)
    # FIX(review): the `== -1` guards and early returns after each warning
    # were missing; without them the lookups below would use bad indices.
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    j = find_token(document.header, '\\output_changes', 0)
    if j == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    # Outputting changes without tracking them makes no sense: switch it off.
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    """Replace the 'ascii' input encoding (new in 1.5) with 'auto'.

    If the header has no \\inputencoding line at all, append the default
    '\\inputencoding auto'.  Finally re-read the (possibly changed) value
    into document.inputencoding.
    """
    i = find_token(document.header, "\\inputencoding", 0)
    # FIX(review): restore the missing found/not-found guard structure.
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "ascii":
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    """Rename obsolete babel language names ('brazil', 'portuges') to the
    names used by LyX 1.5, updating both document.language and the header."""
    renames = {"brazil": "brazilian", "portuges": "portuguese"}
    new_name = renames.get(document.language)
    if new_name is not None:
        document.language = new_name
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language %s" % document.language
def revert_language_name(document):
    """Map the LyX 1.5 language names back to their pre-1.5 babel names,
    updating both document.language and the header."""
    renames = {"brazilian": "brazil", "portuguese": "portuges"}
    old_name = renames.get(document.language)
    if old_name is not None:
        document.language = old_name
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language %s" % document.language
# \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    """The 'cv' document class was renamed to 'simplecv'; follow suit."""
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
def revert_cv_textclass(document):
    """Map the renamed 'simplecv' document class back to its old name 'cv'."""
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
# add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    " add scaleBeforeRotation graphics parameter. "
    # NOTE(review): the search loop and guard lines are elided in this
    # chunk; '# (elided)' comments mark the visible gaps.
    # (elided -- presumably `i = 0` and a scan loop)
        i = find_token(document.body, "\\begin_inset Graphics", i)
        # (elided -- presumably the `i == -1` exit)
        j = find_end_of_inset(document.body, i+1)
        # (elided -- presumably `if j == -1:`)
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # Search for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if (k != -1 and l != -1):
            document.body.insert(j, 'scaleBeforeRotation')
        # (elided -- loop increment)
# remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    " remove scaleBeforeRotation graphics parameter. "
    # NOTE(review): the search loop and guard lines are elided in this
    # chunk; '# (elided)' comments mark the visible gaps.
    # (elided -- presumably `i = 0` and a scan loop)
        i = find_token(document.body, "\\begin_inset Graphics", i)
        # (elided -- presumably the `i == -1` exit)
        j = find_end_of_inset(document.body, i + 1)
        # (elided -- presumably `if j == -1:`)
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # If there's a scaleBeforeRotation param, just remove that
        k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
        # (elided -- presumably `if k != -1:`)
            del document.body[k]
        # (elided -- presumably `else:`)
            # if not, and if we have rotateAngle and width or height or scale,
            # we have to put the rotateAngle value to special
            rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
            special = get_value(document.body, 'special', i + 1, j)
            if rotateAngle != "":
                k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
                # (elided guards -- presumably the `special` empty/non-empty split)
                    document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
                # (elided -- presumably `else:` merge into the existing special)
                    l = find_token(document.body, "\tspecial", i + 1, j)
                    document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
                k = find_token(document.body, "\trotateAngle", i + 1, j)
                # (elided -- presumably `if k != -1:`)
                    del document.body[k]
        # (elided -- loop increment)
def convert_tableborder(document):
    # The problematic is: LyX double the table cell border as it ignores the "|" character in
    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
    # NOTE(review): the `i` initialization and loop increment lines are
    # elided in this chunk.
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find("|>{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # (elided line)
            # Drop the "|" that precedes ">{".
            # NOTE(review): this slice also drops the line's final character
            # (`len(...)-1` end bound) -- verify that is intended.
            document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
        # (elided -- loop increment)
def revert_tableborder(document):
    # Reinsert the "|" in front of ">{" for cells with leftline="true",
    # undoing convert_tableborder.
    # NOTE(review): the `i` initialization and loop increment lines are
    # elided in this chunk.
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find(">{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # (elided line)
            document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
        # (elided -- loop increment)
def revert_armenian(document):
    """Revert Armenian support: switch armscii8 back to auto, load armtex in
    the preamble, and fall back to English as the document language.
    NOTE(review): several guard lines are elided in this chunk."""
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        i = find_token(document.header, "\\inputencoding", 0)
        # (elided -- presumably `if i != -1:`)
            document.header[i] = "\\inputencoding auto"
    # check if preamble exists, if not k is set to -1
    # (elided -- presumably `i = 0` and `k = -1` initializations)
    while i < len(document.preamble):
        # (elided -- presumably `if k == -1:`)
        k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
        # (elided -- presumably `if k == -1:`)
        k = document.preamble[i].find("%", 0, len(document.preamble[i]))
        # (elided -- loop increment)
    # add the entry \usepackage{armtex} to the document preamble
    if document.language == "armenian":
        # set the armtex entry as the first preamble line
        # (elided -- presumably `if k != -1:`)
        document.preamble[0:0] = ["\\usepackage{armtex}"]
        # create the preamble when it doesn't exist
        # (elided -- presumably `else:`)
        document.preamble.append('\\usepackage{armtex}')
    # Set document language from armenian to english
    if document.language == "armenian":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        # (elided -- presumably `if i != -1:`)
        document.header[i] = "\\language english"
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    i = find_token(document.header, "\\inputencoding", 0)
    # FIX(review): the found/not-found branch lines were missing; restore
    # the guard structure used by the other revert_* encoding routines.
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding default"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    # Older formats have no CJK languages: fall back to English.
    if document.language == "chinese-simplified" or \
       document.language == "chinese-traditional" or \
       document.language == "japanese" or document.language == "korean":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    " Revert preamble option \\listings_params "
    # Move the header's \listings_params value into an explicit
    # \lstset{...} call in the LaTeX preamble, then drop the header line.
    i = find_token(document.header, "\\listings_params", 0)
    # FIX(review): the `i != -1` guard line was missing; without it the
    # header accesses below would fail when the option is absent.
    if i != -1:
        document.preamble.append('\\usepackage{listings}')
        document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
        document.header.pop(i)
def revert_listings_inset(document):
    r'''Revert a listings inset to raw LaTeX ERT: an inline inset becomes
    \lstinline[params]{code}, a display inset becomes a
    \begin{lstlisting}...\end{lstlisting} block.  A Caption inset inside
    the listing (possibly containing a LatexCommand label) is folded into
    caption={...} / label={...} entries of the optional parameter list.
    NOTE(review): the original example-laden docstring and many control-flow
    lines are elided in this chunk; '# (elided)' comments mark the gaps.
    '''
    # (elided -- presumably `i = 0` and a scan loop)
        i = find_token(document.body, '\\begin_inset listings', i)
        # (elided -- presumably the `i == -1` exit)
        if not '\\usepackage{listings}' in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        # (elided guard)
            # this should not happen
        # (elided -- presumably defaults for inline/params/status)
        for line in range(i + 1, i + 4):
            if document.body[line].startswith('inline'):
                inline = document.body[line].split()[1]
            if document.body[line].startswith('lstparams'):
                params = document.body[line].split()[1].strip('"')
            if document.body[line].startswith('status'):
                status = document.body[line].split()[1].strip()
        # (elided lines)
        cap = find_token(document.body, '\\begin_inset Caption', i)
        # (elided guard -- caption handling only if found)
        cap_end = find_end_of_inset(document.body, cap + 1)
        # (elided guard)
            # this should not happen
        lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
        # (elided guard)
        lbl_end = find_end_of_inset(document.body, lbl + 1)
        # (elided guard)
            # this should not happen
        # (elided -- presumably defaults for label/caption)
        for line in document.body[lbl : lbl_end + 1]:
            if line.startswith('name '):
                label = line.split()[1].strip('"')
        # (elided line)
        for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
            if not line.startswith('\\'):
                caption += line.strip()
        # (elided lines)
        # looking for the oneline code for lstinline
        inlinecode = document.body[find_end_of_layout(document.body,
            find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
        if len(caption) > 0:
            if len(params) == 0:
                params = 'caption={%s}' % caption
            # (elided -- presumably `else:`)
                params += ',caption={%s}' % caption
        # (elided -- presumably `if len(label) > 0:`)
            if len(params) == 0:
                params = 'label={%s}' % label
            # (elided -- presumably `else:`)
                params += ',label={%s}' % label
        # (elided -- presumably `if len(params) > 0:`)
            params = '[%s]' % params
            params = params.replace('\\', '\\backslash\n')
        if inline == 'true':
            document.body[i:(j+1)] = [r'\begin_inset ERT',
                                      'status %s' % status,
                                      r'\begin_layout %s' % document.default_layout,
                                      # (elided list items)
                                      'lstinline%s{%s}' % (params, inlinecode),
                                      # (elided list items)
        # (elided -- presumably `else:` for the display listing)
            document.body[i: j+1] = [r'\begin_inset ERT',
                                     'status %s' % status,
                                     # (elided list items)
                                     r'\begin_layout %s' % document.default_layout,
                                     # (elided list items)
                                     r'begin{lstlisting}%s' % params,
                                     # (elided list items)
                                     ] + document.body[k : j - 1] + \
                                     # (elided continuation)
                                     r'\begin_layout %s' % document.default_layout,
                                     # (elided remainder and loop increment)
def revert_include_listings(document):
    r'''Revert an "Include \lstinputlisting{file}[opt]" inset back to an ERT
    inset that emits \lstinputlisting{file}[opt] directly.
    NOTE(review): the original example docstring and several control-flow
    lines are elided in this chunk; '# (elided)' comments mark the gaps.
    '''
    # (elided -- presumably `i = 0` and a scan loop)
        i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
        # (elided -- presumably the `i == -1` exit)
        if not '\\usepackage{listings}' in document.preamble:
            document.preamble.append('\\usepackage{listings}')
        j = find_end_of_inset(document.body, i + 1)
        # (elided guard)
            # this should not happen
        # find command line lstinputlisting{file}[options]
        cmd, file, option = '', '', ''
        if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
            cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
        option = option.replace('\\', '\\backslash\n')
        document.body[i : j + 1] = [r'\begin_inset ERT',
                                    # (elided list items)
                                    r'\begin_layout %s' % document.default_layout,
                                    # (elided list items)
                                    '%s%s{%s}' % (cmd, option, file),
                                    # (elided list items and loop increment)
def revert_ext_font_sizes(document):
    """Move the font size of an 'ext*' textclass back into \\options.

    LyX versions before 1.5 stored the ext-class font size as a class
    option ('10pt'/'11pt'/'12pt') rather than in \\paperfontsize.
    """
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize not in ('10', '11', '12'): return
    # Class options use the '10pt'/'11pt'/'12pt' spelling (see
    # convert_ext_font_sizes).  FIX(review): this append and the if/else
    # around the \options update were missing from the listing.
    fontsize += 'pt'

    i = find_token(document.header, '\\paperfontsize', 0)
    document.header[i] = '\\paperfontsize default'

    i = find_token(document.header, '\\options', 0)
    if i == -1:
        # No \options line yet: create one right after \textclass.
        i = find_token(document.header, '\\textclass', 0) + 1
        document.header[i:i] = ['\\options %s' % fontsize]
    else:
        document.header[i] += ',%s' % fontsize
def convert_ext_font_sizes(document):
    """Move an 'ext*' textclass font size from the \\options class options
    into \\paperfontsize.
    NOTE(review): a few control-flow lines are elided in this chunk."""
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize != 'default': return

    i = find_token(document.header, '\\options', 0)
    # (elided -- presumably the `i == -1` exit)

    options = get_value(document.header, '\\options', i)

    fontsizes = '10pt', '11pt', '12pt'
    for fs in fontsizes:
        if options.find(fs) != -1:
            # (elided -- presumably `break`)
    else: # this else will only be attained if the for cycle had no match
        # (elided -- presumably `return`)

    options = options.split(',')
    for j, opt in enumerate(options):
        if opt in fontsizes:
            # (elided -- presumably strips 'pt', breaks, then deletes options[j])

    k = find_token(document.header, '\\paperfontsize', 0)
    document.header[k] = '\\paperfontsize %s' % fontsize

    # (elided -- presumably `if options:` keep the remaining options)
    document.header[i] = '\\options %s' % ','.join(options)
    # (elided -- presumably `else:` drop the now-empty line)
    del document.header[i]
def revert_separator_layout(document):
    r'''Revert a --Separator-- paragraph to a standard paragraph that holds
    a LyX note reading "Separate Environment".
    NOTE(review): the original example docstring and several control-flow
    lines are elided in this chunk; '# (elided)' comments mark the gaps.
    '''
    # (elided -- presumably `i = 0` and a scan loop)
        i = find_token(document.body, r'\begin_layout --Separator--', i)
        # (elided -- presumably the `i == -1` exit)
        j = find_end_of_layout(document.body, i + 1)
        # (elided guard)
            # this should not happen
        document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
                                    r'\begin_inset Note Note',
                                    # (elided list items)
                                    r'\begin_layout %s' % document.default_layout,
                                    'Separate Environment',
                                    # (elided list items)
                                    document.body[ i + 1 : j] + \
                                    # (elided remainder and loop increment)
def convert_arabic (document):
    # Rename language "arabic" to "arabic_arabtex" in the header and in
    # every \lang command of the body.
    # NOTE(review): guard and loop-control lines are elided in this chunk.
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        # (elided -- presumably `if i != -1:`)
            document.header[i] = "\\language arabic_arabtex"
    # (elided -- presumably `i = 0`)
    while i < len(document.body):
        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
        # (elided -- presumably `if h != -1:`)
            # change the language name
            document.body[i] = '\lang arabic_arabtex'
        # (elided -- loop increment)
def revert_arabic (document):
    # Rename language "arabic_arabtex" back to "arabic" in the header and
    # in every \lang command of the body (inverse of convert_arabic).
    # NOTE(review): guard and loop-control lines are elided in this chunk.
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        # (elided -- presumably `if i != -1:`)
            document.header[i] = "\\language arabic"
    # (elided -- presumably `i = 0`)
    while i < len(document.body):
        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
        # (elided -- presumably `if h != -1:`)
            # change the language name
            document.body[i] = '\lang arabic'
        # (elided -- loop increment)
# Conversion hub: ordered (target_format, [routines]) steps consumed by the
# lyx2lyx driver.
# NOTE(review): many steps and the closing/opening bracket lines between the
# `convert` and `revert` tables are elided in this chunk of the listing.
supported_versions = ["1.5.0","1.5"]
convert = [[246, []],
           [247, [convert_font_settings]],
           [249, [convert_utf8]],
           [252, [convert_commandparams, convert_bibitem]],
           [254, [convert_esint]],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [261, [convert_changes]],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [271, [convert_ext_font_sizes]],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]
# (elided -- closing `]` of `convert` and the `revert = [` opening)
           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [272, [revert_separator_layout]],
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [259, [revert_utf8x]],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]
2064 if __name__ == "__main__":