1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
27 from LyX import get_encoding
30 ####################################################################
31 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the "\\end_inset" that closes the inset
    containing lines[i] (delegates the matching to find_end_of)."""
    open_tok, close_tok = "\\begin_inset", "\\end_inset"
    return find_end_of(lines, i, open_tok, close_tok)
def find_end_of_layout(lines, i):
    """Return the index of the "\\end_layout" that closes the layout
    containing lines[i] (delegates the matching to find_end_of)."""
    open_tok, close_tok = "\\begin_layout", "\\end_layout"
    return find_end_of(lines, i, open_tok, close_tok)
def find_beginning_of_layout(lines, i):
    """Return the index of the "\\begin_layout" that opens the layout
    containing lines[i] (delegates the matching to find_beginning_of)."""
    open_tok, close_tok = "\\begin_layout", "\\end_layout"
    return find_beginning_of(lines, i, open_tok, close_tok)
45 # End of helper functions
46 ####################################################################
50 # Notes: Framed/Shaded
def revert_framed(document):
    """Revert framed/shaded notes to plain notes.

    Framed and Shaded note insets do not exist before format 249, so
    both are downgraded to a plain "\\begin_inset Note".
    """
    # NOTE(review): the loop skeleton (counter init, while, -1 guard,
    # increment) is missing from this listing and has been restored from
    # the standard lyx2lyx body-scan pattern — confirm against upstream.
    i = 0
    while True:
        i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset Note"
        i = i + 1
# Mapping of LyX 1.4 '\fontscheme' names to the LyX 1.5 per-family font
# settings ('\font_roman', '\font_sans', '\font_typewriter').  A scheme
# that does not provide a family maps to 'default' for that family.
# NOTE(review): the 'pslatex' entries of roman_fonts and sans_fonts and
# the closing braces of the first two dicts are missing from this
# listing; they were restored from the visible typewriter_fonts entry
# ('pslatex' -> 'courier') and the standard pslatex font set
# (Times/Helvetica/Courier) — confirm against upstream.
roman_fonts = {'default' : 'default', 'ae'       : 'ae',
               'times'   : 'times',   'palatino' : 'palatino',
               'helvet'  : 'default', 'avant'    : 'default',
               'newcent' : 'newcent', 'bookman'  : 'bookman',
               'pslatex' : 'times'}
sans_fonts = {'default' : 'default', 'ae'       : 'default',
              'times'   : 'default', 'palatino' : 'default',
              'helvet'  : 'helvet',  'avant'    : 'avant',
              'newcent' : 'default', 'bookman'  : 'default',
              'pslatex' : 'helvet'}
typewriter_fonts = {'default' : 'default', 'ae'       : 'default',
                    'times'   : 'default', 'palatino' : 'default',
                    'helvet'  : 'default', 'avant'    : 'default',
                    'newcent' : 'default', 'bookman'  : 'default',
                    'pslatex' : 'courier'}
85 def convert_font_settings(document):
86 " Convert font settings. "
88 i = find_token_exact(document.header, "\\fontscheme", i)
90 document.warning("Malformed LyX document: Missing `\\fontscheme'.")
92 font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
94 document.warning("Malformed LyX document: Empty `\\fontscheme'.")
95 font_scheme = 'default'
96 if not font_scheme in roman_fonts.keys():
97 document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
98 font_scheme = 'default'
99 document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
100 '\\font_sans %s' % sans_fonts[font_scheme],
101 '\\font_typewriter %s' % typewriter_fonts[font_scheme],
102 '\\font_default_family default',
105 '\\font_sf_scale 100',
106 '\\font_tt_scale 100']
109 def revert_font_settings(document):
110 " Revert font settings. "
113 fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
114 for family in 'roman', 'sans', 'typewriter':
115 name = '\\font_%s' % family
116 i = find_token_exact(document.header, name, i)
118 document.warning("Malformed LyX document: Missing `%s'." % name)
121 if (insert_line < 0):
123 fonts[family] = get_value(document.header, name, i, i + 1)
124 del document.header[i]
125 i = find_token_exact(document.header, '\\font_default_family', i)
127 document.warning("Malformed LyX document: Missing `\\font_default_family'.")
128 font_default_family = 'default'
130 font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
131 del document.header[i]
132 i = find_token_exact(document.header, '\\font_sc', i)
134 document.warning("Malformed LyX document: Missing `\\font_sc'.")
137 font_sc = get_value(document.header, '\\font_sc', i, i + 1)
138 del document.header[i]
139 if font_sc != 'false':
140 document.warning("Conversion of '\\font_sc' not yet implemented.")
141 i = find_token_exact(document.header, '\\font_osf', i)
143 document.warning("Malformed LyX document: Missing `\\font_osf'.")
146 font_osf = get_value(document.header, '\\font_osf', i, i + 1)
147 del document.header[i]
148 i = find_token_exact(document.header, '\\font_sf_scale', i)
150 document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
151 font_sf_scale = '100'
153 font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
154 del document.header[i]
155 if font_sf_scale != '100':
156 document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
157 i = find_token_exact(document.header, '\\font_tt_scale', i)
159 document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
160 font_tt_scale = '100'
162 font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
163 del document.header[i]
164 if font_tt_scale != '100':
165 document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
166 for font_scheme in roman_fonts.keys():
167 if (roman_fonts[font_scheme] == fonts['roman'] and
168 sans_fonts[font_scheme] == fonts['sans'] and
169 typewriter_fonts[font_scheme] == fonts['typewriter']):
170 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
171 if font_default_family != 'default':
172 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
173 if font_osf == 'true':
174 document.warning("Ignoring `\\font_osf = true'")
176 font_scheme = 'default'
177 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
178 if fonts['roman'] == 'cmr':
179 document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
180 if font_osf == 'true':
181 document.preamble.append('\\usepackage{eco}')
183 for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
184 if fonts['roman'] == font:
185 document.preamble.append('\\usepackage{%s}' % font)
186 for font in 'cmss', 'lmss', 'cmbr':
187 if fonts['sans'] == font:
188 document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
189 for font in 'berasans':
190 if fonts['sans'] == font:
191 document.preamble.append('\\usepackage{%s}' % font)
192 for font in 'cmtt', 'lmtt', 'cmtl':
193 if fonts['typewriter'] == font:
194 document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
195 for font in 'courier', 'beramono', 'luximono':
196 if fonts['typewriter'] == font:
197 document.preamble.append('\\usepackage{%s}' % font)
198 if font_default_family != 'default':
199 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
200 if font_osf == 'true':
201 document.warning("Ignoring `\\font_osf = true'")
204 def revert_booktabs(document):
205 " We remove the booktabs flag or everything else will become a mess. "
206 re_row = re.compile(r'^<row.*space="[^"]+".*>$')
207 re_tspace = re.compile(r'\s+topspace="[^"]+"')
208 re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
209 re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
212 i = find_token(document.body, "\\begin_inset Tabular", i)
215 j = find_end_of_inset(document.body, i + 1)
217 document.warning("Malformed LyX document: Could not find end of tabular.")
219 for k in range(i, j):
220 if re.search('^<features.* booktabs="true".*>$', document.body[k]):
221 document.warning("Converting 'booktabs' table to normal table.")
222 document.body[k] = document.body[k].replace(' booktabs="true"', '')
223 if re.search(re_row, document.body[k]):
224 document.warning("Removing extra row space.")
225 document.body[k] = re_tspace.sub('', document.body[k])
226 document.body[k] = re_bspace.sub('', document.body[k])
227 document.body[k] = re_ispace.sub('', document.body[k])
231 def convert_multiencoding(document, forward):
232 """ Fix files with multiple encodings.
233 Files with an inputencoding of "auto" or "default" and multiple languages
234 where at least two languages have different default encodings are encoded
235 in multiple encodings for file formats < 249. These files are incorrectly
236 read and written (as if the whole file was in the encoding of the main
238 This is not true for files written by CJK-LyX, they are always in the locale
242 - converts from fake unicode values to true unicode if forward is true, and
243 - converts from true unicode values to fake unicode if forward is false.
244 document.encoding must be set to the old value (format 248) in both cases.
246 We do this here and not in LyX.py because it is far easier to do the
247 necessary parsing in modern formats than in ancient ones.
249 inset_types = ["Foot", "Note"]
250 if document.cjk_encoding != '':
252 encoding_stack = [document.encoding]
254 lang_re = re.compile(r"^\\lang\s(\S+)")
255 inset_re = re.compile(r"^\\begin_inset\s(\S+)")
256 if document.inputencoding == "auto" or document.inputencoding == "default":
257 for i in range(len(document.body)):
258 result = lang_re.match(document.body[i])
260 language = result.group(1)
261 if language == "default":
262 document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
263 encoding_stack[-1] = document.encoding
265 from lyx2lyx_lang import lang
266 document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
267 encoding_stack[-1] = lang[language][3]
268 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
269 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
270 if len(inset_stack) > 0 and inset_stack[-1] in inset_types:
271 from lyx2lyx_lang import lang
272 encoding_stack.append(lang[document.language][3])
274 encoding_stack.append(encoding_stack[-1])
275 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
276 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
277 if len(encoding_stack) == 1:
278 # Don't remove the document encoding from the stack
279 document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
281 del encoding_stack[-1]
282 elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
283 inset_result = inset_re.match(document.body[i])
285 inset_type = inset_result.group(1)
286 inset_stack.append(inset_type)
288 inset_stack.append("")
289 elif find_token(document.body, "\\end_inset", i, i + 1) == i:
291 if encoding_stack[-1] != document.encoding:
293 # This line has been incorrectly interpreted as if it was
294 # encoded in 'encoding'.
295 # Convert back to the 8bit string that was in the file.
296 orig = document.body[i].encode(document.encoding)
297 # Convert the 8bit string that was in the file to unicode
298 # with the correct encoding.
299 document.body[i] = orig.decode(encoding_stack[-1])
301 # Convert unicode to the 8bit string that will be written
302 # to the file with the correct encoding.
303 orig = document.body[i].encode(encoding_stack[-1])
304 # Convert the 8bit string that will be written to the
305 # file to fake unicode with the encoding that will later
306 # be used when writing to the file.
307 document.body[i] = orig.decode(document.encoding)
def convert_utf8(document):
    """Switch the document encoding to UTF-8.

    First re-encodes the body from the old per-language fake-unicode
    values via convert_multiencoding(), then records the new encoding.
    """
    convert_multiencoding(document, True)
    document.encoding = "utf8"
316 def revert_utf8(document):
317 " Set document encoding to the value corresponding to inputencoding. "
318 i = find_token(document.header, "\\inputencoding", 0)
320 document.header.append("\\inputencoding auto")
321 elif get_value(document.header, "\\inputencoding", i) == "utf8":
322 document.header[i] = "\\inputencoding auto"
323 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
324 document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
325 convert_multiencoding(document, False)
328 def read_unicodesymbols():
329 " Read the unicodesymbols list of unicode characters and corresponding commands."
330 pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
331 fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
333 for line in fp.readlines():
335 line=line.replace(' "',' ') # remove all quotation marks with spaces before
336 line=line.replace('" ',' ') # remove all quotation marks with spaces after
337 line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
339 # flag1 and flag2 are preamble and other flags
340 [ucs4,command,flag1,flag2] =line.split(None,3)
341 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
349 def revert_unicode(document):
350 '''Transform unicode characters that can not be written using the
351 document encoding to commands according to the unicodesymbols
352 file. Characters that can not be replaced by commands are replaced by
353 an replacement string. Flags other than 'combined' are currently not
356 replacement_character = '???'
357 spec_chars = read_unicodesymbols()
359 # Define strings to start and end ERT and math insets
360 ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
361 ert_outro='\n\\end_layout\n\n\\end_inset\n'
362 math_intro='\n\\begin_inset Formula $'
363 math_outro='$\n\\end_inset'
364 # Find unicode characters and replace them
365 in_ert = False # flag set to 1 if in ERT inset
366 in_math = False # flag set to 1 if in math inset
367 insets = [] # list of active insets
369 # Go through the file to capture all combining characters
370 last_char = '' # to store the previous character
373 while i < len(document.body):
374 line = document.body[i]
376 if line.find('\\begin_inset') > -1:
377 # check which inset to start
378 if line.find('\\begin_inset ERT') > -1:
381 elif line.find('\\begin_inset Formula') > -1:
383 insets.append('math')
385 insets.append('other')
386 if line.find('\\end_inset') > -1:
387 # check which inset to end
389 cur_inset = insets.pop()
390 if cur_inset == 'ert':
392 elif cur_inset == 'math':
395 pass # end of other inset
397 pass # inset list was empty (for some reason)
399 # Try to write the line
401 # If all goes well the line is written here
402 dummy = line.encode(document.encoding)
406 # Error, some character(s) in the line need to be replaced
408 for character in line:
410 # Try to write the character
411 dummy = character.encode(document.encoding)
412 mod_line += character
413 last_char = character
415 # Try to replace with ERT/math inset
416 if spec_chars.has_key(character):
417 command = spec_chars[character][0] # the command to replace unicode
418 flag1 = spec_chars[character][1]
419 flag2 = spec_chars[character][2]
420 if flag1.find('combining') > -1 or flag2.find('combining') > -1:
421 # We have a character that should be combined with the previous
422 command += '{' + last_char + '}'
423 # Remove the last character. Ignore if it is whitespace
424 if len(last_char.rstrip()):
425 # last_char was found and is not whitespace
427 mod_line = mod_line[:-1]
428 else: # last_char belongs to the last line
429 document.body[i-1] = document.body[i-1][:-1]
431 # The last character was replaced by a command. For now it is
432 # ignored. This could be handled better.
434 if command[0:2] == '\\\\':
435 if command[2:12]=='ensuremath':
438 command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
439 command = command.replace('}', '$\n')
441 # add a math inset with the replacement character
442 command = command.replace('\\\\ensuremath{\\', math_intro)
443 command = command.replace('}', math_outro)
445 # we are already in a math inset
446 command = command.replace('\\\\ensuremath{\\', '')
447 command = command.replace('}', '')
450 # avoid putting an ERT in a math; instead put command as text
451 command = command.replace('\\\\', '\mathrm{')
452 command = command + '}'
454 # add an ERT inset with the replacement character
455 command = command.replace('\\\\', ert_intro)
456 command = command + ert_outro
458 command = command.replace('\\\\', '\n\\backslash\n')
459 last_char = '' # indicate that the character should not be removed
462 # Replace with replacement string
463 mod_line += replacement_character
464 document.body[i:i+1] = mod_line.split('\n')
465 i += len(mod_line.split('\n'))
468 def revert_cs_label(document):
469 " Remove status flag of charstyle label. "
472 i = find_token(document.body, "\\begin_inset CharStyle", i)
475 # Seach for a line starting 'show_label'
476 # If it is not there, break with a warning message
479 if (document.body[i][:10] == "show_label"):
482 elif (document.body[i][:13] == "\\begin_layout"):
483 document.warning("Malformed LyX document: Missing 'show_label'.")
490 def convert_bibitem(document):
492 \bibitem [option]{argument}
496 \begin_inset LatexCommand bibitem
502 This must be called after convert_commandparams.
506 i = find_token(document.body, "\\bibitem", i)
509 j = document.body[i].find('[') + 1
510 k = document.body[i].rfind(']')
511 if j == 0: # No optional argument found
514 option = document.body[i][j:k]
515 j = document.body[i].rfind('{') + 1
516 k = document.body[i].rfind('}')
517 argument = document.body[i][j:k]
518 lines = ['\\begin_inset LatexCommand bibitem']
520 lines.append('label "%s"' % option.replace('"', '\\"'))
521 lines.append('key "%s"' % argument.replace('"', '\\"'))
523 lines.append('\\end_inset')
524 document.body[i:i+1] = lines
# command : [option1, option2, argument]
# Parameter names used when converting "\begin_inset LatexCommand"
# insets.  All natbib/jurabib-style citation commands share the same
# triple, so they are generated in bulk instead of being spelled out.
_cite_like_commands = [
    "cite", "citet", "citep", "citealt", "citealp", "citeauthor",
    "citeyear", "citeyearpar",
    "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
    "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
    "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
    "citefield", "citetitle", "cite*"]
commandparams_info = dict([(cmd, ["after", "before", "key"])
                           for cmd in _cite_like_commands])
commandparams_info.update({
    "bibitem"         : ["label", "", "key"],
    "bibtex"          : ["options", "btprint", "bibfiles"],
    "hfill"           : ["", "", ""],
    "index"           : ["", "", "name"],
    "printindex"      : ["", "", "name"],
    "label"           : ["", "", "name"],
    "eqref"           : ["name", "", "reference"],
    "pageref"         : ["name", "", "reference"],
    "prettyref"       : ["name", "", "reference"],
    "ref"             : ["name", "", "reference"],
    "vpageref"        : ["name", "", "reference"],
    "vref"            : ["name", "", "reference"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl"         : ["name", "", "target"],
    "url"             : ["name", "", "target"]})
573 def convert_commandparams(document):
576 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
581 \begin_inset LatexCommand cmdname
587 name1, name2 and name3 can be different for each command.
589 # \begin_inset LatexCommand bibitem was not the official version (see
590 # convert_bibitem()), but could be read in, so we convert it here, too.
594 i = find_token(document.body, "\\begin_inset LatexCommand", i)
597 command = document.body[i][26:].strip()
599 document.warning("Malformed LyX document: Missing LatexCommand name.")
603 j = find_token(document.body, "\\end_inset", i + 1)
605 document.warning("Malformed document")
607 command += "".join(document.body[i+1:j])
608 document.body[i+1:j] = []
610 # The following parser is taken from the original InsetCommandParams::scanCommand
616 # Used to handle things like \command[foo[bar]]{foo{bar}}
620 if ((state == "CMDNAME" and c == ' ') or
621 (state == "CMDNAME" and c == '[') or
622 (state == "CMDNAME" and c == '{')):
624 if ((state == "OPTION" and c == ']') or
625 (state == "SECOPTION" and c == ']') or
626 (state == "CONTENT" and c == '}')):
630 nestdepth = nestdepth - 1
631 if ((state == "OPTION" and c == '[') or
632 (state == "SECOPTION" and c == '[') or
633 (state == "CONTENT" and c == '{')):
634 nestdepth = nestdepth + 1
635 if state == "CMDNAME":
637 elif state == "OPTION":
639 elif state == "SECOPTION":
641 elif state == "CONTENT":
646 elif c == '[' and b != ']':
648 nestdepth = 0 # Just to be sure
649 elif c == '[' and b == ']':
651 nestdepth = 0 # Just to be sure
654 nestdepth = 0 # Just to be sure
657 # Now we have parsed the command, output the parameters
658 lines = ["\\begin_inset LatexCommand %s" % name]
660 if commandparams_info[name][0] == "":
661 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
663 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
665 if commandparams_info[name][1] == "":
666 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
668 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
670 if commandparams_info[name][2] == "":
671 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
673 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
674 document.body[i:i+1] = lines
678 def revert_commandparams(document):
679 regex = re.compile(r'(\S+)\s+(.+)')
682 i = find_token(document.body, "\\begin_inset LatexCommand", i)
685 name = document.body[i].split()[2]
686 j = find_end_of_inset(document.body, i + 1)
691 for k in range(i + 1, j):
692 match = re.match(regex, document.body[k])
694 pname = match.group(1)
695 pvalue = match.group(2)
696 if pname == "preview":
697 preview_line = document.body[k]
698 elif (commandparams_info[name][0] != "" and
699 pname == commandparams_info[name][0]):
700 option1 = pvalue.strip('"').replace('\\"', '"')
701 elif (commandparams_info[name][1] != "" and
702 pname == commandparams_info[name][1]):
703 option2 = pvalue.strip('"').replace('\\"', '"')
704 elif (commandparams_info[name][2] != "" and
705 pname == commandparams_info[name][2]):
706 argument = pvalue.strip('"').replace('\\"', '"')
707 elif document.body[k].strip() != "":
708 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
709 if name == "bibitem":
711 lines = ["\\bibitem {%s}" % argument]
713 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
717 lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
719 lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
722 lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
724 lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
725 if name != "bibitem":
726 if preview_line != "":
727 lines.append(preview_line)
729 lines.append('\\end_inset')
730 document.body[i:j+1] = lines
734 def revert_nomenclature(document):
735 " Convert nomenclature entry to ERT. "
736 regex = re.compile(r'(\S+)\s+(.+)')
740 i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
744 j = find_end_of_inset(document.body, i + 1)
749 for k in range(i + 1, j):
750 match = re.match(regex, document.body[k])
752 name = match.group(1)
753 value = match.group(2)
754 if name == "preview":
755 preview_line = document.body[k]
756 elif name == "symbol":
757 symbol = value.strip('"').replace('\\"', '"')
758 elif name == "description":
759 description = value.strip('"').replace('\\"', '"')
760 elif name == "prefix":
761 prefix = value.strip('"').replace('\\"', '"')
762 elif document.body[k].strip() != "":
763 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
765 command = 'nomenclature{%s}{%s}' % (symbol, description)
767 command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
768 document.body[i:j+1] = ['\\begin_inset ERT',
771 '\\begin_layout %s' % document.default_layout,
780 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
781 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
782 document.preamble.append('\\makenomenclature')
785 def revert_printnomenclature(document):
786 " Convert printnomenclature to ERT. "
787 regex = re.compile(r'(\S+)\s+(.+)')
791 i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
795 j = find_end_of_inset(document.body, i + 1)
798 for k in range(i + 1, j):
799 match = re.match(regex, document.body[k])
801 name = match.group(1)
802 value = match.group(2)
803 if name == "preview":
804 preview_line = document.body[k]
805 elif name == "labelwidth":
806 labelwidth = value.strip('"').replace('\\"', '"')
807 elif document.body[k].strip() != "":
808 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
810 command = 'nomenclature{}'
812 command = 'nomenclature[%s]' % labelwidth
813 document.body[i:j+1] = ['\\begin_inset ERT',
816 '\\begin_layout %s' % document.default_layout,
825 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
826 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
827 document.preamble.append('\\makenomenclature')
def convert_esint(document):
    """Add the '\\use_esint' setting to the document header.

    The new flag is inserted just before '\\cite_engine' with value 0
    (esint off), which reproduces the pre-format behaviour.
    """
    # NOTE(review): the `if i == -1:` guard and its `return` are missing
    # from this listing; restored from the pattern used by the sibling
    # header converters — confirm against upstream.
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')
840 def revert_esint(document):
841 " Remove \\use_esint setting from header. "
842 i = find_token(document.header, "\\use_esint", 0)
844 document.warning("Malformed LyX document: Missing `\\use_esint'.")
846 use_esint = document.header[i].split()[1]
847 del document.header[i]
848 # 0 is off, 1 is auto, 2 is on.
850 document.preamble.append('\\usepackage{esint}')
853 def revert_clearpage(document):
857 i = find_token(document.body, "\\clearpage", i)
860 document.body[i:i+1] = ['\\begin_inset ERT',
863 '\\begin_layout %s' % document.default_layout,
874 def revert_cleardoublepage(document):
875 " cleardoublepage -> ERT "
878 i = find_token(document.body, "\\cleardoublepage", i)
881 document.body[i:i+1] = ['\\begin_inset ERT',
884 '\\begin_layout %s' % document.default_layout,
895 def convert_lyxline(document):
896 " remove fontsize commands for \lyxline "
897 # The problematic is: The old \lyxline definition doesn't handle the fontsize
898 # to change the line thickness. The new definiton does this so that imported
899 # \lyxlines would have a different line thickness. The eventual fontsize command
900 # before \lyxline is therefore removed to get the same output.
901 fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
902 "large", "Large", "LARGE", "huge", "Huge"]
903 for n in range(0, len(fontsizes)):
906 while i < len(document.body):
907 i = find_token(document.body, "\\size " + fontsizes[n], i)
908 k = find_token(document.body, "\\lyxline", i)
909 # the corresponding fontsize command is always 2 lines before the \lyxline
910 if (i != -1 and k == i+2):
911 document.body[i:i+1] = []
917 def revert_encodings(document):
918 " Set new encodings to auto. "
919 encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
920 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
921 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
922 i = find_token(document.header, "\\inputencoding", 0)
924 document.header.append("\\inputencoding auto")
926 inputenc = get_value(document.header, "\\inputencoding", i)
927 if inputenc in encodings:
928 document.header[i] = "\\inputencoding auto"
929 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
932 def convert_caption(document):
933 " Convert caption layouts to caption insets. "
936 i = find_token(document.body, "\\begin_layout Caption", i)
939 j = find_end_of_layout(document.body, i)
941 document.warning("Malformed LyX document: Missing `\\end_layout'.")
944 document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
945 document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
946 "\\begin_inset Caption", "",
947 "\\begin_layout %s" % document.default_layout]
951 def revert_caption(document):
952 " Convert caption insets to caption layouts. "
953 " This assumes that the text class has a caption style. "
956 i = find_token(document.body, "\\begin_inset Caption", i)
960 # We either need to delete the previous \begin_layout line, or we
961 # need to end the previous layout if this inset is not in the first
962 # position of the paragraph.
963 layout_before = find_token_backwards(document.body, "\\begin_layout", i)
964 if layout_before == -1:
965 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
967 layout_line = document.body[layout_before]
968 del_layout_before = True
969 l = layout_before + 1
971 if document.body[l] != "":
972 del_layout_before = False
975 if del_layout_before:
976 del document.body[layout_before:i]
979 document.body[i:i] = ["\\end_layout", ""]
982 # Find start of layout in the inset and end of inset
983 j = find_token(document.body, "\\begin_layout", i)
985 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
987 k = find_end_of_inset(document.body, i)
989 document.warning("Malformed LyX document: Missing `\\end_inset'.")
992 # We either need to delete the following \end_layout line, or we need
993 # to restart the old layout if this inset is not at the paragraph end.
994 layout_after = find_token(document.body, "\\end_layout", k)
995 if layout_after == -1:
996 document.warning("Malformed LyX document: Missing `\\end_layout'.")
998 del_layout_after = True
1000 while l < layout_after:
1001 if document.body[l] != "":
1002 del_layout_after = False
1005 if del_layout_after:
1006 del document.body[k+1:layout_after+1]
1008 document.body[k+1:k+1] = [layout_line, ""]
1010 # delete \begin_layout and \end_inset and replace \begin_inset with
1011 # "\begin_layout Caption". This works because we can only have one
1012 # paragraph in the caption inset: The old \end_layout will be recycled.
1013 del document.body[k]
1014 if document.body[k] == "":
1015 del document.body[k]
1016 del document.body[j]
1017 if document.body[j] == "":
1018 del document.body[j]
1019 document.body[i] = "\\begin_layout Caption"
1020 if document.body[i+1] == "":
1021 del document.body[i+1]
# Accents of InsetLaTeXAccent: maps the old inset's one-character accent
# command to the corresponding Unicode combining character (applied to
# the preceding base character).
# NOTE(review): the `accent_map = {` assignment line and the closing
# brace are missing from this listing and have been restored.
accent_map = {
    "`" : u'\u0300', # grave
    "'" : u'\u0301', # acute
    "^" : u'\u0302', # circumflex
    "~" : u'\u0303', # tilde
    "=" : u'\u0304', # macron
    "u" : u'\u0306', # breve
    "." : u'\u0307', # dot above
    "\"": u'\u0308', # diaeresis
    "r" : u'\u030a', # ring above
    "H" : u'\u030b', # double acute
    "v" : u'\u030c', # caron
    "b" : u'\u0320', # minus sign below
    "d" : u'\u0323', # dot below
    "c" : u'\u0327', # cedilla
    "k" : u'\u0328', # ogonek
    "t" : u'\u0361'  # tie. This is special: It spans two characters, but
                     # only one is given as argument, so we don't need to
                     # treat it differently.
}
# special accents of InsetLaTeXAccent without argument: commands that
# stand for a complete character on their own.
# NOTE(review): the closing brace is missing from this listing and has
# been restored.
special_accent_map = {
    'i' : u'\u0131', # dotless i
    'j' : u'\u0237', # dotless j
    'l' : u'\u0142', # l with stroke
    'L' : u'\u0141'  # L with stroke
}
1057 # special accent arguments of InsetLaTeXAccent
1059 '\\i' : u'\u0131', # dotless i
1060 '\\j' : u'\u0237' # dotless j
def _convert_accent(accent, accented_char):
    # Return the precomposed Unicode character for an accent inset, an entry
    # of special_accent_map for argument-less accents, or compose base char +
    # combining mark via NFC normalization.
    # NOTE(review): several lines are elided in this excerpt (e.g. the
    # `type = accent` binding that the branches below rely on, the
    # `if char == '':` guard and the early `return ''` failure branches);
    # indentation follows the visible text.
    char = accented_char
    if type in special_accent_map:
        return special_accent_map[type]
    # a missing char is treated as space by LyX
    elif type == 'q' and char in ['t', 'd', 'l', 'L']:
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
    elif char in accented_map:
        char = accented_map[char]
    elif (len(char) > 1):
        # We can only convert accents on a single char
    a = accent_map.get(type)
    return unicodedata.normalize("NFC", "%s%s" % (char, a))
def convert_ertbackslash(body, i, ert, default_layout):
    r""" -------------------------------------------------------------------------------------------
    Convert backslashes and '\n' into valid ERT code, append the converted
    text to body[i] and return the (maybe incremented) line index i"""
    # NOTE(review): the `for c in ert:` loop and its `if`/`elif`/`else`
    # dispatch (plus the final `return i`) appear elided in this excerpt;
    # only the three per-character actions survive below.
    # Backslash -> LyX ERT token (the trailing space is significant).
    body[i] = body[i] + '\\backslash '
    # Newline -> close the current layout and open a fresh one.
    body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
    # Any other character is copied verbatim.
    body[i] = body[i] + c
1109 def convert_accent(document):
1110 # The following forms are supported by LyX:
1111 # '\i \"{a}' (standard form, as written by LyX)
1112 # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
1113 # '\i \"{ }' (also accepted if the accented char is a space)
1114 # '\i \" a' (also accepted)
1115 # '\i \"' (also accepted)
1116 re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
1117 re_contents = re.compile(r'^([^\s{]+)(.*)$')
1118 re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
1121 i = find_re(document.body, re_wholeinset, i)
1124 match = re_wholeinset.match(document.body[i])
1125 prefix = match.group(1)
1126 contents = match.group(3).strip()
1127 match = re_contents.match(contents)
1129 # Strip first char (always \)
1130 accent = match.group(1)[1:]
1131 accented_contents = match.group(2).strip()
1132 match = re_accentedcontents.match(accented_contents)
1133 accented_char = match.group(1)
1134 converted = _convert_accent(accent, accented_char)
1136 # Normalize contents
1137 contents = '%s{%s}' % (accent, accented_char),
1139 document.body[i] = '%s%s' % (prefix, converted)
1142 document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
1143 document.body[i] = prefix
1144 document.body[i+1:i+1] = ['\\begin_inset ERT',
1147 '\\begin_layout %s' % document.default_layout,
1151 i = convert_ertbackslash(document.body, i + 7,
1153 document.default_layout)
1154 document.body[i+1:i+1] = ['\\end_layout',
def revert_accent(document):
    # Replace precomposed unicode characters that the target encoding cannot
    # represent with InsetLaTeXAccent commands (inverse of convert_accent).
    # Build the inverses of the three accent tables first.
    inverse_accent_map = {}
    for k in accent_map:
        inverse_accent_map[accent_map[k]] = k
    inverse_special_accent_map = {}
    for k in special_accent_map:
        inverse_special_accent_map[special_accent_map[k]] = k
    inverse_accented_map = {}
    for k in accented_map:
        inverse_accented_map[accented_map[k]] = k

    # NOTE(review): this excerpt elides several control-flow lines
    # (`continue`, `try:`/`except TypeError:`, `if result:`, `else:`,
    # `if j > 0:` guards and loop index initialisation/increments);
    # indentation below follows the visible text.

    # Since LyX may insert a line break within a word we must combine all
    # words before unicode normalization.
    # We do this only if the next line starts with an accent, otherwise we
    # would create things like '\begin_inset ERTstatus'.
    numberoflines = len(document.body)
    for i in range(numberoflines-1):
        if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
        if (document.body[i+1][0] in inverse_accent_map):
            # the last character of this line and the first of the next line
            # form probably a surrogate pair.
            while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
                document.body[i] += document.body[i+1][0]
                document.body[i+1] = document.body[i+1][1:]

    # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
    # This is needed to catch all accented characters.
    for i in range(numberoflines):
        # Unfortunately we have a mixture of unicode strings and plain strings,
        # because we never use u'xxx' for string literals, but 'xxx'.
        # Therefore we may have to try two times to normalize the data.
            document.body[i] = unicodedata.normalize("NFD", document.body[i])
            document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))

    # Replace accented characters with InsetLaTeXAccent
    # Do not convert characters that can be represented in the chosen
    # encoding. The stack mirrors nested layouts; \lang switches update it.
    encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
    lang_re = re.compile(r"^\\lang\s(\S+)")
    while i < len(document.body):
        if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
            # Track the encoding of the current line
            result = lang_re.match(document.body[i])
            language = result.group(1)
            if language == "default":
                encoding_stack[-1] = document.encoding
                from lyx2lyx_lang import lang
                encoding_stack[-1] = lang[language][3]
        elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
            encoding_stack.append(encoding_stack[-1])
        elif find_token(document.body, "\\end_layout", i, i + 1) == i:
            del encoding_stack[-1]

        for j in range(len(document.body[i])):
            # dotless i and dotless j are both in special_accent_map and can
            # occur as an accented character, so we need to test that the
            # following character is no accent
            if (document.body[i][j] in inverse_special_accent_map and
                (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
                accent = document.body[i][j]
                    dummy = accent.encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented character
                    # NOTE(review): slice bound `[:j-1]` would also drop the
                    # character before position j; deleting only the char at j
                    # suggests `[:j]` — verify against the full file/history.
                        document.body[i] = document.body[i][:j-1]
                        document.body[i] = u''
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
            elif j > 0 and document.body[i][j] in inverse_accent_map:
                accented_char = document.body[i][j-1]
                if accented_char == ' ':
                    # Conform to LyX output
                elif accented_char in inverse_accented_map:
                    accented_char = inverse_accented_map[accented_char]
                accent = document.body[i][j]
                    dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
                except UnicodeEncodeError:
                    # Insert the rest of the line as new line
                    if j < len(document.body[i]) - 1:
                        document.body.insert(i+1, document.body[i][j+1:])
                    # Delete the accented characters
                    # NOTE(review): same concern as above for `[:j-2]` vs `[:j-1]`.
                        document.body[i] = document.body[i][:j-2]
                        document.body[i] = u''
                    # Finally add the InsetLaTeXAccent
                    document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)

    # Normalize to "Normal form C" (NFC, pre-composed characters) again
    for i in range(numberoflines):
        document.body[i] = unicodedata.normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside."""

    # Font properties to normalize, each mapped to its "off"/reset value.
    # NOTE(review): some entries appear elided in this excerpt (upstream also
    # lists e.g. "\\bar" and "\\noun") — confirm against the full file.
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       "\\shape": "default",
                       "\\family": "default"}
    return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """ Before format 259 (sic) the font changes were ignored if a
    whitespace was the first or last character in the sequence. This was
    corrected for most font properties in format 259, but the language
    was forgotten then. This function applies the same conversion done
    there (namely, transfers the whitespace outside) for font language
    changes, as well."""
    # Only the \lang property needs fixing at format 274.
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """ Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's language; otherwise, the paragraph's language is the
    document's language."""

    lines = document.body

    # First non-empty line after the opening \begin_layout of this paragraph.
    first_nonempty_line = \
        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)

    words = lines[first_nonempty_line].split()

    if len(words) > 1 and words[0] == "\\lang":
        # NOTE(review): the body of this branch (presumably `return words[1]`)
        # appears elided in this excerpt — confirm against the full file.
    return document.language
def normalize_font_whitespace(document, char_properties):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside. Only a change in one of the properties
    in the provided char_properties is handled by this function."""

    # NOTE(review): several lines are elided in this excerpt (the early
    # return for non-latex backends, `changes = {}` / `i = 0` initialisation,
    # some `if`/`else` guards, `break`/`continue` statements and the loop
    # increment); indentation below follows the visible text.
    if document.backend != "latex":

    lines = document.body

    # `changes` tracks the currently-open (non-default) font properties.
    while i < len(lines):
        words = lines[i].split()

        if len(words) > 0 and words[0] == "\\begin_layout":
            # a new paragraph resets all font changes
            # also reset the default language to be the paragraph's language
            if "\\lang" in char_properties.keys():
                char_properties["\\lang"] = \
                    get_paragraph_language(document, i + 1)

        elif len(words) > 1 and words[0] in char_properties.keys():
            # we have a font change
            if char_properties[words[0]] == words[1]:
                # property gets reset
                if words[0] in changes.keys():
                    del changes[words[0]]
                defaultproperty = True
                # property gets set (NOTE(review): `else:` guard elided here)
                changes[words[0]] = words[1]
                defaultproperty = False

            # We need to explicitly reset all changed properties if we find
            # a space below, because LyX 1.4 would output the space after
            # closing the previous change and before starting the new one,
            # and closing a font change means to close all properties, not
            # just the changed one.
            if lines[i-1] and lines[i-1][-1] == " ":
                lines[i-1] = lines[i-1][:-1]
                # a space before the font change
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Property is reset in lines[i], so add the new stuff afterwards
                lines[i+1:i+1] = added_lines
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                i = i + len(added_lines)

            elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
                # a space after the font change
                if (lines[i+1] == " " and lines[i+2]):
                    next_words = lines[i+2].split()
                    if len(next_words) > 0 and next_words[0] == words[0]:
                        # a single blank with a property different from the
                        # previous and the next line must not be changed
                lines[i+1] = lines[i+1][1:]
                for k in changes.keys():
                    # exclude property k because that is already in lines[i]
                    added_lines[1:1] = ["%s %s" % (k, changes[k])]
                for k in changes.keys():
                    # exclude property k because that must be added below anyway
                    added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
                # Reset property for the space
                added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
                lines[i:i] = added_lines
                i = i + len(added_lines)
def revert_utf8x(document):
    " Set utf8x encoding to utf8. "
    # NOTE(review): the `if i == -1:` / `else:` guards around the two branches
    # below appear elided in this excerpt (the append is the missing-token
    # branch); indentation follows the visible text.
    i = find_token(document.header, "\\inputencoding", 0)
    document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc == "utf8x":
        document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    " Set utf8plain encoding to utf8. "
    # NOTE(review): the `if i == -1:` / `else:` guards appear elided in this
    # excerpt; indentation follows the visible text.
    i = find_token(document.header, "\\inputencoding", 0)
    document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc == "utf8-plain":
        document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_beamer_alert(document):
    " Revert beamer's \\alert inset back to ERT. "
    # NOTE(review): the loop scaffolding (index initialisation, the
    # `if i == -1: return` guard, the walk to the inset's layout line and
    # the index increment) is elided in this excerpt.
    i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
    # Turn the CharStyle inset into a plain ERT inset.
    document.body[i] = "\\begin_inset ERT"
    if (document.body[i][:13] == "\\begin_layout"):
        # Insert the \alert command
        document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
def revert_beamer_structure(document):
    " Revert beamer's \\structure inset back to ERT. "
    # NOTE(review): loop scaffolding elided in this excerpt, as in
    # revert_beamer_alert above.
    i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
    document.body[i] = "\\begin_inset ERT"
    if (document.body[i][:13] == "\\begin_layout"):
        # Wrap the inset's text in the \structure command.
        document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
def convert_changes(document):
    " Switch output_changes off if tracking_changes is off. "
    # NOTE(review): the `if i == -1:` / `return` guards after each warning
    # appear elided in this excerpt.
    i = find_token(document.header, '\\tracking_changes', 0)
    document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
    j = find_token(document.header, '\\output_changes', 0)
    document.warning("Malformed lyx document: Missing '\\output_changes'.")
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    # Outputting changes without tracking them is inconsistent; disable it.
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    " Set ascii encoding to auto. "
    # NOTE(review): the `if i == -1:` / `else:` guards appear elided in this
    # excerpt; indentation follows the visible text.
    i = find_token(document.header, "\\inputencoding", 0)
    document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc == "ascii":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    "Rename obsolete babel language names to their modern LyX names."
    # Old name -> new name.
    lang = { "brazil": "brazilian",
            "portuges": "portuguese"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # FIX: guard against a missing \language line; previously i == -1
        # would silently clobber the last header line.
        if i == -1:
            document.warning("Malformed LyX document: Missing `\\language'.")
            return
        document.header[i] = "\\language %s" % document.language
def revert_language_name(document):
    "Rename modern language names back to their old babel names."
    # New name -> old name (inverse of normalize_language_name).
    lang = { "brazilian": "brazil",
            "portuguese": "portuges"}

    if document.language in lang:
        document.language = lang[document.language]
        i = find_token(document.header, "\\language", 0)
        # FIX: guard against a missing \language line; previously i == -1
        # would silently clobber the last header line.
        if i == -1:
            document.warning("Malformed LyX document: Missing `\\language'.")
            return
        document.header[i] = "\\language %s" % document.language
# \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    "Rename the obsolete `cv` text class to its successor `simplecv`."
    if document.textclass != "cv":
        return
    document.textclass = "simplecv"
def revert_cv_textclass(document):
    "Rename the `simplecv` text class back to its old name `cv`."
    if document.textclass != "simplecv":
        return
    document.textclass = "cv"
# add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    " add scaleBeforeRotation graphics parameter. "
    # NOTE(review): loop scaffolding (index initialisation, the
    # `if i == -1: return` / `if j == -1:` guards and the index increment)
    # is elided in this excerpt.
    i = find_token(document.body, "\\begin_inset Graphics", i)
    j = find_end_of_inset(document.body, i+1)
    document.warning("Malformed LyX document: Could not find end of graphics inset.")
    # Search for rotateAngle and width or height or scale
    # If these params are not there, nothing needs to be done.
    # (graphics parameter lines start with a literal tab character)
    k = find_token(document.body, "\trotateAngle", i + 1, j)
    l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
    if (k != -1 and l != -1):
        document.body.insert(j, 'scaleBeforeRotation')
# remove scaleBeforeRotation graphics param
def revert_graphics_rotation(document):
    " remove scaleBeforeRotation graphics parameter. "
    # NOTE(review): loop scaffolding and several `if`/`else` guards are
    # elided in this excerpt; indentation follows the visible text.
    i = find_token(document.body, "\\begin_inset Graphics", i)
    j = find_end_of_inset(document.body, i + 1)
    document.warning("Malformed LyX document: Could not find end of graphics inset.")
    # If there's a scaleBeforeRotation param, just remove that
    k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
    del document.body[k]
    # if not, and if we have rotateAngle and width or height or scale,
    # we have to put the rotateAngle value to special
    rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
    special = get_value(document.body, 'special', i + 1, j)
    if rotateAngle != "":
        k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        # No existing special param: insert a fresh one with the angle...
        document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
        # ...otherwise prepend the angle to the existing special value.
        l = find_token(document.body, "\tspecial", i + 1, j)
        document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        del document.body[k]
def convert_tableborder(document):
    # The problematic is: LyX double the table cell border as it ignores the "|" character in
    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
    # NOTE(review): the loop index initialisation and increment are elided in
    # this excerpt.
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find("|>{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # Delete the "|".
            # NOTE(review): the `-1` end bound also drops the line's final
            # character — confirm this is intentional against the full file.
            document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
def revert_tableborder(document):
    # Re-insert the "|" cell-border character removed by convert_tableborder.
    # NOTE(review): the loop index initialisation and increment are elided in
    # this excerpt.
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find(">{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
def revert_armenian(document):
    # Revert Armenian support: switch armscii8 back to auto, add the armtex
    # package to the preamble and fall back to English as document language.
    # NOTE(review): several guard lines (`if i != -1:`, `if k == -1:`,
    # `else:`, loop index initialisation/increment) are elided in this
    # excerpt; indentation follows the visible text.

    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        i = find_token(document.header, "\\inputencoding", 0)
        document.header[i] = "\\inputencoding auto"
    # check if preamble exists, if not k is set to -1
    while i < len(document.preamble):
        k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
        k = document.preamble[i].find("%", 0, len(document.preamble[i]))
    # add the entry \usepackage{armtex} to the document preamble
    if document.language == "armenian":
        # set the armtex entry as the first preamble line
        document.preamble[0:0] = ["\\usepackage{armtex}"]
        # create the preamble when it doesn't exist
        document.preamble.append('\\usepackage{armtex}')
    # Set document language from armenian to english
    if document.language == "armenian":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language english"
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    # NOTE(review): the `if i == -1:` / `else:` guards around the two header
    # updates appear elided in this excerpt.
    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    i = find_token(document.header, "\\inputencoding", 0)
    document.header.append("\\inputencoding auto")
    inputenc = get_value(document.header, "\\inputencoding", i)
    if inputenc in encodings:
        document.header[i] = "\\inputencoding default"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language == "chinese-simplified" or \
       document.language == "chinese-traditional" or \
       document.language == "japanese" or document.language == "korean":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    " Revert preamble option \listings_params "
    # Move \listings_params from the header into an explicit \lstset in the
    # LaTeX preamble.
    # NOTE(review): the `if i != -1:` guard appears elided in this excerpt.
    i = find_token(document.header, "\\listings_params", 0)
    document.preamble.append('\\usepackage{listings}')
    document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
    document.header.pop(i);
def revert_listings_inset(document):
    r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
 lstparams "language=Delphi"
\begin_layout Standard
\begin_layout Standard
lstinline[language=Delphi]{var i = 10;}
There can be an caption inset in this inset
\begin_layout Standard
\begin_inset Caption
\begin_layout Standard
\begin_inset LatexCommand label
NOTE(review): large parts of this docstring example and its closing quotes,
plus many control-flow lines of the function body (search loop, guards,
list items of the generated ERT), are elided in this excerpt.
'''
    i = find_token(document.body, '\\begin_inset listings', i)
    if not '\\usepackage{listings}' in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    j = find_end_of_inset(document.body, i + 1)
    # this should not happen
    # Parse the three inset parameter lines directly after \begin_inset.
    for line in range(i + 1, i + 4):
        if document.body[line].startswith('inline'):
            inline = document.body[line].split()[1]
        if document.body[line].startswith('lstparams'):
            params = document.body[line].split()[1].strip('"')
        if document.body[line].startswith('status'):
            status = document.body[line].split()[1].strip()
    # Extract an optional caption inset and its label.
    cap = find_token(document.body, '\\begin_inset Caption', i)
    cap_end = find_end_of_inset(document.body, cap + 1)
    # this should not happen
    lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
    lbl_end = find_end_of_inset(document.body, lbl + 1)
    # this should not happen
    for line in document.body[lbl : lbl_end + 1]:
        if line.startswith('name '):
            label = line.split()[1].strip('"')
    for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
        if not line.startswith('\\'):
            caption += line.strip()
    # looking for the oneline code for lstinline
    inlinecode = document.body[find_end_of_layout(document.body,
        find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
    # Fold caption/label back into the listings parameter string.
    if len(caption) > 0:
        if len(params) == 0:
            params = 'caption={%s}' % caption
            params += ',caption={%s}' % caption
        if len(params) == 0:
            params = 'label={%s}' % label
            params += ',label={%s}' % label
        params = '[%s]' % params
    params = params.replace('\\', '\\backslash\n')
    # Replace the inset with equivalent ERT.
    if inline == 'true':
        document.body[i:(j+1)] = [r'\begin_inset ERT',
                                  'status %s' % status,
                                  r'\begin_layout %s' % document.default_layout,
                                  'lstinline%s{%s}' % (params, inlinecode),
        document.body[i: j+1] = [r'\begin_inset ERT',
                                 'status %s' % status,
                                 r'\begin_layout %s' % document.default_layout,
                                 r'begin{lstlisting}%s' % params,
                                 ] + document.body[k : j - 1] + \
                                 r'\begin_layout %s' % document.default_layout,
def revert_include_listings(document):
    r''' Revert lstinputlisting Include option , translate
\begin_inset Include \lstinputlisting{file}[opt]
\begin_layout Standard
lstinputlisting{file}[opt]
NOTE(review): parts of this docstring example, its closing quotes and several
control-flow lines of the function body are elided in this excerpt.
'''
    i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
    if not '\\usepackage{listings}' in document.preamble:
        document.preamble.append('\\usepackage{listings}')
    j = find_end_of_inset(document.body, i + 1)
    # this should not happen
    # find command line lstinputlisting{file}[options]
    cmd, file, option = '', '', ''
    if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
        cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
    option = option.replace('\\', '\\backslash\n')
    # Replace the Include inset with equivalent ERT.
    document.body[i : j + 1] = [r'\begin_inset ERT',
                                r'\begin_layout %s' % document.default_layout,
                                '%s%s{%s}' % (cmd, option, file),
def revert_ext_font_sizes(document):
    # For extarticle & friends: move the font size from \paperfontsize back
    # into the class options, where older formats stored it.
    # NOTE(review): the `if i == -1:` / `else:` branching around the last
    # four lines appears elided in this excerpt.
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize not in ('10', '11', '12'): return

    i = find_token(document.header, '\\paperfontsize', 0)
    document.header[i] = '\\paperfontsize default'

    i = find_token(document.header, '\\options', 0)
    # No \options line: insert one right after \textclass ...
    i = find_token(document.header, '\\textclass', 0) + 1
    document.header[i:i] = ['\\options %s' % fontsize]
    # ... otherwise append the size to the existing options.
    document.header[i] += ',%s' % fontsize
def convert_ext_font_sizes(document):
    # For extarticle & friends: move the font size from the class options
    # into \paperfontsize.
    # NOTE(review): several lines (`break`, `return`, the body of the inner
    # `if`, and the `if options:`/`else:` guards at the end) appear elided in
    # this excerpt; indentation follows the visible text.
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize != 'default': return

    i = find_token(document.header, '\\options', 0)
    options = get_value(document.header, '\\options', i)

    fontsizes = '10pt', '11pt', '12pt'
    for fs in fontsizes:
        if options.find(fs) != -1:
    else: # this else will only be attained if the for cycle had no match

    # Remove the size from the options list and remember it.
    options = options.split(',')
    for j, opt in enumerate(options):
        if opt in fontsizes:

    k = find_token(document.header, '\\paperfontsize', 0)
    document.header[k] = '\\paperfontsize %s' % fontsize

    # Rewrite (or drop, if now empty) the \options line.
    document.header[i] = '\\options %s' % ','.join(options)
    del document.header[i]
def revert_separator_layout(document):
    r'''Revert --Separator-- to a lyx note
\begin_layout --Separator--
\begin_layout Standard
\begin_inset Note Note
\begin_layout Standard
NOTE(review): parts of this docstring example, its closing quotes and several
control-flow lines of the function body are elided in this excerpt.
'''
    i = find_token(document.body, r'\begin_layout --Separator--', i)
    j = find_end_of_layout(document.body, i + 1)
    # this should not happen
    # Replace the separator layout with a Note inset holding its content.
    document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
                                r'\begin_inset Note Note',
                                r'\begin_layout %s' % document.default_layout,
                                'Separate Environment',
                                ] + \
        document.body[ i + 1 : j] + \
def convert_arabic (document):
    # Rename language "arabic" to "arabic_arabtex" in the header and in
    # every \lang command in the body.
    # NOTE(review): the loop index initialisation/increment and the
    # `if h != -1:` guard appear elided in this excerpt.
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language arabic_arabtex"
    while i < len(document.body):
        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
        # change the language name
        document.body[i] = '\lang arabic_arabtex'
def revert_arabic (document):
    # Rename language "arabic_arabtex" back to "arabic" in the header and in
    # every \lang command in the body (inverse of convert_arabic).
    # NOTE(review): the loop index initialisation/increment and the
    # `if h != -1:` guard appear elided in this excerpt.
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        document.header[i] = "\\language arabic"
    while i < len(document.body):
        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
        # change the language name
        document.body[i] = '\lang arabic'
supported_versions = ["1.5.0","1.5"]
# Conversion chains: each entry is [target_format, [step functions]].
# NOTE(review): several steps (and the `revert = [` opener plus the closing
# bracket of `convert`) appear elided in this excerpt — confirm against the
# full file.
convert = [[246, []],
           [247, [convert_font_settings]],
           [249, [convert_utf8]],
           [252, [convert_commandparams, convert_bibitem]],
           [254, [convert_esint]],
           [257, [convert_caption]],
           [258, [convert_lyxline]],
           [259, [convert_accent, normalize_font_whitespace_259]],
           [261, [convert_changes]],
           [263, [normalize_language_name]],
           [264, [convert_cv_textclass]],
           [265, [convert_tableborder]],
           [271, [convert_ext_font_sizes]],
           [274, [normalize_font_whitespace_274]],
           [275, [convert_graphics_rotation]],
           [276, [convert_arabic]]

           [275, [revert_arabic]],
           [274, [revert_graphics_rotation]],
           [272, [revert_separator_layout]],
           [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [270, [revert_ext_font_sizes]],
           [269, [revert_beamer_alert, revert_beamer_structure]],
           [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
           [267, [revert_CJK]],
           [266, [revert_utf8plain]],
           [265, [revert_armenian]],
           [264, [revert_tableborder]],
           [263, [revert_cv_textclass]],
           [262, [revert_language_name]],
           [261, [revert_ascii]],
           [259, [revert_utf8x]],
           [256, [revert_caption]],
           [255, [revert_encodings]],
           [254, [revert_clearpage, revert_cleardoublepage]],
           [253, [revert_esint]],
           [252, [revert_nomenclature, revert_printnomenclature]],
           [251, [revert_commandparams]],
           [250, [revert_cs_label]],
           [248, [revert_accent, revert_utf8, revert_unicode]],
           [247, [revert_booktabs]],
           [246, [revert_font_settings]],
           [245, [revert_framed]]]
2083 if __name__ == "__main__":