1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
27 from LyX import get_encoding
30 ####################################################################
31 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the `\\end_inset` that closes the inset
    containing lines[i] (delegates to the generic find_end_of)."""
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def find_end_of_layout(lines, i):
    """Return the index of the `\\end_layout` that closes the layout
    containing lines[i] (delegates to the generic find_end_of)."""
    return find_end_of(lines, i, "\\begin_layout", "\\end_layout")
def find_beginning_of_layout(lines, i):
    """Return the index of the `\\begin_layout` that opens the layout
    containing lines[i] (delegates to the generic find_beginning_of)."""
    return find_beginning_of(lines, i, "\\begin_layout", "\\end_layout")
45 # End of helper functions
46 ####################################################################
50 # Notes: Framed/Shaded
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# bounds check, index increment); comments below cover only the visible code.
53 def revert_framed(document):
54 "Revert framed notes. "
# Locate the next Framed/Shaded note inset ...
57 i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
# ... and downgrade it to a plain Note inset.
61 document.body[i] = "\\begin_inset Note"
# Maps a LyX 1.4 \fontscheme value to the LyX 1.5 \font_roman value
# (used by convert_font_settings / revert_font_settings below).
# NOTE(review): the tail of this dict literal (closing brace and, presumably,
# a 'pslatex' entry) is elided in this excerpt.
69 roman_fonts = {'default' : 'default', 'ae' : 'ae',
70 'times' : 'times', 'palatino' : 'palatino',
71 'helvet' : 'default', 'avant' : 'default',
72 'newcent' : 'newcent', 'bookman' : 'bookman',
# Maps a LyX 1.4 \fontscheme value to the LyX 1.5 \font_sans value.
# NOTE(review): the tail of this dict literal (closing brace and, presumably,
# a 'pslatex' entry) is elided in this excerpt.
74 sans_fonts = {'default' : 'default', 'ae' : 'default',
75 'times' : 'default', 'palatino' : 'default',
76 'helvet' : 'helvet', 'avant' : 'avant',
77 'newcent' : 'default', 'bookman' : 'default',
# Maps a LyX 1.4 \fontscheme value to the LyX 1.5 \font_typewriter value.
# Only 'pslatex' selects a real typewriter face (courier); every other
# scheme falls back to the default typewriter font.
typewriter_fonts = dict.fromkeys(
    ('default', 'ae', 'times', 'palatino',
     'helvet', 'avant', 'newcent', 'bookman'), 'default')
typewriter_fonts['pslatex'] = 'courier'
# NOTE(review): some original lines are elided in this excerpt (the `i = 0`
# setup and the if/else branches around the warnings); comments cover
# visible code only.
85 def convert_font_settings(document):
86 " Convert font settings. "
# Replace the single 1.4 `\fontscheme` header line with the per-family
# 1.5 `\font_*` settings derived from the tables above.
88 i = find_token_exact(document.header, "\\fontscheme", i)
90 document.warning("Malformed LyX document: Missing `\\fontscheme'.")
92 font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
94 document.warning("Malformed LyX document: Empty `\\fontscheme'.")
95 font_scheme = 'default'
# Unknown schemes degrade gracefully to 'default' with a warning.
96 if not font_scheme in roman_fonts.keys():
97 document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
98 font_scheme = 'default'
99 document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
100 '\\font_sans %s' % sans_fonts[font_scheme],
101 '\\font_typewriter %s' % typewriter_fonts[font_scheme],
102 '\\font_default_family default',
105 '\\font_sf_scale 100',
106 '\\font_tt_scale 100']
# NOTE(review): some original lines are elided in this excerpt (the
# `insert_line`/`i` setup and several if/else lines); comments cover
# visible code only.
109 def revert_font_settings(document):
110 " Revert font settings. "
# Collect the 1.5 per-family font settings, removing them from the header
# as they are read; afterwards a matching 1.4 \fontscheme is re-inserted.
113 fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
114 for family in 'roman', 'sans', 'typewriter':
115 name = '\\font_%s' % family
116 i = find_token_exact(document.header, name, i)
118 document.warning("Malformed LyX document: Missing `%s'." % name)
121 if (insert_line < 0):
123 fonts[family] = get_value(document.header, name, i, i + 1)
124 del document.header[i]
125 i = find_token_exact(document.header, '\\font_default_family', i)
127 document.warning("Malformed LyX document: Missing `\\font_default_family'.")
128 font_default_family = 'default'
130 font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
131 del document.header[i]
132 i = find_token_exact(document.header, '\\font_sc', i)
134 document.warning("Malformed LyX document: Missing `\\font_sc'.")
137 font_sc = get_value(document.header, '\\font_sc', i, i + 1)
138 del document.header[i]
# Settings with no 1.4 equivalent are only warned about, not converted.
139 if font_sc != 'false':
140 document.warning("Conversion of '\\font_sc' not yet implemented.")
141 i = find_token_exact(document.header, '\\font_osf', i)
143 document.warning("Malformed LyX document: Missing `\\font_osf'.")
146 font_osf = get_value(document.header, '\\font_osf', i, i + 1)
147 del document.header[i]
148 i = find_token_exact(document.header, '\\font_sf_scale', i)
150 document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
151 font_sf_scale = '100'
153 font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
154 del document.header[i]
155 if font_sf_scale != '100':
156 document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
157 i = find_token_exact(document.header, '\\font_tt_scale', i)
159 document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
160 font_tt_scale = '100'
162 font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
163 del document.header[i]
164 if font_tt_scale != '100':
165 document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
# If some \fontscheme maps exactly to the collected roman/sans/typewriter
# triple, re-insert it ...
166 for font_scheme in roman_fonts.keys():
167 if (roman_fonts[font_scheme] == fonts['roman'] and
168 sans_fonts[font_scheme] == fonts['sans'] and
169 typewriter_fonts[font_scheme] == fonts['typewriter']):
170 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
171 if font_default_family != 'default':
172 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
173 if font_osf == 'true':
174 document.warning("Ignoring `\\font_osf = true'")
# ... otherwise fall back to \fontscheme default and emulate the selected
# fonts with raw preamble code.
176 font_scheme = 'default'
177 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
178 if fonts['roman'] == 'cmr':
179 document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
180 if font_osf == 'true':
181 document.preamble.append('\\usepackage{eco}')
183 for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
184 if fonts['roman'] == font:
185 document.preamble.append('\\usepackage{%s}' % font)
186 for font in 'cmss', 'lmss', 'cmbr':
187 if fonts['sans'] == font:
188 document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
# NOTE(review): this iterates over the *characters* of the string
# 'berasans', not a one-element tuple ('berasans',). No single character
# can equal fonts['sans'] == 'berasans', so the \usepackage{berasans}
# branch below looks unreachable — verify against upstream.
189 for font in 'berasans':
190 if fonts['sans'] == font:
191 document.preamble.append('\\usepackage{%s}' % font)
192 for font in 'cmtt', 'lmtt', 'cmtl':
193 if fonts['typewriter'] == font:
194 document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
195 for font in 'courier', 'beramono', 'luximono':
196 if fonts['typewriter'] == font:
197 document.preamble.append('\\usepackage{%s}' % font)
198 if font_default_family != 'default':
199 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
200 if font_osf == 'true':
201 document.warning("Ignoring `\\font_osf = true'")
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# -1 checks, index increments); comments cover visible code only.
204 def revert_booktabs(document):
205 " We remove the booktabs flag or everything else will become a mess. "
# Pre-compiled patterns for rows carrying extra (booktabs-only) spacing.
206 re_row = re.compile(r'^<row.*space="[^"]+".*>$')
207 re_tspace = re.compile(r'\s+topspace="[^"]+"')
208 re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
209 re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
212 i = find_token(document.body, "\\begin_inset Tabular", i)
215 j = find_end_of_inset(document.body, i + 1)
217 document.warning("Malformed LyX document: Could not find end of tabular.")
# Strip the booktabs attribute and any extra row spacing from every line
# of the tabular inset.
219 for k in range(i, j):
220 if re.search('^<features.* booktabs="true".*>$', document.body[k]):
221 document.warning("Converting 'booktabs' table to normal table.")
222 document.body[k] = document.body[k].replace(' booktabs="true"', '')
223 if re.search(re_row, document.body[k]):
224 document.warning("Removing extra row space.")
225 document.body[k] = re_tspace.sub('', document.body[k])
226 document.body[k] = re_bspace.sub('', document.body[k])
227 document.body[k] = re_ispace.sub('', document.body[k])
# NOTE(review): a number of original lines are elided in this excerpt
# (docstring fragments, `insets`/`i` setup, several if/else and increment
# lines); comments cover visible code only.
231 def convert_multiencoding(document, forward):
232 """ Fix files with multiple encodings.
233 Files with an inputencoding of "auto" or "default" and multiple languages
234 where at least two languages have different default encodings are encoded
235 in multiple encodings for file formats < 249. These files are incorrectly
236 read and written (as if the whole file was in the encoding of the main
238 This is not true for files written by CJK-LyX, they are always in the locale
242 - converts from fake unicode values to true unicode if forward is true, and
243 - converts from true unicode values to fake unicode if forward is false.
244 document.encoding must be set to the old value (format 248) in both cases.
246 We do this here and not in LyX.py because it is far easier to do the
247 necessary parsing in modern formats than in ancient ones.
# Insets whose content is encoded in the document language rather than the
# surrounding language.
249 inset_types = ["Foot", "Note"]
# CJK-LyX files are single-encoding; nothing to fix for them.
250 if document.cjk_encoding != '':
# Stack of encodings active at each nesting level; starts with the
# document encoding.
252 encoding_stack = [document.encoding]
254 lang_re = re.compile(r"^\\lang\s(\S+)")
255 inset_re = re.compile(r"^\\begin_inset\s(\S+)")
256 if not forward: # no need to read file unless we are reverting
257 spec_chars = read_unicodesymbols()
259 if document.inputencoding == "auto" or document.inputencoding == "default":
261 while i < len(document.body):
# A \lang switch changes the encoding of the innermost level.
262 result = lang_re.match(document.body[i])
264 language = result.group(1)
265 if language == "default":
266 document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
267 encoding_stack[-1] = document.encoding
269 from lyx2lyx_lang import lang
270 document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
271 encoding_stack[-1] = lang[language][3]
272 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
273 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
# Footnotes/notes restart in the default encoding of the document
# language; other layouts inherit the current encoding.
274 if len(insets) > 0 and insets[-1] in inset_types:
275 from lyx2lyx_lang import lang
276 encoding_stack.append(lang[document.language][3])
278 encoding_stack.append(encoding_stack[-1])
279 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
280 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
281 if len(encoding_stack) == 1:
282 # Don't remove the document encoding from the stack
283 document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
285 del encoding_stack[-1]
286 elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
287 inset_result = inset_re.match(document.body[i])
289 insets.append(inset_result.group(1))
292 elif find_token(document.body, "\\end_inset", i, i + 1) == i:
# Only lines whose effective encoding differs from the document
# encoding need re-coding.
294 if encoding_stack[-1] != document.encoding:
296 # This line has been incorrectly interpreted as if it was
297 # encoded in 'encoding'.
298 # Convert back to the 8bit string that was in the file.
299 orig = document.body[i].encode(document.encoding)
300 # Convert the 8bit string that was in the file to unicode
301 # with the correct encoding.
302 document.body[i] = orig.decode(encoding_stack[-1])
305 # Convert unicode to the 8bit string that will be written
306 # to the file with the correct encoding.
307 orig = document.body[i].encode(encoding_stack[-1])
308 # Convert the 8bit string that will be written to the
309 # file to fake unicode with the encoding that will later
310 # be used when writing to the file.
311 document.body[i] = orig.decode(document.encoding)
# Fallback when re-coding fails: replace unencodable characters by
# ERT/math commands; the replacement may span several lines.
313 mod_line = revert_unicode_line(document, i, insets, spec_chars)
314 document.body[i:i+1] = mod_line.split('\n')
315 i += len(mod_line.split('\n')) - 1
def convert_utf8(document):
    """Switch the document to UTF-8.

    The body is transcoded first, while document.encoding still holds the
    old (format 248) value that convert_multiencoding relies on; only then
    is the encoding attribute updated.
    """
    convert_multiencoding(document, True)
    document.encoding = "utf8"
# NOTE(review): some original lines are elided in this excerpt (the -1
# check after find_token); comments cover visible code only.
325 def revert_utf8(document):
326 " Set document encoding to the value corresponding to inputencoding. "
327 i = find_token(document.header, "\\inputencoding", 0)
329 document.header.append("\\inputencoding auto")
# utf8 does not exist before format 249; fall back to auto.
330 elif get_value(document.header, "\\inputencoding", i) == "utf8":
331 document.header[i] = "\\inputencoding auto"
332 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# Recompute the effective 8-bit encoding for format 248, then transcode
# the body back to it.
333 document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
334 convert_multiencoding(document, False)
# NOTE(review): some original lines are elided in this excerpt (the
# spec_chars initialization, comment-line skipping, the return); comments
# cover visible code only. Python 2 only (unichr).
337 def read_unicodesymbols():
338 " Read the unicodesymbols list of unicode characters and corresponding commands."
339 pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
# NOTE(review): str.strip('lyx2lyx') removes any of the characters
# l,y,x,2 from *both ends* of the path, not the 'lyx2lyx' suffix — works
# for the usual install layout but is fragile; verify against upstream.
# The file handle is never closed in the visible code.
340 fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
342 for line in fp.readlines():
344 line=line.replace(' "',' ') # remove all quotation marks with spaces before
345 line=line.replace('" ',' ') # remove all quotation marks with spaces after
346 line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
348 # flag1 and flag2 are preamble and other flags
349 [ucs4,command,flag1,flag2] =line.split(None,3)
# NOTE(review): eval() of file content — acceptable only because the
# unicodesymbols file ships with LyX itself; do not reuse on untrusted input.
350 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
# NOTE(review): many original lines are elided in this excerpt (mod_line
# setup, try/except framing, several else branches, the return); comments
# cover visible code only. Python 2 only (has_key).
357 def revert_unicode_line(document, i, insets, spec_chars, replacement_character = '???'):
358 # Define strings to start and end ERT and math insets
359 ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
360 ert_outro='\n\\end_layout\n\n\\end_inset\n'
361 math_intro='\n\\begin_inset Formula $'
362 math_outro='$\n\\end_inset'
# Remember the last character of the previous body line so a combining
# accent at the start of this line can attach to it.
365 if i and not is_inset_line(document, i-1):
366 last_char = document.body[i - 1][-1:]
370 line = document.body[i]
371 for character in line:
373 # Try to write the character
374 dummy = character.encode(document.encoding)
375 mod_line += character
376 last_char = character
378 # Try to replace with ERT/math inset
379 if spec_chars.has_key(character):
380 command = spec_chars[character][0] # the command to replace unicode
381 flag1 = spec_chars[character][1]
382 flag2 = spec_chars[character][2]
383 if flag1.find('combining') > -1 or flag2.find('combining') > -1:
384 # We have a character that should be combined with the previous
385 command += '{' + last_char + '}'
386 # Remove the last character. Ignore if it is whitespace
387 if len(last_char.rstrip()):
388 # last_char was found and is not whitespace
390 mod_line = mod_line[:-1]
391 else: # last_char belongs to the last line
392 document.body[i-1] = document.body[i-1][:-1]
394 # The last character was replaced by a command. For now it is
395 # ignored. This could be handled better.
397 if command[0:2] == '\\\\':
398 if command[2:12]=='ensuremath':
399 if insets and insets[-1] == "ERT":
# Inside ERT: switch to inline math with escaped backslash.
401 command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
402 command = command.replace('}', '$\n')
403 elif not insets or insets[-1] != "Formula":
404 # add a math inset with the replacement character
405 command = command.replace('\\\\ensuremath{\\', math_intro)
406 command = command.replace('}', math_outro)
408 # we are already in a math inset
409 command = command.replace('\\\\ensuremath{\\', '')
410 command = command.replace('}', '')
412 if insets and insets[-1] == "Formula":
413 # avoid putting an ERT in a math; instead put command as text
414 command = command.replace('\\\\', '\mathrm{')
415 command = command + '}'
416 elif not insets or insets[-1] != "ERT":
417 # add an ERT inset with the replacement character
418 command = command.replace('\\\\', ert_intro)
419 command = command + ert_outro
421 command = command.replace('\\\\', '\n\\backslash\n')
422 last_char = '' # indicate that the character should not be removed
425 # Replace with replacement string
426 mod_line += replacement_character
# NOTE(review): some original lines are elided in this excerpt (docstring
# tail, i = 0 setup, try/except framing, increments); comments cover
# visible code only.
430 def revert_unicode(document):
431 '''Transform unicode characters that can not be written using the
432 document encoding to commands according to the unicodesymbols
433 file. Characters that can not be replaced by commands are replaced by
434 a replacement string. Flags other than 'combined' are currently not
436 spec_chars = read_unicodesymbols()
437 insets = [] # list of active insets
439 # Go through the document to capture all combining characters
441 while i < len(document.body):
442 line = document.body[i]
# Track the inset nesting so revert_unicode_line knows its context.
444 if line.find('\\begin_inset') > -1:
445 insets.append(line[13:].split()[0])
446 if line.find('\\end_inset') > -1:
449 # Try to write the line
451 # If all goes well the line is written here
452 dummy = line.encode(document.encoding)
455 # Error, some character(s) in the line need to be replaced
456 mod_line = revert_unicode_line(document, i, insets, spec_chars)
457 document.body[i:i+1] = mod_line.split('\n')
458 i += len(mod_line.split('\n'))
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# -1 check, the line deletion and increments); comments cover visible
# code only.
461 def revert_cs_label(document):
462 " Remove status flag of charstyle label. "
465 i = find_token(document.body, "\\begin_inset CharStyle", i)
468 # Search for a line starting 'show_label'
469 # If it is not there, break with a warning message
472 if (document.body[i][:10] == "show_label"):
475 elif (document.body[i][:13] == "\\begin_layout"):
476 document.warning("Malformed LyX document: Missing 'show_label'.")
# NOTE(review): some original lines are elided in this excerpt (docstring
# fragments, loop setup, -1 check, increments); comments cover visible
# code only. Converts a raw \bibitem line into a LatexCommand inset:
#   \bibitem [option]{argument}  ->  \begin_inset LatexCommand bibitem ...
483 def convert_bibitem(document):
485 \bibitem [option]{argument}
489 \begin_inset LatexCommand bibitem
495 This must be called after convert_commandparams.
499 i = find_token(document.body, "\\bibitem", i)
# Extract the bracketed optional argument, if any.
502 j = document.body[i].find('[') + 1
503 k = document.body[i].rfind(']')
504 if j == 0: # No optional argument found
507 option = document.body[i][j:k]
# Extract the mandatory {argument}.
508 j = document.body[i].rfind('{') + 1
509 k = document.body[i].rfind('}')
510 argument = document.body[i][j:k]
511 lines = ['\\begin_inset LatexCommand bibitem']
# Double quotes inside values are escaped for the inset parameter syntax.
513 lines.append('label "%s"' % option.replace('"', '\\"'))
514 lines.append('key "%s"' % argument.replace('"', '\\"'))
516 lines.append('\\end_inset')
517 document.body[i:i+1] = lines
# command -> [option1 name, option2 name, argument name] for every
# LatexCommand inset understood by convert_commandparams /
# revert_commandparams. An empty string means the slot is invalid for
# that command.
commandparams_info = {
    "bibitem" : ["label", "", "key"],
    "bibtex" : ["options", "btprint", "bibfiles"],
    "hfill" : ["", "", ""],
    "index" : ["", "", "name"],
    "printindex" : ["", "", "name"],
    "label" : ["", "", "name"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl" : ["name", "", "target"],
    "url" : ["name", "", "target"]}
# All natbib/jurabib-style citation variants share the same three slots.
commandparams_info.update(
    (cmd, ["after", "before", "key"])
    for cmd in ("cite", "citet", "citep", "citealt", "citealp",
                "citeauthor", "citeyear", "citeyearpar",
                "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
                "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
                "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
                "citefield", "citetitle", "cite*"))
# All cross-reference commands take an optional name and a reference.
commandparams_info.update(
    (cmd, ["name", "", "reference"])
    for cmd in ("eqref", "pageref", "prettyref", "ref", "vpageref", "vref"))
# NOTE(review): many original lines are elided in this excerpt (docstring
# fragments, loop setup, state-machine variable initialization, several
# transition/accumulation lines); comments cover visible code only.
566 def convert_commandparams(document):
569 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
574 \begin_inset LatexCommand cmdname
580 name1, name2 and name3 can be different for each command.
582 # \begin_inset LatexCommand bibitem was not the official version (see
583 # convert_bibitem()), but could be read in, so we convert it here, too.
587 i = find_token(document.body, "\\begin_inset LatexCommand", i)
# Everything after the 26-char prefix is the raw \cmd[...]{...} text,
# possibly continued on following lines up to \end_inset.
590 command = document.body[i][26:].strip()
592 document.warning("Malformed LyX document: Missing LatexCommand name.")
596 j = find_token(document.body, "\\end_inset", i + 1)
598 document.warning("Malformed document")
600 command += "".join(document.body[i+1:j])
601 document.body[i+1:j] = []
603 # The following parser is taken from the original InsetCommandParams::scanCommand
609 # Used to handle things like \command[foo[bar]]{foo{bar}}
# Character-by-character state machine over the raw command text.
613 if ((state == "CMDNAME" and c == ' ') or
614 (state == "CMDNAME" and c == '[') or
615 (state == "CMDNAME" and c == '{')):
617 if ((state == "OPTION" and c == ']') or
618 (state == "SECOPTION" and c == ']') or
619 (state == "CONTENT" and c == '}')):
623 nestdepth = nestdepth - 1
624 if ((state == "OPTION" and c == '[') or
625 (state == "SECOPTION" and c == '[') or
626 (state == "CONTENT" and c == '{')):
627 nestdepth = nestdepth + 1
628 if state == "CMDNAME":
630 elif state == "OPTION":
632 elif state == "SECOPTION":
634 elif state == "CONTENT":
# 'b' is presumably the previous delimiter — a second '[' directly after
# ']' starts the second option. TODO confirm against the elided lines.
639 elif c == '[' and b != ']':
641 nestdepth = 0 # Just to be sure
642 elif c == '[' and b == ']':
644 nestdepth = 0 # Just to be sure
647 nestdepth = 0 # Just to be sure
650 # Now we have parsed the command, output the parameters
651 lines = ["\\begin_inset LatexCommand %s" % name]
# Emit each parsed slot under its per-command parameter name from
# commandparams_info; empty name means the slot is invalid for this command.
653 if commandparams_info[name][0] == "":
654 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
656 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
658 if commandparams_info[name][1] == "":
659 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
661 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
663 if commandparams_info[name][2] == "":
664 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
666 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
667 document.body[i:i+1] = lines
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# default initialization of option1/option2/argument/preview_line, the
# if/elif conditions selecting the output format); comments cover visible
# code only. Inverse of convert_commandparams: re-serializes parameter
# lines back into the one-line \cmd[opt1][opt2]{arg} form.
671 def revert_commandparams(document):
672 regex = re.compile(r'(\S+)\s+(.+)')
675 i = find_token(document.body, "\\begin_inset LatexCommand", i)
678 name = document.body[i].split()[2]
679 j = find_end_of_inset(document.body, i + 1)
# Collect the named parameters of the inset, unescaping \" back to ".
684 for k in range(i + 1, j):
685 match = re.match(regex, document.body[k])
687 pname = match.group(1)
688 pvalue = match.group(2)
689 if pname == "preview":
690 preview_line = document.body[k]
691 elif (commandparams_info[name][0] != "" and
692 pname == commandparams_info[name][0]):
693 option1 = pvalue.strip('"').replace('\\"', '"')
694 elif (commandparams_info[name][1] != "" and
695 pname == commandparams_info[name][1]):
696 option2 = pvalue.strip('"').replace('\\"', '"')
697 elif (commandparams_info[name][2] != "" and
698 pname == commandparams_info[name][2]):
699 argument = pvalue.strip('"').replace('\\"', '"')
700 elif document.body[k].strip() != "":
701 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
# bibitem reverts to a raw \bibitem line (no inset); everything else stays
# an inset with the options inlined into the command.
702 if name == "bibitem":
704 lines = ["\\bibitem {%s}" % argument]
706 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
710 lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
712 lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
715 lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
717 lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
718 if name != "bibitem":
719 if preview_line != "":
720 lines.append(preview_line)
722 lines.append('\\end_inset')
723 document.body[i:j+1] = lines
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# default initialization of symbol/description/prefix/use_nomencl, most of
# the ERT replacement list); comments cover visible code only.
727 def revert_nomenclature(document):
728 " Convert nomenclature entry to ERT. "
729 regex = re.compile(r'(\S+)\s+(.+)')
733 i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
737 j = find_end_of_inset(document.body, i + 1)
# Collect the inset's named parameters, unescaping \" back to ".
742 for k in range(i + 1, j):
743 match = re.match(regex, document.body[k])
745 name = match.group(1)
746 value = match.group(2)
747 if name == "preview":
748 preview_line = document.body[k]
749 elif name == "symbol":
750 symbol = value.strip('"').replace('\\"', '"')
751 elif name == "description":
752 description = value.strip('"').replace('\\"', '"')
753 elif name == "prefix":
754 prefix = value.strip('"').replace('\\"', '"')
755 elif document.body[k].strip() != "":
756 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
# Build the raw LaTeX \nomenclature call and wrap it in an ERT inset.
758 command = 'nomenclature{%s}{%s}' % (symbol, description)
760 command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
761 document.body[i:j+1] = ['\\begin_inset ERT',
764 '\\begin_layout %s' % document.default_layout,
# Make sure the preamble loads nomencl (once) when any entry was reverted.
773 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
774 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
775 document.preamble.append('\\makenomenclature')
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# default initialization of labelwidth/use_nomencl, most of the ERT
# replacement list); comments cover visible code only.
778 def revert_printnomenclature(document):
779 " Convert printnomenclature to ERT. "
780 regex = re.compile(r'(\S+)\s+(.+)')
784 i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
788 j = find_end_of_inset(document.body, i + 1)
# Collect the inset's named parameters, unescaping \" back to ".
791 for k in range(i + 1, j):
792 match = re.match(regex, document.body[k])
794 name = match.group(1)
795 value = match.group(2)
796 if name == "preview":
797 preview_line = document.body[k]
798 elif name == "labelwidth":
799 labelwidth = value.strip('"').replace('\\"', '"')
800 elif document.body[k].strip() != "":
801 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
# Build the raw LaTeX call and wrap it in an ERT inset.
803 command = 'nomenclature{}'
805 command = 'nomenclature[%s]' % labelwidth
806 document.body[i:j+1] = ['\\begin_inset ERT',
809 '\\begin_layout %s' % document.default_layout,
# Make sure the preamble loads nomencl (once) when the inset was reverted.
818 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
819 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
820 document.preamble.append('\\makenomenclature')
# NOTE(review): the -1 check and return after find_token are elided in
# this excerpt; comments cover visible code only.
823 def convert_esint(document):
824 " Add \\use_esint setting to header. "
# \use_esint is inserted right before the \cite_engine header line.
825 i = find_token(document.header, "\\cite_engine", 0)
827 document.warning("Malformed LyX document: Missing `\\cite_engine'.")
829 # 0 is off, 1 is auto, 2 is on.
830 document.header.insert(i, '\\use_esint 0')
# NOTE(review): the -1 check/return and the condition guarding the
# preamble append (presumably use_esint == '2', i.e. "on") are elided in
# this excerpt; comments cover visible code only.
833 def revert_esint(document):
834 " Remove \\use_esint setting from header. "
835 i = find_token(document.header, "\\use_esint", 0)
837 document.warning("Malformed LyX document: Missing `\\use_esint'.")
839 use_esint = document.header[i].split()[1]
840 del document.header[i]
841 # 0 is off, 1 is auto, 2 is on.
843 document.preamble.append('\\usepackage{esint}')
# NOTE(review): most of this function is elided in this excerpt (docstring,
# loop setup, -1 check, the rest of the ERT replacement list); comments
# cover visible code only. Replaces a \clearpage line with an ERT inset.
846 def revert_clearpage(document):
850 i = find_token(document.body, "\\clearpage", i)
853 document.body[i:i+1] = ['\\begin_inset ERT',
856 '\\begin_layout %s' % document.default_layout,
# NOTE(review): most of this function is elided in this excerpt (loop
# setup, -1 check, the rest of the ERT replacement list); comments cover
# visible code only. Replaces a \cleardoublepage line with an ERT inset.
867 def revert_cleardoublepage(document):
868 " cleardoublepage -> ERT "
871 i = find_token(document.body, "\\cleardoublepage", i)
874 document.body[i:i+1] = ['\\begin_inset ERT',
877 '\\begin_layout %s' % document.default_layout,
# NOTE(review): some original lines are elided in this excerpt (the i = 0
# setup, the -1 break, increments); comments cover visible code only.
888 def convert_lyxline(document):
889 " remove fontsize commands for \lyxline "
890 # Problem: the old \lyxline definition ignored the font size, while the
891 # new definition uses it to set the line thickness, so imported
892 # \lyxlines would come out with a different thickness. Any fontsize
893 # command directly before a \lyxline is therefore removed to keep the
894 # output unchanged.
894 fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
895 "large", "Large", "LARGE", "huge", "Huge"]
896 for n in range(0, len(fontsizes)):
899 while i < len(document.body):
900 i = find_token(document.body, "\\size " + fontsizes[n], i)
901 k = find_token(document.body, "\\lyxline", i)
902 # the corresponding fontsize command is always 2 lines before the \lyxline
903 if (i != -1 and k == i+2):
904 document.body[i:i+1] = []
# NOTE(review): the if/else lines around the -1 check are elided in this
# excerpt; comments cover visible code only.
910 def revert_encodings(document):
911 " Set new encodings to auto. "
# Input encodings introduced after format 248; older LyX cannot read them,
# so they are downgraded to auto.
912 encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
913 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
914 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
915 i = find_token(document.header, "\\inputencoding", 0)
917 document.header.append("\\inputencoding auto")
919 inputenc = get_value(document.header, "\\inputencoding", i)
920 if inputenc in encodings:
921 document.header[i] = "\\inputencoding auto"
922 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# -1 checks, increments); comments cover visible code only.
925 def convert_caption(document):
926 " Convert caption layouts to caption insets. "
929 i = find_token(document.body, "\\begin_layout Caption", i)
932 j = find_end_of_layout(document.body, i)
934 document.warning("Malformed LyX document: Missing `\\end_layout'.")
# Wrap the old Caption paragraph into a Caption inset hosted inside a
# default-layout paragraph (close the inner layout/inset at j, open the
# outer layout + inset at i).
937 document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
938 document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
939 "\\begin_inset Caption", "",
940 "\\begin_layout %s" % document.default_layout]
# NOTE(review): some original lines are elided in this excerpt (loop setup,
# several if/else and increment lines); comments cover visible code only.
944 def revert_caption(document):
945 " Convert caption insets to caption layouts. "
946 " This assumes that the text class has a caption style. "
949 i = find_token(document.body, "\\begin_inset Caption", i)
953 # We either need to delete the previous \begin_layout line, or we
954 # need to end the previous layout if this inset is not in the first
955 # position of the paragraph.
956 layout_before = find_token_backwards(document.body, "\\begin_layout", i)
957 if layout_before == -1:
958 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
960 layout_line = document.body[layout_before]
961 del_layout_before = True
962 l = layout_before + 1
# Non-empty lines between the enclosing \begin_layout and the inset mean
# the inset is not at paragraph start, so the layout must stay.
964 if document.body[l] != "":
965 del_layout_before = False
968 if del_layout_before:
969 del document.body[layout_before:i]
972 document.body[i:i] = ["\\end_layout", ""]
975 # Find start of layout in the inset and end of inset
976 j = find_token(document.body, "\\begin_layout", i)
978 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
980 k = find_end_of_inset(document.body, i)
982 document.warning("Malformed LyX document: Missing `\\end_inset'.")
985 # We either need to delete the following \end_layout line, or we need
986 # to restart the old layout if this inset is not at the paragraph end.
987 layout_after = find_token(document.body, "\\end_layout", k)
988 if layout_after == -1:
989 document.warning("Malformed LyX document: Missing `\\end_layout'.")
991 del_layout_after = True
993 while l < layout_after:
994 if document.body[l] != "":
995 del_layout_after = False
999 del document.body[k+1:layout_after+1]
1001 document.body[k+1:k+1] = [layout_line, ""]
1003 # delete \begin_layout and \end_inset and replace \begin_inset with
1004 # "\begin_layout Caption". This works because we can only have one
1005 # paragraph in the caption inset: The old \end_layout will be recycled.
1006 del document.body[k]
1007 if document.body[k] == "":
1008 del document.body[k]
1009 del document.body[j]
1010 if document.body[j] == "":
1011 del document.body[j]
1012 document.body[i] = "\\begin_layout Caption"
1013 if document.body[i+1] == "":
1014 del document.body[i+1]
1018 # Accents of InsetLaTeXAccent
# Maps a LyX accent command letter to the corresponding Unicode combining
# character (used by _convert_accent below).
# NOTE(review): the opening `accent_map = {` line and the closing brace
# are elided in this excerpt.
1020 "`" : u'\u0300', # grave
1021 "'" : u'\u0301', # acute
1022 "^" : u'\u0302', # circumflex
1023 "~" : u'\u0303', # tilde
1024 "=" : u'\u0304', # macron
1025 "u" : u'\u0306', # breve
1026 "." : u'\u0307', # dot above
1027 "\"": u'\u0308', # diaeresis
1028 "r" : u'\u030a', # ring above
1029 "H" : u'\u030b', # double acute
1030 "v" : u'\u030c', # caron
1031 "b" : u'\u0320', # minus sign below
1032 "d" : u'\u0323', # dot below
1033 "c" : u'\u0327', # cedilla
1034 "k" : u'\u0328', # ogonek
1035 "t" : u'\u0361' # tie. This is special: It spans two characters, but
1036 # only one is given as argument, so we don't need to
1037 # treat it differently.
1041 # special accents of InsetLaTeXAccent without argument
# Accent letters that stand for a whole character on their own (no
# argument), mapped to the Unicode character they produce.
# NOTE(review): the closing brace of this dict literal is elided in this
# excerpt.
1042 special_accent_map = {
1043 'i' : u'\u0131', # dotless i
1044 'j' : u'\u0237', # dotless j
1045 'l' : u'\u0142', # l with stroke
1046 'L' : u'\u0141' # L with stroke
# special accent arguments of InsetLaTeXAccent: LaTeX commands that may
# appear as the *accented* character inside an accent inset.
# NOTE(review): the `accented_map = {` opener and closing brace were dropped
# from this listing; the name is grounded by the uses in _convert_accent and
# revert_accent (inverse_accented_map).
accented_map = {
    '\\i' : u'\u0131', # dotless i
    '\\j' : u'\u0237'  # dotless j
}
def _convert_accent(accent, accented_char):
    """Return the NFC-normalized Unicode string for an InsetLaTeXAccent
    combination, or '' if the combination cannot be converted.

    `accent` is the accent command letter (key of accent_map), possibly a
    special stand-alone accent (key of special_accent_map) when
    `accented_char` is empty; `accented_char` is the base character or a
    command from accented_map.
    """
    # NOTE(review): several control-flow lines of this function were dropped
    # from the listing (the empty-char branch, `type = 'v'`, the early
    # returns); they are restored here to match the surviving lines — verify
    # against the project history.
    type = accent
    char = accented_char
    if char == '':
        if type in special_accent_map:
            return special_accent_map[type]
        # a missing char is treated as space by LyX
        char = ' '
    elif type == 'q' and char in ['t', 'd', 'l', 'L']:
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
        type = 'v'
    elif char in accented_map:
        char = accented_map[char]
    elif (len(char) > 1):
        # We can only convert accents on a single char
        return ''
    a = accent_map.get(type)
    if a:
        return unicodedata.normalize("NFC", "%s%s" % (char, a))
    return ''
# Purpose: turn raw ERT text into valid LyX ERT lines, escaping backslashes
# and splitting paragraphs at '\n'.
# NOTE(review): this listing dropped several lines of the function (the
# `for c in ert:` loop header, the backslash-branch bookkeeping, the '\n'
# branch header, and the final `return i`); the surviving lines are kept
# byte-identical below. Do not assume the visible lines are consecutive.
1084 def convert_ertbackslash(body, i, ert, default_layout):
1085 r""" -------------------------------------------------------------------------------------------
1086 Convert backslashes and '\n' into valid ERT code, append the converted
1087 text to body[i] and return the (maybe incremented) line index i"""
# Backslash branch: emit LyX's escaped form.
1091 body[i] = body[i] + '\\backslash '
# Newline branch: close the current layout and open a fresh one.
1095 body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
# Default branch: plain characters are appended verbatim.
1098 body[i] = body[i] + c
# Purpose: replace InsetLaTeXAccent markup ('\i \"{a}' etc.) in the body by
# the equivalent precomposed Unicode character, falling back to ERT when the
# accent combination is unknown.
# NOTE(review): this listing dropped lines (loop header, guards, the ERT
# boilerplate entries of the inserted lists); code kept byte-identical.
1102 def convert_accent(document):
1103 # The following forms are supported by LyX:
1104 # '\i \"{a}' (standard form, as written by LyX)
1105 # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
1106 # '\i \"{ }' (also accepted if the accented char is a space)
1107 # '\i \" a' (also accepted)
1108 # '\i \"' (also accepted)
1109 re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
1110 re_contents = re.compile(r'^([^\s{]+)(.*)$')
1111 re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
1114 i = find_re(document.body, re_wholeinset, i)
1117 match = re_wholeinset.match(document.body[i])
1118 prefix = match.group(1)
1119 contents = match.group(3).strip()
1120 match = re_contents.match(contents)
1122 # Strip first char (always \)
1123 accent = match.group(1)[1:]
1124 accented_contents = match.group(2).strip()
1125 match = re_accentedcontents.match(accented_contents)
1126 accented_char = match.group(1)
1127 converted = _convert_accent(accent, accented_char)
1129 # Normalize contents
# NOTE(review): the trailing comma below makes `contents` a 1-tuple, which
# would garble the warning message at line 1135 — likely a transcription
# artifact of this listing; verify against the original file.
1130 contents = '%s{%s}' % (accent, accented_char),
# Success path: splice the converted character back into the line.
1132 document.body[i] = '%s%s' % (prefix, converted)
# Failure path: keep the prefix and re-emit the inset as ERT.
1135 document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
1136 document.body[i] = prefix
1137 document.body[i+1:i+1] = ['\\begin_inset ERT',
1140 '\\begin_layout %s' % document.default_layout,
1144 i = convert_ertbackslash(document.body, i + 7,
1146 document.default_layout)
1147 document.body[i+1:i+1] = ['\\end_layout',
def is_inset_line(document, i):
    """ Line i of body has an inset """
    # A line starting with a backslash is itself inset/command markup.
    # NOTE(review): the `return True` below was dropped from this listing;
    # restored (without it the guard would fall through uselessly).
    if document.body[i][:1] == '\\':
        return True
    # Otherwise look for a backslash in the last two whitespace-separated
    # tokens, which is where an inline inset marker would appear.
    last_tokens = "".join(document.body[i].split()[-2:])
    return last_tokens.find('\\') != -1
# Purpose: replace accented Unicode characters that the target encoding
# cannot represent by InsetLaTeXAccent markup ('\i \accent{char}').
# NOTE(review): this listing dropped lines throughout (blank lines, `try:`
# headers for the encode probes, `continue`s, `else:` branches, `i = i + 1`
# style counters); the surviving lines are kept byte-identical below and are
# NOT consecutive — consult the original file before editing logic.
1161 def revert_accent(document):
# Build the inverse of each accent table so Unicode chars map back to
# the LaTeX accent letters.
1162 inverse_accent_map = {}
1163 for k in accent_map:
1164 inverse_accent_map[accent_map[k]] = k
1165 inverse_special_accent_map = {}
1166 for k in special_accent_map:
1167 inverse_special_accent_map[special_accent_map[k]] = k
1168 inverse_accented_map = {}
1169 for k in accented_map:
1170 inverse_accented_map[accented_map[k]] = k
1172 # Since LyX may insert a line break within a word we must combine all
1173 # words before unicode normalization.
1174 # We do this only if the next line starts with an accent, otherwise we
1175 # would create things like '\begin_inset ERTstatus'.
1176 for i in range(len(document.body) - 1):
1177 if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1179 if (document.body[i+1][0] in inverse_accent_map and not is_inset_line(document, i)):
1180 # the last character of this line and the first of the next line
1181 # form probably a surrogate pair, inline insets are excluded (second part of the test)
1182 while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1183 document.body[i] += document.body[i+1][0]
1184 document.body[i+1] = document.body[i+1][1:]
1186 # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1187 # This is needed to catch all accented characters.
1188 for i in range(len(document.body)):
1189 # Unfortunately we have a mixture of unicode strings and plain strings,
1190 # because we never use u'xxx' for string literals, but 'xxx'.
1191 # Therefore we may have to try two times to normalize the data.
1193 document.body[i] = unicodedata.normalize("NFD", document.body[i])
# Fallback for byte strings: decode as UTF-8 first (Python-2 idiom).
1195 document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
1197 # Replace accented characters with InsetLaTeXAccent
1198 # Do not convert characters that can be represented in the chosen
# encoding_stack tracks the effective encoding per nested layout.
1200 encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1201 lang_re = re.compile(r"^\\lang\s(\S+)")
1204 while i < len(document.body):
1205 if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1206 # Track the encoding of the current line
1207 result = lang_re.match(document.body[i])
1209 language = result.group(1)
1210 if language == "default":
1211 encoding_stack[-1] = document.encoding
# For explicit languages the encoding comes from the language table.
1213 from lyx2lyx_lang import lang
1214 encoding_stack[-1] = lang[language][3]
# Push/pop the encoding at layout boundaries.
1216 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1217 encoding_stack.append(encoding_stack[-1])
1219 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1220 del encoding_stack[-1]
1223 for j in range(len(document.body[i])):
1224 # dotless i and dotless j are both in special_accent_map and can
1225 # occur as an accented character, so we need to test that the
1226 # following character is no accent
1227 if (document.body[i][j] in inverse_special_accent_map and
1228 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1229 accent = document.body[i][j]
# Probe whether the target encoding can represent the character;
# only on failure is the inset inserted (inside a dropped try/except).
1231 dummy = accent.encode(encoding_stack[-1])
1232 except UnicodeEncodeError:
1233 # Insert the rest of the line as new line
1234 if j < len(document.body[i]) - 1:
1235 document.body.insert(i+1, document.body[i][j+1:])
1236 # Delete the accented character
1237 document.body[i] = document.body[i][:j]
1238 # Finally add the InsetLaTeXAccent
1239 document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1241 elif j > 0 and document.body[i][j] in inverse_accent_map:
1242 accented_char = document.body[i][j-1]
1243 if accented_char == ' ':
1244 # Conform to LyX output
1246 elif accented_char in inverse_accented_map:
1247 accented_char = inverse_accented_map[accented_char]
1248 accent = document.body[i][j]
1250 dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
1251 except UnicodeEncodeError:
1252 # Insert the rest of the line as new line
1253 if j < len(document.body[i]) - 1:
1254 document.body.insert(i+1, document.body[i][j+1:])
1255 # Delete the accented characters
1256 document.body[i] = document.body[i][:j-1]
1257 # Finally add the InsetLaTeXAccent
1258 document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1262 # Normalize to "Normal form C" (NFC, pre-composed characters) again
1263 for i in range(len(document.body)):
1264 document.body[i] = unicodedata.normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside."""

    # NOTE(review): two dict entries were dropped from this listing (internal
    # lines 1274 and 1276); "\\color" and "\\bar" restored here — verify
    # against the project history.
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       "\\color": "none",
                       "\\shape": "default",
                       "\\bar": "default",
                       "\\family": "default"}
    return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """Move whitespace out of \\lang font-change sequences (format 274).

    Format 259 already transferred leading/trailing whitespace out of most
    font-change sequences, but the language property was overlooked there;
    this applies the identical normalization for \\lang changes.
    """
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """ Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's language; otherwise, the paragraph's language is the
    document's language."""

    lines = document.body

    # First non-empty line after the paragraph's \begin_layout.
    first_nonempty_line = \
        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)

    words = lines[first_nonempty_line].split()

    # NOTE(review): the `return words[1]` branch was dropped from this
    # listing; restored — it is the only value the guarded branch can yield.
    if len(words) > 1 and words[0] == "\\lang":
        return words[1]
    else:
        return document.language
# Purpose: shared worker for normalize_font_whitespace_259/_274 — moves a
# space that opens or closes a font-change sequence outside the sequence,
# re-emitting the necessary property set/reset lines around it.
# NOTE(review): this listing dropped many lines (the `return` of the backend
# guard, counter initialisations, `changes = {}`, `else:`/`continue`
# branches, the guard conditions inside the two `for k in changes` loops);
# the surviving lines are kept byte-identical and are NOT consecutive.
# The statement order here is load-bearing (lines are inserted while the
# index walks the list) — do not restyle without the full original.
1309 def normalize_font_whitespace(document, char_properties):
1310 """ Before format 259 the font changes were ignored if a
1311 whitespace was the first or last character in the sequence, this function
1312 transfers the whitespace outside. Only a change in one of the properties
1313 in the provided char_properties is handled by this function."""
# Only the LaTeX backend is affected.
1315 if document.backend != "latex":
1318 lines = document.body
1323 while i < len(lines):
1324 words = lines[i].split()
1326 if len(words) > 0 and words[0] == "\\begin_layout":
1327 # a new paragraph resets all font changes
1329 # also reset the default language to be the paragraph's language
1330 if "\\lang" in char_properties.keys():
1331 char_properties["\\lang"] = \
1332 get_paragraph_language(document, i + 1)
1334 elif len(words) > 1 and words[0] in char_properties.keys():
1335 # we have a font change
1336 if char_properties[words[0]] == words[1]:
1337 # property gets reset
1338 if words[0] in changes.keys():
1339 del changes[words[0]]
1340 defaultproperty = True
# Non-default value: record it as an open change.
1343 changes[words[0]] = words[1]
1344 defaultproperty = False
1346 # We need to explicitly reset all changed properties if we find
1347 # a space below, because LyX 1.4 would output the space after
1348 # closing the previous change and before starting the new one,
1349 # and closing a font change means to close all properties, not
1350 # just the changed one.
1352 if lines[i-1] and lines[i-1][-1] == " ":
1353 lines[i-1] = lines[i-1][:-1]
1354 # a space before the font change
1356 for k in changes.keys():
1357 # exclude property k because that is already in lines[i]
1359 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1360 for k in changes.keys():
1361 # exclude property k because that must be added below anyway
1363 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1365 # Property is reset in lines[i], so add the new stuff afterwards
1366 lines[i+1:i+1] = added_lines
1368 # Reset property for the space
1369 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1370 lines[i:i] = added_lines
1371 i = i + len(added_lines)
1373 elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1374 # a space after the font change
1375 if (lines[i+1] == " " and lines[i+2]):
1376 next_words = lines[i+2].split()
1377 if len(next_words) > 0 and next_words[0] == words[0]:
1378 # a single blank with a property different from the
1379 # previous and the next line must not be changed
# Strip the leading space from the following line.
1382 lines[i+1] = lines[i+1][1:]
1384 for k in changes.keys():
1385 # exclude property k because that is already in lines[i]
1387 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1388 for k in changes.keys():
1389 # exclude property k because that must be added below anyway
1391 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1392 # Reset property for the space
1393 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1394 lines[i:i] = added_lines
1395 i = i + len(added_lines)
def revert_utf8x(document):
    " Set utf8x encoding to utf8. "
    i = find_token(document.header, "\\inputencoding", 0)
    # NOTE(review): the `if i == -1:`/`else:` guards were dropped from this
    # listing; restored to match the append-vs-replace pattern of the
    # surviving lines.
    if i == -1:
        # No explicit encoding line: fall back to the old default.
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8x":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    " Set utf8plain encoding to utf8. "
    i = find_token(document.header, "\\inputencoding", 0)
    # NOTE(review): `if i == -1:`/`else:` guards restored (dropped from this
    # listing), matching the sibling revert_utf8x.
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8-plain":
            document.header[i] = "\\inputencoding utf8"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# Purpose: replace beamer's Alert character style inset by an ERT inset that
# wraps the first layout's text in \alert{...}.
# NOTE(review): this listing dropped the outer search loop, its termination
# guard, and the inner scan loop's counters; the surviving lines are kept
# byte-identical and are NOT consecutive.
1424 def revert_beamer_alert(document):
1425 " Revert beamer's \\alert inset back to ERT. "
1428 i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
# Turn the CharStyle inset into an ERT inset in place.
1431 document.body[i] = "\\begin_inset ERT"
1434 if (document.body[i][:13] == "\\begin_layout"):
1435 # Insert the \alert command
1436 document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
# Purpose: replace beamer's Structure character style inset by an ERT inset
# wrapping the first layout's text in \structure{...}; mirrors
# revert_beamer_alert above.
# NOTE(review): loop headers/guards dropped from this listing; code kept
# byte-identical, lines NOT consecutive.
1443 def revert_beamer_structure(document):
1444 " Revert beamer's \\structure inset back to ERT. "
1447 i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
1450 document.body[i] = "\\begin_inset ERT"
1453 if (document.body[i][:13] == "\\begin_layout"):
1454 document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
def convert_changes(document):
    " Switch output_changes off if tracking_changes is off. "
    i = find_token(document.header, '\\tracking_changes', 0)
    # NOTE(review): the `if == -1: ... return` guards around the two warnings
    # were dropped from this listing; restored (the warnings are only
    # meaningful on the not-found path).
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    j = find_token(document.header, '\\output_changes', 0)
    if j == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    # Showing changes without tracking them is inconsistent; turn it off.
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    " Set ascii encoding to auto. "
    i = find_token(document.header, "\\inputencoding", 0)
    # NOTE(review): `if i == -1:`/`else:` guards restored (dropped from this
    # listing), matching the sibling revert_utf8x/revert_utf8plain.
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "ascii":
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    "Replace obsolete language names (brazil, portuges) by current ones."
    new_name = {"brazil": "brazilian", "portuges": "portuguese"}.get(document.language)
    if new_name:
        document.language = new_name
        pos = find_token(document.header, "\\language", 0)
        document.header[pos] = "\\language %s" % new_name
def revert_language_name(document):
    "Restore the old language names (brazil, portuges) for older formats."
    old_name = {"brazilian": "brazil", "portuguese": "portuges"}.get(document.language)
    if old_name:
        document.language = old_name
        pos = find_token(document.header, "\\language", 0)
        document.header[pos] = "\\language %s" % old_name
# \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    "Rename the old 'cv' text class to its successor 'simplecv'."
    renames = {"cv": "simplecv"}
    document.textclass = renames.get(document.textclass, document.textclass)
def revert_cv_textclass(document):
    "Map the 'simplecv' text class back to its old name 'cv'."
    renames = {"simplecv": "cv"}
    document.textclass = renames.get(document.textclass, document.textclass)
# add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    " add scaleBeforeRotation graphics parameter. "
    # NOTE(review): the search loop, its `if i == -1: return` guard and the
    # `i = i + 1` counter were dropped from this listing; restored here.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            # should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # Seach for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if (k != -1 and l != -1):
            document.body.insert(j, 'scaleBeforeRotation')
        i = i + 1
# Purpose: drop the scaleBeforeRotation graphics parameter; when it is
# absent but rotateAngle plus a size parameter are present, fold the angle
# into the `special` parameter instead.
# NOTE(review): this listing dropped the search loop, its guards, the
# `else:` branch structure around the special-parameter handling and the
# loop counter; the surviving lines are kept byte-identical and are NOT
# consecutive — recover the exact branch nesting from the original file.
1543 # remove scaleBeforeRotation graphics param
1544 def revert_graphics_rotation(document):
1545 " remove scaleBeforeRotation graphics parameter. "
1548 i = find_token(document.body, "\\begin_inset Graphics", i)
1551 j = find_end_of_inset(document.body, i + 1)
1554 document.warning("Malformed LyX document: Could not find end of graphics inset.")
1555 # If there's a scaleBeforeRotation param, just remove that
1556 k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
1558 del document.body[k]
1560 # if not, and if we have rotateAngle and width or height or scale,
1561 # we have to put the rotateAngle value to special
1562 rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
1563 special = get_value(document.body, 'special', i + 1, j)
1564 if rotateAngle != "":
1565 k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
# No pre-existing special parameter: insert a fresh one.
1569 document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
# Otherwise prepend the angle to the existing special value.
1571 l = find_token(document.body, "\tspecial", i + 1, j)
1572 document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
1573 k = find_token(document.body, "\trotateAngle", i + 1, j)
1575 del document.body[k]
def convert_tableborder(document):
    # The problematic is: LyX double the table cell border as it ignores the "|" character in
    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
    # NOTE(review): the `i = 0` initialisation and `i = i + 1` counter were
    # dropped from this listing; restored.
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find("|>{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # delete the "|"
            # NOTE(review): the slice below also drops the line's LAST
            # character (`...-1`); kept as in the listing — confirm against
            # the original whether that is intentional.
            document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
        i = i + 1
def revert_tableborder(document):
    # Re-insert the "|" in front of ">{" on lines that carry leftline="true"
    # (inverse of convert_tableborder).
    # NOTE(review): the `i = 0` initialisation and `i = i + 1` counter were
    # dropped from this listing; restored.
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find(">{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # add the "|"
            document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
        i = i + 1
def revert_armenian(document):
    " Revert Armenian support: armscii8 -> auto, add armtex, armenian -> english. "
    # NOTE(review): the guard lines (`if i != -1:`, `if k == -1:`, `else:`)
    # and the loop counters were dropped from this listing; restored to
    # match the gap positions — verify against the original file.
    # set inputencoding from armscii8 to auto
    if document.inputencoding == "armscii8":
        i = find_token(document.header, "\\inputencoding", 0)
        if i != -1:
            document.header[i] = "\\inputencoding auto"
    # check if preamble exists, if not k is set to -1
    i = 0
    k = -1
    while i < len(document.preamble):
        if k == -1:
            k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
        if k == -1:
            k = document.preamble[i].find("%", 0, len(document.preamble[i]))
        i = i + 1
    # add the entry \usepackage{armtex} to the document preamble
    if document.language == "armenian":
        # set the armtex entry as the first preamble line
        if k != -1:
            document.preamble[0:0] = ["\\usepackage{armtex}"]
        # create the preamble when it doesn't exist
        else:
            document.preamble.append('\\usepackage{armtex}')
    # Set document language from armenian to english
    if document.language == "armenian":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    i = find_token(document.header, "\\inputencoding", 0)
    # NOTE(review): `if i == -1:`/`else:`/`if i != -1:` guards restored
    # (dropped from this listing), matching the sibling revert_* functions.
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding default"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language == "chinese-simplified" or \
       document.language == "chinese-traditional" or \
       document.language == "japanese" or document.language == "korean":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    r" Revert preamble option \listings_params "
    i = find_token(document.header, "\\listings_params", 0)
    # NOTE(review): the `if i != -1:` guard was dropped from this listing;
    # restored (without it a missing header line would crash below).
    if i != -1:
        document.preamble.append('\\usepackage{listings}')
        # NOTE(review): split()[1] keeps only the first whitespace-separated
        # token of the quoted parameter string — parameters containing
        # spaces would be truncated; kept as in the listing, verify upstream.
        document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
        document.header.pop(i)
# Purpose: rewrite a listings inset as raw ERT — \lstinline{...} for inline
# insets, a \begin{lstlisting}...\end{lstlisting} environment otherwise —
# folding an embedded Caption inset (and its label) into the listings
# parameters.
# NOTE(review): this listing dropped a large number of lines: most of the
# docstring example, the search loop and its guards, the initialisation of
# `params`/`status`/`inline`/`caption`/`label`, the computation of `k`
# (used at internal line 1809), and most entries of the ERT boilerplate
# lists. The surviving lines are kept byte-identical and are NOT
# consecutive — do not edit the splice logic without the original file.
1669 def revert_listings_inset(document):
1670 r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1674 lstparams "language=Delphi"
1678 \begin_layout Standard
1688 \begin_layout Standard
1692 lstinline[language=Delphi]{var i = 10;}
1697 There can be an caption inset in this inset
1699 \begin_layout Standard
1700 \begin_inset Caption
1702 \begin_layout Standard
1704 \begin_inset LatexCommand label
1720 i = find_token(document.body, '\\begin_inset listings', i)
# Listings requires the LaTeX package; add it once.
1724 if not '\\usepackage{listings}' in document.preamble:
1725 document.preamble.append('\\usepackage{listings}')
1726 j = find_end_of_inset(document.body, i + 1)
1728 # this should not happen
# Read the inset's three option lines (inline/lstparams/status).
1734 for line in range(i + 1, i + 4):
1735 if document.body[line].startswith('inline'):
1736 inline = document.body[line].split()[1]
1737 if document.body[line].startswith('lstparams'):
1738 params = document.body[line].split()[1].strip('"')
1739 if document.body[line].startswith('status'):
1740 status = document.body[line].split()[1].strip()
# Optional caption inset with an optional label inside it.
1745 cap = find_token(document.body, '\\begin_inset Caption', i)
1747 cap_end = find_end_of_inset(document.body, cap + 1)
1749 # this should not happen
1752 lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
1754 lbl_end = find_end_of_inset(document.body, lbl + 1)
1756 # this should not happen
1761 for line in document.body[lbl : lbl_end + 1]:
1762 if line.startswith('name '):
1763 label = line.split()[1].strip('"')
# Caption text = caption-inset content minus the label inset.
1765 for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
1766 if not line.startswith('\\'):
1767 caption += line.strip()
1770 # looking for the oneline code for lstinline
1771 inlinecode = document.body[find_end_of_layout(document.body,
1772 find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
# Fold caption/label into the [params] option string.
1773 if len(caption) > 0:
1774 if len(params) == 0:
1775 params = 'caption={%s}' % caption
1777 params += ',caption={%s}' % caption
1779 if len(params) == 0:
1780 params = 'label={%s}' % label
1782 params += ',label={%s}' % label
1784 params = '[%s]' % params
# Backslashes inside ERT must be written as \backslash on its own line.
1785 params = params.replace('\\', '\\backslash\n')
1786 if inline == 'true':
1787 document.body[i:(j+1)] = [r'\begin_inset ERT',
1788 'status %s' % status,
1789 r'\begin_layout %s' % document.default_layout,
1793 'lstinline%s{%s}' % (params, inlinecode),
1798 document.body[i: j+1] = [r'\begin_inset ERT',
1799 'status %s' % status,
1801 r'\begin_layout %s' % document.default_layout,
1805 r'begin{lstlisting}%s' % params,
1808 r'\begin_layout %s' % document.default_layout,
1809 ] + document.body[k : j - 1] + \
1811 r'\begin_layout %s' % document.default_layout,
# Purpose: rewrite an Include inset carrying \lstinputlisting{file}[opt]
# as an equivalent ERT inset.
# NOTE(review): this listing dropped the search loop, its guards, and most
# of the ERT boilerplate list entries (internal lines 1860-1871); the
# surviving lines are kept byte-identical and are NOT consecutive.
1820 def revert_include_listings(document):
1821 r''' Revert lstinputlisting Include option , translate
1822 \begin_inset Include \lstinputlisting{file}[opt]
1832 \begin_layout Standard
1836 lstinputlisting{file}[opt]
1844 i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
# Listings requires the LaTeX package; add it once.
1848 if not '\\usepackage{listings}' in document.preamble:
1849 document.preamble.append('\\usepackage{listings}')
1850 j = find_end_of_inset(document.body, i + 1)
1852 # this should not happen
1854 # find command line lstinputlisting{file}[options]
1855 cmd, file, option = '', '', ''
1856 if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
1857 cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
# Backslashes inside ERT must be written as \backslash on its own line.
1858 option = option.replace('\\', '\\backslash\n')
1859 document.body[i : j + 1] = [r'\begin_inset ERT',
1862 r'\begin_layout %s' % document.default_layout,
1866 '%s%s{%s}' % (cmd, option, file),
def revert_ext_font_sizes(document):
    " Move an extsizes font size from \\paperfontsize into the class options. "
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize not in ('10', '11', '12'): return
    # NOTE(review): the `fontsize += 'pt'` line was dropped from this
    # listing; restored (the options below are of the form '10pt').
    fontsize += 'pt'

    i = find_token(document.header, '\\paperfontsize', 0)
    document.header[i] = '\\paperfontsize default'

    i = find_token(document.header, '\\options', 0)
    if i == -1:
        # No \options line yet: insert one right after \textclass.
        i = find_token(document.header, '\\textclass', 0) + 1
        document.header[i:i] = ['\\options %s' % fontsize]
    else:
        document.header[i] += ',%s' % fontsize
def convert_ext_font_sizes(document):
    " Move an extsizes font-size class option into \\paperfontsize. "
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize != 'default': return

    i = find_token(document.header, '\\options', 0)
    # NOTE(review): this guard and the option-removal lines below were
    # dropped from this listing; restored to match the surviving lines —
    # verify against the original file.
    if i == -1: return

    options = get_value(document.header, '\\options', i)

    fontsizes = '10pt', '11pt', '12pt'
    for fs in fontsizes:
        if options.find(fs) != -1:
            break
    else: # this else will only be attained if the for cycle had no match
        return

    options = options.split(',')
    for j, opt in enumerate(options):
        if opt in fontsizes:
            # Strip the trailing 'pt' to get the bare size for the header.
            fontsize = opt[:-2]
            del options[j]
            break

    k = find_token(document.header, '\\paperfontsize', 0)
    document.header[k] = '\\paperfontsize %s' % fontsize

    if options:
        document.header[i] = '\\options %s' % ','.join(options)
    else:
        del document.header[i]
# Purpose: replace a --Separator-- layout by a standard layout containing a
# LyX note that reads "Separate Environment", preserving the paragraph's
# content after the note.
# NOTE(review): this listing dropped the search loop, its guards, and
# several entries of the replacement list (note status line, \end_layout /
# \end_inset markers); the surviving lines are kept byte-identical and are
# NOT consecutive.
1928 def revert_separator_layout(document):
1929 r'''Revert --Separator-- to a lyx note
1932 \begin_layout --Separator--
1938 \begin_layout Standard
1939 \begin_inset Note Note
1942 \begin_layout Standard
1955 i = find_token(document.body, r'\begin_layout --Separator--', i)
1958 j = find_end_of_layout(document.body, i + 1)
1960 # this should not happen
# Splice in the note, then re-append the original paragraph content.
1962 document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
1963 r'\begin_inset Note Note',
1966 r'\begin_layout %s' % document.default_layout,
1967 'Separate Environment',
1971 document.body[ i + 1 : j] + \
def convert_arabic(document):
    " Rename the arabic language (and \\lang marks) to arabic_arabtex. "
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        # NOTE(review): this guard, `i = 0`, `if h != -1:` and the loop
        # counter were dropped from this listing; restored.
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    i = 0
    while i < len(document.body):
        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
        if h != -1:
            # change the language name
            # NOTE(review): the whole line is replaced, so any text after
            # the \lang token on the same line is dropped — kept as in the
            # listing; verify upstream.
            document.body[i] = '\lang arabic_arabtex'
        i = i + 1
def revert_arabic(document):
    " Rename the arabic_arabtex language (and \\lang marks) back to arabic. "
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        # NOTE(review): this guard, `i = 0`, `if h != -1:` and the loop
        # counter were dropped from this listing; restored.
        if i != -1:
            document.header[i] = "\\language arabic"
    i = 0
    while i < len(document.body):
        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
        if h != -1:
            # change the language name
            # NOTE(review): the whole line is replaced (text after the
            # \lang token on the same line is dropped) — kept as in the
            # listing; verify upstream.
            document.body[i] = '\lang arabic'
        i = i + 1
# Conversion tables: each entry is [target_format, [converter functions]].
# `convert` upgrades step by step to format 276; `revert` downgrades back
# to 245.
# NOTE(review): this listing dropped many table entries (internal lines
# 2014, 2016-2017, 2019, 2021-2022, 2026, 2028, 2032-2036, 2038-2039,
# 2043-2045 — including the closing bracket of `convert` and the
# `revert = [` opener — 2048, 2061, 2063-2064, 2072); the converter
# functions they referenced are not recoverable from this chunk. The
# surviving lines are kept byte-identical and are NOT consecutive.
2011 supported_versions = ["1.5.0","1.5"]
2012 convert = [[246, []],
2013 [247, [convert_font_settings]],
2015 [249, [convert_utf8]],
2018 [252, [convert_commandparams, convert_bibitem]],
2020 [254, [convert_esint]],
2023 [257, [convert_caption]],
2024 [258, [convert_lyxline]],
2025 [259, [convert_accent, normalize_font_whitespace_259]],
2027 [261, [convert_changes]],
2029 [263, [normalize_language_name]],
2030 [264, [convert_cv_textclass]],
2031 [265, [convert_tableborder]],
2037 [271, [convert_ext_font_sizes]],
2040 [274, [normalize_font_whitespace_274]],
2041 [275, [convert_graphics_rotation]],
2042 [276, [convert_arabic]]
2046 [275, [revert_arabic]],
2047 [274, [revert_graphics_rotation]],
2049 [272, [revert_separator_layout]],
2050 [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2051 [270, [revert_ext_font_sizes]],
2052 [269, [revert_beamer_alert, revert_beamer_structure]],
2053 [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2054 [267, [revert_CJK]],
2055 [266, [revert_utf8plain]],
2056 [265, [revert_armenian]],
2057 [264, [revert_tableborder]],
2058 [263, [revert_cv_textclass]],
2059 [262, [revert_language_name]],
2060 [261, [revert_ascii]],
2062 [259, [revert_utf8x]],
2065 [256, [revert_caption]],
2066 [255, [revert_encodings]],
2067 [254, [revert_clearpage, revert_cleardoublepage]],
2068 [253, [revert_esint]],
2069 [252, [revert_nomenclature, revert_printnomenclature]],
2070 [251, [revert_commandparams]],
2071 [250, [revert_cs_label]],
2073 [248, [revert_accent, revert_utf8, revert_unicode]],
2074 [247, [revert_booktabs]],
2075 [246, [revert_font_settings]],
2076 [245, [revert_framed]]]
2079 if __name__ == "__main__":