1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
26 from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
27 from LyX import get_encoding
30 ####################################################################
31 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the \\end_inset matching the inset containing lines[i].

    Thin wrapper around the generic find_end_of helper with the inset
    begin/end tokens filled in.
    """
    return find_end_of(lines, i, r"\begin_inset", r"\end_inset")
def find_end_of_layout(lines, i):
    """Return the index of the \\end_layout matching the layout containing lines[i].

    Thin wrapper around the generic find_end_of helper with the layout
    begin/end tokens filled in.
    """
    return find_end_of(lines, i, r"\begin_layout", r"\end_layout")
def find_beginning_of_layout(lines, i):
    """Return the index of the \\begin_layout opening the layout containing lines[i].

    Thin wrapper around the generic find_beginning_of helper with the
    layout begin/end tokens filled in.
    """
    return find_beginning_of(lines, i, r"\begin_layout", r"\end_layout")
45 # End of helper functions
46 ####################################################################
50 # Notes: Framed/Shaded
# Turn "Note Framed"/"Note Shaded" insets back into plain "Note" insets.
# NOTE(review): this numbered listing has gaps (embedded numbering jumps
# 54->57->61) — the loop header, termination test and index increment are
# not visible here; verify against the full file before editing.
53 def revert_framed(document):
54 " Revert framed notes. "
57 i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
61 document.body[i] = "\\begin_inset Note"
# Translation tables: old \fontscheme name -> new per-family font name,
# used by convert_font_settings / revert_font_settings below.
# NOTE(review): the listing skips embedded lines 73 and 78, so the trailing
# entries / closing braces of roman_fonts and sans_fonts are not visible.
69 roman_fonts = {'default' : 'default', 'ae' : 'ae',
70 'times' : 'times', 'palatino' : 'palatino',
71 'helvet' : 'default', 'avant' : 'default',
72 'newcent' : 'newcent', 'bookman' : 'bookman',
74 sans_fonts = {'default' : 'default', 'ae' : 'default',
75 'times' : 'default', 'palatino' : 'default',
76 'helvet' : 'helvet', 'avant' : 'avant',
77 'newcent' : 'default', 'bookman' : 'default',
79 typewriter_fonts = {'default' : 'default', 'ae' : 'default',
80 'times' : 'default', 'palatino' : 'default',
81 'helvet' : 'default', 'avant' : 'default',
82 'newcent' : 'default', 'bookman' : 'default',
83 'pslatex' : 'courier'}
# Replace the single old \fontscheme header line with the new per-family
# \font_* header lines, looked up in the tables above.  Unknown or empty
# schemes fall back to 'default' with a warning.
# NOTE(review): gappy listing — guard lines (e.g. the i == -1 branch body
# around embedded lines 87-93) are missing here.
85 def convert_font_settings(document):
86 " Convert font settings. "
88 i = find_token_exact(document.header, "\\fontscheme", i)
90 document.warning("Malformed LyX document: Missing `\\fontscheme'.")
92 font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
94 document.warning("Malformed LyX document: Empty `\\fontscheme'.")
95 font_scheme = 'default'
96 if not font_scheme in roman_fonts.keys():
97 document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
98 font_scheme = 'default'
# Replace the \fontscheme line in place with the new header entries.
99 document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
100 '\\font_sans %s' % sans_fonts[font_scheme],
101 '\\font_typewriter %s' % typewriter_fonts[font_scheme],
102 '\\font_default_family default',
105 '\\font_sf_scale 100',
106 '\\font_tt_scale 100']
# Inverse of convert_font_settings: read and delete the new \font_* header
# lines, then try to reconstruct a matching old \fontscheme.  If no scheme
# reproduces all three families exactly, fall back to 'default' and emit
# LaTeX preamble code for the individual fonts instead.
# NOTE(review): gappy listing — initializations (i, insert_line), the
# i == -1 branches and several continue/else lines are not visible here.
109 def revert_font_settings(document):
110 " Revert font settings. "
113 fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
114 for family in 'roman', 'sans', 'typewriter':
115 name = '\\font_%s' % family
116 i = find_token_exact(document.header, name, i)
118 document.warning("Malformed LyX document: Missing `%s'." % name)
# insert_line presumably records where the reconstructed \fontscheme
# line should go — TODO confirm against the full file.
121 if (insert_line < 0):
123 fonts[family] = get_value(document.header, name, i, i + 1)
124 del document.header[i]
125 i = find_token_exact(document.header, '\\font_default_family', i)
127 document.warning("Malformed LyX document: Missing `\\font_default_family'.")
128 font_default_family = 'default'
130 font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
131 del document.header[i]
132 i = find_token_exact(document.header, '\\font_sc', i)
134 document.warning("Malformed LyX document: Missing `\\font_sc'.")
137 font_sc = get_value(document.header, '\\font_sc', i, i + 1)
138 del document.header[i]
# True small caps cannot be expressed in the old format; only warn.
139 if font_sc != 'false':
140 document.warning("Conversion of '\\font_sc' not yet implemented.")
141 i = find_token_exact(document.header, '\\font_osf', i)
143 document.warning("Malformed LyX document: Missing `\\font_osf'.")
146 font_osf = get_value(document.header, '\\font_osf', i, i + 1)
147 del document.header[i]
148 i = find_token_exact(document.header, '\\font_sf_scale', i)
150 document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
151 font_sf_scale = '100'
153 font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
154 del document.header[i]
155 if font_sf_scale != '100':
156 document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
157 i = find_token_exact(document.header, '\\font_tt_scale', i)
159 document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
160 font_tt_scale = '100'
162 font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
163 del document.header[i]
164 if font_tt_scale != '100':
165 document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
# First try: an old scheme that matches all three families exactly.
166 for font_scheme in roman_fonts.keys():
167 if (roman_fonts[font_scheme] == fonts['roman'] and
168 sans_fonts[font_scheme] == fonts['sans'] and
169 typewriter_fonts[font_scheme] == fonts['typewriter']):
170 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
171 if font_default_family != 'default':
172 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
173 if font_osf == 'true':
174 document.warning("Ignoring `\\font_osf = true'")
# Fallback: no scheme matched — write 'default' and emulate the fonts
# in the LaTeX preamble instead.
176 font_scheme = 'default'
177 document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
178 if fonts['roman'] == 'cmr':
179 document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
180 if font_osf == 'true':
181 document.preamble.append('\\usepackage{eco}')
183 for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
184 if fonts['roman'] == font:
185 document.preamble.append('\\usepackage{%s}' % font)
186 for font in 'cmss', 'lmss', 'cmbr':
187 if fonts['sans'] == font:
188 document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
189 for font in 'berasans':
190 if fonts['sans'] == font:
191 document.preamble.append('\\usepackage{%s}' % font)
192 for font in 'cmtt', 'lmtt', 'cmtl':
193 if fonts['typewriter'] == font:
194 document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
195 for font in 'courier', 'beramono', 'luximono':
196 if fonts['typewriter'] == font:
197 document.preamble.append('\\usepackage{%s}' % font)
198 if font_default_family != 'default':
199 document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
200 if font_osf == 'true':
201 document.warning("Ignoring `\\font_osf = true'")
# Strip booktabs attributes and extra row spacing from tabular insets,
# since the old format cannot represent them.
# NOTE(review): gappy listing — the enclosing while loop, the i == -1
# break and the index advance are not visible here.
204 def revert_booktabs(document):
205 " We remove the booktabs flag or everything else will become a mess. "
206 re_row = re.compile(r'^<row.*space="[^"]+".*>$')
207 re_tspace = re.compile(r'\s+topspace="[^"]+"')
208 re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
209 re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
212 i = find_token(document.body, "\\begin_inset Tabular", i)
215 j = find_end_of_inset(document.body, i + 1)
217 document.warning("Malformed LyX document: Could not find end of tabular.")
# Scan every line of the tabular inset for booktabs markup.
219 for k in range(i, j):
220 if re.search('^<features.* booktabs="true".*>$', document.body[k]):
221 document.warning("Converting 'booktabs' table to normal table.")
222 document.body[k] = document.body[k].replace(' booktabs="true"', '')
223 if re.search(re_row, document.body[k]):
224 document.warning("Removing extra row space.")
225 document.body[k] = re_tspace.sub('', document.body[k])
226 document.body[k] = re_bspace.sub('', document.body[k])
227 document.body[k] = re_ispace.sub('', document.body[k])
# Core encoding fixup for format 248 <-> 249.  Walks the body tracking the
# effective encoding per nested layout (via \lang switches) and re-encodes
# each affected line between the "fake unicode" read with the document
# encoding and the true unicode of the line's actual language encoding.
# NOTE(review): gappy listing — the early return for CJK files, the
# `if result:` test, else branches and forward/backward dispatch lines
# are not all visible here.
231 def convert_multiencoding(document, forward):
232 """ Fix files with multiple encodings.
233 Files with an inputencoding of "auto" or "default" and multiple languages
234 where at least two languages have different default encodings are encoded
235 in multiple encodings for file formats < 249. These files are incorrectly
236 read and written (as if the whole file was in the encoding of the main
238 This is not true for files written by CJK-LyX, they are always in the locale
242 - converts from fake unicode values to true unicode if forward is true, and
243 - converts from true unicode values to fake unicode if forward is false.
244 document.encoding must be set to the old value (format 248) in both cases.
246 We do this here and not in LyX.py because it is far easier to do the
247 necessary parsing in modern formats than in ancient ones.
249 if document.cjk_encoding != '':
251 encoding_stack = [document.encoding]
252 lang_re = re.compile(r"^\\lang\s(\S+)")
253 if document.inputencoding == "auto" or document.inputencoding == "default":
254 for i in range(len(document.body)):
255 result = lang_re.match(document.body[i])
257 language = result.group(1)
258 if language == "default":
259 document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
260 encoding_stack[-1] = document.encoding
262 from lyx2lyx_lang import lang
263 document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
264 encoding_stack[-1] = lang[language][3]
265 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
266 document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
267 encoding_stack.append(encoding_stack[-1])
268 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
269 document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
270 if len(encoding_stack) == 1:
271 # Don't remove the document encoding from the stack
272 document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
274 del encoding_stack[-1]
275 if encoding_stack[-1] != document.encoding:
277 # This line has been incorrectly interpreted as if it was
278 # encoded in 'encoding'.
279 # Convert back to the 8bit string that was in the file.
280 orig = document.body[i].encode(document.encoding)
281 # Convert the 8bit string that was in the file to unicode
282 # with the correct encoding.
283 document.body[i] = orig.decode(encoding_stack[-1])
285 # Convert unicode to the 8bit string that will be written
286 # to the file with the correct encoding.
287 orig = document.body[i].encode(encoding_stack[-1])
288 # Convert the 8bit string that will be written to the
289 # file to fake unicode with the encoding that will later
290 # be used when writing to the file.
291 document.body[i] = orig.decode(document.encoding)
def convert_utf8(document):
    """Switch the document encoding to UTF-8.

    The body is re-encoded first — convert_multiencoding relies on
    document.encoding still holding the old (format 248) value — and only
    then is the encoding attribute updated.
    """
    convert_multiencoding(document, True)
    document.encoding = "utf8"
# Inverse of convert_utf8: map a utf8 \inputencoding back to "auto",
# recompute the effective encoding for format 248, and re-encode the body.
# NOTE(review): gappy listing — the `if i == -1:` guard before the append
# (embedded line 303) is missing here.
300 def revert_utf8(document):
301 " Set document encoding to the value corresponding to inputencoding. "
302 i = find_token(document.header, "\\inputencoding", 0)
304 document.header.append("\\inputencoding auto")
305 elif get_value(document.header, "\\inputencoding", i) == "utf8":
306 document.header[i] = "\\inputencoding auto"
307 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
308 document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
309 convert_multiencoding(document, False)
# Drop the 'show_label' status flag from CharStyle insets, which the old
# format does not know about.
# NOTE(review): gappy listing — the loop, break conditions and deletion of
# the show_label line are not visible here.
312 def revert_cs_label(document):
313 " Remove status flag of charstyle label. "
316 i = find_token(document.body, "\\begin_inset CharStyle", i)
319 # Search for a line starting 'show_label'
320 # If it is not there, break with a warning message
323 if (document.body[i][:10] == "show_label"):
326 elif (document.body[i][:13] == "\\begin_layout"):
327 document.warning("Malformed LyX document: Missing 'show_label'.")
# Rewrite inline "\bibitem [option]{argument}" lines as LatexCommand
# bibitem insets with quoted label/key parameters.
# NOTE(review): gappy listing — the loop header, the i == -1 break and the
# empty-option branch are not visible here.
334 def convert_bibitem(document):
336 \bibitem [option]{argument}
340 \begin_inset LatexCommand bibitem
346 This must be called after convert_commandparams.
350 i = find_token(document.body, "\\bibitem", i)
# Locate the optional [option] part; find('[') returns -1 -> j == 0
# means no optional argument is present.
353 j = document.body[i].find('[') + 1
354 k = document.body[i].rfind(']')
355 if j == 0: # No optional argument found
358 option = document.body[i][j:k]
359 j = document.body[i].rfind('{') + 1
360 k = document.body[i].rfind('}')
361 argument = document.body[i][j:k]
362 lines = ['\\begin_inset LatexCommand bibitem']
# Escape embedded double quotes in the parameter values.
364 lines.append('label "%s"' % option.replace('"', '\\"'))
365 lines.append('key "%s"' % argument.replace('"', '\\"'))
367 lines.append('\\end_inset')
368 document.body[i:i+1] = lines
# Parameter-name table for LatexCommand insets: maps each command to the
# inset parameter names used for its first option, second option and
# argument ("" = the command does not take that slot).  Consumed by
# convert_commandparams / revert_commandparams below.
372 commandparams_info = {
373 # command : [option1, option2, argument]
374 "bibitem" : ["label", "", "key"],
375 "bibtex" : ["options", "btprint", "bibfiles"],
376 "cite" : ["after", "before", "key"],
377 "citet" : ["after", "before", "key"],
378 "citep" : ["after", "before", "key"],
379 "citealt" : ["after", "before", "key"],
380 "citealp" : ["after", "before", "key"],
381 "citeauthor" : ["after", "before", "key"],
382 "citeyear" : ["after", "before", "key"],
383 "citeyearpar" : ["after", "before", "key"],
384 "citet*" : ["after", "before", "key"],
385 "citep*" : ["after", "before", "key"],
386 "citealt*" : ["after", "before", "key"],
387 "citealp*" : ["after", "before", "key"],
388 "citeauthor*" : ["after", "before", "key"],
389 "Citet" : ["after", "before", "key"],
390 "Citep" : ["after", "before", "key"],
391 "Citealt" : ["after", "before", "key"],
392 "Citealp" : ["after", "before", "key"],
393 "Citeauthor" : ["after", "before", "key"],
394 "Citet*" : ["after", "before", "key"],
395 "Citep*" : ["after", "before", "key"],
396 "Citealt*" : ["after", "before", "key"],
397 "Citealp*" : ["after", "before", "key"],
398 "Citeauthor*" : ["after", "before", "key"],
399 "citefield" : ["after", "before", "key"],
400 "citetitle" : ["after", "before", "key"],
401 "cite*" : ["after", "before", "key"],
402 "hfill" : ["", "", ""],
403 "index" : ["", "", "name"],
404 "printindex" : ["", "", "name"],
405 "label" : ["", "", "name"],
406 "eqref" : ["name", "", "reference"],
407 "pageref" : ["name", "", "reference"],
408 "prettyref" : ["name", "", "reference"],
409 "ref" : ["name", "", "reference"],
410 "vpageref" : ["name", "", "reference"],
411 "vref" : ["name", "", "reference"],
412 "tableofcontents" : ["", "", "type"],
413 "htmlurl" : ["name", "", "target"],
414 "url" : ["name", "", "target"]}
# Parse the old inline form `\begin_inset LatexCommand \cmdname[opt1][opt2]{arg}`
# with a character-by-character state machine (ported from the C++
# InsetCommandParams::scanCommand) and emit the new named-parameter form.
# NOTE(review): heavily gappy listing — the state machine's variable
# initialization, state transitions and loop structure are mostly missing;
# treat the surviving lines below as fragments only.
417 def convert_commandparams(document):
420 \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}
425 \begin_inset LatexCommand cmdname
431 name1, name2 and name3 can be different for each command.
433 # \begin_inset LatexCommand bibitem was not the official version (see
434 # convert_bibitem()), but could be read in, so we convert it here, too.
438 i = find_token(document.body, "\\begin_inset LatexCommand", i)
# Everything after the 26-char prefix is the raw command text.
441 command = document.body[i][26:].strip()
443 document.warning("Malformed LyX document: Missing LatexCommand name.")
447 # The following parser is taken from the original InsetCommandParams::scanCommand
453 # Used to handle things like \command[foo[bar]]{foo{bar}}
457 if ((state == "CMDNAME" and c == ' ') or
458 (state == "CMDNAME" and c == '[') or
459 (state == "CMDNAME" and c == '{')):
461 if ((state == "OPTION" and c == ']') or
462 (state == "SECOPTION" and c == ']') or
463 (state == "CONTENT" and c == '}')):
467 nestdepth = nestdepth - 1
468 if ((state == "OPTION" and c == '[') or
469 (state == "SECOPTION" and c == '[') or
470 (state == "CONTENT" and c == '{')):
471 nestdepth = nestdepth + 1
472 if state == "CMDNAME":
474 elif state == "OPTION":
476 elif state == "SECOPTION":
478 elif state == "CONTENT":
# b is presumably the previous character: '[' after ']' starts the
# second option, otherwise the first — TODO confirm in full source.
483 elif c == '[' and b != ']':
485 nestdepth = 0 # Just to be sure
486 elif c == '[' and b == ']':
488 nestdepth = 0 # Just to be sure
491 nestdepth = 0 # Just to be sure
494 # Now we have parsed the command, output the parameters
495 lines = ["\\begin_inset LatexCommand %s" % name]
497 if commandparams_info[name][0] == "":
498 document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
500 lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
502 if commandparams_info[name][1] == "":
503 document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
505 lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
507 if commandparams_info[name][2] == "":
508 document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
510 lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
511 document.body[i:i+1] = lines
# Inverse of convert_commandparams: read the named parameters of each
# LatexCommand inset and rebuild the old inline
# `\cmdname[opt1][opt2]{arg}` form (bibitem gets its own \bibitem syntax).
# NOTE(review): gappy listing — the loop, default initialization of
# option1/option2/argument/preview_line and several branch lines for the
# option combinations are not visible here.
515 def revert_commandparams(document):
516 regex = re.compile(r'(\S+)\s+(.+)')
519 i = find_token(document.body, "\\begin_inset LatexCommand", i)
522 name = document.body[i].split()[2]
523 j = find_end_of_inset(document.body, i + 1)
# Collect the recognized parameters from the inset body.
528 for k in range(i + 1, j):
529 match = re.match(regex, document.body[k])
531 pname = match.group(1)
532 pvalue = match.group(2)
533 if pname == "preview":
534 preview_line = document.body[k]
535 elif (commandparams_info[name][0] != "" and
536 pname == commandparams_info[name][0]):
537 option1 = pvalue.strip('"').replace('\\"', '"')
538 elif (commandparams_info[name][1] != "" and
539 pname == commandparams_info[name][1]):
540 option2 = pvalue.strip('"').replace('\\"', '"')
541 elif (commandparams_info[name][2] != "" and
542 pname == commandparams_info[name][2]):
543 argument = pvalue.strip('"').replace('\\"', '"')
544 elif document.body[k].strip() != "":
545 document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
546 if name == "bibitem":
548 lines = ["\\bibitem {%s}" % argument]
550 lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
# Non-bibitem commands: pick the inline form matching which options
# are present (none / second only / first only / both).
554 lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
556 lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
559 lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
561 lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
562 if name != "bibitem":
563 if preview_line != "":
564 lines.append(preview_line)
566 lines.append('\\end_inset')
567 document.body[i:j+1] = lines
# Replace nomenclature insets (unknown to the old format) with an ERT
# inset containing the raw \nomenclature LaTeX command, and make sure the
# nomencl package is loaded in the preamble.
# NOTE(review): gappy listing — the loop, default variable initialization
# and the ERT body lines between 605 and 617 are not visible here.
571 def revert_nomenclature(document):
572 " Convert nomenclature entry to ERT. "
573 regex = re.compile(r'(\S+)\s+(.+)')
577 i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
581 j = find_end_of_inset(document.body, i + 1)
586 for k in range(i + 1, j):
587 match = re.match(regex, document.body[k])
589 name = match.group(1)
590 value = match.group(2)
591 if name == "preview":
592 preview_line = document.body[k]
593 elif name == "symbol":
594 symbol = value.strip('"').replace('\\"', '"')
595 elif name == "description":
596 description = value.strip('"').replace('\\"', '"')
597 elif name == "prefix":
598 prefix = value.strip('"').replace('\\"', '"')
599 elif document.body[k].strip() != "":
600 document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
602 command = 'nomenclature{%s}{%s}' % (symbol, description)
604 command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
605 document.body[i:j+1] = ['\\begin_inset ERT',
608 '\\begin_layout %s' % document.default_layout,
# Only add the preamble lines once per document.
617 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
618 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
619 document.preamble.append('\\makenomenclature')
# Same pattern as revert_nomenclature, for printnomenclature insets:
# convert to an ERT inset and ensure the nomencl preamble lines exist.
# NOTE(review): gappy listing — loop header, defaults and the ERT body
# between embedded lines 650 and 662 are not visible here.
622 def revert_printnomenclature(document):
623 " Convert printnomenclature to ERT. "
624 regex = re.compile(r'(\S+)\s+(.+)')
628 i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
632 j = find_end_of_inset(document.body, i + 1)
635 for k in range(i + 1, j):
636 match = re.match(regex, document.body[k])
638 name = match.group(1)
639 value = match.group(2)
640 if name == "preview":
641 preview_line = document.body[k]
642 elif name == "labelwidth":
643 labelwidth = value.strip('"').replace('\\"', '"')
644 elif document.body[k].strip() != "":
645 document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
647 command = 'nomenclature{}'
649 command = 'nomenclature[%s]' % labelwidth
650 document.body[i:j+1] = ['\\begin_inset ERT',
653 '\\begin_layout %s' % document.default_layout,
662 if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
663 document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
664 document.preamble.append('\\makenomenclature')
# Add the new \use_esint header setting (off by default), placed before
# \cite_engine to keep the header ordering.
# NOTE(review): gappy listing — the i == -1 guard body (embedded lines
# 670/672) is not visible here.
667 def convert_esint(document):
668 " Add \\use_esint setting to header. "
669 i = find_token(document.header, "\\cite_engine", 0)
671 document.warning("Malformed LyX document: Missing `\\cite_engine'.")
673 # 0 is off, 1 is auto, 2 is on.
674 document.header.insert(i, '\\use_esint 0')
# Remove the \use_esint header line; if esint was enabled, emulate it by
# loading the esint package in the preamble.
# NOTE(review): gappy listing — the i == -1 guard body and the condition
# on use_esint before the preamble append (embedded line 686) are missing.
677 def revert_esint(document):
678 " Remove \\use_esint setting from header. "
679 i = find_token(document.header, "\\use_esint", 0)
681 document.warning("Malformed LyX document: Missing `\\use_esint'.")
683 use_esint = document.header[i].split()[1]
684 del document.header[i]
685 # 0 is off, 1 is auto, 2 is on.
687 document.preamble.append('\\usepackage{esint}')
# Replace \clearpage body lines with an ERT inset emitting the raw LaTeX
# command.
# NOTE(review): heavily gappy listing — the loop and most of the ERT
# replacement list (embedded lines 698-708) are not visible here.
690 def revert_clearpage(document):
694 i = find_token(document.body, "\\clearpage", i)
697 document.body[i:i+1] = ['\\begin_inset ERT',
700 '\\begin_layout %s' % document.default_layout,
# Same as revert_clearpage, for \cleardoublepage.
# NOTE(review): heavily gappy listing — the loop and most of the ERT
# replacement list are not visible here.
711 def revert_cleardoublepage(document):
712 " cleardoublepage -> ERT "
715 i = find_token(document.body, "\\cleardoublepage", i)
718 document.body[i:i+1] = ['\\begin_inset ERT',
721 '\\begin_layout %s' % document.default_layout,
# Remove a \size command that immediately precedes a \lyxline, since the
# new \lyxline definition reacts to the font size while the old one did
# not.
# NOTE(review): gappy listing — loop setup (i init), the i == -1 break and
# the index advance are not visible here.
732 def convert_lyxline(document):
733 " remove fontsize commands for \lyxline "
734 # The problem is: The old \lyxline definition doesn't handle the fontsize
735 # to change the line thickness. The new definition does this so that imported
736 # \lyxlines would have a different line thickness. The eventual fontsize command
737 # before \lyxline is therefore removed to get the same output.
738 fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
739 "large", "Large", "LARGE", "huge", "Huge"]
740 for n in range(0, len(fontsizes)):
743 while i < len(document.body):
744 i = find_token(document.body, "\\size " + fontsizes[n], i)
745 k = find_token(document.body, "\\lyxline", i)
746 # the corresponding fontsize command is always 2 lines before the \lyxline
747 if (i != -1 and k == i+2):
748 document.body[i:i+1] = []
# Encodings added in the new format cannot be represented in the old one:
# map them (and a missing \inputencoding line) back to "auto".
# NOTE(review): gappy listing — the i == -1 branch structure around
# embedded lines 760-762 is not fully visible here.
754 def revert_encodings(document):
755 " Set new encodings to auto. "
756 encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
757 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
758 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
759 i = find_token(document.header, "\\inputencoding", 0)
761 document.header.append("\\inputencoding auto")
763 inputenc = get_value(document.header, "\\inputencoding", i)
764 if inputenc in encodings:
765 document.header[i] = "\\inputencoding auto"
766 document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# Wrap each "Caption" layout paragraph into a Caption inset nested inside
# a default-layout paragraph.
# NOTE(review): gappy listing — the loop, i == -1 break and index advance
# are not visible here.
769 def convert_caption(document):
770 " Convert caption layouts to caption insets. "
773 i = find_token(document.body, "\\begin_layout Caption", i)
776 j = find_end_of_layout(document.body, i)
778 document.warning("Malformed LyX document: Missing `\\end_layout'.")
# Close the inner layout and the inset at the old layout end, then
# rewrite the opening line as default layout + Caption inset.
781 document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
782 document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
783 "\\begin_inset Caption", "",
784 "\\begin_layout %s" % document.default_layout]
# Inverse of convert_caption: unwrap Caption insets back into Caption
# layout paragraphs, splicing the surrounding paragraph before/after the
# inset as needed.
# NOTE(review): gappy listing — loop headers, several else branches and
# index bookkeeping lines are not visible here; the splice logic depends
# on exact ordering, so verify against the full file before editing.
788 def revert_caption(document):
789 " Convert caption insets to caption layouts. "
790 " This assumes that the text class has a caption style. "
793 i = find_token(document.body, "\\begin_inset Caption", i)
797 # We either need to delete the previous \begin_layout line, or we
798 # need to end the previous layout if this inset is not in the first
799 # position of the paragraph.
800 layout_before = find_token_backwards(document.body, "\\begin_layout", i)
801 if layout_before == -1:
802 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
804 layout_line = document.body[layout_before]
805 del_layout_before = True
806 l = layout_before + 1
# Any non-empty line between the layout start and the inset means the
# inset is not at the paragraph start.
808 if document.body[l] != "":
809 del_layout_before = False
812 if del_layout_before:
813 del document.body[layout_before:i]
816 document.body[i:i] = ["\\end_layout", ""]
819 # Find start of layout in the inset and end of inset
820 j = find_token(document.body, "\\begin_layout", i)
822 document.warning("Malformed LyX document: Missing `\\begin_layout'.")
824 k = find_end_of_inset(document.body, i)
826 document.warning("Malformed LyX document: Missing `\\end_inset'.")
829 # We either need to delete the following \end_layout line, or we need
830 # to restart the old layout if this inset is not at the paragraph end.
831 layout_after = find_token(document.body, "\\end_layout", k)
832 if layout_after == -1:
833 document.warning("Malformed LyX document: Missing `\\end_layout'.")
835 del_layout_after = True
837 while l < layout_after:
838 if document.body[l] != "":
839 del_layout_after = False
843 del document.body[k+1:layout_after+1]
845 document.body[k+1:k+1] = [layout_line, ""]
847 # delete \begin_layout and \end_inset and replace \begin_inset with
848 # "\begin_layout Caption". This works because we can only have one
849 # paragraph in the caption inset: The old \end_layout will be recycled.
851 if document.body[k] == "":
854 if document.body[j] == "":
856 document.body[i] = "\\begin_layout Caption"
857 if document.body[i+1] == "":
858 del document.body[i+1]
# Lookup tables used by convert_accent / revert_accent below:
# - accent_map: LaTeX accent command letter -> Unicode combining character
# - special_accent_map: accent commands taking no argument -> precomposed char
# - accented_map: special argument commands (\i, \j) -> dotless char
# NOTE(review): gappy listing — the `accent_map = {` opening line
# (embedded 863), closing braces and the `accented_map = {` opening line
# (embedded 895) are not visible here.
862 # Accents of InsetLaTeXAccent
864 "`" : u'\u0300', # grave
865 "'" : u'\u0301', # acute
866 "^" : u'\u0302', # circumflex
867 "~" : u'\u0303', # tilde
868 "=" : u'\u0304', # macron
869 "u" : u'\u0306', # breve
870 "." : u'\u0307', # dot above
871 "\"": u'\u0308', # diaeresis
872 "r" : u'\u030a', # ring above
873 "H" : u'\u030b', # double acute
874 "v" : u'\u030c', # caron
875 "b" : u'\u0320', # minus sign below
876 "d" : u'\u0323', # dot below
877 "c" : u'\u0327', # cedilla
878 "k" : u'\u0328', # ogonek
879 "t" : u'\u0361' # tie. This is special: It spans two characters, but
880 # only one is given as argument, so we don't need to
881 # treat it differently.
885 # special accents of InsetLaTeXAccent without argument
886 special_accent_map = {
887 'i' : u'\u0131', # dotless i
888 'j' : u'\u0237', # dotless j
889 'l' : u'\u0142', # l with stroke
890 'L' : u'\u0141' # L with stroke
894 # special accent arguments of InsetLaTeXAccent
896 '\\i' : u'\u0131', # dotless i
897 '\\j' : u'\u0237' # dotless j
# Convert one (accent command, accented char) pair to the corresponding
# composed Unicode character, or signal failure for unknown combinations.
# NOTE(review): gappy listing — the assignments binding `type` and `char`
# (presumably from the parameters) and several return statements are not
# visible here; the failure return value cannot be determined from this
# fragment.
901 def _convert_accent(accent, accented_char):
905 if type in special_accent_map:
906 return special_accent_map[type]
907 # a missing char is treated as space by LyX
909 elif type == 'q' and char in ['t', 'd', 'l', 'L']:
910 # Special caron, only used with t, d, l and L.
911 # It is not in the map because we convert it to the same unicode
912 # character as the normal caron: \q{} is only defined if babel with
913 # the czech or slovak language is used, and the normal caron
914 # produces the correct output if the T1 font encoding is used.
915 # For the same reason we never convert to \q{} in the other direction.
917 elif char in accented_map:
918 char = accented_map[char]
919 elif (len(char) > 1):
920 # We can only convert accents on a single char
922 a = accent_map.get(type)
# Compose base char + combining accent into a single precomposed char.
924 return unicodedata.normalize("NFKC", "%s%s" % (char, a))
# Append `ert` text to body[i] in valid ERT form: backslashes become
# \backslash tokens and newlines start a fresh layout paragraph; returns
# the possibly advanced line index.
# NOTE(review): gappy listing — the loop over the characters of `ert` and
# the newline branch bookkeeping are not visible here.
928 def convert_ertbackslash(body, i, ert, default_layout):
929 r""" -------------------------------------------------------------------------------------------
930 Convert backslashes and '\n' into valid ERT code, append the converted
931 text to body[i] and return the (maybe incremented) line index i"""
935 body[i] = body[i] + '\\backslash '
939 body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
942 body[i] = body[i] + c
# Replace old InsetLaTeXAccent markup (`\i \"{a}` etc.) in body lines with
# the composed Unicode character; combinations that cannot be composed are
# wrapped in an ERT inset instead.
# NOTE(review): gappy listing — the loop, several `if match:` guards and
# parts of the ERT construction (embedded lines 985-996) are missing.
946 def convert_accent(document):
947 # The following forms are supported by LyX:
948 # '\i \"{a}' (standard form, as written by LyX)
949 # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
950 # '\i \"{ }' (also accepted if the accented char is a space)
951 # '\i \" a' (also accepted)
952 # '\i \"' (also accepted)
953 re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
954 re_contents = re.compile(r'^([^\s{]+)(.*)$')
955 re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
958 i = find_re(document.body, re_wholeinset, i)
961 match = re_wholeinset.match(document.body[i])
962 prefix = match.group(1)
963 contents = match.group(3).strip()
964 match = re_contents.match(contents)
966 # Strip first char (always \)
967 accent = match.group(1)[1:]
968 accented_contents = match.group(2).strip()
969 match = re_accentedcontents.match(accented_contents)
970 accented_char = match.group(1)
971 converted = _convert_accent(accent, accented_char)
974 contents = '%s{%s}' % (accent, accented_char),
976 document.body[i] = '%s%s' % (prefix, converted)
# Fallback: unknown accent combination — keep the raw LaTeX in ERT.
979 document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
980 document.body[i] = prefix
981 document.body[i+1:i+1] = ['\\begin_inset ERT',
984 '\\begin_layout %s' % document.default_layout,
988 i = convert_ertbackslash(document.body, i + 7,
990 document.default_layout)
991 document.body[i+1:i+1] = ['\\end_layout',
# Inverse of convert_accent: decompose Unicode body text (NFKD) and
# rewrite characters that cannot be represented in the target encoding as
# InsetLaTeXAccent markup (`\i \accent{char}`), tracking the effective
# encoding per nested layout like convert_multiencoding does.
# NOTE(review): gappy listing — loop bodies, `continue` lines and parts of
# the per-character index bookkeeping are not visible here.
997 def revert_accent(document):
# Build the reverse lookup tables from the forward maps above.
998 inverse_accent_map = {}
1000 inverse_accent_map[accent_map[k]] = k
1001 inverse_special_accent_map = {}
1002 for k in special_accent_map:
1003 inverse_special_accent_map[special_accent_map[k]] = k
1004 inverse_accented_map = {}
1005 for k in accented_map:
1006 inverse_accented_map[accented_map[k]] = k
1008 # Since LyX may insert a line break within a word we must combine all
1009 # words before unicode normalization.
1010 # We do this only if the next line starts with an accent, otherwise we
1011 # would create things like '\begin_inset ERTstatus'.
1012 numberoflines = len(document.body)
1013 for i in range(numberoflines-1):
1014 if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1016 if (document.body[i+1][0] in inverse_accent_map):
1017 # the last character of this line and the first of the next line
1018 # form probably a surrogate pair.
1019 while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1020 document.body[i] += document.body[i+1][0]
1021 document.body[i+1] = document.body[i+1][1:]
1023 # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1024 # This is needed to catch all accented characters.
1025 for i in range(numberoflines):
1026 # Unfortunately we have a mixture of unicode strings and plain strings,
1027 # because we never use u'xxx' for string literals, but 'xxx'.
1028 # Therefore we may have to try two times to normalize the data.
1030 document.body[i] = unicodedata.normalize("NFKD", document.body[i])
1032 document.body[i] = unicodedata.normalize("NFKD", unicode(document.body[i], 'utf-8'))
1034 # Replace accented characters with InsetLaTeXAccent
1035 # Do not convert characters that can be represented in the chosen
1037 encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1038 lang_re = re.compile(r"^\\lang\s(\S+)")
1039 for i in range(len(document.body)):
1041 if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1042 # Track the encoding of the current line
1043 result = lang_re.match(document.body[i])
1045 language = result.group(1)
1046 if language == "default":
1047 encoding_stack[-1] = document.encoding
1049 from lyx2lyx_lang import lang
1050 encoding_stack[-1] = lang[language][3]
1052 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1053 encoding_stack.append(encoding_stack[-1])
1055 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1056 del encoding_stack[-1]
1059 for j in range(len(document.body[i])):
1060 # dotless i and dotless j are both in special_accent_map and can
1061 # occur as an accented character, so we need to test that the
1062 # following character is no accent
1063 if (document.body[i][j] in inverse_special_accent_map and
1064 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1065 accent = document.body[i][j]
# If the char encodes cleanly in the current encoding, leave it;
# UnicodeEncodeError means it must become an inset.
1067 dummy = accent.encode(encoding_stack[-1])
1068 except UnicodeEncodeError:
1069 # Insert the rest of the line as new line
1070 if j < len(document.body[i]) - 1:
1071 document.body[i+1:i+1] = document.body[i][j+1:]
1072 # Delete the accented character
1074 document.body[i] = document.body[i][:j-1]
1076 document.body[i] = u''
1077 # Finally add the InsetLaTeXAccent
1078 document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1080 elif j > 0 and document.body[i][j] in inverse_accent_map:
1081 accented_char = document.body[i][j-1]
1082 if accented_char == ' ':
1083 # Conform to LyX output
1085 elif accented_char in inverse_accented_map:
1086 accented_char = inverse_accented_map[accented_char]
1087 accent = document.body[i][j]
1089 dummy = unicodedata.normalize("NFKC", accented_char + accent).encode(encoding_stack[-1])
1090 except UnicodeEncodeError:
1091 # Insert the rest of the line as new line
1092 if j < len(document.body[i]) - 1:
1093 document.body[i+1:i+1] = document.body[i][j+1:]
1094 # Delete the accented characters
1096 document.body[i] = document.body[i][:j-2]
1098 document.body[i] = u''
1099 # Finally add the InsetLaTeXAccent
1100 document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1102 # Normalize to "Normal form C" (NFC, pre-composed characters) again
1103 for i in range(numberoflines):
1104 document.body[i] = unicodedata.normalize("NFKC", document.body[i])
# NOTE(review): this block is corrupted in the paste -- each line carries an
# embedded original line number and indentation is lost.  Gaps in those
# numbers (1111, 1114, 1116) show lines were dropped; 1114/1116 presumably
# held further char_properties entries (e.g. \color / \bar) -- confirm
# against the pristine file before trusting this dict.
1107 def normalize_font_whitespace_259(document):
1108 """ Before format 259 the font changes were ignored if a
1109 whitespace was the first or last character in the sequence, this function
1110 transfers the whitespace outside."""
# Map of font-change tokens to their document-default value; a line whose
# second word equals the default value closes the change.
1112 char_properties = {"\\series": "default",
1113 "\\emph": "default",
1115 "\\shape": "default",
1117 "\\family": "default"}
# Delegate to the shared worker that actually moves the whitespace.
1118 return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """Move whitespace out of font *language* changes.

    Before format 259 font changes were dropped when a whitespace was the
    first or last character of the changed sequence.  Format 259 corrected
    this for most font properties but forgot the language; this conversion
    applies the identical whitespace-moving treatment to \\lang changes.
    """
    # Only \lang needs handling here; "default" is its reset value.
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """Return the language of the paragraph containing body line i.

    If the first non-empty line of the paragraph is a \\lang command, its
    argument is the paragraph's language; otherwise the paragraph inherits
    the document's language.
    """
    lines = document.body
    first_nonempty_line = \
        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)
    words = lines[first_nonempty_line].split()
    if len(words) > 1 and words[0] == "\\lang":
        # BUGFIX(review): as pasted, this `if` had no body and the function
        # always fell through to the document language; restore returning
        # the explicit paragraph language.
        return words[1]
    return document.language
# NOTE(review): block is corrupted in the paste (embedded line numbers, lost
# indentation, elided lines -- e.g. 1156-1162 which presumably held the early
# return, `changes = {}` and `i = 0`, and the loop increments).  The logic is
# order-sensitive list splicing, so only comments are added here; restore from
# the pristine file before editing behavior.
1149 def normalize_font_whitespace(document, char_properties):
1150 """ Before format 259 the font changes were ignored if a
1151 whitespace was the first or last character in the sequence, this function
1152 transfers the whitespace outside. Only a change in one of the properties
1153 in the provided char_properties is handled by this function."""
# Only the LaTeX backend suffered from this; others are untouched.
1155 if document.backend != "latex":
1158 lines = document.body
# Walk every body line, tracking the currently open font changes.
1163 while i < len(lines):
1164 words = lines[i].split()
1166 if len(words) > 0 and words[0] == "\\begin_layout":
1167 # a new paragraph resets all font changes
1169 # also reset the default language to be the paragraph's language
1170 if "\\lang" in char_properties.keys():
1171 char_properties["\\lang"] = \
1172 get_paragraph_language(document, i + 1)
1174 elif len(words) > 1 and words[0] in char_properties.keys():
1175 # we have a font change
1176 if char_properties[words[0]] == words[1]:
1177 # property gets reset
1178 if words[0] in changes.keys():
1179 del changes[words[0]]
1180 defaultproperty = True
1183 changes[words[0]] = words[1]
1184 defaultproperty = False
1186 # We need to explicitly reset all changed properties if we find
1187 # a space below, because LyX 1.4 would output the space after
1188 # closing the previous change and before starting the new one,
1189 # and closing a font change means to close all properties, not
1190 # just the changed one.
1192 if lines[i-1] and lines[i-1][-1] == " ":
1193 lines[i-1] = lines[i-1][:-1]
1194 # a space before the font change
1196 for k in changes.keys():
1197 # exclude property k because that is already in lines[i]
1199 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1200 for k in changes.keys():
1201 # exclude property k because that must be added below anyway
1203 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1205 # Property is reset in lines[i], so add the new stuff afterwards
1206 lines[i+1:i+1] = added_lines
1208 # Reset property for the space
1209 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1210 lines[i:i] = added_lines
# Skip over what we just inserted so it is not reprocessed.
1211 i = i + len(added_lines)
1213 elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1214 # a space after the font change
1215 if (lines[i+1] == " " and lines[i+2]):
1216 next_words = lines[i+2].split()
1217 if len(next_words) > 0 and next_words[0] == words[0]:
1218 # a single blank with a property different from the
1219 # previous and the next line must not be changed
1222 lines[i+1] = lines[i+1][1:]
1224 for k in changes.keys():
1225 # exclude property k because that is already in lines[i]
1227 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1228 for k in changes.keys():
1229 # exclude property k because that must be added below anyway
1231 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1232 # Reset property for the space
1233 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1234 lines[i:i] = added_lines
1235 i = i + len(added_lines)
def revert_utf8x(document):
    """Replace the utf8x input encoding with plain utf8.

    utf8x is unknown to older formats; utf8 is the closest supported value.
    """
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # BUGFIX(review): as pasted, this append ran unconditionally; only
        # add a fallback line when no \inputencoding line exists at all.
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8x":
            document.header[i] = "\\inputencoding utf8"
    # Keep the in-memory document attribute in sync with the header.
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    """Replace the utf8-plain input encoding with plain utf8.

    utf8-plain is unknown to older formats; utf8 is the closest value.
    """
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # BUGFIX(review): as pasted, this append ran unconditionally; only
        # add a fallback line when no \inputencoding line exists at all.
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8-plain":
            document.header[i] = "\\inputencoding utf8"
    # Keep the in-memory document attribute in sync with the header.
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
# NOTE(review): corrupted block (embedded line numbers, lost indentation).
# The elided lines (1266-1267, 1269-1270, 1272-1273, 1277-1281) presumably
# held the scan loop (`i = 0`, `while 1:`), the `if i == -1: return` guard,
# and the loop increments -- confirm against the pristine file.
1264 def revert_beamer_alert(document):
1265 " Revert beamer's \\alert inset back to ERT. "
# Find each Alert character style inset and turn it into an ERT inset.
1267 i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
1271 document.body[i] = "\\begin_inset ERT"
# Wrap the inset's first layout content in the raw \alert{...} command.
1274 if (document.body[i][:13] == "\\begin_layout"):
1275 # Insert the \alert command
1276 document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
# NOTE(review): corrupted block, mirror image of revert_beamer_alert above;
# the scan loop, `if i == -1:` guard and increments were elided (gaps at
# 1285-1286, 1288-1289, 1291-1292, 1295-1300) -- confirm against the
# pristine file.
1283 def revert_beamer_structure(document):
1284 " Revert beamer's \\structure inset back to ERT. "
1287 i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
1290 document.body[i] = "\\begin_inset ERT"
# Wrap the inset's first layout content in the raw \structure{...} command.
1293 if (document.body[i][:13] == "\\begin_layout"):
1294 document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
def convert_changes(document):
    """Switch \\output_changes off if \\tracking_changes is off.

    Warns (and bails out) on documents missing either header line.
    """
    i = find_token(document.header, '\\tracking_changes', 0)
    if i == -1:
        # BUGFIX(review): as pasted, the warnings fired unconditionally and
        # the function fell through to get_value with i/j == -1; restore the
        # missing-token guards.
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    j = find_token(document.header, '\\output_changes', 0)
    if j == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    """Replace the ascii input encoding with auto.

    The explicit "ascii" value is unknown to older formats.
    """
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # BUGFIX(review): as pasted, this append ran unconditionally; only
        # add a fallback line when no \inputencoding line exists at all.
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "ascii":
            document.header[i] = "\\inputencoding auto"
    # Keep the in-memory document attribute in sync with the header.
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    "Translate obsolete babel language names to their modern LyX names."
    old_to_new = {"brazil": "brazilian",
                  "portuges": "portuguese"}
    if document.language not in old_to_new:
        return
    document.language = old_to_new[document.language]
    # Mirror the rename into the \language header line.
    i = find_token(document.header, "\\language", 0)
    document.header[i] = "\\language %s" % document.language
def revert_language_name(document):
    "Translate modern LyX language names back to their old babel names."
    new_to_old = {"brazilian": "brazil",
                  "portuguese": "portuges"}
    if document.language not in new_to_old:
        return
    document.language = new_to_old[document.language]
    # Mirror the rename into the \language header line.
    i = find_token(document.header, "\\language", 0)
    document.header[i] = "\\language %s" % document.language
# \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    "Rename the 'cv' text class to its newer name 'simplecv'."
    if document.textclass == "cv":
        document.textclass = "simplecv"
def revert_cv_textclass(document):
    "Rename the 'simplecv' text class back to its old name 'cv'."
    if document.textclass == "simplecv":
        document.textclass = "cv"
1361 # add scaleBeforeRotation graphics param
# NOTE(review): corrupted block (embedded line numbers, lost indentation);
# the scan loop (`i = 0`, `while 1:`), the `if i == -1:` / `if j == -1:`
# guards and the increments were elided (gaps 1364-1365, 1367-1368,
# 1370-1371, 1382-1384) -- confirm against the pristine file.
1362 def convert_graphics_rotation(document):
1363 " add scaleBeforeRotation graphics parameter. "
1366 i = find_token(document.body, "\\begin_inset Graphics", i)
1369 j = find_end_of_inset(document.body, i+1)
1372 document.warning("Malformed LyX document: Could not find end of graphics inset.")
1373 # Search for rotateAngle and width or height or scale
1374 # If these params are not there, nothing needs to be done.
1375 # FIXME: this also inserts scaleBeforeRotation if "rotateAngle" is not there!
# BUG(review): the FIXME above is caused by using str.find() results as
# booleans: find() returns -1 (truthy!) when the substring is ABSENT and
# 0 (falsy!) when it starts the line.  These tests should compare != -1.
1376 for k in range(i+1, j):
1377 if (document.body[k].find("rotateAngle") and \
1378 (document.body[k].find("width") or \
1379 document.body[k].find("height") or \
1380 document.body[k].find("scale"))):
1381 document.body.insert(j, 'scaleBeforeRotation')
1385 # FIXME: does not work at all
# NOTE(review): corrupted block (embedded line numbers, lost indentation,
# elided scan loop and guards).  Two concrete defects are visible even so:
# (1) str.find() results are used as booleans -- find() returns -1 (truthy)
#     when absent and 0 (falsy) at position 0, so every `if ...find(...)` test
#     below is inverted/wrong; compare against -1 instead.
# (2) the str.replace() at original line 1414 discards its result -- strings
#     are immutable, so that statement is a no-op.
1386 def revert_graphics_rotation(document):
1387 " remove scaleBeforeRotation graphics parameter. "
1390 i = find_token(document.body, "\\begin_inset Graphics", i)
1393 j = find_end_of_inset(document.body, i + 1)
1396 document.warning("Malformed LyX document: Could not find end of graphics inset.")
1397 for k in range(i+1, j):
1398 # If there's a scaleBeforeRotation param, just remove that
1399 if document.body[k].find('scaleBeforeRotation'):
1400 del document.body[k]
1402 # if not, and if we have rotateAngle and width or height or scale,
1403 # we have to put the rotateAngle value to special
1404 rotateAngle = get_value(document.body, 'rotateAngle', i+1, j)
1405 special = get_value(document.body, 'special', i+1, j)
1406 if (document.body[k].find("width") or \
1407 document.body[k].find("height") or \
1408 document.body[k].find("scale") and \
1409 document.body[k].find("rotateAngle")):
1411 document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
# BUG(review): no-op -- replace() returns a new string; the result must be
# assigned back to document.body[l].
1413 l = find_token(document.body, "special", i+1, j)
1414 document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
# NOTE(review): corrupted block; the elided lines (1422, 1428, 1430-1432)
# presumably held `i = 0`, adjustment of k/h and the `i = i + 1` increment --
# confirm against the pristine file before editing.
1419 def convert_tableborder(document):
1420 # The problematic is: LyX double the table cell border as it ignores the "|" character in
1421 # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
1423 while i < len(document.body):
# Locate both the leftline attribute and the "|>{" column spec on one line.
1424 h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1425 k = document.body[i].find("|>{", 0, len(document.body[i]))
1426 # the two tokens have to be in one line
1427 if (h != -1 and k != -1):
# Drop the superfluous "|" from the column specification.
1429 document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
# NOTE(review): corrupted block; elided lines (1434, 1440, 1442-1444)
# presumably held `i = 0` and the `i = i + 1` increment -- confirm against
# the pristine file before editing.
1433 def revert_tableborder(document):
1435 while i < len(document.body):
# Locate both the leftline attribute and the ">{" column spec on one line.
1436 h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
1437 k = document.body[i].find(">{", 0, len(document.body[i]))
1438 # the two tokens have to be in one line
1439 if (h != -1 and k != -1):
# Re-insert the "|" that convert_tableborder removed.
1441 document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
# NOTE(review): corrupted block; many lines elided (1446, 1450, 1453-1454,
# 1456, 1458, 1460, 1464, 1467, 1473) -- presumably the `if i != -1:` guards,
# the `i`/`k` initialisations, loop increments and the else-branches.
# Confirm against the pristine file before editing.
1445 def revert_armenian(document):
1447 # set inputencoding from armscii8 to auto
1448 if document.inputencoding == "armscii8":
1449 i = find_token(document.header, "\\inputencoding", 0)
1451 document.header[i] = "\\inputencoding auto"
1452 # check if preamble exists, if not k is set to -1
# Scan the preamble for the first real (non-comment) LaTeX line.
1455 while i < len(document.preamble):
1457 k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
1459 k = document.preamble[i].find("%", 0, len(document.preamble[i]))
1461 # add the entry \usepackage{armtex} to the document preamble
1462 if document.language == "armenian":
1463 # set the armtex entry as the first preamble line
1465 document.preamble[0:0] = ["\\usepackage{armtex}"]
1466 # create the preamble when it doesn't exist
1468 document.preamble.append('\\usepackage{armtex}')
1469 # Set document language from armenian to english
1470 if document.language == "armenian":
1471 document.language = "english"
1472 i = find_token(document.header, "\\language", 0)
1474 document.header[i] = "\\language english"
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # BUGFIX(review): as pasted, this append ran unconditionally; only
        # add a fallback line when no \inputencoding line exists at all
        # (same shape as revert_utf8x/revert_ascii).
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding default"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    # CJK languages are unknown to older formats; fall back to english.
    if document.language == "chinese-simplified" or \
       document.language == "chinese-traditional" or \
       document.language == "japanese" or document.language == "korean":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    r" Revert preamble option \listings_params to explicit preamble code. "
    i = find_token(document.header, "\\listings_params", 0)
    if i != -1:
        # BUGFIX(review): as pasted, the body ran unconditionally and would
        # pop header line -1 when the token is absent; restore the guard.
        document.preamble.append('\\usepackage{listings}')
        # The header stores the options quoted after the token.
        document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
        document.header.pop(i)
# NOTE(review): heavily corrupted block -- embedded line numbers, lost
# indentation and large elisions (most of the docstring example, the scan
# loop, the `== -1` guards, default initialisations of inline/params/status/
# caption/label, and the tail of both ERT replacement lists).  Only comments
# are added; restore from the pristine file before editing behavior.
1508 def revert_listings_inset(document):
1509 r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1513 lstparams "language=Delphi"
1517 \begin_layout Standard
1527 \begin_layout Standard
1531 lstinline[language=Delphi]{var i = 10;}
1536 There can be an caption inset in this inset
1538 \begin_layout Standard
1539 \begin_inset Caption
1541 \begin_layout Standard
1543 \begin_inset LatexCommand label
# Find each listings inset and translate it to raw LaTeX in an ERT inset.
1559 i = find_token(document.body, '\\begin_inset listings', i)
# Make sure the preamble loads the listings package exactly once.
1563 if not '\\usepackage{listings}' in document.preamble:
1564 document.preamble.append('\\usepackage{listings}')
1565 j = find_end_of_inset(document.body, i + 1)
1567 # this should not happen
# The first three inset lines carry the inline/lstparams/status options.
1573 for line in range(i + 1, i + 4):
1574 if document.body[line].startswith('inline'):
1575 inline = document.body[line].split()[1]
1576 if document.body[line].startswith('lstparams'):
1577 params = document.body[line].split()[1].strip('"')
1578 if document.body[line].startswith('status'):
1579 status = document.body[line].split()[1].strip()
# Extract an optional caption inset (and an optional label inside it).
1584 cap = find_token(document.body, '\\begin_inset Caption', i)
1586 cap_end = find_end_of_inset(document.body, cap + 1)
1588 # this should not happen
1591 lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
1593 lbl_end = find_end_of_inset(document.body, lbl + 1)
1595 # this should not happen
1600 for line in document.body[lbl : lbl_end + 1]:
1601 if line.startswith('name '):
1602 label = line.split()[1].strip('"')
# Caption text is everything in the caption inset outside LyX commands.
1604 for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
1605 if not line.startswith('\\'):
1606 caption += line.strip()
1609 # looking for the oneline code for lstinline
1610 inlinecode = document.body[find_end_of_layout(document.body,
1611 find_token(document.body, '\\begin_layout Standard', i + 1) +1 ) - 1]
# Fold caption and label into the lstset-style option string.
1612 if len(caption) > 0:
1613 if len(params) == 0:
1614 params = 'caption={%s}' % caption
1616 params += ',caption={%s}' % caption
1618 if len(params) == 0:
1619 params = 'label={%s}' % label
1621 params += ',label={%s}' % label
1623 params = '[%s]' % params
# Backslashes inside ERT must be written as \backslash on its own line.
1624 params = params.replace('\\', '\\backslash\n')
1625 if inline == 'true':
1626 document.body[i:(j+1)] = [r'\begin_inset ERT',
1627 'status %s' % status,
1628 r'\begin_layout Standard',
1632 'lstinline%s{%s}' % (params, inlinecode),
1637 document.body[i: j+1] = [r'\begin_inset ERT',
1638 'status %s' % status,
1640 r'\begin_layout Standard',
1644 r'begin{lstlisting}%s' % params,
1646 ] + document.body[k : j - 1] + \
1648 r'\begin_layout Standard',
# NOTE(review): corrupted block -- most of the docstring example, the scan
# loop, guards and the tail of the ERT replacement list were elided (gaps
# 1660-1668, 1674-1680, 1682-1684, 1688, 1690, 1697-1698, 1700-1702,
# 1704-1708).  Restore from the pristine file before editing behavior.
1657 def revert_include_listings(document):
1658 r''' Revert lstinputlisting Include option , translate
1659 \begin_inset Include \lstinputlisting{file}[opt]
1669 \begin_layout Standard
1673 lstinputlisting{file}[opt]
# Find each lstinputlisting Include inset and turn it into ERT.
1681 i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
# Make sure the preamble loads the listings package exactly once.
1685 if not '\\usepackage{listings}' in document.preamble:
1686 document.preamble.append('\\usepackage{listings}')
1687 j = find_end_of_inset(document.body, i + 1)
1689 # this should not happen
1691 # find command line lstinputlisting{file}[options]
1692 cmd, file, option = '', '', ''
1693 if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
1694 cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
# Backslashes inside ERT must be written as \backslash on its own line.
1695 option = option.replace('\\', '\\backslash\n')
1696 document.body[i : j + 1] = [r'\begin_inset ERT',
1699 r'\begin_layout Standard',
1703 '%s%s{%s}' % (cmd, option, file),
# NOTE(review): corrupted block; elided lines (1712, 1715-1716, 1719, 1721,
# 1724, 1726-1727) presumably held blank lines, the 'pt' suffix handling and
# the `if i == -1:` / `else:` branches around the \options update -- confirm
# against the pristine file before editing.
1709 def revert_ext_font_sizes(document):
# Only the extarticle etc. classes under the LaTeX backend are affected.
1710 if document.backend != "latex": return
1711 if not document.textclass.startswith("ext"): return
1713 fontsize = get_value(document.header, '\\paperfontsize', 0)
1714 if fontsize not in ('10', '11', '12'): return
# Reset the header font size and move the size into the class options.
1717 i = find_token(document.header, '\\paperfontsize', 0)
1718 document.header[i] = '\\paperfontsize default'
1720 i = find_token(document.header, '\\options', 0)
1722 i = find_token(document.header, '\\textclass', 0) + 1
1723 document.header[i:i] = ['\\options %s' % fontsize]
1725 document.header[i] += ',%s' % fontsize
# NOTE(review): corrupted block; elided lines (1731, 1734, 1736-1737, 1739,
# 1743, 1745-1746, 1750-1755, 1758-1759, 1761, 1763) presumably held the
# `if i == -1: return`, the loop `break`, `return` in the else-branch,
# stripping of the size from the options list and the empty-options cleanup.
# Confirm against the pristine file before editing.
1728 def convert_ext_font_sizes(document):
# Only the extarticle etc. classes under the LaTeX backend are affected.
1729 if document.backend != "latex": return
1730 if not document.textclass.startswith("ext"): return
1732 fontsize = get_value(document.header, '\\paperfontsize', 0)
1733 if fontsize != 'default': return
1735 i = find_token(document.header, '\\options', 0)
1738 options = get_value(document.header, '\\options', i)
# Look for an explicit NNpt size among the class options.
1740 fontsizes = '10pt', '11pt', '12pt'
1741 for fs in fontsizes:
1742 if options.find(fs) != -1:
1744 else: # this else will only be attained if the for cycle had no match
# Move the size from the class options back into \paperfontsize.
1747 options = options.split(',')
1748 for j, opt in enumerate(options):
1749 if opt in fontsizes:
1756 k = find_token(document.header, '\\paperfontsize', 0)
1757 document.header[k] = '\\paperfontsize %s' % fontsize
1760 document.header[i] = '\\options %s' % ','.join(options)
1762 del document.header[i]
# NOTE(review): corrupted block -- most of the docstring example, the scan
# loop, guards and the tail of the replacement list were elided (gaps
# 1766-1790, 1792-1793, 1795, 1797, 1800-1801, 1804-1806, 1808-1811).
# Restore from the pristine file before editing behavior.
1764 def revert_separator_layout(document):
1765 r'''Revert --Separator-- to a lyx note
1768 \begin_layout --Separator--
1774 \begin_layout Standard
1775 \begin_inset Note Note
1778 \begin_layout Standard
# Find each --Separator-- layout and wrap its content in a LyX note.
1791 i = find_token(document.body, r'\begin_layout --Separator--', i)
1794 j = find_end_of_layout(document.body, i + 1)
1796 # this should not happen
1798 document.body[i : j + 1] = [r'\begin_layout Standard',
1799 r'\begin_inset Note Note',
1802 r'\begin_layout Standard',
1803 'Separate Environment',
# Keep the original layout content inside the note.
1807 document.body[ i + 1 : j] + \
# NOTE(review): corrupted block; elided lines (1816, 1818, 1821, 1824-1825)
# presumably held `if i != -1:`, `i = 0`, `if (h != -1):` and the loop
# increment -- confirm against the pristine file before editing.  Note the
# string "\lang" relies on '\l' not being a Python escape sequence.
1812 def convert_arabic (document):
1813 if document.language == "arabic":
1814 document.language = "arabic_arabtex"
1815 i = find_token(document.header, "\\language", 0)
1817 document.header[i] = "\\language arabic_arabtex"
# Rename every \lang arabic line in the body as well.
1819 while i < len(document.body):
1820 h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
1822 # change the language name
1823 document.body[i] = '\lang arabic_arabtex'
# NOTE(review): corrupted block, mirror image of convert_arabic above; the
# same guard/loop lines were elided (1830, 1832, 1835, 1838-1839) -- confirm
# against the pristine file before editing.
1826 def revert_arabic (document):
1827 if document.language == "arabic_arabtex":
1828 document.language = "arabic"
1829 i = find_token(document.header, "\\language", 0)
1831 document.header[i] = "\\language arabic"
# Rename every \lang arabic_arabtex line in the body as well.
1833 while i < len(document.body):
1834 h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
1836 # change the language name
1837 document.body[i] = '\lang arabic'
# NOTE(review): corrupted block -- embedded line numbers, lost indentation
# and many elisions (the spec_chars initialisation, try/except around the
# symbol-table parse, the in_ert/in_math state updates, `continue`s, and the
# else-branches).  Python-2-only constructs (unichr, dict.has_key) are used.
# SECURITY(review): `eval(ucs4)` runs on text read from the unicodesymbols
# file -- acceptable only because that file ships with LyX, but int(x, 0)
# would be safer.  Restore from the pristine file before editing behavior.
1840 def revert_unicode(document):
1841 '''Transform unicode symbols according to the unicode list.
1842 Preamble flags are not implemented.
1843 Combination characters are currently ignored.
1844 Forced output is currently not enforced'''
# Locate the unicodesymbols table next to the lyx2lyx scripts.
1845 pathname = os.path.dirname(sys.argv[0])
1846 fp = open(pathname.strip('lyx2lyx') + 'unicodesymbols','r')
1848 for line in fp.readlines():
1850 line=line.replace('"','') # remove all quotation marks
1852 # flag1 and flag2 are preamble & flags
1853 # currently NOT implemented
1854 [ucs4,command,flag1,flag2] =line.split(None,3)
1855 spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
1859 # Define strings to start and end ERT and math insets
1860 ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout Standard\n\\backslash\n'
1861 ert_outro='\n\\end_layout\n\n\\end_inset\n\n'
1862 math_intro='\n\\begin_inset Formula $'
1863 math_outro='$\n\\end_inset\n'
1864 # Find unicode characters and replace them
1865 in_ert = 0 # flag set to 1 if in ERT inset
1866 in_math = 0 # flag set to 1 if in math inset
1867 insets = [] # list of active insets
# Track nesting of ERT/math/other insets while scanning the body.
1868 for i, current_line in enumerate(document.body):
1869 if current_line.find('\\begin_inset') > -1:
1870 # check which inset to start
1871 if current_line.find('\\begin_inset ERT') > -1:
1873 insets.append('ert')
1874 elif current_line.find('\\begin_inset Formula') > -1:
1876 insets.append('math')
1878 insets.append('other')
1879 if current_line.find('\\end_inset') > -1:
1880 # check which inset to end
1882 cur_inset = insets.pop()
1883 if cur_inset == 'ert':
1885 elif cur_inset == 'math':
1888 pass # end of other inset
1890 pass # inset list was empty (for some reason)
# Rebuild the line character by character, replacing special chars.
1891 current_line=''; # clear to have as container for modified line
1892 for j in range(len(document.body[i])):
1893 if spec_chars.has_key(document.body[i][j]):
1894 flags = spec_chars[document.body[i][j]][1] + spec_chars[document.body[i][j]][2]
1895 if flags.find('combining') > -1:
1898 command = spec_chars[document.body[i][j]][0]; # the command to replace unicode
1899 if command[0:2] == '\\\\':
1900 if command[2:12]=='ensuremath':
1903 command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
1904 command = command.replace('}', '$\n')
1906 # add a math inset with the replacement character
1907 command = command.replace('\\\\ensuremath{\\', math_intro)
1908 command = command.replace('}', math_outro)
1910 # we are already in a math inset
1911 command = command.replace('\\\\ensuremath{\\', '')
1912 command = command.replace('}', '')
1915 # avoid putting an ERT in a math; instead put command as text
1916 command = command.replace('\\\\', '\mathrm{')
1917 command = command + '}'
1919 # add an ERT inset with the replacement character
1920 command = command.replace('\\\\', ert_intro)
1921 command = command + ert_outro
1923 command = command.replace('\\\\', '\n\\backslash\n')
1924 current_line = current_line + command
1926 current_line = current_line + document.body[i][j]
1927 document.body[i] = current_line
# NOTE(review): conversion tables for lyx2lyx's main driver.  The paste is
# corrupted: many table entries were elided (gaps 1937, 1939-1940, 1942,
# 1944-1945, 1949, 1951, 1955-1959, 1961-1962, 1966-1968, 1971, 1984,
# 1986-1987, 1995), including the `revert = [` list opener itself (around
# 1966-1968).  The referenced functions without a visible definition here
# (convert_font_settings, convert_utf8, ...) live elsewhere in this file.
# NOTE(review): entries 1973 and 1976 list the identical reverter triple for
# formats 271 and 268 -- presumably the elided pristine lines differ; verify.
1934 supported_versions = ["1.5.0","1.5"]
# Forward conversions: [target_format, [conversion functions]].
1935 convert = [[246, []],
1936 [247, [convert_font_settings]],
1938 [249, [convert_utf8]],
1941 [252, [convert_commandparams, convert_bibitem]],
1943 [254, [convert_esint]],
1946 [257, [convert_caption]],
1947 [258, [convert_lyxline]],
1948 [259, [convert_accent, normalize_font_whitespace_259]],
1950 [261, [convert_changes]],
1952 [263, [normalize_language_name]],
1953 [264, [convert_cv_textclass]],
1954 [265, [convert_tableborder]],
1960 [271, [convert_ext_font_sizes]],
1963 [274, [normalize_font_whitespace_274]],
1964 [275, [convert_graphics_rotation]],
1965 [276, [convert_arabic]]
# Backward conversions: [target_format, [reversion functions]].
1969 [275, [revert_arabic]],
1970 [274, [revert_graphics_rotation]],
1972 [272, [revert_separator_layout]],
1973 [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
1974 [270, [revert_ext_font_sizes]],
1975 [269, [revert_beamer_alert, revert_beamer_structure]],
1976 [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
1977 [267, [revert_CJK]],
1978 [266, [revert_utf8plain]],
1979 [265, [revert_armenian]],
1980 [264, [revert_tableborder]],
1981 [263, [revert_cv_textclass]],
1982 [262, [revert_language_name]],
1983 [261, [revert_ascii]],
1985 [259, [revert_utf8x]],
1988 [256, [revert_caption]],
1989 [255, [revert_encodings]],
1990 [254, [revert_clearpage, revert_cleardoublepage]],
1991 [253, [revert_esint]],
1992 [252, [revert_nomenclature, revert_printnomenclature]],
1993 [251, [revert_commandparams]],
1994 [250, [revert_cs_label]],
1996 [248, [revert_accent, revert_utf8, revert_unicode]],
1997 [247, [revert_booktabs]],
1998 [246, [revert_font_settings]],
1999 [245, [revert_framed]]]
2002 if __name__ == "__main__":