1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2006 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2006 Georg Baum <Georg.Baum@post.rwth-aachen.de>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 """ Convert files to the file format generated by lyx 1.5"""
import re
import unicodedata

from parser_tools import find_re, find_token, find_token_backwards, find_token_exact, find_tokens, find_end_of, get_value, find_beginning_of, find_nonempty_line
from LyX import get_encoding
30 ####################################################################
31 # Private helper functions
def find_end_of_inset(lines, i):
    """Return the index of the `\\end_inset` matching the inset at lines[i]."""
    open_tag = "\\begin_inset"
    close_tag = "\\end_inset"
    return find_end_of(lines, i, open_tag, close_tag)
def find_end_of_layout(lines, i):
    """Return the index of the `\\end_layout` matching the layout at lines[i]."""
    open_tag = "\\begin_layout"
    close_tag = "\\end_layout"
    return find_end_of(lines, i, open_tag, close_tag)
def find_beginning_of_layout(lines, i):
    """Return the index of the `\\begin_layout` opening the layout that contains lines[i]."""
    open_tag = "\\begin_layout"
    close_tag = "\\end_layout"
    return find_beginning_of(lines, i, open_tag, close_tag)
45 # End of helper functions
46 ####################################################################
50 # Notes: Framed/Shaded
def revert_framed(document):
    """Revert framed/shaded notes to plain notes.

    Framed and Shaded note insets do not exist in older formats, so each
    `\\begin_inset Note Framed` / `\\begin_inset Note Shaded` line becomes a
    plain `\\begin_inset Note`.
    """
    # This copy was missing the scan loop and its termination; without them
    # `i` was unbound and only one (unreachable) replacement could happen.
    i = 0
    while True:
        i = find_tokens(document.body, ["\\begin_inset Note Framed", "\\begin_inset Note Shaded"], i)
        if i == -1:
            return
        document.body[i] = "\\begin_inset Note"
        i = i + 1
# Maps old \fontscheme values to the new \font_roman value.
# The dict literal was unterminated in this copy and lacked the 'pslatex'
# entry that typewriter_fonts (below) also knows about.
roman_fonts = {'default' : 'default', 'ae' : 'ae',
               'times' : 'times', 'palatino' : 'palatino',
               'helvet' : 'default', 'avant' : 'default',
               'newcent' : 'newcent', 'bookman' : 'bookman',
               'pslatex' : 'times'}
# Maps old \fontscheme values to the new \font_sans value.
# The dict literal was unterminated in this copy and lacked the 'pslatex'
# entry that typewriter_fonts (below) also knows about.
sans_fonts = {'default' : 'default', 'ae' : 'default',
              'times' : 'default', 'palatino' : 'default',
              'helvet' : 'helvet', 'avant' : 'avant',
              'newcent' : 'default', 'bookman' : 'default',
              'pslatex' : 'helvet'}
# Maps old \fontscheme values to the new \font_typewriter value.
# Only pslatex selects a non-default typewriter font (courier).
typewriter_fonts = {
    'default': 'default',
    'ae': 'default',
    'times': 'default',
    'palatino': 'default',
    'helvet': 'default',
    'avant': 'default',
    'newcent': 'default',
    'bookman': 'default',
    'pslatex': 'courier',
}
def convert_font_settings(document):
    """Convert font settings.

    Replaces the single old `\\fontscheme` header line with the new
    per-family `\\font_*` lines, using the roman/sans/typewriter maps above.
    """
    # This copy was missing the initialisation and the malformed-document
    # branches; restored so `i` and `font_scheme` are always defined.
    i = 0
    i = find_token_exact(document.header, "\\fontscheme", i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\fontscheme'.")
        # Insert a default scheme so the replacement below has a line to
        # work on.
        document.header.insert(0, '\\fontscheme default')
        i = 0
    font_scheme = get_value(document.header, "\\fontscheme", i, i + 1)
    if font_scheme == '':
        document.warning("Malformed LyX document: Empty `\\fontscheme'.")
        font_scheme = 'default'
    if not font_scheme in roman_fonts.keys():
        document.warning("Malformed LyX document: Unknown `\\fontscheme' `%s'." % font_scheme)
        font_scheme = 'default'
    # Replace the one \fontscheme line with the full new font header block.
    document.header[i:i+1] = ['\\font_roman %s' % roman_fonts[font_scheme],
                              '\\font_sans %s' % sans_fonts[font_scheme],
                              '\\font_typewriter %s' % typewriter_fonts[font_scheme],
                              '\\font_default_family default',
                              '\\font_sc false',
                              '\\font_osf false',
                              '\\font_sf_scale 100',
                              '\\font_tt_scale 100']
def revert_font_settings(document):
    """Revert font settings.

    Removes the new per-family `\\font_*` header lines and reinserts a
    single `\\fontscheme` line.  If the font combination matches no known
    scheme, falls back to `\\fontscheme default` plus raw preamble code.
    """
    # Missing initialisations and if/else branches restored in this copy.
    i = 0
    insert_line = -1
    fonts = {'roman' : 'default', 'sans' : 'default', 'typewriter' : 'default'}
    for family in 'roman', 'sans', 'typewriter':
        name = '\\font_%s' % family
        i = find_token_exact(document.header, name, i)
        if i == -1:
            document.warning("Malformed LyX document: Missing `%s'." % name)
            i = 0
        else:
            # Remember where the first \font_* line was, so \fontscheme can
            # be reinserted at the same place.
            if (insert_line < 0):
                insert_line = i
            fonts[family] = get_value(document.header, name, i, i + 1)
            del document.header[i]
    i = find_token_exact(document.header, '\\font_default_family', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_default_family'.")
        font_default_family = 'default'
    else:
        font_default_family = get_value(document.header, "\\font_default_family", i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_sc', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_sc'.")
        font_sc = 'false'
    else:
        font_sc = get_value(document.header, '\\font_sc', i, i + 1)
        del document.header[i]
    if font_sc != 'false':
        document.warning("Conversion of '\\font_sc' not yet implemented.")
    i = find_token_exact(document.header, '\\font_osf', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_osf'.")
        font_osf = 'false'
    else:
        font_osf = get_value(document.header, '\\font_osf', i, i + 1)
        del document.header[i]
    i = find_token_exact(document.header, '\\font_sf_scale', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_sf_scale'.")
        font_sf_scale = '100'
    else:
        font_sf_scale = get_value(document.header, '\\font_sf_scale', i, i + 1)
        del document.header[i]
    if font_sf_scale != '100':
        document.warning("Conversion of '\\font_sf_scale' not yet implemented.")
    i = find_token_exact(document.header, '\\font_tt_scale', i)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\font_tt_scale'.")
        font_tt_scale = '100'
    else:
        font_tt_scale = get_value(document.header, '\\font_tt_scale', i, i + 1)
        del document.header[i]
    if font_tt_scale != '100':
        document.warning("Conversion of '\\font_tt_scale' not yet implemented.")
    # If the font combination matches a known scheme, emit that and stop.
    for font_scheme in roman_fonts.keys():
        if (roman_fonts[font_scheme] == fonts['roman'] and
            sans_fonts[font_scheme] == fonts['sans'] and
            typewriter_fonts[font_scheme] == fonts['typewriter']):
            document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
            if font_default_family != 'default':
                document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
            if font_osf == 'true':
                document.warning("Ignoring `\\font_osf = true'")
            return
    # No matching scheme: fall back to 'default' and emit preamble code.
    font_scheme = 'default'
    document.header.insert(insert_line, '\\fontscheme %s' % font_scheme)
    if fonts['roman'] == 'cmr':
        document.preamble.append('\\renewcommand{\\rmdefault}{cmr}')
        if font_osf == 'true':
            document.preamble.append('\\usepackage{eco}')
            # eco takes care of old style figures, so no warning below.
            font_osf = 'false'
    for font in 'lmodern', 'charter', 'utopia', 'beraserif', 'ccfonts', 'chancery':
        if fonts['roman'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmss', 'lmss', 'cmbr':
        if fonts['sans'] == font:
            document.preamble.append('\\renewcommand{\\sfdefault}{%s}' % font)
    # BUG FIX: `for font in 'berasans':` iterated the characters of the
    # string, so `\usepackage{berasans}` could never be emitted.  Iterate a
    # one-element tuple instead.
    for font in ('berasans', ):
        if fonts['sans'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    for font in 'cmtt', 'lmtt', 'cmtl':
        if fonts['typewriter'] == font:
            document.preamble.append('\\renewcommand{\\ttdefault}{%s}' % font)
    for font in 'courier', 'beramono', 'luximono':
        if fonts['typewriter'] == font:
            document.preamble.append('\\usepackage{%s}' % font)
    if font_default_family != 'default':
        document.preamble.append('\\renewcommand{\\familydefault}{\\%s}' % font_default_family)
    if font_osf == 'true':
        document.warning("Ignoring `\\font_osf = true'")
def revert_booktabs(document):
    """We remove the booktabs flag or everything else will become a mess.

    Strips booktabs="true" from tabular <features> tags and removes the
    extra top/bottom/interline row spacing attributes.
    """
    re_row = re.compile(r'^<row.*space="[^"]+".*>$')
    re_tspace = re.compile(r'\s+topspace="[^"]+"')
    re_bspace = re.compile(r'\s+bottomspace="[^"]+"')
    re_ispace = re.compile(r'\s+interlinespace="[^"]+"')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Tabular", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Could not find end of tabular.")
            # Advance before continuing so a malformed inset cannot make
            # this loop spin forever on the same index.
            i = i + 1
            continue
        for k in range(i, j):
            if re.search('^<features.* booktabs="true".*>$', document.body[k]):
                document.warning("Converting 'booktabs' table to normal table.")
                document.body[k] = document.body[k].replace(' booktabs="true"', '')
            if re.search(re_row, document.body[k]):
                document.warning("Removing extra row space.")
                document.body[k] = re_tspace.sub('', document.body[k])
                document.body[k] = re_bspace.sub('', document.body[k])
                document.body[k] = re_ispace.sub('', document.body[k])
        i = i + 1
def convert_multiencoding(document, forward):
    """ Fix files with multiple encodings.
    Files with an inputencoding of "auto" or "default" and multiple languages
    where at least two languages have different default encodings are encoded
    in multiple encodings for file formats < 249. These files are incorrectly
    read and written (as if the whole file was in the encoding of the main
    language).
    This is not true for files written by CJK-LyX, they are always in the
    locale encoding.

    This function
    - converts from fake unicode values to true unicode if forward is true, and
    - converts from true unicode values to fake unicode if forward is false.
    document.encoding must be set to the old value (format 248) in both cases.

    We do this here and not in LyX.py because it is far easier to do the
    necessary parsing in modern formats than in ancient ones.
    """
    # Insets whose paragraphs always use the document language encoding.
    inset_types = ["Foot", "Note"]
    if document.cjk_encoding != '':
        # CJK-LyX files are uniformly encoded; nothing to fix.
        return
    encoding_stack = [document.encoding]
    inset_stack = []
    lang_re = re.compile(r"^\\lang\s(\S+)")
    inset_re = re.compile(r"^\\begin_inset\s(\S+)")
    if document.inputencoding == "auto" or document.inputencoding == "default":
        for i in range(len(document.body)):
            result = lang_re.match(document.body[i])
            if result:
                language = result.group(1)
                if language == "default":
                    document.warning("Resetting encoding from %s to %s." % (encoding_stack[-1], document.encoding), 3)
                    encoding_stack[-1] = document.encoding
                else:
                    from lyx2lyx_lang import lang
                    document.warning("Setting encoding from %s to %s." % (encoding_stack[-1], lang[language][3]), 3)
                    encoding_stack[-1] = lang[language][3]
            elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
                document.warning("Adding nested encoding %s." % encoding_stack[-1], 3)
                # A paragraph inside a Foot/Note inset starts out in the
                # document language encoding, not in the encoding of the
                # surrounding text.
                if len(inset_stack) > 0 and inset_stack[-1] in inset_types:
                    from lyx2lyx_lang import lang
                    encoding_stack.append(lang[document.language][3])
                else:
                    encoding_stack.append(encoding_stack[-1])
            elif find_token(document.body, "\\end_layout", i, i + 1) == i:
                document.warning("Removing nested encoding %s." % encoding_stack[-1], 3)
                if len(encoding_stack) == 1:
                    # Don't remove the document encoding from the stack
                    document.warning("Malformed LyX document: Unexpected `\\end_layout'.")
                else:
                    del encoding_stack[-1]
            elif find_token(document.body, "\\begin_inset", i, i + 1) == i:
                inset_result = inset_re.match(document.body[i])
                if inset_result:
                    inset_type = inset_result.group(1)
                    inset_stack.append(inset_type)
                else:
                    inset_stack.append("")
            elif find_token(document.body, "\\end_inset", i, i + 1) == i:
                if len(inset_stack) > 0:
                    del inset_stack[-1]
                else:
                    document.warning("Malformed LyX document: Unexpected `\\end_inset'.")
            if encoding_stack[-1] != document.encoding:
                if forward:
                    # This line has been incorrectly interpreted as if it was
                    # encoded in 'encoding'.
                    # Convert back to the 8bit string that was in the file.
                    orig = document.body[i].encode(document.encoding)
                    # Convert the 8bit string that was in the file to unicode
                    # with the correct encoding.
                    document.body[i] = orig.decode(encoding_stack[-1])
                else:
                    # Convert unicode to the 8bit string that will be written
                    # to the file with the correct encoding.
                    orig = document.body[i].encode(encoding_stack[-1])
                    # Convert the 8bit string that will be written to the
                    # file to fake unicode with the encoding that will later
                    # be used when writing to the file.
                    document.body[i] = orig.decode(document.encoding)
def convert_utf8(document):
    """Set the document encoding to UTF-8."""
    # First re-decode the body from the per-language fake-unicode state,
    # then record the new encoding in the document.
    convert_multiencoding(document, True)
    document.encoding = "utf8"
def revert_utf8(document):
    """Set document encoding to the value corresponding to inputencoding.

    Resets a utf8 (or missing) `\\inputencoding` header to `auto` and
    re-encodes the body into the per-language fake-unicode form.
    """
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # Header line was missing entirely; add the default.
        document.header.append("\\inputencoding auto")
    elif get_value(document.header, "\\inputencoding", i) == "utf8":
        document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
    document.encoding = get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)
    convert_multiencoding(document, False)
def revert_cs_label(document):
    """Remove the status flag of a charstyle label.

    For every CharStyle inset, deletes the following 'show_label' line.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CharStyle", i)
        if i == -1:
            return
        # Search for a line starting 'show_label'.
        # If it is not there, break with a warning message.
        i = i + 1
        while True:
            if (document.body[i][:10] == "show_label"):
                del document.body[i]
                break
            elif (document.body[i][:13] == "\\begin_layout"):
                # Reached the inset content without seeing show_label.
                document.warning("Malformed LyX document: Missing 'show_label'.")
                break
            i = i + 1
        i = i + 1
def convert_bibitem(document):
    r""" Convert

    \bibitem [option]{argument}

    to

    \begin_inset LatexCommand bibitem
    label "option"
    key "argument"

    \end_inset

    This must be called after convert_commandparams.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\bibitem", i)
        if i == -1:
            break
        j = document.body[i].find('[') + 1
        k = document.body[i].rfind(']')
        if j == 0: # No optional argument found
            option = None
        else:
            option = document.body[i][j:k]
        j = document.body[i].rfind('{') + 1
        k = document.body[i].rfind('}')
        argument = document.body[i][j:k]
        lines = ['\\begin_inset LatexCommand bibitem']
        if option != None:
            # Quotes inside the values must be escaped for the inset format.
            lines.append('label "%s"' % option.replace('"', '\\"'))
        lines.append('key "%s"' % argument.replace('"', '\\"'))
        lines.append('')
        lines.append('\\end_inset')
        document.body[i:i+1] = lines
        i = i + 1
# command : [option1, option2, argument]
# The natbib/jurabib citation commands all use the same parameter names,
# so they are filled in by the loop below instead of being spelled out.
commandparams_info = {
    "bibitem"         : ["label", "", "key"],
    "bibtex"          : ["options", "btprint", "bibfiles"],
    "hfill"           : ["", "", ""],
    "index"           : ["", "", "name"],
    "printindex"      : ["", "", "name"],
    "label"           : ["", "", "name"],
    "eqref"           : ["name", "", "reference"],
    "pageref"         : ["name", "", "reference"],
    "prettyref"       : ["name", "", "reference"],
    "ref"             : ["name", "", "reference"],
    "vpageref"        : ["name", "", "reference"],
    "vref"            : ["name", "", "reference"],
    "tableofcontents" : ["", "", "type"],
    "htmlurl"         : ["name", "", "target"],
    "url"             : ["name", "", "target"]}
for _cite in ("cite", "citet", "citep", "citealt", "citealp",
              "citeauthor", "citeyear", "citeyearpar",
              "citet*", "citep*", "citealt*", "citealp*", "citeauthor*",
              "Citet", "Citep", "Citealt", "Citealp", "Citeauthor",
              "Citet*", "Citep*", "Citealt*", "Citealp*", "Citeauthor*",
              "citefield", "citetitle", "cite*"):
    # A fresh list per key, exactly as the original literal produced.
    commandparams_info[_cite] = ["after", "before", "key"]
del _cite
def convert_commandparams(document):
    r"""Convert

        \begin_inset LatexCommand \cmdname[opt1][opt2]{arg}

    to

        \begin_inset LatexCommand cmdname
        name1 "opt1"
        name2 "opt2"
        name3 "arg"

    name1, name2 and name3 can be different for each command.
    """
    # NOTE(review): this function appears to have lost a number of source
    # lines in this copy -- the scan loop header, the state-machine
    # initialisation (state/name/option1/option2/argument/nestdepth/b), the
    # per-character accumulation bodies, and the loop advance/termination.
    # Restore them from the reference implementation before relying on it.
    # \begin_inset LatexCommand bibitem was not the official version (see
    # convert_bibitem()), but could be read in, so we convert it here, too.
    i = find_token(document.body, "\\begin_inset LatexCommand", i)
    # Everything after "\begin_inset LatexCommand " is the raw command text.
    command = document.body[i][26:].strip()
    document.warning("Malformed LyX document: Missing LatexCommand name.")
    j = find_token(document.body, "\\end_inset", i + 1)
    document.warning("Malformed document")
    # The command may be spread over several lines; join and remove them.
    command += "".join(document.body[i+1:j])
    document.body[i+1:j] = []
    # The following parser is taken from the original InsetCommandParams::scanCommand
    # Used to handle things like \command[foo[bar]]{foo{bar}}
    # State transitions out of the command-name state:
    if ((state == "CMDNAME" and c == ' ') or
        (state == "CMDNAME" and c == '[') or
        (state == "CMDNAME" and c == '{')):
    # Closing bracket/brace: leave the option/argument when nesting is done.
    if ((state == "OPTION" and c == ']') or
        (state == "SECOPTION" and c == ']') or
        (state == "CONTENT" and c == '}')):
        nestdepth = nestdepth - 1
    # Opening bracket/brace inside an option/argument: nest one level deeper.
    if ((state == "OPTION" and c == '[') or
        (state == "SECOPTION" and c == '[') or
        (state == "CONTENT" and c == '{')):
        nestdepth = nestdepth + 1
    # Accumulate the current character into the field for the active state
    # (NOTE(review): the accumulation bodies are missing here).
    if state == "CMDNAME":
    elif state == "OPTION":
    elif state == "SECOPTION":
    elif state == "CONTENT":
    # '[' starts the first option, or the second option after "[...]".
    elif c == '[' and b != ']':
        nestdepth = 0 # Just to be sure
    elif c == '[' and b == ']':
        nestdepth = 0 # Just to be sure
        nestdepth = 0 # Just to be sure
    # Now we have parsed the command, output the parameters
    lines = ["\\begin_inset LatexCommand %s" % name]
    if commandparams_info[name][0] == "":
        document.warning("Ignoring invalid option `%s' of command `%s'." % (option1, name))
    lines.append('%s "%s"' % (commandparams_info[name][0], option1.replace('"', '\\"')))
    if commandparams_info[name][1] == "":
        document.warning("Ignoring invalid second option `%s' of command `%s'." % (option2, name))
    lines.append('%s "%s"' % (commandparams_info[name][1], option2.replace('"', '\\"')))
    if commandparams_info[name][2] == "":
        document.warning("Ignoring invalid argument `%s' of command `%s'." % (argument, name))
    lines.append('%s "%s"' % (commandparams_info[name][2], argument.replace('"', '\\"')))
    document.body[i:i+1] = lines
def revert_commandparams(document):
    """Revert quoted-parameter LatexCommand insets to the old single-line
    \\cmdname[opt1][opt2]{arg} form (and bibitem back to a bare \\bibitem
    outside of any inset)."""
    regex = re.compile(r'(\S+)\s+(.+)')
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset LatexCommand", i)
        if i == -1:
            break
        # "\begin_inset LatexCommand <name>" -> third whitespace token.
        name = document.body[i].split()[2]
        j = find_end_of_inset(document.body, i + 1)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\end_inset'.")
            i = i + 1
            continue
        preview_line = ""
        option1 = ""
        option2 = ""
        argument = ""
        for k in range(i + 1, j):
            match = re.match(regex, document.body[k])
            if match:
                pname = match.group(1)
                pvalue = match.group(2)
                if pname == "preview":
                    preview_line = document.body[k]
                elif (commandparams_info[name][0] != "" and
                      pname == commandparams_info[name][0]):
                    option1 = pvalue.strip('"').replace('\\"', '"')
                elif (commandparams_info[name][1] != "" and
                      pname == commandparams_info[name][1]):
                    option2 = pvalue.strip('"').replace('\\"', '"')
                elif (commandparams_info[name][2] != "" and
                      pname == commandparams_info[name][2]):
                    argument = pvalue.strip('"').replace('\\"', '"')
            elif document.body[k].strip() != "":
                document.warning("Ignoring unknown contents `%s' in command inset %s." % (document.body[k], name))
        if name == "bibitem":
            if option1 == "":
                lines = ["\\bibitem {%s}" % argument]
            else:
                lines = ["\\bibitem [%s]{%s}" % (option1, argument)]
        else:
            if option1 == "":
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s{%s}" % (name, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[][%s]{%s}" % (name, option2, argument)]
            else:
                if option2 == "":
                    lines = ["\\begin_inset LatexCommand \\%s[%s]{%s}" % (name, option1, argument)]
                else:
                    lines = ["\\begin_inset LatexCommand \\%s[%s][%s]{%s}" % (name, option1, option2, argument)]
        if name != "bibitem":
            if preview_line != "":
                lines.append(preview_line)
            lines.append('')
            lines.append('\\end_inset')
        document.body[i:j+1] = lines
        # Advance past the replacement, otherwise the emitted
        # "\begin_inset LatexCommand \..." line would match again.
        i += len(lines)
def revert_nomenclature(document):
    " Convert nomenclature entry to ERT. "
    # NOTE(review): this copy is missing the scan-loop header (i = 0 /
    # while ...), the per-inset initialisation (preview_line, symbol,
    # description, prefix, use_nomencl), the `if match:` guard inside the
    # loop, the if/else around the two `command = ...` forms, and most of
    # the ERT replacement list.  Restore them from the reference
    # implementation before relying on this function.
    regex = re.compile(r'(\S+)\s+(.+)')
    i = find_token(document.body, "\\begin_inset LatexCommand nomenclature", i)
    j = find_end_of_inset(document.body, i + 1)
    for k in range(i + 1, j):
        match = re.match(regex, document.body[k])
        name = match.group(1)
        value = match.group(2)
        if name == "preview":
            preview_line = document.body[k]
        elif name == "symbol":
            # Unescape the quoted inset parameter value.
            symbol = value.strip('"').replace('\\"', '"')
        elif name == "description":
            description = value.strip('"').replace('\\"', '"')
        elif name == "prefix":
            prefix = value.strip('"').replace('\\"', '"')
        elif document.body[k].strip() != "":
            document.warning("Ignoring unknown contents `%s' in nomenclature inset." % document.body[k])
    # Without a prefix the optional argument is omitted:
    command = 'nomenclature{%s}{%s}' % (symbol, description)
    command = 'nomenclature[%s]{%s}{%s}' % (prefix, symbol, description)
    # Replace the whole inset with an ERT inset holding `command`
    # (NOTE(review): list literal is truncated here).
    document.body[i:j+1] = ['\\begin_inset ERT',
                            '\\begin_layout %s' % document.default_layout,
    # Make sure the preamble loads nomencl exactly once.
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def revert_printnomenclature(document):
    " Convert printnomenclature to ERT. "
    # NOTE(review): this copy is missing the scan-loop header, the
    # initialisation of labelwidth/preview_line/use_nomencl, the
    # `if match:` guard, the if/else around the two `command = ...` forms,
    # and most of the ERT replacement list.
    regex = re.compile(r'(\S+)\s+(.+)')
    i = find_token(document.body, "\\begin_inset LatexCommand printnomenclature", i)
    j = find_end_of_inset(document.body, i + 1)
    for k in range(i + 1, j):
        match = re.match(regex, document.body[k])
        name = match.group(1)
        value = match.group(2)
        if name == "preview":
            preview_line = document.body[k]
        elif name == "labelwidth":
            labelwidth = value.strip('"').replace('\\"', '"')
        elif document.body[k].strip() != "":
            document.warning("Ignoring unknown contents `%s' in printnomenclature inset." % document.body[k])
    # NOTE(review): 'nomenclature{}' looks wrong for a *printnomenclature*
    # inset -- verify against the reference source (expected
    # 'printnomenclature{}' / 'printnomenclature[%s]').
    command = 'nomenclature{}'
    command = 'nomenclature[%s]' % labelwidth
    # Replace the whole inset with an ERT inset holding `command`
    # (NOTE(review): list literal is truncated here).
    document.body[i:j+1] = ['\\begin_inset ERT',
                            '\\begin_layout %s' % document.default_layout,
    # Make sure the preamble loads nomencl exactly once.
    if use_nomencl and find_token(document.preamble, '\\usepackage{nomencl}[2005/09/22]', 0) == -1:
        document.preamble.append('\\usepackage{nomencl}[2005/09/22]')
        document.preamble.append('\\makenomenclature')
def convert_esint(document):
    """Add the `\\use_esint` setting to the header.

    Inserts `\\use_esint 0` immediately before `\\cite_engine`.
    """
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\cite_engine'.")
        return
    # 0 is off, 1 is auto, 2 is on.
    document.header.insert(i, '\\use_esint 0')
def revert_esint(document):
    """Remove the `\\use_esint` setting from the header.

    If esint was forced on, load the package explicitly in the preamble.
    """
    i = find_token(document.header, "\\use_esint", 0)
    if i == -1:
        document.warning("Malformed LyX document: Missing `\\use_esint'.")
        return
    use_esint = document.header[i].split()[1]
    del document.header[i]
    # 0 is off, 1 is auto, 2 is on.
    if use_esint == "2":
        document.preamble.append('\\usepackage{esint}')
def revert_clearpage(document):
    " clearpage -> ERT "
    # Replace each raw \clearpage line with an ERT inset containing the
    # LaTeX command.
    i = 0
    while True:
        i = find_token(document.body, "\\clearpage", i)
        if i == -1:
            break
        document.body[i:i+1] =  ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash',
                                'clearpage',
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
def revert_cleardoublepage(document):
    " cleardoublepage -> ERT "
    # Replace each raw \cleardoublepage line with an ERT inset containing
    # the LaTeX command.
    i = 0
    while True:
        i = find_token(document.body, "\\cleardoublepage", i)
        if i == -1:
            break
        document.body[i:i+1] =  ['\\begin_inset ERT',
                                'status collapsed',
                                '',
                                '\\begin_layout %s' % document.default_layout,
                                '',
                                '',
                                '\\backslash',
                                'cleardoublepage',
                                '\\end_layout',
                                '',
                                '\\end_inset']
        i = i + 1
def convert_lyxline(document):
    r" Remove fontsize commands for \lyxline. "
    # The problem is: the old \lyxline definition doesn't use the fontsize
    # to change the line thickness, while the new definition does.  Imported
    # \lyxlines would therefore get a different line thickness, so the
    # fontsize command preceding a \lyxline is removed to keep the output
    # the same.
    fontsizes = ["tiny", "scriptsize", "footnotesize", "small", "normalsize",
                 "large", "Large", "LARGE", "huge", "Huge"]
    for n in range(0, len(fontsizes)):
        i = 0
        k = 0
        while i < len(document.body):
            i = find_token(document.body, "\\size " + fontsizes[n], i)
            k = find_token(document.body, "\\lyxline", i)
            # the corresponding fontsize command is always 2 lines before
            # the \lyxline
            if (i != -1 and k == i + 2):
                document.body[i:i+1] = []
            else:
                break
            i = i + 1
def revert_encodings(document):
    """Set new encodings to auto.

    Input encodings that did not exist in the old format are reverted to
    `auto`; a missing `\\inputencoding` line is added as `auto`.
    """
    encodings = ["8859-6", "8859-8", "cp437", "cp437de", "cp850", "cp852",
                 "cp855", "cp858", "cp862", "cp865", "cp866", "cp1250",
                 "cp1252", "cp1256", "cp1257", "latin10", "pt254", "tis620-0"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding auto"
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def convert_caption(document):
    """Convert caption layouts to caption insets.

    Wraps each `\\begin_layout Caption` paragraph in a Caption inset inside
    a default-layout paragraph.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout Caption", i)
        if i == -1:
            return
        j = find_end_of_layout(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing `\\end_layout'.")
            return
        # Close the inner layout and the new inset before the old
        # \end_layout, then open the wrapper layout and the inset in place
        # of the old \begin_layout Caption.
        document.body[j:j] = ["\\end_layout", "", "\\end_inset", "", ""]
        document.body[i:i+1] = ["\\begin_layout %s" % document.default_layout,
                                "\\begin_inset Caption", "",
                                "\\begin_layout %s" % document.default_layout]
        i = i + 1
def revert_caption(document):
    " Convert caption insets to caption layouts. "
    " This assumes that the text class has a caption style. "
    # NOTE(review): this copy is missing the outer scan loop (i = 0 /
    # while ...), several else-branches, the loop that checks the lines
    # between layout_before and the inset, loop-advance lines, and the
    # bodies of the final two `if` statements.  The surviving lines below
    # keep their original order; restore the rest from the reference
    # implementation before relying on this function.
    i = find_token(document.body, "\\begin_inset Caption", i)
    # We either need to delete the previous \begin_layout line, or we
    # need to end the previous layout if this inset is not in the first
    # position of the paragraph.
    layout_before = find_token_backwards(document.body, "\\begin_layout", i)
    if layout_before == -1:
        document.warning("Malformed LyX document: Missing `\\begin_layout'.")
    layout_line = document.body[layout_before]
    del_layout_before = True
    l = layout_before + 1
    if document.body[l] != "":
        # Non-empty line between the layout and the inset: the inset is not
        # at the start of the paragraph.
        del_layout_before = False
    if del_layout_before:
        del document.body[layout_before:i]
    document.body[i:i] = ["\\end_layout", ""]
    # Find start of layout in the inset and end of inset
    j = find_token(document.body, "\\begin_layout", i)
    document.warning("Malformed LyX document: Missing `\\begin_layout'.")
    k = find_end_of_inset(document.body, i)
    document.warning("Malformed LyX document: Missing `\\end_inset'.")
    # We either need to delete the following \end_layout line, or we need
    # to restart the old layout if this inset is not at the paragraph end.
    layout_after = find_token(document.body, "\\end_layout", k)
    if layout_after == -1:
        document.warning("Malformed LyX document: Missing `\\end_layout'.")
    del_layout_after = True
    while l < layout_after:
        if document.body[l] != "":
            del_layout_after = False
    del document.body[k+1:layout_after+1]
    document.body[k+1:k+1] = [layout_line, ""]
    # delete \begin_layout and \end_inset and replace \begin_inset with
    # "\begin_layout Caption". This works because we can only have one
    # paragraph in the caption inset: The old \end_layout will be recycled.
    if document.body[k] == "":
        if document.body[j] == "":
    document.body[i] = "\\begin_layout Caption"
    if document.body[i+1] == "":
        del document.body[i+1]
# Accents of InsetLaTeXAccent: maps the accent letter to the corresponding
# Unicode combining character.  (The assignment line and closing brace were
# missing from this copy.)
accent_map = {
    "`" : u'\u0300', # grave
    "'" : u'\u0301', # acute
    "^" : u'\u0302', # circumflex
    "~" : u'\u0303', # tilde
    "=" : u'\u0304', # macron
    "u" : u'\u0306', # breve
    "." : u'\u0307', # dot above
    "\"": u'\u0308', # diaeresis
    "r" : u'\u030a', # ring above
    "H" : u'\u030b', # double acute
    "v" : u'\u030c', # caron
    "b" : u'\u0320', # minus sign below
    "d" : u'\u0323', # dot below
    "c" : u'\u0327', # cedilla
    "k" : u'\u0328', # ogonek
    "t" : u'\u0361', # tie. This is special: It spans two characters, but
                     # only one is given as argument, so we don't need to
                     # treat it differently.
}
# special accents of InsetLaTeXAccent without argument
# (The closing brace was missing from this copy.)
special_accent_map = {
    'i' : u'\u0131', # dotless i
    'j' : u'\u0237', # dotless j
    'l' : u'\u0142', # l with stroke
    'L' : u'\u0141', # L with stroke
}
# special accent arguments of InsetLaTeXAccent
# (The assignment line and closing brace were missing from this copy.)
accented_map = {
    '\\i' : u'\u0131', # dotless i
    '\\j' : u'\u0237', # dotless j
}
def _convert_accent(accent, accented_char):
    """Return the Unicode (NFC) equivalent of an InsetLaTeXAccent, or ''
    if the accent/character combination cannot be converted."""
    type = accent
    char = accented_char
    if char == '':
        if type in special_accent_map:
            return special_accent_map[type]
        # a missing char is treated as space by LyX
        char = ' '
    elif type == 'q' and char in ['t', 'd', 'l', 'L']:
        # Special caron, only used with t, d, l and L.
        # It is not in the map because we convert it to the same unicode
        # character as the normal caron: \q{} is only defined if babel with
        # the czech or slovak language is used, and the normal caron
        # produces the correct output if the T1 font encoding is used.
        # For the same reason we never convert to \q{} in the other direction.
        type = 'v'
    elif char in accented_map:
        char = accented_map[char]
    elif (len(char) > 1):
        # We can only convert accents on a single char
        return ''
    a = accent_map.get(type)
    if a:
        # Compose the base character with the combining accent.
        return unicodedata.normalize("NFC", "%s%s" % (char, a))
    return ''
def convert_ertbackslash(body, i, ert, default_layout):
    r"""Convert backslashes and '\n' in the string `ert` into valid ERT
    code, append the converted text to body[i] and return the (maybe
    incremented) line index i."""
    for c in ert:
        if c == '\\':
            # A backslash becomes the \backslash token and starts a new line.
            body[i] = body[i] + '\\backslash '
            i = i + 1
            body.insert(i, '')
        elif c == '\n':
            # A newline ends the current paragraph and opens a new one.
            body[i+1:i+1] = ['\\end_layout', '', '\\begin_layout %s' % default_layout, '']
            i = i + 4
        else:
            body[i] = body[i] + c
    return i
def convert_accent(document):
    # The following forms are supported by LyX:
    # '\i \"{a}' (standard form, as written by LyX)
    # '\i \"{}' (standard form, as written by LyX if the accented char is a space)
    # '\i \"{ }' (also accepted if the accented char is a space)
    # '\i \" a' (also accepted)
    # '\i \"' (also accepted)
    # NOTE(review): this copy is missing the scan loop header, several
    # `if match:`/branch lines, and parts of the ERT replacement lists;
    # restore them from the reference implementation before relying on it.
    re_wholeinset = re.compile(r'^(.*)(\\i\s+)(.*)$')
    re_contents = re.compile(r'^([^\s{]+)(.*)$')
    re_accentedcontents = re.compile(r'^\s*{?([^{}]*)}?\s*$')
    i = find_re(document.body, re_wholeinset, i)
    match = re_wholeinset.match(document.body[i])
    prefix = match.group(1)
    contents = match.group(3).strip()
    match = re_contents.match(contents)
    # Strip first char (always \)
    accent = match.group(1)[1:]
    accented_contents = match.group(2).strip()
    match = re_accentedcontents.match(accented_contents)
    accented_char = match.group(1)
    converted = _convert_accent(accent, accented_char)
    # NOTE(review): the trailing comma below makes `contents` a 1-tuple --
    # looks like a bug (or a lost surrounding line); verify against the
    # reference source.
    contents = '%s{%s}' % (accent, accented_char),
    document.body[i] = '%s%s' % (prefix, converted)
    # Fall back to ERT when the accent combination is unknown.
    document.warning("Converting unknown InsetLaTeXAccent `\\i %s' to ERT." % contents)
    document.body[i] = prefix
    document.body[i+1:i+1] = ['\\begin_inset ERT',
                              '\\begin_layout %s' % document.default_layout,
    i = convert_ertbackslash(document.body, i + 7,
                             document.default_layout)
    document.body[i+1:i+1] = ['\\end_layout',
# NOTE(review): extraction artifact — original source line numbers are fused
# into each line and gaps in that numbering mark lines dropped from this view
# (e.g. 'continue', 'try:', 'else:' scaffolding).  Code left byte-identical;
# comments only.
#
# revert_accent: turn pre-composed accented unicode characters back into the
# old \i \accent{char} InsetLaTeXAccent representation, for characters that
# cannot be encoded in the document's output encoding.
1020 def revert_accent(document):
# Build the inverse lookup tables of the module-level accent maps
# (accent_map / special_accent_map / accented_map are defined elsewhere in
# this file).
1021 inverse_accent_map = {}
1022 for k in accent_map:
1023 inverse_accent_map[accent_map[k]] = k
1024 inverse_special_accent_map = {}
1025 for k in special_accent_map:
1026 inverse_special_accent_map[special_accent_map[k]] = k
1027 inverse_accented_map = {}
1028 for k in accented_map:
1029 inverse_accented_map[accented_map[k]] = k
1031 # Since LyX may insert a line break within a word we must combine all
1032 # words before unicode normalization.
1033 # We do this only if the next line starts with an accent, otherwise we
1034 # would create things like '\begin_inset ERTstatus'.
1035 numberoflines = len(document.body)
1036 for i in range(numberoflines-1):
# Skip empty lines / lines ending in a space (word is not split there).
# NOTE(review): the 'continue' for this guard is one of the dropped lines.
1037 if document.body[i] == '' or document.body[i+1] == '' or document.body[i][-1] == ' ':
1039 if (document.body[i+1][0] in inverse_accent_map):
1040 # the last character of this line and the first of the next line
1041 # form probably a surrogate pair.
1042 while (len(document.body[i+1]) > 0 and document.body[i+1][0] != ' '):
1043 document.body[i] += document.body[i+1][0]
1044 document.body[i+1] = document.body[i+1][1:]
1046 # Normalize to "Normal form D" (NFD, also known as canonical decomposition).
1047 # This is needed to catch all accented characters.
1048 for i in range(numberoflines):
1049 # Unfortunately we have a mixture of unicode strings and plain strings,
1050 # because we never use u'xxx' for string literals, but 'xxx'.
1051 # Therefore we may have to try two times to normalize the data.
# NOTE(review): the 'try:'/'except' around the two normalize calls was
# dropped from this extraction; the second call is the utf-8 fallback.
1053 document.body[i] = unicodedata.normalize("NFD", document.body[i])
1055 document.body[i] = unicodedata.normalize("NFD", unicode(document.body[i], 'utf-8'))
1057 # Replace accented characters with InsetLaTeXAccent
1058 # Do not convert characters that can be represented in the chosen
# encoding_stack tracks the effective output encoding per layout nesting
# level; format argument 248 selects the pre-utf8 encoding rules.
1060 encoding_stack = [get_encoding(document.language, document.inputencoding, 248, document.cjk_encoding)]
1061 lang_re = re.compile(r"^\\lang\s(\S+)")
1063 while i < len(document.body):
1065 if (document.inputencoding == "auto" or document.inputencoding == "default") and document.cjk_encoding != '':
1066 # Track the encoding of the current line
1067 result = lang_re.match(document.body[i])
1069 language = result.group(1)
1070 if language == "default":
1071 encoding_stack[-1] = document.encoding
1073 from lyx2lyx_lang import lang
# lang[language][3] is the encoding column of the language table.
1074 encoding_stack[-1] = lang[language][3]
1076 elif find_token(document.body, "\\begin_layout", i, i + 1) == i:
1077 encoding_stack.append(encoding_stack[-1])
1079 elif find_token(document.body, "\\end_layout", i, i + 1) == i:
1080 del encoding_stack[-1]
1083 for j in range(len(document.body[i])):
1084 # dotless i and dotless j are both in special_accent_map and can
1085 # occur as an accented character, so we need to test that the
1086 # following character is no accent
1087 if (document.body[i][j] in inverse_special_accent_map and
1088 (j == len(document.body[i]) - 1 or document.body[i][j+1] not in inverse_accent_map)):
1089 accent = document.body[i][j]
# If the char encodes cleanly nothing is done; on UnicodeEncodeError it is
# replaced by an InsetLaTeXAccent.  ('try:' line dropped from this view.)
1091 dummy = accent.encode(encoding_stack[-1])
1092 except UnicodeEncodeError:
1093 # Insert the rest of the line as new line
1094 if j < len(document.body[i]) - 1:
1095 document.body.insert(i+1, document.body[i][j+1:])
1096 # Delete the accented character
1098 document.body[i] = document.body[i][:j-1]
1100 document.body[i] = u''
1101 # Finally add the InsetLaTeXAccent
1102 document.body[i] += "\\i \\%s{}" % inverse_special_accent_map[accent]
1104 elif j > 0 and document.body[i][j] in inverse_accent_map:
# Combining accent: the accented base char is the preceding character.
1105 accented_char = document.body[i][j-1]
1106 if accented_char == ' ':
1107 # Conform to LyX output
1109 elif accented_char in inverse_accented_map:
1110 accented_char = inverse_accented_map[accented_char]
1111 accent = document.body[i][j]
1113 dummy = unicodedata.normalize("NFC", accented_char + accent).encode(encoding_stack[-1])
1114 except UnicodeEncodeError:
1115 # Insert the rest of the line as new line
1116 if j < len(document.body[i]) - 1:
1117 document.body.insert(i+1, document.body[i][j+1:])
1118 # Delete the accented characters
1120 document.body[i] = document.body[i][:j-2]
1122 document.body[i] = u''
1123 # Finally add the InsetLaTeXAccent
1124 document.body[i] += "\\i \\%s{%s}" % (inverse_accent_map[accent], accented_char)
1128 # Normalize to "Normal form C" (NFC, pre-composed characters) again
1129 for i in range(numberoflines):
1130 document.body[i] = unicodedata.normalize("NFC", document.body[i])
def normalize_font_whitespace_259(document):
    """ Before format 259 the font changes were ignored if a
    whitespace was the first or last character in the sequence, this function
    transfers the whitespace outside."""

    # Map of font-change commands to their "off" value; a line like
    # "\series default" closes the corresponding change.
    # NOTE(review): the "\\color" and "\\bar" entries were missing from this
    # extraction (numbering gaps); restored from the upstream lyx_1_5.py.
    char_properties = {"\\series": "default",
                       "\\emph": "default",
                       "\\color": "none",
                       "\\shape": "default",
                       "\\bar": "default",
                       "\\family": "default"}
    return normalize_font_whitespace(document, char_properties)
def normalize_font_whitespace_274(document):
    """ Before format 259 (sic) the font changes were ignored if a
    whitespace was the first or last character in the sequence. This was
    corrected for most font properties in format 259, but the language
    was forgotten then. This function applies the same conversion done
    there (namely, transfers the whitespace outside) for font language
    changes, as well."""

    # Only the \lang property needs fixing at format 274.
    return normalize_font_whitespace(document, {"\\lang": "default"})
def get_paragraph_language(document, i):
    """ Return the language of the paragraph in which line i of the document
    body is. If the first thing in the paragraph is a \\lang command, that
    is the paragraph's language; otherwise, the paragraph's language is the
    document's language."""

    lines = document.body

    # First non-empty line after the enclosing \begin_layout.
    first_nonempty_line = \
        find_nonempty_line(lines, find_beginning_of_layout(lines, i) + 1)

    words = lines[first_nonempty_line].split()

    if len(words) > 1 and words[0] == "\\lang":
        # NOTE(review): this branch was lost in the extraction; restored
        # from the upstream lyx_1_5.py.
        return words[1]
    else:
        return document.language
# NOTE(review): extraction artifact — original source line numbers are fused
# into each line and gaps mark dropped lines (the 'return', 'changes = {}',
# 'i = 0', 'continue', 'if k == words[0]: continue', 'else:' and 'i += 1'
# scaffolding).  Code left byte-identical; comments only.
1175 def normalize_font_whitespace(document, char_properties):
1176 """ Before format 259 the font changes were ignored if a
1177 whitespace was the first or last character in the sequence, this function
1178 transfers the whitespace outside. Only a change in one of the properties
1179 in the provided char_properties is handled by this function."""
# Only the latex backend suffers from this; other backends return early.
1181 if document.backend != "latex":
1184 lines = document.body
# 'changes' tracks the currently open (non-default) font properties.
1189 while i < len(lines):
1190 words = lines[i].split()
1192 if len(words) > 0 and words[0] == "\\begin_layout":
1193 # a new paragraph resets all font changes
1195 # also reset the default language to be the paragraph's language
1196 if "\\lang" in char_properties.keys():
1197 char_properties["\\lang"] = \
1198 get_paragraph_language(document, i + 1)
1200 elif len(words) > 1 and words[0] in char_properties.keys():
1201 # we have a font change
1202 if char_properties[words[0]] == words[1]:
1203 # property gets reset
1204 if words[0] in changes.keys():
1205 del changes[words[0]]
1206 defaultproperty = True
# (else branch: property being switched on, dropped 'else:' line)
1209 changes[words[0]] = words[1]
1210 defaultproperty = False
1212 # We need to explicitly reset all changed properties if we find
1213 # a space below, because LyX 1.4 would output the space after
1214 # closing the previous change and before starting the new one,
1215 # and closing a font change means to close all properties, not
1216 # just the changed one.
# Case 1: a trailing space on the line BEFORE the font change.
1218 if lines[i-1] and lines[i-1][-1] == " ":
1219 lines[i-1] = lines[i-1][:-1]
1220 # a space before the font change
1222 for k in changes.keys():
1223 # exclude property k because that is already in lines[i]
1225 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1226 for k in changes.keys():
1227 # exclude property k because that must be added below anyway
1229 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1231 # Property is reset in lines[i], so add the new stuff afterwards
1232 lines[i+1:i+1] = added_lines
1234 # Reset property for the space
1235 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1236 lines[i:i] = added_lines
1237 i = i + len(added_lines)
# Case 2: a leading space on the line AFTER the font change.
1239 elif lines[i+1] and lines[i+1][0] == " " and (len(changes) > 0 or not defaultproperty):
1240 # a space after the font change
1241 if (lines[i+1] == " " and lines[i+2]):
1242 next_words = lines[i+2].split()
1243 if len(next_words) > 0 and next_words[0] == words[0]:
1244 # a single blank with a property different from the
1245 # previous and the next line must not be changed
1248 lines[i+1] = lines[i+1][1:]
1250 for k in changes.keys():
1251 # exclude property k because that is already in lines[i]
1253 added_lines[1:1] = ["%s %s" % (k, changes[k])]
1254 for k in changes.keys():
1255 # exclude property k because that must be added below anyway
1257 added_lines[0:0] = ["%s %s" % (k, char_properties[k])]
1258 # Reset property for the space
1259 added_lines[0:0] = ["%s %s" % (words[0], char_properties[words[0]])]
1260 lines[i:i] = added_lines
1261 i = i + len(added_lines)
def revert_utf8x(document):
    " Set utf8x encoding to utf8. "
    # NOTE(review): the if/else scaffolding was lost in the extraction;
    # restored from the upstream lyx_1_5.py.
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no \inputencoding header at all: add a sane default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8x":
            document.header[i] = "\\inputencoding utf8"
    # keep the in-memory document in sync with the header
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_utf8plain(document):
    " Set utf8plain encoding to utf8. "
    # NOTE(review): the if/else scaffolding was lost in the extraction;
    # restored from the upstream lyx_1_5.py.
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no \inputencoding header at all: add a sane default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "utf8-plain":
            document.header[i] = "\\inputencoding utf8"
    # keep the in-memory document in sync with the header
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def revert_beamer_alert(document):
    " Revert beamer's \\alert inset back to ERT. "
    # NOTE(review): the loop scaffolding was lost in the extraction;
    # restored from the upstream lyx_1_5.py.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset CharStyle Alert", i)
        if i == -1:
            return
        # turn the CharStyle inset into an ERT inset
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # find the first layout line inside the inset and wrap its content
        while 1:
            if (document.body[i][:13] == "\\begin_layout"):
                # Insert the \alert command
                document.body[i + 1] = "\\alert{" + document.body[i + 1] + '}'
                break
            i = i + 1
        i = i + 1
def revert_beamer_structure(document):
    " Revert beamer's \\structure inset back to ERT. "
    # NOTE(review): the loop scaffolding was lost in the extraction;
    # restored from the upstream lyx_1_5.py (mirror of revert_beamer_alert).
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset CharStyle Structure", i)
        if i == -1:
            return
        # turn the CharStyle inset into an ERT inset
        document.body[i] = "\\begin_inset ERT"
        i = i + 1
        # find the first layout line inside the inset and wrap its content
        while 1:
            if (document.body[i][:13] == "\\begin_layout"):
                document.body[i + 1] = "\\structure{" + document.body[i + 1] + '}'
                break
            i = i + 1
        i = i + 1
def convert_changes(document):
    " Switch output_changes off if tracking_changes is off. "
    # NOTE(review): the error-path returns were lost in the extraction;
    # restored from the upstream lyx_1_5.py.
    i = find_token(document.header, '\\tracking_changes', 0)
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
        return
    j = find_token(document.header, '\\output_changes', 0)
    if j == -1:
        document.warning("Malformed lyx document: Missing '\\output_changes'.")
        return
    tracking_changes = get_value(document.header, "\\tracking_changes", i)
    output_changes = get_value(document.header, "\\output_changes", j)
    # outputting changes without tracking them is inconsistent: switch it off
    if tracking_changes == "false" and output_changes == "true":
        document.header[j] = "\\output_changes false"
def revert_ascii(document):
    " Set ascii encoding to auto. "
    # NOTE(review): the if/else scaffolding was lost in the extraction;
    # restored from the upstream lyx_1_5.py.
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no \inputencoding header at all: add a sane default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc == "ascii":
            document.header[i] = "\\inputencoding auto"
    # keep the in-memory document in sync with the header
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)
def normalize_language_name(document):
    """Rename obsolete language names to their modern equivalents."""
    renames = {"brazil": "brazilian",
               "portuges": "portuguese"}
    new_name = renames.get(document.language)
    if new_name is None:
        # nothing to normalize
        return
    document.language = new_name
    pos = find_token(document.header, "\\language", 0)
    document.header[pos] = "\\language %s" % new_name
def revert_language_name(document):
    """Rename modern language names back to their pre-1.5 spellings."""
    renames = {"brazilian": "brazil",
               "portuguese": "portuges"}
    old_name = renames.get(document.language)
    if old_name is None:
        # nothing to revert
        return
    document.language = old_name
    pos = find_token(document.header, "\\language", 0)
    document.header[pos] = "\\language %s" % old_name
1375 # \textclass cv -> \textclass simplecv
def convert_cv_textclass(document):
    """Rename the obsolete 'cv' text class to 'simplecv'."""
    mapping = {"cv": "simplecv"}
    document.textclass = mapping.get(document.textclass, document.textclass)
def revert_cv_textclass(document):
    """Rename the 'simplecv' text class back to the old 'cv' name."""
    mapping = {"simplecv": "cv"}
    document.textclass = mapping.get(document.textclass, document.textclass)
1387 # add scaleBeforeRotation graphics param
def convert_graphics_rotation(document):
    " add scaleBeforeRotation graphics parameter. "
    # NOTE(review): the loop scaffolding was lost in the extraction;
    # restored from the upstream lyx_1_5.py.
    i = 0
    while 1:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i+1)
        if j == -1:
            # this should not happen
            document.warning("Malformed LyX document: Could not find end of graphics inset.")
        # Seach for rotateAngle and width or height or scale
        # If these params are not there, nothing needs to be done.
        # (graphics params are written with a leading tab, hence "\t...")
        k = find_token(document.body, "\trotateAngle", i + 1, j)
        l = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
        if (k != -1 and l != -1):
            document.body.insert(j, 'scaleBeforeRotation')
        i = i + 1
1409 # remove scaleBeforeRotation graphics param
# NOTE(review): extraction artifact — original source line numbers are fused
# into each line and gaps mark dropped lines (the 'while 1:'/'return' loop
# scaffolding, the 'if k != -1:' / 'else:' branches around the deletion, and
# the 'if special == "":' / 'else:' pair).  Code left byte-identical.
1410 def revert_graphics_rotation(document):
1411 " remove scaleBeforeRotation graphics parameter. "
1414 i = find_token(document.body, "\\begin_inset Graphics", i)
1417 j = find_end_of_inset(document.body, i + 1)
1420 document.warning("Malformed LyX document: Could not find end of graphics inset.")
1421 # If there's a scaleBeforeRotation param, just remove that
1422 k = find_token(document.body, "\tscaleBeforeRotation", i + 1, j)
1424 del document.body[k]
1426 # if not, and if we have rotateAngle and width or height or scale,
1427 # we have to put the rotateAngle value to special
1428 rotateAngle = get_value(document.body, 'rotateAngle', i + 1, j)
1429 special = get_value(document.body, 'special', i + 1, j)
1430 if rotateAngle != "":
1431 k = find_tokens(document.body, ["\twidth", "\theight", "\tscale"], i + 1, j)
# No existing 'special' param: insert one carrying the rotation angle.
1435 document.body.insert(j-1, '\tspecial angle=%s' % rotateAngle)
# Otherwise prepend the angle to the existing 'special' value.
1437 l = find_token(document.body, "\tspecial", i + 1, j)
1438 document.body[l] = document.body[l].replace(special, 'angle=%s,%s' % (rotateAngle, special))
# Finally drop the now-redundant rotateAngle parameter.
1439 k = find_token(document.body, "\trotateAngle", i + 1, j)
1441 del document.body[k]
def convert_tableborder(document):
    # The problematic is: LyX double the table cell border as it ignores the "|" character in
    # the cell arguments. A fix takes care of this and therefore the "|" has to be removed
    #
    # NOTE(review): the 'i = 0' initialisation and the 'i = i + 1' loop step
    # were lost in the extraction; restored from the upstream lyx_1_5.py.
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find("|>{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # delete the "|"
            document.body[i] = document.body[i][:k] + document.body[i][k+1:len(document.body[i])-1]
        i = i + 1
def revert_tableborder(document):
    # Re-insert the "|" cell-border character removed by convert_tableborder.
    #
    # NOTE(review): the 'i = 0' initialisation and the 'i = i + 1' loop step
    # were lost in the extraction; restored from the upstream lyx_1_5.py.
    i = 0
    while i < len(document.body):
        h = document.body[i].find("leftline=\"true\"", 0, len(document.body[i]))
        k = document.body[i].find(">{", 0, len(document.body[i]))
        # the two tokens have to be in one line
        if (h != -1 and k != -1):
            # add the "|" back in front of ">{"
            document.body[i] = document.body[i][:k] + '|' + document.body[i][k:]
        i = i + 1
# NOTE(review): extraction artifact — original source line numbers are fused
# into each line and gaps mark dropped lines (the 'i = 0'/'k = -1' loop
# initialisation, the 'if k == -1:' guards, the 'i = i + 1' step, the
# 'if k != -1:' / 'else:' pair and the final 'if i != -1:' guard).
# Code left byte-identical; comments only.
1472 def revert_armenian(document):
1474 # set inputencoding from armscii8 to auto
1475 if document.inputencoding == "armscii8":
1476 i = find_token(document.header, "\\inputencoding", 0)
1478 document.header[i] = "\\inputencoding auto"
1479 # check if preamble exists, if not k is set to -1
# Scan the preamble for any non-empty content ('\' or '%' starts a line).
1482 while i < len(document.preamble):
1484 k = document.preamble[i].find("\\", 0, len(document.preamble[i]))
1486 k = document.preamble[i].find("%", 0, len(document.preamble[i]))
1488 # add the entry \usepackage{armtex} to the document preamble
1489 if document.language == "armenian":
1490 # set the armtex entry as the first preamble line
1492 document.preamble[0:0] = ["\\usepackage{armtex}"]
1493 # create the preamble when it doesn't exist
1495 document.preamble.append('\\usepackage{armtex}')
1496 # Set document language from armenian to english
1497 if document.language == "armenian":
1498 document.language = "english"
1499 i = find_token(document.header, "\\language", 0)
1501 document.header[i] = "\\language english"
def revert_CJK(document):
    " Set CJK encodings to default and languages chinese, japanese and korean to english. "
    # NOTE(review): the if/else scaffolding was lost in the extraction;
    # restored from the upstream lyx_1_5.py.
    encodings = ["Bg5", "Bg5+", "GB", "GBt", "GBK", "JIS",
                 "KS", "SJIS", "UTF8", "EUC-TW", "EUC-JP"]
    i = find_token(document.header, "\\inputencoding", 0)
    if i == -1:
        # no \inputencoding header at all: add a sane default
        document.header.append("\\inputencoding auto")
    else:
        inputenc = get_value(document.header, "\\inputencoding", i)
        if inputenc in encodings:
            document.header[i] = "\\inputencoding default"
    # keep the in-memory document in sync with the header
    document.inputencoding = get_value(document.header, "\\inputencoding", 0)

    if document.language == "chinese-simplified" or \
       document.language == "chinese-traditional" or \
       document.language == "japanese" or document.language == "korean":
        document.language = "english"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language english"
def revert_preamble_listings_params(document):
    " Revert preamble option \\listings_params "
    # NOTE(review): the 'if i != -1:' guard was lost in the extraction;
    # restored from the upstream lyx_1_5.py.
    i = find_token(document.header, "\\listings_params", 0)
    if i != -1:
        # move the setting into the preamble as a \lstset call
        document.preamble.append('\\usepackage{listings}')
        document.preamble.append('\\lstset{%s}' % document.header[i].split()[1].strip('"'))
        document.header.pop(i)
# NOTE(review): extraction artifact — original source line numbers are fused
# into each line and gaps mark dropped lines (large parts of the example
# docstring, the while loop, '-1' guards, 'break's and the ERT line lists).
# Code left byte-identical; comments only.
1535 def revert_listings_inset(document):
1536 r''' Revert listings inset to \lstinline or \begin, \end lstlisting, translate
1540 lstparams "language=Delphi"
1544 \begin_layout Standard
1554 \begin_layout Standard
1558 lstinline[language=Delphi]{var i = 10;}
1563 There can be an caption inset in this inset
1565 \begin_layout Standard
1566 \begin_inset Caption
1568 \begin_layout Standard
1570 \begin_inset LatexCommand label
# Main loop: find each listings inset and rewrite it as ERT.
1586 i = find_token(document.body, '\\begin_inset listings', i)
1590 if not '\\usepackage{listings}' in document.preamble:
1591 document.preamble.append('\\usepackage{listings}')
1592 j = find_end_of_inset(document.body, i + 1)
1594 # this should not happen
# The first three lines of the inset carry its options.
1600 for line in range(i + 1, i + 4):
1601 if document.body[line].startswith('inline'):
1602 inline = document.body[line].split()[1]
1603 if document.body[line].startswith('lstparams'):
1604 params = document.body[line].split()[1].strip('"')
1605 if document.body[line].startswith('status'):
1606 status = document.body[line].split()[1].strip()
# Extract caption and label from an optional Caption inset.
1611 cap = find_token(document.body, '\\begin_inset Caption', i)
1613 cap_end = find_end_of_inset(document.body, cap + 1)
1615 # this should not happen
1618 lbl = find_token(document.body, '\\begin_inset LatexCommand label', cap + 1)
1620 lbl_end = find_end_of_inset(document.body, lbl + 1)
1622 # this should not happen
1627 for line in document.body[lbl : lbl_end + 1]:
1628 if line.startswith('name '):
1629 label = line.split()[1].strip('"')
1631 for line in document.body[cap : lbl ] + document.body[lbl_end + 1 : cap_end + 1]:
1632 if not line.startswith('\\'):
1633 caption += line.strip()
1636 # looking for the oneline code for lstinline
1637 inlinecode = document.body[find_end_of_layout(document.body,
1638 find_token(document.body, '\\begin_layout %s' % document.default_layout, i + 1) +1 ) - 1]
# Fold caption/label into the lstset parameter string.
1639 if len(caption) > 0:
1640 if len(params) == 0:
1641 params = 'caption={%s}' % caption
1643 params += ',caption={%s}' % caption
1645 if len(params) == 0:
1646 params = 'label={%s}' % label
1648 params += ',label={%s}' % label
1650 params = '[%s]' % params
1651 params = params.replace('\\', '\\backslash\n')
# Inline listings become \lstinline ERT; block listings become a
# \begin{lstlisting}...\end{lstlisting} ERT.
1652 if inline == 'true':
1653 document.body[i:(j+1)] = [r'\begin_inset ERT',
1654 'status %s' % status,
1655 r'\begin_layout %s' % document.default_layout,
1659 'lstinline%s{%s}' % (params, inlinecode),
1664 document.body[i: j+1] = [r'\begin_inset ERT',
1665 'status %s' % status,
1667 r'\begin_layout %s' % document.default_layout,
1671 r'begin{lstlisting}%s' % params,
1673 ] + document.body[k : j - 1] + \
1675 r'\begin_layout %s' % document.default_layout,
# NOTE(review): extraction artifact — original source line numbers are fused
# into each line and gaps mark dropped lines (parts of the docstring, the
# while loop scaffolding, '-1' guards and the tail of the ERT line list).
# Code left byte-identical; comments only.
1684 def revert_include_listings(document):
1685 r''' Revert lstinputlisting Include option , translate
1686 \begin_inset Include \lstinputlisting{file}[opt]
1696 \begin_layout Standard
1700 lstinputlisting{file}[opt]
# Main loop: find each lstinputlisting Include inset and rewrite it as ERT.
1708 i = find_token(document.body, r'\begin_inset Include \lstinputlisting', i)
1712 if not '\\usepackage{listings}' in document.preamble:
1713 document.preamble.append('\\usepackage{listings}')
1714 j = find_end_of_inset(document.body, i + 1)
1716 # this should not happen
1718 # find command line lstinputlisting{file}[options]
1719 cmd, file, option = '', '', ''
1720 if re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]):
1721 cmd, file, option = re.match(r'\\(lstinputlisting){([.\w]*)}(.*)', document.body[i].split()[2]).groups()
# Escape backslashes for ERT output.
1722 option = option.replace('\\', '\\backslash\n')
1723 document.body[i : j + 1] = [r'\begin_inset ERT',
1726 r'\begin_layout %s' % document.default_layout,
1730 '%s%s{%s}' % (cmd, option, file),
def revert_ext_font_sizes(document):
    """Move a numeric \\paperfontsize into the class options for ext* classes."""
    # NOTE(review): the "fontsize += 'pt'" line and the if/else scaffolding
    # were lost in the extraction; restored from the upstream lyx_1_5.py.
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize not in ('10', '11', '12'): return
    fontsize += 'pt'

    i = find_token(document.header, '\\paperfontsize', 0)
    document.header[i] = '\\paperfontsize default'

    i = find_token(document.header, '\\options', 0)
    if i == -1:
        # no \options line yet: create one right after \textclass
        i = find_token(document.header, '\\textclass', 0) + 1
        document.header[i:i] = ['\\options %s' % fontsize]
    else:
        document.header[i] += ',%s' % fontsize
def convert_ext_font_sizes(document):
    """Move a 10pt/11pt/12pt class option back into \\paperfontsize for ext* classes."""
    # NOTE(review): the for/else scaffolding, 'break'/'return' lines and the
    # final if/else were lost in the extraction; restored from upstream.
    if document.backend != "latex": return
    if not document.textclass.startswith("ext"): return

    fontsize = get_value(document.header, '\\paperfontsize', 0)
    if fontsize != 'default': return

    i = find_token(document.header, '\\options', 0)
    if i == -1: return

    options = get_value(document.header, '\\options', i)

    fontsizes = '10pt', '11pt', '12pt'
    for fs in fontsizes:
        if options.find(fs) != -1:
            break
    else: # this else will only be attained if the for cycle had no match
        return

    options = options.split(',')
    for j, opt in enumerate(options):
        if opt in fontsizes:
            # strip the trailing 'pt' to get the plain size
            fontsize = opt[:-2]
            del options[j]
            break
    else:
        return

    k = find_token(document.header, '\\paperfontsize', 0)
    document.header[k] = '\\paperfontsize %s' % fontsize

    if options:
        document.header[i] = '\\options %s' % ','.join(options)
    else:
        # the font size was the only option: drop the \options line entirely
        del document.header[i]
# NOTE(review): extraction artifact — original source line numbers are fused
# into each line and gaps mark dropped lines (parts of the example docstring,
# the while loop scaffolding and most of the replacement line list).
# Code left byte-identical; comments only.
1792 def revert_separator_layout(document):
1793 r'''Revert --Separator-- to a lyx note
1796 \begin_layout --Separator--
1802 \begin_layout Standard
1803 \begin_inset Note Note
1806 \begin_layout Standard
# Main loop: find each --Separator-- layout and rewrite it as a Note inset.
1819 i = find_token(document.body, r'\begin_layout --Separator--', i)
1822 j = find_end_of_layout(document.body, i + 1)
1824 # this should not happen
# Replace the whole layout with a standard layout containing a LyX note.
1826 document.body[i : j + 1] = [r'\begin_layout %s' % document.default_layout,
1827 r'\begin_inset Note Note',
1830 r'\begin_layout %s' % document.default_layout,
1831 'Separate Environment',
# The original layout's content is preserved inside the note.
1835 document.body[ i + 1 : j] + \
def convert_arabic (document):
    """Rename the 'arabic' language to 'arabic_arabtex' (header and body)."""
    # NOTE(review): the loop scaffolding was lost in the extraction;
    # restored from the upstream lyx_1_5.py.
    if document.language == "arabic":
        document.language = "arabic_arabtex"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic_arabtex"
    i = 0
    while i < len(document.body):
        h = document.body[i].find("\lang arabic", 0, len(document.body[i]))
        if h != -1:
            # change the language name
            document.body[i] = '\lang arabic_arabtex'
        i = i + 1
def revert_arabic (document):
    """Rename the 'arabic_arabtex' language back to 'arabic' (header and body)."""
    # NOTE(review): the loop scaffolding was lost in the extraction;
    # restored from the upstream lyx_1_5.py.
    if document.language == "arabic_arabtex":
        document.language = "arabic"
        i = find_token(document.header, "\\language", 0)
        if i != -1:
            document.header[i] = "\\language arabic"
    i = 0
    while i < len(document.body):
        h = document.body[i].find("\lang arabic_arabtex", 0, len(document.body[i]))
        if h != -1:
            # change the language name
            document.body[i] = '\lang arabic'
        i = i + 1
def read_unicodesymbols():
    " Read the unicodesymbols list of unicode characters and corresponding commands."
    # NOTE(review): the dict initialisation, comment-line guard, try/except
    # and the close/return tail were lost in the extraction; restored from
    # the upstream lyx_1_5.py.
    pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
    # NOTE(review): .strip('lyx2lyx') strips a *character set*, not the
    # suffix — kept as-is to preserve upstream behavior, but worth a look.
    fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
    spec_chars = {}
    for line in fp.readlines():
        if line[0] != '#':
            line=line.replace(' "',' ') # remove all quotation marks with spaces before
            line=line.replace('" ',' ') # remove all quotation marks with spaces after
            line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
            try:
                # flag1 and flag2 are preamble and other flags
                [ucs4,command,flag1,flag2] =line.split(None,3)
                # NOTE: eval() of the data file's codepoint field — the file
                # ships with LyX, but this is not safe for untrusted input.
                spec_chars[unichr(eval(ucs4))] = [command, flag1, flag2]
            except:
                # malformed lines in the data file are silently skipped
                pass
    fp.close()
    return spec_chars
# NOTE(review): extraction artifact — original source line numbers are fused
# into each line and gaps mark dropped lines (the 'i = 0'/'mod_line' setup,
# the try/except pairs around encode(), 'else:' branches and loop steps).
# Code left byte-identical; comments only.
1892 def revert_unicode(document):
1893 '''Transform unicode characters that can not be written using the
1894 document encoding to commands according to the unicodesymbols
1895 file. Characters that can not be replaced by commands are replaced by
1896 an replacement string. Flags other than 'combined' are currently not
1899 replacement_character = '???'
1900 spec_chars = read_unicodesymbols()
1902 # Define strings to start and end ERT and math insets
1903 ert_intro='\n\n\\begin_inset ERT\nstatus collapsed\n\\begin_layout %s\n\\backslash\n' % document.default_layout
1904 ert_outro='\n\\end_layout\n\n\\end_inset\n'
1905 math_intro='\n\\begin_inset Formula $'
1906 math_outro='$\n\\end_inset'
1907 # Find unicode characters and replace them
1908 in_ert = False # flag set to 1 if in ERT inset
1909 in_math = False # flag set to 1 if in math inset
1910 insets = [] # list of active insets
1912 # Go through the file to capture all combining characters
1913 last_char = '' # to store the previous character
# Track inset nesting so replacements match the surrounding context
# (plain text vs. ERT vs. math).
1916 while i < len(document.body):
1917 line = document.body[i]
1919 if line.find('\\begin_inset') > -1:
1920 # check which inset to start
1921 if line.find('\\begin_inset ERT') > -1:
1923 insets.append('ert')
1924 elif line.find('\\begin_inset Formula') > -1:
1926 insets.append('math')
1928 insets.append('other')
1929 if line.find('\\end_inset') > -1:
1930 # check which inset to end
1932 cur_inset = insets.pop()
1933 if cur_inset == 'ert':
1935 elif cur_inset == 'math':
1938 pass # end of other inset
1940 pass # inset list was empty (for some reason)
1942 # Try to write the line
1944 # If all goes well the line is written here
1945 dummy = line.encode(document.encoding)
1946 last_char = line[-1]
1949 # Error, some character(s) in the line need to be replaced
# Fallback path: rebuild the line character by character into mod_line.
1951 for character in line:
1953 # Try to write the character
1954 dummy = character.encode(document.encoding)
1955 mod_line += character
1956 last_char = character
1958 # Try to replace with ERT/math inset
1959 if spec_chars.has_key(character):
1960 command = spec_chars[character][0] # the command to replace unicode
1961 flag1 = spec_chars[character][1]
1962 flag2 = spec_chars[character][2]
1963 if flag1.find('combining') > -1 or flag2.find('combining') > -1:
1964 # We have a character that should be combined with the previous
1965 command += '{' + last_char + '}'
1966 # Remove the last character. Ignore if it is whitespace
1967 if len(last_char.rstrip()):
1968 # last_char was found and is not whitespace
1970 mod_line = mod_line[:-1]
1971 else: # last_char belongs to the last line
1972 document.body[i-1] = document.body[i-1][:-1]
1974 # The last character was replaced by a command. For now it is
1975 # ignored. This could be handled better.
# Commands in the data file are double-backslashed; rewrite them into
# the right wrapper for the current context (math / ERT / plain).
1977 if command[0:2] == '\\\\':
1978 if command[2:12]=='ensuremath':
1981 command = command.replace('\\\\ensuremath{\\\\', '$\n\\backslash\n')
1982 command = command.replace('}', '$\n')
1984 # add a math inset with the replacement character
1985 command = command.replace('\\\\ensuremath{\\', math_intro)
1986 command = command.replace('}', math_outro)
1988 # we are already in a math inset
1989 command = command.replace('\\\\ensuremath{\\', '')
1990 command = command.replace('}', '')
1993 # avoid putting an ERT in a math; instead put command as text
1994 command = command.replace('\\\\', '\mathrm{')
1995 command = command + '}'
1997 # add an ERT inset with the replacement character
1998 command = command.replace('\\\\', ert_intro)
1999 command = command + ert_outro
2001 command = command.replace('\\\\', '\n\\backslash\n')
2002 last_char = '' # indicate that the character should not be removed
2005 # Replace with replacement string
2006 mod_line += replacement_character
# Splice the rebuilt (possibly multi-line) text back into the body.
2007 document.body[i:i+1] = mod_line.split('\n')
2008 i += len(mod_line.split('\n'))
# Conversion machinery tables: each entry is [target_format, [functions]].
# NOTE(review): extraction artifact — original source line numbers are fused
# into each line, several table entries are missing (gaps in the numbering,
# e.g. formats 248, 250-251, 253, 255-256, 260, 262, 266-270, 272-273 in the
# convert table and the 'revert = [' header itself), and entries may have
# been mangled.  Left byte-identical; reconcile against upstream lyx_1_5.py.
2015 supported_versions = ["1.5.0","1.5"]
2016 convert = [[246, []],
2017 [247, [convert_font_settings]],
2019 [249, [convert_utf8]],
2022 [252, [convert_commandparams, convert_bibitem]],
2024 [254, [convert_esint]],
2027 [257, [convert_caption]],
2028 [258, [convert_lyxline]],
2029 [259, [convert_accent, normalize_font_whitespace_259]],
2031 [261, [convert_changes]],
2033 [263, [normalize_language_name]],
2034 [264, [convert_cv_textclass]],
2035 [265, [convert_tableborder]],
2041 [271, [convert_ext_font_sizes]],
2044 [274, [normalize_font_whitespace_274]],
2045 [275, [convert_graphics_rotation]],
2046 [276, [convert_arabic]]
# The revert table runs the inverse transformations, newest format first.
2050 [275, [revert_arabic]],
2051 [274, [revert_graphics_rotation]],
2053 [272, [revert_separator_layout]],
2054 [271, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2055 [270, [revert_ext_font_sizes]],
2056 [269, [revert_beamer_alert, revert_beamer_structure]],
2057 [268, [revert_preamble_listings_params, revert_listings_inset, revert_include_listings]],
2058 [267, [revert_CJK]],
2059 [266, [revert_utf8plain]],
2060 [265, [revert_armenian]],
2061 [264, [revert_tableborder]],
2062 [263, [revert_cv_textclass]],
2063 [262, [revert_language_name]],
2064 [261, [revert_ascii]],
2066 [259, [revert_utf8x]],
2069 [256, [revert_caption]],
2070 [255, [revert_encodings]],
2071 [254, [revert_clearpage, revert_cleardoublepage]],
2072 [253, [revert_esint]],
2073 [252, [revert_nomenclature, revert_printnomenclature]],
2074 [251, [revert_commandparams]],
2075 [250, [revert_cs_label]],
2077 [248, [revert_accent, revert_utf8, revert_unicode]],
2078 [247, [revert_booktabs]],
2079 [246, [revert_font_settings]],
2080 [245, [revert_framed]]]
2083 if __name__ == "__main__":