1 # This file is part of lyx2lyx
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2002 Dekel Tsur <dekel@lyx.org>
4 # Copyright (C) 2004 José Matos <jamatos@lyx.org>
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 """ Convert files to the file format generated by lyx 1.2"""
24 from parser_tools import find_token, find_token_backwards, \
25 find_tokens, find_tokens_backwards, \
26 find_beginning_of, find_end_of, find_re, \
27 is_nonempty_line, find_nonempty_line, \
28 get_value, check_token
30 ####################################################################
31 # Private helper functions
33 def get_layout(line, default_layout):
34 " Get layout, if empty return the default layout."
41 def get_paragraph(lines, i, format):
42 " Finds the paragraph that contains line i."
43 begin_layout = "\\layout"
46 i = find_tokens_backwards(lines, ["\\end_inset", begin_layout], i)
48 if check_token(lines[i], begin_layout):
50 i = find_beginning_of_inset(lines, i)
54 def get_next_paragraph(lines, i, format):
55 " Finds the paragraph after the paragraph that contains line i."
56 tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]
59 i = find_tokens(lines, tokens, i)
60 if not check_token(lines[i], "\\begin_inset"):
62 i = find_end_of_inset(lines, i)
def find_beginning_of_inset(lines, i):
    """Locate the start of the inset around line i (line i itself counts).

    Thin wrapper: hands the begin/end inset token pair to find_beginning_of
    and returns whatever that helper returns.
    """
    opening_token = "\\begin_inset"
    closing_token = "\\end_inset"
    return find_beginning_of(lines, i, opening_token, closing_token)
def find_end_of_inset(lines, i):
    r"""Find the matching \end_inset.

    Thin wrapper around find_end_of using the \begin_inset / \end_inset token
    pair; the helper's result is returned unchanged.  At the call sites in
    this file, lines[i] is the inset's \begin_inset line.

    The docstring is a raw string so that the backslashes are not read as
    (invalid) escape sequences.
    """
    return find_end_of(lines, i, "\\begin_inset", "\\end_inset")
def find_end_of_tabular(lines, i):
    """Locate the closing tag that pairs with a tabular opening tag.

    Thin wrapper: hands the "<lyxtabular" / "</lyxtabular" token pair to
    find_end_of and returns whatever that helper returns.
    """
    opening_token = "<lyxtabular"
    closing_token = "</lyxtabular"
    return find_end_of(lines, i, opening_token, closing_token)
81 def get_tabular_lines(lines, i):
82 " Returns a lists of tabular lines."
85 j = find_end_of_tabular(lines, i)
90 if check_token(lines[i], "\\begin_inset"):
91 i = find_end_of_inset(lines, i)+1
97 # End of helper functions
98 ####################################################################
102 "footnote": ["\\begin_inset Foot",
104 "margin": ["\\begin_inset Marginal",
106 "fig": ["\\begin_inset Float figure",
109 "tab": ["\\begin_inset Float table",
112 "alg": ["\\begin_inset Float algorithm",
115 "wide-fig": ["\\begin_inset Float figure",
118 "wide-tab": ["\\begin_inset Float table",
# Font attribute tokens that are checked (and reset) around a converted float.
font_tokens = [
    "\\family",
    "\\series",
    "\\shape",
    "\\size",
    "\\emph",
    "\\bar",
    "\\noun",
    "\\color",
    "\\lang",
    "\\latex",
]

# Matches a line that carries "\pextra_type 3".
pextra_type3_rexp = re.compile(r".*\\pextra_type\s+3")

# Parses a \pextra_type specification.  Groups used elsewhere in this file:
#   1 = type, 3 = alignment, 5 = hfill, 7 = start_minipage,
#   9 = width keyword (\pextra_width or \pextra_widthp), 10 = width value.
pextra_rexp = re.compile(
    r"\\pextra_type\s+(\S+)"
    r"(\s+\\pextra_alignment\s+(\S+))?"
    r"(\s+\\pextra_hfill\s+(\S+))?"
    r"(\s+\\pextra_start_minipage\s+(\S+))?"
    r"(\s+(\\pextra_widthp?)\s+(\S*))?"
)
135 " Get width from a regular expression. "
137 if mo.group(9) == "\\pextra_widthp":
138 return mo.group(10)+"col%"
145 def remove_oldfloat(document):
146 " Change \begin_float .. \end_float into \begin_inset Float .. \end_inset"
147 lines = document.body
150 i = find_token(lines, "\\begin_float", i)
153 # There are no nested floats, so finding the end of the float is simple
154 j = find_token(lines, "\\end_float", i+1)
156 floattype = lines[i].split()[1]
157 if floattype not in floats:
158 document.warning("Error! Unknown float type " + floattype)
161 # skip \end_deeper tokens
163 while check_token(lines[i2], "\\end_deeper"):
166 j2 = get_next_paragraph(lines, j + 1, document.format + 1)
167 lines[j2:j2] = ["\\end_deeper "]*(i2-(i+1))
169 new = floats[floattype]+[""]
171 # Check if the float is floatingfigure
172 k = find_re(lines, pextra_type3_rexp, i, j)
174 mo = pextra_rexp.search(lines[k])
175 width = get_width(mo)
176 lines[k] = re.sub(pextra_rexp, "", lines[k])
177 new = ["\\begin_inset Wrap figure",
178 'width "%s"' % width,
182 new = new+lines[i2:j]+["\\end_inset ", ""]
184 # After a float, all font attributes are reset.
185 # We need to output '\foo default' for every attribute foo
186 # whose value is not default before the float.
187 # The check here is not accurate, but it doesn't matter
188 # as extra '\foo default' commands are ignored.
189 # In fact, it might be safer to output '\foo default' for all
191 k = get_paragraph(lines, i, document.format + 1)
193 for token in font_tokens:
194 if find_token(lines, token, k, i) != -1:
196 # This is not necessary, but we want the output to be
197 # as similar as possible to the lyx format
200 if token == "\\lang":
201 new.append(token+" "+ document.language)
203 new.append(token+" default ")
# Matches a line that carries "\pextra_type 1" or "\pextra_type 2".
pextra_type2_rexp = re.compile(
    r".*\\pextra_type\s+[12]"
)

# Matches a line containing either "\layout" or "\pextra_type 2".
pextra_type2_rexp2 = re.compile(
    r".*(\\layout|\\pextra_type\s+2)"
)

# Detects the "\pextra_widthp" keyword anywhere in a line (used with search).
pextra_widthp = re.compile(
    r"\\pextra_widthp"
)
213 def remove_pextra(document):
214 " Remove pextra token."
215 lines = document.body
219 i = find_re(lines, pextra_type2_rexp, i)
223 # Sometimes the \pextra_widthp argument comes on its own
224 # line. If that happens insert it back in this line.
225 if pextra_widthp.search(lines[i+1]):
226 lines[i] = lines[i] + ' ' + lines[i+1]
229 mo = pextra_rexp.search(lines[i])
230 width = get_width(mo)
232 if mo.group(1) == "1":
233 # handle \pextra_type 1 (indented paragraph)
234 lines[i] = re.sub(pextra_rexp, "\\leftindent "+width+" ", lines[i])
238 # handle \pextra_type 2 (minipage)
239 position = mo.group(3)
241 lines[i] = re.sub(pextra_rexp, "", lines[i])
243 start = ["\\begin_inset Minipage",
244 "position " + position,
247 'width "%s"' % width,
253 start = ["","\hfill",""]+start
255 start = ['\\layout %s' % document.default_layout,''] + start
257 j0 = find_token_backwards(lines,"\\layout", i-1)
258 j = get_next_paragraph(lines, i, document.format + 1)
262 # collect more paragraphs to the minipage
264 if j == -1 or not check_token(lines[j], "\\layout"):
266 i = find_re(lines, pextra_type2_rexp2, j+1)
269 mo = pextra_rexp.search(lines[i])
272 if mo.group(7) == "1":
275 lines[i] = re.sub(pextra_rexp, "", lines[i])
276 j = find_tokens(lines, ["\\layout", "\\end_float"], i+1)
279 end = ["\\end_inset "]
281 lines[j0:j] = start+mid+end
286 " Are all the lines empty?"
287 return list(filter(is_nonempty_line, lines)) == []
# Lines starting with one of these tokens are moved outside the ERT inset.
move_rexp = re.compile(
    r"\\(family|series|shape|size|emph|numeric|bar|noun|end_deeper)"
)

# Things that interrupt ERT text: an inset start, \hfill, or a line that
# contains \SpecialChar.
ert_rexp = re.compile(
    r"\\begin_inset|\\hfill|.*\\SpecialChar"
)

# Splits a line into the text before \SpecialChar (group 1) and the
# \SpecialChar part up to the end of the line (group 2).
spchar_rexp = re.compile(
    r"(.*)(\\SpecialChar.*)"
)
295 def remove_oldert(document):
296 " Remove old ERT inset."
297 ert_begin = ["\\begin_inset ERT",
300 '\\layout %s' % document.default_layout,
302 lines = document.body
305 i = find_tokens(lines, ["\\latex latex", "\\layout LaTeX"], i)
310 # \end_inset is for ert inside a tabular cell. The other tokens
312 j = find_tokens(lines, ["\\latex default", "\\layout", "\\begin_inset", "\\end_inset", "\\end_float", "\\the_end"],
314 if check_token(lines[j], "\\begin_inset"):
315 j = find_end_of_inset(lines, j)+1
319 if check_token(lines[j], "\\layout"):
320 while j-1 >= 0 and check_token(lines[j-1], "\\begin_deeper"):
323 # We need to remove insets, special chars & font commands from ERT text
326 if check_token(lines[i], "\\layout LaTeX"):
327 new = ['\layout %s' % document.default_layout, "", ""]
331 k2 = find_re(lines, ert_rexp, k, j)
332 inset = hfill = specialchar = 0
335 elif check_token(lines[k2], "\\begin_inset"):
337 elif check_token(lines[k2], "\\hfill"):
343 mo = spchar_rexp.match(lines[k2])
344 lines[k2] = mo.group(1)
345 specialchar_str = mo.group(2)
349 for line in lines[k:k2]:
350 # Move some lines outside the ERT inset:
351 if move_rexp.match(line):
353 # This is not necessary, but we want the output to be
354 # as similar as possible to the lyx format
357 elif not check_token(line, "\\latex"):
361 if [x for x in tmp if x != ""] != []:
363 # This is not necessary, but we want the output to be
364 # as similar as possible to the lyx format
365 lines[i-1] = lines[i-1]+" "
369 new = new+ert_begin+tmp+["\\end_inset ", ""]
372 k3 = find_end_of_inset(lines, k2)
373 new = new+[""]+lines[k2:k3+1]+[""] # Put an empty line after \end_inset
375 # Skip the empty line after \end_inset
376 if not is_nonempty_line(lines[k]):
380 new = new + ["\\hfill", ""]
384 # This is not necessary, but we want the output to be
385 # as similar as possible to the lyx format
386 lines[i-1] = lines[i-1]+specialchar_str
389 new = new+[specialchar_str, ""]
395 if not check_token(lines[j], "\\latex "):
396 new = new+[""]+[lines[j]]
400 # Delete remaining "\latex xxx" tokens
403 i = find_token(lines, "\\latex ", i)
409 def remove_oldertinset(document):
410 " ERT insert are hidden feature of lyx 1.1.6. This might be removed in the future."
411 lines = document.body
414 i = find_token(lines, "\\begin_inset ERT", i)
417 j = find_end_of_inset(lines, i)
418 k = find_token(lines, "\\layout", i+1)
419 l = get_paragraph(lines, i, document.format + 1)
420 if lines[k] == lines[l]: # same layout
427 def is_ert_paragraph(document, i):
428 " Is this a ert paragraph? "
429 lines = document.body
430 if not check_token(lines[i], "\\layout"):
432 if not document.is_default_layout(get_layout(lines[i], document.default_layout)):
435 i = find_nonempty_line(lines, i+1)
436 if not check_token(lines[i], "\\begin_inset ERT"):
439 j = find_end_of_inset(lines, i)
440 k = find_nonempty_line(lines, j+1)
441 return check_token(lines[k], "\\layout")
444 def combine_ert(document):
445 " Combine ERT paragraphs."
446 lines = document.body
449 i = find_token(lines, "\\begin_inset ERT", i)
452 j = get_paragraph(lines, i, document.format + 1)
455 while is_ert_paragraph(document, j):
458 i2 = find_token(lines, "\\layout", j+1)
459 k = find_token(lines, "\\end_inset", i2+1)
460 text = text+lines[i2:k]
461 j = find_token(lines, "\\layout", k+1)
466 j = find_token(lines, "\\layout", i+1)
# Unit suffixes, looked up by integer index when a length is rebuilt.
oldunits = [
    "pt",
    "cm",
    "in",
    "text%",
    "col%",
]
474 def get_length(lines, name, start, end):
476 i = find_token(lines, name, start, end)
480 return x[2]+oldunits[int(x[1])]
483 def write_attribute(x, token, value):
486 x.append("\t"+token+" "+value)
489 def remove_figinset(document):
491 lines = document.body
494 i = find_token(lines, "\\begin_inset Figure", i)
497 j = find_end_of_inset(lines, i)
499 if ( len(lines[i].split()) > 2 ):
500 lyxwidth = lines[i].split()[3]+"pt"
501 lyxheight = lines[i].split()[4]+"pt"
506 filename = get_value(lines, "file", i+1, j)
508 width = get_length(lines, "width", i+1, j)
509 # what does width=5 mean ?
510 height = get_length(lines, "height", i+1, j)
511 rotateAngle = get_value(lines, "angle", i+1, j)
512 if width == "" and height == "":
517 flags = get_value(lines, "flags", i+1, j)
520 display = "monochrome"
527 subcaptionLine = find_token(lines, "subcaption", i+1, j)
528 if subcaptionLine != -1:
529 subcaptionText = lines[subcaptionLine][11:]
530 if subcaptionText != "":
531 subcaptionText = '"'+subcaptionText+'"'
533 k = find_token(lines, "subfigure", i+1,j)
539 new = ["\\begin_inset Graphics FormatVersion 1"]
540 write_attribute(new, "filename", filename)
541 write_attribute(new, "display", display)
543 new.append("\tsubcaption")
544 write_attribute(new, "subcaptionText", subcaptionText)
545 write_attribute(new, "size_type", size_type)
546 write_attribute(new, "width", width)
547 write_attribute(new, "height", height)
548 if rotateAngle != "":
549 new.append("\trotate")
550 write_attribute(new, "rotateAngle", rotateAngle)
551 write_attribute(new, "rotateOrigin", "leftBaseline")
552 write_attribute(new, "lyxsize_type", "1")
553 write_attribute(new, "lyxwidth", lyxwidth)
554 write_attribute(new, "lyxheight", lyxheight)
555 new = new + ["\\end_inset"]
# An attribute (with its leading space) whose value is "false", "0" or empty.
attr_re = re.compile(
    r' \w*="(false|0|)"'
)

# Start of a tabular line whose attributes get cleaned: features, column,
# row or cell tags.
line_re = re.compile(
    r'<(features|column|row|cell)'
)
562 def update_tabular(document):
563 " Convert tabular format 2 to 3."
564 regexp = re.compile(r'^\\begin_inset\s+Tabular')
565 lines = document.body
568 i = find_re(lines, regexp, i)
572 for k in get_tabular_lines(lines, i):
573 if check_token(lines[k], "<lyxtabular"):
574 lines[k] = lines[k].replace('version="2"', 'version="3"')
575 elif check_token(lines[k], "<column"):
576 lines[k] = lines[k].replace('width=""', 'width="0pt"')
578 if line_re.match(lines[k]):
579 lines[k] = re.sub(attr_re, "", lines[k])
585 # Convert tabular format 2 to 3
587 # Compatibility read for old longtable options. Now we can make any
588 # row part of the header/footer type we want; before, it was strictly
589 # sequential from the first row down (as LaTeX does it!). So now when
590 # we find a header/footer line we have to go up the rows and set it
591 # on all preceding rows until the first one, or one that already has an h/f
592 # option set. If we find a firstheader on the same line as a header or a
593 # lastfooter on the same line as a footer then this should be set empty.
596 # just for compatibility with old python versions
597 # python >= 2.3 has real booleans (False and True)
602 " Simple data structure to deal with long table info."
604 self.endhead = false # header row
605 self.endfirsthead = false # first header row
606 self.endfoot = false # footer row
607 self.endlastfoot = false # last footer row
610 def haveLTFoot(row_info):
611 " Does row has LTFoot?"
612 for row_ in row_info:
618 def setHeaderFooterRows(hr, fhr, fr, lfr, rows_, row_info):
619 " Set Header/Footer rows."
620 endfirsthead_empty = false
621 endlastfoot_empty = false
625 row_info[hr].endhead = true
627 # set firstheader info
628 if fhr and fhr < rows_:
629 if row_info[fhr].endhead:
632 row_info[fhr].endfirsthead = true
633 row_info[fhr].endhead = false
634 elif row_info[fhr - 1].endhead:
635 endfirsthead_empty = true
637 while fhr > 0 and not row_info[fhr - 1].endhead:
639 row_info[fhr].endfirsthead = true
642 if fr and fr < rows_:
643 if row_info[fr].endhead and row_info[fr - 1].endhead:
644 while fr > 0 and not row_info[fr - 1].endhead:
646 row_info[fr].endfoot = true
647 row_info[fr].endhead = false
648 elif row_info[fr].endfirsthead and row_info[fr - 1].endfirsthead:
649 while fr > 0 and not row_info[fr - 1].endfirsthead:
651 row_info[fr].endfoot = true
652 row_info[fr].endfirsthead = false
653 elif not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead:
654 while fr > 0 and not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead:
656 row_info[fr].endfoot = true
658 # set lastfooter info
659 if lfr and lfr < rows_:
660 if row_info[lfr].endhead and row_info[lfr - 1].endhead:
661 while lfr > 0 and not row_info[lfr - 1].endhead:
663 row_info[lfr].endlastfoot = true
664 row_info[lfr].endhead = false
665 elif row_info[lfr].endfirsthead and row_info[lfr - 1].endfirsthead:
666 while lfr > 0 and not row_info[lfr - 1].endfirsthead:
668 row_info[lfr].endlastfoot = true
669 row_info[lfr].endfirsthead = false
670 elif row_info[lfr].endfoot and row_info[lfr - 1].endfoot:
671 while lfr > 0 and not row_info[lfr - 1].endfoot:
673 row_info[lfr].endlastfoot = true
674 row_info[lfr].endfoot = false
675 elif not row_info[fr - 1].endhead and not row_info[fr - 1].endfirsthead and not row_info[fr - 1].endfoot:
676 while lfr > 0 and not row_info[lfr - 1].endhead and not row_info[lfr - 1].endfirsthead and not row_info[lfr - 1].endfoot:
678 row_info[lfr].endlastfoot = true
679 elif haveLTFoot(row_info):
680 endlastfoot_empty = true
682 return endfirsthead_empty, endlastfoot_empty
def insert_attribute(lines, i, attribute):
    """Add an attribute to the tag held in lines[i], in place.

    A space followed by the attribute text is spliced in immediately before
    the first '>' of the line.  Nothing is returned; lines[i] is overwritten.
    """
    tag = lines[i]
    # str.find yields -1 when there is no '>'; the slices below then split
    # the line just before its final character, as the plain slicing does.
    cut = tag.find('>')
    head, tail = tag[:cut], tag[cut:]
    lines[i] = "".join([head, ' ', attribute, tail])
# Captures the digits of a rows="N" attribute.
rows_re = re.compile(
    r'rows="(\d*)"'
)

# Captures the value of islongtable="...".
# NOTE(review): (\w) captures exactly one character, so a multi-character
# value such as "true" does not match this pattern -- confirm this is intended.
longtable_re = re.compile(
    r'islongtable="(\w)"'
)

# Captures the four old-style longtable values (groups 1-4, in the order
# endhead, endfirsthead, endfoot, endlastfoot).
ltvalues_re = re.compile(
    r'endhead="(-?\d*)" endfirsthead="(-?\d*)" '
    r'endfoot="(-?\d*)" endlastfoot="(-?\d*)"'
)

# Captures the same four attributes as one piece of text (group 1).
lt_features_re = re.compile(
    r'(endhead="-?\d*" endfirsthead="-?\d*" '
    r'endfoot="-?\d*" endlastfoot="-?\d*")'
)
695 def update_longtables(document):
696 " Update longtables to new format."
697 regexp = re.compile(r'^\\begin_inset\s+Tabular')
701 i = find_re(body, regexp, i)
705 i = find_token(body, "<lyxtabular", i)
709 # get number of rows in the table
710 rows = int(rows_re.search(body[i]).group(1))
713 i = find_token(body, '<features', i)
717 # is this a longtable?
718 longtable = longtable_re.search(body[i])
721 # islongtable is missing add it
722 body[i] = body[i][:10] + 'islongtable="false" ' + body[i][10:]
724 if not longtable or longtable.group(1) != "true":
725 # remove longtable elements from features
726 features = lt_features_re.search(body[i])
728 body[i] = body[i].replace(features.group(1), "")
731 row_info = row() * rows
732 res = ltvalues_re.search(body[i])
736 endfirsthead_empty, endlastfoot_empty = setHeaderFooterRows(res.group(1), res.group(2), res.group(3), res.group(4), rows, row_info)
738 if endfirsthead_empty:
739 insert_attribute(body, i, 'firstHeadEmpty="true"')
741 if endfirsthead_empty:
742 insert_attribute(body, i, 'lastFootEmpty="true"')
745 for j in range(rows):
746 i = find_token(body, '<row', i)
748 self.endfoot = false # footer row
749 self.endlastfoot = false # last footer row
750 if row_info[j].endhead:
751 insert_attribute(body, i, 'endhead="true"')
753 if row_info[j].endfirsthead:
754 insert_attribute(body, i, 'endfirsthead="true"')
756 if row_info[j].endfoot:
757 insert_attribute(body, i, 'endfoot="true"')
759 if row_info[j].endlastfoot:
760 insert_attribute(body, i, 'endlastfoot="true"')
765 def fix_oldfloatinset(document):
766 " Figure insert are hidden feature of lyx 1.1.6. This might be removed in the future."
767 lines = document.body
770 i = find_token(lines, "\\begin_inset Float ", i)
773 j = find_token(lines, "collapsed", i)
775 lines[j:j] = ["wide false"]
779 def change_listof(document):
780 " Change listof insets."
781 lines = document.body
784 i = find_token(lines, "\\begin_inset LatexCommand \\listof", i)
787 type = re.search(r"listof(\w*)", lines[i]).group(1)[:-1]
788 lines[i] = "\\begin_inset FloatList "+type
792 def change_infoinset(document):
793 " Change info inset."
794 lines = document.body
797 i = find_token(lines, "\\begin_inset Info", i)
800 txt = lines[i][18:].lstrip()
801 new = ["\\begin_inset Note", "collapsed true", ""]
802 j = find_token(lines, "\\end_inset", i)
806 note_lines = lines[i+1:j]
808 note_lines = [txt]+note_lines
810 for line in note_lines:
811 new = new + ['\layout %s' % document.default_layout, ""]
812 tmp = line.split('\\')
815 new = new + ["\\backslash ", x]
820 def change_header(document):
822 lines = document.header
823 i = find_token(lines, "\\use_amsmath", 0)
826 lines[i+1:i+1] = ["\\use_natbib 0",
827 "\\use_numerical_citations 0"]
# Version strings "1.2.0" .. "1.2.4" followed by plain "1.2".
supported_versions = ["1.2.%d" % minor for minor in range(5)]
supported_versions.append("1.2")

# One conversion step: a number paired with the list of functions to run,
# kept in their original order.
# NOTE(review): 220 is presumably the target file-format number -- confirm
# against the lyx2lyx driver.
convert = [
    [
        220,
        [
            change_header,
            change_listof,
            fix_oldfloatinset,
            update_tabular,
            update_longtables,
            remove_pextra,
            remove_oldfloat,
            remove_figinset,
            remove_oldertinset,
            remove_oldert,
            combine_ert,
            change_infoinset,
        ],
    ],
]
838 if __name__ == "__main__":