]> git.lyx.org Git - lyx.git/blob - lib/lyx2lyx/lyx_1_4.py
Reformat lyx2lyx code using ruff
[lyx.git] / lib / lyx2lyx / lyx_1_4.py
1 # This file is part of lyx2lyx
2 # Copyright (C) 2002 Dekel Tsur <dekel@lyx.org>
3 # Copyright (C) 2002-2004 José Matos <jamatos@lyx.org>
4 # Copyright (C) 2004-2005 Georg Baum <Georg.Baum@post.rwth-aachen.de>
5 #
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
19
20 """Convert files to the file format generated by lyx 1.4"""
21
22 import re
23 from os import access, F_OK
24 import os.path
25 from parser_tools import (
26     check_token,
27     find_token,
28     get_value,
29     is_nonempty_line,
30     find_tokens,
31     find_end_of,
32     find_beginning_of,
33     find_token_exact,
34     find_tokens_exact,
35     find_re,
36     find_tokens_backwards,
37 )
38 from sys import stdin
39
40 from lyx_0_12 import update_latexaccents
41
42 ####################################################################
43 # Private helper functions
44
45
def get_layout(line, default_layout):
    """Return the layout name found on a layout line.

    The line is split on whitespace and its second field is taken as the
    layout name.  When the line has fewer than two fields, `default_layout`
    is returned instead.
    """
    fields = line.split()
    return fields[1] if len(fields) > 1 else default_layout
52
53
def get_paragraph(lines, i, format):
    """Return the index of the layout line of the paragraph containing line i.

    The layout token is "\\layout" for formats below 225 and "\\begin_layout"
    otherwise.  The search walks backwards from `i`; whenever it lands on a
    line that is not a layout line (i.e. the other searched token,
    "\\end_inset"), it continues from the beginning of that inset.

    Returns -1 when no enclosing layout line is found.
    """
    begin_layout = "\\layout" if format < 225 else "\\begin_layout"
    pos = i
    while pos != -1:
        pos = find_tokens_backwards(lines, ["\\end_inset", begin_layout], pos)
        if pos == -1:
            break
        if check_token(lines[pos], begin_layout):
            return pos
        # Not a layout line: skip back over the whole inset and keep looking.
        pos = find_beginning_of_inset(lines, pos)
    return -1
69
70
def find_beginning_of_inset(lines, i):
    r"""Return the result of searching for the \begin_inset matching line i.

    Delegates to find_beginning_of() with the "\begin_inset"/"\end_inset"
    token pair; lines[i] itself is part of the searched region.
    """
    inset_start = find_beginning_of(lines, i, "\\begin_inset", "\\end_inset")
    return inset_start
74
75
def get_next_paragraph(lines, i, format):
    """Find the paragraph after the paragraph that contains line i.

    Searches forward from `i` for the next line that starts a paragraph or
    ends the enclosing structure.  The set of tokens depends on the file
    format: "\\layout"/"\\the_end" below 225, "\\begin_layout"/"\\end_document"
    below 236, and additionally "\\end_body" from 236 on.  Insets met on the
    way are skipped as a whole.

    Returns the index of the line found, or -1 when there is none (including
    the case of an unterminated inset).
    """
    if format < 225:
        tokens = ["\\begin_inset", "\\layout", "\\end_float", "\\the_end"]
    elif format < 236:
        tokens = ["\\begin_inset", "\\begin_layout", "\\end_float", "\\end_document"]
    else:
        tokens = [
            "\\begin_inset",
            "\\begin_layout",
            "\\end_float",
            "\\end_body",
            "\\end_document",
        ]
    while i != -1:
        i = find_tokens(lines, tokens, i)
        if i == -1:
            # Nothing found: do not index lines[-1], which would inspect the
            # last line (or fail on an empty list).
            return -1
        if not check_token(lines[i], "\\begin_inset"):
            return i
        i = find_end_of_inset(lines, i)
    return -1
97
98
def find_end_of_inset(lines, i):
    r"""Return the result of searching for the \end_inset matching line i.

    Delegates to find_end_of() with the "\begin_inset"/"\end_inset" token
    pair.
    """
    inset_end = find_end_of(lines, i, "\\begin_inset", "\\end_inset")
    return inset_end
102
103
def del_token(lines, token, start, end):
    """del_token(lines, token, start, end) -> int

    Look for `token` between `start` and `end` with find_token_exact() and,
    if it is found, delete that line from `lines`.

    Returns the adjusted end position: `end - 1` when a line was deleted,
    `end` unchanged otherwise.
    """
    found = find_token_exact(lines, token, start, end)
    if found != -1:
        del lines[found]
        return end - 1
    return end
118
119
120 # End of helper functions
121 ####################################################################
122
123
def remove_color_default(document):
    r"""Replace every "\color default" in the body with "\color inherit"."""
    body = document.body
    pos = find_token(body, "\\color default", 0)
    while pos != -1:
        body[pos] = body[pos].replace("\\color default", "\\color inherit")
        # Search again from the same line, as the original loop does.
        pos = find_token(body, "\\color default", pos)
132
133
def add_end_header(document):
    r"""Append an "\end_header" line to the document header."""
    header = document.header
    header.append("\\end_header")
137
138
def rm_end_header(document):
    r"""Delete the first "\end_header" line of the header, if there is one."""
    pos = find_token(document.header, "\\end_header", 0)
    if pos != -1:
        del document.header[pos]
145
146
def convert_amsmath(document):
    r"""Convert the \use_amsmath header setting to the new value range.

    Old values: 0 == off, 1 == on.
    New values: 0 == off, 1 == auto, 2 == on.
    Old 'off' is translated to 'auto', since old format 'off' means auto in
    reality; everything else becomes 'on'.  A line that cannot be parsed is
    reported and treated as "0".  If the setting is missing, a warning is
    emitted and the header is left alone.
    """
    header = document.header
    pos = find_token(header, "\\use_amsmath", 0)
    if pos == -1:
        document.warning("Malformed LyX document: Missing '\\use_amsmath'.")
        return
    fields = header[pos].split()
    if len(fields) == 2:
        old_value = fields[1]
    else:
        document.warning(
            "Malformed LyX document: Could not parse line '%s'." % header[pos]
        )
        old_value = "0"
    header[pos] = "\\use_amsmath 1" if old_value == "0" else "\\use_amsmath 2"
168
169
def revert_amsmath(document):
    r"""Revert the \use_amsmath header setting to the old value range.

    Old values: 0 == off, 1 == on.
    New values: 0 == off, 1 == auto, 2 == on.
    New 'on' (2) becomes old 'on' (1); every other value becomes 0, since old
    format 'off' means auto in reality.  A line that cannot be parsed is
    reported and treated as "0".  If the setting is missing, a warning is
    emitted and the header is left alone.
    """
    header = document.header
    pos = find_token(header, "\\use_amsmath", 0)
    if pos == -1:
        document.warning("Malformed LyX document: Missing '\\use_amsmath'.")
        return
    fields = header[pos].split()
    if len(fields) == 2:
        new_value = fields[1]
    else:
        document.warning(
            "Malformed LyX document: Could not parse line '%s'." % header[pos]
        )
        new_value = "0"
    header[pos] = "\\use_amsmath 1" if new_value == "2" else "\\use_amsmath 0"
191
192
def convert_spaces(document):
    r"""Rewrite "\SpecialChar ~" as "\InsetSpace ~" on every body line."""
    body = document.body
    for idx, line in enumerate(body):
        body[idx] = line.replace("\\SpecialChar ~", "\\InsetSpace ~")
197
198
def revert_spaces(document):
    r"""Revert \InsetSpace insets.

    "\InsetSpace ~" becomes "\SpecialChar ~".  Any other \InsetSpace is
    removed from its line and re-inserted as an ERT inset on the following
    line via insert_ert(), with "\space" written as "\ ".

    The pattern's leading group is greedy, so on a line holding several
    \InsetSpace tokens the last one is handled first; the line is then
    searched again.
    """
    regexp = re.compile(r"(.*)(\\InsetSpace\s+)(\S+)")
    i = 0
    while True:
        i = find_re(document.body, regexp, i)
        if i == -1:
            break
        line = document.body[i]
        match = regexp.match(line)
        space = match.group(3)
        prepend = match.group(1)
        # Text following the matched inset.  The line is rebuilt by plain
        # concatenation rather than regexp.sub(): sub() would treat document
        # text as a replacement template, and "\S" (from "\SpecialChar") is a
        # bad escape that raises re.error on Python >= 3.7.
        remainder = line[match.end() :]
        if space == "~":
            document.body[i] = prepend + "\\SpecialChar ~" + remainder
            i = i + 1
        else:
            document.body[i] = prepend + remainder
            if space == "\\space":
                space = "\\ "
            i = insert_ert(
                document.body,
                i + 1,
                "Collapsed",
                space,
                document.format - 1,
                document.default_layout,
            )
225
226
def rename_spaces(document):
    r"""Rename inset spaces on every body line.

    \InsetSpace \space -> \InsetSpace \space{}
    \InsetSpace \,     -> \InsetSpace \thinspace{}
    """
    body = document.body
    for idx, line in enumerate(body):
        line = line.replace("\\InsetSpace \\space", "\\InsetSpace \\space{}")
        body[idx] = line.replace("\\InsetSpace \\,", "\\InsetSpace \\thinspace{}")
237
238
def revert_space_names(document):
    r"""Restore the old inset space names on every body line.

    \InsetSpace \space{}     -> \InsetSpace \space
    \InsetSpace \thinspace{} -> \InsetSpace \,
    """
    body = document.body
    for idx, line in enumerate(body):
        line = line.replace("\\InsetSpace \\space{}", "\\InsetSpace \\space")
        body[idx] = line.replace("\\InsetSpace \\thinspace{}", "\\InsetSpace \\,")
249
250
def lyx_support_escape(lab):
    """Equivalent to pre-unicode lyx::support::escape().

    Every character whose code is 128 or above, as well as "=" and "%", is
    replaced by "=" followed by two upper-case hexadecimal digits of its
    code.  All other characters are copied unchanged.
    """
    hex_digits = "0123456789ABCDEF"
    pieces = []
    for char in lab:
        code = ord(char)
        if code < 128 and char != "=" and char != "%":
            pieces.append(char)
        else:
            pieces.append("=" + hex_digits[code >> 4] + hex_digits[code & 15])
    return "".join(pieces)
281
282
def revert_eqref(document):
    r"""Replace "\begin_inset LatexCommand \eqref" lines by the start of an ERT inset.

    The text following "\eqref" on the line is escaped with
    lyx_support_escape() and appended to "eqref" inside the ERT.  Only the
    opening line of the inset is replaced.
    """
    eqref_start = re.compile(r"^\\begin_inset\s+LatexCommand\s+\\eqref")
    body = document.body
    pos = find_re(body, eqref_start, 0)
    while pos != -1:
        argument = lyx_support_escape(eqref_start.sub("", body[pos]))
        ert_lines = [
            "\\begin_inset ERT",
            "status Collapsed",
            "",
            "\\layout %s" % document.default_layout,
            "",
            "\\backslash ",
            "eqref" + argument,
        ]
        body[pos : pos + 1] = ert_lines
        # Continue after the seven lines just written.
        pos = find_re(body, eqref_start, pos + 7)
302
303
def convert_bibtex(document):
    r"""Rename the "\BibTeX" LatexCommand inset to "\bibtex" on every body line."""
    body = document.body
    old_name = "\\begin_inset LatexCommand \\BibTeX"
    new_name = "\\begin_inset LatexCommand \\bibtex"
    for idx, line in enumerate(body):
        body[idx] = line.replace(old_name, new_name)
310
311
def revert_bibtex(document):
    r"""Rename the "\bibtex" LatexCommand inset back to "\BibTeX" on every body line."""
    body = document.body
    new_name = "\\begin_inset LatexCommand \\bibtex"
    old_name = "\\begin_inset LatexCommand \\BibTeX"
    for idx, line in enumerate(body):
        body[idx] = line.replace(new_name, old_name)
318
319
def remove_insetparent(document):
    r"""Delete every "\lyxparent" LatexCommand inset from the body.

    For each match, the matching line and the two lines after it are removed.
    """
    parent_inset = "\\begin_inset LatexCommand \\lyxparent"
    body = document.body
    pos = find_token(body, parent_inset, 0)
    while pos != -1:
        del body[pos : pos + 3]
        pos = find_token(body, parent_inset, pos)
328
329
def convert_external(document):
    r"""Convert inset External to the new multi-line format.

    The old header line has the form
        \begin_inset External <template>,"<filename>",...
    A "RasterImage" template is turned into a Graphics inset; any other
    template keeps the External header and gets a "\ttemplate" line.  A
    "\tfilename" line is added only when the file name is non-empty.

    A header line that does not match the old form is reported with
    document.warning() and converted with an empty template and file name.
    """
    external_rexp = re.compile(r'\\begin_inset External ([^,]*),"([^"]*)",')
    external_header = "\\begin_inset External"
    i = 0
    while True:
        i = find_token(document.body, external_header, i)
        if i == -1:
            break
        look = external_rexp.search(document.body[i])
        if look:
            template_name = look.group(1)
            file_name = look.group(2)
        else:
            document.warning(
                "Malformed LyX document: Could not parse line '%s'." % document.body[i]
            )
            template_name = ""
            file_name = ""

        if template_name == "RasterImage":
            # Convert a RasterImage External Inset to a Graphics Inset.
            new_lines = ["\\begin_inset Graphics"]
            if file_name:
                # Without this guard an empty file name would reference an
                # unbound (or stale) "filename" line.
                new_lines.append("\tfilename " + file_name)
            document.body[i : i + 1] = new_lines
            i = i + 1
        else:
            # Convert the old External Inset format to the new.
            new_lines = [external_header, "\ttemplate " + template_name]
            if file_name:
                new_lines.append("\tfilename " + file_name)
            document.body[i : i + 1] = new_lines
            # Resume on the last inserted line; it cannot match the header.
            i = i + len(new_lines) - 1
364
365
def revert_external_1(document):
    """Revert inset External to the old single-line header.

    The two lines after the header are removed and the last whitespace
    separated field of each is used as template and file name.  The next line
    supplies the parameters: all of its fields except the last one, in
    reverse order; it is removed only if it is non-empty.
    """
    external_header = "\\begin_inset External"
    body = document.body
    pos = find_token(body, external_header, 0)
    while pos != -1:
        template_fields = body.pop(pos + 1).split()
        filename_fields = body.pop(pos + 1).split()

        param_line = body[pos + 1]
        param_fields = param_line.split()
        if param_line:
            del body[pos + 1]

        body[pos] = "".join(
            [
                body[pos],
                " ",
                template_fields[-1],
                ', "',
                filename_fields[-1],
                '", " ',
                " ".join(reversed(param_fields[:-1])),
                '"',
            ]
        )
        pos = find_token(body, external_header, pos + 1)
399
400
def revert_external_2(document):
    r"""Revert inset External (part II): drop a bare "\tdraft" line.

    For each External inset, the first line found by find_token() for
    "\tdraft" inside the inset is deleted, provided the line is exactly as
    long as that token.  Processing stops if an inset end cannot be found.
    """
    draft_token = "\tdraft"
    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\begin_inset External", pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(body, pos + 1)
        if inset_end == -1:
            # this should not happen
            return
        draft_at = find_token(body, draft_token, pos + 1, inset_end - 1)
        if draft_at != -1 and len(body[draft_at]) == len(draft_token):
            del body[draft_at]
        pos = inset_end + 1
417
418
def convert_comment(document):
    r"""Convert "\layout Comment" paragraphs into Comment insets.

    Each "\layout Comment" line is replaced by a default-layout paragraph
    that opens a collapsed Comment inset.  Directly following
    "\layout Comment" paragraphs are re-labelled with the default layout and
    stay inside the inset; the inset is closed with "\end_inset" in front of
    the first following "\layout" line that is not a comment (or near the end
    of the body if there is none).
    """
    i = 0
    comment = "\\layout Comment"
    while True:
        i = find_token(document.body, comment, i)
        if i == -1:
            return

        # Replace the comment layout line by seven lines that open the inset.
        document.body[i : i + 1] = [
            "\\layout %s" % document.default_layout,
            "",
            "",
            "\\begin_inset Comment",
            "collapsed true",
            "",
            "\\layout %s" % document.default_layout,
        ]
        i = i + 7

        # Walk over the following paragraphs until the comment run ends.
        while True:
            old_i = i
            i = find_token(document.body, "\\layout", i)
            if i == -1:
                # No further layout line: close the inset just before the
                # last line of the body.
                i = len(document.body) - 1
                document.body[i:i] = ["\\end_inset", "", ""]
                return

            # Look for a \begin_deeper or \begin_inset between the previous
            # position and the layout line found; i + 1 stands for "none".
            j = find_token(document.body, "\\begin_deeper", old_i, i)
            if j == -1:
                j = i + 1
            k = find_token(document.body, "\\begin_inset", old_i, i)
            if k == -1:
                k = i + 1

            if j < i and j < k:
                # A \begin_deeper comes first: delete it and its matching
                # \end_deeper, then rescan from there.
                i = j
                del document.body[i]
                i = find_end_of(document.body, i, "\\begin_deeper", "\\end_deeper")
                if i == -1:
                    # This case should not happen
                    # but if this happens deal with it gracefully adding
                    # the missing \end_deeper.
                    i = len(document.body) - 1
                    document.body[i:i] = ["\\end_deeper", ""]
                    return
                else:
                    del document.body[i]
                    continue

            if k < i:
                # A \begin_inset comes first: skip over the whole inset.
                i = k
                i = find_end_of(document.body, i, "\\begin_inset", "\\end_inset")
                if i == -1:
                    # This case should not happen
                    # but if this happens deal with it gracefully adding
                    # the missing \end_inset.
                    i = len(document.body) - 1
                    document.body[i:i] = ["\\end_inset", "", "", "\\end_inset", "", ""]
                    return
                else:
                    i = i + 1
                    continue

            if document.body[i].find(comment) == -1:
                # The next paragraph is not a comment: close the inset in
                # front of it and go back to the outer search.
                document.body[i:i] = ["\\end_inset"]
                i = i + 1
                break
            # Another comment paragraph: keep it inside the inset, using the
            # default layout.
            document.body[i : i + 1] = ["\\layout %s" % document.default_layout]
            i = i + 1
489
490
def revert_comment(document):
    r"""Turn every Comment and Greyedout inset header into "\begin_inset Note"."""
    body = document.body
    pos = find_tokens(body, ["\\begin_inset Comment", "\\begin_inset Greyedout"], 0)
    while pos != -1:
        body[pos] = "\\begin_inset Note"
        pos = find_tokens(
            body, ["\\begin_inset Comment", "\\begin_inset Greyedout"], pos + 1
        )
501
502
def add_end_layout(document):
    r"""Add an "\end_layout" line for every paragraph started by "\layout".

    The body is scanned for structural tokens while a stack records the
    currently open "\layout", "\begin_inset" and "\begin_deeper" structures.
    An "\end_layout" is inserted wherever an open "\layout" is terminated: by
    the next "\layout", by "\end_inset", by "\begin_deeper", by "\end_deeper"
    or by "\the_end".  If no "\the_end" is found, a warning is issued and one
    is appended.  Nothing is done when the body has no "\layout" line.
    """
    i = find_token(document.body, "\\layout", 0)

    if i == -1:
        return

    i = i + 1
    # The first paragraph is open from the start.
    struct_stack = ["\\layout"]

    while True:
        i = find_tokens(
            document.body,
            [
                "\\begin_inset",
                "\\end_inset",
                "\\layout",
                "\\begin_deeper",
                "\\end_deeper",
                "\\the_end",
            ],
            i,
        )

        if i != -1:
            token = document.body[i].split()[0]
        else:
            # No terminating token: append \the_end and fall through to the
            # final case below (the empty token matches none of the others).
            document.warning("Truncated document.")
            i = len(document.body)
            document.body.insert(i, "\\the_end")
            token = ""

        if token == "\\begin_inset":
            struct_stack.append(token)
            i = i + 1
            continue

        if token == "\\end_inset":
            tail = struct_stack.pop()
            if tail == "\\layout":
                # A paragraph is still open inside the inset: close it in
                # front of \end_inset (two lines inserted, hence i + 2).
                document.body.insert(i, "")
                document.body.insert(i, "\\end_layout")
                i = i + 2
                # Check if it is the correct tag
                struct_stack.pop()
            i = i + 1
            continue

        if token == "\\layout":
            tail = struct_stack.pop()
            if tail == token:
                # The previous paragraph is still open: close it first.
                document.body.insert(i, "")
                document.body.insert(i, "\\end_layout")
                i = i + 3
            else:
                # The top of the stack was not a paragraph: put it back.
                struct_stack.append(tail)
                i = i + 1
            struct_stack.append(token)
            continue

        if token == "\\begin_deeper":
            document.body.insert(i, "")
            document.body.insert(i, "\\end_layout")
            i = i + 3
            # consecutive begin_deeper only insert one end_layout
            while document.body[i].startswith("\\begin_deeper"):
                i += 1
            struct_stack.append(token)
            continue

        if token == "\\end_deeper":
            if struct_stack[-1] == "\\layout":
                document.body.insert(i, "\\end_layout")
                i = i + 1
                struct_stack.pop()
            i = i + 1
            continue

        # Remaining case: \the_end (found in the body or appended above for a
        # truncated document).  Close the last paragraph and stop.
        document.body.insert(i, "")
        document.body.insert(i, "\\end_layout")
        return
585
586
def rm_end_layout(document):
    r"""Delete every "\end_layout" line from the body."""
    body = document.body
    pos = find_token(body, "\\end_layout", 0)
    while pos != -1:
        del body[pos]
        pos = find_token(body, "\\end_layout", pos)
597
598
def insert_tracking_changes(document):
    r"""Append "\tracking_changes 0" to the header unless the keyword is present."""
    if find_token(document.header, "\\tracking_changes", 0) == -1:
        document.header.append("\\tracking_changes 0")
604
605
def rm_tracking_changes(document):
    r"""Remove change tracking keywords from the header.

    Deletes the first "\author" line and the first "\tracking_changes" line,
    each only if present.
    """
    header = document.header
    for keyword in ("\\author", "\\tracking_changes"):
        pos = find_token(header, keyword, 0)
        if pos != -1:
            del header[pos]
616
617
def rm_body_changes(document):
    r"""Delete every body line found by find_token() for "\change_"."""
    body = document.body
    pos = find_token(body, "\\change_", 0)
    while pos != -1:
        del body[pos]
        pos = find_token(body, "\\change_", pos)
627
628
def layout2begin_layout(document):
    r"""Rename "\layout" to "\begin_layout" on every layout line of the body."""
    body = document.body
    pos = find_token(body, "\\layout", 0)
    while pos != -1:
        body[pos] = body[pos].replace("\\layout", "\\begin_layout")
        pos = find_token(body, "\\layout", pos + 1)
639
640
def begin_layout2layout(document):
    r"""Rename "\begin_layout" to "\layout" on every layout line of the body."""
    body = document.body
    pos = find_token(body, "\\begin_layout", 0)
    while pos != -1:
        body[pos] = body[pos].replace("\\begin_layout", "\\layout")
        pos = find_token(body, "\\begin_layout", pos + 1)
651
652
def convert_valignment_middle(body, start, end):
    """Change valignment="center" to valignment="middle" in body[start:end].

    Only lines that are a complete <column ...> or <cell ...> tag are
    modified; `body` is updated in place.
    """
    centered_tag = re.compile('^<(column|cell) .*valignment="center".*>$')
    for idx in range(start, end):
        line = body[idx]
        if centered_tag.search(line):
            body[idx] = line.replace('valignment="center"', 'valignment="middle"')
658
659
def convert_table_valignment_middle(document):
    """Convert table valignment, center -> middle, inside every Tabular inset.

    If the end of a Tabular inset cannot be found, the rest of the body is
    converted and processing stops.
    """
    tabular_start = re.compile(r"^\\begin_inset\s+Tabular")
    body = document.body
    pos = 0
    while True:
        pos = find_re(body, tabular_start, pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(body, pos + 1)
        # this should not happen: an unterminated inset extends to the end
        stop = len(body) if inset_end == -1 else inset_end
        convert_valignment_middle(body, pos + 1, stop)
        if inset_end == -1:
            return
        pos = inset_end + 1
675
676
def revert_table_valignment_middle(body, start, end):
    """Change valignment="middle" to valignment="center" in body[start:end].

    Only lines that are a complete <column ...> or <cell ...> tag are
    modified; `body` is updated in place.
    """
    middle_tag = re.compile('^<(column|cell) .*valignment="middle".*>$')
    for idx in range(start, end):
        line = body[idx]
        if middle_tag.search(line):
            body[idx] = line.replace('valignment="middle"', 'valignment="center"')
682
683
def revert_valignment_middle(document):
    """Revert table valignment, middle -> center, inside every Tabular inset.

    If the end of a Tabular inset cannot be found, the rest of the body is
    reverted and processing stops.
    """
    tabular_start = re.compile(r"^\\begin_inset\s+Tabular")
    body = document.body
    pos = 0
    while True:
        pos = find_re(body, tabular_start, pos)
        if pos == -1:
            return
        inset_end = find_end_of_inset(body, pos + 1)
        # this should not happen: an unterminated inset extends to the end
        stop = len(body) if inset_end == -1 else inset_end
        revert_table_valignment_middle(body, pos + 1, stop)
        if inset_end == -1:
            return
        pos = inset_end + 1
699
700
def convert_end_document(document):
    r"""Replace "\the_end" by "\end_document".

    If the body has no "\the_end" line, "\end_document" is appended instead.
    """
    body = document.body
    pos = find_token(body, "\\the_end", 0)
    if pos != -1:
        body[pos] = "\\end_document"
    else:
        body.append("\\end_document")
708
709
def revert_end_document(document):
    r"""Replace "\end_document" by "\the_end".

    If the body has no "\end_document" line, "\the_end" is appended instead.
    """
    body = document.body
    pos = find_token(body, "\\end_document", 0)
    if pos != -1:
        body[pos] = "\\the_end"
    else:
        body.append("\\the_end")
717
718
def convert_breaks(document):
    r"""
    Convert line and page breaks.

    The paragraph parameters \line_top, \line_bottom, \pagebreak_top,
    \pagebreak_bottom, \added_space_top and \added_space_bottom are removed
    from the parameter line and replaced by explicit \lyxline, \newpage and
    VSpace inset lines.  For a paragraph with the default layout and no
    \align, \labelwidthstring or \noindent parameter, these lines are put
    inside the paragraph itself; otherwise separate default-layout paragraphs
    are created above and below it.

     Old:
    \layout Standard
    \line_top \line_bottom \pagebreak_top \pagebreak_bottom \added_space_top xxx \added_space_bottom yyy
    0

     New:
    \begin_layout Standard

    \newpage

    \lyxline
    \begin_inset ERT
    \begin_layout Standard
    \backslash
    vspace{-1\backslash
    parskip}
    \end_layout
    \end_inset

    \begin_inset VSpace xxx
    \end_inset

    0

    \begin_inset VSpace xxx
    \end_inset
    \lyxline

    \newpage

    \end_layout
    """
    # Names (without the leading backslash) of the paragraph parameters that
    # may follow a \begin_layout line.
    par_params = (
        "added_space_bottom",
        "added_space_top",
        "align",
        "labelwidthstring",
        "line_bottom",
        "line_top",
        "noindent",
        "pagebreak_bottom",
        "pagebreak_top",
        "paragraph_spacing",
        "start_of_appendix",
    )
    # Font attributes and, at the same index in attribute_values, the value
    # written for each of them in front of material added at the end of a
    # paragraph.
    font_attributes = [
        "\\family",
        "\\series",
        "\\shape",
        "\\emph",
        "\\numeric",
        "\\bar",
        "\\noun",
        "\\color",
        "\\lang",
    ]
    attribute_values = [
        "default",
        "default",
        "default",
        "default",
        "default",
        "default",
        "default",
        "none",
        document.language,
    ]
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout", i)
        if i == -1:
            return
        layout = get_layout(document.body[i], document.default_layout)
        # From here on i is the line directly after \begin_layout.
        i = i + 1

        # Merge all paragraph parameters into a single line
        # We cannot check for '\\' only because paragraphs may start e.g.
        # with '\\backslash'
        while (
            document.body[i + 1][:1] == "\\"
            and document.body[i + 1][1:].split()[0] in par_params
        ):
            document.body[i] = document.body[i + 1] + " " + document.body[i]
            del document.body[i + 1]

        # Position of each break-related parameter in the merged line
        # (-1 when it is absent).
        line_top = document.body[i].find("\\line_top")
        line_bot = document.body[i].find("\\line_bottom")
        pb_top = document.body[i].find("\\pagebreak_top")
        pb_bot = document.body[i].find("\\pagebreak_bottom")
        vspace_top = document.body[i].find("\\added_space_top")
        vspace_bot = document.body[i].find("\\added_space_bottom")

        # Nothing to convert in this paragraph.
        if (
            line_top == -1
            and line_bot == -1
            and pb_bot == -1
            and pb_top == -1
            and vspace_top == -1
            and vspace_bot == -1
        ):
            continue

        # Do we have a nonstandard paragraph? We need to create new paragraphs
        # if yes to avoid putting lyxline etc. inside of special environments.
        # This is wrong for itemize and enumerate environments, but it is
        # impossible to convert these correctly.
        # We want to avoid new paragraphs if possible because we want to
        # inherit font sizes.
        nonstandard = 0
        if (
            not document.is_default_layout(layout)
            or document.body[i].find("\\align") != -1
            or document.body[i].find("\\labelwidthstring") != -1
            or document.body[i].find("\\noindent") != -1
        ):
            nonstandard = 1

        # get the font size of the beginning of this paragraph, since we need
        # it for the lyxline inset
        j = i + 1
        while not is_nonempty_line(document.body[j]):
            j = j + 1
        size_top = ""
        if document.body[j].find("\\size") != -1:
            size_top = document.body[j].split()[1]

        # Remove the flag-like parameters (they take no value).
        for tag in (
            "\\line_top",
            "\\line_bottom",
            "\\pagebreak_top",
            "\\pagebreak_bottom",
        ):
            document.body[i] = document.body[i].replace(tag, "")

        if vspace_top != -1:
            # the position could have changed because of the removal of other
            # paragraph properties above
            vspace_top = document.body[i].find("\\added_space_top")
            tmp_list = document.body[i][vspace_top:].split()
            vspace_top_value = tmp_list[1]
            # Drop the parameter and its value, keep whatever follows them.
            document.body[i] = document.body[i][:vspace_top] + " ".join(tmp_list[2:])

        if vspace_bot != -1:
            # the position could have changed because of the removal of other
            # paragraph properties above
            vspace_bot = document.body[i].find("\\added_space_bottom")
            tmp_list = document.body[i][vspace_bot:].split()
            vspace_bot_value = tmp_list[1]
            # Drop the parameter and its value, keep whatever follows them.
            document.body[i] = document.body[i][:vspace_bot] + " ".join(tmp_list[2:])

        document.body[i] = document.body[i].strip()
        i = i + 1

        # Create an empty paragraph or paragraph fragment for line and
        # page break that belong above the paragraph
        if pb_top != -1 or line_top != -1 or vspace_top != -1:
            paragraph_above = list()
            if nonstandard:
                # We need to create an extra paragraph for nonstandard environments
                paragraph_above = ["\\begin_layout %s" % document.default_layout, ""]

            if pb_top != -1:
                paragraph_above.extend(["\\newpage ", ""])

            if vspace_top != -1:
                paragraph_above.extend(
                    ["\\begin_inset VSpace " + vspace_top_value, "\\end_inset", "", ""]
                )

            if line_top != -1:
                if size_top != "":
                    paragraph_above.extend(["\\size " + size_top + " "])
                # We need an additional vertical space of -\parskip.
                # We can't use the vspace inset because it does not know \parskip.
                paragraph_above.extend(["\\lyxline ", "", ""])
                insert_ert(
                    paragraph_above,
                    len(paragraph_above) - 1,
                    "Collapsed",
                    "\\vspace{-1\\parskip}\n",
                    document.format + 1,
                    document.default_layout,
                )
                paragraph_above.extend([""])

            if nonstandard:
                paragraph_above.extend(["\\end_layout ", ""])
                # insert new paragraph above the current paragraph
                document.body[i - 2 : i - 2] = paragraph_above
            else:
                # insert new lines at the beginning of the current paragraph
                document.body[i:i] = paragraph_above

            # Skip over the lines just inserted.
            i = i + len(paragraph_above)

        # Ensure that nested style are converted later.
        k = find_end_of(document.body, i, "\\begin_layout", "\\end_layout")

        if k == -1:
            return

        if pb_bot != -1 or line_bot != -1 or vspace_bot != -1:
            # get the font size of the end of this paragraph
            size_bot = size_top
            j = i + 1
            while j < k:
                if document.body[j].find("\\size") != -1:
                    size_bot = document.body[j].split()[1]
                    j = j + 1
                elif document.body[j].find("\\begin_inset") != -1:
                    # skip insets
                    j = find_end_of_inset(document.body, j)
                else:
                    j = j + 1

            paragraph_below = list()
            if nonstandard:
                # We need to create an extra paragraph for nonstandard environments
                paragraph_below = [
                    "",
                    "\\begin_layout %s" % document.default_layout,
                    "",
                ]
            else:
                # Staying inside the paragraph: for every font attribute that
                # occurs in it, emit the value paired with it in
                # attribute_values before the added material.
                for a in range(len(font_attributes)):
                    if find_token(document.body, font_attributes[a], i, k) != -1:
                        paragraph_below.extend([font_attributes[a] + " " + attribute_values[a]])

            if line_bot != -1:
                if nonstandard and size_bot != "":
                    paragraph_below.extend(["\\size " + size_bot + " "])
                paragraph_below.extend(["\\lyxline ", ""])
                if size_bot != "":
                    paragraph_below.extend(["\\size default "])

            if vspace_bot != -1:
                paragraph_below.extend(
                    ["\\begin_inset VSpace " + vspace_bot_value, "\\end_inset", "", ""]
                )

            if pb_bot != -1:
                paragraph_below.extend(["\\newpage ", ""])

            if nonstandard:
                paragraph_below.extend(["\\end_layout "])
                # insert new paragraph below the current paragraph
                document.body[k + 1 : k + 1] = paragraph_below
            else:
                # insert new lines at the end of the current paragraph
                document.body[k:k] = paragraph_below
972
973
def convert_note(document):
    """Convert Notes.

    Every inset header starting with one of the legacy note-like names
    (Note, Comment, Greyedout) gets the word "Note " inserted right after
    the 13-character "\\begin_inset " prefix.
    """
    legacy_headers = [
        "\\begin_inset Note",
        "\\begin_inset Comment",
        "\\begin_inset Greyedout",
    ]
    prefix_len = 13  # length of "\\begin_inset "

    line_no = find_tokens(document.body, legacy_headers, 0)
    while line_no != -1:
        header = document.body[line_no]
        document.body[line_no] = header[:prefix_len] + "Note " + header[prefix_len:]
        # Resume after the line just rewritten so it is not matched again.
        line_no = find_tokens(document.body, legacy_headers, line_no + 1)
988
989
def revert_note(document):
    """Revert Notes.

    Drops the leading "Note " word from every inset header that starts
    with "\\begin_inset Note ", keeping whatever follows it.
    """
    note_header = "\\begin_inset Note "
    skip = len(note_header)

    line_no = find_token(document.body, note_header, 0)
    while line_no != -1:
        remainder = document.body[line_no][skip:]
        document.body[line_no] = "\\begin_inset " + remainder
        line_no = find_token(document.body, note_header, line_no + 1)
1001
1002
def convert_box(document):
    """Convert Boxes.

    Every inset header starting with one of the legacy box names gets the
    word "Box " inserted right after the 13-character "\\begin_inset "
    prefix.
    """
    legacy_headers = [
        "\\begin_inset Boxed",
        "\\begin_inset Doublebox",
        "\\begin_inset Frameless",
        "\\begin_inset ovalbox",
        "\\begin_inset Ovalbox",
        "\\begin_inset Shadowbox",
    ]
    prefix_len = 13  # length of "\\begin_inset "

    line_no = find_tokens(document.body, legacy_headers, 0)
    while line_no != -1:
        header = document.body[line_no]
        document.body[line_no] = header[:prefix_len] + "Box " + header[prefix_len:]
        # Resume after the line just rewritten.
        line_no = find_tokens(document.body, legacy_headers, line_no + 1)
1024
1025
def revert_box(document):
    """Revert Boxes.

    Drops the leading "Box " word from every inset header that starts
    with "\\begin_inset Box ", keeping whatever follows it.
    """
    box_header = "\\begin_inset Box "
    skip = len(box_header)

    line_no = find_token(document.body, box_header, 0)
    while line_no != -1:
        remainder = document.body[line_no][skip:]
        document.body[line_no] = "\\begin_inset " + remainder
        line_no = find_token(document.body, box_header, line_no + 1)
1037
1038
def convert_collapsible(document):
    """Convert collapsed insets.

    For every collapsible inset header, the following 'collapsed false' /
    'collapsed true' line is rewritten to 'status open' /
    'status collapsed'.  A warning is emitted when no such line is found
    before the inset's first '\\begin_layout' or before the end of the body.
    """
    inset_headers = [
        "\\begin_inset Box",
        "\\begin_inset Branch",
        "\\begin_inset CharStyle",
        "\\begin_inset Float",
        "\\begin_inset Foot",
        "\\begin_inset Marginal",
        "\\begin_inset Note",
        "\\begin_inset OptArg",
        "\\begin_inset Wrap",
    ]
    replacements = {
        "collapsed false": "status open",
        "collapsed true": "status collapsed",
    }
    body = document.body

    i = 0
    while True:
        i = find_tokens_exact(body, inset_headers, i)
        if i == -1:
            break

        # Search for a line starting 'collapsed'
        # If, however, we find a line starting '\begin_layout'
        # (_always_ present) then break with a warning message
        i = i + 1
        while i < len(body):
            if body[i] in replacements:
                body[i] = replacements[body[i]]
                break
            if body[i][:13] == "\\begin_layout":
                document.warning("Malformed LyX document: Missing 'collapsed'.")
                break
            i = i + 1
        else:
            # Truncated document: neither line was found before the end of
            # the body.  Warn instead of indexing past the last line.
            document.warning("Malformed LyX document: Missing 'collapsed'.")
            return

        i = i + 1
1078
1079
def revert_collapsible(document):
    """Revert collapsed insets.

    For every collapsible inset header, the following 'status open' line
    becomes 'collapsed false', and 'status collapsed' / 'status inlined'
    become 'collapsed true'.  A warning is emitted when no status line is
    found before the inset's first '\\begin_layout' or before the end of
    the body.
    """
    inset_headers = [
        "\\begin_inset Box",
        "\\begin_inset Branch",
        "\\begin_inset CharStyle",
        "\\begin_inset Float",
        "\\begin_inset Foot",
        "\\begin_inset Marginal",
        "\\begin_inset Note",
        "\\begin_inset OptArg",
        "\\begin_inset Wrap",
    ]
    replacements = {
        "status open": "collapsed false",
        "status collapsed": "collapsed true",
        "status inlined": "collapsed true",
    }
    body = document.body

    i = 0
    while True:
        i = find_tokens_exact(body, inset_headers, i)
        if i == -1:
            break

        # Search for a line starting 'status'
        # If, however, we find a line starting '\begin_layout'
        # (_always_ present) then break with a warning message
        i = i + 1
        while i < len(body):
            if body[i] in replacements:
                body[i] = replacements[body[i]]
                break
            if body[i][:13] == "\\begin_layout":
                document.warning("Malformed LyX document: Missing 'status'.")
                break
            i = i + 1
        else:
            # Truncated document: neither line was found before the end of
            # the body.  Warn instead of indexing past the last line.
            document.warning("Malformed LyX document: Missing 'status'.")
            return

        i = i + 1
1119
1120
def convert_ert(document):
    """Convert ERT.

    Lower-cases the status value of every ERT inset ('status Open' ->
    'status open', likewise for Collapsed and Inlined).  A warning is
    emitted when no status line is found before the inset's first
    '\\begin_layout' or before the end of the body.
    """
    replacements = {
        "status Open": "status open",
        "status Collapsed": "status collapsed",
        "status Inlined": "status inlined",
    }
    body = document.body

    i = 0
    while True:
        i = find_token(body, "\\begin_inset ERT", i)
        if i == -1:
            break

        # Search for a line starting 'status'
        # If, however, we find a line starting '\begin_layout'
        # (_always_ present) then break with a warning message
        i = i + 1
        while i < len(body):
            if body[i] in replacements:
                body[i] = replacements[body[i]]
                break
            if body[i][:13] == "\\begin_layout":
                document.warning("Malformed LyX document: Missing 'status'.")
                break
            i = i + 1
        else:
            # Truncated document: neither line was found before the end of
            # the body.  Warn instead of indexing past the last line.
            document.warning("Malformed LyX document: Missing 'status'.")
            return

        i = i + 1
1149
1150
def revert_ert(document):
    """Revert ERT.

    Capitalises the status value of every ERT inset ('status open' ->
    'status Open', likewise for collapsed and inlined).  A warning is
    emitted when no status line is found before the inset's first
    '\\begin_layout' or before the end of the body.
    """
    replacements = {
        "status open": "status Open",
        "status collapsed": "status Collapsed",
        "status inlined": "status Inlined",
    }
    body = document.body

    i = 0
    while True:
        i = find_token(body, "\\begin_inset ERT", i)
        if i == -1:
            break

        # Search for a line starting 'status'
        # If, however, we find a line starting '\begin_layout'
        # (_always_ present) then break with a warning message
        i = i + 1
        while i < len(body):
            if body[i] in replacements:
                body[i] = replacements[body[i]]
                break
            if body[i][:13] == "\\begin_layout":
                document.warning("Malformed LyX document : Missing 'status'.")
                break
            i = i + 1
        else:
            # Truncated document: neither line was found before the end of
            # the body.  Warn instead of indexing past the last line.
            document.warning("Malformed LyX document : Missing 'status'.")
            return

        i = i + 1
1179
1180
def convert_minipage(document):
    """Convert minipages to the box inset.
    We try to use the same order of arguments as lyx does.

    Each '\\begin_inset Minipage' header becomes '\\begin_inset Box Frameless'
    and the optional position / inner_position / height / width / collapsed
    lines that follow it are replaced by the Box parameter lines, in this
    order: position, hor_pos, has_inner_box, inner_pos, use_parbox, width,
    special, height, height_special, status.
    """
    pos = ["t", "c", "b"]
    inner_pos = ["c", "t", "b", "s"]
    body = document.body

    i = 0
    while True:
        i = find_token(body, "\\begin_inset Minipage", i)
        if i == -1:
            return

        body[i] = "\\begin_inset Box Frameless"
        i = i + 1

        # convert old to new position using the pos list
        # (the old value is the single digit after "position ")
        if body[i][:8] == "position":
            body[i] = 'position "%s"' % pos[int(body[i][9])]
        else:
            body.insert(i, 'position "%s"' % pos[0])
        i = i + 1

        body.insert(i, 'hor_pos "c"')
        i = i + 1
        body.insert(i, "has_inner_box 1")
        i = i + 1

        # convert the inner_position
        if body[i][:14] == "inner_position":
            innerpos = inner_pos[int(body[i][15])]
            del body[i]
        else:
            innerpos = inner_pos[0]

        # We need this since the new file format has a height and width
        # in a different order.
        # Note that height and width keep their leading space.
        if body[i][:6] == "height":
            height = body[i][6:]
            # test for default value of 221 and convert it accordingly
            if height == ' "0pt"' or height == ' "0"':
                height = ' "1pt"'
            del body[i]
        else:
            height = ' "1pt"'

        if body[i][:5] == "width":
            width = body[i][5:]
            del body[i]
        else:
            width = ' "0"'

        if body[i][:9] == "collapsed":
            # The value is separated from the keyword by whitespace
            # ("collapsed true"), so it must be stripped before comparing;
            # comparing the raw slice to "true" could never succeed and
            # every minipage was converted as open.
            if body[i][9:].strip() == "true":
                status = "collapsed"
            else:
                status = "open"
            del body[i]
        else:
            status = "collapsed"

        # Handle special default case:
        if height == ' "1pt"' and innerpos == "c":
            innerpos = "t"

        body.insert(i, 'inner_pos "' + innerpos + '"')
        i = i + 1
        body.insert(i, "use_parbox 0")
        i = i + 1
        body.insert(i, "width" + width)
        i = i + 1
        body.insert(i, 'special "none"')
        i = i + 1
        body.insert(i, "height" + height)
        i = i + 1
        body.insert(i, 'height_special "totalheight"')
        i = i + 1
        body.insert(i, "status " + status)
        i = i + 1
1260
1261
def convert_ertbackslash(body, i, ert, format, default_layout):
    r"""Append ``ert`` to ``body[i]`` as valid ERT code.

    Each backslash becomes a '\backslash ' token followed by a fresh line.
    Each '\n' becomes a '\newline ' line (format <= 240) or a paragraph
    break made of '\end_layout' / '\begin_layout <default_layout>'
    (newer formats).  All other characters are appended unchanged.

    Returns the (maybe incremented) index of the line being written to.
    """
    for char in ert:
        if char == "\n":
            if format <= 240:
                separator = ["\\newline ", ""]
            else:
                separator = [
                    "\\end_layout",
                    "",
                    "\\begin_layout %s" % default_layout,
                    "",
                ]
            # Insert after the current line and continue writing on the
            # trailing empty line of the separator.
            body[i + 1 : i + 1] = separator
            i += len(separator)
        elif char == "\\":
            body[i] += "\\backslash "
            i += 1
            body.insert(i, "")
        else:
            body[i] += char
    return i
1287
1288
def ert2latex(lines, format):
    r"""Converts lines in ERT code to LaTeX
    The surrounding \begin_layout ... \end_layout pair must not be included

    A trailing '\backslash' token on a line becomes a single backslash.
    For format <= 240 a layout line becomes two newlines and a trailing
    '\newline' token becomes one newline; for newer formats a layout line
    becomes one newline.  For format > 224, '\end_layout' lines are dropped.
    The converted lines are concatenated without separators.
    """
    backslash_re = re.compile(r"\\backslash\s*$")
    newline_re = re.compile(r"\\newline\s*$")
    if format <= 224:
        layout_re = re.compile(r"\\layout\s*\S+$")
    else:
        layout_re = re.compile(r"\\begin_layout\s*\S+$")
    end_layout_re = re.compile(r"\\end_layout\s*$")

    pieces = []
    for raw in lines:
        text = backslash_re.sub("\\\\", raw)
        starts_paragraph = layout_re.match(text)
        if format <= 240:
            text = "\n\n" if starts_paragraph else newline_re.sub("\n", text)
        elif starts_paragraph:
            text = "\n"
        if format > 224 and end_layout_re.match(text):
            text = ""
        pieces.append(text)
    return "".join(pieces)
1315
1316
def get_par_params(lines, i):
    """get all paragraph parameters. They can be all on one line or on several lines.
    lines[i] must be the first parameter line

    Consecutive lines starting at index i are collected for as long as each
    one begins with a backslash followed by a known paragraph parameter name.
    The collected lines are stripped and returned joined by single spaces;
    the result is an empty string when lines[i] is not a parameter line.
    Scanning also stops at the end of ``lines``.
    """
    par_params = (
        "added_space_bottom",
        "added_space_top",
        "align",
        "labelwidthstring",
        "line_bottom",
        "line_top",
        "noindent",
        "pagebreak_bottom",
        "pagebreak_top",
        "paragraph_spacing",
        "start_of_appendix",
    )
    # We cannot check for '\\' only because paragraphs may start e.g.
    # with '\\backslash'
    collected = []
    while i < len(lines):
        line = lines[i]
        if line[:1] != "\\":
            break
        words = line[1:].split()
        # A bare backslash has no parameter name; treat it as ordinary text
        # rather than failing on words[0].
        if not words or words[0] not in par_params:
            break
        collected.append(line.strip())
        i = i + 1
    return " ".join(collected)
1340
1341
def lyxsize2latexsize(lyxsize):
    """Convert LyX font size to LaTeX fontsize.

    Returns the LaTeX size command including its leading backslash, or an
    empty string when the LyX size name is not known.
    """
    latex_names = {
        "tiny": "tiny",
        "scriptsize": "scriptsize",
        "footnotesize": "footnotesize",
        "small": "small",
        "normal": "normalsize",
        "large": "large",
        "larger": "Large",
        "largest": "LARGE",
        "huge": "huge",
        "giant": "Huge",
    }
    command = latex_names.get(lyxsize)
    if command is None:
        return ""
    return "\\" + command
1359
1360
def revert_breaks(document):
    """Change vspace insets, page breaks and lyxlines to paragraph options
    (if possible) or ERT

    For each VSpace inset, '\\lyxline' or '\\newpage' found in the body, the
    run of such insets around it is collected.  If the run forms a paragraph
    of its own, or sits at the start or end of a paragraph, it is turned into
    top/bottom paragraph parameters, unless such a parameter is already
    present (or, for own-paragraph lyxlines, the font size differs).
    Otherwise the first inset is replaced by an ERT inset and the remaining
    ones are handled on the next loop iterations.
    """

    # Get default spaceamount
    i = find_token(document.header, "\\defskip", 0)
    if i == -1:
        defskipamount = "medskip"
    else:
        defskipamount = document.header[i].split()[1]

    keys = {"\\begin_inset": "vspace", "\\lyxline": "lyxline", "\\newpage": "newpage"}
    keywords_top = {
        "vspace": "\\added_space_top",
        "lyxline": "\\line_top",
        "newpage": "\\pagebreak_top",
    }
    keywords_bot = {
        "vspace": "\\added_space_bottom",
        "lyxline": "\\line_bottom",
        "newpage": "\\pagebreak_bottom",
    }
    tokens = ["\\begin_inset VSpace", "\\lyxline", "\\newpage"]

    # Convert the insets
    i = 0
    while True:
        i = find_tokens(document.body, tokens, i)
        if i == -1:
            return

        # Are we at the beginning of a paragraph?
        paragraph_start = 1
        this_par = get_paragraph(document.body, i, document.format - 1)
        start = this_par + 1
        params = get_par_params(document.body, start)
        size = "normal"
        # Paragraph parameters may be on one or more lines.
        # Find the start of the real paragraph text.
        # (params is a string, so the 'in' test below is a substring test)
        while document.body[start][:1] == "\\" and document.body[start].split()[0] in params:
            start = start + 1
        for k in range(start, i):
            if document.body[k].find("\\size") != -1:
                # store font size
                size = document.body[k].split()[1]
            elif is_nonempty_line(document.body[k]):
                paragraph_start = 0
                break
        # Find the end of the real paragraph text.
        next_par = get_next_paragraph(document.body, i, document.format - 1)
        if next_par == -1:
            document.warning("Malformed LyX document: Missing next paragraph.")
            i = i + 1
            continue

        # first line of our insets
        inset_start = i
        # last line of our insets
        inset_end = inset_start
        # Are we at the end of a paragraph?
        paragraph_end = 1
        # start and end line numbers to delete if we convert this inset
        del_lines = list()
        # is this inset a lyxline above a paragraph?
        top = list()
        # raw inset information
        lines = list()
        # name of this inset
        insets = list()
        # font size of this inset
        sizes = list()

        # Detect subsequent lyxline, vspace and pagebreak insets created by convert_breaks()
        n = 0
        k = inset_start
        while k < next_par:
            if find_tokens(document.body, tokens, k) == k:
                # inset to convert
                lines.append(document.body[k].split())
                insets.append(keys[lines[n][0]])
                del_lines.append([k, k])
                top.append(0)
                sizes.append(size)
                n = n + 1
                inset_end = k
            elif document.body[k].find("\\size") != -1:
                # store font size
                size = document.body[k].split()[1]
            elif find_token(document.body, "\\begin_inset ERT", k) == k:
                ert_begin = find_token(document.body, "\\layout", k) + 1
                if ert_begin == 0:
                    document.warning("Malformed LyX document: Missing '\\layout'.")
                    # Move past this line; retrying the same k would loop
                    # forever.
                    k = k + 1
                    continue
                ert_end = find_end_of_inset(document.body, k)
                if ert_end == -1:
                    document.warning("Malformed LyX document: Missing '\\end_inset'.")
                    # Move past this line; retrying the same k would loop
                    # forever.
                    k = k + 1
                    continue
                ert = ert2latex(document.body[ert_begin:ert_end], document.format - 1)
                if n > 0 and insets[n - 1] == "lyxline" and ert == "\\vspace{-1\\parskip}\n":
                    # vspace ERT created by convert_breaks() for top lyxline
                    top[n - 1] = 1
                    del_lines[n - 1][1] = ert_end
                    inset_end = ert_end
                    k = ert_end
                else:
                    paragraph_end = 0
                    break
            elif (
                n > 0
                and insets[n - 1] == "vspace"
                and find_token(document.body, "\\end_inset", k) == k
            ):
                # ignore end of vspace inset
                del_lines[n - 1][1] = k
                inset_end = k
            elif is_nonempty_line(document.body[k]):
                paragraph_end = 0
                break
            k = k + 1

        # Determine space amount for vspace insets
        spaceamount = list()
        arguments = list()
        for k in range(n):
            if insets[k] == "vspace":
                spaceamount.append(lines[k][2])
                arguments.append(" " + spaceamount[k] + " ")
            else:
                spaceamount.append("")
                arguments.append(" ")

        # Can we convert to top paragraph parameters?
        before = 0
        if (
            (
                n == 3
                and insets[0] == "newpage"
                and insets[1] == "vspace"
                and insets[2] == "lyxline"
                and top[2]
            )
            or (
                n == 2
                and (
                    (insets[0] == "newpage" and insets[1] == "vspace")
                    or (insets[0] == "newpage" and insets[1] == "lyxline" and top[1])
                    or (insets[0] == "vspace" and insets[1] == "lyxline" and top[1])
                )
            )
            or (n == 1 and insets[0] == "lyxline" and top[0])
        ):
            # These insets have been created before a paragraph by
            # convert_breaks()
            before = 1

        # Can we convert to bottom paragraph parameters?
        after = 0
        if (
            (
                n == 3
                and insets[0] == "lyxline"
                and not top[0]
                and insets[1] == "vspace"
                and insets[2] == "newpage"
            )
            or (
                n == 2
                and (
                    (insets[0] == "lyxline" and not top[0] and insets[1] == "vspace")
                    or (insets[0] == "lyxline" and not top[0] and insets[1] == "newpage")
                    or (insets[0] == "vspace" and insets[1] == "newpage")
                )
            )
            or (n == 1 and insets[0] == "lyxline" and not top[0])
        ):
            # These insets have been created after a paragraph by
            # convert_breaks()
            after = 1

        if paragraph_start and paragraph_end:
            # We are in a paragraph of our own.
            # We must not delete this paragraph if it has parameters
            if params == "":
                # First try to merge with the previous paragraph.
                # We try the previous paragraph first because we would
                # otherwise need ERT for two subsequent vspaces.
                prev_par = get_paragraph(document.body, this_par - 1, document.format - 1) + 1
                if prev_par > 0 and not before:
                    prev_params = get_par_params(document.body, prev_par + 1)
                    ert = 0
                    # determine font size
                    prev_size = "normal"
                    k = prev_par + 1
                    while (
                        document.body[k][:1] == "\\"
                        and document.body[k].split()[0] in prev_params
                    ):
                        k = k + 1
                    while k < this_par:
                        if document.body[k].find("\\size") != -1:
                            prev_size = document.body[k].split()[1]
                            break
                        elif document.body[k].find("\\begin_inset") != -1:
                            # skip insets
                            k = find_end_of_inset(document.body, k)
                        elif is_nonempty_line(document.body[k]):
                            break
                        k = k + 1
                    for k in range(n):
                        if keywords_bot[insets[k]] in prev_params or (
                            insets[k] == "lyxline" and sizes[k] != prev_size
                        ):
                            ert = 1
                            break
                    if not ert:
                        for k in range(n):
                            document.body.insert(
                                prev_par + 1, keywords_bot[insets[k]] + arguments[k]
                            )
                        # The n insertions above shifted this paragraph down
                        del document.body[this_par + n : next_par - 1 + n]
                        i = this_par + n
                        continue
                # Then try next paragraph
                if next_par > 0 and not after:
                    next_params = get_par_params(document.body, next_par + 1)
                    ert = 0
                    # determine font size
                    next_size = "normal"
                    # Start at the first line of the next paragraph before
                    # skipping its parameter lines (k previously held a
                    # leftover value from an unrelated loop at this point).
                    k = next_par + 1
                    while (
                        document.body[k][:1] == "\\"
                        and document.body[k].split()[0] in next_params
                    ):
                        k = k + 1
                    # NOTE(review): k starts beyond next_par but is compared
                    # with this_par; if next_par always lies after this_par
                    # (confirm against get_next_paragraph) this loop never
                    # runs and next_size stays "normal".  Left unchanged to
                    # keep the conversion output stable.
                    while k < this_par:
                        if document.body[k].find("\\size") != -1:
                            next_size = document.body[k].split()[1]
                            break
                        elif is_nonempty_line(document.body[k]):
                            break
                        k = k + 1
                    for k in range(n):
                        if keywords_top[insets[k]] in next_params or (
                            insets[k] == "lyxline" and sizes[k] != next_size
                        ):
                            ert = 1
                            break
                    if not ert:
                        for k in range(n):
                            document.body.insert(
                                next_par + 1, keywords_top[insets[k]] + arguments[k]
                            )
                        del document.body[this_par : next_par - 1]
                        i = this_par
                        continue
        elif paragraph_start or paragraph_end:
            # Convert to paragraph formatting if we are at the beginning or end
            # of a paragraph and the resulting paragraph would not be empty
            # The order is important: del and insert invalidate some indices
            if paragraph_start:
                keywords = keywords_top
            else:
                keywords = keywords_bot
            ert = 0
            for k in range(n):
                if keywords[insets[k]] in params:
                    ert = 1
                    break
            if not ert:
                for k in range(n):
                    document.body.insert(this_par + 1, keywords[insets[k]] + arguments[k])
                    # the insertion moved all remaining insets down one line
                    for j in range(k, n):
                        del_lines[j][0] = del_lines[j][0] + 1
                        del_lines[j][1] = del_lines[j][1] + 1
                    del document.body[del_lines[k][0] : del_lines[k][1] + 1]
                    deleted = del_lines[k][1] - del_lines[k][0] + 1
                    # the deletion moved the following insets up again
                    for j in range(k + 1, n):
                        del_lines[j][0] = del_lines[j][0] - deleted
                        del_lines[j][1] = del_lines[j][1] - deleted
                i = this_par
                continue

        # Convert the first inset to ERT.
        # The others are converted in the next loop runs (if they exist)
        if insets[0] == "vspace":
            document.body[i : i + 1] = [
                "\\begin_inset ERT",
                "status Collapsed",
                "",
                "\\layout %s" % document.default_layout,
                "",
                "\\backslash ",
            ]
            i = i + 6
            # a trailing '*' marks the starred (kept) variant
            if spaceamount[0][-1] == "*":
                spaceamount[0] = spaceamount[0][:-1]
                keep = 1
            else:
                keep = 0

            # Replace defskip by the actual value
            if spaceamount[0] == "defskip":
                spaceamount[0] = defskipamount

            # LaTeX does not know \\smallskip* etc
            if keep:
                if spaceamount[0] == "smallskip":
                    spaceamount[0] = "\\smallskipamount"
                elif spaceamount[0] == "medskip":
                    spaceamount[0] = "\\medskipamount"
                elif spaceamount[0] == "bigskip":
                    spaceamount[0] = "\\bigskipamount"
                elif spaceamount[0] == "vfill":
                    spaceamount[0] = "\\fill"

            # Finally output the LaTeX code
            if (
                spaceamount[0] == "smallskip"
                or spaceamount[0] == "medskip"
                or spaceamount[0] == "bigskip"
                or spaceamount[0] == "vfill"
            ):
                document.body.insert(i, spaceamount[0] + "{}")
            else:
                if keep:
                    document.body.insert(i, "vspace*{")
                else:
                    document.body.insert(i, "vspace{")
                i = convert_ertbackslash(
                    document.body,
                    i,
                    spaceamount[0],
                    document.format - 1,
                    document.default_layout,
                )
                document.body[i] = document.body[i] + "}"
            i = i + 1
        elif insets[0] == "lyxline":
            document.body[i] = ""
            latexsize = lyxsize2latexsize(size)
            if latexsize == "":
                document.warning(
                    "Could not convert LyX fontsize '%s' to LaTeX font size." % size
                )
                latexsize = "\\normalsize"
            i = insert_ert(
                document.body,
                i,
                "Collapsed",
                "\\lyxline{%s}" % latexsize,
                document.format - 1,
                document.default_layout,
            )
            # We use \providecommand so that we don't get an error if native
            # lyxlines are used (LyX writes first its own preamble and then
            # the user specified one)
            add_to_preamble(
                document,
                [
                    "% Commands inserted by lyx2lyx for lyxlines",
                    "\\providecommand{\\lyxline}[1]{",
                    "  {#1 \\vspace{1ex} \\hrule width \\columnwidth \\vspace{1ex}}" "}",
                ],
            )
        elif insets[0] == "newpage":
            document.body[i] = ""
            i = insert_ert(
                document.body,
                i,
                "Collapsed",
                "\\newpage{}",
                document.format - 1,
                document.default_layout,
            )
1735
1736
def convert_len(len, special):
    """Convert a LyX length into a LaTeX length.

    When ``special`` is not "none", the numeric value of the length is
    combined with ``special`` as a LaTeX macro.  Afterwards the first LyX
    percentage unit found in the length (if any) is replaced by the
    matching LaTeX length scaled by value / 100.
    """
    latex_units = {
        "text%": "\\textwidth",
        "col%": "\\columnwidth",
        "page%": "\\pagewidth",
        "line%": "\\linewidth",
        "theight%": "\\textheight",
        "pheight%": "\\pageheight",
    }

    # Convert special lengths
    if special != "none":
        len = "%f\\" % len2value(len) + special

    # Convert LyX units to LaTeX units
    for lyx_unit, latex_unit in latex_units.items():
        if lyx_unit in len:
            len = "%f" % (len2value(len) / 100) + latex_unit
            break

    return len
1759
1760
def convert_ertlen(body, i, len, special, format, default_layout):
    """Write a LyX length into body as ERT code.

    The length is first translated with convert_len(); the result is then
    handed to convert_ertbackslash(), which takes care of the backslashes
    and of inserting the text at body[i].
    Returns whatever convert_ertbackslash() returns (the possibly
    incremented line index)."""
    latex_length = convert_len(len, special)
    return convert_ertbackslash(body, i, latex_length, format, default_layout)
1766
1767
def len2value(len):
    """Return the value of len without the unit in numerical form.

    The first number found in ``len`` (optional sign, digits, optional
    decimal point) is returned as a float.  If ``len`` contains no number
    at all, 1.0 is returned.
    """
    # Only match text that float() can really parse: a lone "." or a second
    # decimal point (as in "1.2.3") must not become part of the match,
    # otherwise float() would raise ValueError.
    result = re.search(r"[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)", len)
    if result:
        return float(result.group(0))
    # No number means 1.0
    return 1.0
1775
1776
def insert_ert(body, i, status, text, format, default_layout):
    """Wrap text in an ERT inset and insert that inset at body[i].

    Formats up to 224 open the paragraph with '\\layout', later formats
    use a '\\begin_layout' / '\\end_layout' pair.
    Return the index of the line after the inserted ERT"""

    new_style = format > 224
    if new_style:
        layout_line = "\\begin_layout %s" % default_layout
    else:
        layout_line = "\\layout %s" % default_layout

    opening = ["\\begin_inset ERT", "status " + status, "", layout_line, ""]
    body[i:i] = opening
    # Move to the trailing empty line of the opening; the text goes there.
    i += len(opening) - 1
    i = convert_ertbackslash(body, i, text, format, default_layout) + 1

    closing = ["", "\\end_inset", ""]
    if new_style:
        closing.insert(0, "\\end_layout")
    body[i:i] = closing
    return i + len(closing)
1795
1796
def add_to_preamble(document, text):
    """Append the lines in text to the preamble unless they are present.

    Presence is decided by looking for the first line of text only."""

    already_there = find_token(document.preamble, text[0], 0) != -1
    if not already_there:
        document.preamble.extend(text)
1805
1806
def convert_frameless_box(document):
    """Convert frameless box.

    Every '\\begin_inset Frameless' inset in the body is replaced by a
    Minipage inset.  If the box uses settings listed in the condition below
    (parbox, no inner box, special width, non-default height), the box
    itself is written as ERT around a Minipage inset whose minipage
    environment is redefined to be empty; the required LaTeX helper
    definitions are added to the preamble.
    """
    pos = ["t", "c", "b"]
    inner_pos = ["c", "t", "b", "s"]
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Frameless", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing '\\end_inset'.")
            i = i + 1
            continue
        del document.body[i]
        j = j - 1

        # Gather parameters
        params = {
            "position": 0,
            "hor_pos": "c",
            "has_inner_box": "1",
            "inner_pos": 1,
            "use_parbox": "0",
            "width": "100col%",
            "special": "none",
            "height": "1in",
            "height_special": "totalheight",
            "collapsed": "false",
        }
        for key in list(params.keys()):
            value = get_value(document.body, key, i, j).replace('"', "")
            if value != "":
                if key == "position":
                    # convert new to old position: 'position "t"' -> 0
                    value = find_token(pos, value, 0)
                    if value != -1:
                        params[key] = value
                elif key == "inner_pos":
                    # convert inner position
                    value = find_token(inner_pos, value, 0)
                    if value != -1:
                        params[key] = value
                else:
                    params[key] = value
                j = del_token(document.body, key, i, j)
        i = i + 1

        # Convert to minipage or ERT?
        # Note that the inner_position and height parameters of a minipage
        # inset are ignored and not accessible for the user, although they
        # are present in the file format and correctly read in and written.
        # Therefore we convert to ERT if they do not have their LaTeX
        # defaults. These are:
        # - the value of "position" for "inner_pos"
        # - "\totalheight"          for "height"
        if (
            params["use_parbox"] != "0"
            or params["has_inner_box"] != "1"
            or params["special"] != "none"
            or params["height_special"] != "totalheight"
            or len2value(params["height"]) != 1.0
        ):
            # Here we know that this box is not supported in file format 224.
            # Therefore we need to convert it to ERT. We can't simply convert
            # the beginning and end of the box to ERT, because the
            # box inset may contain layouts that are different from the
            # surrounding layout. After the conversion the contents of the
            # box inset is on the same level as the surrounding text, and
            # paragraph layouts and align parameters can get mixed up.

            # A possible solution for this problem:
            # Convert the box to a minipage and redefine the minipage
            # environment in ERT so that the original box is simulated.
            # For minipages we could do this in a way that the width and
            # position can still be set from LyX, but this did not work well.
            # This is not possible for parboxes either, so we convert the
            # original box to ERT, put the minipage inset inside the box
            # and redefine the minipage environment to be empty.

            # Commands that are independent of a particular box can go to
            # the preamble.
            # We need to define lyxtolyxrealminipage with 3 optional
            # arguments although LyX 1.3 uses only the first one.
            # Otherwise we will get LaTeX errors if this document is
            # converted to format 225 or above again (LyX 1.4 uses all
            # optional arguments).
            add_to_preamble(
                document,
                [
                    "% Commands inserted by lyx2lyx for frameless boxes",
                    "% Save the original minipage environment",
                    "\\let\\lyxtolyxrealminipage\\minipage",
                    "\\let\\endlyxtolyxrealminipage\\endminipage",
                    "% Define an empty lyxtolyximinipage environment",
                    "% with 3 optional arguments",
                    "\\newenvironment{lyxtolyxiiiminipage}[4]{}{}",
                    "\\newenvironment{lyxtolyxiiminipage}[2][\\lyxtolyxargi]%",
                    "  {\\begin{lyxtolyxiiiminipage}{\\lyxtolyxargi}{\\lyxtolyxargii}{#1}{#2}}%",
                    "  {\\end{lyxtolyxiiiminipage}}",
                    "\\newenvironment{lyxtolyximinipage}[1][\\totalheight]%",
                    "  {\\def\\lyxtolyxargii{{#1}}\\begin{lyxtolyxiiminipage}}%",
                    "  {\\end{lyxtolyxiiminipage}}",
                    "\\newenvironment{lyxtolyxminipage}[1][c]%",
                    "  {\\def\\lyxtolyxargi{{#1}}\\begin{lyxtolyximinipage}}",
                    "  {\\end{lyxtolyximinipage}}",
                ],
            )

            if params["use_parbox"] != "0":
                ert = "\\parbox"
            else:
                ert = "\\begin{lyxtolyxrealminipage}"

            # convert optional arguments only if not latex default
            if (
                pos[params["position"]] != "c"
                or inner_pos[params["inner_pos"]] != pos[params["position"]]
                or params["height_special"] != "totalheight"
                or len2value(params["height"]) != 1.0
            ):
                ert = ert + "[" + pos[params["position"]] + "]"
            if (
                inner_pos[params["inner_pos"]] != pos[params["position"]]
                or params["height_special"] != "totalheight"
                or len2value(params["height"]) != 1.0
            ):
                ert = ert + "[" + convert_len(params["height"], params["height_special"]) + "]"
            if inner_pos[params["inner_pos"]] != pos[params["position"]]:
                ert = ert + "[" + inner_pos[params["inner_pos"]] + "]"

            ert = ert + "{" + convert_len(params["width"], params["special"]) + "}"

            if params["use_parbox"] != "0":
                ert = ert + "{"
            ert = ert + "\\let\\minipage\\lyxtolyxminipage%\n"
            ert = ert + "\\let\\endminipage\\endlyxtolyxminipage%\n"

            old_i = i
            i = insert_ert(
                document.body,
                i,
                "Collapsed",
                ert,
                document.format - 1,
                document.default_layout,
            )
            j = j + i - old_i - 1

            document.body[i:i] = [
                "\\begin_inset Minipage",
                "position %d" % params["position"],
                "inner_position 1",
                'height "1in"',
                'width "' + params["width"] + '"',
                "collapsed " + params["collapsed"],
            ]
            i = i + 6
            j = j + 6

            # Restore the original minipage environment since we may have
            # minipages inside this box.
            # Start a new paragraph because the following may be nonstandard
            document.body[i:i] = ["\\layout %s" % document.default_layout, "", ""]
            i = i + 2
            j = j + 3
            ert = "\\let\\minipage\\lyxtolyxrealminipage%\n"
            # The saved end macro is \endlyxtolyxrealminipage (see the
            # preamble definitions above); restoring from any other name
            # would leave \endminipage undefined for nested minipages.
            ert = ert + "\\let\\endminipage\\endlyxtolyxrealminipage%"
            old_i = i
            i = insert_ert(
                document.body,
                i,
                "Collapsed",
                ert,
                document.format - 1,
                document.default_layout,
            )
            j = j + i - old_i - 1

            # Redefine the minipage end before the inset end.
            # Start a new paragraph because the previous may be nonstandard
            document.body[j:j] = ["\\layout %s" % document.default_layout, "", ""]
            j = j + 2
            ert = "\\let\\endminipage\\endlyxtolyxminipage"
            j = insert_ert(
                document.body,
                j,
                "Collapsed",
                ert,
                document.format - 1,
                document.default_layout,
            )
            j = j + 1
            document.body.insert(j, "")
            j = j + 1

            # LyX writes '%\n' after each box. Therefore we need to end our
            # ERT with '%\n', too, since this may swallow a following space.
            if params["use_parbox"] != "0":
                ert = "}%\n"
            else:
                ert = "\\end{lyxtolyxrealminipage}%\n"
            j = insert_ert(
                document.body,
                j,
                "Collapsed",
                ert,
                document.format - 1,
                document.default_layout,
            )

            # We don't need to restore the original minipage after the inset
            # end because the scope of the redefinition is the original box.

        else:
            # Convert to minipage
            document.body[i:i] = [
                "\\begin_inset Minipage",
                "position %d" % params["position"],
                "inner_position %d" % params["inner_pos"],
                'height "' + params["height"] + '"',
                'width "' + params["width"] + '"',
                "collapsed " + params["collapsed"],
            ]
            i = i + 6
2032
2033
def remove_branches(document):
    """Remove branches.

    Branch definitions ('\\branch' ... '\\end_branch') are deleted from the
    header, and every Branch inset in the body is dissolved: its begin and
    end lines, its 'collapsed' line and its first '\\layout' line are
    deleted while the remaining content stays in place.
    """
    header = document.header
    start = 0
    while True:
        start = find_token(header, "\\branch", start)
        if start == -1:
            break
        document.warning("Removing branch %s." % header[start].split()[1])
        stop = find_token(header, "\\end_branch", start)
        if stop == -1:
            document.warning("Malformed LyX document: Missing '\\end_branch'.")
            break
        del header[start : stop + 1]

    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Branch", i)
        if i == -1:
            return
        j = find_end_of_inset(body, i)
        if j == -1:
            document.warning("Malformed LyX document: Missing '\\end_inset'.")
            i += 1
            continue
        # Drop the begin line first; the end line has then moved up by one.
        del body[i]
        del body[j - 1]
        # Look for a line starting with 'collapsed'. The scan ends at the
        # first line starting with '\layout' (_always_ present); if no
        # 'collapsed' line was seen until then, a warning is issued.
        seen_collapsed = False
        while True:
            line = body[i]
            if line.startswith("collapsed"):
                del body[i]
                seen_collapsed = True
            elif line.startswith("\\layout"):
                if not seen_collapsed:
                    document.warning("Malformed LyX document: Missing 'collapsed'.")
                # Delete this new paragraph, since it would not appear in
                # .tex output. This avoids also empty paragraphs.
                del body[i]
                break
            else:
                i += 1
2077
2078
def convert_jurabib(document):
    """Add '\\use_jurabib 0' to the header.

    The new line is placed right after '\\use_numerical_citations'; if that
    token is missing only a warning is issued."""
    anchor = find_token(document.header, "\\use_numerical_citations", 0)
    if anchor != -1:
        document.header.insert(anchor + 1, "\\use_jurabib 0")
        return
    document.warning("Malformed lyx document: Missing '\\use_numerical_citations'.")
2086
2087
def revert_jurabib(document):
    """Remove '\\use_jurabib' from the header if its value is 0.

    Any other value cannot be converted: a warning is issued and the line
    is kept."""
    pos = find_token(document.header, "\\use_jurabib", 0)
    if pos == -1:
        document.warning("Malformed lyx document: Missing '\\use_jurabib'.")
        return
    if get_value(document.header, "\\use_jurabib", 0) == "0":
        del document.header[pos]
        return
    # Don't remove '\\use_jurabib' so that people will get warnings by lyx
    document.warning("Conversion of '\\use_jurabib = 1' not yet implemented.")
2099
2100
def convert_bibtopic(document):
    """Add '\\use_bibtopic 0' to the header.

    The new line is placed right after '\\use_jurabib'; if that token is
    missing only a warning is issued."""
    anchor = find_token(document.header, "\\use_jurabib", 0)
    if anchor != -1:
        document.header.insert(anchor + 1, "\\use_bibtopic 0")
        return
    document.warning("Malformed lyx document: Missing '\\use_jurabib'.")
2108
2109
def revert_bibtopic(document):
    """Revert bibtopic.

    '\\use_bibtopic' is removed from the header when its value is 0.  Any
    other value cannot be converted: a warning is issued and the line is
    kept, in the same way as revert_jurabib() handles '\\use_jurabib'.
    """
    i = find_token(document.header, "\\use_bibtopic", 0)
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\use_bibtopic'.")
        return
    if get_value(document.header, "\\use_bibtopic", 0) != "0":
        document.warning("Conversion of '\\use_bibtopic = 1' not yet implemented.")
        # Don't remove '\\use_bibtopic' so that people will get warnings by lyx
        return
    del document.header[i]
2120
2121
def convert_float(document):
    """Add 'sideways false' to every float inset.

    The new parameter is inserted right after the 'wide' line of the
    inset."""
    body = document.body
    i = 0
    while True:
        i = find_token_exact(body, "\\begin_inset Float", i)
        if i == -1:
            return
        # Scan the inset parameters for a line starting with 'wide'.
        # Reaching a line starting with '\begin_layout' (_always_ present)
        # first means that the parameter is missing.
        i += 1
        while True:
            line = body[i]
            if line.startswith("wide"):
                body.insert(i + 1, "sideways false")
                break
            if line.startswith("\\begin_layout"):
                document.warning("Malformed lyx document: Missing 'wide'.")
                break
            i += 1
        i += 1
2142
2143
def revert_float(document):
    """Revert sideways floats.

    Only figure and table floats are touched.  A float whose 'sideways'
    value is not "false" is replaced by ERT that opens and closes a
    sideways<type> environment, and rotfloat is added to the preamble.
    For the other floats the 'sideways' parameter is simply removed.
    """
    # Compiled once; the float type is whatever follows the inset name.
    float_re = re.compile(r"\\begin_inset Float (.*)$")
    i = 0
    while True:
        i = find_token_exact(document.body, "\\begin_inset Float", i)
        if i == -1:
            return
        m = float_re.match(document.body[i])
        # The token search may accept lines the regex rejects (no float
        # type, unusual spacing): treat those like an unsupported type
        # instead of failing on m.group().
        floattype = m.group(1) if m else ""
        if floattype != "figure" and floattype != "table":
            i = i + 1
            continue
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Missing '\\end_inset'.")
            i = i + 1
            continue
        if get_value(document.body, "sideways", i, j) != "false":
            layout_start = find_token(document.body, "\\begin_layout Standard", i + 1, j)
            if layout_start == -1:
                document.warning(
                    "Malformed LyX document: Missing `\\begin_layout Standard' in Float inset."
                )
                return
            # Replace the inset end first, so that index j is still valid.
            document.body[j] = (
                "\\layout Standard\n\\begin_inset ERT\nstatus Collapsed\n\n"
                "\\layout Standard\n\n\n\\backslash\n"
                "end{sideways" + floattype + "}\n\n\\end_inset\n"
            )
            # Drop the inset parameters between the inset begin and the
            # first Standard layout.
            del document.body[i + 1 : layout_start - 1]
            document.body[i] = (
                "\\begin_inset ERT\nstatus Collapsed\n\n"
                "\\layout Standard\n\n\n\\backslash\n"
                "begin{sideways" + floattype + "}\n\n\\end_inset\n\n"
            )
            add_to_preamble(document, ["\\usepackage{rotfloat}\n"])
            i = i + 1
            continue
        del_token(document.body, "sideways", i, j)
        i = i + 1
2186
2187
def convert_graphics(document):
    """Add extension to filenames of insetgraphics if necessary.

    For every graphics inset the 'filename' parameter is checked: if the
    named file does not exist but the same name with '.ps' or '.eps'
    appended does, that extension is appended in the document.  Relative
    names are resolved against document.dir; if that is empty a warning is
    issued and the name is left alone.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset Graphics", i)
        if i == -1:
            return

        # The graphics inset stores its file in a 'filename <path>' line.
        j = find_token_exact(document.body, "filename", i)
        if j == -1:
            return
        i = i + 1
        filename = document.body[j].split()[1]
        if document.dir == "" and not os.path.isabs(filename):
            # We don't know the directory and cannot check the document.
            # We could use a heuristic and take the current directory,
            # and we could try to find out if documentname has an extension,
            # but that would be just guesses and could be wrong.
            document.warning(
                """Warning: Cannot determine whether document
         %s
         needs an extension when reading from standard input.
         You may need to correct the document manually or run
         lyx2lyx again with the .lyx document as commandline argument."""
                % filename
            )
            continue
        absname = os.path.normpath(os.path.join(document.dir, filename))
        # This needs to be the same algorithm as in pre 233 insetgraphics
        if access(absname, F_OK):
            continue
        for ext in (".ps", ".eps"):
            if access(absname + ext, F_OK):
                line = document.body[j]
                # Append the extension to the name itself: look for it
                # only after the 'filename' keyword, so that a name which
                # happens to be part of the keyword cannot corrupt it.
                keyword_end = line.find("filename") + len("filename")
                name_end = line.find(filename, keyword_end) + len(filename)
                document.body[j] = line[:name_end] + ext + line[name_end:]
                break
2224
2225
def convert_names(document):
    """Turn FirstName and Surname layouts into char styles.

    Only documents using the docbook backend are changed.  The names are
    taken from the nested paragraphs following an empty Author layout and
    are put into CharStyle insets inside the Author paragraph.
    """
    if document.backend != "docbook":
        return

    def layout_text(lines, token):
        "Join the lines between the layout found by token and its end."
        start = find_token(lines, token, 0)
        if start == -1:
            return ""
        collected = ""
        idx = start + 1
        while lines[idx] != "\\end_layout":
            collected = collected + lines[idx]
            idx += 1
        return collected

    def charstyle(style, content):
        "Return the lines of an inlined CharStyle inset holding content."
        return [
            "\\begin_inset CharStyle " + style,
            "status inlined",
            "",
            "\\begin_layout %s" % document.default_layout,
            "",
            "%s" % content,
            "\\end_layout",
            "",
            "\\end_inset",
            "",
        ]

    body = document.body
    i = 0
    while True:
        i = find_token(body, "\\begin_layout Author", i)
        if i == -1:
            return

        # Skip the empty lines following the layout line.
        i += 1
        while body[i] == "":
            i += 1

        # Only an empty Author paragraph directly followed by nested
        # paragraphs is of interest.
        author_is_empty = body[i].startswith("\\end_layout")
        if not (author_is_empty and body[i + 2].startswith("\\begin_deeper")):
            i += 1
            continue

        k = i
        i = find_end_of(body, i + 3, "\\begin_deeper", "\\end_deeper")
        if i == -1:
            # something is really wrong, abort
            document.warning("Missing \\end_deeper, after style Author.")
            document.warning("Aborted attempt to parse FirstName and Surname.")
            return

        name = body[k:i]
        firstname = layout_text(name, "\\begin_layout FirstName")
        surname = layout_text(name, "\\begin_layout Surname")

        # delete name
        del body[k + 2 : i + 1]

        replacement = ["", ""]
        replacement.extend(charstyle("Firstname", firstname))
        replacement.append("")
        replacement.extend(charstyle("Surname", surname))
        body[k - 1 : k - 1] = replacement
2304
2305
def revert_names(document):
    """Revert firstname and surname char styles to styles (docbook only).

    Nothing is converted at the moment: the function only looks at the
    backend of the document and returns.
    """
    uses_docbook = document.backend == "docbook"
    if not uses_docbook:
        return
2312
2313
def convert_cite_engine(document):
    r"""Merge the three citation switches into one \cite_engine line.

    \use_natbib, \use_numerical_citations and \use_jurabib have to be
    on consecutive header lines, in this order.  They are replaced by
    \cite_engine <style>, where <style> is one of "basic",
    "natbib_authoryear", "natbib_numerical" or "jurabib"."""

    header = document.header

    a = find_token(header, "\\use_natbib", 0)
    if a == -1:
        document.warning("Malformed lyx document: Missing '\\use_natbib'.")
        return

    # A token that is not found (-1) can never be the direct successor of
    # a found one, so a single comparison covers both error cases.
    b = find_token(header, "\\use_numerical_citations", 0)
    if b != a + 1:
        document.warning("Malformed lyx document: Missing '\\use_numerical_citations'.")
        return

    c = find_token(header, "\\use_jurabib", 0)
    if c != b + 1:
        document.warning("Malformed lyx document: Missing '\\use_jurabib'.")
        return

    natbib = int(header[a].split()[1])
    numerical = int(header[b].split()[1])
    jurabib = int(header[c].split()[1])

    if natbib:
        engine = "natbib_numerical" if numerical else "natbib_authoryear"
    elif jurabib:
        engine = "jurabib"
    else:
        engine = "basic"

    header[a : c + 1] = ["\\cite_engine " + engine]
2349
2350
def revert_cite_engine(document):
    """Split '\\cite_engine' into the three older citation switches.

    The line is replaced by '\\use_natbib', '\\use_numerical_citations'
    and '\\use_jurabib' lines; an unknown engine sets all of them to 0."""
    header = document.header
    i = find_token(header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed lyx document: Missing '\\cite_engine'.")
        return

    engine = header[i].split()[1]

    # engine -> (use_natbib, use_numerical_citations, use_jurabib)
    switches = {
        "natbib_numerical": ("1", "1", "0"),
        "natbib_authoryear": ("1", "0", "0"),
        "jurabib": ("0", "0", "1"),
    }
    natbib, numerical, jurabib = switches.get(engine, ("0", "0", "0"))

    header[i : i + 1] = [
        "\\use_natbib " + natbib,
        "\\use_numerical_citations " + numerical,
        "\\use_jurabib " + jurabib,
    ]
2375
2376
def convert_paperpackage(document):
    """Translate the value of '\\paperpackage' to its new name.

    A line without a value gets the value 'widemarginsa4'."""
    i = find_token(document.header, "\\paperpackage", 0)
    if i == -1:
        return

    # old name -> new name
    packages = {
        "default": "none",
        "a4": "none",
        "a4wide": "a4",
        "widemarginsa4": "a4wide",
    }
    line = document.header[i]
    fields = line.split()
    if len(fields) > 1:
        old_name = fields[1]
        document.header[i] = line.replace(old_name, packages[old_name])
    else:
        document.header[i] = line + " widemarginsa4"
2394
2395
def revert_paperpackage(document):
    """Translate the value of '\\paperpackage' back to its old name.

    A line without a value is treated as 'default'."""
    i = find_token(document.header, "\\paperpackage", 0)
    if i == -1:
        return

    # new name -> old name
    packages = {
        "none": "a4",
        "a4": "a4wide",
        "a4wide": "widemarginsa4",
        "widemarginsa4": "",
        "default": "default",
    }
    line = document.header[i]
    fields = line.split()
    new_name = fields[1] if len(fields) > 1 else "default"
    document.header[i] = line.replace(new_name, packages[new_name])
2414
2415
def convert_bullets(document):
    """Put every bullet definition of the header on a single line.

    The continuation lines are stripped and joined to the '\\bullet...'
    line with single spaces, then removed together with the end line of
    the definition."""
    header = document.header
    i = 0
    while True:
        i = find_token(header, "\\bullet", i)
        if i == -1:
            return
        # \bulletLaTeX has one value line, a plain \bullet has three;
        # 'n - 1' lines following the first one are deleted afterwards.
        if header[i].startswith("\\bulletLaTeX"):
            values, n = 1, 3
        else:
            values, n = 3, 5
        pieces = [header[i]]
        for offset in range(1, values + 1):
            pieces.append(header[i + offset].strip())
        header[i] = " ".join(pieces)
        del header[i + 1 : i + n]
        i += 1
2439
2440
def revert_bullets(document):
    """Spread every bullet definition of the header over several lines.

    Each value gets its own line starting with a tab, and the definition
    is closed by '\\end_bullet'.  A malformed definition stops the
    conversion with a warning."""
    header = document.header
    i = 0
    while True:
        i = find_token(header, "\\bullet", i)
        if i == -1:
            return
        line = header[i]
        if line.startswith("\\bulletLaTeX"):
            # The value starts at the first double quote.
            n = line.find('"')
            if n == -1:
                document.warning("Malformed header.")
                return
            expanded = [line[: n - 1], "\t" + line[n:], "\\end_bullet"]
        else:
            frag = line.split()
            if len(frag) != 5:
                document.warning("Malformed header.")
                return
            expanded = [frag[0] + " " + frag[1]]
            expanded.extend("\t" + value for value in frag[2:5])
            expanded.append("\\end_bullet")
        header[i : i + 1] = expanded
        # Continue after the lines just written.
        i += len(expanded)
2474
2475
def add_begin_header(document):
    r"""Insert \begin_document and \begin_header into the header.

    Both lines are placed, in this order, after the \lyxformat line."""
    after_format = find_token(document.header, "\\lyxformat", 0) + 1
    document.header[after_format:after_format] = ["\\begin_document", "\\begin_header"]
2481
2482
def remove_begin_header(document):
    r"""Delete the \begin_document and \begin_header lines of the header.

    A line that is not present is silently skipped."""
    for token in ("\\begin_document", "\\begin_header"):
        pos = find_token(document.header, token, 0)
        if pos != -1:
            del document.header[pos]
2491
2492
def add_begin_body(document):
    r"""Add \begin_body and \end_body to the body.

    \begin_body and an empty line are put at the very beginning,
    \end_body is inserted at the position find_token() reports for
    \end_document."""
    document.body[:0] = ["\\begin_body", ""]
    end = find_token(document.body, "\\end_document", 0)
    document.body.insert(end, "\\end_body")
2499
2500
def remove_begin_body(document):
    r"""Delete \begin_body and \end_body from the body.

    An empty line directly after \begin_body is deleted as well."""
    body = document.body

    start = find_token(body, "\\begin_body", 0)
    if start != -1:
        body.pop(start)
        # The line that followed \begin_body is now at the same index.
        if not body[start]:
            body.pop(start)

    end = find_token(body, "\\end_body", 0)
    if end != -1:
        body.pop(end)
2511
2512
def normalize_papersize(document):
    r"""Write the \papersize values Default and Custom in lower case.

    Other values are left untouched."""
    i = find_token(document.header, "\\papersize", 0)
    if i == -1:
        return

    size = document.header[i].split()[1]
    lowered = {"Default": "default", "Custom": "custom"}.get(size)
    if lowered is not None:
        document.header[i] = "\\papersize " + lowered
2525
2526
def denormalize_papersize(document):
    r"""Write the \papersize value custom as Custom again.

    Other values are left untouched."""
    i = find_token(document.header, "\\papersize", 0)
    if i == -1:
        return

    size = document.header[i].split()[1]
    if size == "custom":
        document.header[i] = "\\papersize Custom"
2536
2537
def strip_end_space(document):
    """Strip surrounding whitespace from the command lines of the body.

    Command lines are the lines starting with a backslash; all other
    lines are kept as they are."""
    body = document.body
    for idx, line in enumerate(body):
        if line.startswith("\\"):
            body[idx] = line.strip()
2543
2544
def use_x_boolean(document):
    r"""Write \use_geometry, \use_bibtopic and \tracking_changes as booleans.

    The header values 0 and 1 become false and true; a token that is not
    in the header is skipped."""
    bin2bool = {"0": "false", "1": "true"}
    tokens = ("\\use_geometry", "\\use_bibtopic", "\\tracking_changes")
    for token in tokens:
        pos = find_token(document.header, token, 0)
        if pos != -1:
            parts = document.header[pos].split()
            document.header[pos] = " ".join((parts[0], bin2bool[parts[1]]))
2554
2555
def use_x_binary(document):
    r"""Write \use_geometry, \use_bibtopic and \tracking_changes as digits.

    The header values false and true become 0 and 1; a token that is not
    in the header is skipped."""
    bool2bin = {"false": "0", "true": "1"}
    tokens = ("\\use_geometry", "\\use_bibtopic", "\\tracking_changes")
    for token in tokens:
        pos = find_token(document.header, token, 0)
        if pos != -1:
            parts = document.header[pos].split()
            document.header[pos] = " ".join((parts[0], bool2bin[parts[1]]))
2565
2566
def normalize_paragraph_params(document):
    r"""Place all the paragraph parameters in their own line.

    After every \begin_layout line, the following run of blank lines and
    lines whose first word is a known paragraph parameter is scanned.  When
    such a line contains a second backslash (i.e. another command after the
    first one), it is split at that backslash so that each command ends up
    on a line of its own.  Scanning of a paragraph stops at the first
    non-blank line that does not start with a known parameter.
    """
    body = document.body

    allowed_parameters = (
        "\\paragraph_spacing",
        "\\noindent",
        "\\align",
        "\\labelwidthstring",
        "\\start_of_appendix",
        "\\leftindent",
    )

    i = 0
    while True:
        i = find_token(body, "\\begin_layout", i)
        if i == -1:
            return

        i = i + 1
        # Bounded by the body length so that a document ending inside a run
        # of parameters or blank lines does not raise IndexError.
        while i < len(body):
            if body[i].strip() and body[i].split()[0] not in allowed_parameters:
                break

            # Search from offset 1 to skip the backslash that starts the line.
            j = body[i].find("\\", 1)

            if j != -1:
                # The remainder becomes the next line and is examined (and
                # possibly split again) on the following iteration.
                body[i : i + 1] = [body[i][:j].strip(), body[i][j:]]

            i = i + 1
2597
2598
def convert_output_changes(document):
    r"""Add the \output_changes header parameter.

    "\output_changes true" is inserted directly after the \tracking_changes
    line.  If that line is missing, a warning is issued and the header is
    left unchanged.
    """
    header = document.header
    pos = find_token(header, "\\tracking_changes", 0)
    if pos != -1:
        header.insert(pos + 1, "\\output_changes true")
        return
    document.warning("Malformed lyx document: Missing '\\tracking_changes'.")
2606
2607
def revert_output_changes(document):
    r"""Remove the \output_changes header parameter, if it is present."""
    header = document.header
    pos = find_token(header, "\\output_changes", 0)
    if pos != -1:
        del header[pos]
2614
2615
def convert_ert_paragraphs(document):
    r"""Convert paragraph breaks and sanitize paragraphs inside ERT insets.

    For every "\begin_inset ERT" found in the body, four passes are run over
    the lines between the inset start (i) and its end (j):

    1. every \begin_layout line is rewritten to use document.default_layout;
    2. every line whose first word is a paragraph parameter or font setting
       listed in forbidden_settings is deleted;
    3. an empty default-layout paragraph is inserted before each paragraph
       except the first;
    4. every \newline line is replaced by a paragraph break
       (\end_layout, blank line, \begin_layout).

    j is adjusted after every insertion or deletion so that it keeps
    pointing at the end of the inset.  An inset whose end cannot be found
    triggers a warning and is skipped.
    """
    forbidden_settings = [
        # paragraph parameters
        "\\paragraph_spacing",
        "\\labelwidthstring",
        "\\start_of_appendix",
        "\\noindent",
        "\\leftindent",
        "\\align",
        # font settings
        "\\family",
        "\\series",
        "\\shape",
        "\\size",
        "\\emph",
        "\\numeric",
        "\\bar",
        "\\noun",
        "\\color",
        "\\lang",
    ]
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset ERT", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Missing '\\end_inset'.")
            i = i + 1
            continue

        # convert non-standard paragraphs to standard
        k = i
        while True:
            k = find_token(document.body, "\\begin_layout", k, j)
            if k == -1:
                break
            document.body[k] = "\\begin_layout %s" % document.default_layout
            k = k + 1

        # remove all paragraph parameters and font settings
        k = i
        while k < j:
            if document.body[k].strip() and document.body[k].split()[0] in forbidden_settings:
                # Deleting shifts the following lines up, so k stays put and
                # the inset end moves one line closer.
                del document.body[k]
                j = j - 1
            else:
                k = k + 1

        # insert an empty paragraph before each paragraph but the first
        k = i
        first_pagraph = 1
        while True:
            k = find_token(document.body, "\\begin_layout", k, j)
            if k == -1:
                break
            if first_pagraph:
                first_pagraph = 0
                k = k + 1
                continue
            document.body[k:k] = [
                "\\begin_layout %s" % document.default_layout,
                "",
                "\\end_layout",
                "",
            ]
            # Skip the four inserted lines plus the \begin_layout that was
            # found (now at k + 4); the inset grew by four lines.
            k = k + 5
            j = j + 4

        # convert \\newline to new paragraph
        k = i
        while True:
            k = find_token(document.body, "\\newline", k, j)
            if k == -1:
                break
            document.body[k : k + 1] = [
                "\\end_layout",
                "",
                "\\begin_layout %s" % document.default_layout,
            ]
            # One line was replaced by three: k now points at the line after
            # the new \begin_layout and the inset grew by two lines.
            k = k + 3
            j = j + 2
            # We need an empty line if document.default_layout == ''
            if document.body[k] != "":
                document.body.insert(k, "")
                k = k + 1
                j = j + 1
        # Resume the search for the next ERT inset after this inset's start.
        i = i + 1
2706
2707
def revert_ert_paragraphs(document):
    r"""Remove double paragraph breaks inside ERT insets.

    For every "\begin_inset ERT" found in the body, two passes are run over
    the lines between the inset start (i) and its end (j):

    1. each paragraph break (an \end_layout line up to and including the
       next \begin_layout line) is collapsed into a single \newline line;
    2. each pair of \newline lines separated only by empty lines is turned
       back into one paragraph break (\end_layout, blank line,
       \begin_layout with document.default_layout), followed by a blank line.

    j is adjusted after every edit so that it keeps pointing at the end of
    the inset.  An inset whose end cannot be found triggers a warning and is
    skipped.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset ERT", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Malformed lyx document: Missing '\\end_inset'.")
            i = i + 1
            continue

        # replace paragraph breaks with \newline
        k = i
        while True:
            k = find_token(document.body, "\\end_layout", k, j)
            if k == -1:
                break
            l = find_token(document.body, "\\begin_layout", k, j)
            if l == -1:
                break
            # l - k + 1 lines collapse into one, so the inset shrinks by l - k.
            document.body[k : l + 1] = ["\\newline"]
            j = j - l + k
            k = k + 1

        # replace double \newlines with paragraph breaks
        k = i
        while True:
            k = find_token(document.body, "\\newline", k, j)
            if k == -1:
                break
            # Skip the empty lines that follow this \newline.
            l = k + 1
            while document.body[l] == "":
                l = l + 1
            if document.body[l].strip() and document.body[l].split()[0] == "\\newline":
                # l - k + 1 lines are replaced by three.
                document.body[k : l + 1] = [
                    "\\end_layout",
                    "",
                    "\\begin_layout %s" % document.default_layout,
                ]
                j = j - l + k + 2
                # k now points at the line right after the new \begin_layout.
                k = k + 3
                # We need an empty line if document.default_layout == ''
                # The check must use the updated k: l is an index into the
                # list as it was before the replacement and only coincides
                # with k when exactly one empty line separated the \newlines.
                if document.body[k] != "":
                    document.body.insert(k, "")
                    k = k + 1
                    j = j + 1
            else:
                k = k + 1
        # Resume the search for the next ERT inset after this inset's start.
        i = i + 1
2757
2758
def convert_french(document):
    r"""Rename the language "frenchb" to "french".

    The first matching \language line in the header and every matching \lang
    line in the body are rewritten.
    """
    header_pattern = re.compile(r"^\\language\s+frenchb")
    pos = find_re(document.header, header_pattern, 0)
    if pos != -1:
        document.header[pos] = "\\language french"

    # Change language in the document body
    body = document.body
    body_pattern = re.compile(r"^\\lang\s+frenchb")
    pos = find_re(body, body_pattern, 0)
    while pos != -1:
        body[pos] = "\\lang french"
        pos = find_re(body, body_pattern, pos + 1)
2775
2776
def remove_paperpackage(document):
    r"""Remove the \paperpackage header setting.

    The setting is always deleted.  For the packages "a4", "a4wide" and
    "widemarginsa4" an equivalent \usepackage line is put at the very start
    of the preamble, and \papersize (if present) is reset to "default".
    """
    header = document.header
    pos = find_token(header, "\\paperpackage", 0)
    if pos == -1:
        return

    package = header[pos].split()[1]
    del header[pos]

    # Package name -> preamble line that loads it.
    preamble_line = {
        "a4": "\\usepackage{a4}",
        "a4wide": "\\usepackage{a4wide}",
        "widemarginsa4": "\\usepackage[widemargins]{a4}",
    }.get(package)
    if preamble_line is None:
        return

    # for compatibility we ensure it is the first entry in preamble
    document.preamble.insert(0, preamble_line)

    pos = find_token(header, "\\papersize", 0)
    if pos != -1:
        header[pos] = "\\papersize default"
2802
2803
def remove_quotestimes(document):
    r"""Remove the \quotes_times header setting, if it is present."""
    header = document.header
    pos = find_token(header, "\\quotes_times", 0)
    if pos != -1:
        del header[pos]
2810
2811
def convert_sgml_paragraphs(document):
    r"""Convert SGML paragraphs into Standard paragraphs holding an ERT inset.

    Only documents whose backend is "docbook" are touched.  Each
    "\begin_layout SGML" line becomes "\begin_layout Standard", and the
    paragraph content is wrapped in an inlined ERT inset that contains its
    own Standard paragraph.  A paragraph without a closing \end_layout
    triggers a warning and is left unchanged.
    """
    if document.backend != "docbook":
        return

    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout SGML", i)

        if i == -1:
            return

        j = find_token(document.body, "\\end_layout", i)
        if j == -1:
            # Without this check the slices below would be computed from -1
            # and insert the closing lines at the start of the body.
            document.warning("Malformed lyx document: Missing '\\end_layout'.")
            i = i + 1
            continue

        document.body[i] = "\\begin_layout Standard"

        # Insert the closing lines first so that index i is still valid for
        # the opening lines inserted afterwards.
        document.body[j + 1 : j + 1] = ["", "\\end_inset", "", "", "\\end_layout"]
        document.body[i + 1 : i + 1] = [
            "\\begin_inset ERT",
            "status inlined",
            "",
            "\\begin_layout Standard",
            "",
        ]

        # Ten lines were added in total; continue the search past them.
        i = i + 10
2837
2838
2839 ##
2840 # Conversion hub
2841 #
2842
# LyX release names handled by this module: "1.4.0", "1.4.1", "1.4.2" and "1.4".
supported_versions = ["1.4.%d" % i for i in range(3)] + ["1.4"]
# Forward conversion table.  Each entry is [format number, [functions]], with
# format numbers in ascending order (222..245).
# NOTE(review): presumably the number is the file format reached once the
# listed functions have run, in list order -- confirm against the lyx2lyx
# driver that consumes this table.
convert = [
    [222, [insert_tracking_changes, add_end_header, convert_amsmath]],
    [223, [remove_color_default, convert_spaces, convert_bibtex, remove_insetparent]],
    [224, [convert_external, convert_comment]],
    [
        225,
        [
            add_end_layout,
            layout2begin_layout,
            convert_end_document,
            convert_table_valignment_middle,
            convert_breaks,
        ],
    ],
    [226, [convert_note]],
    [227, [convert_box]],
    [228, [convert_collapsible, convert_ert]],
    [229, [convert_minipage]],
    [230, [convert_jurabib]],
    [231, [convert_float]],
    [232, [convert_bibtopic]],
    [233, [convert_graphics, convert_names]],
    [234, [convert_cite_engine]],
    [235, [convert_paperpackage]],
    [
        236,
        [
            convert_bullets,
            add_begin_header,
            add_begin_body,
            normalize_papersize,
            strip_end_space,
        ],
    ],
    [237, [use_x_boolean]],
    [238, [update_latexaccents]],
    [239, [normalize_paragraph_params]],
    [240, [convert_output_changes]],
    [241, [convert_ert_paragraphs]],
    [242, [convert_french]],
    [243, [remove_paperpackage]],
    [244, [rename_spaces]],
    [245, [remove_quotestimes, convert_sgml_paragraphs]],
]

# Backward conversion table, same entry layout as `convert` but with format
# numbers in descending order (244..221).  An empty function list means no
# function is registered for that step.
# NOTE(review): presumably the number is the file format reached after the
# listed reversion functions have run -- confirm against the lyx2lyx driver.
revert = [
    [244, []],
    [243, [revert_space_names]],
    [242, []],
    [241, []],
    [240, [revert_ert_paragraphs]],
    [239, [revert_output_changes]],
    [238, []],
    [237, []],
    [236, [use_x_binary]],
    [
        235,
        [denormalize_papersize, remove_begin_body, remove_begin_header, revert_bullets],
    ],
    [234, [revert_paperpackage]],
    [233, [revert_cite_engine]],
    [232, [revert_names]],
    [231, [revert_bibtopic]],
    [230, [revert_float]],
    [229, [revert_jurabib]],
    [228, []],
    [227, [revert_collapsible, revert_ert]],
    [226, [revert_box, revert_external_2]],
    [225, [revert_note]],
    [
        224,
        [
            rm_end_layout,
            begin_layout2layout,
            revert_end_document,
            revert_valignment_middle,
            revert_breaks,
            convert_frameless_box,
            remove_branches,
        ],
    ],
    [223, [revert_external_2, revert_comment, revert_eqref]],
    [222, [revert_spaces, revert_bibtex]],
    [221, [revert_amsmath, rm_end_header, rm_tracking_changes, rm_body_changes]],
]


# Running this module directly does nothing.
if __name__ == "__main__":
    pass