]> git.lyx.org Git - lyx.git/blob - lib/lyx2lyx/lyx_2_3.py
Fix output of en- and em-dashes with TeX fonts
[lyx.git] / lib / lyx2lyx / lyx_2_3.py
1 # -*- coding: utf-8 -*-
2 # This file is part of lyx2lyx
3 # -*- coding: utf-8 -*-
4 # Copyright (C) 2016 The LyX team
5 #
6 # This program is free software; you can redistribute it and/or
7 # modify it under the terms of the GNU General Public License
8 # as published by the Free Software Foundation; either version 2
9 # of the License, or (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19
20 """ Convert files to the file format generated by lyx 2.3"""
21
22 import re, string
23 import unicodedata
24 import sys, os
25
26 # Uncomment only what you need to import, please.
27
28 from parser_tools import find_end_of, find_token_backwards, find_end_of_layout, \
29     find_token, find_end_of_inset, get_value,  get_bool_value, \
30     get_containing_layout, get_quoted_value, del_token
31 #  find_tokens, find_token_exact, is_in_inset, \
32 #  check_token, get_option_value
33
34 from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert
35 #  get_ert, lyx2latex, \
36 #  lyx2verbatim, length_in_bp, convert_info_insets
37 #  insert_to_preamble, latex_length, revert_flex_inset, \
38 #  revert_font_attrs, hex2ratio, str2bool
39
40 ####################################################################
41 # Private helper functions
42
43
44
45 ###############################################################################
46 ###
47 ### Conversion and reversion routines
48 ###
49 ###############################################################################
50
def convert_microtype(document):
    """Add the use_microtype setting to the document header.

    The setting is written as "true" when the user preamble contains a
    line loading the microtype package (that preamble line is removed),
    and as "false" otherwise.  It is placed right after the
    font_tt_scale header line.
    """
    header = document.header
    anchor = find_token(header, "\\font_tt_scale", 0)
    if anchor == -1:
        document.warning("Malformed LyX document: Can't find \\font_tt_scale.")
        # without the anchor, the setting is appended at the end of the header
        anchor = len(header) - 1

    pkg_line = find_token(document.preamble, "\\usepackage{microtype}", 0)
    uses_microtype = pkg_line != -1
    header.insert(anchor + 1,
                  "\\use_microtype " + ("true" if uses_microtype else "false"))
    if uses_microtype:
        # the header setting replaces the explicit package load
        del document.preamble[pkg_line]
64
65
def revert_microtype(document):
    """Remove the use_microtype header setting.

    When the setting was enabled, the microtype package is loaded via
    the preamble instead.  Nothing happens if the setting is absent.
    """
    header = document.header
    pos = find_token(header, "\\use_microtype", 0)
    if pos == -1:
        return
    # read the value before the line is dropped
    enabled = get_bool_value(header, "\\use_microtype", pos)
    del header[pos]
    if not enabled:
        return
    add_to_preamble(document, ["\\usepackage{microtype}"])
75
76
def convert_dateinset(document):
    """Convert External insets using the "Date" template to ERT.

    Each such inset is replaced by whatever put_cmd_in_ert() produces
    for the command "\\today ".  External insets with another template
    are left alone; insets without an end marker are reported and skipped.
    """
    body = document.body
    pos = 0
    while True:
        pos = find_token(body, "\\begin_inset External", pos)
        if pos == -1:
            break
        end = find_end_of_inset(body, pos)
        if end == -1:
            document.warning("Malformed lyx document: Missing '\\end_inset' in convert_dateinset.")
        elif get_value(body, 'template', pos, end) == "Date":
            body[pos : end + 1] = put_cmd_in_ert("\\today ")
        # in every case, resume the search on the following line
        pos += 1
93
94
def convert_inputenc(document):
    """Replace no longer supported input encoding settings.

    An inputencoding value of "pt254" is rewritten to "pt154"; any
    other value is kept.
    """
    header = document.header
    pos = find_token(header, "\\inputenc", 0)
    if pos != -1 and get_value(header, "\\inputencoding", pos) == "pt254":
        header[pos] = "\\inputencoding pt154"
102     
103
def convert_ibranches(document):
    """Add an "inverted 0" line after the first line of every Branch inset."""
    body = document.body
    token = "\\begin_inset Branch"
    pos = find_token(body, token, 0)
    while pos != -1:
        body.insert(pos + 1, "inverted 0")
        # continue behind the inset start that was just handled
        pos = find_token(body, token, pos + 1)
113
114
def revert_ibranches(document):
    """Convert inverted branches to explicit anti-branches.

    Every Branch inset tagged "inverted 1" is renamed to a new
    "Anti-<branch>" branch (prefixed with "x" until the name is unused),
    the "inverted" line is removed from all Branch insets, and each new
    anti-branch is declared in the header right before the branch it was
    derived from, copying that branch's remaining settings but with the
    opposite selection status.
    """
    header = document.header
    body = document.body

    # Get list of branches; the value tells us whether the branch is selected
    ourbranches = {}
    i = 0
    while True:
        i = find_token(header, "\\branch", i)
        if i == -1:
            break
        branch = header[i][8:].strip()
        flag = ""
        if i + 1 < len(header) and header[i+1].startswith("\\selected "):
            # a slice (not an index) so that an empty value cannot raise
            flag = header[i+1][10:11]
        if flag.isdigit():
            selected = int(flag)
        else:
            document.warning("Malformed LyX document: No selection indicator for branch " + branch)
            selected = 1
        ourbranches[branch] = selected
        i += 1

    # Figure out what inverted branches, if any, have been used
    # and convert them to "Anti-OldBranch"
    ibranches = {}
    # names that must not be reused for a new anti-branch: all declared
    # branches plus the anti-branches created below
    taken = set(ourbranches)
    i = 0
    while True:
        i = find_token(body, "\\begin_inset Branch", i)
        if i == -1:
            break
        if i + 1 >= len(body) or not body[i+1].startswith("inverted "):
            document.warning("Malformed LyX document: Missing 'inverted' tag!")
            i += 1
            continue
        inverted = body[i+1][9:10]

        if inverted == "1":
            branch = body[i][20:].strip()
            if branch not in ibranches:
                antibranch = "Anti-" + branch
                while antibranch in taken:
                    antibranch = "x" + antibranch
                taken.add(antibranch)
                ibranches[branch] = antibranch
            body[i] = "\\begin_inset Branch " + ibranches[branch]

        # remove "inverted" key
        del body[i+1]
        i += 1

    # now we need to add the new branches to the header
    # (items() rather than iteritems(): the latter does not exist on Python 3)
    for old, new in ibranches.items():
        token = "\\branch " + old
        i = find_token(header, token, 0)
        # skip header entries of other branches whose name merely starts
        # with the name we are looking for
        while i != -1 and header[i][8:].strip() != old:
            i = find_token(header, token, i + 1)
        if i == -1:
            document.warning("Can't find branch %s even though we found it before!" % (old))
            continue
        j = find_token(header, "\\end_branch", i)
        if j == -1:
            document.warning("Malformed LyX document! Can't find end of branch " + old)
            continue
        # invert the selection status of the old branch, keeping it a 0/1 flag
        if ourbranches[old]:
            inverted_selection = "0"
        else:
            inverted_selection = "1"
        lines = ["\\branch " + new,
                 "\\selected " + inverted_selection]
        # these are the old lines telling us color, etc.
        lines += header[i+2 : j+1]
        header[i:i] = lines
185
186
def revert_beamer_article_styles(document):
    """Include (scr)article styles in beamer article via local layout.

    Only documents of class article-beamer or scrarticle-beamer are
    touched.  A marked block is inserted at the top of the local layout
    (which is created in front of the line preceding the language header
    line if the document has none).  The block inputs article.layout
    (scrartcl.layout for scrarticle-beamer) and beamer.layout and adds
    font defaults and a preamble.
    """

    beamer_articles = ["article-beamer", "scrarticle-beamer"]
    if document.textclass not in beamer_articles:
        return

    inclusion = "article.layout"
    if document.textclass == "scrarticle-beamer":
        inclusion = "scrartcl.layout"

    i = find_token(document.header, "\\begin_local_layout", 0)
    if i == -1:
        k = find_token(document.header, "\\language", 0)
        if k == -1:
            # this should not happen
            document.warning("Malformed LyX document! No \\language header found!")
            return
        # create an empty local layout section to insert into
        document.header[k-1 : k-1] = ["\\begin_local_layout", "\\end_local_layout"]
        i = k - 1

    # only used to verify that the local layout section is terminated
    j = find_end_of(document.header, i, "\\begin_local_layout", "\\end_local_layout")
    if j == -1:
        # this should not happen
        document.warning("Malformed LyX document: Can't find end of local layout!")
        return

    # The marker comments allow convert_beamer_article_styles to find
    # and remove this block again.
    document.header[i+1 : i+1] = [
        "### Inserted by lyx2lyx (more [scr]article styles) ###",
        "Input " + inclusion,
        "Input beamer.layout",
        "Provides geometry 0",
        "Provides hyperref 0",
        "DefaultFont",
        "     Family                Roman",
        "     Series                Medium",
        "     Shape                 Up",
        "     Size                  Normal",
        "     Color                 None",
        "EndFont",
        "Preamble",
        "     \\usepackage{beamerarticle,pgf}",
        "     % this default might be overridden by plain title style",
        # every backslash is escaped: "\m" is an invalid escape sequence
        "     \\newcommand\\makebeamertitle{\\frame{\\maketitle}}%",
        "     \\AtBeginDocument{",
        "             \\let\\origtableofcontents=\\tableofcontents",
        "             \\def\\tableofcontents{\\@ifnextchar[{\\origtableofcontents}{\\gobbletableofcontents}}",
        "             \\def\\gobbletableofcontents#1{\\origtableofcontents}",
        "     }",
        "EndPreamble",
        "### End of insertion by lyx2lyx (more [scr]article styles) ###"
    ]
239
240
def convert_beamer_article_styles(document):
    """Remove included (scr)article styles in beamer article.

    Looks inside the local layout of article-beamer / scrarticle-beamer
    documents for the block delimited by the lyx2lyx marker comments and
    deletes it.  If that block is the entire local layout, the local
    layout section itself is deleted as well.
    """
    if document.textclass not in ("article-beamer", "scrarticle-beamer"):
        return

    header = document.header
    begin = find_token(header, "\\begin_local_layout", 0)
    if begin == -1:
        return

    end = find_end_of(header, begin, "\\begin_local_layout", "\\end_local_layout")
    if end == -1:
        # this should not happen
        document.warning("Malformed LyX document: Can't find end of local layout!")
        return

    first = find_token(header, "### Inserted by lyx2lyx (more [scr]article styles) ###", begin, end)
    if first == -1:
        # nothing was inserted by lyx2lyx
        return

    last = find_token(header, "### End of insertion by lyx2lyx (more [scr]article styles) ###", begin, end)
    if last == -1:
        # this should not happen
        document.warning("End of lyx2lyx local layout insertion not found!")
        return

    if first == begin + 1 and last == end - 1:
        # that was all the local layout there was
        del header[begin : end + 1]
    else:
        del header[first : last + 1]
271
272
def revert_bosnian(document):
    """Set the document language to English but assure Bosnian output.

    The language header becomes english, a "default" language package is
    switched to babel, and "bosnian" is passed on as a class option.
    """
    if document.language != "bosnian":
        return

    header = document.header
    document.language = "english"

    lang_line = find_token(header, "\\language bosnian", 0)
    if lang_line != -1:
        header[lang_line] = "\\language english"

    pkg_line = find_token(header, "\\language_package default", 0)
    if pkg_line != -1:
        header[pkg_line] = "\\language_package babel"

    opt_line = find_token(header, "\\options", 0)
    if opt_line == -1:
        # no class options so far: add them behind use_default_options
        anchor = find_token(header, "\\use_default_options", 0)
        header.insert(anchor + 1, "\\options bosnian")
    else:
        # put the language in front of the existing options
        header[opt_line] = header[opt_line].replace("\\options", "\\options bosnian,")
290
291
def revert_friulan(document):
    """Set the document language to English but assure Friulan output.

    The language header becomes english, a "default" language package is
    switched to babel, and "friulan" is passed on as a class option.
    """
    if document.language != "friulan":
        return

    header = document.header
    document.language = "english"

    lang_line = find_token(header, "\\language friulan", 0)
    if lang_line != -1:
        header[lang_line] = "\\language english"

    pkg_line = find_token(header, "\\language_package default", 0)
    if pkg_line != -1:
        header[pkg_line] = "\\language_package babel"

    opt_line = find_token(header, "\\options", 0)
    if opt_line == -1:
        # no class options so far: add them behind use_default_options
        anchor = find_token(header, "\\use_default_options", 0)
        header.insert(anchor + 1, "\\options friulan")
    else:
        # put the language in front of the existing options
        header[opt_line] = header[opt_line].replace("\\options", "\\options friulan,")
309
310
def revert_macedonian(document):
    """Set the document language to English but assure Macedonian output.

    The language header becomes english, a "default" language package is
    switched to babel, and "macedonian" is passed on as a class option.
    """
    if document.language != "macedonian":
        return

    header = document.header
    document.language = "english"

    lang_line = find_token(header, "\\language macedonian", 0)
    if lang_line != -1:
        header[lang_line] = "\\language english"

    pkg_line = find_token(header, "\\language_package default", 0)
    if pkg_line != -1:
        header[pkg_line] = "\\language_package babel"

    opt_line = find_token(header, "\\options", 0)
    if opt_line == -1:
        # no class options so far: add them behind use_default_options
        anchor = find_token(header, "\\use_default_options", 0)
        header.insert(anchor + 1, "\\options macedonian")
    else:
        # put the language in front of the existing options
        header[opt_line] = header[opt_line].replace("\\options", "\\options macedonian,")
328
329
def revert_piedmontese(document):
    """Set the document language to English but assure Piedmontese output.

    The language header becomes english, a "default" language package is
    switched to babel, and "piedmontese" is passed on as a class option.
    """
    if document.language != "piedmontese":
        return

    header = document.header
    document.language = "english"

    lang_line = find_token(header, "\\language piedmontese", 0)
    if lang_line != -1:
        header[lang_line] = "\\language english"

    pkg_line = find_token(header, "\\language_package default", 0)
    if pkg_line != -1:
        header[pkg_line] = "\\language_package babel"

    opt_line = find_token(header, "\\options", 0)
    if opt_line == -1:
        # no class options so far: add them behind use_default_options
        anchor = find_token(header, "\\use_default_options", 0)
        header.insert(anchor + 1, "\\options piedmontese")
    else:
        # put the language in front of the existing options
        header[opt_line] = header[opt_line].replace("\\options", "\\options piedmontese,")
347
348
def revert_romansh(document):
    """Set the document language to English but assure Romansh output.

    The language header becomes english, a "default" language package is
    switched to babel, and "romansh" is passed on as a class option.
    """
    if document.language != "romansh":
        return

    header = document.header
    document.language = "english"

    lang_line = find_token(header, "\\language romansh", 0)
    if lang_line != -1:
        header[lang_line] = "\\language english"

    pkg_line = find_token(header, "\\language_package default", 0)
    if pkg_line != -1:
        header[pkg_line] = "\\language_package babel"

    opt_line = find_token(header, "\\options", 0)
    if opt_line == -1:
        # no class options so far: add them behind use_default_options
        anchor = find_token(header, "\\use_default_options", 0)
        header.insert(anchor + 1, "\\options romansh")
    else:
        # put the language in front of the existing options
        header[opt_line] = header[opt_line].replace("\\options", "\\options romansh,")
366
367
def revert_amharic(document):
    """Set the document language to English but assure Amharic output.

    The language header becomes english, a setotherlanguage command for
    amharic is added to the preamble, and an ERT inset issuing
    resetdefaultlanguage{amharic} is inserted at index 2 of the body.
    Documents in any other language are left untouched.
    """
    if document.language != "amharic":
        return

    document.language = "english"
    i = find_token(document.header, "\\language amharic", 0)
    if i != -1:
        document.header[i] = "\\language english"
    j = find_token(document.header, "\\language_package default", 0)
    if j != -1:
        # the language package setting is kept at "default"
        document.header[j] = "\\language_package default"
    # every backslash is escaped: "\s" is an invalid escape sequence
    add_to_preamble(document, ["\\AtBeginDocument{\\setotherlanguage{amharic}}"])
    document.body[2 : 2] = ["\\begin_layout Standard",
                            "\\begin_inset ERT", "status open", "",
                            "\\begin_layout Plain Layout", "", "",
                            "\\backslash",
                            "resetdefaultlanguage{amharic}",
                            "\\end_layout", "", "\\end_inset", "", "",
                            "\\end_layout", ""]
387
388
def revert_asturian(document):
    """Set the document language to English but assure Asturian output.

    The language header becomes english, a setotherlanguage command for
    asturian is added to the preamble, and an ERT inset issuing
    resetdefaultlanguage{asturian} is inserted at index 2 of the body.
    Documents in any other language are left untouched.
    """
    if document.language != "asturian":
        return

    document.language = "english"
    i = find_token(document.header, "\\language asturian", 0)
    if i != -1:
        document.header[i] = "\\language english"
    j = find_token(document.header, "\\language_package default", 0)
    if j != -1:
        # the language package setting is kept at "default"
        document.header[j] = "\\language_package default"
    # every backslash is escaped: "\s" is an invalid escape sequence
    add_to_preamble(document, ["\\AtBeginDocument{\\setotherlanguage{asturian}}"])
    document.body[2 : 2] = ["\\begin_layout Standard",
                            "\\begin_inset ERT", "status open", "",
                            "\\begin_layout Plain Layout", "", "",
                            "\\backslash",
                            "resetdefaultlanguage{asturian}",
                            "\\end_layout", "", "\\end_inset", "", "",
                            "\\end_layout", ""]
408
409
def revert_kannada(document):
    """Set the document language to English but assure Kannada output.

    The language header becomes english, a setotherlanguage command for
    kannada is added to the preamble, and an ERT inset issuing
    resetdefaultlanguage{kannada} is inserted at index 2 of the body.
    Documents in any other language are left untouched.
    """
    if document.language != "kannada":
        return

    document.language = "english"
    i = find_token(document.header, "\\language kannada", 0)
    if i != -1:
        document.header[i] = "\\language english"
    j = find_token(document.header, "\\language_package default", 0)
    if j != -1:
        # the language package setting is kept at "default"
        document.header[j] = "\\language_package default"
    # every backslash is escaped: "\s" is an invalid escape sequence
    add_to_preamble(document, ["\\AtBeginDocument{\\setotherlanguage{kannada}}"])
    document.body[2 : 2] = ["\\begin_layout Standard",
                            "\\begin_inset ERT", "status open", "",
                            "\\begin_layout Plain Layout", "", "",
                            "\\backslash",
                            "resetdefaultlanguage{kannada}",
                            "\\end_layout", "", "\\end_inset", "", "",
                            "\\end_layout", ""]
429
430
def revert_khmer(document):
    """Set the document language to English but assure Khmer output.

    The language header becomes english, a setotherlanguage command for
    khmer is added to the preamble, and an ERT inset issuing
    resetdefaultlanguage{khmer} is inserted at index 2 of the body.
    Documents in any other language are left untouched.
    """
    if document.language != "khmer":
        return

    document.language = "english"
    i = find_token(document.header, "\\language khmer", 0)
    if i != -1:
        document.header[i] = "\\language english"
    j = find_token(document.header, "\\language_package default", 0)
    if j != -1:
        # the language package setting is kept at "default"
        document.header[j] = "\\language_package default"
    # every backslash is escaped: "\s" is an invalid escape sequence
    add_to_preamble(document, ["\\AtBeginDocument{\\setotherlanguage{khmer}}"])
    document.body[2 : 2] = ["\\begin_layout Standard",
                            "\\begin_inset ERT", "status open", "",
                            "\\begin_layout Plain Layout", "", "",
                            "\\backslash",
                            "resetdefaultlanguage{khmer}",
                            "\\end_layout", "", "\\end_inset", "", "",
                            "\\end_layout", ""]
450
451
def revert_urdu(document):
    """Set the document language to English but assure Urdu output.

    The language header becomes english, a setotherlanguage command for
    urdu is added to the preamble, and an ERT inset issuing
    resetdefaultlanguage{urdu} is inserted at index 2 of the body.
    Documents in any other language are left untouched.
    """
    if document.language != "urdu":
        return

    document.language = "english"
    i = find_token(document.header, "\\language urdu", 0)
    if i != -1:
        document.header[i] = "\\language english"
    j = find_token(document.header, "\\language_package default", 0)
    if j != -1:
        # the language package setting is kept at "default"
        document.header[j] = "\\language_package default"
    # every backslash is escaped: "\s" is an invalid escape sequence
    add_to_preamble(document, ["\\AtBeginDocument{\\setotherlanguage{urdu}}"])
    document.body[2 : 2] = ["\\begin_layout Standard",
                            "\\begin_inset ERT", "status open", "",
                            "\\begin_layout Plain Layout", "", "",
                            "\\backslash",
                            "resetdefaultlanguage{urdu}",
                            "\\end_layout", "", "\\end_inset", "", "",
                            "\\end_layout", ""]
471
472
def revert_syriac(document):
    """Set the document language to English but assure Syriac output.

    The language header becomes english, a setotherlanguage command for
    syriac is added to the preamble, and an ERT inset issuing
    resetdefaultlanguage{syriac} is inserted at index 2 of the body.
    Documents in any other language are left untouched.
    """
    if document.language != "syriac":
        return

    document.language = "english"
    i = find_token(document.header, "\\language syriac", 0)
    if i != -1:
        document.header[i] = "\\language english"
    j = find_token(document.header, "\\language_package default", 0)
    if j != -1:
        # the language package setting is kept at "default"
        document.header[j] = "\\language_package default"
    # every backslash is escaped: "\s" is an invalid escape sequence
    add_to_preamble(document, ["\\AtBeginDocument{\\setotherlanguage{syriac}}"])
    document.body[2 : 2] = ["\\begin_layout Standard",
                            "\\begin_inset ERT", "status open", "",
                            "\\begin_layout Plain Layout", "", "",
                            "\\backslash",
                            "resetdefaultlanguage{syriac}",
                            "\\end_layout", "", "\\end_inset", "", "",
                            "\\end_layout", ""]
492
493
def revert_quotes(document):
    """Revert Quote insets in verbatim or Hebrew context to plain quotes.

    Three passes are made over the body:
      1. Quote insets inside ERT and listings insets, and inside insets
         whose third word is URL, Chunk, Sweave or S/R;
      2. Quote insets inside Verbatim, Verbatim*, Code, Author_Email and
         Author_URL layouts;
      3. Quote insets in Hebrew text (only if the document language is
         hebrew or the body contains a switch to hebrew).
    Each affected inset is replaced by a single line holding ' (when the
    inset's first line ends in "s") or " (otherwise).
    """
    body = document.body
    quote_token = '\\begin_inset Quotes'

    def plain_quote(line):
        " Return the plain quote character for the Quote inset starting with line. "
        if line.endswith("s"):
            return "'"
        return "\""

    def flatten_quotes(start, end):
        " Replace all Quote insets found in body[start:end] by plain quotes. "
        pos = start
        while pos < end:
            k = find_token(body, quote_token, pos, end)
            if k == -1:
                return
            l = find_end_of_inset(body, k)
            if l == -1:
                document.warning("Malformed LyX document: Can't find end of Quote inset at line " + str(k))
                # step over the broken inset; searching from k again
                # would find the same line forever
                pos = k + 1
                continue
            body[k:l+1] = [plain_quote(body[k])]
            # the body shrank by l - k lines, so the container ends
            # that much earlier; otherwise we would search beyond it
            end -= l - k
            pos = k + 1

    # First handle verbatim insets
    verbatim_insets = ["ERT", "listings"]
    verbatim_flex = ["URL", "Chunk", "Sweave", "S/R"]
    i = 0
    while i < len(body):
        words = body[i].split()
        if len(words) > 1 and words[0] == "\\begin_inset" and \
           (words[1] in verbatim_insets or (len(words) > 2 and words[2] in verbatim_flex)):
            j = find_end_of_inset(body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
            else:
                flatten_quotes(i, j)
        i += 1

    # Now verbatim layouts
    verbatim_layouts = ["Verbatim", "Verbatim*", "Code", "Author_Email", "Author_URL"]
    i = 0
    while i < len(body):
        words = body[i].split()
        if len(words) > 1 and words[0] == "\\begin_layout" and \
           words[1] in verbatim_layouts:
            j = find_end_of_layout(body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of " + words[1] + " layout at line " + str(i))
            else:
                flatten_quotes(i, j)
        i += 1

    # Now handle Hebrew
    if not document.language == "hebrew" and find_token(body, '\\lang hebrew', 0) == -1:
        return

    i = 0
    while True:
        k = find_token(body, quote_token, i)
        if k == -1:
            return
        l = find_end_of_inset(body, k)
        if l == -1:
            document.warning("Malformed LyX document: Can't find end of Quote inset at line " + str(k))
            # step over the broken inset instead of finding it again
            i = k + 1
            continue
        hebrew = False
        parent = get_containing_layout(body, k)
        ql = find_token_backwards(body, "\\lang", k)
        if ql == -1 or ql < parent[1]:
            # no language switch between parent[1] and the inset:
            # the document language applies
            hebrew = document.language == "hebrew"
        elif body[ql] == "\\lang hebrew":
            hebrew = True
        if hebrew:
            body[k:l+1] = [plain_quote(body[k])]
            # the inset collapsed into line k; go on right behind it
            i = k + 1
        else:
            i = l
585     
586
def revert_iopart(document):
    """Input new styles via local layout (iopart documents only).

    A marked block inputting stdlayouts.inc is placed at the top of the
    local layout; the local layout section is created in front of the
    line preceding the language header line if the document has none.
    """
    if document.textclass != "iopart":
        return

    header = document.header
    begin = find_token(header, "\\begin_local_layout", 0)
    if begin == -1:
        lang_line = find_token(header, "\\language", 0)
        if lang_line == -1:
            # this should not happen
            document.warning("Malformed LyX document! No \\language header found!")
            return
        begin = lang_line - 1
        header[begin : begin] = ["\\begin_local_layout", "\\end_local_layout"]

    # only used to verify that the local layout section is terminated
    end = find_end_of(header, begin, "\\begin_local_layout", "\\end_local_layout")
    if end == -1:
        # this should not happen
        document.warning("Malformed LyX document! Can't find end of local layout!")
        return

    insertion = [
        "### Inserted by lyx2lyx (stdlayouts) ###",
        "Input stdlayouts.inc",
        "### End of insertion by lyx2lyx (stdlayouts) ###"
    ]
    header[begin + 1 : begin + 1] = insertion
613
614
def convert_iopart(document):
    """Remove the local layout block added by lyx2lyx, if it is there.

    Only iopart documents are touched.  If the marked block is the
    entire local layout, the local layout section itself is deleted too.
    """
    if document.textclass != "iopart":
        return

    header = document.header
    begin = find_token(header, "\\begin_local_layout", 0)
    if begin == -1:
        return

    end = find_end_of(header, begin, "\\begin_local_layout", "\\end_local_layout")
    if end == -1:
        # this should not happen
        document.warning("Malformed LyX document! Can't find end of local layout!")
        return

    first = find_token(header, "### Inserted by lyx2lyx (stdlayouts) ###", begin, end)
    if first == -1:
        # nothing was inserted by lyx2lyx
        return

    last = find_token(header, "### End of insertion by lyx2lyx (stdlayouts) ###", begin, end)
    if last == -1:
        # this should not happen
        document.warning("End of lyx2lyx local layout insertion not found!")
        return

    if first == begin + 1 and last == end - 1:
        # that was all the local layout there was
        del header[begin : end + 1]
    else:
        del header[first : last + 1]
642
643
def convert_quotestyle(document):
    """Rename the quotes_language header setting to quotes_style.

    The value of the setting is carried over unchanged.  A warning is
    issued if the header has no quotes_language line.
    """
    header = document.header
    pos = find_token(header, "\\quotes_language", 0)
    if pos != -1:
        header[pos] = "\\quotes_style " + get_value(header, "\\quotes_language", pos)
        return
    document.warning("Malformed LyX document! Can't find \\quotes_language!")
652
653
def revert_quotestyle(document):
    """Rename the quotes_style header setting back to quotes_language.

    The value of the setting is carried over unchanged.  A warning is
    issued if the header has no quotes_style line.
    """
    header = document.header
    pos = find_token(header, "\\quotes_style", 0)
    if pos != -1:
        header[pos] = "\\quotes_language " + get_value(header, "\\quotes_style", pos)
        return
    document.warning("Malformed LyX document! Can't find \\quotes_style!")
662
663
def revert_plainquote(document):
    """Revert plain quote insets.

    A "plain" quotes_style header setting is changed to "english", and
    every Quote inset whose type starts with "q" is replaced by a single
    line holding ' (when the inset's first line ends in "s") or "
    (otherwise).
    """

    # First, revert style setting
    i = find_token(document.header, "\\quotes_style plain", 0)
    if i != -1:
        document.header[i] = "\\quotes_style english"

    # now the insets
    i = 0
    while True:
        k = find_token(document.body, '\\begin_inset Quotes q', i)
        if k == -1:
            return
        l = find_end_of_inset(document.body, k)
        if l == -1:
            document.warning("Malformed LyX document: Can't find end of Quote inset at line " + str(k))
            # step over the broken inset; searching from k again would
            # find the same line forever
            i = k + 1
            continue
        replace = "\""
        if document.body[k].endswith("s"):
            replace = "'"
        document.body[k:l+1] = [replace]
        # the inset collapsed into line k; go on right behind it
        i = k + 1
689
690
def convert_frenchquotes(document):
    """Convert french quote insets to swiss.

    A quotes_style header line starting with "french" becomes "swiss",
    and in every Quote inset whose type starts with "f" the first "f"
    of the type is replaced by "c".
    """
    header = document.header
    body = document.body

    # the header setting first
    style_line = find_token(header, "\\quotes_style french", 0)
    if style_line != -1:
        header[style_line] = "\\quotes_style swiss"

    # now the insets
    token = '\\begin_inset Quotes f'
    pos = find_token(body, token, 0)
    while pos != -1:
        # presumably the [7:] drops a leading "Quotes " from the value,
        # leaving the quote type -- verify against get_value
        old = get_value(body, "\\begin_inset Quotes", pos)[7:]
        new = old.replace("f", "c", 1)
        body[pos] = body[pos].replace(old, new)
        pos = find_token(body, token, pos + 1)
709
710
def revert_swissquotes(document):
    """Revert swiss quote insets to french.

    A "swiss" quotes_style header setting becomes "french", and in every
    Quote inset whose type starts with "c" the first "c" of the type is
    replaced by "f".
    """
    header = document.header
    body = document.body

    # the header setting first
    style_line = find_token(header, "\\quotes_style swiss", 0)
    if style_line != -1:
        header[style_line] = "\\quotes_style french"

    # now the insets
    token = '\\begin_inset Quotes c'
    pos = find_token(body, token, 0)
    while pos != -1:
        # presumably the [7:] drops a leading "Quotes " from the value,
        # leaving the quote type -- verify against get_value
        old = get_value(body, "\\begin_inset Quotes", pos)[7:]
        new = old.replace("c", "f", 1)
        body[pos] = body[pos].replace(old, new)
        pos = find_token(body, token, pos + 1)
729
730
def revert_britishquotes(document):
    """Revert british quote insets to english.

    A "british" quotes_style header setting becomes "english".  In every
    Quote inset whose type starts with "b", the first "b" of the type is
    replaced by "e" and the d/s flags are swapped: a type with "d" as
    third character gets "s" instead, any other type gets its "s"
    replaced by "d".
    """
    header = document.header
    body = document.body

    # the header setting first
    style_line = find_token(header, "\\quotes_style british", 0)
    if style_line != -1:
        header[style_line] = "\\quotes_style english"

    # now the insets
    token = '\\begin_inset Quotes b'
    pos = find_token(body, token, 0)
    while pos != -1:
        # presumably the [7:] drops a leading "Quotes " from the value,
        # leaving the quote type -- verify against get_value
        old = get_value(body, "\\begin_inset Quotes", pos)[7:]
        new = old.replace("b", "e", 1)
        # swap the d/s flag, depending on the third character of the type
        if old[2] == "d":
            src, dst = "d", "s"
        else:
            src, dst = "s", "d"
        new = new.replace(src, dst)
        body[pos] = body[pos].replace(old, new)
        pos = find_token(body, token, pos + 1)
755
756
def revert_swedishgquotes(document):
    """Revert swedishg quote insets.

    A "swedishg" quotes_style header setting becomes "danish".  Quote
    insets whose type starts with "w" are rewritten: types with "d" as
    third character (outer marks) get "a" for the first "w" and "l" for
    every "r"; all other types (inner marks) get "s" for the first "w".
    """
    header = document.header
    body = document.body

    # the header setting first
    style_line = find_token(header, "\\quotes_style swedishg", 0)
    if style_line != -1:
        header[style_line] = "\\quotes_style danish"

    # now the insets
    token = '\\begin_inset Quotes w'
    pos = find_token(body, token, 0)
    while pos != -1:
        # presumably the [7:] drops a leading "Quotes " from the value,
        # leaving the quote type -- verify against get_value
        old = get_value(body, "\\begin_inset Quotes", pos)[7:]
        is_outer = old[2] == "d"
        if is_outer:
            new = old.replace("w", "a", 1)
            new = new.replace("r", "l")
        else:
            new = old.replace("w", "s", 1)
        body[pos] = body[pos].replace(old, new)
        pos = find_token(body, token, pos + 1)
780
781
def revert_frenchquotes(document):
    """Revert french inner quote insets.

    Only "Quotes f.." insets whose third letter is "s" (inner marks) are
    touched: the style letter becomes "e" and every "s" becomes "d".
    Other french insets are left as they are.
    """

    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Quotes f', pos)
        if pos == -1:
            break
        old = get_value(document.body, "\\begin_inset Quotes", pos)[7:]
        inner = old[2] == "s"
        if inner:
            new = old.replace("f", "e", 1).replace("s", "d")
            document.body[pos] = document.body[pos].replace(old, new)
        pos += 1
796
797
def revert_frenchinquotes(document):
    """Revert inner frenchin quote insets.

    A "quotes_style frenchin" header line becomes "french".  Every
    "Quotes i.." inset gets style letter "f"; if its third letter is "s"
    (inner marks) every "s" is additionally turned into "d".
    """

    # Header: global quote style
    header_pos = find_token(document.header, "\\quotes_style frenchin", 0)
    if header_pos != -1:
        document.header[header_pos] = "\\quotes_style french"

    # Body: rewrite each frenchin quote inset
    pos = find_token(document.body, '\\begin_inset Quotes i', 0)
    while pos != -1:
        old = get_value(document.body, "\\begin_inset Quotes", pos)[7:]
        new = old.replace("i", "f", 1)
        if old[2] == "s":
            # inner marks
            new = new.replace("s", "d")
        document.body[pos] = document.body[pos].replace(old, new)
        pos = find_token(document.body, '\\begin_inset Quotes i', pos + 1)
819
820
def revert_russianquotes(document):
    """Revert russian quote insets.

    A "quotes_style russian" header line becomes "french".  "Quotes r.."
    insets whose third letter is "s" (inner marks) get style letter "g"
    and every "s" turned into "d"; all others (outer marks) get style
    letter "f".
    """

    # Header: global quote style
    header_pos = find_token(document.header, "\\quotes_style russian", 0)
    if header_pos != -1:
        document.header[header_pos] = "\\quotes_style french"

    # Body: rewrite each russian quote inset
    pos = 0
    while True:
        pos = find_token(document.body, '\\begin_inset Quotes r', pos)
        if pos == -1:
            break
        old = get_value(document.body, "\\begin_inset Quotes", pos)[7:]
        inner = old[2] == "s"
        if inner:
            new = old.replace("r", "g", 1).replace("s", "d")
        else:
            new = old.replace("r", "f", 1)
        document.body[pos] = document.body[pos].replace(old, new)
        pos += 1
845
846
def revert_dynamicquotes(document):
    """Revert dynamic quote insets.

    Removes the "dynamic_quotes" header line, reads the global
    "quotes_style" (warning and assuming "english" if it is missing) and
    replaces the "x" in every "Quotes x.." inset line by the letter that
    belongs to that style.  Styles not in the table map to "e".
    """

    # Drop the header setting that older formats do not know
    dyn_pos = find_token(document.header, "\\dynamic_quotes", 0)
    if dyn_pos != -1:
        del document.header[dyn_pos]

    # Determine the global style
    style = "english"
    style_pos = find_token(document.header, "\\quotes_style", 0)
    if style_pos != -1:
        style = get_value(document.header, "\\quotes_style", style_pos)
    else:
        document.warning("Malformed document! Missing \\quotes_style")

    # Style name -> inset style letter
    style_letters = {
        "english": "e",
        "swedish": "s",
        "german": "g",
        "polish": "p",
        "swiss": "c",
        "danish": "a",
        "plain": "q",
        "british": "b",
        "swedishg": "w",
        "french": "f",
        "frenchin": "i",
        "russian": "r",
    }
    letter = style_letters.get(style, "e")

    # Turn every dynamic inset into a static one of the global style
    pos = find_token(document.body, '\\begin_inset Quotes x', 0)
    while pos != -1:
        document.body[pos] = document.body[pos].replace("x", letter)
        pos = find_token(document.body, '\\begin_inset Quotes x', pos + 1)
897
898
def _revert_cjk_quote_insets(document, style_letter, marks, cjk_langs):
    """Replace all quote insets of one CJK style ("j" or "k") in the body.

    marks maps (level, side) -- level "inner"/"outer", side "open"/"close"
    -- to a pair (unicode glyph, math macro).  In text whose language is in
    cjk_langs the inset becomes the glyph; elsewhere it becomes a Formula
    inset containing the macro.
    """
    token = '\\begin_inset Quotes ' + style_letter
    i = 0
    while True:
        k = find_token(document.body, token, i)
        if k == -1:
            return
        l = find_end_of_inset(document.body, k)
        if l == -1:
            document.warning("Malformed LyX document: Can't find end of Quote inset at line " + str(k))
            # Step past the broken inset; searching from k again would
            # find the very same line and never terminate.
            i = k + 1
            continue

        # Language at the inset: the last \lang switch before it, provided
        # it lies inside the containing layout; else the document language.
        parent = get_containing_layout(document.body, k)
        ql = find_token_backwards(document.body, "\\lang", k)
        # NOTE(review): get_containing_layout() is assumed to return a
        # false value when no layout is found -- confirm in parser_tools.
        if ql == -1 or not parent or ql < parent[1]:
            cjk = document.language in cjk_langs
        else:
            cjk = document.body[ql].split()[1] in cjk_langs

        # Read the letters of *this* inset (line k), not of whatever Quotes
        # inset happens to come first after the previous search position.
        val = get_value(document.body, "\\begin_inset Quotes", k)[7:]
        if val[2] == "s":
            level = "inner"
        else:
            level = "outer"
        if val[1] == "l":
            side = "open"
        else:
            side = "close"
        glyph, macro = marks[(level, side)]
        if cjk:
            replace = [glyph]
        else:
            replace = ["\\begin_inset Formula $" + macro + "$", "\\end_inset"]

        document.body[k:l+1] = replace
        # Continue right behind the lines just written; the old end index
        # l is no longer valid once the body has changed length.
        i = k + len(replace)


def revert_cjkquotes(document):
    """Revert cjk quote insets.

    If the global quotes_style contains "cjk", the header is reset to
    "english" and dynamic ("x") insets are first made static: "k" for
    style "cjkangle", "j" otherwise.  Then every "j" (corner bracket) and
    "k" (angle bracket) quote inset is replaced by a literal Unicode
    bracket in CJK text, or by a math Formula inset in other text.
    """

    # Get global style
    style = "english"
    i = find_token(document.header, "\\quotes_style", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\quotes_style")
    else:
        style = get_value(document.header, "\\quotes_style", i)

    if "cjk" in style:
        document.header[i] = "\\quotes_style english"
        # transform dynamic insets
        s = "j"
        if style == "cjkangle":
            s = "k"
        i = 0
        while True:
            i = find_token(document.body, '\\begin_inset Quotes x', i)
            if i == -1:
                break
            document.body[i] = document.body[i].replace("x", s)
            i += 1

    cjk_langs = ["chinese-simplified", "chinese-traditional", "japanese", "japanese-cjk", "korean"]

    # (level, side) -> (glyph used in CJK text, math macro used elsewhere)
    corner_marks = {
        ("inner", "open"): (u"\u300E", "\\llceil"),
        ("inner", "close"): (u"\u300F", "\\rrfloor"),
        ("outer", "open"): (u"\u300C", "\\lceil"),
        ("outer", "close"): (u"\u300D", "\\rfloor"),
    }
    angle_marks = {
        ("inner", "open"): (u"\u3008", "\\langle"),
        ("inner", "close"): (u"\u3009", "\\rangle"),
        ("outer", "open"): (u"\u300A", "\\langle\\kern -2.5pt\\langle"),
        ("outer", "close"): (u"\u300B", "\\rangle\\kern -2.5pt\\rangle"),
    }

    _revert_cjk_quote_insets(document, "j", corner_marks, cjk_langs)
    _revert_cjk_quote_insets(document, "k", angle_marks, cjk_langs)
1031
1032
def revert_crimson(document):
    """Revert native Cochineal/Crimson font definition to LaTeX.

    Acts only if the header contains "use_non_tex_fonts false" and the
    roman font is "cochineal": a usepackage line for cochineal is added to
    the preamble and the roman font is reset to "default".  If "font_osf
    true" is set, it is switched to false and the package is loaded with
    the options "proportional,osf".
    """

    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    roman_pos = find_token(document.header, "\\font_roman \"cochineal\"", 0)
    if roman_pos == -1:
        return

    options = ""
    osf_pos = find_token(document.header, "\\font_osf true", 0)
    if osf_pos != -1:
        # old style figures are now requested through the package option
        document.header[osf_pos] = "\\font_osf false"
        options = "[proportional,osf]"

    add_to_preamble(document, ["\\usepackage" + options + "{cochineal}"])
    document.header[roman_pos] = document.header[roman_pos].replace("cochineal", "default")
1051
1052
def revert_cochinealmath(document):
    """Revert cochineal newtxmath definitions to LaTeX.

    Acts only if the header contains "use_non_tex_fonts false" and the
    math font is "cochineal-ntxm": newtxmath is then loaded with the
    cochineal option from the preamble and the math font is set to "auto".
    """

    if find_token(document.header, "\\use_non_tex_fonts false", 0) == -1:
        return
    math_pos = find_token(document.header, "\\font_math \"cochineal-ntxm\"", 0)
    if math_pos == -1:
        return
    add_to_preamble(document, "\\usepackage[cochineal]{newtxmath}")
    document.header[math_pos] = document.header[math_pos].replace("cochineal-ntxm", "auto")
1061
1062
def revert_labelonly(document):
    """Revert labelonly tag for InsetRef.

    Every reference inset whose command is "labelonly" is replaced by an
    ERT inset that contains just the referenced label.  Insets without a
    findable end or without a label are reported and left untouched.
    """
    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset CommandInset ref", pos)
        if pos == -1:
            break
        end = find_end_of_inset(document.body, pos)
        if end == -1:
            document.warning("Can't find end of reference inset at line %d!!" %(pos))
            pos += 1
            continue
        if find_token(document.body, "LatexCommand labelonly", pos, end) == -1:
            # some other kind of reference
            pos = end
            continue
        label = get_quoted_value(document.body, "reference", pos, end)
        if label:
            document.body[pos:end+1] = put_cmd_in_ert([label])
            pos += 1
        else:
            document.warning("Can't find label for reference at line %d!" %(pos))
            pos = end + 1
1086
1087
def revert_plural_refs(document):
    """Revert plural and capitalized references.

    For a "formatted" reference in a document that uses refstyle, with
    "plural" or "caps" set and a label of the form "prefix:suffix", the
    inset is replaced by ERT: a backslash, the prefix (first letter
    capitalized for caps), "ref", "[s]" for plural, then the suffix in
    braces.  In every other reference inset the "plural" and "caps"
    params are simply deleted.
    """
    # find_token() signals "not found" with -1; index 0 is a valid hit
    use_refstyle = find_token(document.header, "\\use_refstyle 1", 0) != -1

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of reference inset at line %d!!" %(i))
            i += 1
            continue

        plural = caps = False
        prefix = suffix = ""
        # The param is spelled "LatexCommand" in the file format (as in the
        # other reference/citation routines of this module).
        k = find_token(document.body, "LatexCommand formatted", i, j)
        if k != -1 and use_refstyle:
            plural = get_bool_value(document.body, "plural", i, j, False)
            caps   = get_bool_value(document.body, "caps", i, j, False)
            label  = get_quoted_value(document.body, "reference", i, j)
            if not label:
                document.warning("Can't find label for reference at line %d!" % (i))
            elif ":" in label:
                (prefix, suffix) = label.split(":", 1)
            else:
                document.warning("No `:' separator in formatted reference at line %d!" % (i))

        # this effectively tests also for use_refstyle and a formatted reference
        # we do this complicated test because we would otherwise do this erasure
        # over and over and over
        if not ((plural or caps) and suffix):
            # Delete both params, shrinking the inset end by however many
            # lines were really removed (either param may be absent).
            for param in ("plural", "caps"):
                before = len(document.body)
                del_token(document.body, param, i, j)
                j -= before - len(document.body)
            # j is the inset's end line again; resume the search there
            i = j
            continue

        if caps:
            # slicing keeps this safe for an empty prefix (label ":foo")
            prefix = prefix[:1].title() + prefix[1:]
        cmd = "\\" + prefix + "ref"
        if plural:
            cmd += "[s]"
        cmd += "{" + suffix + "}"
        document.body[i:j+1] = put_cmd_in_ert([cmd])
        i += 1
1135
1136
def revert_noprefix(document):
    """Revert labelonly tags with 'noprefix' set.

    A "labelonly" reference with noprefix set and a label of the form
    "prefix:suffix" is replaced by ERT containing only the suffix.  In all
    other labelonly references the "noprefix" param is deleted, so the
    inset stays an ordinary labelonly reference.  Other reference insets
    are not touched.
    """
    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset ref", i)
        if i == -1:
            return
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of reference inset at line %d!!" %(i))
            i += 1
            continue
        k = find_token(document.body, "LatexCommand labelonly", i, j)
        if k == -1:
            i = j
            continue
        noprefix = get_bool_value(document.body, "noprefix", i, j)
        if not noprefix:
            del_token(document.body, "noprefix", i, j)
            i = j
            continue
        label = get_quoted_value(document.body, "reference", i, j)
        if not label:
            document.warning("Can't find label for reference at line %d!" %(i))
            # still drop the param, which the older format does not know
            del_token(document.body, "noprefix", i, j)
            i = j
            continue
        if ":" not in label:
            document.warning("No `:' separator in formatted reference at line %d!" % (i))
            # we'll leave this as an ordinary labelonly reference
            del_token(document.body, "noprefix", i, j)
            i = j
            continue
        suffix = label.split(":", 1)[1]
        document.body[i:j+1] = put_cmd_in_ert([suffix])
        i += 1
1173
1174
def revert_biblatex(document):
    """Revert biblatex support.

    Header: a biblatex cite engine is replaced by natbib; the
    biblatex_bibstyle, biblatex_citestyle and biblio_options lines are
    removed and, for biblatex documents, turned into options of a
    usepackage line for biblatex in the preamble.

    Body: the biblatexopts param is removed from bibtex insets.  For
    biblatex documents each bibtex inset is preceded by an ERT
    printbibliography command and wrapped into a Note, its bibfiles are
    added to the preamble as addbibresource lines, and citation commands
    listed in new_citations become ERT.  Citation commands unknown to older
    versions are reset to "cite".

    Finally, for biblatex documents, "Provides natbib 1" is added to the
    local layout.
    """

    #
    # Header
    #

    # 1. Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # 2. Store biblatex state and revert to natbib
    biblatex = engine in ["biblatex", "biblatex-natbib"]
    if biblatex:
        document.header[i] = "\\cite_engine natbib"

    # 3. Store and remove new document headers
    bibstyle = ""
    i = find_token(document.header, "\\biblatex_bibstyle", 0)
    if i != -1:
        bibstyle = get_value(document.header, "\\biblatex_bibstyle", i)
        del document.header[i]

    citestyle = ""
    i = find_token(document.header, "\\biblatex_citestyle", 0)
    if i != -1:
        citestyle = get_value(document.header, "\\biblatex_citestyle", i)
        del document.header[i]

    biblio_options = ""
    i = find_token(document.header, "\\biblio_options", 0)
    if i != -1:
        biblio_options = get_value(document.header, "\\biblio_options", i)
        del document.header[i]

    if biblatex:
        bbxopts = "[natbib=true"
        if bibstyle != "":
            bbxopts += ",bibstyle=" + bibstyle
        if citestyle != "":
            bbxopts += ",citestyle=" + citestyle
        if biblio_options != "":
            bbxopts += "," + biblio_options
        bbxopts += "]"
        add_to_preamble(document, "\\usepackage" + bbxopts + "{biblatex}")

    #
    # Body
    #

    # 1. Bibtex insets
    i = 0
    bibresources = []
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of bibtex inset at line %d!!" %(i))
            i += 1
            continue
        bibs = get_quoted_value(document.body, "bibfiles", i, j)
        opts = get_quoted_value(document.body, "biblatexopts", i, j)
        # store resources
        if bibs:
            bibresources += bibs.split(",")
        else:
            document.warning("Can't find bibfiles for bibtex inset at line %d!" %(i))
        # remove biblatexopts line
        k = find_token(document.body, "biblatexopts", i, j)
        if k != -1:
            del document.body[k]
        # Re-find inset end line
        j = find_end_of_inset(document.body, i)
        if not biblatex:
            i = j + 1
            continue
        # Insert ERT \\printbibliography and wrap bibtex inset to a Note
        pcmd = "printbibliography"
        if opts:
            pcmd += "[" + opts + "]"
        repl = ["\\begin_inset ERT", "status open", "", "\\begin_layout Plain Layout",\
                "", "", "\\backslash", pcmd, "\\end_layout", "", "\\end_inset", "", "",\
                "\\end_layout", "", "\\begin_layout Standard", "\\begin_inset Note Note",\
                "status open", "", "\\begin_layout Plain Layout" ]
        repl += document.body[i:j+1]
        repl += ["", "\\end_layout", "", "\\end_inset", "", ""]
        document.body[i:j+1] = repl
        # Resume exactly behind the inserted lines.  Derived from the
        # replacement itself, so it stays right if the wrapper changes.
        i += len(repl)

    if biblatex:
        for b in bibresources:
            add_to_preamble(document, "\\addbibresource{" + b + ".bib}")

    # 2. Citation insets

    # Specific citation insets used in biblatex that need to be reverted to ERT
    new_citations = {
        "Cite" : "Cite",
        "citebyear" : "citeyear",
        "citeyear" : "cite*",
        "Footcite" : "Smartcite",
        "footcite" : "smartcite",
        "Autocite" : "Autocite",
        "autocite" : "autocite",
        "citetitle" : "citetitle",
        "citetitle*" : "citetitle*",
        "fullcite" : "fullcite",
        "footfullcite" : "footfullcite",
        "supercite" : "supercite",
        "citeauthor" : "citeauthor",
        "citeauthor*" : "citeauthor*",
        "Citeauthor" : "Citeauthor",
        "Citeauthor*" : "Citeauthor*"
        }

    # All commands accepted by LyX < 2.3. Everything else throws an error.
    old_citations = [ "cite", "nocite", "citet", "citep", "citealt", "citealp",\
                      "citeauthor", "citeyear", "citeyearpar", "citet*", "citep*",\
                      "citealt*", "citealp*", "citeauthor*", "Citet",  "Citep",\
                      "Citealt",  "Citealp",  "Citeauthor", "Citet*", "Citep*",\
                      "Citealt*", "Citealp*", "Citeauthor*", "fullcite", "footcite",\
                      "footcitet", "footcitep", "footcitealt", "footcitealp",\
                      "footciteauthor", "footciteyear", "footciteyearpar",\
                      "citefield", "citetitle", "cite*" ]

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset citation", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of citation inset at line %d!!" %(i))
            i += 1
            continue
        k = find_token(document.body, "LatexCommand", i, j)
        if k == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
            i = j + 1
            continue
        cmd = get_value(document.body, "LatexCommand", k)
        if biblatex and cmd in new_citations:
            pre = get_quoted_value(document.body, "before", i, j)
            post = get_quoted_value(document.body, "after", i, j)
            key = get_quoted_value(document.body, "key", i, j)
            if not key:
                document.warning("Citation inset at line %d does not have a key!" %(i))
                key = "???"
            # Replace known new commands with ERT
            res = "\\" + new_citations[cmd]
            if pre:
                res += "[" + pre + "]"
            if post:
                res += "[" + post + "]"
            elif pre:
                # a lone optional argument would be read as the post note
                res += "[]"
            res += "{" + key + "}"
            document.body[i:j+1] = put_cmd_in_ert([res])
        elif cmd not in old_citations:
            # Reset unknown commands to cite. This is what LyX does as well
            # (but LyX 2.2 would break on unknown commands)
            document.body[k] = "LatexCommand cite"
            document.warning("Reset unknown cite command '%s' with cite" % cmd)
        i = j + 1

    # Emulate the old biblatex-workaround (pretend natbib in order to use the styles)
    if biblatex:
        i = find_token(document.header, "\\begin_local_layout", 0)
        if i == -1:
            k = find_token(document.header, "\\language", 0)
            if k == -1:
                # this should not happen
                document.warning("Malformed LyX document! No \\language header found!")
                return
            document.header[k-1 : k-1] = ["\\begin_local_layout", "\\end_local_layout"]
            i = k-1

        # only used to verify that the local layout is properly closed
        j = find_end_of(document.header, i, "\\begin_local_layout", "\\end_local_layout")
        if j == -1:
            # this should not happen
            document.warning("Malformed LyX document! Can't find end of local layout!")
            return

        document.header[i+1 : i+1] = [
            "### Inserted by lyx2lyx (biblatex emulation) ###",
            "Provides natbib 1",
            "### End of insertion by lyx2lyx (biblatex emulation) ###"
        ]
1370
1371
def revert_citekeyonly(document):
    """Revert keyonly cite command to ERT.

    Every citation inset whose LatexCommand is "keyonly" is replaced by an
    ERT inset containing the value of its "key" param.  A missing key is
    reported, but the inset is replaced all the same.
    """

    pos = 0
    while True:
        pos = find_token(document.body, "\\begin_inset CommandInset citation", pos)
        if pos == -1:
            break
        end = find_end_of_inset(document.body, pos)
        if end == -1:
            document.warning("Can't find end of citation inset at line %d!!" %(pos))
            pos += 1
            continue
        cmd_pos = find_token(document.body, "LatexCommand", pos, end)
        if cmd_pos == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" %(pos))
            pos = end + 1
            continue
        if get_value(document.body, "LatexCommand", cmd_pos) == "keyonly":
            key = get_quoted_value(document.body, "key", pos, end)
            if not key:
                document.warning("Citation inset at line %d does not have a key!" %(pos))
            # The bare key is all that is output for this command
            document.body[pos:end+1] = put_cmd_in_ert([key])
        pos = end + 1
1401
1402
1403
def revert_bibpackopts(document):
    """Revert support for natbib/jurabib package options.

    Only for cite engines "natbib" and "jurabib": the "biblio_options"
    header line is removed and, if it had a non-empty value, re-expressed
    as a "PackageOptions <engine> <options>" line at the top of the local
    layout (which is created in the header if it does not exist yet).
    """

    engine_pos = find_token(document.header, "\\cite_engine", 0)
    if engine_pos != -1:
        engine = get_value(document.header, "\\cite_engine", engine_pos)
    else:
        document.warning("Malformed document! Missing \\cite_engine")
        engine = "basic"

    if engine not in ["natbib", "jurabib"]:
        return

    opts_pos = find_token(document.header, "\\biblio_options", 0)
    if opts_pos == -1:
        # Nothing to do if we have no options
        return

    biblio_options = get_value(document.header, "\\biblio_options", opts_pos)
    del document.header[opts_pos]

    if not biblio_options:
        # Nothing to do for empty options
        return

    layout_pos = find_token(document.header, "\\begin_local_layout", 0)
    if layout_pos == -1:
        # No local layout yet: create an empty one near the \language line
        lang_pos = find_token(document.header, "\\language", 0)
        if lang_pos == -1:
            # this should not happen
            document.warning("Malformed LyX document! No \\language header found!")
            return
        layout_pos = lang_pos - 1
        document.header[layout_pos : layout_pos] = ["\\begin_local_layout", "\\end_local_layout"]

    if find_end_of(document.header, layout_pos, "\\begin_local_layout", "\\end_local_layout") == -1:
        # this should not happen
        document.warning("Malformed LyX document! Can't find end of local layout!")
        return

    insertion = [
        "### Inserted by lyx2lyx (bibliography package options) ###",
        "PackageOptions " + engine + " " + biblio_options,
        "### End of insertion by lyx2lyx (bibliography package options) ###"
    ]
    document.header[layout_pos+1 : layout_pos+1] = insertion
1451
1452
def _parse_qualified_texts(texts):
    """Turn a tab-separated list of "key text" entries into a key->text dict.

    Entries without a space separator (in particular the single empty
    entry produced by an empty list) are skipped.
    """
    result = {}
    for entry in texts.split("\t"):
        parts = entry.split(" ", 1)
        if len(parts) == 2:
            result[parts[0]] = parts[1]
    return result


def revert_qualicites(document):
    """Revert qualified citation list commands to ERT.

    Only citation insets that carry a "pretextlist" or "posttextlist"
    param are handled.  With a biblatex cite engine and a command listed
    in ql_citations, the inset is replaced by ERT holding the
    multi-cite command: global pre/post notes in parentheses, then per key
    its optional pre/post notes in brackets and the key in braces.
    Otherwise the two list params are just deleted from the inset.
    """

    # Citation insets that support qualified lists, with their LaTeX code
    ql_citations = {
        "cite" : "cites",
        "Cite" : "Cites",
        "citet" : "textcites",
        "Citet" : "Textcites",
        "citep" : "parencites",
        "Citep" : "Parencites",
        "Footcite" : "Smartcites",
        "footcite" : "smartcites",
        "Autocite" : "Autocites",
        "autocite" : "autocites",
        }

    # Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    biblatex = engine in ["biblatex", "biblatex-natbib"]

    i = 0
    while True:
        i = find_token(document.body, "\\begin_inset CommandInset citation", i)
        if i == -1:
            break
        j = find_end_of_inset(document.body, i)
        if j == -1:
            document.warning("Can't find end of citation inset at line %d!!" %(i))
            i += 1
            continue
        pres = find_token(document.body, "pretextlist", i, j)
        posts = find_token(document.body, "posttextlist", i, j)
        if pres == -1 and posts == -1:
            # nothing to do.
            i = j + 1
            continue
        # Look the values up inside this inset only; one of the two params
        # may be absent, so its line index cannot serve as search start.
        pretexts = get_quoted_value(document.body, "pretextlist", i, j)
        posttexts = get_quoted_value(document.body, "posttextlist", i, j)
        k = find_token(document.body, "LatexCommand", i, j)
        if k == -1:
            document.warning("Can't find LatexCommand for citation inset at line %d!" %(i))
            i = j + 1
            continue
        cmd = get_value(document.body, "LatexCommand", k)
        if biblatex and cmd in ql_citations:
            pre = get_quoted_value(document.body, "before", i, j)
            post = get_quoted_value(document.body, "after", i, j)
            key = get_quoted_value(document.body, "key", i, j)
            if not key:
                document.warning("Citation inset at line %d does not have a key!" %(i))
                key = "???"
            keys = key.split(",")
            premap = _parse_qualified_texts(pretexts)
            postmap = _parse_qualified_texts(posttexts)
            # Replace known new commands with ERT
            # Parentheses delimit the global notes, so notes containing
            # parentheses themselves are protected by braces.
            if "(" in pre or ")" in pre:
                pre = "{" + pre + "}"
            if "(" in post or ")" in post:
                post = "{" + post + "}"
            res = "\\" + ql_citations[cmd]
            if pre:
                res += "(" + pre + ")"
            if post:
                res += "(" + post + ")"
            elif pre:
                res += "()"
            for kk in keys:
                if premap.get(kk, "") != "":
                    res += "[" + premap[kk] + "]"
                if postmap.get(kk, "") != "":
                    res += "[" + postmap[kk] + "]"
                elif premap.get(kk, "") != "":
                    res += "[]"
                res += "{" + kk + "}"
            document.body[i:j+1] = put_cmd_in_ert([res])
        else:
            # just remove the params; delete the later line first so the
            # index of the earlier one stays valid
            for line in sorted([pres, posts], reverse=True):
                if line != -1:
                    del document.body[line]
        i += 1
1548
1549
# Command insets that carry the "literal" param
command_insets = ["bibitem", "citation", "href", "index_print", "nomenclature"]
def convert_literalparam(document):
    """Add param literal.

    Inserts a "literal" line into every inset of the kinds listed in
    command_insets, at the first blank line inside the inset (or at the
    inset's end line if there is none).  The value is "false" for the
    insets in latexified_insets and "true" for the others.
    """

    # These already had some sort of latexify method
    latexified_insets = ["href", "index_print", "nomenclature"]

    for inset in command_insets:
        token = '\\begin_inset CommandInset %s' % inset
        if inset in latexified_insets:
            literal = "literal \"false\""
        else:
            literal = "literal \"true\""
        pos = 0
        while True:
            pos = find_token(document.body, token, pos)
            if pos == -1:
                break
            end = find_end_of_inset(document.body, pos)
            if end == -1:
                document.warning("Malformed LyX document: Can't find end of %s inset at line %d" % (inset, pos))
                pos += 1
                continue
            # walk to the end of the param list
            while pos < end and document.body[pos].strip() != '':
                pos += 1
            document.body.insert(pos, literal)
1574
1575
1576
def revert_literalparam(document):
    """Remove param literal.

    Deletes every line starting with "literal" from the insets of the
    kinds listed in command_insets.
    """

    for inset in command_insets:
        token = '\\begin_inset CommandInset %s' % inset
        pos = 0
        while True:
            pos = find_token(document.body, token, pos)
            if pos == -1:
                break
            end = find_end_of_inset(document.body, pos)
            if end == -1:
                document.warning("Malformed LyX document: Can't find end of %s inset at line %d" % (inset, pos))
                pos += 1
                continue
            literal_pos = find_token(document.body, 'literal', pos, end)
            if literal_pos != -1:
                # stay on this inset; the next round checks it again
                del document.body[literal_pos]
            else:
                pos += 1
1596
1597
1598
def revert_multibib(document):
    """Revert multibib support.

    The multibib header line is read and removed; if it is missing or empty
    nothing else happens.

    With a biblatex cite engine the value is appended to the biblio_options
    header as "refsection=<value>", and every bibtex inset whose btprint
    value is "bibbysection" is switched to "btPrintCited", preceded by an
    ERT bibbysection command and wrapped into a Note inset.

    Otherwise bibtopic is switched on and every layout of the selected unit
    (part, chapter, section or subsection) is preceded by an ERT paragraph
    that opens a btUnit environment (closing the previous one first, from
    the second unit on).  The last unit is closed right before the end of
    the body.
    """

    # 1. Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # 2. Do we use biblatex?
    biblatex = engine in ["biblatex", "biblatex-natbib"]

    # 3. Store and remove multibib document header
    multibib = ""
    i = find_token(document.header, "\\multibib", 0)
    if i != -1:
        multibib = get_value(document.header, "\\multibib", i)
        del document.header[i]

    if not multibib:
        return

    # 4. The easy part: Biblatex
    if biblatex:
        i = find_token(document.header, "\\biblio_options", 0)
        if i == -1:
            k = find_token(document.header, "\\use_bibtopic", 0)
            if k == -1:
                # this should not happen
                document.warning("Malformed LyX document! No \\use_bibtopic header found!")
                return
            document.header[k-1 : k-1] = ["\\biblio_options " + "refsection=" + multibib]
        else:
            biblio_options = get_value(document.header, "\\biblio_options", i)
            if biblio_options:
                biblio_options += ","
            biblio_options += "refsection=" + multibib
            document.header[i] = "\\biblio_options " + biblio_options

        # Bibtex insets
        i = 0
        while True:
            i = find_token(document.body, "\\begin_inset CommandInset bibtex", i)
            if i == -1:
                break
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Can't find end of bibtex inset at line %d!!" %(i))
                i += 1
                continue
            btprint = get_quoted_value(document.body, "btprint", i, j)
            if btprint != "bibbysection":
                i += 1
                continue
            opts = get_quoted_value(document.body, "biblatexopts", i, j)
            # change btprint line
            k = find_token(document.body, "btprint", i, j)
            if k != -1:
                document.body[k] = "btprint \"btPrintCited\""
            # Insert ERT \\bibbysection and wrap bibtex inset to a Note
            pcmd = "bibbysection"
            if opts:
                pcmd += "[" + opts + "]"
            repl = ["\\begin_inset ERT", "status open", "", "\\begin_layout Plain Layout",
                    "", "", "\\backslash", pcmd, "\\end_layout", "", "\\end_inset", "", "",
                    "\\end_layout", "", "\\begin_layout Standard", "\\begin_inset Note Note",
                    "status open", "", "\\begin_layout Plain Layout"]
            repl += document.body[i:j+1]
            repl += ["", "\\end_layout", "", "\\end_inset", "", ""]
            document.body[i:j+1] = repl
            # Continue right behind the replacement.  Deriving the offset
            # from the list avoids a hand-counted constant getting out of
            # sync with the lines actually inserted.
            i += len(repl)
        return

    # 5. More tricky: Bibtex/Bibtopic
    k = find_token(document.header, "\\use_bibtopic", 0)
    if k == -1:
        # this should not happen
        document.warning("Malformed LyX document! No \\use_bibtopic header found!")
        return
    document.header[k] = "\\use_bibtopic true"

    # Possible units. This assumes that the LyX name follows the std,
    # which might not always be the case. But it's as good as we can get.
    units = {
        "part" : "Part",
        "chapter" : "Chapter",
        "section" : "Section",
        "subsection" : "Subsection",
        }

    if multibib not in units:
        document.warning("Unknown multibib value `%s'!" % multibib)
        return
    unit = units[multibib]

    # ERT paragraphs are assembled from shared pieces so that every line is
    # a separate list element (adjacent string literals without a comma
    # would silently be fused into a single line).
    ert_head = ["\\begin_layout Standard",
                "\\begin_inset ERT", "status open", "",
                "\\begin_layout Plain Layout", "", "",
                "\\backslash"]
    ert_tail = ["\\end_layout", "", "\\end_inset", "", "",
                "\\end_layout", ""]
    open_unit = ert_head + ["begin{btUnit}"] + ert_tail
    close_unit = ert_head + ["end{btUnit}"] + ert_tail
    switch_unit = ert_head + ["end{btUnit}", "\\end_layout",
                              "\\begin_layout Plain Layout", "",
                              "\\backslash",
                              "begin{btUnit}"] + ert_tail

    btunit = False
    i = 0
    while True:
        i = find_token(document.body, "\\begin_layout " + unit, i)
        if i == -1:
            break
        # The first unit only opens a btUnit, later ones close the
        # previous btUnit before opening a new one.
        if btunit:
            ert = switch_unit
        else:
            ert = open_unit
        document.body[i-1 : i-1] = ert
        btunit = True
        # Skip the inserted lines and the unit's own \begin_layout line.
        i += len(ert) + 1

    if btunit:
        # The loop above always ends with i == -1, so the search for the
        # end of the body has to start from the top again.
        end_body = find_token(document.body, "\\end_body", 0)
        if end_body == -1:
            document.warning("Malformed LyX document! Can't find \\end_body.")
            # Still close the last btUnit, at the very end of the body.
            insert_at = len(document.body)
        else:
            insert_at = end_body - 1
        document.body[insert_at : insert_at] = close_unit
1738
1739
def revert_chapterbib(document):
    """Revert chapterbib support.

    Only acts if the multibib header has the value "child"; the header line
    is then removed.

    With a biblatex cite engine an ERT newrefsection command is inserted in
    front of the paragraph of every include inset.  If bibtopic is in use,
    the paragraph of every include inset is enclosed in ERT that opens and
    closes a btUnit environment.  Otherwise the chapterbib package is added
    to the preamble.
    """

    # 1. Get cite engine
    engine = "basic"
    i = find_token(document.header, "\\cite_engine", 0)
    if i == -1:
        document.warning("Malformed document! Missing \\cite_engine")
    else:
        engine = get_value(document.header, "\\cite_engine", i)

    # 2. Do we use biblatex?
    biblatex = engine in ["biblatex", "biblatex-natbib"]

    # 3. Store multibib document header value
    multibib = ""
    i = find_token(document.header, "\\multibib", 0)
    if i != -1:
        multibib = get_value(document.header, "\\multibib", i)

    if multibib != "child":
        # nothing to do
        return

    # 4. remove multibib header
    del document.header[i]

    # ERT paragraphs are assembled from shared pieces so that every line is
    # a separate list element (adjacent string literals without a comma
    # would silently be fused into a single line).
    ert_head = ["\\begin_layout Standard",
                "\\begin_inset ERT", "status open", "",
                "\\begin_layout Plain Layout", "", "",
                "\\backslash"]
    ert_tail = ["\\end_layout", "", "\\end_inset", "", "",
                "\\end_layout", ""]

    # 5. Biblatex
    if biblatex:
        # find include insets
        i = 0
        while True:
            i = find_token(document.body, "\\begin_inset CommandInset include", i)
            if i == -1:
                break
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Can't find end of include inset at line %d!!" %(i))
                i += 1
                continue
            parent = get_containing_layout(document.body, i)
            # NOTE(review): get_containing_layout presumably returns a false
            # value when no enclosing layout is found -- confirm.
            if not parent:
                document.warning("Can't find layout containing include inset at line %d!!" %(i))
                i += 1
                continue
            parbeg = parent[1]

            # Insert ERT \\newrefsection before inset
            beg = ert_head + ["newrefsection"] + ert_tail
            document.body[parbeg-1:parbeg-1] = beg
            j += len(beg)
            i = j + 1
        return

    # 6. Bibtex/Bibtopic
    i = find_token(document.header, "\\use_bibtopic", 0)
    if i == -1:
        # this should not happen
        document.warning("Malformed LyX document! No \\use_bibtopic header found!")
        return
    if get_value(document.header, "\\use_bibtopic", i) == "true":
        # find include insets
        i = 0
        while True:
            i = find_token(document.body, "\\begin_inset CommandInset include", i)
            if i == -1:
                break
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Can't find end of include inset at line %d!!" %(i))
                i += 1
                continue
            parent = get_containing_layout(document.body, i)
            # NOTE(review): see the remark on get_containing_layout above.
            if not parent:
                document.warning("Can't find layout containing include inset at line %d!!" %(i))
                i += 1
                continue
            parbeg = parent[1]
            parend = parent[2]

            # Insert wrap inset into \\begin{btUnit}...\\end{btUnit}
            beg = ert_head + ["begin{btUnit}"] + ert_tail
            end = ert_head + ["end{btUnit}"] + ert_tail
            # Insert the closing part first so that parbeg stays valid.
            document.body[parend+1:parend+1] = end
            document.body[parbeg-1:parbeg-1] = beg
            j += len(beg) + len(end)
            i = j + 1
        return

    # 7. Chapterbib proper
    add_to_preamble(document, ["\\usepackage{chapterbib}"])
1843
1844
def convert_dashligatures(document):
    """Remove a zero-length space (U+200B) after en- and em-dashes.

    Every line of document.body is scanned for en-dashes (U+2013) and
    em-dashes (U+2014).  A U+200B directly following such a dash is removed,
    also if the dash ends a line and the U+200B starts the next one.

    Lines inside CommandInset, ERT, External, Formula, FormulaMacro,
    Graphics, IPA and listings insets as well as lines starting with
    \\leftindent, \\paragraph_spacing, \\align or \\labelwidthstring are
    left untouched.
    """

    en_dash = u"\u2013"
    em_dash = u"\u2014"
    zwsp = u"\u200B"
    skipped_insets = ["CommandInset", "ERT", "External", "Formula",
                      "FormulaMacro", "Graphics", "IPA", "listings"]
    skipped_params = ["\\leftindent", "\\paragraph_spacing", "\\align",
                      "\\labelwidthstring"]

    i = 0
    while i < len(document.body):
        words = document.body[i].split()
        # Skip some document parts where dashes are not converted
        if len(words) > 1 and words[0] == "\\begin_inset" and \
           words[1] in skipped_insets:
            j = find_end_of_inset(document.body, i)
            if j == -1:
                document.warning("Malformed LyX document: Can't find end of " \
                                 + words[1] + " inset at line " + str(i))
                i += 1
            else:
                i = j
            continue
        if len(words) > 0 and words[0] in skipped_params:
            i += 1
            continue

        start = 0
        while True:
            j = document.body[i].find(en_dash, start)
            k = document.body[i].find(em_dash, start)
            if j == -1 and k == -1:
                break
            # Continue with whichever dash comes first.
            if j == -1 or (k != -1 and k < j):
                j = k
            after = document.body[i][j+1:]
            if after.startswith(zwsp):
                document.body[i] = document.body[i][:j+1] + after[1:]
            elif len(after) == 0:
                # The dash ends the line: the space may start the next line.
                # The bounds check keeps a dash at the very end of the body
                # from raising an IndexError.
                if i + 1 < len(document.body) and \
                   document.body[i+1].startswith(zwsp):
                    document.body[i+1] = document.body[i+1][1:]
                break
            start = j+1
        i += 1
1885
1886
def revert_dashligatures(document):
    """Remove font ligature settings for en- and em-dashes.

    The use_dash_ligatures header line is deleted.  If it was true and the
    document does not use non-TeX fonts, a zero-length space (U+200B) is
    added after every en-dash (U+2013) and em-dash (U+2014) in the body,
    except in the insets and paragraph parameter lines skipped below.
    """
    pos = find_token(document.header, "\\use_dash_ligatures", 0)
    if pos == -1:
        return
    use_dash_ligatures = get_bool_value(document.header, "\\use_dash_ligatures", pos)
    del document.header[pos]

    use_non_tex_fonts = False
    pos = find_token(document.header, "\\use_non_tex_fonts", 0)
    if pos != -1:
        use_non_tex_fonts = get_bool_value(document.header, "\\use_non_tex_fonts", pos)
    if use_non_tex_fonts or not use_dash_ligatures:
        return

    en_dash = u"\u2013"
    em_dash = u"\u2014"
    zwsp = u"\u200B"
    skipped_insets = ("CommandInset", "ERT", "External", "Formula",
                      "FormulaMacro", "Graphics", "IPA", "listings")
    skipped_params = ("\\leftindent", "\\paragraph_spacing", "\\align",
                      "\\labelwidthstring")

    # Add a zero-length space (U+200B) after en- and em-dashes
    lineno = 0
    while lineno < len(document.body):
        line = document.body[lineno]
        words = line.split()

        # Skip some document parts where dashes are not converted
        if len(words) > 1 and words[0] == "\\begin_inset" \
           and words[1] in skipped_insets:
            end = find_end_of_inset(document.body, lineno)
            if end == -1:
                document.warning("Malformed LyX document: Can't find end of " \
                                 + words[1] + " inset at line " + str(lineno))
                lineno += 1
            else:
                lineno = end
            continue
        if words and words[0] in skipped_params:
            lineno += 1
            continue

        # The three characters are distinct, so two chained replacements
        # give the same result as a left-to-right scan over both dashes.
        if en_dash in line or em_dash in line:
            line = line.replace(en_dash, en_dash + zwsp)
            document.body[lineno] = line.replace(em_dash, em_dash + zwsp)
        lineno += 1
1934     
1935
1936 ##
1937 # Conversion hub
1938 #
1939
supported_versions = ["2.3.0", "2.3"]

# Each entry is [number, [functions]]; the numbers ascend from 509 to 535.
convert = [
    [509, [convert_microtype]],
    [510, [convert_dateinset]],
    [511, [convert_ibranches]],
    [512, [convert_beamer_article_styles]],
    [513, []],
    [514, []],
    [515, []],
    [516, [convert_inputenc]],
    [517, []],
    [518, [convert_iopart]],
    [519, [convert_quotestyle]],
    [520, []],
    [521, [convert_frenchquotes]],
    [522, []],
    [523, []],
    [524, []],
    [525, []],
    [526, []],
    [527, []],
    [528, []],
    [529, []],
    [530, []],
    [531, []],
    [532, [convert_literalparam]],
    [533, []],
    [534, []],
    [535, [convert_dashligatures]],
]

# Each entry is [number, [functions]]; the numbers descend from 534 to 508.
revert = [
    [534, [revert_dashligatures]],
    [533, [revert_chapterbib]],
    [532, [revert_multibib]],
    [531, [revert_literalparam]],
    [530, [revert_qualicites]],
    [529, [revert_bibpackopts]],
    [528, [revert_citekeyonly]],
    [527, [revert_biblatex]],
    [526, [revert_noprefix]],
    [525, [revert_plural_refs]],
    [524, [revert_labelonly]],
    [523, [revert_crimson, revert_cochinealmath]],
    [522, [revert_cjkquotes]],
    [521, [revert_dynamicquotes]],
    [520, [
        revert_britishquotes,
        revert_swedishgquotes,
        revert_frenchquotes,
        revert_frenchinquotes,
        revert_russianquotes,
        revert_swissquotes,
    ]],
    [519, [revert_plainquote]],
    [518, [revert_quotestyle]],
    [517, [revert_iopart]],
    [516, [revert_quotes]],
    [515, []],
    [514, [revert_urdu, revert_syriac]],
    [513, [
        revert_amharic,
        revert_asturian,
        revert_kannada,
        revert_khmer,
    ]],
    [512, [
        revert_bosnian,
        revert_friulan,
        revert_macedonian,
        revert_piedmontese,
        revert_romansh,
    ]],
    [511, [revert_beamer_article_styles]],
    [510, [revert_ibranches]],
    [509, []],
    [508, [revert_microtype]],
]


# Nothing happens when this file is run directly.
if __name__ == "__main__":
    pass