git.lyx.org Git - lyx.git/blob - lib/scripts/docbook2epub.py
Create Chapter 6 Bullets in Additional.lyx and move the bullet section into it; this...
[lyx.git] / lib / scripts / docbook2epub.py
1 # -*- coding: utf-8 -*-
2
3 # file docbook2epub.py
4 # This file is part of LyX, the document processor.
5 # Licence details can be found in the file COPYING.
6 #
7 # \author Thibaut Cuvelier
8 #
9 # Full author contact details are available in file CREDITS
10
11 # Usage:
12 #   python docbook2epub.py java_binary saxon_path xsltproc_path xslt_path in.docbook in.orig.path out.epub
13
14 from __future__ import print_function
15
16 import glob
17 import os
18 import shutil
19 import sys
20 import tempfile
21 import zipfile
22 from io import open  # Required for Python 2.
23
24
def _parse_nullable_argument(arg):
    """Translate the "not provided" placeholders of the command line into None.

    Both the empty string and the literal 'none' mean that the caller has no value for this
    argument; any other value is returned unchanged.
    """
    if arg == '' or arg == 'none':
        return None
    return arg
27
28
class ImageRename:
    """Describe how one image is relocated when it is packed into the ePub archive.

    Attributes:
        opf_path: the image reference exactly as it appears in the generated files.
        local_path: where the image can be read from on the file system.
        epub_path: the path the image gets inside the archive.
    """

    def __init__(self, opf_path, local_path, epub_path):
        # Plain value holder: the three paths are stored as given, without any validation.
        self.opf_path, self.local_path, self.epub_path = opf_path, local_path, epub_path
34
35
class DocBookToEpub:
    """Convert a DocBook file into an ePub archive.

    The conversion has three steps (see transform): run the DocBook ePub3 XSLT style sheet into a temporary
    directory, gather the images referenced by the generated OEBPS/package.opf into OEBPS/images/, and zip the
    temporary directory into the requested output file.
    """

    def __init__(self, args=None):
        """Read the command-line arguments and precompute the paths used by the conversion.

        args: list of exactly eight strings laid out like sys.argv: script path, Java binary, Saxon path,
            xsltproc path, XSLT path, input DocBook file, path of the original document, output ePub file.
            Defaults to sys.argv. The four tool paths may be '' or 'none' when they are not available.

        Exits the process with status 1 when the number of arguments is not eight. Creates the temporary
        output directory as a side effect.
        """
        if args is None:
            args = sys.argv

        if len(args) != 8:
            print('Exactly eight arguments are expected, only %s found: %s.' % (len(args), args))
            sys.exit(1)

        # Every value is taken from `args` (and not from sys.argv), so that an explicitly passed argument list
        # is actually honoured.
        self.own_path = args[0]
        self.java_path = _parse_nullable_argument(args[1])
        self.saxon_path = _parse_nullable_argument(args[2])
        self.xsltproc_path = _parse_nullable_argument(args[3])
        self.xslt_path = _parse_nullable_argument(args[4])
        self.input = args[5]
        self.input_path = args[6]
        self.output = args[7]
        # dirname() is empty when the script is started through its bare file name; fall back to the current
        # directory so that the folder does not degenerate into '/../' (i.e. the file-system root).
        self.script_folder = (os.path.dirname(self.own_path) or '.') + '/../'

        print('Generating ePub with the following parameters:')
        print(self.own_path)
        print(self.java_path)
        print(self.saxon_path)
        print(self.xsltproc_path)
        print(self.xslt_path)
        print(self.input)
        print(self.input_path)
        print(self.output)

        # Precompute paths that will be used later.
        self.output_dir = tempfile.mkdtemp().replace('\\', '/')
        self.package_opf = self.output_dir + '/OEBPS/package.opf'  # Does not exist yet.
        print('Temporary output directory: %s' % self.output_dir)

        if self.xslt_path is None:
            self.xslt = self.script_folder + 'docbook/epub3/chunk.xsl'
        else:
            self.xslt = self.xslt_path + '/epub3/chunk.xsl'
        print('XSLT style sheet to use:')
        print(self.xslt)

        if self.saxon_path is None:
            self.saxon_path = self.script_folder + 'scripts/saxon6.5.5.jar'

        # These will be filled during the execution of the script.
        self.renamed = None

    def gracefully_fail(self, reason):
        """Report `reason`, remove the temporary output directory and exit the process with status 1."""
        print('docbook2epub fails: %s' % reason)
        shutil.rmtree(self.output_dir, ignore_errors=True)
        sys.exit(1)

    def start_xslt_transformation(self):
        """Run the XSLT transformation that generates the contents of the ePub in the temporary directory.

        xsltproc is used when its path is known, otherwise Saxon 6 through Java. The process exits (through
        gracefully_fail) when no processor is available or when the processor returns a non-zero status.
        """
        command = None
        if self.xsltproc_path is not None:
            command = self.start_xslt_transformation_xsltproc()
        elif self.java_path is not None:
            command = self.start_xslt_transformation_saxon6()

        if command is None:
            self.gracefully_fail('no XSLT processor available')

        print('Command to execute:')
        print(command)

        quoted_command = command
        if os.name == 'nt':
            # On Windows, it is typical to have spaces in folder names, and that requires to wrap the whole command
            # in quotes. On Linux, this might create errors when starting the command.
            quoted_command = '"' + command + '"'
        # This could be simplified by using subprocess.run, but this requires Python 3.5.

        if os.system(quoted_command) != 0:
            self.gracefully_fail('error from the XSLT processor')

        print('Generated ePub contents.')

    def start_xslt_transformation_xsltproc(self):
        """Return the xsltproc command line (a string, every path wrapped in double quotes)."""
        params = '-stringparam base.dir "' + self.output_dir + '"'
        return '"' + self.xsltproc_path + '" ' + params + ' "' + self.xslt + '" "' + self.input + '"'

    def start_xslt_transformation_saxon6(self):
        """Return the Java/Saxon 6 command line (a string, every path wrapped in double quotes)."""
        params = 'base.dir=%s' % self.output_dir
        executable = '"' + self.java_path + '" -jar "' + self.saxon_path + '"'
        return executable + ' "' + self.input + '" "' + self.xslt + '" "' + params + '"'

    def get_images_from_package_opf(self):
        """Return the list of href values of the image items declared in OEBPS/package.opf.

        The file is scanned line by line; a line counts as an image when it holds both '<item' and
        'media-type="image'. When package.opf does not exist, a warning is printed and the list is empty.
        """
        images = []

        # Example in the OPF file:
        #     <item id="d436e1" href="D:/LyX/lib/images/buffer-view.svgz" media-type="image/SVGZ"/>
        # The XHTML files are also <item> tags:
        #     <item id="id-d0e2" href="index.xhtml" media-type="application/xhtml+xml"/>

        # Test for the file instead of catching FileNotFoundError: that exception does not exist in Python 2,
        # which this script still supports.
        if not os.path.isfile(self.package_opf):
            print('The package.opf file was not found, probably due to a DocBook error. The ePub file will be corrupt.')
            return images

        # Same encoding as in change_image_paths, which rewrites this very file.
        with open(self.package_opf, 'r', encoding='utf8') as f:
            for line in f:
                if '<item' in line and 'media-type="image' in line:
                    parts = line.split('href="')
                    # An image item whose href is not on the same line cannot be parsed here: skip it
                    # rather than failing with an IndexError.
                    if len(parts) > 1:
                        images.append(parts[1].split('"')[0])

        return images

    def get_image_changes(self):
        """Return one ImageRename per image of package.opf.

        Each image is looked up first as written in the OPF file, then relatively to the path of the original
        document. local_path is left empty ('') when the image is found at neither place. Within the archive,
        the image goes to images/ under its base name.
        """
        epub_folder = 'images/'

        changes = []
        for image in self.get_images_from_package_opf():
            # os.path.join gives the same result as a plain concatenation when input_path ends with a
            # separator, and also works when it does not.
            relative_to_input = os.path.join(self.input_path, image)
            if os.path.exists(image):
                file_system_path = image
            elif os.path.exists(relative_to_input):
                file_system_path = relative_to_input
            else:
                file_system_path = ''

            changes.append(ImageRename(image, file_system_path, epub_folder + os.path.basename(image)))
        return changes

    def change_image_paths(self, file):
        """Rewrite `file` in place (UTF-8), replacing every original image path by its path in the archive.

        The replacements are those of self.renamed, applied in order, as plain substring substitutions.
        """
        # This could be optimised, as the same operation is performed a zillion times on many files:
        # https://www.oreilly.com/library/view/python-cookbook/0596001673/ch03s15.html
        with open(file, 'r', encoding='utf8') as f:
            contents = f.read()

        for change in self.renamed:
            contents = contents.replace(change.opf_path, change.epub_path)

        with open(file, 'w', encoding='utf8') as f:
            f.write(contents)

    def copy_images(self):
        """Move the images into OEBPS/images/ and update the references to them in the OPF and XHTML files.

        The process exits (through gracefully_fail, hence with the temporary directory removed) when
        package.opf has not been generated or when an image cannot be found on the file system.
        """
        # Copy the assets to the OEBPS/images/. All paths are available in OEBPS/package.opf, but they must also be
        # changed in the XHTML files. Typically, the current paths are absolute.

        # Without package.opf, there is nothing to rewrite nor to package: stop cleanly instead of failing
        # with a traceback when trying to open the file.
        if not os.path.isfile(self.package_opf):
            self.gracefully_fail('the package.opf file was not generated by the XSLT transformation')

        # First, get the mapping old file => file in the ePub archive.
        self.renamed = self.get_image_changes()

        # An empty local path means that the image was not found: it could not be copied later on.
        missing = [change.opf_path for change in self.renamed if not change.local_path]
        if missing:
            self.gracefully_fail('image files not found: %s' % ', '.join(missing))

        # Then, transform all paths (both OPF and XHTML files).
        self.change_image_paths(self.package_opf)
        for file in glob.glob(self.output_dir + '/OEBPS/*.xhtml'):
            self.change_image_paths(file)

        # Ensure that the destination path exists. OEBPS exists due to the DocBook-to-ePub transformation.
        images_dir = self.output_dir + '/OEBPS/images/'
        if not os.path.exists(images_dir):
            os.mkdir(images_dir)

        # Finally, actually copy the image files.
        for change in self.renamed:
            shutil.copyfile(change.local_path, self.output_dir + '/OEBPS/' + change.epub_path)

    def create_zip_archive(self):
        """Zip the temporary directory into the output file, then remove the temporary directory.

        The mimetype file is written first and uncompressed, as the ePub container format requires; every
        other file is deflated.
        """
        # Python 3.5 brings the `recursive` argument of glob. For older versions, this trick is required...
        # for file in glob.glob(output_dir + '/**/*', recursive=True):
        entries = []
        for folder, _, filenames in os.walk(self.output_dir):
            for filename in filenames:
                path = os.path.join(folder, filename)
                entries.append((path, os.path.relpath(path, self.output_dir)))

        # Stable sort: only mimetype moves (to the front), the other files keep the order of os.walk.
        entries.sort(key=lambda entry: entry[1] != 'mimetype')

        with zipfile.ZipFile(self.output, 'w', zipfile.ZIP_DEFLATED) as archive:
            for path, name in entries:
                compression = zipfile.ZIP_STORED if name == 'mimetype' else zipfile.ZIP_DEFLATED
                archive.write(path, name, compress_type=compression)

        shutil.rmtree(self.output_dir)
        print('Generated ePub.')

    def transform(self):
        """Run the whole conversion: XSLT transformation, image gathering, archive creation."""
        self.start_xslt_transformation()
        self.copy_images()
        self.create_zip_archive()
200
201
if __name__ == '__main__':
    # Script entry point: build the converter from the command line, then run the whole conversion.
    converter = DocBookToEpub(sys.argv)
    converter.transform()