2 # This file is part of LyX, the document processor.
3 # Licence details can be found in the file COPYING.
5 # \author Thibaut Cuvelier
7 # Full author contact details are available in file CREDITS
10 # python docbook2epub.py java_binary saxon_path xsltproc_path xslt_path in.docbook in.orig.path out.epub
def _parse_nullable_argument(arg):
    """Turn the placeholders '' and 'none' into None; return any other value unchanged."""
    if arg == '' or arg == 'none':
        return None
    return arg
class ImageRename:
    """Describe how one image referenced by the generated ePub files is relocated.

    Attributes:
        opf_path: path of the image exactly as written in package.opf and the XHTML files.
        local_path: path of the image on the file system ('' when it could not be located).
        epub_path: path of the image inside the archive, relative to the OEBPS folder.
    """

    def __init__(self, opf_path, local_path, epub_path):
        self.opf_path = opf_path
        self.local_path = local_path
        self.epub_path = epub_path
def __init__(self, args=None):
    """Read the command-line arguments and precompute the paths used by the conversion.

    args: the eight command-line items, in this order: this script, the Java binary, the Saxon JAR,
        the xsltproc binary, the folder containing the DocBook XSLT style sheets, the input DocBook
        file, the folder of the original document, the output ePub file. Defaults to sys.argv.
        The four tool/style-sheet paths may be '' or 'none' to mean "not provided".

    Exits with status 1 when the number of arguments is not eight. A temporary output directory
    is created as a side effect.
    """
    if args is None:
        args = sys.argv

    if len(args) != 8:
        print(f'Exactly eight arguments are expected, only {len(args)} found: {args}.')
        sys.exit(1)

    # Read from args (not sys.argv) so that an explicitly passed argument list is honoured.
    self.own_path = args[0]
    self.java_path = _parse_nullable_argument(args[1])
    self.saxon_path = _parse_nullable_argument(args[2])
    self.xsltproc_path = _parse_nullable_argument(args[3])
    self.xslt_path = _parse_nullable_argument(args[4])
    self.input = args[5]
    self.input_path = args[6]
    self.output = args[7]
    self.script_folder = os.path.dirname(self.own_path) + '/../'

    print('Generating ePub with the following parameters:')
    print(self.own_path)
    print(self.java_path)
    print(self.saxon_path)
    print(self.xsltproc_path)
    print(self.xslt_path)
    print(self.input)
    print(self.input_path)
    print(self.output)

    # Precompute paths that will be used later.
    self.output_dir = tempfile.mkdtemp().replace('\\', '/')
    self.package_opf = self.output_dir + '/OEBPS/package.opf'  # Does not exist yet.
    print('Temporary output directory: %s' % self.output_dir)

    # Fall back on the style sheets and the Saxon JAR located relative to this script.
    if self.xslt_path is None:
        self.xslt = self.script_folder + 'docbook/epub3/chunk.xsl'
    else:
        self.xslt = self.xslt_path + '/epub3/chunk.xsl'
    print('XSLT style sheet to use:')
    print(self.xslt)

    if self.saxon_path is None:
        self.saxon_path = self.script_folder + 'scripts/saxon6.5.5.jar'

    # These will be filled during the execution of the script.
    self.renamed = None
def gracefully_fail(self, reason):
    """Report a fatal error, delete the temporary output directory and abort the script.

    reason: text appended to the 'docbook2epub fails: ' message.

    Raises SystemExit with status 1.
    """
    print('docbook2epub fails: %s' % reason)
    shutil.rmtree(self.output_dir, ignore_errors=True)
    # Callers report unrecoverable conditions here and must not carry on afterwards.
    raise SystemExit(1)
def start_xslt_transformation(self):
    """Run the DocBook-to-ePub XSLT transformation with the first available processor.

    xsltproc is used when its path is known, otherwise Saxon through Java. When neither is
    available, or when the processor returns a non-zero status, the failure is reported through
    gracefully_fail().
    """
    if self.xsltproc_path is not None:
        command = self.start_xslt_transformation_xsltproc()
    elif self.java_path is not None:
        command = self.start_xslt_transformation_saxon6()
    else:
        self.gracefully_fail('no XSLT processor available')
        return  # Nothing can be executed without a command.

    print('Command to execute:')
    print(command)

    quoted_command = command
    if os.name == 'nt':
        # On Windows, it is typical to have spaces in folder names, and that requires to wrap the whole command
        # in quotes. On Linux, this might create errors when starting the command.
        quoted_command = '"' + command + '"'
    # This could be simplified by using subprocess.run, but this requires Python 3.5.

    if os.system(quoted_command) != 0:
        self.gracefully_fail('error from the XSLT processor')
        return  # Do not announce success after a failure.

    print('Generated ePub contents.')
def start_xslt_transformation_xsltproc(self):
    """Build the xsltproc command line: base.dir parameter, then style sheet, then input file.

    Every path is wrapped in double quotes. Returns the command as a single string.
    """
    pieces = [
        '"', self.xsltproc_path, '" ',
        '-stringparam base.dir "', self.output_dir, '"',
        ' "', self.xslt, '"',
        ' "', self.input, '"',
    ]
    return ''.join(pieces)
def start_xslt_transformation_saxon6(self):
    """Build the Saxon command line: java -jar <saxon>, then input file, style sheet, base.dir.

    Every argument is wrapped in double quotes. Returns the command as a single string.
    """
    base_dir_param = 'base.dir=%s' % self.output_dir
    launcher = ''.join(['"', self.java_path, '" -jar "', self.saxon_path, '"'])
    return ''.join([launcher, ' "', self.input, '" "', self.xslt, '" "', base_dir_param, '"'])
def get_images_from_package_opf(self):
    """Return the href of every image item listed in the generated package.opf.

    The file is scanned line by line: a line is an image entry when it contains both '<item' and
    a media type starting with 'image'. When package.opf does not exist, a warning is printed and
    an empty list is returned.
    """
    images = []

    # Example in the OPF file:
    #     <item id="d436e1" href="D:/LyX/lib/images/buffer-view.svgz" media-type="image/SVGZ"/>
    # The XHTML files are also <item> tags:
    #     <item id="id-d0e2" href="index.xhtml" media-type="application/xhtml+xml"/>
    try:
        # Same encoding as the one used when the file is rewritten by change_image_paths.
        with open(self.package_opf, encoding='utf8') as f:
            for line in f:
                # Lines without an href on the same line are skipped instead of raising IndexError.
                if '<item' in line and 'media-type="image' in line and 'href="' in line:
                    images.append(line.split('href="')[1].split('"')[0])
    except FileNotFoundError:
        print('The package.opf file was not found, probably due to a DocBook error. The ePub file will be corrupt.')

    return images
def get_image_changes(self):
    """Return one ImageRename per image listed in package.opf.

    Each image is looked up first at the path written in the OPF file, then relative to the
    folder of the original document (self.input_path). The destination inside the archive is
    always 'images/' followed by the base name of the image.
    """
    epub_folder = 'images/'

    changes = []
    for image in self.get_images_from_package_opf():
        # os.path.join gives the same result as plain concatenation when input_path ends with a
        # separator, and also works when it does not.
        relative_candidate = os.path.join(self.input_path, image)
        if os.path.exists(image):
            file_system_path = image
        elif os.path.exists(relative_candidate):
            file_system_path = relative_candidate
        else:
            # The image could not be located on disk.
            file_system_path = ''

        changes.append(ImageRename(image, file_system_path, epub_folder + os.path.basename(image)))
    return changes
def change_image_paths(self, file):
    """Rewrite `file` in place, replacing each image's OPF path with its path inside the ePub.

    file: path of a UTF-8 text file (package.opf or an XHTML file).
    The replacements come from self.renamed and are applied in list order.
    """
    # This could be optimised, as the same operation is performed a zillion times on many files:
    # https://www.oreilly.com/library/view/python-cookbook/0596001673/ch03s15.html
    with open(file, encoding='utf8') as f:
        contents = f.read()

    for change in self.renamed:
        contents = contents.replace(change.opf_path, change.epub_path)

    with open(file, 'w', encoding='utf8') as f:
        f.write(contents)
def copy_images(self):
    """Move the images into OEBPS/images/ and update every reference to them.

    Stores the list of renames in self.renamed, rewrites package.opf and all OEBPS/*.xhtml files,
    then copies the image files. Images whose location on disk is unknown are reported and
    skipped instead of aborting the whole conversion.
    """
    # Copy the assets to the OEBPS/images/. All paths are available in OEBPS/package.opf, but they must also be
    # changed in the XHTML files. Typically, the current paths are absolute.

    # First, get the mapping old file => file in the ePub archive.
    self.renamed = self.get_image_changes()

    # Then, transform all paths (both OPF and XHTML files).
    self.change_image_paths(self.output_dir + '/OEBPS/package.opf')
    for file in glob.glob(self.output_dir + '/OEBPS/*.xhtml'):
        self.change_image_paths(file)

    # Ensure that the destination path exists. OEBPS exists due to the DocBook-to-ePub transformation.
    os.makedirs(self.output_dir + '/OEBPS/images/', exist_ok=True)

    # Finally, actually copy the image files.
    for change in self.renamed:
        if not change.local_path:
            # An empty local path means the image was not found on disk: there is nothing to copy.
            print('Image not found, it will be missing from the ePub: %s' % change.opf_path)
            continue
        shutil.copyfile(change.local_path, self.output_dir + '/OEBPS/' + change.epub_path)
def create_zip_archive(self):
    """Pack every file of the temporary output directory into self.output, then delete that directory.

    Entries are named relative to the output directory and are stored without compression.
    """
    with zipfile.ZipFile(self.output, 'w', zipfile.ZIP_DEFLATED) as archive:
        # Python 3.5 brings the `recursive` argument to glob. For older versions, os.walk is required...
        # for file in glob.glob(output_dir + '/**/*', recursive=True):
        paths = []
        for folder, _subfolders, names in os.walk(self.output_dir):
            for name in names:
                paths.append(os.path.join(folder, name))

        for path in paths:
            entry_name = os.path.relpath(path, self.output_dir)
            archive.write(path, entry_name, compress_type=zipfile.ZIP_STORED)

    shutil.rmtree(self.output_dir)
    print('Generated ePub.')
def transform(self):
    """Run the whole conversion: XSLT transformation, image relocation, then zip packaging."""
    self.start_xslt_transformation()
    # The images must be copied into OEBPS/ (and the references rewritten) before archiving.
    self.copy_images()
    self.create_zip_archive()
if __name__ == '__main__':
    # Script entry point: build the converter from the command line and run it.
    converter = DocBookToEpub(sys.argv)
    converter.transform()