1 # -*- coding: utf-8 -*-
4 # This file is part of LyX, the document processor.
5 # Licence details can be found in the file COPYING.
7 # \author Thibaut Cuvelier
9 # Full author contact details are available in file CREDITS
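# Usage: python docbook2epub.py java_binary xsltproc_binary xslt_path in.docbook out.epub
# (pass an empty string or 'none' for java_binary, xsltproc_binary or xslt_path when it is not available)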
14 from __future__ import print_function
def __init__(self, args=None):
    """Parse the command line and precompute the paths used by the conversion.

    :param args: argument vector laid out like ``sys.argv``: script path, Java binary,
        xsltproc binary, folder of the DocBook XSLT style sheets, input DocBook file,
        output ePub file. For the three tool/style-sheet paths, an empty string or
        'none' means "not available". Defaults to ``sys.argv``.

    Exits the process with status 1 when the number of arguments is not six.
    """
    if args is None:
        args = sys.argv

    if len(args) != 6:
        print('Six arguments are expected, only %s found: %s.' % (len(args), args))
        sys.exit(1)

    def optional(value):
        # '' and 'none' are the two ways the caller signals a missing tool.
        return value if value != '' and value != 'none' else None

    # Always read from `args` (not sys.argv), so that an explicitly passed argument
    # list is actually honoured.
    self.own_path = args[0]
    self.java_path = optional(args[1])
    self.xsltproc_path = optional(args[2])
    self.xslt_path = optional(args[3])
    self.input = args[4]
    self.output = args[5]
    # When the script is started with a bare file name, dirname() is empty; fall back
    # to '.' so that the folder stays relative instead of becoming '/../'.
    self.script_folder = (os.path.dirname(self.own_path) or '.') + '/../'

    print('Generating ePub with the following parameters:')
    print(self.own_path)
    print(self.java_path)
    print(self.xsltproc_path)
    print(self.input)
    print(self.output)

    # Precompute paths that will be used later.
    self.output_dir = tempfile.mkdtemp().replace('\\', '/')
    # Does not exist yet: it is expected to be produced by the XSLT transformation.
    self.package_opf = self.output_dir + '/OEBPS/package.opf'
    print('Temporary output directory: %s' % self.output_dir)

    if self.xslt_path is None:
        self.xslt = self.script_folder + 'docbook/epub3/chunk.xsl'
    else:
        self.xslt = self.xslt_path + '/epub3/chunk.xsl'
    print('XSLT style sheet to use:')
    print(self.xslt)

    # These will be filled during the execution of the script.
    self.renamed = None
def gracefully_fail(self, reason):
    """Report a fatal error, remove the temporary output directory and stop the script.

    :param reason: human-readable explanation, printed after 'docbook2epub fails: '.

    Exits the process with status 1, so callers never continue after a failure.
    """
    print('docbook2epub fails: %s' % reason)
    # Best-effort clean-up: a failure to delete must not hide the original error.
    shutil.rmtree(self.output_dir, ignore_errors=True)
    sys.exit(1)
def start_xslt_transformation(self):
    """Run the DocBook-to-ePub XSLT transformation with the first available processor.

    xsltproc is used when its path is known; otherwise Saxon 6 is started through Java.
    When neither is available, or when the processor returns a non-zero status, the
    failure is reported through ``gracefully_fail``.
    """
    if self.xsltproc_path is not None:
        command = self.start_xslt_transformation_xsltproc()
    elif self.java_path is not None:
        command = self.start_xslt_transformation_saxon6()
    else:
        self.gracefully_fail('no XSLT processor available')
        return  # Nothing to run, even if gracefully_fail() comes back.

    print('Command to execute:')
    print(command)

    quoted_command = command
    if os.name == 'nt':
        # On Windows, it is typical to have spaces in folder names, and that requires to wrap the whole command
        # in quotes. On Linux, this might create errors when starting the command.
        quoted_command = '"' + command + '"'
    # This could be simplified by using subprocess.run, but this requires Python 3.5.

    if os.system(quoted_command) != 0:
        self.gracefully_fail('error from the XSLT processor')
        return  # Do not announce success after a failed transformation.

    print('Generated ePub contents.')
def start_xslt_transformation_xsltproc(self):
    """Return the xsltproc command line as a single string.

    Every path is wrapped in double quotes; the temporary output directory is handed
    to the style sheet through the ``base.dir`` string parameter.
    """
    template = '"%s" -stringparam base.dir "%s" "%s" "%s"'
    return template % (self.xsltproc_path, self.output_dir, self.xslt, self.input)
def start_xslt_transformation_saxon6(self):
    """Return the command line that runs the Saxon 6.5.5 jar with Java, as a single string.

    The jar is looked up in the 'scripts' folder below ``self.script_folder``. The
    arguments are, in order: input document, style sheet, ``base.dir`` parameter;
    each one is wrapped in double quotes.
    """
    jar = self.script_folder + 'scripts/saxon6.5.5.jar'
    arguments = [self.input, self.xslt, 'base.dir=' + self.output_dir]
    quoted_arguments = ' '.join('"' + argument + '"' for argument in arguments)
    return '"' + self.java_path + '" -jar "' + jar + '" ' + quoted_arguments
def get_images_from_package_opf(self):
    """Return the ``href`` of every image item listed in ``self.package_opf``.

    The file is scanned line by line: a line counts as an image when it contains an
    ``<item`` tag whose media type starts with ``image``. An empty list is returned
    (after printing a warning) when the file does not exist.
    """
    images = []

    # Example in the OPF file:
    #     <item id="d436e1" href="D:/LyX/lib/images/buffer-view.svgz" media-type="image/SVGZ"/>
    # The XHTML files are also <item> tags:
    #     <item id="id-d0e2" href="index.xhtml" media-type="application/xhtml+xml"/>
    try:
        # Read as UTF-8, like change_image_paths() does when rewriting this same file.
        with open(self.package_opf, 'r', encoding='utf8') as f:
            for line in f:
                if '<item' not in line or 'media-type="image' not in line:
                    continue
                pieces = line.split('href="', 1)
                if len(pieces) == 2:  # Ignore image items that carry no href on this line.
                    images.append(pieces[1].split('"')[0])
    except FileNotFoundError:
        print('The package.opf file was not found, probably due to a DocBook error. The ePub file will be corrupt.')

    return images
def change_image_paths(self, file):
    """Rewrite ``file`` in place, replacing every original image path by its new one.

    :param file: path of a UTF-8 text file (OPF or XHTML) to update.

    The mapping old path => new path is read from ``self.renamed``.
    """
    # This could be optimised, as the same operation is performed a zillion times on many files:
    # https://www.oreilly.com/library/view/python-cookbook/0596001673/ch03s15.html
    with open(file, 'r', encoding='utf8') as f:
        contents = f.read()

    # Substitute on the whole text at once rather than line by line.
    for (old, new) in self.renamed.items():
        contents = contents.replace(old, new)

    with open(file, 'w', encoding='utf8') as f:
        f.write(contents)
def copy_images(self):
    """Move the images referenced by the document into the ePub tree.

    The image paths found in OEBPS/package.opf (typically absolute) are mapped to
    'images/<file name>'; that mapping is stored in ``self.renamed``, applied to the
    OPF and to every XHTML file in OEBPS, and finally used to copy the image files
    into OEBPS/images/.
    """
    oebps_dir = self.output_dir + '/OEBPS/'

    # Step 1: mapping original path => path inside the ePub archive.
    mapping = {}
    for source in self.get_images_from_package_opf():
        mapping[source] = 'images/' + os.path.basename(source)
    self.renamed = mapping

    # Step 2: rewrite the references, first in the OPF, then in the XHTML files.
    self.change_image_paths(self.output_dir + '/OEBPS/package.opf')
    for document in glob.glob(self.output_dir + '/OEBPS/*.xhtml'):
        self.change_image_paths(document)

    # Step 3: create the destination folder if needed (OEBPS itself is produced by
    # the DocBook-to-ePub transformation).
    images_dir = self.output_dir + '/OEBPS/images/'
    if not os.path.exists(images_dir):
        os.mkdir(images_dir)

    # Step 4: copy the image files themselves.
    for source, target in self.renamed.items():
        shutil.copyfile(source, oebps_dir + target)
def create_zip_archive(self):
    """Pack the temporary output directory into ``self.output`` and delete the directory.

    Every file below ``self.output_dir`` is added under its path relative to that
    directory, stored without compression.
    """
    with zipfile.ZipFile(self.output, 'w', zipfile.ZIP_DEFLATED) as archive:
        # os.walk is used instead of glob's `recursive` argument, which needs Python 3.5.
        for folder, _subfolders, names in os.walk(self.output_dir):
            for name in names:
                path = os.path.join(folder, name)
                member = os.path.relpath(path, self.output_dir)
                archive.write(path, member, compress_type=zipfile.ZIP_STORED)

    shutil.rmtree(self.output_dir)
    print('Generated ePub.')
def transform(self):
    """Run the complete conversion: XSLT transformation, image relocation, ePub packaging."""
    self.start_xslt_transformation()
    # The images must be copied after the transformation (which produces package.opf)
    # and before the archive is built.
    self.copy_images()
    self.create_zip_archive()
if __name__ == '__main__':
    # Script entry point: convert using the arguments given on the command line.
    converter = DocBookToEpub(sys.argv)
    converter.transform()