# -*- coding: utf-8 -*-

# This file is part of LyX, the document processor.
# Licence details can be found in the file COPYING.
#
# \author Thibaut Cuvelier
#
# Full author contact details are available in file CREDITS

# Usage:
#   python docbook2epub.py java_binary in.docbook out.epub
from __future__ import print_function

import glob
import os
import re
import shutil
import subprocess
import sys
import tempfile
import zipfile
def parse_arguments():
    """Read and validate the command-line arguments of the script.

    Expected invocation:
        python docbook2epub.py java_binary in.docbook out.epub

    Returns:
        A tuple (java_path, input, output, script_folder). script_folder is the parent of the folder that
        contains this script, written as '<script dir>/../' so that resource paths can be appended to it.

    Exits with status 1 when the number of arguments is not the expected one.
    """
    if len(sys.argv) != 4:
        print('Usage: python docbook2epub.py java_binary in.docbook out.epub', file=sys.stderr)
        sys.exit(1)

    own_path, java_path, input_path, output_path = sys.argv
    # Resolve the script location before taking its directory: when the script is started without any
    # directory component, os.path.dirname() returns '' and the folder would become '/../', i.e. the root
    # of the file system instead of the parent of the script folder.
    script_folder = os.path.dirname(os.path.abspath(own_path)) + '/../'

    print('Generating ePub with the following parameters:')
    print(own_path)
    print(java_path)
    print(input_path)
    print(output_path)

    return java_path, input_path, output_path, script_folder
def create_temporary_folder():
    """Create a fresh temporary directory that receives the generated ePub contents.

    Returns:
        The path of the new directory, with every backslash replaced by a forward slash, so that
        '/'-separated suffixes can be appended to it.
    """
    output_dir = tempfile.mkdtemp().replace('\\', '/')
    print('Temporary output directory:')
    print(output_dir)
    return output_dir
def start_xslt_transformation(input, output_dir, script_folder, java_path):
    """Run the DocBook ePub 3 XSLT style sheet on the input document using Saxon.

    Args:
        input: path of the DocBook file to transform.
        output_dir: directory passed to the style sheet as its 'base.dir' parameter.
        script_folder: folder (with trailing slash) that contains 'docbook/epub3/chunk.xsl' and
            'scripts/saxon6.5.5.jar'.
        java_path: Java binary used to start Saxon.

    On failure (non-zero exit status, or the Java binary cannot be started), output_dir is deleted and
    the script exits with status 1.
    """
    xslt = script_folder + 'docbook/epub3/chunk.xsl'
    saxon_jar = script_folder + 'scripts/saxon6.5.5.jar'
    saxon_params = 'base.dir=%s' % output_dir
    arguments = [java_path, '-jar', saxon_jar, input, xslt, saxon_params]
    # Only used for logging; the process itself is started from the argument list.
    command = '"' + java_path + '" -jar "' + saxon_jar + '" "' + input + '" "' + xslt + '" "' + saxon_params + '"'

    print('XSLT style sheet to use:')
    print(xslt)
    print('Command to execute:')
    print(command)
    # Make sure the log lines above appear before the output of the child process.
    sys.stdout.flush()

    # Starting the process from an argument list bypasses the shell: paths containing spaces need no
    # platform-specific quoting (wrapping the whole command in quotes is only valid on Windows), and shell
    # metacharacters in paths are not interpreted. subprocess.call is available in all Python versions,
    # unlike subprocess.run (Python 3.5+).
    try:
        exit_code = subprocess.call(arguments)
    except OSError:
        # The Java binary could not be started at all: handle it like a failed transformation.
        exit_code = -1

    if exit_code != 0:
        print('docbook2epub fails')
        shutil.rmtree(output_dir, ignore_errors=True)
        # The output directory is gone: nothing can be packaged any more.
        sys.exit(1)

    print('Generated ePub contents.')
def get_images_from_package_opf(package_opf):
    """List the image paths referenced by the manifest of an OPF package file.

    Args:
        package_opf: path of the package.opf file.

    Returns:
        The value of the href attribute of every <item> tag whose media type starts with 'image', in
        document order. The list is empty when the file does not exist (a warning is printed).
    """
    images = []

    # Example in the OPF file:
    #     <item id="d436e1" href="D:/LyX/lib/images/buffer-view.svgz" media-type="image/SVGZ"/>
    # The XHTML files are also <item> tags:
    #     <item id="id-d0e2" href="index.xhtml" media-type="application/xhtml+xml"/>
    try:
        # Same encoding as the one used when the file is rewritten with the new image paths.
        with open(package_opf, 'r', encoding='utf8') as f:
            content = f.read()
    except FileNotFoundError:
        print('The package.opf file was not found, probably due to a DocBook error. The ePub file will be corrupt.')
        return images

    # Work on whole tags rather than on lines, so that a tag spanning several lines (or several tags
    # sharing a line) is handled, and an item without href is skipped instead of raising.
    for tag in re.findall(r'<item\b[^>]*>', content):
        if 'media-type="image' not in tag:
            continue
        href = re.search(r'href="([^"]*)"', tag)
        if href is not None:
            images.append(href.group(1))

    return images
def change_image_paths(file, renamed):
    """Replace, within a text file, every occurrence of an old image path by its new path.

    Args:
        file: path of the UTF-8 file to update in place.
        renamed: dictionary mapping each old path to the path that replaces it.

    The file is neither read nor written when there is nothing to replace, and it is only rewritten
    when its contents actually change.
    """
    # An empty key would match between every pair of characters.
    old_paths = [old for old in renamed if old]
    if not old_paths:
        return

    # All replacements are done in a single pass over the original text:
    # https://www.oreilly.com/library/view/python-cookbook/0596001673/ch03s15.html
    # Unlike a chain of str.replace calls, text that has just been inserted is never replaced again.
    # Longest paths come first, so that a path that is a prefix of another one cannot shadow it.
    old_paths.sort(key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(old) for old in old_paths))

    with open(file, 'r', encoding='utf8') as f:
        content = f.read()

    updated = pattern.sub(lambda match: renamed[match.group(0)], content)

    if updated != content:
        with open(file, 'w', encoding='utf8') as f:
            f.write(updated)
def _map_images_to_archive_paths(original_images):
    """Map each original image path to a distinct 'images/<name>' path within the ePub archive."""
    renamed = {}
    taken = set()
    for img in original_images:
        if img in renamed:
            continue

        base_name = os.path.basename(img)
        stem, extension = os.path.splitext(base_name)
        candidate = base_name
        counter = 1
        # Two images from different folders may share a base name: number the later ones instead of
        # letting them overwrite each other. Names are compared case-insensitively, as the destination
        # file system may not distinguish case.
        while candidate.lower() in taken:
            candidate = '%s_%d%s' % (stem, counter, extension)
            counter += 1

        taken.add(candidate.lower())
        renamed[img] = 'images/' + candidate
    return renamed


def copy_images(output_dir):
    """Copy the images used by the document into OEBPS/images/ and update the references to them.

    All image paths are available in OEBPS/package.opf, but they must also be changed in the XHTML files.
    Typically, the current paths are absolute.

    Args:
        output_dir: directory that contains the generated ePub contents (with an OEBPS/ subfolder).
    """
    oebps = output_dir + '/OEBPS/'

    # First, get the mapping old file => file in the ePub archive.
    original_images = get_images_from_package_opf(oebps + 'package.opf')
    if not original_images:
        # No image to handle. This is also the case when package.opf is missing: a warning has already
        # been printed, and trying to rewrite that file would only raise.
        return
    renamed = _map_images_to_archive_paths(original_images)

    # Then, transform all paths (both OPF and XHTML files).
    change_image_paths(oebps + 'package.opf', renamed)
    for file in glob.glob(oebps + '*.xhtml'):
        change_image_paths(file, renamed)

    # Ensure that the destination path exists.
    os.makedirs(oebps + 'images/', exist_ok=True)

    # Finally, actually copy the image files.
    for old, new in renamed.items():
        shutil.copyfile(old, oebps + new)
def create_zip_archive(output, output_dir):
    """Package the contents of output_dir as an ePub (ZIP) file, then delete output_dir.

    Args:
        output: path of the ePub file to create.
        output_dir: directory whose files are archived, with paths relative to this directory.
    """
    # Python 3.5 brings the `recursive` argument of glob.glob. For older versions, os.walk is required.
    entries = [
        (os.path.relpath(os.path.join(folder, name), output_dir), os.path.join(folder, name))
        for folder, _, names in os.walk(output_dir)
        for name in names
    ]
    # The ePub container format requires the 'mimetype' file to be the first entry of the archive and to
    # be stored without compression. os.walk gives no such guarantee, hence the explicit ordering (the
    # other entries are sorted by name to make the archive layout deterministic).
    entries.sort(key=lambda entry: (entry[0] != 'mimetype', entry[0]))

    with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as archive:
        for archive_name, path in entries:
            if archive_name == 'mimetype':
                compression = zipfile.ZIP_STORED
            else:
                compression = zipfile.ZIP_DEFLATED
            archive.write(path, archive_name, compress_type=compression)

    shutil.rmtree(output_dir)
    print('Generated ePub.')
def main():
    """Convert a DocBook file to ePub: XSLT transformation, image collection, then ZIP packaging."""
    java_path, input_path, output_path, script_folder = parse_arguments()
    output_dir = create_temporary_folder()
    try:
        start_xslt_transformation(input_path, output_dir, script_folder, java_path)
        copy_images(output_dir)
        create_zip_archive(output_path, output_dir)
    finally:
        # The temporary folder is normally already deleted at this point; this only matters when one of
        # the steps raised, so that the folder is not left behind.
        shutil.rmtree(output_dir, ignore_errors=True)


if __name__ == '__main__':
    main()