From: Thibaut Cuvelier Date: Sun, 7 Feb 2021 19:46:29 +0000 (+0100) Subject: ePub: restructure the script as a class. X-Git-Tag: 2.4.0-alpha3~86 X-Git-Url: https://git.lyx.org/gitweb/?a=commitdiff_plain;h=d7d31ab512b5cc8cfe3a607fcf99b966e565c544;p=lyx.git ePub: restructure the script as a class. This allows sharing a little more code and avoids functions with many arguments. --- diff --git a/lib/scripts/docbook2epub.py b/lib/scripts/docbook2epub.py index ec0383afeb..9836240913 100644 --- a/lib/scripts/docbook2epub.py +++ b/lib/scripts/docbook2epub.py @@ -21,139 +21,147 @@ import tempfile import zipfile -def parse_arguments(): - if len(sys.argv) != 5: - print('Five arguments are expected, only %s found.' % len(sys.argv)) - print(sys.argv) +class DocBookToEpub: + def __init__(self, args=None): + if args is None: + args = sys.argv + + if len(args) != 5: + print('Five arguments are expected, only %s found.' % len(sys.argv)) + print(args) + sys.exit(1) + + self.own_path = sys.argv[0] + self.java_path = sys.argv[1] if sys.argv[1] != '' and sys.argv[1] != 'none' else '' + self.xsltproc_path = sys.argv[2] if sys.argv[2] != '' and sys.argv[2] != 'none' else '' + self.input = sys.argv[3] + self.output = sys.argv[4] + self.script_folder = os.path.dirname(self.own_path) + '/../' + + print('Generating ePub with the following parameters:') + print(self.own_path) + print(self.java_path) + print(self.xsltproc_path) + print(self.input) + print(self.output) + + # Precompute paths that will be used later. + self.output_dir = tempfile.mkdtemp().replace('\\', '/') + self.package_opf = self.output_dir + '/OEBPS/package.opf' # Does not exist yet, + print('Temporary output directory: %s' % self.output_dir) + + self.xslt = self.script_folder + 'docbook/epub3/chunk.xsl' + print('XSLT style sheet to use:') + print(self.xslt) + + # These will be filled during the execution of the script. 
+ self.renamed = None + + def gracefully_fail(self, reason): + print('docbook2epub fails: %s' % reason) + shutil.rmtree(self.output_dir, ignore_errors=True) sys.exit(1) - own_path, java_path, xsltproc_path, input, output = sys.argv - script_folder = os.path.dirname(own_path) + '/../' - - print('Generating ePub with the following parameters:') - print(own_path) - print(java_path) - print(xsltproc_path) - print(input) - print(output) - - return java_path, xsltproc_path, input, output, script_folder - - -def create_temporary_folder(): - output_dir = tempfile.mkdtemp().replace('\\', '/') - print('Temporary output directory:') - print(output_dir) - return output_dir - - -def start_xslt_transformation(input, output_dir, script_folder, java_path, xsltproc_path): - xslt = script_folder + 'docbook/epub3/chunk.xsl' - if xsltproc_path != '' and xsltproc_path != 'none': - command = start_xslt_transformation_xsltproc(input, output_dir, script_folder, xslt, xsltproc_path) - elif java_path != '' and java_path != 'none': - command = start_xslt_transformation_saxon6(input, output_dir, script_folder, xslt, java_path) - else: - print('docbook2epub fails: no XSLT processor available') - shutil.rmtree(output_dir, ignore_errors=True) - sys.exit(1) - - print('XSLT style sheet to use:') - print(xslt) - print('Command to execute:') - print(command) - - quoted_command = command - if os.name == 'nt': - # On Windows, it is typical to have spaces in folder names, and that requires to wrap the whole command - # in quotes. On Linux, this might create errors when starting the command. - quoted_command = '"' + command + '"' - # This could be simplified by using subprocess.run, but this requires Python 3.5. 
- - if os.system(quoted_command) != 0: - print('docbook2epub fails: error from the XSLT processor') - shutil.rmtree(output_dir, ignore_errors=True) - sys.exit(1) - - print('Generated ePub contents.') - - -def start_xslt_transformation_xsltproc(input, output_dir, _, xslt, xsltproc_path): - return '"' + xsltproc_path + '" -stringparam base.dir "' + output_dir + '" "' + xslt + '" "' + input + '"' - - -def start_xslt_transformation_saxon6(input, output_dir, script_folder, xslt, java_path): - saxon_jar = script_folder + 'scripts/saxon6.5.5.jar' - params = 'base.dir=%s' % output_dir - return '"' + java_path + '" -jar "' + saxon_jar + '" "' + input + '" "' + xslt + '" "' + params + '"' - - -def get_images_from_package_opf(package_opf): - images = [] - - # Example in the OPF file: - # - # The XHTML files are also tags: - # - try: - with open(package_opf, 'r') as f: - for line in f.readlines(): - if ' file in the ePub archive. - original_images = get_images_from_package_opf(output_dir + '/OEBPS/package.opf') - renamed = {img: 'images/' + os.path.basename(img) for img in original_images} - - # Then, transform all paths (both OPF and XHTML files). - change_image_paths(output_dir + '/OEBPS/package.opf', renamed) - for file in glob.glob(output_dir + '/OEBPS/*.xhtml'): - change_image_paths(file, renamed) - - # Ensure that the destination path exists. - if not os.path.exists(output_dir + '/OEBPS/images/'): - os.mkdir(output_dir + '/OEBPS/images/') - - # Finally, actually copy the image files. - for (old, new) in renamed.items(): - shutil.copyfile(old, output_dir + '/OEBPS/' + new) - - -def create_zip_archive(output, output_dir): - with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as zip: - # Python 3.5 brings the `recursive` argument. For older versions, this trick is required... 
- # for file in glob.glob(output_dir + '/**/*', recursive=True): - for file in [os.path.join(dp, f) for dp, dn, filenames in os.walk(output_dir) for f in filenames]: - zip.write(file, os.path.relpath(file, output_dir), compress_type=zipfile.ZIP_STORED) - shutil.rmtree(output_dir) - print('Generated ePub.') + def start_xslt_transformation(self): + command = None + if self.xsltproc_path != '': + command = self.start_xslt_transformation_xsltproc() + elif self.java_path != '': + command = self.start_xslt_transformation_saxon6() + + if command is None: + self.gracefully_fail('no XSLT processor available') + + print('Command to execute:') + print(command) + + quoted_command = command + if os.name == 'nt': + # On Windows, it is typical to have spaces in folder names, and that requires to wrap the whole command + # in quotes. On Linux, this might create errors when starting the command. + quoted_command = '"' + command + '"' + # This could be simplified by using subprocess.run, but this requires Python 3.5. + + if os.system(quoted_command) != 0: + self.gracefully_fail('error from the XSLT processor') + + print('Generated ePub contents.') + + def start_xslt_transformation_xsltproc(self): + params = '-stringparam base.dir "' + self.output_dir + '"' + return '"' + self.xsltproc_path + '" ' + params + ' "' + self.xslt + '" "' + self.input + '"' + + def start_xslt_transformation_saxon6(self): + saxon_jar = self.script_folder + 'scripts/saxon6.5.5.jar' + params = 'base.dir=%s' % self.output_dir + executable = '"' + self.java_path + '" -jar "' + saxon_jar + '"' + return executable + ' "' + self.input + '" "' + self.xslt + '" "' + params + '"' + + def get_images_from_package_opf(self): + images = [] + + # Example in the OPF file: + # + # The XHTML files are also tags: + # + try: + with open(self.package_opf, 'r') as f: + for line in f.readlines(): + if ' file in the ePub archive. 
+ original_images = self.get_images_from_package_opf() + self.renamed = {img: 'images/' + os.path.basename(img) for img in original_images} + + # Then, transform all paths (both OPF and XHTML files). + self.change_image_paths(self.output_dir + '/OEBPS/package.opf') + for file in glob.glob(self.output_dir + '/OEBPS/*.xhtml'): + self.change_image_paths(file) + + # Ensure that the destination path exists. OEBPS exists due to the DocBook-to-ePub transformation. + if not os.path.exists(self.output_dir + '/OEBPS/images/'): + os.mkdir(self.output_dir + '/OEBPS/images/') + + # Finally, actually copy the image files. + for (old, new) in self.renamed.items(): + shutil.copyfile(old, self.output_dir + '/OEBPS/' + new) + + def create_zip_archive(self): + with zipfile.ZipFile(self.output, 'w', zipfile.ZIP_DEFLATED) as zip: + # Python 3.5 brings the `recursive` argument. For older versions, this trick is required... + # for file in glob.glob(output_dir + '/**/*', recursive=True): + for file in [os.path.join(dp, f) for dp, dn, filenames in os.walk(self.output_dir) for f in filenames]: + zip.write(file, os.path.relpath(file, self.output_dir), compress_type=zipfile.ZIP_STORED) + + shutil.rmtree(self.output_dir) + print('Generated ePub.') + + def transform(self): + self.start_xslt_transformation() + self.copy_images() + self.create_zip_archive() if __name__ == '__main__': - java_path, xsltproc_path, input, output, script_folder = parse_arguments() - output_dir = create_temporary_folder() - start_xslt_transformation(input, output_dir, script_folder, java_path, xsltproc_path) - copy_images(output_dir) - create_zip_archive(output, output_dir) + DocBookToEpub(sys.argv).transform()