Remove profiling.py

[lyx.git] / lib / scripts / docbook2epub.py
diff --git a/lib/scripts/docbook2epub.py b/lib/scripts/docbook2epub.py

index b3aba7aac945566934413db7f180e9a32f134cd1..5cd8593dbbbfc0a5e963d3066e1f2c06a9e0cd67 100644 (file)
--- a/lib/scripts/docbook2epub.py
+++ b/lib/scripts/docbook2epub.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
  # file docbook2epub.py
  # This file is part of LyX, the document processor.
  # Licence details can be found in the file COPYING.
@@ -9,9 +7,8 @@
  # Full author contact details are available in file CREDITS
  
  # Usage:
-#   python docbook2epub.py java_binary in.docbook out.epub
+#   python docbook2epub.py java_binary saxon_path xsltproc_path xslt_path in.docbook in.orig.path out.epub
  
-from __future__ import print_function
  
  import glob
  import os
@@ -21,28 +18,44 @@ import tempfile
  import zipfile
  
  
+def _parse_nullable_argument(arg):
+    return arg if arg != '' and arg != 'none' else None
+
+
+class ImageRename:
+    def __init__(self, opf_path, local_path, epub_path):
+        self.opf_path = opf_path
+        self.local_path = local_path
+        self.epub_path = epub_path
+
+
  class DocBookToEpub:
      def __init__(self, args=None):
          if args is None:
              args = sys.argv
  
-        if len(args) != 6:
-            print('Six arguments are expected, only %s found: %s.' % (len(args), args))
+        if len(args) != 8:
+            print(f'Exactly eight arguments are expected, only {len(args)} found: {args}.')
              sys.exit(1)
  
          self.own_path = sys.argv[0]
-        self.java_path = sys.argv[1] if sys.argv[1] != '' and sys.argv[1] != 'none' else ''
-        self.xsltproc_path = sys.argv[2] if sys.argv[2] != '' and sys.argv[2] != 'none' else ''
-        self.xslt_path = sys.argv[3] if sys.argv[3] != '' and sys.argv[3] != 'none' else ''
-        self.input = sys.argv[4]
-        self.output = sys.argv[5]
+        self.java_path = _parse_nullable_argument(sys.argv[1])
+        self.saxon_path = _parse_nullable_argument(sys.argv[2])
+        self.xsltproc_path = _parse_nullable_argument(sys.argv[3])
+        self.xslt_path = _parse_nullable_argument(sys.argv[4])
+        self.input = sys.argv[5]
+        self.input_path = sys.argv[6]
+        self.output = sys.argv[7]
          self.script_folder = os.path.dirname(self.own_path) + '/../'
  
          print('Generating ePub with the following parameters:')
          print(self.own_path)
          print(self.java_path)
+        print(self.saxon_path)
          print(self.xsltproc_path)
+        print(self.xslt_path)
          print(self.input)
+        print(self.input_path)
          print(self.output)
  
          # Precompute paths that will be used later.
@@ -50,13 +63,16 @@ class DocBookToEpub:
          self.package_opf = self.output_dir + '/OEBPS/package.opf'  # Does not exist yet,
          print('Temporary output directory: %s' % self.output_dir)
  
-        if self.xslt_path == '':
+        if self.xslt_path is None:
              self.xslt = self.script_folder + 'docbook/epub3/chunk.xsl'
          else:
              self.xslt = self.xslt_path + '/epub3/chunk.xsl'
          print('XSLT style sheet to use:')
          print(self.xslt)
  
+        if self.saxon_path is None:
+            self.saxon_path = self.script_folder + 'scripts/saxon6.5.5.jar'
+
          # These will be filled during the execution of the script.
          self.renamed = None
  
@@ -67,9 +83,9 @@ class DocBookToEpub:
  
      def start_xslt_transformation(self):
          command = None
-        if self.xsltproc_path != '':
+        if self.xsltproc_path is not None:
              command = self.start_xslt_transformation_xsltproc()
-        elif self.java_path != '':
+        elif self.java_path is not None:
              command = self.start_xslt_transformation_saxon6()
  
          if command is None:
@@ -95,9 +111,8 @@ class DocBookToEpub:
          return '"' + self.xsltproc_path + '" ' + params + ' "' + self.xslt + '" "' + self.input + '"'
  
      def start_xslt_transformation_saxon6(self):
-        saxon_jar = self.script_folder + 'scripts/saxon6.5.5.jar'
          params = 'base.dir=%s' % self.output_dir
-        executable = '"' + self.java_path + '" -jar "' + saxon_jar + '"'
+        executable = '"' + self.java_path + '" -jar "' + self.saxon_path + '"'
          return executable + ' "' + self.input + '" "' + self.xslt + '" "' + params + '"'
  
      def get_images_from_package_opf(self):
@@ -108,7 +123,7 @@ class DocBookToEpub:
          # The XHTML files are also <item> tags:
          #     <item id="id-d0e2" href="index.xhtml" media-type="application/xhtml+xml"/>
          try:
-            with open(self.package_opf, 'r') as f:
+            with open(self.package_opf) as f:
                  for line in f.readlines():
                      if '<item' in line and 'media-type="image' in line:
                          images.append(line.split('href="')[1].split('"')[0])
@@ -117,16 +132,31 @@ class DocBookToEpub:
  
          return images
  
+    def get_image_changes(self):
+        epub_folder = 'images/'
+
+        changes = []
+        for image in self.get_images_from_package_opf():
+            if os.path.exists(image):
+                file_system_path = image
+            elif os.path.exists(self.input_path + image):
+                file_system_path = self.input_path + image
+            else:
+                file_system_path = ''
+
+            changes.append(ImageRename(image, file_system_path, epub_folder + os.path.basename(image)))
+        return changes
+
      def change_image_paths(self, file):
          # This could be optimised, as the same operation is performed a zillion times on many files:
          # https://www.oreilly.com/library/view/python-cookbook/0596001673/ch03s15.html
-        with open(file, 'r', encoding='utf8') as f:
+        with open(file, encoding='utf8') as f:
              contents = list(f)
  
          with open(file, 'w', encoding='utf8') as f:
              for line in contents:
-                for (old, new) in self.renamed.items():
-                    line = line.replace(old, new)
+                for change in self.renamed:
+                    line = line.replace(change.opf_path, change.epub_path)
                  f.write(line)
  
      def copy_images(self):
@@ -134,8 +164,7 @@ class DocBookToEpub:
          # changed in the XHTML files. Typically, the current paths are absolute.
  
          # First, get the mapping old file => file in the ePub archive.
-        original_images = self.get_images_from_package_opf()
-        self.renamed = {img: 'images/' + os.path.basename(img) for img in original_images}
+        self.renamed = self.get_image_changes()
  
          # Then, transform all paths (both OPF and XHTML files).
          self.change_image_paths(self.output_dir + '/OEBPS/package.opf')
@@ -147,8 +176,8 @@ class DocBookToEpub:
              os.mkdir(self.output_dir + '/OEBPS/images/')
  
          # Finally, actually copy the image files.
-        for (old, new) in self.renamed.items():
-            shutil.copyfile(old, self.output_dir + '/OEBPS/' + new)
+        for change in self.renamed:
+            shutil.copyfile(change.local_path, self.output_dir + '/OEBPS/' + change.epub_path)
  
      def create_zip_archive(self):
          with zipfile.ZipFile(self.output, 'w', zipfile.ZIP_DEFLATED) as zip: