Remove profiling.py

[lyx.git] / lib / scripts / lyxpak.py
diff --git a/lib/scripts/lyxpak.py b/lib/scripts/lyxpak.py

index 9a56bf03c1d9ff878c976948c69f0d239a883121..0d9151592ea84f049b5422d6f2ed4791cf547968 100755 (executable)
--- a/lib/scripts/lyxpak.py
+++ b/lib/scripts/lyxpak.py
@@ -1,11 +1,9 @@
-# -*- coding: utf-8 -*-
-
  # file lyxpak.py
  # This file is part of LyX, the document processor.
  # Licence details can be found in the file COPYING.
  
  # author Enrico Forestieri
-# author Richard Heck
+# author Richard Kimberly Heck
  
  # Full author contact details are available in file CREDITS
  
@@ -15,29 +13,34 @@
  # a gzip compressed tar archive on *nix. This can be controlled by command
  # line options, however.
  
-import gzip, os, re, string, sys
-if sys.version_info < (2, 4, 0):
-    from sets import Set as set
-from getopt import getopt
-from cStringIO import StringIO
+import gzip, os, re, sys
+from io import BytesIO
+import subprocess
+
+# The path to the current python executable. sys.executable may be empty or
+# None, so in that case we fall back to simply calling "python" from the path.
+PYTHON_BIN = sys.executable if sys.executable else "python"
  
  running_on_windows = (os.name == 'nt')
  
  if running_on_windows:
      from shutil import copyfile
      from tempfile import NamedTemporaryFile
+    from lyxwin_getopt import getopt
+else:
+    from getopt import getopt
  
  # Pre-compiled regular expressions.
-re_lyxfile = re.compile("\.lyx$")
-re_input = re.compile(r'^(.*)\\(input|include){(\s*)(.+)(\s*)}.*$')
-re_ertinput = re.compile(r'^(input|include)({)(\s*)(.+)(\s*)}.*$')
-re_package = re.compile(r'^(.*)\\(usepackage){(\s*)(.+)(\s*)}.*$')
-re_class = re.compile(r'^(\\)(textclass)(\s+)(.+)\s*$')
-re_norecur = re.compile(r'^(.*)\\(verbatiminput|lstinputlisting|includegraphics\[*.*\]*){(\s*)(.+)(\s*)}.*$')
-re_ertnorecur = re.compile(r'^(verbatiminput|lstinputlisting|includegraphics\[*.*\]*)({)(\s*)(.+)(\s*)}.*$')
-re_filename = re.compile(r'^(\s*)(filename)(\s+)(.+)\s*$')
-re_options = re.compile(r'^(\s*)options(\s+)(.+)\s*$')
-re_bibfiles = re.compile(r'^(\s*)bibfiles(\s+)(.+)\s*$')
+re_lyxfile = re.compile(br"\.lyx$")
+re_input = re.compile(b'^(.*)\\\\(input|include){(\\s*)(.+)(\\s*)}.*$')
+re_ertinput = re.compile(b'^(input|include)({)(\\s*)(.+)(\\s*)}.*$')
+re_package = re.compile(b'^(.*)\\\\(usepackage){(\\s*)(.+)(\\s*)}.*$')
+re_class = re.compile(b'^(\\\\)(textclass)(\\s+)(.+)\\s*$')
+re_norecur = re.compile(b'^(.*)\\\\(verbatiminput|lstinputlisting|includegraphics\\[*.*\\]*){(\\s*)(.+)(\\s*)}.*$')
+re_ertnorecur = re.compile(b'^(verbatiminput|lstinputlisting|includegraphics\\[*.*\\]*)({)(\\s*)(.+)(\\s*)}.*$')
+re_filename = re.compile(b'^(\\s*)(filename)(\\s+)(.+)\\s*$')
+re_options = re.compile(b'^(\\s*)options(\\s+)(.+)\\s*$')
+re_bibfiles = re.compile(b'^(\\s*)bibfiles(\\s+)(.+)\\s*$')
  
  
  def usage(prog_name):
@@ -60,20 +63,17 @@ def error(message):
      sys.exit(1)
  
  
-def gzopen(file, mode):
-    input = open(unicode(file, 'utf-8'), 'rb')
-    magicnum = input.read(2)
-    input.close()
-    if magicnum == "\x1f\x8b":
-        return gzip.open(unicode(file, 'utf-8'), mode)
-    return open(unicode(file, 'utf-8'), mode)
+def tostr(message):
+    return message.decode(sys.getfilesystemencoding())
  
  
-def run_cmd(cmd):
-    handle = os.popen(cmd, 'r')
-    cmd_stdout = handle.read()
-    cmd_status = handle.close()
-    return cmd_status, cmd_stdout
+def gzopen(file):
+    input = open(file.decode('utf-8'), 'rb')
+    magicnum = input.read(2)
+    input.close()
+    if magicnum == b"\x1f\x8b":
+        return gzip.open(file.decode('utf-8'))
+    return open(file.decode('utf-8'), 'rb')
  
  
  def find_exe(candidates, extlist, path):
@@ -98,27 +98,34 @@ def gather_files(curfile, incfiles, lyx2lyx):
      " Recursively gather files."
      curdir = os.path.dirname(abspath(curfile))
      is_lyxfile = re_lyxfile.search(curfile)
+
      if is_lyxfile:
          if running_on_windows:
-            # os.popen cannot cope with unicode arguments and we cannot be
+            # subprocess cannot cope with unicode arguments and we cannot be
              # sure that curfile can be correctly converted to the current
              # code page. So, we resort to running lyx2lyx on a copy.
              tmp = NamedTemporaryFile(delete=False)
              tmp.close()
-            copyfile(unicode(curfile, 'utf-8'), tmp.name)
-            lyx2lyx_cmd = 'python "%s" "%s"' % (lyx2lyx, tmp.name)
-            l2l_status, l2l_stdout = run_cmd(lyx2lyx_cmd)
+            copyfile(curfile.decode('utf-8'), tmp.name)
+            try:
+                l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, tmp.name])
+            except subprocess.CalledProcessError:
+                error(f'{lyx2lyx} failed to convert "{tostr(curfile)}"')
              os.unlink(tmp.name)
          else:
-            lyx2lyx_cmd = 'python "%s" "%s"' % (lyx2lyx, curfile)
-            l2l_status, l2l_stdout = run_cmd(lyx2lyx_cmd)
-        if l2l_status != None:
-            error('%s failed to convert "%s"' % (lyx2lyx, curfile))
-        if l2l_stdout.startswith("\x1f\x8b"):
-            l2l_stdout = gzip.GzipFile("", "r", 0, StringIO(l2l_stdout)).read()
+            try:
+                l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, curfile])
+            except subprocess.CalledProcessError:
+                error(f'{lyx2lyx} failed to convert "{tostr(curfile)}"')
+        if l2l_stdout.startswith(b"\x1f\x8b"):
+            l2l_stdout = gzip.GzipFile("", "rb", 0, BytesIO(l2l_stdout)).read()
+        elif running_on_windows:
+            # For some unknown reason, there can be a spurious '\r' in the line
+            # separators, causing spurious empty lines when calling splitlines.
+            l2l_stdout = l2l_stdout.replace(b'\r\r\n', b'\r\n')
          lines = l2l_stdout.splitlines()
      else:
-        input = gzopen(curfile, 'rU')
+        input = gzopen(curfile)
          lines = input.readlines()
          input.close()
  
@@ -127,7 +134,7 @@ def gather_files(curfile, incfiles, lyx2lyx):
      while i < len(lines):
          # Gather used files.
          recursive = True
-        extlist = ['']
+        extlist = [b'']
          match = re_filename.match(lines[i])
          if not match:
              if maybe_in_ert:
@@ -136,26 +143,26 @@ def gather_files(curfile, incfiles, lyx2lyx):
                  match = re_input.match(lines[i])
              if not match:
                  match = re_package.match(lines[i])
-                extlist = ['.sty']
+                extlist = [b'.sty']
                  if not match:
                      match = re_class.match(lines[i])
-                    extlist = ['.cls']
+                    extlist = [b'.cls']
                      if not match:
                          if maybe_in_ert:
                              match = re_ertnorecur.match(lines[i])
                          else:
                              match = re_norecur.match(lines[i])
-                        extlist = ['', '.eps', '.pdf', '.png', '.jpg']
+                        extlist = [b'', b'.eps', b'.pdf', b'.png', b'.jpg']
                          recursive = False
-        maybe_in_ert = is_lyxfile and lines[i] == "\\backslash"
+        maybe_in_ert = is_lyxfile and lines[i] == b"\\backslash"
          if match:
-            file = match.group(4).strip('"')
+            file = match.group(4).strip(b'"')
              if not os.path.isabs(file):
                  file = os.path.join(curdir, file)
              file_exists = False
-            if not os.path.isdir(unicode(file, 'utf-8')):
+            if not os.path.isdir(file):
                  for ext in extlist:
-                    if os.path.exists(unicode(file + ext, 'utf-8')):
+                    if os.path.exists(file + ext):
                          file = file + ext
                          file_exists = True
                          break
@@ -173,12 +180,12 @@ def gather_files(curfile, incfiles, lyx2lyx):
          # Gather bibtex *.bst files.
          match = re_options.match(lines[i])
          if match:
-            file = match.group(3).strip('"')
-            if file.startswith("bibtotoc,"):
+            file = match.group(3).strip(b'"')
+            if file.startswith(b"bibtotoc,"):
                  file = file[9:]
              if not os.path.isabs(file):
-                file = os.path.join(curdir, file + '.bst')
-            if os.path.exists(unicode(file, 'utf-8')):
+                file = os.path.join(curdir, file + b'.bst')
+            if os.path.exists(file):
                  incfiles.append(abspath(file))
              i += 1
              continue
@@ -186,14 +193,14 @@ def gather_files(curfile, incfiles, lyx2lyx):
          # Gather bibtex *.bib files.
          match = re_bibfiles.match(lines[i])
          if match:
-            bibfiles = match.group(3).strip('"').split(',')
+            bibfiles = match.group(3).strip(b'"').split(b',')
              j = 0
              while j < len(bibfiles):
                  if os.path.isabs(bibfiles[j]):
-                    file = bibfiles[j] + '.bib'
+                    file = bibfiles[j] + b'.bib'
                  else:
-                    file = os.path.join(curdir, bibfiles[j] + '.bib')
-                if os.path.exists(unicode(file, 'utf-8')):
+                    file = os.path.join(curdir, bibfiles[j] + b'.bib')
+                if os.path.exists(file):
                      incfiles.append(abspath(file))
                  j += 1
              i += 1
@@ -211,7 +218,10 @@ def find_lyx2lyx(progloc, path):
      # for $SOMEDIR/lyx2lyx/lyx2lyx.
      ourpath = os.path.dirname(abspath(progloc))
      (upone, discard) = os.path.split(ourpath)
-    tryit = os.path.join(upone, "lyx2lyx", "lyx2lyx")
+    if running_on_windows:
+        tryit = os.path.join(upone, b"lyx2lyx", b"lyx2lyx")
+    else:
+        tryit = os.path.join(upone, "lyx2lyx", "lyx2lyx")
      if os.access(tryit, os.X_OK):
          return tryit
  
@@ -220,10 +230,11 @@ def find_lyx2lyx(progloc, path):
      if "PATHEXT" in os.environ:
          extlist = extlist + os.environ["PATHEXT"].split(os.pathsep)
      lyx_exe, full_path = find_exe(["lyxc", "lyx"], extlist, path)
-    if lyx_exe == None:
+    if lyx_exe is None:
          error('Cannot find the LyX executable in the path.')
-    cmd_status, cmd_stdout = run_cmd("%s -version 2>&1" % lyx_exe)
-    if cmd_status != None:
+    try:
+        cmd_stdout = subprocess.check_output([lyx_exe, '-version'], stderr=subprocess.STDOUT)
+    except subprocess.CalledProcessError:
          error('Cannot query LyX about the lyx2lyx script.')
      re_msvc = re.compile(r'^(\s*)(Host type:)(\s+)(win32)$')
      re_sysdir = re.compile(r'^(\s*)(LyX files dir:)(\s+)(\S+)$')
@@ -251,9 +262,12 @@ def main(args):
      ourprog = args[0]
  
      try:
-      (options, argv) = getopt(args[1:], "htzl:o:")
+        if running_on_windows:
+            (options, argv) = getopt(args[1:], b"htzl:o:")
+        else:
+            (options, argv) = getopt(args[1:], "htzl:o:")
      except:
-      error(usage(ourprog))
+        error(usage(ourprog))
  
      # we expect the filename to be left
      if len(argv) != 1:
@@ -264,49 +278,47 @@ def main(args):
      lyx2lyx = None
  
      for (opt, param) in options:
-      if opt == "-h":
-        print usage(ourprog)
-        sys.exit(0)
-      elif opt == "-t":
-        makezip = False
-      elif opt == "-z":
-        makezip = True
-      elif opt == "-l":
-        lyx2lyx = param
-      elif opt == "-o":
-        outdir = param
-        if not os.path.isdir(unicode(outdir, 'utf-8')):
-          error('Error: "%s" is not a directory.' % outdir)
+        if opt == "-h":
+            print(usage(ourprog))
+            sys.exit(0)
+        elif opt == "-t":
+            makezip = False
+        elif opt == "-z":
+            makezip = True
+        elif opt == "-l":
+            lyx2lyx = param
+        elif opt == "-o":
+            outdir = param
+            if not os.path.isdir(outdir):
+                error('Error: "%s" is not a directory.' % outdir)
  
      lyxfile = argv[0]
-    if not running_on_windows:
-        lyxfile = unicode(lyxfile, sys.getfilesystemencoding()).encode('utf-8')
-    if not os.path.exists(unicode(lyxfile, 'utf-8')):
-        error('File "%s" not found.' % lyxfile)
+    if not os.path.exists(lyxfile):
+        error('File "%s" not found.' % tostr(lyxfile))
  
      # Check that it actually is a LyX document
-    input = gzopen(lyxfile, 'rU')
+    input = gzopen(lyxfile)
      line = input.readline()
      input.close()
-    if not (line and line.startswith('#LyX')):
-        error('File "%s" is not a LyX document.' % lyxfile)
+    if not (line and line.startswith(b'#LyX')):
+        error('File "%s" is not a LyX document.' % tostr(lyxfile))
  
      if makezip:
          import zipfile
      else:
          import tarfile
  
-    ar_ext = ".tar.gz"
+    ar_ext = b".tar.gz"
      if makezip:
-        ar_ext = ".zip"
+        ar_ext = b".zip"
  
-    ar_name = re_lyxfile.sub(ar_ext, abspath(lyxfile))
+    ar_name = re_lyxfile.sub(ar_ext, abspath(lyxfile)).decode('utf-8')
      if outdir:
          ar_name = os.path.join(abspath(outdir), os.path.basename(ar_name))
  
-    path = string.split(os.environ["PATH"], os.pathsep)
+    path = os.environ["PATH"].split(os.pathsep)
  
-    if lyx2lyx == None:
+    if lyx2lyx is None:
          lyx2lyx = find_lyx2lyx(ourprog, path)
  
      # Initialize the list with the specified LyX file and recursively
@@ -315,18 +327,19 @@ def main(args):
      gather_files(lyxfile, incfiles, lyx2lyx)
  
      # Find the topmost dir common to all files
+    path_sep = os.path.sep.encode('utf-8')
      if len(incfiles) > 1:
          topdir = os.path.commonprefix(incfiles)
          # As os.path.commonprefix() works on a character by character basis,
          # rather than on path elements, we need to remove any trailing bytes.
-        topdir = topdir.rpartition(os.path.sep)[0] + os.path.sep
+        topdir = topdir.rpartition(path_sep)[0] + path_sep
      else:
-        topdir = os.path.dirname(incfiles[0]) + os.path.sep
+        topdir = os.path.dirname(incfiles[0]) + path_sep
  
      # Remove the prefix common to all paths in the list
      i = 0
      while i < len(incfiles):
-        incfiles[i] = string.replace(incfiles[i], topdir, '', 1)
+        incfiles[i] = incfiles[i].replace(topdir, b'', 1)
          i += 1
  
      # Remove duplicates and sort the list
@@ -334,24 +347,24 @@ def main(args):
      incfiles.sort()
  
      if topdir != '':
-        os.chdir(unicode(topdir, 'utf-8'))
+        os.chdir(topdir)
  
      # Create the archive
      try:
          if makezip:
              zip = zipfile.ZipFile(ar_name, "w", zipfile.ZIP_DEFLATED)
              for file in incfiles:
-                zip.write(file.decode('utf-8'), unicode(file, 'utf-8'))
+                zip.write(file.decode('utf-8'))
              zip.close()
          else:
              tar = tarfile.open(ar_name, "w:gz")
              for file in incfiles:
-                tar.add(file)
+                tar.add(file.decode('utf-8'))
              tar.close()
      except:
          error('Failed to create LyX archive "%s"' % ar_name)
  
-    print 'LyX archive "%s" created successfully.' % ar_name
+    print('LyX archive "%s" created successfully.' % ar_name)
      return 0
  
  
@@ -375,10 +388,10 @@ if __name__ == "__main__":
          argc = c_int(0)
          argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
          # unicode_argv[0] is the Python interpreter, so skip that.
-        argv = [argv_unicode[i].encode('utf-8') for i in xrange(1, argc.value)]
+        argv = [argv_unicode[i].encode('utf-8') for i in range(1, argc.value)]
          # Also skip option arguments to the Python interpreter.
          while len(argv) > 0:
-            if not argv[0].startswith("-"):
+            if not argv[0].startswith(b"-"):
                  break
              argv = argv[1:]
          sys.argv = argv