lib/scripts/lyxpak.py

   1 # -*- coding: utf-8 -*-
   2
   3 # file lyxpak.py
   4 # This file is part of LyX, the document processor.
   5 # Licence details can be found in the file COPYING.
   6
   7 # author Enrico Forestieri
   8 # author Richard Kimberly Heck
   9
  10 # Full author contact details are available in file CREDITS
  11
  12 # This script creates a tar or zip archive with a lyx file and all included
  13 # files (graphics and so on). By default, the created archive is the standard
  14 # type on a given platform, such that a zip archive is created on Windows and
  15 # a gzip compressed tar archive on *nix. This can be controlled by command
  16 # line options, however.
  17
  18 from __future__ import print_function
  19 import gzip, os, re, sys
  20 from getopt import getopt
  21 from io import BytesIO
  22 import subprocess
  23
  24 # Provide support for both python 2 and 3
  25 if sys.version_info[0] != 2:
  26     def unicode(arg, enc):
  27         return arg
  28
  29 # The path to the current python executable. sys.executable may fail, so in
  30 # this case we revert to simply calling "python" from the path.
  31 PYTHON_BIN = sys.executable if sys.executable else "python"
  32
  33 running_on_windows = (os.name == 'nt')
  34
  35 if running_on_windows:
  36     from shutil import copyfile
  37     from tempfile import NamedTemporaryFile
  38
# Pre-compiled regular expressions.
# All of them are bytes patterns, to be matched against byte-string lines.

# A name ending in ".lyx".
re_lyxfile = re.compile(br"\.lyx$")
# A line containing \input{...} or \include{...}; group 4 is the argument.
re_input = re.compile(b'^(.*)\\\\(input|include){(\\s*)(.+)(\\s*)}.*$')
# Same as re_input, but for a line that starts with the bare command name
# (no backslash); group 4 is again the argument.
re_ertinput = re.compile(b'^(input|include)({)(\\s*)(.+)(\\s*)}.*$')
# A line containing \usepackage{...}; group 4 is the argument.
re_package = re.compile(b'^(.*)\\\\(usepackage){(\\s*)(.+)(\\s*)}.*$')
# A line of the form "\textclass <name>"; group 4 is the name.
re_class = re.compile(b'^(\\\\)(textclass)(\\s+)(.+)\\s*$')
# A line containing \verbatiminput{...}, \lstinputlisting{...} or
# \includegraphics[...]{...}; group 4 is the argument.
re_norecur = re.compile(b'^(.*)\\\\(verbatiminput|lstinputlisting|includegraphics\\[*.*\\]*){(\\s*)(.+)(\\s*)}.*$')
# Same as re_norecur, but for a line that starts with the bare command name.
re_ertnorecur = re.compile(b'^(verbatiminput|lstinputlisting|includegraphics\\[*.*\\]*)({)(\\s*)(.+)(\\s*)}.*$')
# A line of the form "filename <value>"; group 4 is the value.
re_filename = re.compile(b'^(\\s*)(filename)(\\s+)(.+)\\s*$')
# A line of the form "options <value>"; group 3 is the value.
re_options = re.compile(b'^(\\s*)options(\\s+)(.+)\\s*$')
# A line of the form "bibfiles <value>"; group 3 is the value.
re_bibfiles = re.compile(b'^(\\s*)bibfiles(\\s+)(.+)\\s*$')
  50
  51
  52 def usage(prog_name):
  53     msg = '''
  54 Usage: %s [-t] [-z] [-l path] [-o output_dir] file.lyx
  55 Options:
  56 -l: Path to lyx2lyx script
  57 -o: Directory for output
  58 -t: Create gzipped tar file
  59 -z: Create zip file
  60 By default, we create file.zip on Windows and file.tar.gz on *nix,
  61 with the file output to where file.lyx is, and we look for lyx2lyx
  62 in the known locations, querying LyX itself if necessary.
  63 '''
  64     return msg % prog_name
  65
  66
  67 def error(message):
  68     sys.stderr.write(message + '\n')
  69     sys.exit(1)
  70
  71
  72 def tostr(message):
  73     return message.decode(sys.getfilesystemencoding())
  74
  75
  76 def gzopen(file):
  77     input = open(file.decode('utf-8'), 'rb')
  78     magicnum = input.read(2)
  79     input.close()
  80     if magicnum == b"\x1f\x8b":
  81         return gzip.open(file.decode('utf-8'))
  82     return open(file.decode('utf-8'), 'rb')
  83
  84
  85 def find_exe(candidates, extlist, path):
  86     for prog in candidates:
  87         for directory in path:
  88             for ext in extlist:
  89                 full_path = os.path.join(directory, prog + ext)
  90                 if os.access(full_path, os.X_OK):
  91                     return prog, full_path
  92     return None, None
  93
  94
  95 def abspath(name):
  96     " Resolve symlinks and returns the absolute normalized name."
  97     newname = os.path.normpath(os.path.abspath(name))
  98     if not running_on_windows:
  99         newname = os.path.realpath(newname)
 100     return newname
 101
 102
 103 def gather_files(curfile, incfiles, lyx2lyx):
 104     " Recursively gather files."
 105     curdir = os.path.dirname(abspath(curfile))
 106     is_lyxfile = re_lyxfile.search(curfile)
 107
 108     if is_lyxfile:
 109         if running_on_windows:
 110             # subprocess cannot cope with unicode arguments and we cannot be
 111             # sure that curfile can be correctly converted to the current
 112             # code page. So, we resort to running lyx2lyx on a copy.
 113             tmp = NamedTemporaryFile(delete=False)
 114             tmp.close()
 115             copyfile(curfile.decode('utf-8'), tmp.name)
 116             try:
 117                 l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, tmp.name])
 118             except subprocess.CalledProcessError:
 119                 error('%s failed to convert "%s"' % (lyx2lyx, tostr(curfile)))
 120             os.unlink(tmp.name)
 121         else:
 122             try:
 123                 l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, curfile])
 124             except subprocess.CalledProcessError:
 125                 error('%s failed to convert "%s"' % (lyx2lyx, tostr(curfile)))
 126         if l2l_stdout.startswith(b"\x1f\x8b"):
 127             l2l_stdout = gzip.GzipFile("", "rb", 0, BytesIO(l2l_stdout)).read()
 128         elif running_on_windows:
 129             # For some unknown reason, there can be a spurious '\r' in the line
 130             # separators, causing spurious empty lines when calling splitlines.
 131             l2l_stdout = l2l_stdout.replace('\r\r\n', '\r\n')
 132         lines = l2l_stdout.splitlines()
 133     else:
 134         input = gzopen(curfile)
 135         lines = input.readlines()
 136         input.close()
 137
 138     maybe_in_ert = False
 139     i = 0
 140     while i < len(lines):
 141         # Gather used files.
 142         recursive = True
 143         extlist = [b'']
 144         match = re_filename.match(lines[i])
 145         if not match:
 146             if maybe_in_ert:
 147                 match = re_ertinput.match(lines[i])
 148             else:
 149                 match = re_input.match(lines[i])
 150             if not match:
 151                 match = re_package.match(lines[i])
 152                 extlist = [b'.sty']
 153                 if not match:
 154                     match = re_class.match(lines[i])
 155                     extlist = [b'.cls']
 156                     if not match:
 157                         if maybe_in_ert:
 158                             match = re_ertnorecur.match(lines[i])
 159                         else:
 160                             match = re_norecur.match(lines[i])
 161                         extlist = [b'', b'.eps', b'.pdf', b'.png', b'.jpg']
 162                         recursive = False
 163         maybe_in_ert = is_lyxfile and lines[i] == b"\\backslash"
 164         if match:
 165             file = match.group(4).strip(b'"')
 166             if not os.path.isabs(file):
 167                 file = os.path.join(curdir, file)
 168             file_exists = False
 169             if not os.path.isdir(unicode(file, 'utf-8')):
 170                 for ext in extlist:
 171                     if os.path.exists(unicode(file + ext, 'utf-8')):
 172                         file = file + ext
 173                         file_exists = True
 174                         break
 175             if file_exists and not abspath(file) in incfiles:
 176                 incfiles.append(abspath(file))
 177                 if recursive:
 178                     gather_files(file, incfiles, lyx2lyx)
 179             i += 1
 180             continue
 181
 182         if not is_lyxfile:
 183             i += 1
 184             continue
 185
 186         # Gather bibtex *.bst files.
 187         match = re_options.match(lines[i])
 188         if match:
 189             file = match.group(3).strip(b'"')
 190             if file.startswith(b"bibtotoc,"):
 191                 file = file[9:]
 192             if not os.path.isabs(file):
 193                 file = os.path.join(curdir, file + b'.bst')
 194             if os.path.exists(unicode(file, 'utf-8')):
 195                 incfiles.append(abspath(file))
 196             i += 1
 197             continue
 198
 199         # Gather bibtex *.bib files.
 200         match = re_bibfiles.match(lines[i])
 201         if match:
 202             bibfiles = match.group(3).strip(b'"').split(b',')
 203             j = 0
 204             while j < len(bibfiles):
 205                 if os.path.isabs(bibfiles[j]):
 206                     file = bibfiles[j] + b'.bib'
 207                 else:
 208                     file = os.path.join(curdir, bibfiles[j] + b'.bib')
 209                 if os.path.exists(unicode(file, 'utf-8')):
 210                     incfiles.append(abspath(file))
 211                 j += 1
 212             i += 1
 213             continue
 214
 215         i += 1
 216
 217     return 0
 218
 219
 220 def find_lyx2lyx(progloc, path):
 221     " Find a usable version of the lyx2lyx script. "
 222     # first we will see if the script is roughly where we are
 223     # i.e., we will assume we are in $SOMEDIR/scripts and look
 224     # for $SOMEDIR/lyx2lyx/lyx2lyx.
 225     ourpath = os.path.dirname(abspath(progloc))
 226     (upone, discard) = os.path.split(ourpath)
 227     tryit = os.path.join(upone, "lyx2lyx", "lyx2lyx")
 228     if os.access(tryit, os.X_OK):
 229         return tryit
 230
 231     # now we will try to query LyX itself to find the path.
 232     extlist = ['']
 233     if "PATHEXT" in os.environ:
 234         extlist = extlist + os.environ["PATHEXT"].split(os.pathsep)
 235     lyx_exe, full_path = find_exe(["lyxc", "lyx"], extlist, path)
 236     if lyx_exe == None:
 237         error('Cannot find the LyX executable in the path.')
 238     try:
 239         cmd_stdout = subprocess.check_output([lyx_exe, '-version'], stderr=subprocess.STDOUT)
 240     except subprocess.CalledProcessError:
 241         error('Cannot query LyX about the lyx2lyx script.')
 242     re_msvc = re.compile(r'^(\s*)(Host type:)(\s+)(win32)$')
 243     re_sysdir = re.compile(r'^(\s*)(LyX files dir:)(\s+)(\S+)$')
 244     lines = cmd_stdout.splitlines()
 245     for line in lines:
 246         match = re_msvc.match(line)
 247         if match:
 248             # The LyX executable was built with MSVC, so the
 249             # "LyX files dir:" line is unusable
 250             basedir = os.path.dirname(os.path.dirname(full_path))
 251             tryit = os.path.join(basedir, 'Resources', 'lyx2lyx', 'lyx2lyx')
 252             break
 253         match = re_sysdir.match(line)
 254         if match:
 255             tryit = os.path.join(match.group(4), 'lyx2lyx', 'lyx2lyx')
 256             break
 257
 258     if not os.access(tryit, os.X_OK):
 259         error('Unable to find the lyx2lyx script.')
 260     return tryit
 261
 262
 263 def main(args):
 264
 265     ourprog = args[0]
 266
 267     try:
 268       (options, argv) = getopt(args[1:], "htzl:o:")
 269     except:
 270       error(usage(ourprog))
 271
 272     # we expect the filename to be left
 273     if len(argv) != 1:
 274         error(usage(ourprog))
 275
 276     makezip = running_on_windows
 277     outdir = ""
 278     lyx2lyx = None
 279
 280     for (opt, param) in options:
 281       if opt == "-h":
 282         print(usage(ourprog))
 283         sys.exit(0)
 284       elif opt == "-t":
 285         makezip = False
 286       elif opt == "-z":
 287         makezip = True
 288       elif opt == "-l":
 289         lyx2lyx = param
 290       elif opt == "-o":
 291         outdir = param
 292         if not os.path.isdir(unicode(outdir, 'utf-8')):
 293           error('Error: "%s" is not a directory.' % outdir)
 294
 295     lyxfile = argv[0]
 296     if not running_on_windows:
 297         lyxfile = unicode(lyxfile, sys.getfilesystemencoding()).encode('utf-8')
 298     if not os.path.exists(unicode(lyxfile, 'utf-8')):
 299         error('File "%s" not found.' % tostr(lyxfile))
 300
 301     # Check that it actually is a LyX document
 302     input = gzopen(lyxfile)
 303     line = input.readline()
 304     input.close()
 305     if not (line and line.startswith(b'#LyX')):
 306         error('File "%s" is not a LyX document.' % tostr(lyxfile))
 307
 308     if makezip:
 309         import zipfile
 310     else:
 311         import tarfile
 312
 313     ar_ext = b".tar.gz"
 314     if makezip:
 315         ar_ext = b".zip"
 316
 317     ar_name = re_lyxfile.sub(ar_ext, abspath(lyxfile)).decode('utf-8')
 318     if outdir:
 319         ar_name = os.path.join(abspath(outdir), os.path.basename(ar_name))
 320
 321     path = os.environ["PATH"].split(os.pathsep)
 322
 323     if lyx2lyx == None:
 324         lyx2lyx = find_lyx2lyx(ourprog, path)
 325
 326     # Initialize the list with the specified LyX file and recursively
 327     # gather all required files (also from child documents).
 328     incfiles = [abspath(lyxfile)]
 329     gather_files(lyxfile, incfiles, lyx2lyx)
 330
 331     # Find the topmost dir common to all files
 332     path_sep = os.path.sep.encode('utf-8')
 333     if len(incfiles) > 1:
 334         topdir = os.path.commonprefix(incfiles)
 335         # As os.path.commonprefix() works on a character by character basis,
 336         # rather than on path elements, we need to remove any trailing bytes.
 337         topdir = topdir.rpartition(path_sep)[0] + path_sep
 338     else:
 339         topdir = os.path.dirname(incfiles[0]) + path_sep
 340
 341     # Remove the prefix common to all paths in the list
 342     i = 0
 343     while i < len(incfiles):
 344         incfiles[i] = incfiles[i].replace(topdir, b'', 1)
 345         i += 1
 346
 347     # Remove duplicates and sort the list
 348     incfiles = list(set(incfiles))
 349     incfiles.sort()
 350
 351     if topdir != '':
 352         os.chdir(unicode(topdir, 'utf-8'))
 353
 354     # Create the archive
 355     try:
 356         if makezip:
 357             zip = zipfile.ZipFile(ar_name, "w", zipfile.ZIP_DEFLATED)
 358             for file in incfiles:
 359                 zip.write(file.decode('utf-8'))
 360             zip.close()
 361         else:
 362             tar = tarfile.open(ar_name, "w:gz")
 363             for file in incfiles:
 364                 tar.add(file.decode('utf-8'))
 365             tar.close()
 366     except:
 367         error('Failed to create LyX archive "%s"' % ar_name)
 368
 369     print('LyX archive "%s" created successfully.' % ar_name)
 370     return 0
 371
 372
 373 if __name__ == "__main__":
 374     if running_on_windows:
 375         # This works around <http://bugs.python.org/issue2128> for Python 2.
 376         # All arguments are retrieved in unicode format and converted to utf-8.
 377         # In this way, when launched from the command line, lyxpak.py can deal
 378         # with any non-ascii names. Unfortunately, this is not the case when
 379         # launched by LyX, because LyX converts the arguments of the converters
 380         # to the filesystem encoding. On Windows this corresponds to the current
 381         # code page and not to the UTF-16 encoding used by NTFS, such that they
 382         # are transliterated if not exactly encodable. As an example, α may
 383         # become a, β may become ß, and so on. However, this is a problem only
 384         # if the full path of the LyX document contains an unencodable character
 385         # as all other paths are extracted from the document in utf-8 format.
 386         from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int
 387         from ctypes.wintypes import LPWSTR, LPCWSTR
 388         GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
 389         CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))
 390         argc = c_int(0)
 391         argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
 392         # unicode_argv[0] is the Python interpreter, so skip that.
 393         argv = [argv_unicode[i].encode('utf-8') for i in xrange(1, argc.value)]
 394         # Also skip option arguments to the Python interpreter.
 395         while len(argv) > 0:
 396             if not argv[0].startswith("-"):
 397                 break
 398             argv = argv[1:]
 399         sys.argv = argv
 400
 401     main(sys.argv)