lib/scripts/lyxpak.py

   1 # file lyxpak.py
   2 # This file is part of LyX, the document processor.
   3 # Licence details can be found in the file COPYING.
   4
   5 # author Enrico Forestieri
   6 # author Richard Kimberly Heck
   7
   8 # Full author contact details are available in file CREDITS
   9
  10 # This script creates a tar or zip archive with a lyx file and all included
  11 # files (graphics and so on). By default, the created archive is the standard
  12 # type on a given platform, such that a zip archive is created on Windows and
  13 # a gzip compressed tar archive on *nix. This can be controlled by command
  14 # line options, however.
  15
  16 import gzip, os, re, sys
  17 from io import BytesIO
  18 import subprocess
  19
  20 # The path to the current python executable. sys.executable may fail, so in
  21 # this case we revert to simply calling "python" from the path.
  22 PYTHON_BIN = sys.executable if sys.executable else "python"
  23
  24 running_on_windows = (os.name == 'nt')
  25
  26 if running_on_windows:
  27     from shutil import copyfile
  28     from tempfile import NamedTemporaryFile
  29     from lyxwin_getopt import getopt
  30 else:
  31     from getopt import getopt
  32
# Pre-compiled regular expressions. All of them are bytes patterns, so they
# must be applied to bytes lines and bytes file names.

# A file name ending in ".lyx".
re_lyxfile = re.compile(br"\.lyx$")
# A line containing \input{...} or \include{...}; group 4 is the argument.
re_input = re.compile(b'^(.*)\\\\(input|include){(\\s*)(.+)(\\s*)}.*$')
# Same as re_input, but for a line that starts directly with the command
# name (no leading backslash); group 4 is the argument.
re_ertinput = re.compile(b'^(input|include)({)(\\s*)(.+)(\\s*)}.*$')
# A line containing \usepackage{...}; group 4 is the package name.
re_package = re.compile(b'^(.*)\\\\(usepackage){(\\s*)(.+)(\\s*)}.*$')
# A "\textclass <name>" line; group 4 is the class name.
re_class = re.compile(b'^(\\\\)(textclass)(\\s+)(.+)\\s*$')
# A line containing \verbatiminput{...}, \lstinputlisting{...} or
# \includegraphics{...} (optionally with a [...] part); group 4 is the
# argument.
re_norecur = re.compile(b'^(.*)\\\\(verbatiminput|lstinputlisting|includegraphics\\[*.*\\]*){(\\s*)(.+)(\\s*)}.*$')
# Same as re_norecur, but for a line that starts directly with the command
# name (no leading backslash); group 4 is the argument.
re_ertnorecur = re.compile(b'^(verbatiminput|lstinputlisting|includegraphics\\[*.*\\]*)({)(\\s*)(.+)(\\s*)}.*$')
# A "filename <value>" line; group 4 is the value.
re_filename = re.compile(b'^(\\s*)(filename)(\\s+)(.+)\\s*$')
# An "options <value>" line; group 3 is the value.
re_options = re.compile(b'^(\\s*)options(\\s+)(.+)\\s*$')
# A "bibfiles <value>" line; group 3 is the value.
re_bibfiles = re.compile(b'^(\\s*)bibfiles(\\s+)(.+)\\s*$')
  44
  45
  46 def usage(prog_name):
  47     msg = '''
  48 Usage: %s [-t] [-z] [-l path] [-o output_dir] file.lyx
  49 Options:
  50 -l: Path to lyx2lyx script
  51 -o: Directory for output
  52 -t: Create gzipped tar file
  53 -z: Create zip file
  54 By default, we create file.zip on Windows and file.tar.gz on *nix,
  55 with the file output to where file.lyx is, and we look for lyx2lyx
  56 in the known locations, querying LyX itself if necessary.
  57 '''
  58     return msg % prog_name
  59
  60
  61 def error(message):
  62     sys.stderr.write(message + '\n')
  63     sys.exit(1)
  64
  65
  66 def tostr(message):
  67     return message.decode(sys.getfilesystemencoding())
  68
  69
  70 def gzopen(file):
  71     input = open(file.decode('utf-8'), 'rb')
  72     magicnum = input.read(2)
  73     input.close()
  74     if magicnum == b"\x1f\x8b":
  75         return gzip.open(file.decode('utf-8'))
  76     return open(file.decode('utf-8'), 'rb')
  77
  78
  79 def find_exe(candidates, extlist, path):
  80     for prog in candidates:
  81         for directory in path:
  82             for ext in extlist:
  83                 full_path = os.path.join(directory, prog + ext)
  84                 if os.access(full_path, os.X_OK):
  85                     return prog, full_path
  86     return None, None
  87
  88
  89 def abspath(name):
  90     " Resolve symlinks and returns the absolute normalized name."
  91     newname = os.path.normpath(os.path.abspath(name))
  92     if not running_on_windows:
  93         newname = os.path.realpath(newname)
  94     return newname
  95
  96
  97 def gather_files(curfile, incfiles, lyx2lyx):
  98     " Recursively gather files."
  99     curdir = os.path.dirname(abspath(curfile))
 100     is_lyxfile = re_lyxfile.search(curfile)
 101
 102     if is_lyxfile:
 103         if running_on_windows:
 104             # subprocess cannot cope with unicode arguments and we cannot be
 105             # sure that curfile can be correctly converted to the current
 106             # code page. So, we resort to running lyx2lyx on a copy.
 107             tmp = NamedTemporaryFile(delete=False)
 108             tmp.close()
 109             copyfile(curfile.decode('utf-8'), tmp.name)
 110             try:
 111                 l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, tmp.name])
 112             except subprocess.CalledProcessError:
 113                 error(f'{lyx2lyx} failed to convert "{tostr(curfile)}"')
 114             os.unlink(tmp.name)
 115         else:
 116             try:
 117                 l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, curfile])
 118             except subprocess.CalledProcessError:
 119                 error(f'{lyx2lyx} failed to convert "{tostr(curfile)}"')
 120         if l2l_stdout.startswith(b"\x1f\x8b"):
 121             l2l_stdout = gzip.GzipFile("", "rb", 0, BytesIO(l2l_stdout)).read()
 122         elif running_on_windows:
 123             # For some unknown reason, there can be a spurious '\r' in the line
 124             # separators, causing spurious empty lines when calling splitlines.
 125             l2l_stdout = l2l_stdout.replace(b'\r\r\n', b'\r\n')
 126         lines = l2l_stdout.splitlines()
 127     else:
 128         input = gzopen(curfile)
 129         lines = input.readlines()
 130         input.close()
 131
 132     maybe_in_ert = False
 133     i = 0
 134     while i < len(lines):
 135         # Gather used files.
 136         recursive = True
 137         extlist = [b'']
 138         match = re_filename.match(lines[i])
 139         if not match:
 140             if maybe_in_ert:
 141                 match = re_ertinput.match(lines[i])
 142             else:
 143                 match = re_input.match(lines[i])
 144             if not match:
 145                 match = re_package.match(lines[i])
 146                 extlist = [b'.sty']
 147                 if not match:
 148                     match = re_class.match(lines[i])
 149                     extlist = [b'.cls']
 150                     if not match:
 151                         if maybe_in_ert:
 152                             match = re_ertnorecur.match(lines[i])
 153                         else:
 154                             match = re_norecur.match(lines[i])
 155                         extlist = [b'', b'.eps', b'.pdf', b'.png', b'.jpg']
 156                         recursive = False
 157         maybe_in_ert = is_lyxfile and lines[i] == b"\\backslash"
 158         if match:
 159             file = match.group(4).strip(b'"')
 160             if not os.path.isabs(file):
 161                 file = os.path.join(curdir, file)
 162             file_exists = False
 163             if not os.path.isdir(file):
 164                 for ext in extlist:
 165                     if os.path.exists(file + ext):
 166                         file = file + ext
 167                         file_exists = True
 168                         break
 169             if file_exists and not abspath(file) in incfiles:
 170                 incfiles.append(abspath(file))
 171                 if recursive:
 172                     gather_files(file, incfiles, lyx2lyx)
 173             i += 1
 174             continue
 175
 176         if not is_lyxfile:
 177             i += 1
 178             continue
 179
 180         # Gather bibtex *.bst files.
 181         match = re_options.match(lines[i])
 182         if match:
 183             file = match.group(3).strip(b'"')
 184             if file.startswith(b"bibtotoc,"):
 185                 file = file[9:]
 186             if not os.path.isabs(file):
 187                 file = os.path.join(curdir, file + b'.bst')
 188             if os.path.exists(file):
 189                 incfiles.append(abspath(file))
 190             i += 1
 191             continue
 192
 193         # Gather bibtex *.bib files.
 194         match = re_bibfiles.match(lines[i])
 195         if match:
 196             bibfiles = match.group(3).strip(b'"').split(b',')
 197             j = 0
 198             while j < len(bibfiles):
 199                 if os.path.isabs(bibfiles[j]):
 200                     file = bibfiles[j] + b'.bib'
 201                 else:
 202                     file = os.path.join(curdir, bibfiles[j] + b'.bib')
 203                 if os.path.exists(file):
 204                     incfiles.append(abspath(file))
 205                 j += 1
 206             i += 1
 207             continue
 208
 209         i += 1
 210
 211     return 0
 212
 213
 214 def find_lyx2lyx(progloc, path):
 215     " Find a usable version of the lyx2lyx script. "
 216     # first we will see if the script is roughly where we are
 217     # i.e., we will assume we are in $SOMEDIR/scripts and look
 218     # for $SOMEDIR/lyx2lyx/lyx2lyx.
 219     ourpath = os.path.dirname(abspath(progloc))
 220     (upone, discard) = os.path.split(ourpath)
 221     if running_on_windows:
 222         tryit = os.path.join(upone, b"lyx2lyx", b"lyx2lyx")
 223     else:
 224         tryit = os.path.join(upone, "lyx2lyx", "lyx2lyx")
 225     if os.access(tryit, os.X_OK):
 226         return tryit
 227
 228     # now we will try to query LyX itself to find the path.
 229     extlist = ['']
 230     if "PATHEXT" in os.environ:
 231         extlist = extlist + os.environ["PATHEXT"].split(os.pathsep)
 232     lyx_exe, full_path = find_exe(["lyxc", "lyx"], extlist, path)
 233     if lyx_exe is None:
 234         error('Cannot find the LyX executable in the path.')
 235     try:
 236         cmd_stdout = subprocess.check_output([lyx_exe, '-version'], stderr=subprocess.STDOUT)
 237     except subprocess.CalledProcessError:
 238         error('Cannot query LyX about the lyx2lyx script.')
 239     re_msvc = re.compile(r'^(\s*)(Host type:)(\s+)(win32)$')
 240     re_sysdir = re.compile(r'^(\s*)(LyX files dir:)(\s+)(\S+)$')
 241     lines = cmd_stdout.splitlines()
 242     for line in lines:
 243         match = re_msvc.match(line)
 244         if match:
 245             # The LyX executable was built with MSVC, so the
 246             # "LyX files dir:" line is unusable
 247             basedir = os.path.dirname(os.path.dirname(full_path))
 248             tryit = os.path.join(basedir, 'Resources', 'lyx2lyx', 'lyx2lyx')
 249             break
 250         match = re_sysdir.match(line)
 251         if match:
 252             tryit = os.path.join(match.group(4), 'lyx2lyx', 'lyx2lyx')
 253             break
 254
 255     if not os.access(tryit, os.X_OK):
 256         error('Unable to find the lyx2lyx script.')
 257     return tryit
 258
 259
 260 def main(args):
 261
 262     ourprog = args[0]
 263
 264     try:
 265         if running_on_windows:
 266             (options, argv) = getopt(args[1:], b"htzl:o:")
 267         else:
 268             (options, argv) = getopt(args[1:], "htzl:o:")
 269     except:
 270         error(usage(ourprog))
 271
 272     # we expect the filename to be left
 273     if len(argv) != 1:
 274         error(usage(ourprog))
 275
 276     makezip = running_on_windows
 277     outdir = ""
 278     lyx2lyx = None
 279
 280     for (opt, param) in options:
 281         if opt == "-h":
 282             print(usage(ourprog))
 283             sys.exit(0)
 284         elif opt == "-t":
 285             makezip = False
 286         elif opt == "-z":
 287             makezip = True
 288         elif opt == "-l":
 289             lyx2lyx = param
 290         elif opt == "-o":
 291             outdir = param
 292             if not os.path.isdir(outdir):
 293                 error('Error: "%s" is not a directory.' % outdir)
 294
 295     lyxfile = argv[0]
 296     if not os.path.exists(lyxfile):
 297         error('File "%s" not found.' % tostr(lyxfile))
 298
 299     # Check that it actually is a LyX document
 300     input = gzopen(lyxfile)
 301     line = input.readline()
 302     input.close()
 303     if not (line and line.startswith(b'#LyX')):
 304         error('File "%s" is not a LyX document.' % tostr(lyxfile))
 305
 306     if makezip:
 307         import zipfile
 308     else:
 309         import tarfile
 310
 311     ar_ext = b".tar.gz"
 312     if makezip:
 313         ar_ext = b".zip"
 314
 315     ar_name = re_lyxfile.sub(ar_ext, abspath(lyxfile)).decode('utf-8')
 316     if outdir:
 317         ar_name = os.path.join(abspath(outdir), os.path.basename(ar_name))
 318
 319     path = os.environ["PATH"].split(os.pathsep)
 320
 321     if lyx2lyx is None:
 322         lyx2lyx = find_lyx2lyx(ourprog, path)
 323
 324     # Initialize the list with the specified LyX file and recursively
 325     # gather all required files (also from child documents).
 326     incfiles = [abspath(lyxfile)]
 327     gather_files(lyxfile, incfiles, lyx2lyx)
 328
 329     # Find the topmost dir common to all files
 330     path_sep = os.path.sep.encode('utf-8')
 331     if len(incfiles) > 1:
 332         topdir = os.path.commonprefix(incfiles)
 333         # As os.path.commonprefix() works on a character by character basis,
 334         # rather than on path elements, we need to remove any trailing bytes.
 335         topdir = topdir.rpartition(path_sep)[0] + path_sep
 336     else:
 337         topdir = os.path.dirname(incfiles[0]) + path_sep
 338
 339     # Remove the prefix common to all paths in the list
 340     i = 0
 341     while i < len(incfiles):
 342         incfiles[i] = incfiles[i].replace(topdir, b'', 1)
 343         i += 1
 344
 345     # Remove duplicates and sort the list
 346     incfiles = list(set(incfiles))
 347     incfiles.sort()
 348
 349     if topdir != '':
 350         os.chdir(topdir)
 351
 352     # Create the archive
 353     try:
 354         if makezip:
 355             zip = zipfile.ZipFile(ar_name, "w", zipfile.ZIP_DEFLATED)
 356             for file in incfiles:
 357                 zip.write(file.decode('utf-8'))
 358             zip.close()
 359         else:
 360             tar = tarfile.open(ar_name, "w:gz")
 361             for file in incfiles:
 362                 tar.add(file.decode('utf-8'))
 363             tar.close()
 364     except:
 365         error('Failed to create LyX archive "%s"' % ar_name)
 366
 367     print('LyX archive "%s" created successfully.' % ar_name)
 368     return 0
 369
 370
# Script entry point: on Windows rebuild sys.argv from the native wide
# character command line, then run main().
if __name__ == "__main__":
    if running_on_windows:
        # This works around <http://bugs.python.org/issue2128> for Python 2.
        # All arguments are retrieved in unicode format and converted to utf-8.
        # In this way, when launched from the command line, lyxpak.py can deal
        # with any non-ascii names. Unfortunately, this is not the case when
        # launched by LyX, because LyX converts the arguments of the converters
        # to the filesystem encoding. On Windows this corresponds to the current
        # code page and not to the UTF-16 encoding used by NTFS, such that they
        # are transliterated if not exactly encodable. As an example, α may
        # become a, β may become ß, and so on. However, this is a problem only
        # if the full path of the LyX document contains an unencodable character
        # as all other paths are extracted from the document in utf-8 format.
        from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int
        from ctypes.wintypes import LPWSTR, LPCWSTR
        # Prototypes for the two Win32 functions used to fetch and split the
        # command line.
        GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
        CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))
        # argc receives the number of arguments found.
        argc = c_int(0)
        argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
        # argv_unicode[0] is the Python interpreter, so skip that.
        argv = [argv_unicode[i].encode('utf-8') for i in range(1, argc.value)]
        # Also skip option arguments to the Python interpreter.
        while len(argv) > 0:
            if not argv[0].startswith(b"-"):
                break
            argv = argv[1:]
        # From here on sys.argv holds UTF-8 encoded bytes, not str.
        sys.argv = argv

    main(sys.argv)