lib/scripts/lyxpak.py

   1 # -*- coding: utf-8 -*-
   2
   3 # file lyxpak.py
   4 # This file is part of LyX, the document processor.
   5 # Licence details can be found in the file COPYING.
   6
   7 # author Enrico Forestieri
   8 # author Richard Heck
   9
  10 # Full author contact details are available in file CREDITS
  11
  12 # This script creates a tar or zip archive with a lyx file and all included
  13 # files (graphics and so on). By default, the created archive is the standard
  14 # type on a given platform, such that a zip archive is created on Windows and
  15 # a gzip compressed tar archive on *nix. This can be controlled by command
  16 # line options, however.
  17
  18 import gzip, os, re, string, sys
  19 if sys.version_info < (2, 4, 0):
  20     from sets import Set as set
  21 from getopt import getopt
  22 from cStringIO import StringIO
  23
# True when running under native Windows (os.name is 'nt'). This selects
# the default archive type in main() and the Windows-specific workarounds.
running_on_windows = (os.name == 'nt')

if running_on_windows:
    # Only needed on Windows, where gather_files() runs lyx2lyx on a
    # temporary copy of the document.
    from shutil import copyfile
    from tempfile import NamedTemporaryFile
  29
  30 # Pre-compiled regular expressions.
  31 re_lyxfile = re.compile("\.lyx$")
  32 re_input = re.compile(r'^(.*)\\(input|include){(\s*)(.+)(\s*)}.*$')
  33 re_ertinput = re.compile(r'^(input|include)({)(\s*)(.+)(\s*)}.*$')
  34 re_package = re.compile(r'^(.*)\\(usepackage){(\s*)(.+)(\s*)}.*$')
  35 re_class = re.compile(r'^(\\)(textclass)(\s+)(.+)\s*$')
  36 re_norecur = re.compile(r'^(.*)\\(verbatiminput|lstinputlisting|includegraphics\[*.*\]*){(\s*)(.+)(\s*)}.*$')
  37 re_ertnorecur = re.compile(r'^(verbatiminput|lstinputlisting|includegraphics\[*.*\]*)({)(\s*)(.+)(\s*)}.*$')
  38 re_filename = re.compile(r'^(\s*)(filename)(\s+)(.+)\s*$')
  39 re_options = re.compile(r'^(\s*)options(\s+)(.+)\s*$')
  40 re_bibfiles = re.compile(r'^(\s*)bibfiles(\s+)(.+)\s*$')
  41
  42
  43 def usage(prog_name):
  44     msg = '''
  45 Usage: %s [-t] [-z] [-l path] [-o output_dir] file.lyx
  46 Options:
  47 -l: Path to lyx2lyx script
  48 -o: Directory for output
  49 -t: Create gzipped tar file
  50 -z: Create zip file
  51 By default, we create file.zip on Windows and file.tar.gz on *nix,
  52 with the file output to where file.lyx is, and we look for lyx2lyx
  53 in the known locations, querying LyX itself if necessary.
  54 '''
  55     return msg % prog_name
  56
  57
  58 def error(message):
  59     sys.stderr.write(message + '\n')
  60     sys.exit(1)
  61
  62
  63 def gzopen(file, mode):
  64     input = open(unicode(file, 'utf-8'), 'rb')
  65     magicnum = input.read(2)
  66     input.close()
  67     if magicnum == "\x1f\x8b":
  68         return gzip.open(unicode(file, 'utf-8'), mode)
  69     return open(unicode(file, 'utf-8'), mode)
  70
  71
  72 def run_cmd(cmd):
  73     handle = os.popen(cmd, 'r')
  74     cmd_stdout = handle.read()
  75     cmd_status = handle.close()
  76     return cmd_status, cmd_stdout
  77
  78
  79 def find_exe(candidates, extlist, path):
  80     for prog in candidates:
  81         for directory in path:
  82             for ext in extlist:
  83                 full_path = os.path.join(directory, prog + ext)
  84                 if os.access(full_path, os.X_OK):
  85                     return prog, full_path
  86     return None, None
  87
  88
  89 def abspath(name):
  90     " Resolve symlinks and returns the absolute normalized name."
  91     newname = os.path.normpath(os.path.abspath(name))
  92     if not running_on_windows:
  93         newname = os.path.realpath(newname)
  94     return newname
  95
  96
  97 def gather_files(curfile, incfiles, lyx2lyx):
  98     " Recursively gather files."
  99     curdir = os.path.dirname(abspath(curfile))
 100     is_lyxfile = re_lyxfile.search(curfile)
 101     if is_lyxfile:
 102         if running_on_windows:
 103             # os.popen cannot cope with unicode arguments and we cannot be
 104             # sure that curfile can be correctly converted to the current
 105             # code page. So, we resort to running lyx2lyx on a copy.
 106             tmp = NamedTemporaryFile(delete=False)
 107             tmp.close()
 108             copyfile(unicode(curfile, 'utf-8'), tmp.name)
 109             lyx2lyx_cmd = 'python "%s" "%s"' % (lyx2lyx, tmp.name)
 110             l2l_status, l2l_stdout = run_cmd(lyx2lyx_cmd)
 111             os.unlink(tmp.name)
 112         else:
 113             lyx2lyx_cmd = 'python "%s" "%s"' % (lyx2lyx, curfile)
 114             l2l_status, l2l_stdout = run_cmd(lyx2lyx_cmd)
 115         if l2l_status != None:
 116             error('%s failed to convert "%s"' % (lyx2lyx, curfile))
 117         if l2l_stdout.startswith("\x1f\x8b"):
 118             l2l_stdout = gzip.GzipFile("", "r", 0, StringIO(l2l_stdout)).read()
 119         lines = l2l_stdout.splitlines()
 120     else:
 121         input = gzopen(curfile, 'rU')
 122         lines = input.readlines()
 123         input.close()
 124
 125     maybe_in_ert = False
 126     i = 0
 127     while i < len(lines):
 128         # Gather used files.
 129         recursive = True
 130         extlist = ['']
 131         match = re_filename.match(lines[i])
 132         if not match:
 133             if maybe_in_ert:
 134                 match = re_ertinput.match(lines[i])
 135             else:
 136                 match = re_input.match(lines[i])
 137             if not match:
 138                 match = re_package.match(lines[i])
 139                 extlist = ['.sty']
 140                 if not match:
 141                     match = re_class.match(lines[i])
 142                     extlist = ['.cls']
 143                     if not match:
 144                         if maybe_in_ert:
 145                             match = re_ertnorecur.match(lines[i])
 146                         else:
 147                             match = re_norecur.match(lines[i])
 148                         extlist = ['', '.eps', '.pdf', '.png', '.jpg']
 149                         recursive = False
 150         maybe_in_ert = is_lyxfile and lines[i] == "\\backslash"
 151         if match:
 152             file = match.group(4).strip('"')
 153             if not os.path.isabs(file):
 154                 file = os.path.join(curdir, file)
 155             file_exists = False
 156             if not os.path.isdir(unicode(file, 'utf-8')):
 157                 for ext in extlist:
 158                     if os.path.exists(unicode(file + ext, 'utf-8')):
 159                         file = file + ext
 160                         file_exists = True
 161                         break
 162             if file_exists and not abspath(file) in incfiles:
 163                 incfiles.append(abspath(file))
 164                 if recursive:
 165                     gather_files(file, incfiles, lyx2lyx)
 166             i += 1
 167             continue
 168
 169         if not is_lyxfile:
 170             i += 1
 171             continue
 172
 173         # Gather bibtex *.bst files.
 174         match = re_options.match(lines[i])
 175         if match:
 176             file = match.group(3).strip('"')
 177             if file.startswith("bibtotoc,"):
 178                 file = file[9:]
 179             if not os.path.isabs(file):
 180                 file = os.path.join(curdir, file + '.bst')
 181             if os.path.exists(unicode(file, 'utf-8')):
 182                 incfiles.append(abspath(file))
 183             i += 1
 184             continue
 185
 186         # Gather bibtex *.bib files.
 187         match = re_bibfiles.match(lines[i])
 188         if match:
 189             bibfiles = match.group(3).strip('"').split(',')
 190             j = 0
 191             while j < len(bibfiles):
 192                 if os.path.isabs(bibfiles[j]):
 193                     file = bibfiles[j] + '.bib'
 194                 else:
 195                     file = os.path.join(curdir, bibfiles[j] + '.bib')
 196                 if os.path.exists(unicode(file, 'utf-8')):
 197                     incfiles.append(abspath(file))
 198                 j += 1
 199             i += 1
 200             continue
 201
 202         i += 1
 203
 204     return 0
 205
 206
 207 def find_lyx2lyx(progloc, path):
 208     " Find a usable version of the lyx2lyx script. "
 209     # first we will see if the script is roughly where we are
 210     # i.e., we will assume we are in $SOMEDIR/scripts and look
 211     # for $SOMEDIR/lyx2lyx/lyx2lyx.
 212     ourpath = os.path.dirname(abspath(progloc))
 213     (upone, discard) = os.path.split(ourpath)
 214     tryit = os.path.join(upone, "lyx2lyx", "lyx2lyx")
 215     if os.access(tryit, os.X_OK):
 216         return tryit
 217
 218     # now we will try to query LyX itself to find the path.
 219     extlist = ['']
 220     if "PATHEXT" in os.environ:
 221         extlist = extlist + os.environ["PATHEXT"].split(os.pathsep)
 222     lyx_exe, full_path = find_exe(["lyxc", "lyx"], extlist, path)
 223     if lyx_exe == None:
 224         error('Cannot find the LyX executable in the path.')
 225     cmd_status, cmd_stdout = run_cmd("%s -version 2>&1" % lyx_exe)
 226     if cmd_status != None:
 227         error('Cannot query LyX about the lyx2lyx script.')
 228     re_msvc = re.compile(r'^(\s*)(Host type:)(\s+)(win32)$')
 229     re_sysdir = re.compile(r'^(\s*)(LyX files dir:)(\s+)(\S+)$')
 230     lines = cmd_stdout.splitlines()
 231     for line in lines:
 232         match = re_msvc.match(line)
 233         if match:
 234             # The LyX executable was built with MSVC, so the
 235             # "LyX files dir:" line is unusable
 236             basedir = os.path.dirname(os.path.dirname(full_path))
 237             tryit = os.path.join(basedir, 'Resources', 'lyx2lyx', 'lyx2lyx')
 238             break
 239         match = re_sysdir.match(line)
 240         if match:
 241             tryit = os.path.join(match.group(4), 'lyx2lyx', 'lyx2lyx')
 242             break
 243
 244     if not os.access(tryit, os.X_OK):
 245         error('Unable to find the lyx2lyx script.')
 246     return tryit
 247
 248
 249 def main(args):
 250
 251     ourprog = args[0]
 252
 253     try:
 254       (options, argv) = getopt(args[1:], "htzl:o:")
 255     except:
 256       error(usage(ourprog))
 257
 258     # we expect the filename to be left
 259     if len(argv) != 1:
 260         error(usage(ourprog))
 261
 262     makezip = running_on_windows
 263     outdir = ""
 264     lyx2lyx = None
 265
 266     for (opt, param) in options:
 267       if opt == "-h":
 268         print usage(ourprog)
 269         sys.exit(0)
 270       elif opt == "-t":
 271         makezip = False
 272       elif opt == "-z":
 273         makezip = True
 274       elif opt == "-l":
 275         lyx2lyx = param
 276       elif opt == "-o":
 277         outdir = param
 278         if not os.path.isdir(unicode(outdir, 'utf-8')):
 279           error('Error: "%s" is not a directory.' % outdir)
 280
 281     lyxfile = argv[0]
 282     if not running_on_windows:
 283         lyxfile = unicode(lyxfile, sys.getfilesystemencoding()).encode('utf-8')
 284     if not os.path.exists(unicode(lyxfile, 'utf-8')):
 285         error('File "%s" not found.' % lyxfile)
 286
 287     # Check that it actually is a LyX document
 288     input = gzopen(lyxfile, 'rU')
 289     line = input.readline()
 290     input.close()
 291     if not (line and line.startswith('#LyX')):
 292         error('File "%s" is not a LyX document.' % lyxfile)
 293
 294     if makezip:
 295         import zipfile
 296     else:
 297         import tarfile
 298
 299     ar_ext = ".tar.gz"
 300     if makezip:
 301         ar_ext = ".zip"
 302
 303     ar_name = re_lyxfile.sub(ar_ext, abspath(lyxfile))
 304     if outdir:
 305         ar_name = os.path.join(abspath(outdir), os.path.basename(ar_name))
 306
 307     path = string.split(os.environ["PATH"], os.pathsep)
 308
 309     if lyx2lyx == None:
 310         lyx2lyx = find_lyx2lyx(ourprog, path)
 311
 312     # Initialize the list with the specified LyX file and recursively
 313     # gather all required files (also from child documents).
 314     incfiles = [abspath(lyxfile)]
 315     gather_files(lyxfile, incfiles, lyx2lyx)
 316
 317     # Find the topmost dir common to all files
 318     if len(incfiles) > 1:
 319         topdir = os.path.commonprefix(incfiles)
 320         # As os.path.commonprefix() works on a character by character basis,
 321         # rather than on path elements, we need to remove any trailing bytes.
 322         topdir = topdir.rpartition(os.path.sep)[0] + os.path.sep
 323     else:
 324         topdir = os.path.dirname(incfiles[0]) + os.path.sep
 325
 326     # Remove the prefix common to all paths in the list
 327     i = 0
 328     while i < len(incfiles):
 329         incfiles[i] = string.replace(incfiles[i], topdir, '', 1)
 330         i += 1
 331
 332     # Remove duplicates and sort the list
 333     incfiles = list(set(incfiles))
 334     incfiles.sort()
 335
 336     if topdir != '':
 337         os.chdir(unicode(topdir, 'utf-8'))
 338
 339     # Create the archive
 340     try:
 341         if makezip:
 342             zip = zipfile.ZipFile(ar_name, "w", zipfile.ZIP_DEFLATED)
 343             for file in incfiles:
 344                 zip.write(file.decode('utf-8'), unicode(file, 'utf-8'))
 345             zip.close()
 346         else:
 347             tar = tarfile.open(ar_name, "w:gz")
 348             for file in incfiles:
 349                 tar.add(file)
 350             tar.close()
 351     except:
 352         error('Failed to create LyX archive "%s"' % ar_name)
 353
 354     print 'LyX archive "%s" created successfully.' % ar_name
 355     return 0
 356
 357
 358 if __name__ == "__main__":
 359     if running_on_windows:
 360         # This works around <http://bugs.python.org/issue2128> for Python 2.
 361         # All arguments are retrieved in unicode format and converted to utf-8.
 362         # In this way, when launched from the command line, lyxpak.py can deal
 363         # with any non-ascii names. Unfortunately, this is not the case when
 364         # launched by LyX, because LyX converts the arguments of the converters
 365         # to the filesystem encoding. On Windows this corresponds to the current
 366         # code page and not to the UTF-16 encoding used by NTFS, such that they
 367         # are transliterated if not exactly encodable. As an example, α may
 368         # become a, β may become ß, and so on. However, this is a problem only
 369         # if the full path of the LyX document contains an unencodable character
 370         # as all other paths are extracted from the document in utf-8 format.
 371         from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int
 372         from ctypes.wintypes import LPWSTR, LPCWSTR
 373         GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
 374         CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))
 375         argc = c_int(0)
 376         argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
 377         # unicode_argv[0] is the Python interpreter, so skip that.
 378         argv = [argv_unicode[i].encode('utf-8') for i in xrange(1, argc.value)]
 379         # Also skip option arguments to the Python interpreter.
 380         while len(argv) > 0:
 381             if not argv[0].startswith("-"):
 382                 break
 383             argv = argv[1:]
 384         sys.argv = argv
 385
 386     main(sys.argv)