1 # -*- coding: utf-8 -*-
4 # This file is part of LyX, the document processor.
5 # Licence details can be found in the file COPYING.
7 # author Enrico Forestieri
8 # author Richard Kimberly Heck
10 # Full author contact details are available in file CREDITS
12 # This script creates a tar or zip archive with a lyx file and all included
13 # files (graphics and so on). By default, the created archive is the standard
14 # type on a given platform, such that a zip archive is created on Windows and
15 # a gzip compressed tar archive on *nix. This can be controlled by command
16 # line options, however.
18 from __future__ import print_function
19 import gzip, os, re, sys
20 from getopt import getopt
21 from io import BytesIO
24 # Provide support for both python 2 and 3
25 if sys.version_info[0] != 2:
26 def unicode(arg, enc):
# Interpreter used to run helper scripts such as lyx2lyx. Normally this is
# the interpreter executing this script; sys.executable can be empty (or
# None), in which case we fall back to whatever "python" is on the path.
PYTHON_BIN = sys.executable or "python"

# True when the script is running on Windows.
running_on_windows = os.name == 'nt'
35 if running_on_windows:
36 from shutil import copyfile
37 from tempfile import NamedTemporaryFile
# Pre-compiled regular expressions. All of them are bytes patterns, so they
# can only be applied to bytes lines/paths.
#
# They are written as raw bytes literals (br'...') so that regex escapes are
# not doubled and no invalid string escape such as "\." is present (newer
# Python versions warn about those and will eventually reject them).
#
# For the patterns used to locate included files, group 4 holds the file
# name; for re_options and re_bibfiles the value is in group 3.

# A name ending in ".lyx".
re_lyxfile = re.compile(br"\.lyx$")
# \input{...} or \include{...}
re_input = re.compile(br'^(.*)\\(input|include){(\s*)(.+)(\s*)}.*$')
# Same as re_input, but without the leading backslash.
re_ertinput = re.compile(br'^(input|include)({)(\s*)(.+)(\s*)}.*$')
# \usepackage{...}
re_package = re.compile(br'^(.*)\\(usepackage){(\s*)(.+)(\s*)}.*$')
# \textclass <name>
re_class = re.compile(br'^(\\)(textclass)(\s+)(.+)\s*$')
# \verbatiminput{...}, \lstinputlisting{...} or \includegraphics[...]{...}
re_norecur = re.compile(br'^(.*)\\(verbatiminput|lstinputlisting|includegraphics\[*.*\]*){(\s*)(.+)(\s*)}.*$')
# Same as re_norecur, but without the leading backslash.
re_ertnorecur = re.compile(br'^(verbatiminput|lstinputlisting|includegraphics\[*.*\]*)({)(\s*)(.+)(\s*)}.*$')
# "filename <path>" line
re_filename = re.compile(br'^(\s*)(filename)(\s+)(.+)\s*$')
# "options <value>" line
re_options = re.compile(br'^(\s*)options(\s+)(.+)\s*$')
# "bibfiles <comma separated list>" line
re_bibfiles = re.compile(br'^(\s*)bibfiles(\s+)(.+)\s*$')
54 Usage: %s [-t] [-z] [-l path] [-o output_dir] file.lyx
56 -l: Path to lyx2lyx script
57 -o: Directory for output
58 -t: Create gzipped tar file
60 By default, we create file.zip on Windows and file.tar.gz on *nix,
61 with the file output to where file.lyx is, and we look for lyx2lyx
62 in the known locations, querying LyX itself if necessary.
64 return msg % prog_name
68 sys.stderr.write(message + '\n')
73 return message.decode(sys.getfilesystemencoding())
77 input = open(file.decode('utf-8'), 'rb')
78 magicnum = input.read(2)
80 if magicnum == b"\x1f\x8b":
81 return gzip.open(file.decode('utf-8'))
82 return open(file.decode('utf-8'), 'rb')
85 def find_exe(candidates, extlist, path):
86 for prog in candidates:
87 for directory in path:
89 full_path = os.path.join(directory, prog + ext)
90 if os.access(full_path, os.X_OK):
91 return prog, full_path
96 " Resolve symlinks and returns the absolute normalized name."
97 newname = os.path.normpath(os.path.abspath(name))
98 if not running_on_windows:
99 newname = os.path.realpath(newname)
103 def gather_files(curfile, incfiles, lyx2lyx):
104 " Recursively gather files."
105 curdir = os.path.dirname(abspath(curfile))
106 is_lyxfile = re_lyxfile.search(curfile)
109 if running_on_windows:
110 # subprocess cannot cope with unicode arguments and we cannot be
111 # sure that curfile can be correctly converted to the current
112 # code page. So, we resort to running lyx2lyx on a copy.
113 tmp = NamedTemporaryFile(delete=False)
115 copyfile(curfile.decode('utf-8'), tmp.name)
117 l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, tmp.name])
118 except subprocess.CalledProcessError:
119 error('%s failed to convert "%s"' % (lyx2lyx, tostr(curfile)))
123 l2l_stdout = subprocess.check_output([PYTHON_BIN, lyx2lyx, curfile])
124 except subprocess.CalledProcessError:
125 error('%s failed to convert "%s"' % (lyx2lyx, tostr(curfile)))
126 if l2l_stdout.startswith(b"\x1f\x8b"):
127 l2l_stdout = gzip.GzipFile("", "rb", 0, BytesIO(l2l_stdout)).read()
128 elif running_on_windows:
129 # For some unknown reason, there can be a spurious '\r' in the line
130 # separators, causing spurious empty lines when calling splitlines.
# l2l_stdout is the bytes output of subprocess.check_output(), so the
# search and replacement values must be bytes too (str arguments raise
# TypeError on Python 3).
l2l_stdout = l2l_stdout.replace(b'\r\r\n', b'\r\n')
132 lines = l2l_stdout.splitlines()
134 input = gzopen(curfile)
135 lines = input.readlines()
140 while i < len(lines):
144 match = re_filename.match(lines[i])
147 match = re_ertinput.match(lines[i])
149 match = re_input.match(lines[i])
151 match = re_package.match(lines[i])
154 match = re_class.match(lines[i])
158 match = re_ertnorecur.match(lines[i])
160 match = re_norecur.match(lines[i])
161 extlist = [b'', b'.eps', b'.pdf', b'.png', b'.jpg']
163 maybe_in_ert = is_lyxfile and lines[i] == b"\\backslash"
165 file = match.group(4).strip(b'"')
166 if not os.path.isabs(file):
167 file = os.path.join(curdir, file)
169 if not os.path.isdir(unicode(file, 'utf-8')):
171 if os.path.exists(unicode(file + ext, 'utf-8')):
175 if file_exists and not abspath(file) in incfiles:
176 incfiles.append(abspath(file))
178 gather_files(file, incfiles, lyx2lyx)
186 # Gather bibtex *.bst files.
187 match = re_options.match(lines[i])
189 file = match.group(3).strip(b'"')
190 if file.startswith(b"bibtotoc,"):
192 if not os.path.isabs(file):
193 file = os.path.join(curdir, file + b'.bst')
194 if os.path.exists(unicode(file, 'utf-8')):
195 incfiles.append(abspath(file))
199 # Gather bibtex *.bib files.
200 match = re_bibfiles.match(lines[i])
202 bibfiles = match.group(3).strip(b'"').split(b',')
204 while j < len(bibfiles):
205 if os.path.isabs(bibfiles[j]):
206 file = bibfiles[j] + b'.bib'
208 file = os.path.join(curdir, bibfiles[j] + b'.bib')
209 if os.path.exists(unicode(file, 'utf-8')):
210 incfiles.append(abspath(file))
220 def find_lyx2lyx(progloc, path):
221 " Find a usable version of the lyx2lyx script. "
222 # first we will see if the script is roughly where we are
223 # i.e., we will assume we are in $SOMEDIR/scripts and look
224 # for $SOMEDIR/lyx2lyx/lyx2lyx.
225 ourpath = os.path.dirname(abspath(progloc))
226 (upone, discard) = os.path.split(ourpath)
227 tryit = os.path.join(upone, "lyx2lyx", "lyx2lyx")
228 if os.access(tryit, os.X_OK):
231 # now we will try to query LyX itself to find the path.
233 if "PATHEXT" in os.environ:
234 extlist = extlist + os.environ["PATHEXT"].split(os.pathsep)
235 lyx_exe, full_path = find_exe(["lyxc", "lyx"], extlist, path)
237 error('Cannot find the LyX executable in the path.')
239 cmd_stdout = subprocess.check_output([lyx_exe, '-version'], stderr=subprocess.STDOUT)
240 except subprocess.CalledProcessError:
241 error('Cannot query LyX about the lyx2lyx script.')
242 re_msvc = re.compile(r'^(\s*)(Host type:)(\s+)(win32)$')
243 re_sysdir = re.compile(r'^(\s*)(LyX files dir:)(\s+)(\S+)$')
244 lines = cmd_stdout.splitlines()
246 match = re_msvc.match(line)
248 # The LyX executable was built with MSVC, so the
249 # "LyX files dir:" line is unusable
250 basedir = os.path.dirname(os.path.dirname(full_path))
251 tryit = os.path.join(basedir, 'Resources', 'lyx2lyx', 'lyx2lyx')
253 match = re_sysdir.match(line)
255 tryit = os.path.join(match.group(4), 'lyx2lyx', 'lyx2lyx')
258 if not os.access(tryit, os.X_OK):
259 error('Unable to find the lyx2lyx script.')
268 (options, argv) = getopt(args[1:], "htzl:o:")
270 error(usage(ourprog))
272 # we expect the filename to be left
274 error(usage(ourprog))
276 makezip = running_on_windows
280 for (opt, param) in options:
282 print(usage(ourprog))
292 if not os.path.isdir(unicode(outdir, 'utf-8')):
293 error('Error: "%s" is not a directory.' % outdir)
296 if not running_on_windows:
297 lyxfile = unicode(lyxfile, sys.getfilesystemencoding()).encode('utf-8')
298 if not os.path.exists(unicode(lyxfile, 'utf-8')):
299 error('File "%s" not found.' % tostr(lyxfile))
301 # Check that it actually is a LyX document
302 input = gzopen(lyxfile)
303 line = input.readline()
305 if not (line and line.startswith(b'#LyX')):
306 error('File "%s" is not a LyX document.' % tostr(lyxfile))
317 ar_name = re_lyxfile.sub(ar_ext, abspath(lyxfile)).decode('utf-8')
319 ar_name = os.path.join(abspath(outdir), os.path.basename(ar_name))
321 path = os.environ["PATH"].split(os.pathsep)
324 lyx2lyx = find_lyx2lyx(ourprog, path)
326 # Initialize the list with the specified LyX file and recursively
327 # gather all required files (also from child documents).
328 incfiles = [abspath(lyxfile)]
329 gather_files(lyxfile, incfiles, lyx2lyx)
331 # Find the topmost dir common to all files
332 path_sep = os.path.sep.encode('utf-8')
333 if len(incfiles) > 1:
334 topdir = os.path.commonprefix(incfiles)
335 # As os.path.commonprefix() works on a character by character basis,
336 # rather than on path elements, we need to remove any trailing bytes.
337 topdir = topdir.rpartition(path_sep)[0] + path_sep
339 topdir = os.path.dirname(incfiles[0]) + path_sep
341 # Remove the prefix common to all paths in the list
343 while i < len(incfiles):
344 incfiles[i] = incfiles[i].replace(topdir, b'', 1)
347 # Remove duplicates and sort the list
348 incfiles = list(set(incfiles))
352 os.chdir(unicode(topdir, 'utf-8'))
357 zip = zipfile.ZipFile(ar_name, "w", zipfile.ZIP_DEFLATED)
358 for file in incfiles:
359 zip.write(file.decode('utf-8'))
362 tar = tarfile.open(ar_name, "w:gz")
363 for file in incfiles:
364 tar.add(file.decode('utf-8'))
367 error('Failed to create LyX archive "%s"' % ar_name)
369 print('LyX archive "%s" created successfully.' % ar_name)
373 if __name__ == "__main__":
374 if running_on_windows:
375 # This works around <http://bugs.python.org/issue2128> for Python 2.
376 # All arguments are retrieved in unicode format and converted to utf-8.
377 # In this way, when launched from the command line, lyxpak.py can deal
378 # with any non-ascii names. Unfortunately, this is not the case when
379 # launched by LyX, because LyX converts the arguments of the converters
380 # to the filesystem encoding. On Windows this corresponds to the current
381 # code page and not to the UTF-16 encoding used by NTFS, such that they
382 # are transliterated if not exactly encodable. As an example, α may
383 # become a, β may become ß, and so on. However, this is a problem only
384 # if the full path of the LyX document contains an unencodable character
385 # as all other paths are extracted from the document in utf-8 format.
386 from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int
387 from ctypes.wintypes import LPWSTR, LPCWSTR
388 GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
389 CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))
391 argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
392 # argv_unicode[0] is the Python interpreter, so skip that.
393 argv = [argv_unicode[i].encode('utf-8') for i in xrange(1, argc.value)]
394 # Also skip option arguments to the Python interpreter.
396 if not argv[0].startswith("-"):