1 # -*- coding: utf-8 -*-
4 # This file is part of LyX, the document processor.
5 # Licence details can be found in the file COPYING.
7 # author Enrico Forestieri
10 # Full author contact details are available in file CREDITS
12 # This script creates a tar or zip archive with a lyx file and all included
13 # files (graphics and so on). By default, the created archive is the standard
14 # type on a given platform, such that a zip archive is created on Windows and
15 # a gzip compressed tar archive on *nix. This can be controlled by command
16 # line options, however.
18 import gzip, os, re, string, sys
19 if sys.version_info < (2, 4, 0):
20 from sets import Set as set
21 from getopt import getopt
22 from cStringIO import StringIO
24 running_on_windows = (os.name == 'nt')
26 if running_on_windows:
27 from shutil import copyfile
28 from tempfile import NamedTemporaryFile
# Pre-compiled regular expressions.
# Every pattern is a raw string, so backslashes reach the regex engine
# unchanged and no invalid string escape sequences are involved.

# Matches a name ending in ".lyx".
re_lyxfile = re.compile(r"\.lyx$")

# Patterns whose fourth group holds the referenced file name.
re_input = re.compile(r'^(.*)\\(input|include){(\s*)(.+)(\s*)}.*$')
re_ertinput = re.compile(r'^(input|include)({)(\s*)(.+)(\s*)}.*$')
re_package = re.compile(r'^(.*)\\(usepackage){(\s*)(.+)(\s*)}.*$')
re_class = re.compile(r'^(\\)(textclass)(\s+)(.+)\s*$')
re_norecur = re.compile(r'^(.*)\\(verbatiminput|lstinputlisting|includegraphics\[*.*\]*){(\s*)(.+)(\s*)}.*$')
re_ertnorecur = re.compile(r'^(verbatiminput|lstinputlisting|includegraphics\[*.*\]*)({)(\s*)(.+)(\s*)}.*$')
re_filename = re.compile(r'^(\s*)(filename)(\s+)(.+)\s*$')

# Patterns whose third group holds the value ("options" / "bibfiles" lines).
re_options = re.compile(r'^(\s*)options(\s+)(.+)\s*$')
re_bibfiles = re.compile(r'^(\s*)bibfiles(\s+)(.+)\s*$')
45 Usage: %s [-t] [-z] [-l path] [-o output_dir] file.lyx
47 -l: Path to lyx2lyx script
48 -o: Directory for output
49 -t: Create gzipped tar file
51 By default, we create file.zip on Windows and file.tar.gz on *nix,
52 with the file output to where file.lyx is, and we look for lyx2lyx
53 in the known locations, querying LyX itself if necessary.
55 return msg % prog_name
59 sys.stderr.write(message + '\n')
def gzopen(file, mode):
    """Open *file* with *mode*, transparently handling gzip compression.

    *file* is a utf-8 encoded path. Its first two bytes are read and
    compared with the gzip magic number (0x1f 0x8b): on a match the file
    is opened through gzip.open(), otherwise through the builtin open().
    The returned object is left open; closing it is up to the caller.
    """
    # Decode the path once instead of on every open call.
    path = unicode(file, 'utf-8')
    probe = open(path, 'rb')
    try:
        magicnum = probe.read(2)
    finally:
        # Release the probe handle even when the read fails.
        probe.close()
    if magicnum == "\x1f\x8b":
        return gzip.open(path, mode)
    return open(path, mode)
73 handle = os.popen(cmd, 'r')
74 cmd_stdout = handle.read()
75 cmd_status = handle.close()
76 return cmd_status, cmd_stdout
def find_exe(candidates, extlist, path):
    """Search the directories in *path* for one of the *candidates*.

    Each candidate name is tried, in order, in every directory of *path*
    with each extension of *extlist* appended.

    Returns the tuple (candidate name, full path) for the first executable
    regular file found, or (None, None) when nothing matches.
    """
    for prog in candidates:
        for directory in path:
            for ext in extlist:
                full_path = os.path.join(directory, prog + ext)
                # os.access(..., os.X_OK) is also true for searchable
                # directories, so additionally require a regular file.
                if os.path.isfile(full_path) and os.access(full_path, os.X_OK):
                    return prog, full_path
    return None, None
90 " Resolve symlinks and returns the absolute normalized name."
91 newname = os.path.normpath(os.path.abspath(name))
92 if not running_on_windows:
93 newname = os.path.realpath(newname)
97 def gather_files(curfile, incfiles, lyx2lyx):
98 " Recursively gather files."
99 curdir = os.path.dirname(abspath(curfile))
100 is_lyxfile = re_lyxfile.search(curfile)
102 if running_on_windows:
103 # os.popen cannot cope with unicode arguments and we cannot be
104 # sure that curfile can be correctly converted to the current
105 # code page. So, we resort to running lyx2lyx on a copy.
106 tmp = NamedTemporaryFile(delete=False)
108 copyfile(unicode(curfile, 'utf-8'), tmp.name)
109 lyx2lyx_cmd = 'python "%s" "%s"' % (lyx2lyx, tmp.name)
110 l2l_status, l2l_stdout = run_cmd(lyx2lyx_cmd)
113 lyx2lyx_cmd = 'python "%s" "%s"' % (lyx2lyx, curfile)
114 l2l_status, l2l_stdout = run_cmd(lyx2lyx_cmd)
115 if l2l_status != None:
116 error('%s failed to convert "%s"' % (lyx2lyx, curfile))
117 if l2l_stdout.startswith("\x1f\x8b"):
118 l2l_stdout = gzip.GzipFile("", "r", 0, StringIO(l2l_stdout)).read()
119 lines = l2l_stdout.splitlines()
121 input = gzopen(curfile, 'rU')
122 lines = input.readlines()
127 while i < len(lines):
131 match = re_filename.match(lines[i])
134 match = re_ertinput.match(lines[i])
136 match = re_input.match(lines[i])
138 match = re_package.match(lines[i])
141 match = re_class.match(lines[i])
145 match = re_ertnorecur.match(lines[i])
147 match = re_norecur.match(lines[i])
148 extlist = ['', '.eps', '.pdf', '.png', '.jpg']
150 maybe_in_ert = is_lyxfile and lines[i] == "\\backslash"
152 file = match.group(4).strip('"')
153 if not os.path.isabs(file):
154 file = os.path.join(curdir, file)
156 if not os.path.isdir(unicode(file, 'utf-8')):
158 if os.path.exists(unicode(file + ext, 'utf-8')):
162 if file_exists and not abspath(file) in incfiles:
163 incfiles.append(abspath(file))
165 gather_files(file, incfiles, lyx2lyx)
173 # Gather bibtex *.bst files.
174 match = re_options.match(lines[i])
176 file = match.group(3).strip('"')
177 if file.startswith("bibtotoc,"):
179 if not os.path.isabs(file):
180 file = os.path.join(curdir, file + '.bst')
181 if os.path.exists(unicode(file, 'utf-8')):
182 incfiles.append(abspath(file))
186 # Gather bibtex *.bib files.
187 match = re_bibfiles.match(lines[i])
189 bibfiles = match.group(3).strip('"').split(',')
191 while j < len(bibfiles):
192 if os.path.isabs(bibfiles[j]):
193 file = bibfiles[j] + '.bib'
195 file = os.path.join(curdir, bibfiles[j] + '.bib')
196 if os.path.exists(unicode(file, 'utf-8')):
197 incfiles.append(abspath(file))
207 def find_lyx2lyx(progloc, path):
208 " Find a usable version of the lyx2lyx script. "
209 # first we will see if the script is roughly where we are
210 # i.e., we will assume we are in $SOMEDIR/scripts and look
211 # for $SOMEDIR/lyx2lyx/lyx2lyx.
212 ourpath = os.path.dirname(abspath(progloc))
213 (upone, discard) = os.path.split(ourpath)
214 tryit = os.path.join(upone, "lyx2lyx", "lyx2lyx")
215 if os.access(tryit, os.X_OK):
218 # now we will try to query LyX itself to find the path.
220 if "PATHEXT" in os.environ:
221 extlist = extlist + os.environ["PATHEXT"].split(os.pathsep)
222 lyx_exe, full_path = find_exe(["lyxc", "lyx"], extlist, path)
224 error('Cannot find the LyX executable in the path.')
225 cmd_status, cmd_stdout = run_cmd("%s -version 2>&1" % lyx_exe)
226 if cmd_status != None:
227 error('Cannot query LyX about the lyx2lyx script.')
228 re_msvc = re.compile(r'^(\s*)(Host type:)(\s+)(win32)$')
229 re_sysdir = re.compile(r'^(\s*)(LyX files dir:)(\s+)(\S+)$')
230 lines = cmd_stdout.splitlines()
232 match = re_msvc.match(line)
234 # The LyX executable was built with MSVC, so the
235 # "LyX files dir:" line is unusable
236 basedir = os.path.dirname(os.path.dirname(full_path))
237 tryit = os.path.join(basedir, 'Resources', 'lyx2lyx', 'lyx2lyx')
239 match = re_sysdir.match(line)
241 tryit = os.path.join(match.group(4), 'lyx2lyx', 'lyx2lyx')
244 if not os.access(tryit, os.X_OK):
245 error('Unable to find the lyx2lyx script.')
254 (options, argv) = getopt(args[1:], "htzl:o:")
256 error(usage(ourprog))
258 # we expect the filename to be left
260 error(usage(ourprog))
262 makezip = running_on_windows
266 for (opt, param) in options:
278 if not os.path.isdir(unicode(outdir, 'utf-8')):
279 error('Error: "%s" is not a directory.' % outdir)
282 if not running_on_windows:
283 lyxfile = unicode(lyxfile, sys.getfilesystemencoding()).encode('utf-8')
284 if not os.path.exists(unicode(lyxfile, 'utf-8')):
285 error('File "%s" not found.' % lyxfile)
287 # Check that it actually is a LyX document
288 input = gzopen(lyxfile, 'rU')
289 line = input.readline()
291 if not (line and line.startswith('#LyX')):
292 error('File "%s" is not a LyX document.' % lyxfile)
303 ar_name = re_lyxfile.sub(ar_ext, abspath(lyxfile))
305 ar_name = os.path.join(abspath(outdir), os.path.basename(ar_name))
307 path = string.split(os.environ["PATH"], os.pathsep)
310 lyx2lyx = find_lyx2lyx(ourprog, path)
312 # Initialize the list with the specified LyX file and recursively
313 # gather all required files (also from child documents).
314 incfiles = [abspath(lyxfile)]
315 gather_files(lyxfile, incfiles, lyx2lyx)
317 # Find the topmost dir common to all files
318 if len(incfiles) > 1:
319 topdir = os.path.commonprefix(incfiles)
320 # As os.path.commonprefix() works on a character by character basis,
321 # rather than on path elements, we need to remove any trailing bytes.
322 topdir = topdir.rpartition(os.path.sep)[0] + os.path.sep
324 topdir = os.path.dirname(incfiles[0]) + os.path.sep
326 # Remove the prefix common to all paths in the list
328 while i < len(incfiles):
329 incfiles[i] = string.replace(incfiles[i], topdir, '', 1)
332 # Remove duplicates and sort the list
333 incfiles = list(set(incfiles))
337 os.chdir(unicode(topdir, 'utf-8'))
342 zip = zipfile.ZipFile(ar_name, "w", zipfile.ZIP_DEFLATED)
343 for file in incfiles:
344 zip.write(file.decode('utf-8'), unicode(file, 'utf-8'))
347 tar = tarfile.open(ar_name, "w:gz")
348 for file in incfiles:
352 error('Failed to create LyX archive "%s"' % ar_name)
354 print 'LyX archive "%s" created successfully.' % ar_name
358 if __name__ == "__main__":
359 if running_on_windows:
360 # This works around <http://bugs.python.org/issue2128> for Python 2.
361 # All arguments are retrieved in unicode format and converted to utf-8.
362 # In this way, when launched from the command line, lyxpak.py can deal
363 # with any non-ascii names. Unfortunately, this is not the case when
364 # launched by LyX, because LyX converts the arguments of the converters
365 # to the filesystem encoding. On Windows this corresponds to the current
366 # code page and not to the UTF-16 encoding used by NTFS, such that they
367 # are transliterated if not exactly encodable. As an example, α may
368 # become a, β may become ß, and so on. However, this is a problem only
369 # if the full path of the LyX document contains an unencodable character
370 # as all other paths are extracted from the document in utf-8 format.
371 from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_int
372 from ctypes.wintypes import LPWSTR, LPCWSTR
373 GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
374 CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(("CommandLineToArgvW", windll.shell32))
376 argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc))
377 # argv_unicode[0] is the Python interpreter, so skip that.
378 argv = [argv_unicode[i].encode('utf-8') for i in xrange(1, argc.value)]
379 # Also skip option arguments to the Python interpreter.
381 if not argv[0].startswith("-"):