X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FFormat.cpp;h=7128e2a7744692f739ee3759726cbcc4cbadb3df;hb=26ba2a65838731ce639a09539f617cb0f0be3b22;hp=050af1414f69ae0058849c9f26e35af9c68ca961;hpb=c0bb9707cb1b9c0fa307f1a030b5a8657f2c21fc;p=lyx.git diff --git a/src/Format.cpp b/src/Format.cpp index 050af1414f..7128e2a774 100644 --- a/src/Format.cpp +++ b/src/Format.cpp @@ -14,21 +14,30 @@ #include "Buffer.h" #include "BufferParams.h" #include "LyXRC.h" +#include "OutputParams.h" #include "ServerSocket.h" #include "frontends/alert.h" //to be removed? #include "support/debug.h" +#include "support/docstream.h" #include "support/filetools.h" #include "support/gettext.h" #include "support/lstrings.h" +#include "support/lyxmagic.h" +#include "support/mutex.h" #include "support/os.h" +#include "support/PathChanger.h" #include "support/Systemcall.h" #include "support/textutils.h" +#include "support/Translator.h" #include +#include +#include +#include -// FIXME: Q_WS_MACX is not available, it's in Qt +// FIXME: Q_OS_MAC is not available, it's in Qt #ifdef USE_MACOSX_PACKAGING #include "support/linkback/LinkBackProxy.h" #endif @@ -47,50 +56,31 @@ string const token_from_format("$$i"); string const token_path_format("$$p"); string const token_socket_format("$$a"); - -class FormatNamesEqual : public unary_function { -public: - FormatNamesEqual(string const & name) - : name_(name) {} - bool operator()(Format const & f) const - { - return f.name() == name_; - } -private: - string name_; -}; +} // namespace -class FormatExtensionsEqual : public unary_function { -public: - FormatExtensionsEqual(string const & extension) - : extension_(extension) {} - bool operator()(Format const & f) const - { - return f.extension() == extension_; - } -private: - string extension_; -}; - -} //namespace anon +bool Format::formatSorter(Format const * lhs, Format const * rhs) +{ + return compare_locale(translateIfPossible(lhs->prettyname()), + translateIfPossible(rhs->prettyname())) < 0; +} bool operator<(Format const & a, Format const & b) { - // use the compare_ascii_no_case instead of compare_no_case, - // because in turkish, 'i' is not the lowercase version of 'I', - // and thus turkish locale breaks parsing of tags. - - return compare_ascii_no_case(a.prettyname(), b.prettyname()) < 0; + return compare_locale(translateIfPossible(a.prettyname()), + translateIfPossible(b.prettyname())) < 0; } -Format::Format(string const & n, string const & e, string const & p, +Format::Format(string const & n, string const & e, docstring const & p, string const & s, string const & v, string const & ed, - int flags) - : name_(n), extension_(e), prettyname_(p), shortcut_(s), viewer_(v), - editor_(ed), flags_(flags) -{} + string const & m, int flags) + : name_(n), prettyname_(p), shortcut_(s), viewer_(v), + editor_(ed), mime_(m), flags_(flags) +{ + extension_list_ = getVectorFromString(e, ","); + LYXERR(Debug::GRAPHICS, "New Format: n=" << n << ", flags=" << flags); +} bool Format::dummy() const @@ -99,6 +89,19 @@ bool Format::dummy() const } +string const Format::extensions() const +{ + return getStringFromVector(extension_list_, ", "); +} + + +bool Format::hasExtension(string const & ext) const +{ + return (find(extension_list_.begin(), extension_list_.end(), ext) + != extension_list_.end()); +} + + bool Format::isChildFormat() const { if (name_.empty()) @@ -113,38 +116,359 @@ string const Format::parentFormat() const } +void Format::setExtensions(string const & e) +{ + extension_list_ = getVectorFromString(e, ","); +} + + +namespace { + +std::function FormatNameIs(string const & name) +{ + return [name](Format const & f){ return f.name() == name; }; +} + +} + // This method should return a reference, and throw an exception // if the format named name cannot be found (Lgb) Format const * Formats::getFormat(string const & name) const { FormatList::const_iterator cit = - find_if(formatlist.begin(), formatlist.end(), - FormatNamesEqual(name)); - if (cit != formatlist.end()) + find_if(formatlist_.begin(), formatlist_.end(), + FormatNameIs(name)); + if (cit != formatlist_.end()) return &(*cit); else - return 0; + return nullptr; } +Format * Formats::getFormat(string const & name) +{ + FormatList::iterator it = + find_if(formatlist_.begin(), formatlist_.end(), + FormatNameIs(name)); + + if (it != formatlist_.end()) + return &(*it); + + return nullptr; +} + + +namespace { + +/** Guess the file format name (as in Format::name()) from contents. + * Normally you don't want to use this directly, but rather + * Formats::getFormatFromFile(). + */ +string guessFormatFromContents(FileName const & fn) +{ + // the different filetypes and what they contain in one of the first lines + // (dots are any characters). (Herbert 20020131) + // AGR Grace... + // BMP BM... + // EPS %!PS-Adobe-3.0 EPSF... + // FIG #FIG... + // FITS ...BITPIX... + // GIF GIF... + // JPG \377\330... (0xFFD8) + // PDF %PDF-... + // PNG .PNG... + // PBM P1... or P4 (B/W) + // PGM P2... or P5 (Grayscale) + // PPM P3... or P6 (color) + // PS %!PS-Adobe-2.0 or 1.0, no "EPSF"! + // SGI \001\332... (decimal 474) + // TGIF %TGIF... + // TIFF II... or MM... + // XBM ..._bits[]... + // XPM /* XPM */ sometimes missing (f.ex. tgif-export) + // ...static char *... + // XWD \000\000\000\151 (0x00006900) decimal 105 + // + // GZIP \037\213 http://www.ietf.org/rfc/rfc1952.txt + // ZIP PK... http://www.halyava.ru/document/ind_arch.htm + // Z \037\235 UNIX compress + + // paranoia check + if (fn.empty() || !fn.isReadableFile()) + return string(); + + ifstream ifs(fn.toFilesystemEncoding().c_str()); + if (!ifs) + // Couldn't open file... + return string(); + + // gnuzip + static string const gzipStamp = "\037\213"; + + // PKZIP + static string const zipStamp = "PK"; + + // ZIP containers (koffice, openoffice.org etc). + static string const nonzipStamp = "\010\0\0\0mimetypeapplication/"; + + // compress + static string const compressStamp = "\037\235"; + + // DOS binary EPS according to Adobe TN-5002 + static string const binEPSStamp = "\xC5\xD0\xD3\xC6"; + + + // Maximum strings to read + int const max_count = 50; + int count = 0; + + string str; + string format; + bool firstLine = true; + bool backslash = false; + bool maybelatex = false; + int dollars = 0; + while ((count++ < max_count) && format.empty() && !maybelatex) { + if (ifs.eof()) + break; + + getline(ifs, str); + string const stamp = str.substr(0, 2); + if (firstLine && str.size() >= 2) { + // at first we check for a zipped file, because this + // information is saved in the first bytes of the file! + // also some graphic formats which save the information + // in the first line, too. + if (prefixIs(str, gzipStamp)) { + format = "gzip"; + + } else if (stamp == zipStamp && + !contains(str, nonzipStamp)) { + format = "zip"; + + } else if (stamp == compressStamp) { + format = "compress"; + + // the graphics part + } else if (stamp == "BM") { + format = "bmp"; + + } else if (stamp == "\377\330") { + format = "jpg"; + + } else if (prefixIs(str, "\x89PNG")) { + format = "png"; + + } else if (stamp == "\001\332") { + format = "sgi"; + + } else if (prefixIs(str, binEPSStamp)) { + format = "eps"; + + // PBM family + // Don't need to use str.at(0), str.at(1) because + // we already know that str.size() >= 2 + } else if (str[0] == 'P') { + switch (str[1]) { + case '1': + case '4': + format = "pbm"; + break; + case '2': + case '5': + format = "pgm"; + break; + case '3': + case '6': + format = "ppm"; + } + break; + + } else if ((stamp == "II") || (stamp == "MM")) { + format = "tiff"; + + } else if (prefixIs(str,"%TGIF")) { + format = "tgif"; + + } else if (prefixIs(str,"#FIG")) { + format = "fig"; + + } else if (prefixIs(str,"GIF")) { + format = "gif"; + + } else if (str.size() > 3) { + int const c = ((str[0] << 24) & (str[1] << 16) & + (str[2] << 8) & str[3]); + if (c == 105) { + format = "xwd"; + } + } + + firstLine = false; + } + + if (!format.empty()) + break; + else if (contains(str,"EPSF")) + // dummy, if we have wrong file description like + // %!PS-Adobe-2.0EPSF" + format = "eps"; + + else if (contains(str, "Grace")) + format = "agr"; + + else if (contains(str, "%PDF")) + // autodetect pdf format for graphics inclusion + format = "pdf6"; + + else if (contains(str, " EMF")) + format = "emf"; + + else if (contains(str, "%!PS-Adobe")) { + // eps or ps + ifs >> str; + if (contains(str,"EPSF")) + format = "eps"; + else + format = "ps"; + } + + else if (contains(str, "_bits[]")) + format = "xbm"; + + else if (contains(str, "XPM") || contains(str, "static char *")) + format = "xpm"; + + else if (contains(str, "BITPIX")) + format = "fits"; + + else if (contains(str, "\\documentclass") || + contains(str, "\\chapter") || + contains(str, "\\section") || + contains(str, "\\begin") || + contains(str, "\\end") || + contains(str, "$$") || + contains(str, "\\[") || + contains(str, "\\]")) + maybelatex = true; + else { + if (contains(str, '\\')) + backslash = true; + dollars += count_char(str, '$'); + if (backslash && dollars > 1) + // inline equation + maybelatex = true; + } + } + + if (format.empty() && maybelatex && !isBinaryFile(fn)) + format = "latex"; + + if (format.empty()) { + if (ifs.eof()) + LYXERR(Debug::GRAPHICS, "filetools(getFormatFromContents)\n" + "\tFile type not recognised before EOF!"); + } else { + LYXERR(Debug::GRAPHICS, "Recognised Fileformat: " << format); + return format; + } + + LYXERR(Debug::GRAPHICS, "filetools(getFormatFromContents)\n" + << "\tCouldn't find a known format!"); + return string(); +} + +} // namespace + + string Formats::getFormatFromFile(FileName const & filename) const { if (filename.empty()) return string(); - string const format = filename.guessFormatFromContents(); - if (!format.empty()) - return format; + string psformat; + string format; + if (filename.exists()) { + // one instance of Magic that will be reused for next calls + // This avoids to read the magic file everytime + // If libmagic is not available, Magic::file returns an empty string. + static Magic magic; + string const result = magic.file(filename.toFilesystemEncoding()); + string const mime = token(result, ';', 0); + // our own detection is better for binary files (can be anything) + // and different plain text formats + if (!mime.empty() && mime != "application/octet-stream" && + mime != "text/plain") { + Formats::const_iterator cit = + find_if(formatlist_.begin(), formatlist_.end(), + [mime](Format const & f){ return f.mime() == mime; }); + if (cit != formatlist_.end()) { + LYXERR(Debug::GRAPHICS, "\tgot format from MIME type: " + << mime << " -> " << cit->name()); + // See special eps/ps handling below + if (mime == "application/postscript") + psformat = cit->name(); + else + format = cit->name(); + } + } + + // libmagic recognizes as latex also some formats of ours + // such as pstex and pdftex. Therefore we have to perform + // additional checks in this case (bug 9244). + if (!format.empty() && format != "latex") + return format; + } - // try to find a format from the file extension. string const ext = getExtension(filename.absFileName()); + if (format.empty()) { + // libmagic does not distinguish eps and ps. + // Therefore we need to use our own detection here, but only if it + // recognizes either ps or eps. Otherwise the libmagic guess will + // be better (bug 9146). + format = guessFormatFromContents(filename); + if (!psformat.empty()) { + if (isPostScriptFileFormat(format)) + return format; + else + return psformat; + } + + if (isZippedFileFormat(format) && !ext.empty()) { + string const & fmt_name = getFormatFromExtension(ext); + if (!fmt_name.empty()) { + Format const * p_format = getFormat(fmt_name); + if (p_format && p_format->zippedNative()) + return p_format->name(); + } + } + // Don't simply return latex (bug 9244). + if (!format.empty() && format != "latex") + return format; + } + + // Both libmagic and our guessing from contents may return as latex + // also lyx files and our pstex and pdftex formats. In this case we + // give precedence to the format determined by the extension. + if (format == "latex") { + format = getFormatFromExtension(ext); + return format.empty() ? "latex" : format; + } + + // try to find a format from the file extension. + return getFormatFromExtension(ext); +} + + +string Formats::getFormatFromExtension(string const & ext) const +{ if (!ext.empty()) { // this is ambigous if two formats have the same extension, // but better than nothing Formats::const_iterator cit = - find_if(formatlist.begin(), formatlist.end(), - FormatExtensionsEqual(ext)); - if (cit != formats.end()) { + find_if(formatlist_.begin(), formatlist_.end(), + [ext](Format const & f){ return f.hasExtension(ext); }); + if (cit != formatlist_.end()) { LYXERR(Debug::GRAPHICS, "\twill guess format from file extension: " << ext << " -> " << cit->name()); return cit->name(); @@ -154,6 +478,49 @@ string Formats::getFormatFromFile(FileName const & filename) const } +/// Used to store last timestamp of file and whether it is (was) zipped +struct ZippedInfo { + bool zipped; + std::time_t timestamp; + ZippedInfo(bool zipped, std::time_t timestamp) + : zipped(zipped), timestamp(timestamp) { } +}; + + +/// Mapping absolute pathnames of files to their ZippedInfo metadata. +static std::map zipped_; +static Mutex zipped_mutex; + + +bool Formats::isZippedFile(support::FileName const & filename) const { + string const & fname = filename.absFileName(); + time_t timestamp = filename.lastModified(); + Mutex::Locker lock(&zipped_mutex); + map::iterator it = zipped_.find(fname); + if (it != zipped_.end() && it->second.timestamp == timestamp) + return it->second.zipped; + // FIXME perf: This very expensive function is called on startup on each + // file whic is going to be parsed, and also on svgz icons. Maybe there is a + // quicker way to check whether a file is zipped? I.e. for icons we + // probably just need to check the extension (svgz vs svg). + string const & format = getFormatFromFile(filename); + bool zipped = (format == "gzip" || format == "zip"); + zipped_.insert(make_pair(fname, ZippedInfo(zipped, timestamp))); + return zipped; +} + + +bool Formats::isZippedFileFormat(string const & format) +{ + return contains("gzip zip compress", format) && !format.empty(); +} + + +bool Formats::isPostScriptFileFormat(string const & format) +{ + return format == "ps" || format == "eps"; +} + static string fixCommand(string const & cmd, string const & ext, os::auto_open_mode mode) { @@ -176,8 +543,8 @@ static string fixCommand(string const & cmd, string const & ext, void Formats::setAutoOpen() { - FormatList::iterator fit = formatlist.begin(); - FormatList::iterator const fend = formatlist.end(); + FormatList::iterator fit = formatlist_.begin(); + FormatList::iterator const fend = formatlist_.end(); for ( ; fit != fend ; ++fit) { fit->setViewer(fixCommand(fit->viewer(), fit->extension(), os::VIEW)); fit->setEditor(fixCommand(fit->editor(), fit->extension(), os::EDIT)); @@ -188,75 +555,73 @@ void Formats::setAutoOpen() int Formats::getNumber(string const & name) const { FormatList::const_iterator cit = - find_if(formatlist.begin(), formatlist.end(), - FormatNamesEqual(name)); - if (cit != formatlist.end()) - return distance(formatlist.begin(), cit); - else + find_if(formatlist_.begin(), formatlist_.end(), + FormatNameIs(name)); + if (cit == formatlist_.end()) return -1; + + return distance(formatlist_.begin(), cit); } void Formats::add(string const & name) { if (!getFormat(name)) - add(name, name, name, string(), string(), string(), - Format::document); + add(name, name, from_utf8(name), string(), string(), string(), + string(), Format::document); } -void Formats::add(string const & name, string const & extension, - string const & prettyname, string const & shortcut, +void Formats::add(string const & name, string const & extensions, + docstring const & prettyname, string const & shortcut, string const & viewer, string const & editor, - int flags) + string const & mime, int flags) { - FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), - FormatNamesEqual(name)); - if (it == formatlist.end()) - formatlist.push_back(Format(name, extension, prettyname, - shortcut, viewer, editor, flags)); + Format * format = getFormat(name); + if (format) + *format = Format(name, extensions, prettyname, shortcut, viewer, + editor, mime, flags); else - *it = Format(name, extension, prettyname, shortcut, viewer, - editor, flags); + formatlist_.push_back(Format(name, extensions, prettyname, + shortcut, viewer, editor, mime, flags)); } void Formats::erase(string const & name) { FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), - FormatNamesEqual(name)); - if (it != formatlist.end()) - formatlist.erase(it); + find_if(formatlist_.begin(), formatlist_.end(), + FormatNameIs(name)); + if (it != formatlist_.end()) + formatlist_.erase(it); } void Formats::sort() { - std::sort(formatlist.begin(), formatlist.end()); + std::sort(formatlist_.begin(), formatlist_.end()); } void Formats::setViewer(string const & name, string const & command) { add(name); - FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), - FormatNamesEqual(name)); - if (it != formatlist.end()) - it->setViewer(command); + Format * format = getFormat(name); + if (format) + format->setViewer(command); + else + LYXERR0("Unable to set viewer for non-existent format: " << name); } void Formats::setEditor(string const & name, string const & command) { add(name); - FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), - FormatNamesEqual(name)); - if (it != formatlist.end()) - it->setEditor(command); + Format * format = getFormat(name); + if (format) + format->setEditor(command); + else + LYXERR0("Unable to set editor for non-existent format: " << name); } @@ -279,12 +644,12 @@ bool Formats::view(Buffer const & buffer, FileName const & filename, // by the caller (this should be "utility" code) Alert::error(_("Cannot view file"), bformat(_("No information for viewing %1$s"), - prettyName(format_name))); + translateIfPossible(prettyName(format_name)))); return false; } // viewer is 'auto' if (format->viewer() == "auto") { - if (os::autoOpenFile(filename.absFileName(), os::VIEW)) + if (os::autoOpenFile(filename.absFileName(), os::VIEW, buffer.filePath())) return true; else { Alert::error(_("Cannot view file"), @@ -294,7 +659,29 @@ bool Formats::view(Buffer const & buffer, FileName const & filename, } } - string command = libScriptSearch(format->viewer()); + string command = format->viewer(); + + // Escape backslashes if not already in double or single quotes. + // We cannot simply quote the whole command as there may be arguments. + if (contains(command, '\\')) { + bool inquote1 = false; + bool inquote2 = false; + string::iterator cit = command.begin(); + for (; cit != command.end(); ++cit) { + switch (*cit) { + case '"': + inquote1 = !inquote1; + break; + case '\'': + inquote2 = !inquote2; + break; + case '\\': + if (!inquote1 && !inquote2) + cit = ++command.insert(cit, '\\'); + break; + } + } + } if (format_name == "dvi" && !lyxrc.view_dvi_paper_option.empty()) { @@ -311,15 +698,19 @@ bool Formats::view(Buffer const & buffer, FileName const & filename, if (!contains(command, token_from_format)) command += ' ' + token_from_format; - command = subst(command, token_from_format, quoteName(filename.toFilesystemEncoding())); - command = subst(command, token_path_format, quoteName(onlyPath(filename.toFilesystemEncoding()))); + command = subst(command, token_from_format, + quoteName(onlyFileName(filename.toFilesystemEncoding()), quote_shell_filename)); + command = subst(command, token_path_format, + quoteName(onlyPath(filename.toFilesystemEncoding()), quote_shell_filename)); command = subst(command, token_socket_format, quoteName(theServerSocket().address())); LYXERR(Debug::FILES, "Executing command: " << command); // FIXME UNICODE utf8 can be wrong for files buffer.message(_("Executing command: ") + from_utf8(command)); + PathChanger p(filename.onlyPath()); Systemcall one; - one.startscript(Systemcall::DontWait, command); + one.startscript(Systemcall::DontWait, command, + buffer.filePath(), buffer.layoutPos()); // we can't report any sort of error, since we aren't waiting return true; @@ -329,16 +720,15 @@ bool Formats::view(Buffer const & buffer, FileName const & filename, bool Formats::edit(Buffer const & buffer, FileName const & filename, string const & format_name) const { - if (filename.empty() || !filename.exists()) { - Alert::error(_("Cannot edit file"), - bformat(_("File does not exist: %1$s"), - from_utf8(filename.absFileName()))); + if (filename.empty()) { + Alert::error(_("No Filename"), + _("No filename was provided!")); return false; } // LinkBack files look like PDF, but have the .linkback extension string const ext = getExtension(filename.absFileName()); - if (format_name == "pdf" && ext == "linkback") { + if (format_name == "pdf6" && ext == "linkback") { #ifdef USE_MACOSX_PACKAGING return editLinkBackFile(filename.absFileName().c_str()); #else @@ -357,13 +747,13 @@ bool Formats::edit(Buffer const & buffer, FileName const & filename, // be done by the caller (this should be "utility" code) Alert::error(_("Cannot edit file"), bformat(_("No information for editing %1$s"), - prettyName(format_name))); + translateIfPossible(prettyName(format_name)))); return false; } - + // editor is 'auto' if (format->editor() == "auto") { - if (os::autoOpenFile(filename.absFileName(), os::EDIT)) + if (os::autoOpenFile(filename.absFileName(), os::EDIT, buffer.filePath())) return true; else { Alert::error(_("Cannot edit file"), @@ -378,15 +768,18 @@ bool Formats::edit(Buffer const & buffer, FileName const & filename, if (!contains(command, token_from_format)) command += ' ' + token_from_format; - command = subst(command, token_from_format, quoteName(filename.toFilesystemEncoding())); - command = subst(command, token_path_format, quoteName(onlyPath(filename.toFilesystemEncoding()))); + command = subst(command, token_from_format, + quoteName(filename.toFilesystemEncoding(), quote_shell_filename)); + command = subst(command, token_path_format, + quoteName(onlyPath(filename.toFilesystemEncoding()), quote_shell_filename)); command = subst(command, token_socket_format, quoteName(theServerSocket().address())); LYXERR(Debug::FILES, "Executing command: " << command); // FIXME UNICODE utf8 can be wrong for files buffer.message(_("Executing command: ") + from_utf8(command)); Systemcall one; - one.startscript(Systemcall::DontWait, command); + one.startscript(Systemcall::DontWait, command, + buffer.filePath(), buffer.layoutPos()); // we can't report any sort of error, since we aren't waiting return true; @@ -397,7 +790,7 @@ docstring const Formats::prettyName(string const & name) const { Format const * format = getFormat(name); if (format) - return from_utf8(format->prettyname()); + return format->prettyname(); else return from_utf8(name); } @@ -413,11 +806,55 @@ string const Formats::extension(string const & name) const } +string const Formats::extensions(string const & name) const +{ + Format const * format = getFormat(name); + if (format) + return format->extensions(); + else + return name; +} -Formats formats; +namespace { + +typedef Translator FlavorTranslator; + + +FlavorTranslator initFlavorTranslator() +{ + FlavorTranslator f(Flavor::LaTeX, "latex"); + f.addPair(Flavor::DviLuaTeX, "dviluatex"); + f.addPair(Flavor::LuaTeX, "luatex"); + f.addPair(Flavor::PdfLaTeX, "pdflatex"); + f.addPair(Flavor::XeTeX, "xetex"); + f.addPair(Flavor::DocBook5, "docbook-xml"); + f.addPair(Flavor::Html, "xhtml"); + f.addPair(Flavor::Text, "text"); + f.addPair(Flavor::LyX, "lyx"); + return f; +} + + +FlavorTranslator const & flavorTranslator() +{ + static FlavorTranslator const translator = initFlavorTranslator(); + return translator; +} + +} // namespace + -Formats system_formats; +std::string flavor2format(Flavor flavor) +{ + return flavorTranslator().find(flavor); +} +/* Not currently needed, but I'll leave the code in case it is. +Flavor format2flavor(std::string fmt) +{ + return flavorTranslator().find(fmt); +} */ + } // namespace lyx