X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FFormat.cpp;h=4a33eb07c49e97c80ef4548979b00029500c9dc4;hb=28be7d552f62cc02fa86d7f79201d089bfb2d7b5;hp=189754370b95f242eb05a4853505c6be8b9b2bcf;hpb=b31f0cfc4dc481e15df07d7ab12249c539333910;p=lyx.git diff --git a/src/Format.cpp b/src/Format.cpp index 189754370b..4a33eb07c4 100644 --- a/src/Format.cpp +++ b/src/Format.cpp @@ -22,8 +22,10 @@ #include "support/filetools.h" #include "support/gettext.h" #include "support/lstrings.h" +#include "support/mutex.h" +#include "support/docstream.h" #include "support/os.h" -#include "support/Path.h" +#include "support/PathChanger.h" #include "support/Systemcall.h" #include "support/textutils.h" #include "support/Translator.h" @@ -32,11 +34,15 @@ #include #include -// FIXME: Q_WS_MACX is not available, it's in Qt +// FIXME: Q_OS_MAC is not available, it's in Qt #ifdef USE_MACOSX_PACKAGING #include "support/linkback/LinkBackProxy.h" #endif +#ifdef HAVE_MAGIC_H +#include +#endif + using namespace std; using namespace lyx::support; @@ -55,7 +61,8 @@ string const token_socket_format("$$a"); class FormatNamesEqual : public unary_function { public: FormatNamesEqual(string const & name) - : name_(name) {} + : name_(name) + {} bool operator()(Format const & f) const { return f.name() == name_; @@ -68,7 +75,8 @@ private: class FormatExtensionsEqual : public unary_function { public: FormatExtensionsEqual(string const & extension) - : extension_(extension) {} + : extension_(extension) + {} bool operator()(Format const & f) const { return f.hasExtension(extension_); @@ -77,24 +85,43 @@ private: string extension_; }; + +class FormatMimeEqual : public unary_function { +public: + FormatMimeEqual(string const & mime) + : mime_(mime) + {} + bool operator()(Format const & f) const + { + // The test for empty mime strings is needed since we allow + // formats with empty mime types. + return f.mime() == mime_ && !mime_.empty(); + } +private: + string mime_; +}; + + } //namespace anon +bool Format::formatSorter(Format const * lhs, Format const * rhs) +{ + return compare_locale(translateIfPossible(lhs->prettyname()), + translateIfPossible(rhs->prettyname())) < 0; +} bool operator<(Format const & a, Format const & b) { - // use the compare_ascii_no_case instead of compare_no_case, - // because in turkish, 'i' is not the lowercase version of 'I', - // and thus turkish locale breaks parsing of tags. - - return compare_ascii_no_case(a.prettyname(), b.prettyname()) < 0; + return compare_locale(translateIfPossible(a.prettyname()), + translateIfPossible(b.prettyname())) < 0; } -Format::Format(string const & n, string const & e, string const & p, +Format::Format(string const & n, string const & e, docstring const & p, string const & s, string const & v, string const & ed, - int flags) + string const & m, int flags) : name_(n), prettyname_(p), shortcut_(s), viewer_(v), - editor_(ed), flags_(flags) + editor_(ed), mime_(m), flags_(flags) { extension_list_ = getVectorFromString(e, ","); LYXERR(Debug::GRAPHICS, "New Format: n=" << n << ", flags=" << flags); @@ -145,33 +172,322 @@ void Format::setExtensions(string const & e) Format const * Formats::getFormat(string const & name) const { FormatList::const_iterator cit = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (cit != formatlist.end()) + if (cit != formatlist_.end()) return &(*cit); else return 0; } +namespace { + +/** Guess the file format name (as in Format::name()) from contents. + * Normally you don't want to use this directly, but rather + * Formats::getFormatFromFile(). + */ +string guessFormatFromContents(FileName const & fn) +{ + // the different filetypes and what they contain in one of the first lines + // (dots are any characters). (Herbert 20020131) + // AGR Grace... + // BMP BM... + // EPS %!PS-Adobe-3.0 EPSF... + // FIG #FIG... + // FITS ...BITPIX... + // GIF GIF... + // JPG \377\330... (0xFFD8) + // PDF %PDF-... + // PNG .PNG... + // PBM P1... or P4 (B/W) + // PGM P2... or P5 (Grayscale) + // PPM P3... or P6 (color) + // PS %!PS-Adobe-2.0 or 1.0, no "EPSF"! + // SGI \001\332... (decimal 474) + // TGIF %TGIF... + // TIFF II... or MM... + // XBM ..._bits[]... + // XPM /* XPM */ sometimes missing (f.ex. tgif-export) + // ...static char *... + // XWD \000\000\000\151 (0x00006900) decimal 105 + // + // GZIP \037\213 http://www.ietf.org/rfc/rfc1952.txt + // ZIP PK... http://www.halyava.ru/document/ind_arch.htm + // Z \037\235 UNIX compress + + // paranoia check + if (fn.empty() || !fn.isReadableFile()) + return string(); + + ifstream ifs(fn.toFilesystemEncoding().c_str()); + if (!ifs) + // Couldn't open file... + return string(); + + // gnuzip + static string const gzipStamp = "\037\213"; + + // PKZIP + static string const zipStamp = "PK"; + + // ZIP containers (koffice, openoffice.org etc). + static string const nonzipStamp = "\008\0\0\0mimetypeapplication/"; + + // compress + static string const compressStamp = "\037\235"; + + // DOS binary EPS according to Adobe TN-5002 + static string const binEPSStamp = "\xC5\xD0\xD3\xC6"; + + + // Maximum strings to read + int const max_count = 50; + int count = 0; + + string str; + string format; + bool firstLine = true; + bool backslash = false; + bool maybelatex = false; + int dollars = 0; + while ((count++ < max_count) && format.empty() && !maybelatex) { + if (ifs.eof()) + break; + + getline(ifs, str); + string const stamp = str.substr(0, 2); + if (firstLine && str.size() >= 2) { + // at first we check for a zipped file, because this + // information is saved in the first bytes of the file! + // also some graphic formats which save the information + // in the first line, too. + if (prefixIs(str, gzipStamp)) { + format = "gzip"; + + } else if (stamp == zipStamp && + !contains(str, nonzipStamp)) { + format = "zip"; + + } else if (stamp == compressStamp) { + format = "compress"; + + // the graphics part + } else if (stamp == "BM") { + format = "bmp"; + + } else if (stamp == "\377\330") { + format = "jpg"; + + } else if (stamp == "\001\332") { + format = "sgi"; + } else if (prefixIs(str, binEPSStamp)) { + format = "eps"; + + // PBM family + // Don't need to use str.at(0), str.at(1) because + // we already know that str.size() >= 2 + } else if (str[0] == 'P') { + switch (str[1]) { + case '1': + case '4': + format = "pbm"; + break; + case '2': + case '5': + format = "pgm"; + break; + case '3': + case '6': + format = "ppm"; + } + break; + + } else if ((stamp == "II") || (stamp == "MM")) { + format = "tiff"; + + } else if (prefixIs(str,"%TGIF")) { + format = "tgif"; + + } else if (prefixIs(str,"#FIG")) { + format = "fig"; + + } else if (prefixIs(str,"GIF")) { + format = "gif"; + + } else if (str.size() > 3) { + int const c = ((str[0] << 24) & (str[1] << 16) & + (str[2] << 8) & str[3]); + if (c == 105) { + format = "xwd"; + } + } + + firstLine = false; + } + + if (!format.empty()) + break; + else if (contains(str,"EPSF")) + // dummy, if we have wrong file description like + // %!PS-Adobe-2.0EPSF" + format = "eps"; + + else if (contains(str, "Grace")) + format = "agr"; + + else if (contains(str, "%PDF")) + // autodetect pdf format for graphics inclusion + format = "pdf6"; + + else if (contains(str, "PNG")) + format = "png"; + + else if (contains(str, "%!PS-Adobe")) { + // eps or ps + ifs >> str; + if (contains(str,"EPSF")) + format = "eps"; + else + format = "ps"; + } + + else if (contains(str, "_bits[]")) + format = "xbm"; + + else if (contains(str, "XPM") || contains(str, "static char *")) + format = "xpm"; + + else if (contains(str, "BITPIX")) + format = "fits"; + + else if (contains(str, "\\documentclass") || + contains(str, "\\chapter") || + contains(str, "\\section") || + contains(str, "\\begin") || + contains(str, "\\end") || + contains(str, "$$") || + contains(str, "\\[") || + contains(str, "\\]")) + maybelatex = true; + else { + if (contains(str, '\\')) + backslash = true; + dollars += count_char(str, '$'); + if (backslash && dollars > 1) + // inline equation + maybelatex = true; + } + } + + if (format.empty() && maybelatex && !isBinaryFile(fn)) + format = "latex"; + + if (format.empty()) { + if (ifs.eof()) + LYXERR(Debug::GRAPHICS, "filetools(getFormatFromContents)\n" + "\tFile type not recognised before EOF!"); + } else { + LYXERR(Debug::GRAPHICS, "Recognised Fileformat: " << format); + return format; + } + + LYXERR(Debug::GRAPHICS, "filetools(getFormatFromContents)\n" + << "\tCouldn't find a known format!"); + return string(); +} + +} + + string Formats::getFormatFromFile(FileName const & filename) const { if (filename.empty()) return string(); - string const format = filename.guessFormatFromContents(); + string psformat; + string format; +#ifdef HAVE_MAGIC_H + if (filename.exists()) { + magic_t magic_cookie = magic_open(MAGIC_MIME); + if (magic_cookie) { + if (magic_load(magic_cookie, NULL) != 0) { + LYXERR(Debug::GRAPHICS, "Formats::getFormatFromFile\n" + << "\tCouldn't load magic database - " + << magic_error(magic_cookie)); + } else { + char const * result = magic_file(magic_cookie, + filename.toFilesystemEncoding().c_str()); + string mime; + if (result) + mime = token(result, ';', 0); + else { + LYXERR(Debug::GRAPHICS, "Formats::getFormatFromFile\n" + << "\tCouldn't query magic database - " + << magic_error(magic_cookie)); + } + // our own detection is better for binary files (can be anything) + // and different plain text formats + if (!mime.empty() && mime != "application/octet-stream" && + mime != "text/plain") { + Formats::const_iterator cit = + find_if(formatlist_.begin(), formatlist_.end(), + FormatMimeEqual(mime)); + if (cit != formatlist_.end()) { + LYXERR(Debug::GRAPHICS, "\tgot format from MIME type: " + << mime << " -> " << cit->name()); + // See special eps/ps handling below + if (mime == "application/postscript") + psformat = cit->name(); + else + format = cit->name(); + } + } + } + magic_close(magic_cookie); + // libmagic recognizes as latex also some formats of ours + // such as pstex and pdftex. Therefore we have to perform + // additional checks in this case (bug 9244). + if (!format.empty() && format != "latex") + return format; + } + } +#endif + string const ext = getExtension(filename.absFileName()); - if ((format == "gzip" || format == "zip" || format == "compress") - && !ext.empty()) { - string const & fmt_name = formats.getFormatFromExtension(ext); - if (!fmt_name.empty()) { - Format const * p_format = formats.getFormat(fmt_name); - if (p_format && p_format->zippedNative()) - return p_format->name(); + if (format.empty()) { + // libmagic does not distinguish eps and ps. + // Therefore we need to use our own detection here, but only if it + // recognizes either ps or eps. Otherwise the libmagic guess will + // be better (bug 9146). + format = guessFormatFromContents(filename); + if (!psformat.empty()) { + if (isPostScriptFileFormat(format)) + return format; + else + return psformat; + } + + if (isZippedFileFormat(format) && !ext.empty()) { + string const & fmt_name = getFormatFromExtension(ext); + if (!fmt_name.empty()) { + Format const * p_format = getFormat(fmt_name); + if (p_format && p_format->zippedNative()) + return p_format->name(); + } } + // Don't simply return latex (bug 9244). + if (!format.empty() && format != "latex") + return format; + } + + // Both libmagic and our guessing from contents may return as latex + // also lyx files and our pstex and pdftex formats. In this case we + // give precedence to the format determined by the extension. + if (format == "latex") { + format = getFormatFromExtension(ext); + return format.empty() ? "latex" : format; } - if (!format.empty()) - return format; // try to find a format from the file extension. return getFormatFromExtension(ext); @@ -184,9 +500,9 @@ string Formats::getFormatFromExtension(string const & ext) const // this is ambigous if two formats have the same extension, // but better than nothing Formats::const_iterator cit = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatExtensionsEqual(ext)); - if (cit != formats.end()) { + if (cit != formatlist_.end()) { LYXERR(Debug::GRAPHICS, "\twill guess format from file extension: " << ext << " -> " << cit->name()); return cit->name(); @@ -207,21 +523,38 @@ struct ZippedInfo { /// Mapping absolute pathnames of files to their ZippedInfo metadata. static std::map zipped_; +static Mutex zipped_mutex; bool Formats::isZippedFile(support::FileName const & filename) const { string const & fname = filename.absFileName(); time_t timestamp = filename.lastModified(); + Mutex::Locker lock(&zipped_mutex); map::iterator it = zipped_.find(fname); if (it != zipped_.end() && it->second.timestamp == timestamp) return it->second.zipped; + // FIXME perf: This very expensive function is called on startup on each + // file whic is going to be parsed, and also on svgz icons. Maybe there is a + // quicker way to check whether a file is zipped? I.e. for icons we + // probably just need to check the extension (svgz vs svg). string const & format = getFormatFromFile(filename); bool zipped = (format == "gzip" || format == "zip"); - zipped_.insert(pair(fname, ZippedInfo(zipped, timestamp))); + zipped_.insert(make_pair(fname, ZippedInfo(zipped, timestamp))); return zipped; } +bool Formats::isZippedFileFormat(string const & format) +{ + return contains("gzip zip compress", format) && !format.empty(); +} + + +bool Formats::isPostScriptFileFormat(string const & format) +{ + return format == "ps" || format == "eps"; +} + static string fixCommand(string const & cmd, string const & ext, os::auto_open_mode mode) { @@ -244,8 +577,8 @@ static string fixCommand(string const & cmd, string const & ext, void Formats::setAutoOpen() { - FormatList::iterator fit = formatlist.begin(); - FormatList::iterator const fend = formatlist.end(); + FormatList::iterator fit = formatlist_.begin(); + FormatList::iterator const fend = formatlist_.end(); for ( ; fit != fend ; ++fit) { fit->setViewer(fixCommand(fit->viewer(), fit->extension(), os::VIEW)); fit->setEditor(fixCommand(fit->editor(), fit->extension(), os::EDIT)); @@ -256,10 +589,10 @@ void Formats::setAutoOpen() int Formats::getNumber(string const & name) const { FormatList::const_iterator cit = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (cit != formatlist.end()) - return distance(formatlist.begin(), cit); + if (cit != formatlist_.end()) + return distance(formatlist_.begin(), cit); else return -1; } @@ -268,41 +601,41 @@ int Formats::getNumber(string const & name) const void Formats::add(string const & name) { if (!getFormat(name)) - add(name, name, name, string(), string(), string(), - Format::document); + add(name, name, from_utf8(name), string(), string(), string(), + string(), Format::document); } void Formats::add(string const & name, string const & extensions, - string const & prettyname, string const & shortcut, + docstring const & prettyname, string const & shortcut, string const & viewer, string const & editor, - int flags) + string const & mime, int flags) { FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (it == formatlist.end()) - formatlist.push_back(Format(name, extensions, prettyname, - shortcut, viewer, editor, flags)); + if (it == formatlist_.end()) + formatlist_.push_back(Format(name, extensions, prettyname, + shortcut, viewer, editor, mime, flags)); else *it = Format(name, extensions, prettyname, shortcut, viewer, - editor, flags); + editor, mime, flags); } void Formats::erase(string const & name) { FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (it != formatlist.end()) - formatlist.erase(it); + if (it != formatlist_.end()) + formatlist_.erase(it); } void Formats::sort() { - std::sort(formatlist.begin(), formatlist.end()); + std::sort(formatlist_.begin(), formatlist_.end()); } @@ -310,9 +643,9 @@ void Formats::setViewer(string const & name, string const & command) { add(name); FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (it != formatlist.end()) + if (it != formatlist_.end()) it->setViewer(command); } @@ -321,9 +654,9 @@ void Formats::setEditor(string const & name, string const & command) { add(name); FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (it != formatlist.end()) + if (it != formatlist_.end()) it->setEditor(command); } @@ -362,7 +695,7 @@ bool Formats::view(Buffer const & buffer, FileName const & filename, } } - string command = libScriptSearch(format->viewer()); + string command = format->viewer(); if (format_name == "dvi" && !lyxrc.view_dvi_paper_option.empty()) { @@ -379,8 +712,10 @@ bool Formats::view(Buffer const & buffer, FileName const & filename, if (!contains(command, token_from_format)) command += ' ' + token_from_format; - command = subst(command, token_from_format, quoteName(onlyFileName(filename.toFilesystemEncoding()))); - command = subst(command, token_path_format, quoteName(onlyPath(filename.toFilesystemEncoding()))); + command = subst(command, token_from_format, + quoteName(onlyFileName(filename.toFilesystemEncoding()), quote_shell_filename)); + command = subst(command, token_path_format, + quoteName(onlyPath(filename.toFilesystemEncoding()), quote_shell_filename)); command = subst(command, token_socket_format, quoteName(theServerSocket().address())); LYXERR(Debug::FILES, "Executing command: " << command); // FIXME UNICODE utf8 can be wrong for files @@ -388,7 +723,8 @@ bool Formats::view(Buffer const & buffer, FileName const & filename, PathChanger p(filename.onlyPath()); Systemcall one; - one.startscript(Systemcall::DontWait, command, buffer.filePath()); + one.startscript(Systemcall::DontWait, command, + buffer.filePath(), buffer.layoutPos()); // we can't report any sort of error, since we aren't waiting return true; @@ -407,7 +743,7 @@ bool Formats::edit(Buffer const & buffer, FileName const & filename, // LinkBack files look like PDF, but have the .linkback extension string const ext = getExtension(filename.absFileName()); - if (format_name == "pdf" && ext == "linkback") { + if (format_name == "pdf6" && ext == "linkback") { #ifdef USE_MACOSX_PACKAGING return editLinkBackFile(filename.absFileName().c_str()); #else @@ -447,15 +783,18 @@ bool Formats::edit(Buffer const & buffer, FileName const & filename, if (!contains(command, token_from_format)) command += ' ' + token_from_format; - command = subst(command, token_from_format, quoteName(filename.toFilesystemEncoding())); - command = subst(command, token_path_format, quoteName(onlyPath(filename.toFilesystemEncoding()))); + command = subst(command, token_from_format, + quoteName(filename.toFilesystemEncoding(), quote_shell_filename)); + command = subst(command, token_path_format, + quoteName(onlyPath(filename.toFilesystemEncoding()), quote_shell_filename)); command = subst(command, token_socket_format, quoteName(theServerSocket().address())); LYXERR(Debug::FILES, "Executing command: " << command); // FIXME UNICODE utf8 can be wrong for files buffer.message(_("Executing command: ") + from_utf8(command)); Systemcall one; - one.startscript(Systemcall::DontWait, command, buffer.filePath()); + one.startscript(Systemcall::DontWait, command, + buffer.filePath(), buffer.layoutPos()); // we can't report any sort of error, since we aren't waiting return true; @@ -466,7 +805,7 @@ docstring const Formats::prettyName(string const & name) const { Format const * format = getFormat(name); if (format) - return from_utf8(format->prettyname()); + return format->prettyname(); else return from_utf8(name); } @@ -493,8 +832,10 @@ string const Formats::extensions(string const & name) const namespace { + typedef Translator FlavorTranslator; + FlavorTranslator initFlavorTranslator() { FlavorTranslator f(OutputParams::LATEX, "latex"); @@ -505,15 +846,17 @@ FlavorTranslator initFlavorTranslator() f.addPair(OutputParams::XML, "docbook-xml"); f.addPair(OutputParams::HTML, "xhtml"); f.addPair(OutputParams::TEXT, "text"); + f.addPair(OutputParams::LYX, "lyx"); return f; } FlavorTranslator const & flavorTranslator() { - static FlavorTranslator translator = initFlavorTranslator(); + static FlavorTranslator const translator = initFlavorTranslator(); return translator; } + } @@ -529,9 +872,4 @@ OutputParams::FLAVOR format2flavor(std::string fmt) return flavorTranslator().find(fmt); } */ -Formats formats; - -Formats system_formats; - - } // namespace lyx