X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FFormat.cpp;h=a6727a2d8128a8c67e670912ad559b0d66b901f8;hb=b5722962fb0393299e02df0b146522770e98aef6;hp=cd2d238dc81ffe3c5cee127d0503cf3ccebf5bd0;hpb=cc5cc067a037d201f162e7ea71a184cf9bed7758;p=lyx.git diff --git a/src/Format.cpp b/src/Format.cpp index cd2d238dc8..a6727a2d81 100644 --- a/src/Format.cpp +++ b/src/Format.cpp @@ -19,18 +19,23 @@ #include "frontends/alert.h" //to be removed? #include "support/debug.h" +#include "support/docstream.h" #include "support/filetools.h" #include "support/gettext.h" #include "support/lstrings.h" +#include "support/lyxmagic.h" +#include "support/mutex.h" #include "support/os.h" -#include "support/Path.h" +#include "support/PathChanger.h" #include "support/Systemcall.h" #include "support/textutils.h" #include "support/Translator.h" #include +#include +#include -// FIXME: Q_WS_MACX is not available, it's in Qt +// FIXME: Q_OS_MAC is not available, it's in Qt #ifdef USE_MACOSX_PACKAGING #include "support/linkback/LinkBackProxy.h" #endif @@ -53,7 +58,8 @@ string const token_socket_format("$$a"); class FormatNamesEqual : public unary_function { public: FormatNamesEqual(string const & name) - : name_(name) {} + : name_(name) + {} bool operator()(Format const & f) const { return f.name() == name_; @@ -66,33 +72,57 @@ private: class FormatExtensionsEqual : public unary_function { public: FormatExtensionsEqual(string const & extension) - : extension_(extension) {} + : extension_(extension) + {} bool operator()(Format const & f) const { - return f.extension() == extension_; + return f.hasExtension(extension_); } private: string extension_; }; -} //namespace anon -bool operator<(Format const & a, Format const & b) +class FormatMimeEqual : public unary_function { +public: + FormatMimeEqual(string const & mime) + : mime_(mime) + {} + bool operator()(Format const & f) const + { + // The test for empty mime strings is needed since we allow + // formats with empty mime types. + return f.mime() == mime_ && !mime_.empty(); + } +private: + string mime_; +}; + + +} // namespace + +bool Format::formatSorter(Format const * lhs, Format const * rhs) { - // use the compare_ascii_no_case instead of compare_no_case, - // because in turkish, 'i' is not the lowercase version of 'I', - // and thus turkish locale breaks parsing of tags. + return compare_locale(translateIfPossible(lhs->prettyname()), + translateIfPossible(rhs->prettyname())) < 0; +} - return compare_ascii_no_case(a.prettyname(), b.prettyname()) < 0; +bool operator<(Format const & a, Format const & b) +{ + return compare_locale(translateIfPossible(a.prettyname()), + translateIfPossible(b.prettyname())) < 0; } -Format::Format(string const & n, string const & e, string const & p, +Format::Format(string const & n, string const & e, docstring const & p, string const & s, string const & v, string const & ed, - int flags) - : name_(n), extension_(e), prettyname_(p), shortcut_(s), viewer_(v), - editor_(ed), flags_(flags) -{} + string const & m, int flags) + : name_(n), prettyname_(p), shortcut_(s), viewer_(v), + editor_(ed), mime_(m), flags_(flags) +{ + extension_list_ = getVectorFromString(e, ","); + LYXERR(Debug::GRAPHICS, "New Format: n=" << n << ", flags=" << flags); +} bool Format::dummy() const @@ -101,6 +131,19 @@ bool Format::dummy() const } +string const Format::extensions() const +{ + return getStringFromVector(extension_list_, ", "); +} + + +bool Format::hasExtension(string const & e) const +{ + return (find(extension_list_.begin(), extension_list_.end(), e) + != extension_list_.end()); +} + + bool Format::isChildFormat() const { if (name_.empty()) @@ -115,38 +158,336 @@ string const Format::parentFormat() const } +void Format::setExtensions(string const & e) +{ + extension_list_ = getVectorFromString(e, ","); +} + + // This method should return a reference, and throw an exception // if the format named name cannot be found (Lgb) Format const * Formats::getFormat(string const & name) const { FormatList::const_iterator cit = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (cit != formatlist.end()) + if (cit != formatlist_.end()) return &(*cit); else return 0; } +namespace { + +/** Guess the file format name (as in Format::name()) from contents. + * Normally you don't want to use this directly, but rather + * Formats::getFormatFromFile(). + */ +string guessFormatFromContents(FileName const & fn) +{ + // the different filetypes and what they contain in one of the first lines + // (dots are any characters). (Herbert 20020131) + // AGR Grace... + // BMP BM... + // EPS %!PS-Adobe-3.0 EPSF... + // FIG #FIG... + // FITS ...BITPIX... + // GIF GIF... + // JPG \377\330... (0xFFD8) + // PDF %PDF-... + // PNG .PNG... + // PBM P1... or P4 (B/W) + // PGM P2... or P5 (Grayscale) + // PPM P3... or P6 (color) + // PS %!PS-Adobe-2.0 or 1.0, no "EPSF"! + // SGI \001\332... (decimal 474) + // TGIF %TGIF... + // TIFF II... or MM... + // XBM ..._bits[]... + // XPM /* XPM */ sometimes missing (f.ex. tgif-export) + // ...static char *... + // XWD \000\000\000\151 (0x00006900) decimal 105 + // + // GZIP \037\213 http://www.ietf.org/rfc/rfc1952.txt + // ZIP PK... http://www.halyava.ru/document/ind_arch.htm + // Z \037\235 UNIX compress + + // paranoia check + if (fn.empty() || !fn.isReadableFile()) + return string(); + + ifstream ifs(fn.toFilesystemEncoding().c_str()); + if (!ifs) + // Couldn't open file... + return string(); + + // gnuzip + static string const gzipStamp = "\037\213"; + + // PKZIP + static string const zipStamp = "PK"; + + // ZIP containers (koffice, openoffice.org etc). + static string const nonzipStamp = "\010\0\0\0mimetypeapplication/"; + + // compress + static string const compressStamp = "\037\235"; + + // DOS binary EPS according to Adobe TN-5002 + static string const binEPSStamp = "\xC5\xD0\xD3\xC6"; + + + // Maximum strings to read + int const max_count = 50; + int count = 0; + + string str; + string format; + bool firstLine = true; + bool backslash = false; + bool maybelatex = false; + int dollars = 0; + while ((count++ < max_count) && format.empty() && !maybelatex) { + if (ifs.eof()) + break; + + getline(ifs, str); + string const stamp = str.substr(0, 2); + if (firstLine && str.size() >= 2) { + // at first we check for a zipped file, because this + // information is saved in the first bytes of the file! + // also some graphic formats which save the information + // in the first line, too. + if (prefixIs(str, gzipStamp)) { + format = "gzip"; + + } else if (stamp == zipStamp && + !contains(str, nonzipStamp)) { + format = "zip"; + + } else if (stamp == compressStamp) { + format = "compress"; + + // the graphics part + } else if (stamp == "BM") { + format = "bmp"; + + } else if (stamp == "\377\330") { + format = "jpg"; + + } else if (stamp == "\001\332") { + format = "sgi"; + } else if (prefixIs(str, binEPSStamp)) { + format = "eps"; + + // PBM family + // Don't need to use str.at(0), str.at(1) because + // we already know that str.size() >= 2 + } else if (str[0] == 'P') { + switch (str[1]) { + case '1': + case '4': + format = "pbm"; + break; + case '2': + case '5': + format = "pgm"; + break; + case '3': + case '6': + format = "ppm"; + } + break; + + } else if ((stamp == "II") || (stamp == "MM")) { + format = "tiff"; + + } else if (prefixIs(str,"%TGIF")) { + format = "tgif"; + + } else if (prefixIs(str,"#FIG")) { + format = "fig"; + + } else if (prefixIs(str,"GIF")) { + format = "gif"; + + } else if (str.size() > 3) { + int const c = ((str[0] << 24) & (str[1] << 16) & + (str[2] << 8) & str[3]); + if (c == 105) { + format = "xwd"; + } + } + + firstLine = false; + } + + if (!format.empty()) + break; + else if (contains(str,"EPSF")) + // dummy, if we have wrong file description like + // %!PS-Adobe-2.0EPSF" + format = "eps"; + + else if (contains(str, "Grace")) + format = "agr"; + + else if (contains(str, "%PDF")) + // autodetect pdf format for graphics inclusion + format = "pdf6"; + + else if (contains(str, " EMF")) + format = "emf"; + + else if (contains(str, "PNG")) + format = "png"; + + else if (contains(str, "%!PS-Adobe")) { + // eps or ps + ifs >> str; + if (contains(str,"EPSF")) + format = "eps"; + else + format = "ps"; + } + + else if (contains(str, "_bits[]")) + format = "xbm"; + + else if (contains(str, "XPM") || contains(str, "static char *")) + format = "xpm"; + + else if (contains(str, "BITPIX")) + format = "fits"; + + else if (contains(str, "\\documentclass") || + contains(str, "\\chapter") || + contains(str, "\\section") || + contains(str, "\\begin") || + contains(str, "\\end") || + contains(str, "$$") || + contains(str, "\\[") || + contains(str, "\\]")) + maybelatex = true; + else { + if (contains(str, '\\')) + backslash = true; + dollars += count_char(str, '$'); + if (backslash && dollars > 1) + // inline equation + maybelatex = true; + } + } + + if (format.empty() && maybelatex && !isBinaryFile(fn)) + format = "latex"; + + if (format.empty()) { + if (ifs.eof()) + LYXERR(Debug::GRAPHICS, "filetools(getFormatFromContents)\n" + "\tFile type not recognised before EOF!"); + } else { + LYXERR(Debug::GRAPHICS, "Recognised Fileformat: " << format); + return format; + } + + LYXERR(Debug::GRAPHICS, "filetools(getFormatFromContents)\n" + << "\tCouldn't find a known format!"); + return string(); +} + +} // namespace + + string Formats::getFormatFromFile(FileName const & filename) const { if (filename.empty()) return string(); - string const format = filename.guessFormatFromContents(); - if (!format.empty()) - return format; + string psformat; + string format; + if (filename.exists()) { + // one instance of Magic that will be reused for next calls + // This avoids to read the magic file everytime + // If libmagic is not available, Magic::file returns an empty string. + static Magic magic; + string const result = magic.file(filename.toFilesystemEncoding()); + string const mime = token(result, ';', 0); + // our own detection is better for binary files (can be anything) + // and different plain text formats + if (!mime.empty() && mime != "application/octet-stream" && + mime != "text/plain") { + Formats::const_iterator cit = + find_if(formatlist_.begin(), formatlist_.end(), + FormatMimeEqual(mime)); + if (cit != formatlist_.end()) { + LYXERR(Debug::GRAPHICS, "\tgot format from MIME type: " + << mime << " -> " << cit->name()); + // See special eps/ps handling below + if (mime == "application/postscript") + psformat = cit->name(); + else + format = cit->name(); + } + } + + // libmagic recognizes as latex also some formats of ours + // such as pstex and pdftex. Therefore we have to perform + // additional checks in this case (bug 9244). + if (!format.empty() && format != "latex") + return format; + } - // try to find a format from the file extension. string const ext = getExtension(filename.absFileName()); + if (format.empty()) { + // libmagic does not distinguish eps and ps. + // Therefore we need to use our own detection here, but only if it + // recognizes either ps or eps. Otherwise the libmagic guess will + // be better (bug 9146). + format = guessFormatFromContents(filename); + if (!psformat.empty()) { + if (isPostScriptFileFormat(format)) + return format; + else + return psformat; + } + + if (isZippedFileFormat(format) && !ext.empty()) { + string const & fmt_name = getFormatFromExtension(ext); + if (!fmt_name.empty()) { + Format const * p_format = getFormat(fmt_name); + if (p_format && p_format->zippedNative()) + return p_format->name(); + } + } + // Don't simply return latex (bug 9244). + if (!format.empty() && format != "latex") + return format; + } + + // Both libmagic and our guessing from contents may return as latex + // also lyx files and our pstex and pdftex formats. In this case we + // give precedence to the format determined by the extension. + if (format == "latex") { + format = getFormatFromExtension(ext); + return format.empty() ? "latex" : format; + } + + // try to find a format from the file extension. + return getFormatFromExtension(ext); +} + + +string Formats::getFormatFromExtension(string const & ext) const +{ if (!ext.empty()) { // this is ambigous if two formats have the same extension, // but better than nothing Formats::const_iterator cit = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatExtensionsEqual(ext)); - if (cit != formats.end()) { + if (cit != formatlist_.end()) { LYXERR(Debug::GRAPHICS, "\twill guess format from file extension: " << ext << " -> " << cit->name()); return cit->name(); @@ -156,6 +497,49 @@ string Formats::getFormatFromFile(FileName const & filename) const } +/// Used to store last timestamp of file and whether it is (was) zipped +struct ZippedInfo { + bool zipped; + std::time_t timestamp; + ZippedInfo(bool zipped, std::time_t timestamp) + : zipped(zipped), timestamp(timestamp) { } +}; + + +/// Mapping absolute pathnames of files to their ZippedInfo metadata. +static std::map zipped_; +static Mutex zipped_mutex; + + +bool Formats::isZippedFile(support::FileName const & filename) const { + string const & fname = filename.absFileName(); + time_t timestamp = filename.lastModified(); + Mutex::Locker lock(&zipped_mutex); + map::iterator it = zipped_.find(fname); + if (it != zipped_.end() && it->second.timestamp == timestamp) + return it->second.zipped; + // FIXME perf: This very expensive function is called on startup on each + // file whic is going to be parsed, and also on svgz icons. Maybe there is a + // quicker way to check whether a file is zipped? I.e. for icons we + // probably just need to check the extension (svgz vs svg). + string const & format = getFormatFromFile(filename); + bool zipped = (format == "gzip" || format == "zip"); + zipped_.insert(make_pair(fname, ZippedInfo(zipped, timestamp))); + return zipped; +} + + +bool Formats::isZippedFileFormat(string const & format) +{ + return contains("gzip zip compress", format) && !format.empty(); +} + + +bool Formats::isPostScriptFileFormat(string const & format) +{ + return format == "ps" || format == "eps"; +} + static string fixCommand(string const & cmd, string const & ext, os::auto_open_mode mode) { @@ -178,8 +562,8 @@ static string fixCommand(string const & cmd, string const & ext, void Formats::setAutoOpen() { - FormatList::iterator fit = formatlist.begin(); - FormatList::iterator const fend = formatlist.end(); + FormatList::iterator fit = formatlist_.begin(); + FormatList::iterator const fend = formatlist_.end(); for ( ; fit != fend ; ++fit) { fit->setViewer(fixCommand(fit->viewer(), fit->extension(), os::VIEW)); fit->setEditor(fixCommand(fit->editor(), fit->extension(), os::EDIT)); @@ -190,10 +574,10 @@ void Formats::setAutoOpen() int Formats::getNumber(string const & name) const { FormatList::const_iterator cit = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (cit != formatlist.end()) - return distance(formatlist.begin(), cit); + if (cit != formatlist_.end()) + return distance(formatlist_.begin(), cit); else return -1; } @@ -202,41 +586,41 @@ int Formats::getNumber(string const & name) const void Formats::add(string const & name) { if (!getFormat(name)) - add(name, name, name, string(), string(), string(), - Format::document); + add(name, name, from_utf8(name), string(), string(), string(), + string(), Format::document); } -void Formats::add(string const & name, string const & extension, - string const & prettyname, string const & shortcut, +void Formats::add(string const & name, string const & extensions, + docstring const & prettyname, string const & shortcut, string const & viewer, string const & editor, - int flags) + string const & mime, int flags) { FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (it == formatlist.end()) - formatlist.push_back(Format(name, extension, prettyname, - shortcut, viewer, editor, flags)); + if (it == formatlist_.end()) + formatlist_.push_back(Format(name, extensions, prettyname, + shortcut, viewer, editor, mime, flags)); else - *it = Format(name, extension, prettyname, shortcut, viewer, - editor, flags); + *it = Format(name, extensions, prettyname, shortcut, viewer, + editor, mime, flags); } void Formats::erase(string const & name) { FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (it != formatlist.end()) - formatlist.erase(it); + if (it != formatlist_.end()) + formatlist_.erase(it); } void Formats::sort() { - std::sort(formatlist.begin(), formatlist.end()); + std::sort(formatlist_.begin(), formatlist_.end()); } @@ -244,9 +628,9 @@ void Formats::setViewer(string const & name, string const & command) { add(name); FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (it != formatlist.end()) + if (it != formatlist_.end()) it->setViewer(command); } @@ -255,9 +639,9 @@ void Formats::setEditor(string const & name, string const & command) { add(name); FormatList::iterator it = - find_if(formatlist.begin(), formatlist.end(), + find_if(formatlist_.begin(), formatlist_.end(), FormatNamesEqual(name)); - if (it != formatlist.end()) + if (it != formatlist_.end()) it->setEditor(command); } @@ -296,7 +680,29 @@ bool Formats::view(Buffer const & buffer, FileName const & filename, } } - string command = libScriptSearch(format->viewer()); + string command = format->viewer(); + + // Escape backslashes if not already in double or single quotes. + // We cannot simply quote the whole command as there may be arguments. + if (contains(command, '\\')) { + bool inquote1 = false; + bool inquote2 = false; + string::iterator cit = command.begin(); + for (; cit != command.end(); ++cit) { + switch (*cit) { + case '"': + inquote1 = !inquote1; + break; + case '\'': + inquote2 = !inquote2; + break; + case '\\': + if (!inquote1 && !inquote2) + cit = ++command.insert(cit, '\\'); + break; + } + } + } if (format_name == "dvi" && !lyxrc.view_dvi_paper_option.empty()) { @@ -313,8 +719,10 @@ bool Formats::view(Buffer const & buffer, FileName const & filename, if (!contains(command, token_from_format)) command += ' ' + token_from_format; - command = subst(command, token_from_format, quoteName(onlyFileName(filename.toFilesystemEncoding()))); - command = subst(command, token_path_format, quoteName(onlyPath(filename.toFilesystemEncoding()))); + command = subst(command, token_from_format, + quoteName(onlyFileName(filename.toFilesystemEncoding()), quote_shell_filename)); + command = subst(command, token_path_format, + quoteName(onlyPath(filename.toFilesystemEncoding()), quote_shell_filename)); command = subst(command, token_socket_format, quoteName(theServerSocket().address())); LYXERR(Debug::FILES, "Executing command: " << command); // FIXME UNICODE utf8 can be wrong for files @@ -322,7 +730,8 @@ bool Formats::view(Buffer const & buffer, FileName const & filename, PathChanger p(filename.onlyPath()); Systemcall one; - one.startscript(Systemcall::DontWait, command, buffer.filePath()); + one.startscript(Systemcall::DontWait, command, + buffer.filePath(), buffer.layoutPos()); // we can't report any sort of error, since we aren't waiting return true; @@ -341,7 +750,7 @@ bool Formats::edit(Buffer const & buffer, FileName const & filename, // LinkBack files look like PDF, but have the .linkback extension string const ext = getExtension(filename.absFileName()); - if (format_name == "pdf" && ext == "linkback") { + if (format_name == "pdf6" && ext == "linkback") { #ifdef USE_MACOSX_PACKAGING return editLinkBackFile(filename.absFileName().c_str()); #else @@ -381,15 +790,18 @@ bool Formats::edit(Buffer const & buffer, FileName const & filename, if (!contains(command, token_from_format)) command += ' ' + token_from_format; - command = subst(command, token_from_format, quoteName(filename.toFilesystemEncoding())); - command = subst(command, token_path_format, quoteName(onlyPath(filename.toFilesystemEncoding()))); + command = subst(command, token_from_format, + quoteName(filename.toFilesystemEncoding(), quote_shell_filename)); + command = subst(command, token_path_format, + quoteName(onlyPath(filename.toFilesystemEncoding()), quote_shell_filename)); command = subst(command, token_socket_format, quoteName(theServerSocket().address())); LYXERR(Debug::FILES, "Executing command: " << command); // FIXME UNICODE utf8 can be wrong for files buffer.message(_("Executing command: ") + from_utf8(command)); Systemcall one; - one.startscript(Systemcall::DontWait, command, buffer.filePath()); + one.startscript(Systemcall::DontWait, command, + buffer.filePath(), buffer.layoutPos()); // we can't report any sort of error, since we aren't waiting return true; @@ -400,7 +812,7 @@ docstring const Formats::prettyName(string const & name) const { Format const * format = getFormat(name); if (format) - return from_utf8(format->prettyname()); + return format->prettyname(); else return from_utf8(name); } @@ -416,9 +828,21 @@ string const Formats::extension(string const & name) const } +string const Formats::extensions(string const & name) const +{ + Format const * format = getFormat(name); + if (format) + return format->extensions(); + else + return name; +} + + namespace { + typedef Translator FlavorTranslator; + FlavorTranslator initFlavorTranslator() { FlavorTranslator f(OutputParams::LATEX, "latex"); @@ -429,16 +853,18 @@ FlavorTranslator initFlavorTranslator() f.addPair(OutputParams::XML, "docbook-xml"); f.addPair(OutputParams::HTML, "xhtml"); f.addPair(OutputParams::TEXT, "text"); + f.addPair(OutputParams::LYX, "lyx"); return f; } FlavorTranslator const & flavorTranslator() { - static FlavorTranslator translator = initFlavorTranslator(); + static FlavorTranslator const translator = initFlavorTranslator(); return translator; } -} + +} // namespace std::string flavor2format(OutputParams::FLAVOR flavor) @@ -453,9 +879,4 @@ OutputParams::FLAVOR format2flavor(std::string fmt) return flavorTranslator().find(fmt); } */ -Formats formats; - -Formats system_formats; - - } // namespace lyx