* Full author contact details are available in file CREDITS.
*/
-/** How to use this trivial profiler:
+/**
+ * ==== HOW TO USE THIS TRIVIAL PROFILER:
*
* * at the beginning of the interesting block, just add:
* PROFILE_THIS_BLOCK(some_identifier)
*
* * when the program ends, statistics will be sent to standard error, like:
*
- * ##### some_identifier: 6.48475usec, count=25405
+ * #pmprof# some_identifier: 6.51usec, count=7120, total=46.33msec
+ *
+ * * It is also possible to profile caching schemes. All it takes is an additional
+ * PROFILE_CACHE_MISS(some_identifier)
+ * in the place that takes care of cache misses. Then the output at the end will change to
+ *
+ * #pmprof# some_identifier: 6.51usec, count=7120, total=46.33msec
+ * hit: 96%, 4.36usec, count=6849, total=29.89msec
+ * miss: 3%, 60.65usec, count=271, total=16.43msec
+ *
+ * * if DISABLE_PMPROF is defined before including pmprof.h, the
+ * profiler is replaced by empty macros. This is useful for quickly
+ * checking the overhead.
+ *
+ * ==== ABOUT PROFILING SCOPE:
*
* The code measured by the profiler corresponds to the lifetime of a
* local variable declared by the PROFILE_THIS_BLOCK macro.
#ifndef PMPROF_H
#define PMPROF_H
-#ifdef _WIN32
-#include <windows.h>
+#if defined(DISABLE_PMPROF)
+
+// Make pmprof an empty shell
+#define PROFILE_THIS_BLOCK(a)
+#define PROFILE_CACHE_MISS(a)
+
#else
-#include <sys/time.h>
-#endif
+#include <chrono>
#include <iomanip>
#include <iostream>
#error Profiling is not usable when run-time debugging is in effect
#endif
-#ifdef _WIN32
-/* This function does not really returns the "time of day",
- * but it will suffice to evaluate elapsed times.
- */
-int gettimeofday(struct timeval * tv, struct timezone * /*tz*/)
+namespace _pmprof {
+
+using namespace std;
+using namespace chrono;
+
+namespace {
+
+void dump_time(system_clock::duration value)
{
- LARGE_INTEGER frequency, t;
- QueryPerformanceFrequency(&frequency);
- QueryPerformanceCounter(&t);
+ auto musec = duration_cast<microseconds>(value).count();
+ cerr << fixed << setprecision(2);
+ if (musec >= 1000000)
+ cerr << musec / 1000000.0 << " s";
+ else if (musec >= 1000)
+ cerr << musec / 1000.0 << " ms";
+ else
+ cerr << musec << " us";
+}
- tv->tv_sec = long(t.QuadPart / frequency.QuadPart);
- tv->tv_usec = long((1000000.0 * (t.QuadPart % frequency.QuadPart)) / frequency.QuadPart);
- return 0;
+void dump(system_clock::duration total, unsigned long long count) {
+ if (count > 0) {
+ dump_time(total / count);
+ cerr << ", count=" << count << ", total=";
+ dump_time(total);
+ } else
+ std::cerr << "no data";
+ cerr << endl;
}
-#endif // _WIN32
+} // namespace
+
/* Helper class for gathering data. Instantiate this as a static
* variable, so that its destructor will be executed when the program
* ends.
*/
-
-class PMProfStat {
+class stat {
public:
- PMProfStat(char const * name)
- : name_(name), sec_(0), usec_(0), count_(0) {};
-
- ~PMProfStat() {
- if (count_>0) {
- double total = 0.001 * (sec_ * 1000000 + usec_);
- std::cerr << std::fixed << std::setprecision(2)
- << "#pmprof# " << name_ << ": "
- << total / count_
- << "msec, count=" << count_
- << ", total=" << total << "msec"
- << std::endl;
- }
+ stat(char const * name) : name_(name) {}
+
+ ~stat() {
+ if (count_ + miss_count_ > 0) {
+ if (miss_count_ == 0) {
+ cerr << "#pmprof# " << name_ << ": ";
+ dump(dur_, count_);
+ }
+ else {
+ cerr << "#pmprof# " << name_ << ": ";
+ dump(dur_ + miss_dur_, count_ + miss_count_);
+ cerr << " hit: " << 100 * count_ / (count_ + miss_count_) << "%, ";
+ dump(dur_, count_);
+ cerr << " miss: " << 100 * miss_count_ / (count_ + miss_count_) << "%, ";
+ dump(miss_dur_, miss_count_);
+ }
+ } else
+ std::cerr << "#pmprof# " << name_ << ": no data" << std::endl;
}
- void add(const long long s, const long long u) {
- sec_ += s;
- usec_ += u;
- count_++;
+ void add(system_clock::duration d, const bool hit) {
+ if (hit) {
+ dur_ += d;
+ count_++;
+ } else {
+ miss_dur_ += d;
+ miss_count_++;
+ }
}
private:
char const * name_;
- long long sec_, usec_;
- unsigned long long count_;
+ system_clock::duration dur_, miss_dur_;
+ unsigned long long count_ = 0, miss_count_ = 0;
};
/* Helper class which gathers data at the end of the scope. One
* instance of this one should be created at each execution of the
* block. At the end of the block, it sends statistics to the static
- * PMProfStat object.
+ * stat object.
*/
-class PMProfInstance {
+class instance {
public:
- PMProfInstance(PMProfStat * stat) : stat_(stat)
+ instance(stat * stat) : hit(true), stat_(stat)
{
- gettimeofday(&before_, 0);
+ before_ = system_clock::now();
}
- ~PMProfInstance() {
- gettimeofday(&after_, 0);
- stat_->add(after_.tv_sec - before_.tv_sec,
- after_.tv_usec - before_.tv_usec);
+ ~instance() {
+ stat_->add(system_clock::now() - before_, hit);
}
+ bool hit;
+
private:
- timeval before_, after_;
- PMProfStat * stat_;
+ system_clock::time_point before_;
+ stat * stat_;
};
+}
#define PROFILE_THIS_BLOCK(a) \
- static PMProfStat PMPS_##a(#a);\
- PMProfInstance PMPI_##a(&PMPS_##a);
+ static _pmprof::stat _pmps_##a(#a); \
+ _pmprof::instance _pmpi_##a(&_pmps_##a);
+
+#define PROFILE_CACHE_MISS(a) \
+ _pmpi_##a.hit = false;
+#endif // !defined(DISABLE_PMPROF)
#endif