3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
6 * \author Jean-Marc Lasgouttes
8 * Full author contact details are available in file CREDITS.
12 * ==== HOW TO USE THIS TRIVIAL PROFILER:
14 * * at the beginning of the interesting block, just add:
15 * PROFILE_THIS_BLOCK(some_identifier)
17 * A trailing semicolon can be added at your discretion.
19 * * when the program ends, statistics will be sent to standard error, like:
21 * #pmprof# some_identifier: 6.51usec, count=7120, total=46.33msec
23 * * It is also possible to profile caching schemes. All it takes is an additional
24 * PROFILE_CACHE_MISS(some_identifier)
25 * in the place that takes care of cache misses. Then the output at the end will change to
27 * #pmprof# some_identifier: 6.51usec, count=7120, total=46.33msec
28 * hit: 96%, 4.36usec, count=6849, total=29.89msec
29 * miss: 3%, 60.65usec, count=271, total=16.43msec
32 * ==== ABOUT PROFILING SCOPE:
34 * The code measured by the profiler corresponds to the lifetime of a
35 * local variable declared by the PROFILE_THIS_BLOCK macro.
37 * Some examples of profiling scope: In the snippets below, c1, c2...
38 * designate code chunks, and the identifiers of profiling blocks are
39 * chosen to reflect what they count.
43 * PROFILE_THIS_BLOCK(c2)
49 * PROFILE_THIS_BLOCK(c1_c2)
51 * PROFILE_THIS_BLOCK(c2)
58 * PROFILE_THIS_BLOCK(c1)
61 * PROFILE_THIS_BLOCK(c2)
67 * PROFILE_THIS_BLOCK(c1_c2_c3)
70 * PROFILE_THIS_BLOCK(c2)
76 * Influence of identifier names: they are mainly used for display
77 * purpose, but the same name should not be used twice in the same
81 * PROFILE_THIS_BLOCK(foo)
83 * PROFILE_THIS_BLOCK(foo) // error: identifier clash
87 * In the example below, c1+c2 and c2 are counted separately, but in
88 * the output, both are confusingly labelled `foo'.
91 * PROFILE_THIS_BLOCK(foo)
94 * PROFILE_THIS_BLOCK(foo) // error: identifier clash
107 #include <sys/time.h>
114 #if defined(__GNUG__) && defined(_GLIBCXX_DEBUG)
115 #error Profiling is not usable when run-time debugging is in effect
119 /* This function does not really returns the "time of day",
120 * but it will suffice to evaluate elapsed times.
122 int gettimeofday(struct timeval * tv, struct timezone * /*tz*/)
124 LARGE_INTEGER frequency, t;
125 QueryPerformanceFrequency(&frequency);
126 QueryPerformanceCounter(&t);
128 tv->tv_sec = long(t.QuadPart / frequency.QuadPart);
129 tv->tv_usec = long((1000000.0 * (t.QuadPart % frequency.QuadPart)) / frequency.QuadPart);
137 void dump(long long sec, long long usec, unsigned long long count) {
138 double const total = sec * 1000000 + usec;
139 std::cerr << std::fixed << std::setprecision(2)
141 << "usec, count=" << count
142 << ", total=" << total * 0.001 << "msec"
149 /* Helper class for gathering data. Instantiate this as a static
150 * variable, so that its destructor will be executed when the program
157 PMProfStat(char const * name) : name_(name), sec_(0), usec_(0), count_(0),
158 miss_sec_(0), miss_usec_(0), miss_count_(0) {}
162 if (miss_count_ == 0) {
163 std::cerr << "#pmprof# " << name_ << ": ";
164 dump(sec_, usec_, count_);
167 std::cerr << "#pmprof# " << name_ << ": ";
168 dump(sec_ + miss_sec_, usec_ + miss_usec_, count_ + miss_count_);
169 std::cerr << " hit: " << 100 * count_ / (count_ + miss_count_) << "%, ";
170 dump(sec_, usec_, count_);
171 std::cerr << " miss: " << 100 * miss_count_ / (count_ + miss_count_) << "%, ";
172 dump(miss_sec_, miss_usec_, miss_count_);
177 void add(const long long s, const long long u, const bool hit) {
191 long long sec_, usec_;
192 unsigned long long count_;
193 long long miss_sec_, miss_usec_;
194 unsigned long long miss_count_;
198 /* Helper class which gathers data at the end of the scope. One
199 * instance of this one should be created at each execution of the
200 * block. At the end of the block, it sends statistics to the static
203 class PMProfInstance {
205 PMProfInstance(PMProfStat * stat) : hit(true), stat_(stat)
207 gettimeofday(&before_, 0);
211 gettimeofday(&after_, 0);
212 stat_->add(after_.tv_sec - before_.tv_sec,
213 after_.tv_usec - before_.tv_usec, hit);
219 timeval before_, after_;
224 #define PROFILE_THIS_BLOCK(a) \
225 static PMProfStat PMPS_##a(#a);\
226 PMProfInstance PMPI_##a(&PMPS_##a);
228 #define PROFILE_CACHE_MISS(a) \
229 PMPI_##a.hit = false;