287 lines
5.1 KiB
C++
287 lines
5.1 KiB
C++
//
|
|
// Use "rdtsc" (or gettimeofday on non-Windows platforms) to measure performance.
|
|
//
|
|
//
|
|
#ifndef __PERFORMANCE__H__
|
|
#define __PERFORMANCE__H__
|
|
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <limits>
|
|
#include <math.h>
|
|
#include <map>
|
|
|
|
#if defined( WIN32 )
|
|
#undef min
|
|
#undef max
|
|
#endif
|
|
|
|
#if defined( WIN32 )
|
|
#define QUERY_PERFORMANCE_COUNTER
|
|
#define RDTSC
|
|
#else
|
|
#define GET_TIME_OF_DAY
|
|
#endif
|
|
|
|
#if defined( RDTSC )
|
|
#include <windows.h>
|
|
|
|
class perf
|
|
{
|
|
__int64 beginning;
|
|
static double frequency;
|
|
|
|
__forceinline __int64 getCurrentTime()
|
|
{
|
|
__asm
|
|
{
|
|
//
|
|
// Read the time stamp counter.
|
|
//
|
|
rdtsc
|
|
}
|
|
}
|
|
|
|
public:
|
|
__forceinline perf()
|
|
{
|
|
// ::Sleep( 0 );
|
|
//
|
|
// Serialized instruction ensure all previouse
|
|
// instructions a done befor reading the performance
|
|
// counter.
|
|
//
|
|
__asm xor eax,eax
|
|
__asm cpuid
|
|
beginning = getCurrentTime();
|
|
// __asm xor eax,eax
|
|
// __asm cpuid
|
|
}
|
|
|
|
__forceinline double elapsed()
|
|
{
|
|
__int64 now = getCurrentTime();
|
|
return double(now - beginning - 60 ) / frequency;
|
|
}
|
|
|
|
static void init()
|
|
{
|
|
//
|
|
// Use only one fixed CPU
|
|
//
|
|
BOOL b;
|
|
DWORD_PTR proc_affi;
|
|
DWORD_PTR sys_affi;
|
|
DWORD_PTR exclud_affi;
|
|
GetProcessAffinityMask( GetCurrentProcess(), &proc_affi, &sys_affi );
|
|
exclud_affi = proc_affi & ~sys_affi;
|
|
proc_affi = ( exclud_affi ) ? proc_affi : proc_affi;
|
|
int i = 0;
|
|
while (( proc_affi >>= 1 )) ++i;
|
|
proc_affi = 1 << i;
|
|
b = SetProcessAffinityMask( GetCurrentProcess(), proc_affi );
|
|
//
|
|
// Set the priority of thread high.
|
|
//
|
|
b = SetPriorityClass( GetCurrentProcess(), REALTIME_PRIORITY_CLASS );
|
|
b = SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
|
|
|
|
//
|
|
// Get the frequency.
|
|
// Temporaily put 1.
|
|
//
|
|
frequency = 1.;
|
|
}
|
|
};
|
|
|
|
double perf::frequency;
|
|
|
|
#elif defined( QUERY_PERFORMANCE_COUNTER )
|
|
#include <windows.h>
|
|
|
|
class perf
|
|
{
|
|
__int64 beginning;
|
|
static double frequency;
|
|
|
|
__forceinline __int64 getCurrentTime()
|
|
{
|
|
//
|
|
// This call must be quite fast. Since, in x86 architectur
|
|
// it is one instruction. Yet WIN32 API might added some
|
|
// additional processing.
|
|
//
|
|
// \todo Vahagn: add our assembly optimised function.
|
|
//
|
|
__int64 tc;
|
|
QueryPerformanceCounter(
|
|
reinterpret_cast<LARGE_INTEGER*>( &tc )
|
|
);
|
|
return tc;
|
|
}
|
|
|
|
public:
|
|
__forceinline perf()
|
|
{
|
|
// ::Sleep( 0 );
|
|
beginning = getCurrentTime();
|
|
}
|
|
|
|
__forceinline double elapsed()
|
|
{
|
|
__int64 now = getCurrentTime();
|
|
return double(now - beginning ); // frequency;
|
|
}
|
|
|
|
static void init()
|
|
{
|
|
//
|
|
// Use only one fixed CPU
|
|
//
|
|
BOOL b;
|
|
DWORD_PTR proc_affi;
|
|
DWORD_PTR sys_affi;
|
|
DWORD_PTR exclud_affi;
|
|
GetProcessAffinityMask( GetCurrentProcess(), &proc_affi, &sys_affi );
|
|
exclud_affi = proc_affi & ~sys_affi;
|
|
proc_affi = ( exclud_affi ) ? proc_affi : proc_affi;
|
|
int i = 0;
|
|
while (( proc_affi >>= 1 )) ++i;
|
|
proc_affi = 1 << i;
|
|
b = SetProcessAffinityMask( GetCurrentProcess(), proc_affi );
|
|
//
|
|
// Set the priority of thread high.
|
|
//
|
|
b = SetPriorityClass( GetCurrentProcess(), REALTIME_PRIORITY_CLASS );
|
|
b = SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
|
|
|
|
//
|
|
// Get the frequency.
|
|
//
|
|
__int64 pf;
|
|
QueryPerformanceFrequency(
|
|
reinterpret_cast<LARGE_INTEGER*>( &pf )
|
|
);
|
|
//
|
|
// Get the frequency.
|
|
//
|
|
frequency = double(pf);
|
|
}
|
|
};
|
|
|
|
double perf::frequency;
|
|
|
|
#elif defined ( GET_TIME_OF_DAY )
|
|
|
|
#include <sys/time.h>
|
|
|
|
//
// Stopwatch based on gettimeofday(). Constructing an object records the
// starting time; elapsed() reports the seconds passed since then.
//
// The member functions are defined in the class body and are therefore
// implicitly inline; the MSVC-only "__forceinline" keyword is not used
// because this variant is selected only when WIN32 is not defined.
//
class perf
{
    timeval beginning;

public:
    //
    // Start the measurement.
    //
    perf()
    {
        gettimeofday( &beginning, 0 );
    }

    //
    // Return the time passed since construction, in seconds
    // (whole seconds plus the microsecond part scaled by 1e-6).
    //
    double elapsed() const
    {
        timeval now;
        gettimeofday( &now, 0 );
        return double( now.tv_sec ) - double( beginning.tv_sec )
             + ( double( now.tv_usec ) - double( beginning.tv_usec ) ) / 1000000.0;
    }

    //
    // Nothing to prepare for this implementation; present so that all
    // variants of perf offer the same interface.
    //
    static void init()
    {
    }
};
|
|
|
|
#endif
|
|
|
|
template<class F >
|
|
double mesure( F& fnctr, int nProbes = 100000, bool bPrint = false )
|
|
{
|
|
typedef std::map<double,int> probs_type;
|
|
|
|
probs_type probs;
|
|
int n = 0;
|
|
for ( int i = 0; i < nProbes; ++i )
|
|
{
|
|
perf pc;
|
|
fnctr();
|
|
double m = pc.elapsed();
|
|
n = ++probs[ m ];
|
|
}
|
|
|
|
double m;
|
|
n = 0;
|
|
for ( probs_type::iterator it = probs.begin();
|
|
it != probs.end();
|
|
++it )
|
|
{
|
|
if ( it->second > n )
|
|
{
|
|
n = it->second;
|
|
m = it->first;
|
|
}
|
|
}
|
|
|
|
if ( bPrint )
|
|
{
|
|
std::cout << "tsc=" << m << " probes=" << nProbes << std::endl;
|
|
std::cout << "===============================" << std::endl;
|
|
for ( probs_type::iterator it = probs.begin();
|
|
it != probs.end();
|
|
++it )
|
|
std::cout << "prob=" << it->first << "\t amount=" << it->second << std::endl;
|
|
}
|
|
|
|
return m;
|
|
};
|
|
|
|
//
// Functor that does nothing. Measuring it gives the overhead of the
// measurement itself.
//
// operator() is defined in the class body and is therefore implicitly
// inline; the MSVC-only "__forceinline" keyword is not used because
// this type is needed on every platform. It is const so that it can be
// invoked on const objects as well.
//
struct nop
{
    void operator() () const
    {
    }
};
|
|
|
|
void perf_init()
|
|
{
|
|
perf::init();
|
|
|
|
mesure<nop>( nop(), 100000, true );
|
|
|
|
#if 0
|
|
|
|
|
|
typedef std::map<double,int> probs_type;
|
|
|
|
probs_type probs;
|
|
double m = 1e300;
|
|
double s = 0;
|
|
int i_last = 0;
|
|
int i = 0;
|
|
int n = 0;
|
|
double c;
|
|
for ( ; n < 1000000; ++i )
|
|
{
|
|
perf pc;
|
|
c = pc.elapsed();
|
|
n = ++probs[ c ];
|
|
}
|
|
std::cout << "tsc=" << c << " probes=" << i << std::endl;
|
|
std::cout << "=========================" << std::endl;
|
|
for ( probs_type::iterator it = probs.begin();
|
|
it != probs.end();
|
|
++it )
|
|
{
|
|
std::cout << "prob=" << it->first << "\t amount=" << it->second << std::endl;
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
#endif//__PERFORMANCE__H__
|