//
// Use "rdtsc" to measure performance.
//
// Provides a tiny stopwatch class `perf`, a histogram-based measuring helper
// `mesure`, an empty functor `nop` and a one-time setup routine `perf_init`.
// The stopwatch implementation is selected at compile time:
//   - RDTSC                     : CPU time stamp counter (chosen on WIN32),
//   - QUERY_PERFORMANCE_COUNTER : Win32 QueryPerformanceCounter,
//   - GET_TIME_OF_DAY           : gettimeofday (chosen when WIN32 is not defined).
//
#ifndef __PERFORMANCE__H__
#define __PERFORMANCE__H__

// NOTE(review): the headers below cover every standard name this file uses
// (<iostream>, <map>); <algorithm> and <limits> are kept for code that relies
// on this header pulling them in - confirm against the original include list.
#include <algorithm>
#include <iostream>
#include <limits>
#include <map>

#if defined( WIN32 )
#undef min
#undef max
#endif

#if defined( WIN32 )
#define QUERY_PERFORMANCE_COUNTER
#define RDTSC
#else
#define GET_TIME_OF_DAY
#endif

//
// Portable spelling of "always inline this function". `__forceinline` is an
// MSVC keyword, so it cannot be used directly on the gettimeofday path.
//
#if defined( _MSC_VER )
#define PERF_FORCEINLINE __forceinline
#elif defined( __GNUC__ )
#define PERF_FORCEINLINE inline __attribute__(( always_inline ))
#else
#define PERF_FORCEINLINE inline
#endif

#if defined( RDTSC ) || defined( QUERY_PERFORMANCE_COUNTER )

#include <windows.h>

//
// Best-effort preparation of the process for timing on Windows:
//   1. restrict the process to a single CPU (the highest one set in the
//      current process affinity mask),
//   2. raise the process priority class and the calling thread's priority.
// Failures of the individual API calls are ignored on purpose; measuring
// still works, only with more noise.
//
inline void perf_pin_to_single_cpu_and_raise_priority()
{
    DWORD_PTR processMask = 0;
    DWORD_PTR systemMask = 0;
    if ( GetProcessAffinityMask( GetCurrentProcess(), &processMask, &systemMask )
         && processMask != 0 )
    {
        // Find the index of the highest set bit of the process mask.
        int highestBit = 0;
        while ( ( processMask >>= 1 ) )
            ++highestBit;

        // Shift in the mask's own type: a plain `1 << n` is an int shift and
        // overflows for n >= 31 on 64-bit masks.
        SetProcessAffinityMask( GetCurrentProcess(),
                                DWORD_PTR( 1 ) << highestBit );
    }

    SetPriorityClass( GetCurrentProcess(), REALTIME_PRIORITY_CLASS );
    SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
}

#endif

#if defined( RDTSC )

#include <intrin.h>

//
// Stopwatch based on the CPU time stamp counter.
// Construction records the start tick; elapsed() reports the ticks passed
// since then, corrected by a fixed overhead and divided by `frequency`
// (currently always 1, so the result is in raw ticks).
//
class perf
{
    // Fixed amount subtracted from every reading.
    // NOTE(review): presumably the cost of the measurement itself - confirm.
    enum { overhead_ticks = 60 };

    __int64 beginning;

    // Kept in a function-local static so that this header can be included
    // from several translation units without duplicate definitions.
    static double& frequency()
    {
        static double value = 1.;
        return value;
    }

    PERF_FORCEINLINE __int64 getCurrentTime()
    {
        //
        // Read the time stamp counter.
        //
        return static_cast< __int64 >( __rdtsc() );
    }

public:
    PERF_FORCEINLINE perf()
    {
        //
        // CPUID is a serializing instruction: it ensures all previous
        // instructions are done before the counter is read.
        //
        int cpuInfo[ 4 ];
        __cpuid( cpuInfo, 0 );
        beginning = getCurrentTime();
    }

    // Ticks since construction, minus the fixed overhead, over `frequency`.
    PERF_FORCEINLINE double elapsed()
    {
        const __int64 now = getCurrentTime();
        return double( now - beginning - overhead_ticks ) / frequency();
    }

    // One-time setup: pin to one CPU, raise priority, set the frequency.
    static void init()
    {
        perf_pin_to_single_cpu_and_raise_priority();

        //
        // Get the frequency.
        // Temporarily put 1.
        //
        frequency() = 1.;
    }
};

#elif defined( QUERY_PERFORMANCE_COUNTER )

//
// Stopwatch based on QueryPerformanceCounter.
// Construction records the start count; elapsed() reports the raw counter
// difference.
//
class perf
{
    __int64 beginning;

    // Kept in a function-local static so that this header can be included
    // from several translation units without duplicate definitions.
    static double& frequency()
    {
        static double value = 1.;
        return value;
    }

    PERF_FORCEINLINE __int64 getCurrentTime()
    {
        //
        // This call should be quite fast, yet the WIN32 API may add some
        // additional processing.
        //
        // \todo Vahagn: add our assembly optimised function.
        //
        LARGE_INTEGER counter;
        QueryPerformanceCounter( &counter );
        return counter.QuadPart;
    }

public:
    PERF_FORCEINLINE perf()
    {
        beginning = getCurrentTime();
    }

    // Raw counter ticks since construction.
    // NOTE(review): the division by the frequency was commented out in the
    // original code; confirm whether seconds were intended here.
    PERF_FORCEINLINE double elapsed()
    {
        const __int64 now = getCurrentTime();
        return double( now - beginning );
    }

    // One-time setup: pin to one CPU, raise priority, query the frequency.
    static void init()
    {
        perf_pin_to_single_cpu_and_raise_priority();

        //
        // Get the frequency.
        //
        LARGE_INTEGER counterFrequency;
        counterFrequency.QuadPart = 1;
        QueryPerformanceFrequency( &counterFrequency );
        frequency() = double( counterFrequency.QuadPart );
    }
};

#elif defined ( GET_TIME_OF_DAY )

#include <sys/time.h>

//
// Stopwatch based on gettimeofday.
// Construction records the start time; elapsed() reports seconds since then.
//
class perf
{
    timeval beginning;

public:
    PERF_FORCEINLINE perf()
    {
        gettimeofday( &beginning, 0 );
    }

    // Seconds since construction (seconds plus microseconds / 1e6).
    PERF_FORCEINLINE double elapsed()
    {
        timeval now;
        gettimeofday( &now, 0 );
        return double( now.tv_sec ) - double( beginning.tv_sec )
             + ( double( now.tv_usec ) - double( beginning.tv_usec ) ) / 1000000.0;
    }

    // Nothing to prepare for this implementation.
    static void init()
    {
    }
};

#endif

//
// Calls `fnctr` `nProbes` times, timing every call with `perf`, and returns
// the elapsed value that occurred most often (the smallest such value when
// several are equally frequent).
//
// fnctr   - callable invoked as fnctr() once per probe.
// nProbes - number of timed calls; for nProbes <= 0 nothing is called and
//           0 is returned.
// bPrint  - when true, prints the result and the full histogram of observed
//           values to std::cout.
//
template < class F >
double mesure( F& fnctr, int nProbes = 100000, bool bPrint = false )
{
    typedef std::map< double, int > probs_type;

    // Histogram: elapsed value -> number of probes that produced it.
    probs_type probs;
    for ( int i = 0; i < nProbes; ++i )
    {
        perf pc;
        fnctr();
        const double sample = pc.elapsed();
        ++probs[ sample ];
    }

    // Pick the most frequent value. Both are initialised so that an empty
    // histogram yields a well-defined 0 instead of an indeterminate value.
    double m = 0.;
    int n = 0;
    for ( probs_type::iterator it = probs.begin(); it != probs.end(); ++it )
    {
        if ( it->second > n )
        {
            n = it->second;
            m = it->first;
        }
    }

    if ( bPrint )
    {
        std::cout << "tsc=" << m << " probes=" << nProbes << std::endl;
        std::cout << "===============================" << std::endl;
        for ( probs_type::iterator it = probs.begin(); it != probs.end(); ++it )
            std::cout << "prob=" << it->first << "\t amount=" << it->second << std::endl;
    }

    return m;
}

//
// Functor that does nothing; timing it measures the cost of measuring.
//
struct nop
{
    PERF_FORCEINLINE void operator() ()
    {
    }
};

//
// One-time setup: initialises `perf` and prints the histogram obtained from
// timing the empty functor 100000 times.
//
inline void perf_init()
{
    perf::init();

    // A named object is required: `mesure` takes its functor by non-const
    // reference, which cannot bind to a temporary in standard C++.
    nop empty;
    mesure( empty, 100000, true );

#if 0
    // Disabled experiment, kept for reference.
    typedef std::map< double, int > probs_type;
    probs_type probs;
    double m = 1e300;
    double s = 0;
    int i_last = 0;
    int i = 0;
    int n = 0;
    double c;
    for ( ; n < 1000000; ++i )
    {
        perf pc;
        c = pc.elapsed();
        n = ++probs[ c ];
    }

    std::cout << "tsc=" << c << " probes=" << i << std::endl;
    std::cout << "=========================" << std::endl;
    for ( probs_type::iterator it = probs.begin(); it != probs.end(); ++it )
    {
        std::cout << "prob=" << it->first << "\t amount=" << it->second << std::endl;
    }
#endif
}

#endif//__PERFORMANCE__H__