Run the memory benchmark through a linked list to reduce the overhead of the surrounding code.

This commit is contained in:
2013-07-09 14:52:22 +04:00
parent 395c6f49db
commit d45f136252

View File

@@ -1,6 +1,6 @@
/* Check cf5-opt.vim defs. /* Check cf5-opt.vim defs.
VIM: let g:lcppflags="-std=c++11 -O2 -pthread" VIM: let g:lcppflags="-std=c++11 -O2 -pthread"
VIM: let g:wcppflags="/O2 /EHsc /DWIN32" VIM: let g:wcppflags="/Z7 /O2 /EHsc /DWIN32"
VIM: let g:cppflags=g:Iboost VIM: let g:cppflags=g:Iboost
VIM-: let g:wldflags=/DEBUG VIM-: let g:wldflags=/DEBUG
VIM: let g:ldflags=g:Lboost VIM: let g:ldflags=g:Lboost
@@ -83,16 +83,21 @@ void test_steps( F f , char * p, const size_t c, char * nm )
#endif #endif
static const size_t GB = 1024L*1024L*1024L; static const size_t GB = 1024L*1024L*1024L;
static const size_t L3CACHE = 4*1024*1024;
class tests class tests
{ {
public: public:
typedef long long duration_type; typedef long long duration_type;
typedef void * elem_type; struct elem_type
{
elem_type * next;
};
public: public:
size_t const workset; size_t const workset;
size_t const cnt; size_t const cnt;
elem_type * const mem; elem_type * const mem;
std::vector<char> cache_reset;
std::vector<duration_type> res; std::vector<duration_type> res;
public: public:
@@ -100,6 +105,7 @@ public:
: workset(_ws) : workset(_ws)
, cnt(workset/sizeof(elem_type)) , cnt(workset/sizeof(elem_type))
, mem((elem_type*)malloc( workset )) , mem((elem_type*)malloc( workset ))
, cache_reset(L3CACHE)
{ {
} }
~tests() ~tests()
@@ -142,10 +148,57 @@ public:
void evict() void evict()
{ {
for ( elem_type * p = mem, * const pe = mem+cnt; p < pe; ++p ) std::fill(cache_reset.begin(),cache_reset.end(),0);
*p = 0;
} }
//
// Make a cyclic list over the first c elements of p with step s. Iterating
// through this list reads the elements of p with step s. Once the end of p
// is reached, the list jumps back to the beginning and continues with the
// next pass, shifted by one element, until the end. This continues until
// all elements of p have been read, and then the whole cycle starts again.
//
//   p - first element of the array the list is threaded through
//   c - number of elements of p to link into the list
//   s - distance, in elements, between consecutive nodes of one pass
// The elements are addressed by index, so no pointer beyond p+c is ever
// formed, even when s is larger than c. For c == 0 nothing is written.
void make_cycle( elem_type * const p, const size_t c, const size_t s )
{
    // An empty range has no element that could hold a link.
    if ( c == 0 )
        return;
    // Passes starting at index >= c would be empty, so cap them at c.
    const size_t passes = s < c ? s : c;
    elem_type * tail = p;
    for ( size_t first = 0; first < passes; ++first )
    {
        for ( size_t idx = first; ; idx += s )
        {
            elem_type * const node = p + idx;
            tail->next = node;
            tail = node;
            // Stop before idx+s could leave the range (also overflow-safe).
            if ( s >= c - idx )
                break;
        }
    }
    // Close the cycle: the last linked node points back to the head.
    tail->next = p;
}
//
// Print indices of list.
//
//   p - first element of the array to build the list in and print from
//   c - number of elements linked into the list
//   s - step passed to make_cycle
// Builds the cycle and prints, for every node, the index (relative to p)
// of the element it links to, 15 entries per output line.
void test_print_make_cycle( elem_type * const p, const size_t c, const size_t s )
{
    std::cout << "list lenght=" << c << " step=" << s << std::endl;
    // Build the list in the same array that is walked below; the list was
    // previously always built in mem regardless of the p argument.
    make_cycle( p, c, s );
    int printed = 0;
    elem_type * node = p;
    for ( ; node->next != p; node = node->next )
    {
        std::cout << "[" << std::setw(2) << node->next - p << "] ";
        // Break the output line after every 15th entry.
        if ( ++printed == 15 )
        {
            printed = 0;
            std::cout << std::endl;
        }
    }
    // The last node links back to the head, closing the cycle.
    std::cout << "[" << std::setw(2) << node->next - p << "]"
              << std::endl << std::endl;
}
// Prints the cycles built over mem for a fixed set of (length, step)
// pairs, including steps equal to and larger than the length and the
// single-element list.
void test_make_cycle()
{
    // { list length, step } in the order they are printed.
    static const size_t cases[][2] =
    {
        { 16,  1 },
        { 16,  2 },
        { 16,  4 },
        { 32,  4 },
        { 16,  8 },
        { 16, 16 },
        { 16, 17 },
        {  1, 17 },
        {  1,  1 },
    };
    for ( const auto & length_and_step : cases )
        test_print_make_cycle( mem, length_and_step[0], length_and_step[1] );
}
// //
// Calculate average of tests duration and the deviation. // Calculate average of tests duration and the deviation.
// //
@@ -170,21 +223,20 @@ public:
return std::make_pair(avrg,dev); return std::make_pair(avrg,dev);
} }
#ifdef _MSC_VER
__declspec(noinline) __declspec(noinline)
duration_type scan_read( elem_type const * const p, const size_t c, const size_t s ) #endif
duration_type scan_read( elem_type const * const p, size_t cnt )
{ {
auto b = std::chrono::high_resolution_clock::now(); auto b = std::chrono::high_resolution_clock::now();
// //
register elem_type sum = 0; register elem_type const * h = p;
elem_type const * const qe = p+c; for ( size_t c = cnt; c; --c )
for ( int r =0, re=cnt/c; r < re; ++r ) h = h->next;
for ( size_t i = 0; i < s; ++i )
for ( elem_type const * q = p+i; q < qe; q+=s )
sum = *q;
// //
// Prevents optimisation of the loop. // Prevents optimisation of the loop.
// //
volatile elem_type no_optimization = sum; volatile static elem_type const * no_optimization = h;
// //
auto e = std::chrono::high_resolution_clock::now(); auto e = std::chrono::high_resolution_clock::now();
return std::chrono::nanoseconds(e-b).count(); return std::chrono::nanoseconds(e-b).count();
@@ -204,11 +256,12 @@ public:
for ( ; c <= cnt; c <<=1 ) for ( ; c <= cnt; c <<=1 )
{ {
print_param( c*sizeof(elem_type) ); print_param( c*sizeof(elem_type) );
make_cycle( mem, c, s );
for ( int t = 0; t < trys; ++t ) for ( int t = 0; t < trys; ++t )
{ {
evict(); evict();
test[t] = scan_read( mem, c, s ); test[t] = scan_read( mem, cnt );
print_time( test[t] ); print_time( test[t] );
} }
@@ -240,11 +293,8 @@ public:
// //
// Print results. // Print results.
// //
std::cout << std::log(double(se))/std::log(2.) << " ";
size_t const je = std::log(double(se))/std::log(2.)+1; size_t const je = std::log(double(se))/std::log(2.)+1;
size_t const ie = res.size()/je; size_t const ie = res.size()/je;
std::cout << res.size() << " " << je << " " << ie << std::endl;
for ( size_t i = 0, c = 8; i < ie; ++i, c <<=1 ) for ( size_t i = 0, c = 8; i < ie; ++i, c <<=1 )
{ {
std::cout << std::setw(10) << c*sizeof(elem_type); std::cout << std::setw(10) << c*sizeof(elem_type);
@@ -261,6 +311,7 @@ int main ( void )
{try{ {try{
tests t; tests t;
//t.test_make_cycle();
t.benchmark_read_time(); t.benchmark_read_time();
return 0; return 0;