Memory benchmark now traverses a linked list to reduce the overhead of the surrounding code.

This commit is contained in:
2013-07-09 14:52:22 +04:00
parent 395c6f49db
commit d45f136252

View File

@@ -1,6 +1,6 @@
/* Check cf5-opt.vim defs.
VIM: let g:lcppflags="-std=c++11 -O2 -pthread"
VIM: let g:wcppflags="/O2 /EHsc /DWIN32"
VIM: let g:wcppflags="/Z7 /O2 /EHsc /DWIN32"
VIM: let g:cppflags=g:Iboost
VIM-: let g:wldflags=/DEBUG
VIM: let g:ldflags=g:Lboost
@@ -83,16 +83,21 @@ void test_steps( F f , char * p, const size_t c, char * nm )
#endif
static const size_t GB = 1024L*1024L*1024L;
static const size_t L3CACHE = 4*1024*1024;
class tests
{
public:
typedef long long duration_type;
typedef void * elem_type;
// Node of the singly linked list that make_cycle() builds inside the
// benchmark buffer. An element stores nothing but the link to follow.
struct elem_type
{
// Element visited after this one when the list is traversed.
elem_type * next;
};
public:
size_t const workset;
size_t const cnt;
elem_type * const mem;
std::vector<char> cache_reset;
std::vector<duration_type> res;
public:
@@ -100,6 +105,7 @@ public:
: workset(_ws)
, cnt(workset/sizeof(elem_type))
, mem((elem_type*)malloc( workset ))
, cache_reset(L3CACHE)
{
}
~tests()
@@ -142,10 +148,57 @@ public:
//
// Overwrite the scratch buffer cache_reset (sized L3CACHE by the
// constructor); the intent is to push the benchmark data out of the
// CPU cache before the next timed run.
//
// The working set in mem is deliberately left untouched: it holds the
// linked list built by make_cycle(), and zeroing it would destroy the
// next pointers that the following traversal relies on.
//
void evict()
{
    std::fill( cache_reset.begin(), cache_reset.end(), 0 );
}
//
// Make a cyclic list of length c and step s. Iterating through this
// list reads the elements of p with step s. Once we reach the end of
// p we jump back to the beginning, shifted by one element, and read
// the next pass to the end. This continues until all elements of p
// have been read, and then the whole cycle begins again.
//
//
// Link the first c elements of p into one cyclic list.
//
//   p - first element of the buffer; p[0] is always written, so the
//       buffer must hold at least one element even when c is 0.
//   c - number of elements to link.
//   s - distance (in elements) between consecutive list nodes within
//       one pass over the buffer.
//
// Pass number k visits p[k], p[k+s], p[k+2s], ... while the index stays
// below c; passes are chained one after another and the last visited
// element links back to p. With s == 0 or c == 0 the result is the
// single-element cycle p -> p.
//
void make_cycle( elem_type * const p, const size_t c, const size_t s )
{
    elem_type * tail = p;
    // Passes starting at or beyond c would be empty, so stop at c; this
    // also avoids ever forming a pointer outside [p, p+c).
    for ( size_t pass = 0; pass < s && pass < c; ++pass )
    {
        for ( size_t idx = pass; ; idx += s )
        {
            elem_type * const node = p + idx;
            tail->next = node;
            tail = node;
            // idx < c holds here, so c - idx cannot underflow; leave the
            // pass before idx + s could reach c or overflow size_t.
            if ( c - idx <= s )
                break;
        }
    }
    // Close the cycle.
    tail->next = p;
}
//
// Print indices of list.
//
//
// Build the cyclic list over p with make_cycle() and print, in
// traversal order, the index (relative to p) that every element links
// to. Indices are printed 15 per row; the last one printed is the link
// back to the head, i.e. index 0.
//
//   p - buffer to build the list on and to walk.
//   c - list length passed to make_cycle().
//   s - step passed to make_cycle().
//
void test_print_make_cycle( elem_type * const p, const size_t c, const size_t s )
{
    std::cout << "list lenght=" << c << " step=" << s << std::endl;
    // Build the list on the very buffer that is walked below, not on
    // mem, so the function is correct for any p.
    make_cycle( p, c, s );
    elem_type * node = p;
    int column = 0;
    while ( node->next != p )
    {
        std::cout << "[" << std::setw(2) << node->next - p << "] ";
        // Break the row after every 15th index.
        column = ( column + 1 ) % 15;
        if ( column == 0 )
            std::cout << std::endl;
        node = node->next;
    }
    std::cout << "[" << std::setw(2) << node->next - p << "]"
              << std::endl << std::endl;
}
//
// Print the lists that make_cycle() produces on mem for a fixed set of
// (length, step) pairs, including steps larger than the length and a
// single-element list, so the link order can be checked by eye.
//
void test_make_cycle()
{
    // { list length, step }
    static const size_t cases[][2] =
    {
        { 16,  1 },
        { 16,  2 },
        { 16,  4 },
        { 32,  4 },
        { 16,  8 },
        { 16, 16 },
        { 16, 17 },
        {  1, 17 },
        {  1,  1 },
    };
    const size_t ncases = sizeof(cases) / sizeof(cases[0]);
    for ( size_t k = 0; k < ncases; ++k )
        test_print_make_cycle( mem, cases[k][0], cases[k][1] );
}
//
// Calculate average of tests duration and the deviation.
//
@@ -170,21 +223,20 @@ public:
return std::make_pair(avrg,dev);
}
#ifdef _MSC_VER
__declspec(noinline)
duration_type scan_read( elem_type const * const p, const size_t c, const size_t s )
#endif
duration_type scan_read( elem_type const * const p, size_t cnt )
{
auto b = std::chrono::high_resolution_clock::now();
//
register elem_type sum = 0;
elem_type const * const qe = p+c;
for ( int r =0, re=cnt/c; r < re; ++r )
for ( size_t i = 0; i < s; ++i )
for ( elem_type const * q = p+i; q < qe; q+=s )
sum = *q;
register elem_type const * h = p;
for ( size_t c = cnt; c; --c )
h = h->next;
//
// Prevents optimisation of the loop.
//
volatile elem_type no_optimization = sum;
volatile static elem_type const * no_optimization = h;
//
auto e = std::chrono::high_resolution_clock::now();
return std::chrono::nanoseconds(e-b).count();
@@ -204,11 +256,12 @@ public:
for ( ; c <= cnt; c <<=1 )
{
print_param( c*sizeof(elem_type) );
make_cycle( mem, c, s );
for ( int t = 0; t < trys; ++t )
{
evict();
test[t] = scan_read( mem, c, s );
test[t] = scan_read( mem, cnt );
print_time( test[t] );
}
@@ -240,11 +293,8 @@ public:
//
// Print results.
//
std::cout << std::log(double(se))/std::log(2.) << " ";
size_t const je = std::log(double(se))/std::log(2.)+1;
size_t const ie = res.size()/je;
std::cout << res.size() << " " << je << " " << ie << std::endl;
for ( size_t i = 0, c = 8; i < ie; ++i, c <<=1 )
{
std::cout << std::setw(10) << c*sizeof(elem_type);
@@ -261,6 +311,7 @@ int main ( void )
{try{
tests t;
//t.test_make_cycle();
t.benchmark_read_time();
return 0;