Run the memory benchmark by traversing a linked list to reduce the overhead of the surrounding code.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
/* Check cf5-opt.vim defs.
|
||||
VIM: let g:lcppflags="-std=c++11 -O2 -pthread"
|
||||
VIM: let g:wcppflags="/O2 /EHsc /DWIN32"
|
||||
VIM: let g:wcppflags="/Z7 /O2 /EHsc /DWIN32"
|
||||
VIM: let g:cppflags=g:Iboost
|
||||
VIM-: let g:wldflags=/DEBUG
|
||||
VIM: let g:ldflags=g:Lboost
|
||||
@@ -83,16 +83,21 @@ void test_steps( F f , char * p, const size_t c, char * nm )
|
||||
#endif
|
||||
|
||||
static const size_t GB = 1024L*1024L*1024L;
|
||||
static const size_t L3CACHE = 4*1024*1024;
|
||||
|
||||
class tests
|
||||
{
|
||||
public:
|
||||
typedef long long duration_type;
|
||||
typedef void * elem_type;
|
||||
// Node type of the benchmark buffer: the buffer is addressed as an array of
// these, and each one stores only the link to the element visited next.
struct elem_type
|
||||
{
|
||||
elem_type * next;   // successor in the list; written by make_cycle()
|
||||
};
|
||||
public:
|
||||
size_t const workset;
|
||||
size_t const cnt;
|
||||
elem_type * const mem;
|
||||
std::vector<char> cache_reset;
|
||||
std::vector<duration_type> res;
|
||||
|
||||
public:
|
||||
@@ -100,6 +105,7 @@ public:
|
||||
: workset(_ws)
|
||||
, cnt(workset/sizeof(elem_type))
|
||||
, mem((elem_type*)malloc( workset ))
|
||||
, cache_reset(L3CACHE)
|
||||
{
|
||||
}
|
||||
~tests()
|
||||
@@ -142,10 +148,57 @@ public:
|
||||
|
||||
void evict()
|
||||
{
|
||||
for ( elem_type * p = mem, * const pe = mem+cnt; p < pe; ++p )
|
||||
*p = 0;
|
||||
std::fill(cache_reset.begin(),cache_reset.end(),0);
|
||||
}
|
||||
|
||||
//
|
||||
// Make a cyclic list of length c and step s. In fact if we iterate
|
||||
// through this list we read all elements of p with step s. Once we
|
||||
// reach the end of the p we jump to the beginning and continue read
|
||||
// the next path till the end. This continues until all elements of
|
||||
// p are read. And then the whole is beginning again.
|
||||
//
|
||||
void make_cycle( elem_type * const p, const size_t c, const size_t s )
|
||||
{
|
||||
elem_type * const pe = p+c;
|
||||
elem_type * h = p;
|
||||
for ( size_t i = 0; i < s; ++i )
|
||||
for ( elem_type * q = p+i; q < pe; q+=s )
|
||||
h = h->next = q;
|
||||
h->next = p;
|
||||
}
|
||||
//
|
||||
// Print indices of list.
|
||||
//
|
||||
void test_print_make_cycle( elem_type * const p, const size_t c, const size_t s )
|
||||
{
|
||||
std::cout << "list lenght=" << c << " step=" << s << std::endl;
|
||||
make_cycle( mem, c, s );
|
||||
|
||||
elem_type * h = p;
|
||||
int l = 0;
|
||||
while ( h->next != p )
|
||||
{
|
||||
std::cout << "[" << std::setw(2) << h->next - p << "] ";
|
||||
if (!(++l%=15))
|
||||
std::cout << std::endl;
|
||||
h = h->next;
|
||||
}
|
||||
std::cout << "[" << std::setw(2) << h->next - p << "]"
|
||||
<< std::endl << std::endl;
|
||||
}
|
||||
void test_make_cycle()
|
||||
{
|
||||
test_print_make_cycle( mem, 16, 1 );
|
||||
test_print_make_cycle( mem, 16, 2 );
|
||||
test_print_make_cycle( mem, 16, 4 );
|
||||
test_print_make_cycle( mem, 32, 4 );
|
||||
test_print_make_cycle( mem, 16, 8 );
|
||||
test_print_make_cycle( mem, 16, 16 );
|
||||
test_print_make_cycle( mem, 16, 17 );
|
||||
test_print_make_cycle( mem, 1, 17 );
|
||||
test_print_make_cycle( mem, 1, 1 );
|
||||
}
|
||||
|
||||
//
|
||||
// Calculate the average test duration and the deviation.
|
||||
//
|
||||
@@ -170,21 +223,20 @@ public:
|
||||
return std::make_pair(avrg,dev);
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
__declspec(noinline)
|
||||
duration_type scan_read( elem_type const * const p, const size_t c, const size_t s )
|
||||
#endif
|
||||
duration_type scan_read( elem_type const * const p, size_t cnt )
|
||||
{
|
||||
auto b = std::chrono::high_resolution_clock::now();
|
||||
//
|
||||
register elem_type sum = 0;
|
||||
elem_type const * const qe = p+c;
|
||||
for ( int r =0, re=cnt/c; r < re; ++r )
|
||||
for ( size_t i = 0; i < s; ++i )
|
||||
for ( elem_type const * q = p+i; q < qe; q+=s )
|
||||
sum = *q;
|
||||
register elem_type const * h = p;
|
||||
for ( size_t c = cnt; c; --c )
|
||||
h = h->next;
|
||||
//
|
||||
// Prevents optimisation of the loop.
|
||||
//
|
||||
volatile elem_type no_optimization = sum;
|
||||
volatile static elem_type const * no_optimization = h;
|
||||
//
|
||||
auto e = std::chrono::high_resolution_clock::now();
|
||||
return std::chrono::nanoseconds(e-b).count();
|
||||
@@ -204,11 +256,12 @@ public:
|
||||
for ( ; c <= cnt; c <<=1 )
|
||||
{
|
||||
print_param( c*sizeof(elem_type) );
|
||||
make_cycle( mem, c, s );
|
||||
|
||||
for ( int t = 0; t < trys; ++t )
|
||||
{
|
||||
evict();
|
||||
test[t] = scan_read( mem, c, s );
|
||||
test[t] = scan_read( mem, cnt );
|
||||
print_time( test[t] );
|
||||
}
|
||||
|
||||
@@ -240,11 +293,8 @@ public:
|
||||
//
|
||||
// Print results.
|
||||
//
|
||||
std::cout << std::log(double(se))/std::log(2.) << " ";
|
||||
|
||||
size_t const je = std::log(double(se))/std::log(2.)+1;
|
||||
size_t const ie = res.size()/je;
|
||||
std::cout << res.size() << " " << je << " " << ie << std::endl;
|
||||
for ( size_t i = 0, c = 8; i < ie; ++i, c <<=1 )
|
||||
{
|
||||
std::cout << std::setw(10) << c*sizeof(elem_type);
|
||||
@@ -261,6 +311,7 @@ int main ( void )
|
||||
{try{
|
||||
|
||||
tests t;
|
||||
//t.test_make_cycle();
|
||||
t.benchmark_read_time();
|
||||
|
||||
return 0;
|
||||
|
||||
Reference in New Issue
Block a user