From d45f13625206ab20fcaabff5f56fe5125fa042d8 Mon Sep 17 00:00:00 2001 From: Vahagn Khachatryan Date: Tue, 9 Jul 2013 14:52:22 +0400 Subject: [PATCH] Memory benchmark through list to reduce overhead of surrounding codes. --- memory_benchmark.cpp | 85 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 17 deletions(-) diff --git a/memory_benchmark.cpp b/memory_benchmark.cpp index 71a065e..88b08a9 100644 --- a/memory_benchmark.cpp +++ b/memory_benchmark.cpp @@ -1,6 +1,6 @@ /* Check cf5-opt.vim defs. VIM: let g:lcppflags="-std=c++11 -O2 -pthread" -VIM: let g:wcppflags="/O2 /EHsc /DWIN32" +VIM: let g:wcppflags="/Z7 /O2 /EHsc /DWIN32" VIM: let g:cppflags=g:Iboost VIM-: let g:wldflags=/DEBUG VIM: let g:ldflags=g:Lboost @@ -83,16 +83,21 @@ void test_steps( F f , char * p, const size_t c, char * nm ) #endif static const size_t GB = 1024L*1024L*1024L; +static const size_t L3CACHE = 4*1024*1024; class tests { public: typedef long long duration_type; - typedef void * elem_type; + struct elem_type + { + elem_type * next; + }; public: size_t const workset; size_t const cnt; elem_type * const mem; + std::vector cache_reset; std::vector res; public: @@ -100,6 +105,7 @@ public: : workset(_ws) , cnt(workset/sizeof(elem_type)) , mem((elem_type*)malloc( workset )) + , cache_reset(L3CACHE) { } ~tests() @@ -142,10 +148,57 @@ public: void evict() { - for ( elem_type * p = mem, * const pe = mem+cnt; p < pe; ++p ) - *p = 0; + std::fill(cache_reset.begin(),cache_reset.end(),0); + } + + // + // Make a cyclic list over the c elements of p, linked with step s: + // p[0], p[s], p[2s], ... until the end of p is reached, then the walk + // jumps back and continues with p[1], p[1+s], ... and so on for every + // start offset below s, so that every element of p is read once. The + // last element links back to p, and the whole traversal starts again.
+ // + void make_cycle( elem_type * const p, const size_t c, const size_t s ) + { + elem_type * const pe = p+c; + elem_type * h = p; + for ( size_t i = 0; i < s; ++i ) + for ( elem_type * q = p+i; q < pe; q+=s ) + h = h->next = q; + h->next = p; + } + // + // Build the cycle over p (length c, step s) and print the index of each successor in visiting order, 15 per row. + // + void test_print_make_cycle( elem_type * const p, const size_t c, const size_t s ) + { + std::cout << "list lenght=" << c << " step=" << s << std::endl; + make_cycle( p, c, s ); + + elem_type * h = p; + int l = 0; + while ( h->next != p ) + { + std::cout << "[" << std::setw(2) << h->next - p << "] "; + if (!(++l%=15)) + std::cout << std::endl; + h = h->next; + } + std::cout << "[" << std::setw(2) << h->next - p << "]" + << std::endl << std::endl; + } + void test_make_cycle() + { + test_print_make_cycle( mem, 16, 1 ); + test_print_make_cycle( mem, 16, 2 ); + test_print_make_cycle( mem, 16, 4 ); + test_print_make_cycle( mem, 32, 4 ); + test_print_make_cycle( mem, 16, 8 ); + test_print_make_cycle( mem, 16, 16 ); + test_print_make_cycle( mem, 16, 17 ); + test_print_make_cycle( mem, 1, 17 ); + test_print_make_cycle( mem, 1, 1 ); } - // // Calculate average of tests duration and the deviation. // @@ -170,21 +223,20 @@ public: return std::make_pair(avrg,dev); } +#ifdef _MSC_VER __declspec(noinline) - duration_type scan_read( elem_type const * const p, const size_t c, const size_t s ) +#endif + duration_type scan_read( elem_type const * const p, size_t cnt ) { auto b = std::chrono::high_resolution_clock::now(); // - register elem_type sum = 0; - elem_type const * const qe = p+c; - for ( int r =0, re=cnt/c; r < re; ++r ) - for ( size_t i = 0; i < s; ++i ) - for ( elem_type const * q = p+i; q < qe; q+=s ) - sum = *q; + register elem_type const * h = p; + for ( size_t c = cnt; c; --c ) + h = h->next; // // Prevents optimisation of the loop.
// - volatile elem_type no_optimization = sum; + elem_type const * volatile no_optimization = h; // auto e = std::chrono::high_resolution_clock::now(); return std::chrono::nanoseconds(e-b).count(); @@ -204,11 +256,12 @@ public: for ( ; c <= cnt; c <<=1 ) { print_param( c*sizeof(elem_type) ); + make_cycle( mem, c, s ); for ( int t = 0; t < trys; ++t ) { evict(); - test[t] = scan_read( mem, c, s ); + test[t] = scan_read( mem, cnt ); print_time( test[t] ); } @@ -240,11 +293,8 @@ public: // // Print results. // - std::cout << std::log(double(se))/std::log(2.) << " "; - size_t const je = std::log(double(se))/std::log(2.)+1; size_t const ie = res.size()/je; - std::cout << res.size() << " " << je << " " << ie << std::endl; for ( size_t i = 0, c = 8; i < ie; ++i, c <<=1 ) { std::cout << std::setw(10) << c*sizeof(elem_type); @@ -261,6 +311,7 @@ int main ( void ) {try{ tests t; + //t.test_make_cycle(); t.benchmark_read_time(); return 0;