/*
    Check cf5-opt.vim defs.
    VIM: let g:lcppflags="-std=c++11 -O2 -pthread"
    VIM: let g:wcppflags="/Z7 /O2 /EHsc /DWIN32"
    VIM: let g:cppflags=g:Iboost
    VIM-: let g:wldflags=/DEBUG
    VIM: let g:ldflags=g:Lboost
    VIM: let g:ldlibpath=g:Bboost
    VIM: let g:argv=""
    VIM-: let g:cf5output=0
*/

// NOTE(review): the header names were lost in this copy of the file; the
// list below is reconstructed from the standard-library names the code
// uses. Confirm against the original include list.
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <exception>
#include <iomanip>
#include <iostream>
#include <new>
#include <sstream>
#include <utility>
#include <vector>

#if 0
//
// Disabled earlier variant of the benchmark (strided byte reads/writes).
// Kept for reference only; it is never compiled.
//
typedef long long duration_type;
static const size_t ce = 1024L*1024L*1024L;

duration_type scan_read( char * p, const size_t c, const size_t s )
{
    auto b = std::chrono::high_resolution_clock::now();
    int sum = 0;
    char * qe = p+c;
    for ( int n =0, ne=ce/c; n < ne; ++n )
        for ( size_t i = 0; i < s; ++i )
            for ( char * q = p+i; q < qe; q+=s )
                sum += *q;
    volatile int no_optimization = sum;
    auto e = std::chrono::high_resolution_clock::now();
    return std::chrono::nanoseconds(e-b).count();
}

duration_type scan_write( char * p, const size_t c, const size_t s )
{
    int a = rand();
    auto b = std::chrono::high_resolution_clock::now();
    char * qe = p+c;
    for ( int n =0, ne=ce/c; n < ne; ++n )
        for ( size_t i = 0; i < s; ++i )
            for ( char * q = p+i; q < qe; q+=s )
                *q = a;
    auto e = std::chrono::high_resolution_clock::now();
    return std::chrono::nanoseconds(e-b).count();
}

// NOTE(review): the template parameter list was lost in this copy;
// `class F` is reconstructed from the use of `F f` below.
template <class F>
void test_steps( F f , char * p, const size_t c, char * nm )
{
    f( p, c, 1 ); // First run ignore.
    std::cout << std::setw(10) << "steps";
    std::cout << std::setw(16) << nm;
    std::cout << std::setw(16) << nm;
    std::cout << std::setw(16) << nm;
    std::cout << std::setw(16) << "average";
    std::cout << std::setw(16) << "deviation" << std::endl;
    for ( size_t s = 1; s < c; s<<=1 )
    {
        std::cout << std::setw(10) << s << std::flush;
        auto d1 = f( p, c, s );
        std::cout << std::setw(16) << d1 << std::flush;
        auto d2 = f( p, c, s );
        std::cout << std::setw(16) << d2 << std::flush;
        auto d3 = f( p, c, s );
        std::cout << std::setw(16) << d3 << std::flush;
        auto a = (d1+d2+d3)/3;
        std::cout << std::setw(16) << a;
        auto dev = (std::abs(d1-a)+std::abs(d2-a)+std::abs(d3-a))/3;
        std::cout << std::setw(16) << dev << std::endl;
    }
}
#endif

static const size_t GB = 1024L*1024L*1024L;
static const size_t MB = 1024L*1024L;
// Size in bytes of the buffer that evict() overwrites before each timed run.
static const size_t L3CACHE = 20*MB;

//
// Pointer-chasing read benchmark.
//
// The object owns one raw buffer of `workset` bytes viewed as an array of
// `cnt` single-pointer elements. make_cycle() links a prefix of that array
// into a cyclic list with a given stride, scan_read() times a walk over the
// list, and benchmark_read_time() prints the timings as tables on std::cout.
//
class tests
{
public:
    typedef long long duration_type;

    // One list node: nothing but the link to the next node.
    struct elem_type
    {
        elem_type * next;
    };

public:
    size_t const workset;       // Buffer size in bytes.
    size_t const cnt;           // Number of elem_type slots in the buffer.
    elem_type * const mem;      // The buffer itself (malloc'ed, owned).
    // NOTE(review): the element types of the two vectors were lost in this
    // copy. `char` makes cache_reset exactly L3CACHE bytes; `duration_type`
    // matches the values pushed into res. Confirm against the original.
    std::vector<char> cache_reset;
    std::vector<duration_type> res;     // Averages collected by benchmark_read_time(int).

public:
    //
    // Allocate the working set (4 GB unless another size is given).
    // Throws std::bad_alloc when the buffer cannot be allocated, instead
    // of leaving a null `mem` to be dereferenced later.
    //
    tests( size_t _ws = 4*GB )
        : workset(_ws)
        , cnt(workset/sizeof(elem_type))
        , mem(static_cast<elem_type*>(std::malloc( workset )))
        , cache_reset(L3CACHE)
    {
        if ( workset != 0 && !mem )
            throw std::bad_alloc();
    }

    ~tests()
    {
        std::free(mem);
    }

    // The object owns `mem`; a copy would free the same buffer twice.
    tests( const tests & ) = delete;
    tests & operator=( const tests & ) = delete;

public:
    //
    // Print a table title, a 90-character rule and the column captions
    // (parameter, three numbered runs, average, deviation).
    //
    void print_hdr( const char * title, const char * param, const char * action )
    {
        std::cout << std::endl
                  << title << std::endl
                  << std::setfill('-') << std::setw(90) << "" << std::endl
                  << std::setfill(' ')
                  << std::setw(10) << param
                  << std::setw(14) << action << "#1"
                  << std::setw(14) << action << "#2"
                  << std::setw(14) << action << "#3"
                  << std::setw(16) << "average"
                  << std::setw(16) << "deviation" << std::endl;
    }

    // Print the first column of a table row and flush so progress is visible.
    void print_param( size_t param )
    {
        std::cout << std::setw(10) << param << std::flush;
    }

    // Print one measured duration and flush.
    void print_time( duration_type d )
    {
        std::cout << std::setw(16) << d << std::flush;
    }

    // Print the average and deviation columns and finish the row.
    void print_avrg( duration_type avrg, duration_type dev )
    {
        std::cout << std::setw(16) << avrg;
        std::cout << std::setw(16) << dev << std::endl;
    }

    //
    // Overwrite the whole cache_reset buffer with zeros. Called before
    // every timed run; presumably meant to displace the benchmark data
    // from the CPU caches (confirm L3CACHE against the target machine).
    //
    void evict()
    {
        std::fill( cache_reset.begin(), cache_reset.end(), 0 );
    }

    //
    // Link the first c elements of p into one cyclic list with stride s.
    //
    // Following the list visits p[0], p[s], p[2s], ... up to the end of
    // the range, then p[1], p[1+s], ... and so on for every start offset
    // below s. The last element visited links back to p[0], so the walk
    // repeats indefinitely.
    //
    void make_cycle( elem_type * const p, const size_t c, const size_t s )
    {
        elem_type * const pe = p+c;
        elem_type * h = p;
        // Offsets >= c contribute no element; stopping there also avoids
        // forming pointers beyond the range when s > c.
        for ( size_t i = 0; i < s && i < c; ++i )
            for ( elem_type * q = p+i; q < pe; q+=s )
                h = h->next = q;
        h->next = p;
    }

    //
    // Build a list of length c and stride s in p and print the element
    // indices in visiting order, 15 per output line.
    //
    void test_print_make_cycle( elem_type * const p, const size_t c, const size_t s )
    {
        std::cout << "list lenght=" << c << " step=" << s << std::endl;
        make_cycle( p, c, s );
        elem_type * h = p;
        int l = 0;
        while ( h->next != p )
        {
            std::cout << "[" << std::setw(2) << h->next - p << "] ";
            l = (l + 1) % 15;
            if ( l == 0 )
                std::cout << std::endl;
            h = h->next;
        }
        std::cout << "[" << std::setw(2) << h->next - p << "]" << std::endl << std::endl;
    }

    // Print a handful of small lists for visual inspection of make_cycle().
    void test_make_cycle()
    {
        test_print_make_cycle( mem, 16, 1 );
        test_print_make_cycle( mem, 16, 2 );
        test_print_make_cycle( mem, 16, 4 );
        test_print_make_cycle( mem, 32, 4 );
        test_print_make_cycle( mem, 16, 8 );
        test_print_make_cycle( mem, 16, 16 );
        test_print_make_cycle( mem, 16, 17 );
        test_print_make_cycle( mem, 1, 17 );
        test_print_make_cycle( mem, 1, 1 );
    }

    //
    // Return (average, mean absolute deviation from that average) of
    // `count` durations, both computed with integer division.
    // Returns (0, 0) for an empty set instead of dividing by zero.
    //
    // NOTE(review): the std::pair arguments were lost in this copy and are
    // reconstructed from the values passed to std::make_pair.
    //
    std::pair<duration_type,duration_type> average( const duration_type * test, const size_t count )
    {
        if ( count == 0 )
            return std::make_pair( duration_type(0), duration_type(0) );
        const duration_type n = static_cast<duration_type>(count);
        //
        // Calc average duration.
        //
        duration_type avrg = 0;
        for ( size_t i = 0; i < count; ++i )
            avrg += test[i];
        avrg /= n;
        //
        // Calc deviation from average duration.
        //
        duration_type dev = 0;
        for ( size_t i = 0; i < count; ++i )
            dev += std::abs( avrg - test[i] );
        dev /= n;
        //
        return std::make_pair(avrg,dev);
    }

    //
    // Follow `cnt` links starting at p and return the elapsed time in
    // nanoseconds as measured by std::chrono::high_resolution_clock.
    //
#ifdef _MSC_VER
    __declspec(noinline)
#endif
    duration_type scan_read( elem_type const * const p, size_t cnt )
    {
        auto b = std::chrono::high_resolution_clock::now();
        //
        elem_type const * h = p;
        for ( size_t c = cnt; c; --c )
            h = h->next;
        //
        // Prevents optimisation of the loop: the final pointer is stored
        // through a volatile pointer object on EVERY call. (Initialising a
        // static local would only consume the value on the first call.)
        //
        static elem_type const * volatile no_optimization;
        no_optimization = h;
        //
        auto e = std::chrono::high_resolution_clock::now();
        return std::chrono::duration_cast<std::chrono::nanoseconds>(e-b).count();
    }

    //
    // Run the read test for one stride s (in elements) over working sets
    // of 8, 16, 32, ... up to cnt elements. For each working set the list
    // is rebuilt, three timed walks of cnt links are made (evict() before
    // each), and the row is printed. The average of every row is appended
    // to res; working sets smaller than the stride get a -1 placeholder.
    //
    // The stride is an int, so it cannot represent values above INT_MAX.
    //
    void benchmark_read_time(int s)
    {
        const size_t step = static_cast<size_t>(s);
        std::stringstream ss;
        ss << "Memory continuous read. Step=" << step*sizeof(elem_type);
        print_hdr( ss.str().c_str(), "wset", "read" );
        const int trys = 3;
        duration_type test[trys];
        size_t c = 8;
        for ( ; c < step; c <<=1 )
            res.push_back( -1 );
        for ( ; c <= cnt; c <<=1 )
        {
            print_param( c*sizeof(elem_type) );
            make_cycle( mem, c, step );
            for ( int t = 0; t < trys; ++t )
            {
                evict();
                test[t] = scan_read( mem, cnt );
                print_time( test[t] );
            }
            auto a = average( test, trys );
            res.push_back( a.first );
            print_avrg( a.first, a.second );
        }
    }

    //
    // Run benchmark_read_time(s) for every power-of-two stride from 1 to
    // cnt, then print a summary table: one row per working-set size, one
    // column per stride.
    //
    void benchmark_read_time()
    {
        size_t const se = cnt;
        // Start from an empty result list so that the row/column indexing
        // of the summary below matches what this run produced.
        res.clear();
        // Count the strides with integers; deriving the count from
        // log(se)/log(2) can come out one short due to rounding.
        size_t je = 0;
        for ( size_t s = 1; s <= se; s <<=1, ++je )
            benchmark_read_time( static_cast<int>(s) );
        //
        // Print header.
        //
        std::cout << std::endl
                  << "Read time test." << std::endl
                  << std::setfill('-') << std::setw(90) << "" << std::endl
                  << std::setfill(' ')
                  << std::setw(10) << "wset";
        for ( size_t s = 1; s <= se; s <<=1 )
        {
            std::stringstream ss;
            ss << "s" << s*sizeof(elem_type);
            std::cout << std::setw(16) << ss.str();
        }
        std::cout << std::endl;
        //
        // Print results.
        //
        if ( je == 0 )
            return;     // No stride was run, so there is nothing to tabulate.
        // res holds, per stride, one entry per working-set size.
        size_t const ie = res.size()/je;
        for ( size_t i = 0, c = 8; i < ie; ++i, c <<=1 )
        {
            std::cout << std::setw(10) << c*sizeof(elem_type);
            for ( size_t j = i; j < res.size(); j+=ie )
                std::cout << std::setw(16) << res[j];
            std::cout << std::endl;
        }
    }
};

//
// Run the read benchmark with the default working set.
// Exit code: 0 on success, 2 on std::exception, 1 on any other exception.
//
int main ( void )
{
    try
    {
        tests t;
        //t.test_make_cycle();
        t.benchmark_read_time();
        return 0;
    }
    catch ( const std::exception& e )
    {
        std::cerr << std::endl
                  << "std::exception(\"" << e.what() << "\")." << std::endl;
        return 2;
    }
    catch ( ... )
    {
        std::cerr << std::endl
                  << "unknown exception." << std::endl;
        return 1;
    }
}