/*
Check cf5-opt.vim defs.
VIM: let g:lcppflags="-std=c++11 -O2 -pthread"
VIM: let g:wcppflags="/O2 /EHsc /DWIN32"
VIM: let g:cppflags=g:Iboost
VIM-: let g:wldflags=/DEBUG
VIM: let g:ldflags=g:Lboost
VIM: let g:ldlibpath=g:Bboost
VIM: let g:argv=""
VIM-: let g:cf5output=0
*/

//
// Memory read benchmark.
//
// Allocates one large buffer and times strided read passes over prefixes of
// it ("working sets") for every power-of-two stride, then prints the averaged
// timings as a working-set x stride table.
//

#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <exception>
#include <iomanip>
#include <iostream>
#include <new>
#include <sstream>
#include <utility>
#include <vector>

//
// Portable "never inline" marker. __declspec(noinline) is only understood by
// MSVC; GCC and Clang spell it as an attribute.
//
#if defined(_MSC_VER)
#   define TESTS_NOINLINE __declspec(noinline)
#elif defined(__GNUC__)
#   define TESTS_NOINLINE __attribute__((noinline))
#else
#   define TESTS_NOINLINE
#endif

#if 0
//
// Earlier char-based version of the benchmark. Compiled out; kept for reference.
//
typedef long long duration_type;
static const size_t ce = 1024L*1024L*1024L;

duration_type scan_read( char * p, const size_t c, const size_t s )
{
    auto b = std::chrono::high_resolution_clock::now();
    int sum = 0;
    char * qe = p+c;
    for ( int n =0, ne=ce/c; n < ne; ++n )
        for ( size_t i = 0; i < s; ++i )
            for ( char * q = p+i; q < qe; q+=s )
                sum += *q;
    volatile int no_optimization = sum;
    auto e = std::chrono::high_resolution_clock::now();
    return std::chrono::nanoseconds(e-b).count();
}

duration_type scan_write( char * p, const size_t c, const size_t s )
{
    int a = rand();
    auto b = std::chrono::high_resolution_clock::now();
    char * qe = p+c;
    for ( int n =0, ne=ce/c; n < ne; ++n )
        for ( size_t i = 0; i < s; ++i )
            for ( char * q = p+i; q < qe; q+=s )
                *q = a;
    auto e = std::chrono::high_resolution_clock::now();
    return std::chrono::nanoseconds(e-b).count();
}

template<class F>
void test_steps( F f , char * p, const size_t c, char * nm )
{
    f( p, c, 1 ); // First run ignore.
    std::cout << std::setw(10) << "steps";
    std::cout << std::setw(16) << nm;
    std::cout << std::setw(16) << nm;
    std::cout << std::setw(16) << nm;
    std::cout << std::setw(16) << "average";
    std::cout << std::setw(16) << "deviation" << std::endl;
    for ( size_t s = 1; s < c; s<<=1 )
    {
        std::cout << std::setw(10) << s << std::flush;
        auto d1 = f( p, c, s );
        std::cout << std::setw(16) << d1 << std::flush;
        auto d2 = f( p, c, s );
        std::cout << std::setw(16) << d2 << std::flush;
        auto d3 = f( p, c, s );
        std::cout << std::setw(16) << d3 << std::flush;
        auto a = (d1+d2+d3)/3;
        std::cout << std::setw(16) << a;
        auto dev = (std::abs(d1-a)+std::abs(d2-a)+std::abs(d3-a))/3;
        std::cout << std::setw(16) << dev << std::endl;
    }
}
#endif

// Default working-set size in bytes (1 GiB).
static const size_t GB = 1024L*1024L*1024L;

//
// Owns the benchmark buffer and runs/prints the read-time measurements.
//
//   workset - buffer size in bytes.
//   cnt     - number of elem_type slots that fit in the buffer.
//   mem     - the malloc'ed buffer itself (freed in the destructor).
//   res     - averaged durations appended by benchmark_read_time(int), one
//             entry per working-set size, -1 where a size was skipped.
//
class tests
{
public:
    typedef long long duration_type;    // Nanoseconds, as returned by scan_read().
    typedef void * elem_type;           // Unit of memory that a single read touches.

public:
    size_t const workset;
    size_t const cnt;
    elem_type * const mem;
    std::vector<duration_type> res;

public:
    //
    // Allocates _ws bytes. Throws std::bad_alloc when the allocation fails so
    // that later passes never write through a null pointer.
    //
    tests( size_t _ws = GB )
        : workset(_ws)
        , cnt(workset/sizeof(elem_type))
        , mem(static_cast<elem_type*>(std::malloc( workset )))
    {
        if ( !mem && workset != 0 )
            throw std::bad_alloc();
    }

    ~tests()
    {
        std::free(mem);
    }

    // The buffer is owned through a raw pointer: a copy would free it twice.
    tests( const tests& ) = delete;
    tests& operator=( const tests& ) = delete;

public:
    //
    // Prints the title, a 90 character rule and the column captions of one
    // measurement table.
    //
    void print_hdr( const char * title, const char * param, const char * action )
    {
        std::cout << std::endl << title << std::endl
                  << std::setfill('-') << std::setw(90) << "" << std::endl
                  << std::setfill(' ')
                  << std::setw(10) << param
                  << std::setw(14) << action << "#1"
                  << std::setw(14) << action << "#2"
                  << std::setw(14) << action << "#3"
                  << std::setw(16) << "average"
                  << std::setw(16) << "deviation" << std::endl;
    }

    // Prints the first column of a row; flushed so progress is visible at once.
    void print_param( size_t param )
    {
        std::cout << std::setw(10) << param << std::flush;
    }

    // Prints one measured duration; flushed for the same reason.
    void print_time( duration_type d )
    {
        std::cout << std::setw(16) << d << std::flush;
    }

    // Prints the average and deviation columns and terminates the row.
    void print_avrg( duration_type avrg, duration_type dev )
    {
        std::cout << std::setw(16) << avrg;
        std::cout << std::setw(16) << dev << std::endl;
    }

    //
    // Overwrites every slot of the buffer with a null pointer. Called before
    // each timed pass; presumably meant to displace the previously read data
    // from the CPU caches (the name suggests so - not verifiable from here).
    // It also guarantees that scan_read() never reads uninitialised memory.
    //
    void evict()
    {
        for ( elem_type * p = mem, * const pe = mem+cnt; p < pe; ++p )
            *p = 0;
    }

    //
    // Calculate average of tests duration and the deviation.
    //
    // Returns (average, mean absolute deviation from that average), both with
    // integer division. An empty input yields (0, 0) instead of dividing by
    // zero.
    //
    std::pair<duration_type,duration_type> average( const duration_type * test, const size_t count )
    {
        if ( count == 0 )
            return std::make_pair( duration_type(0), duration_type(0) );
        const duration_type n = static_cast<duration_type>(count);
        //
        // Calc average duration.
        //
        duration_type avrg = 0;
        for ( size_t i = 0; i < count; ++i )
            avrg += test[i];
        avrg /= n;
        //
        // Calc deviation from average duration.
        //
        duration_type dev = 0;
        for ( size_t i = 0; i < count; ++i )
            dev += std::abs( avrg - test[i] );
        dev /= n;
        //
        return std::make_pair(avrg,dev);
    }

    //
    // Times reading the first c elements of p with stride s.
    //
    // For each start offset i in [0,s) the elements i, i+s, i+2s, ... below c
    // are read, so one sweep touches every element once. The sweep is
    // repeated cnt/c times, which keeps the total number of reads about the
    // same for every working-set size. Returns the elapsed time in
    // nanoseconds, or 0 when c is 0.
    //
    TESTS_NOINLINE
    duration_type scan_read( elem_type const * const p, const size_t c, const size_t s )
    {
        if ( c == 0 )
            return 0;
        auto b = std::chrono::high_resolution_clock::now();
        //
        // Reading through a pointer-to-volatile forces every load to be
        // performed; with plain loads only the last value would have to be
        // produced and the optimiser could drop the rest of the loop.
        //
        elem_type const volatile * const src = p;
        elem_type sum = 0;
        for ( size_t r = 0, re = cnt/c; r < re; ++r )
            for ( size_t i = 0; i < s; ++i )
                // Index arithmetic: never forms a pointer beyond p+c.
                for ( size_t k = i; k < c; k += s )
                    sum = src[k];
        //
        // Prevents optimisation of the loop.
        //
        volatile elem_type no_optimization = sum;
        (void)no_optimization;
        //
        auto e = std::chrono::high_resolution_clock::now();
        return std::chrono::duration_cast<std::chrono::nanoseconds>(e-b).count();
    }

    //
    // Runs the read benchmark for one stride of s elements.
    //
    // Working-set sizes go from 8 elements upwards in powers of two while
    // they fit in the buffer. Sizes smaller than the stride are not measured
    // and are recorded as -1; every other size is timed three times (after
    // evict()), printed, and its average appended to res.
    //
    void benchmark_read_time(int s)
    {
        const size_t step = static_cast<size_t>(s);
        std::stringstream ss;
        ss << "Memory continuous read. Step=" << step*sizeof(elem_type);
        print_hdr( ss.str().c_str(), "wset", "read" );
        const int trys = 3;
        duration_type test[trys];
        size_t c = 8;
        // Bounded by cnt as well so every stride adds the same number of
        // entries to res.
        for ( ; c < step && c <= cnt; c <<=1 )
            res.push_back( -1 );
        for ( ; c <= cnt; c <<=1 )
        {
            print_param( c*sizeof(elem_type) );
            for ( int t = 0; t < trys; ++t )
            {
                evict();
                test[t] = scan_read( mem, c, step );
                print_time( test[t] );
            }
            auto a = average( test, trys );
            res.push_back( a.first );
            print_avrg( a.first, a.second );
        }
    }

    //
    // Runs benchmark_read_time(s) for every power-of-two stride up to cnt and
    // then prints a summary table: one row per working-set size, one column
    // per stride.
    //
    void benchmark_read_time()
    {
        size_t const se = cnt;
        // The summary indexes res by position, so it must hold this run only.
        res.clear();
        size_t je = 0;  // Number of strides measured == number of table columns.
        for ( size_t s = 1; s <= se; s <<=1, ++je )
            benchmark_read_time( static_cast<int>(s) );
        //
        // Print header.
        //
        std::cout << std::endl << "Read time test." << std::endl
                  << std::setfill('-') << std::setw(90) << "" << std::endl
                  << std::setfill(' ')
                  << std::setw(10) << "wset";
        for ( size_t s = 1; s <= se; s <<=1 )
        {
            std::stringstream ss;
            ss << "s" << s*sizeof(elem_type);
            std::cout << std::setw(16) << ss.str();
        }
        std::cout << std::endl;
        //
        // Print results.
        //
        // The column count is counted above instead of being derived from a
        // truncated floating-point log2, which can come out one too small.
        // The diagnostic line is kept as it was.
        //
        std::cout << std::log(double(se))/std::log(2.) << " ";
        size_t const ie = je ? res.size()/je : 0;   // Rows: entries per stride.
        std::cout << res.size() << " " << je << " " << ie << std::endl;
        for ( size_t i = 0, c = 8; i < ie; ++i, c <<=1 )
        {
            std::cout << std::setw(10) << c*sizeof(elem_type);
            // res is laid out stride after stride, ie entries each, so row i
            // is every ie-th entry starting at i.
            for ( size_t j = i; j < res.size(); j+=ie )
                std::cout << std::setw(16) << res[j];
            std::cout << std::endl;
        }
    }
};

//
// Runs the read benchmark with the default working set.
// Exit code: 0 on success, 2 on std::exception, 1 on any other exception.
//
int main ( void )
{try{
    tests t;
    t.benchmark_read_time();
    return 0;
}
catch ( const std::exception& e )
{
    std::cerr << std::endl
              << "std::exception(\"" << e.what() << "\")." << std::endl;
    return 2;
}
catch ( ... )
{
    std::cerr << std::endl
              << "unknown exception." << std::endl;
    return 1;
}}