333 lines
7.7 KiB
C++
333 lines
7.7 KiB
C++
/* Check cf5-opt.vim defs.
VIM: let g:lcppflags="-std=c++11 -O2 -pthread"
VIM: let g:wcppflags="/Z7 /O2 /EHsc /DWIN32"
VIM: let g:cppflags=g:Iboost
VIM-: let g:wldflags=/DEBUG
VIM: let g:ldflags=g:Lboost
VIM: let g:ldlibpath=g:Bboost
VIM: let g:argv=""
VIM-: let g:cf5output=0
*/
|
|
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <exception>
#include <iomanip>
#include <iostream>
#include <new>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
#if 0
|
|
// Integer type used for the elapsed times (nanosecond counts) returned by
// the scan functions below.
using duration_type = long long;

// Total number of bytes every scan touches (2^30); the scans repeat their
// pass over the buffer ce/c times.
static const size_t ce = static_cast<size_t>(1) << 30;
|
|
|
|
//
// Reads the buffer [p, p+c) with step s and returns the elapsed time in
// nanoseconds. One pass visits the indices i, i+s, i+2s, ... for every
// start offset i < s, so each byte is read exactly once per pass; the pass
// is repeated ce/c times.
//
// p - buffer to read, at least c bytes long.
// c - number of bytes in the buffer; 0 returns immediately with 0.
// s - distance in bytes between two consecutive reads.
//
duration_type scan_read( char * p, const size_t c, const size_t s )
{
    // ce/c below would divide by zero for an empty buffer.
    if ( c == 0 )
        return 0;

    auto b = std::chrono::high_resolution_clock::now();

    // Unsigned accumulator: wrap-around is well defined, whereas a signed
    // sum over up to ce bytes could overflow.
    unsigned sum = 0;
    // A step larger than the buffer degenerates to one read per start
    // offset, so clamping it keeps every index inside [0, c).
    const size_t stride = std::min( s, c );
    for ( size_t n = 0, ne = ce/c; n < ne; ++n )
        for ( size_t i = 0; i < stride; ++i )
            for ( size_t k = i; k < c; k += stride )
                sum += static_cast<unsigned>( p[k] );

    // The volatile store makes the sum observable so the reads are kept.
    volatile unsigned no_optimization = sum;
    (void)no_optimization;

    auto e = std::chrono::high_resolution_clock::now();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(e-b).count();
}
|
|
|
|
//
// Overwrites the buffer [p, p+c) with step s and returns the elapsed time
// in nanoseconds. One pass visits the indices i, i+s, i+2s, ... for every
// start offset i < s, so each byte is written exactly once per pass; the
// pass is repeated ce/c times.
//
// p - buffer to write, at least c bytes long.
// c - number of bytes in the buffer; 0 returns immediately with 0.
// s - distance in bytes between two consecutive writes.
//
duration_type scan_write( char * p, const size_t c, const size_t s )
{
    // ce/c below would divide by zero for an empty buffer.
    if ( c == 0 )
        return 0;

    // The stored value comes from rand() and is picked before the clock
    // starts.
    const char a = static_cast<char>( rand() );

    auto b = std::chrono::high_resolution_clock::now();

    // A step larger than the buffer degenerates to one write per start
    // offset, so clamping it keeps every index inside [0, c).
    const size_t stride = std::min( s, c );
    for ( size_t n = 0, ne = ce/c; n < ne; ++n )
        for ( size_t i = 0; i < stride; ++i )
            for ( size_t k = i; k < c; k += stride )
                p[k] = a;

    auto e = std::chrono::high_resolution_clock::now();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(e-b).count();
}
|
|
|
|
//
// Runs the scan function f over the buffer for every power-of-two step
// smaller than c and prints one table row per step: the step, three
// measurements, their average and the mean absolute deviation.
//
// f  - callable invoked as f( p, c, step ); its result is printed as the
//      measurement.
// p  - buffer handed to f.
// c  - buffer size handed to f; also the exclusive upper bound of the step.
// nm - column title of the three measurement columns (string literals are
//      accepted).
//
template <class F>
void test_steps( F f , char * p, const size_t c, const char * nm )
{
    f( p, c, 1 ); // First run ignore.

    std::cout << std::setw(10) << "steps";
    for ( int col = 0; col < 3; ++col )
        std::cout << std::setw(16) << nm;
    std::cout << std::setw(16) << "average";
    std::cout << std::setw(16) << "deviation" << std::endl;

    for ( size_t s = 1; s < c; s<<=1 )
    {
        // Every value is flushed as soon as it is known, so progress is
        // visible while the next measurement is still running.
        std::cout << std::setw(10) << s << std::flush;
        auto d1 = f( p, c, s );
        std::cout << std::setw(16) << d1 << std::flush;
        auto d2 = f( p, c, s );
        std::cout << std::setw(16) << d2 << std::flush;
        auto d3 = f( p, c, s );
        std::cout << std::setw(16) << d3 << std::flush;

        auto a = (d1+d2+d3)/3;
        std::cout << std::setw(16) << a;
        auto dev = (std::abs(d1-a)+std::abs(d2-a)+std::abs(d3-a))/3;
        std::cout << std::setw(16) << dev << std::endl;
    }
}
|
|
#endif
|
|
|
|
//
// Size constants in bytes.
//
static const size_t GB = 1024L*1024L*1024L;
static const size_t MB = 1024L*1024L;
// Size of the buffer that tests::evict() overwrites before every timed run
// (presumably at least the size of the last-level cache of the machine
// under test -- adjust for the target CPU).
static const size_t L3CACHE = 20*MB;

//
// Memory read benchmark.
//
// The working set is one malloc'ed array of elem_type. make_cycle() links
// (a prefix of) the array into a cyclic list with a given step, scan_read()
// times how long it takes to follow the links, and benchmark_read_time()
// repeats this for power-of-two working sets and steps and prints tables.
//
class tests
{
public:
    // Elapsed time in nanoseconds; -1 in res marks "not measured".
    using duration_type = long long;

    // List node: nothing but the link to the next node.
    struct elem_type
    {
        elem_type * next;
    };

public:
    size_t const workset;               // Size of mem in bytes.
    size_t const cnt;                   // Number of elem_type in mem.
    elem_type * const mem;              // Working set, owned (malloc/free).
    std::vector<char> cache_reset;      // Buffer overwritten by evict().
    std::vector<duration_type> res;     // Averages, step-major (see below).

public:
    //
    // Allocates the working set of _ws bytes and the cache reset buffer.
    // Throws std::bad_alloc if either allocation fails.
    //
    tests( size_t _ws = 4*GB )
        : workset(_ws)
        , cnt(workset/sizeof(elem_type))
        , mem(static_cast<elem_type*>(std::malloc( workset )))
        , cache_reset()
        , res()
    {
        // malloc(0) may legitimately return a null pointer.
        if ( mem == nullptr && workset != 0 )
            throw std::bad_alloc();
        // The destructor does not run when the constructor throws, so the
        // working set has to be released here if this allocation fails.
        try
        {
            cache_reset.resize( L3CACHE );
        }
        catch ( ... )
        {
            std::free( mem );
            throw;
        }
    }
    ~tests()
    {
        std::free( mem );
    }
    // mem is an owning raw pointer: a copy would free it twice.
    tests( const tests& ) = delete;
    tests& operator=( const tests& ) = delete;

public:

    //
    // Prints a table title, a separator line and the column captions:
    // param, three "action#N" columns, average and deviation.
    //
    void print_hdr( const char * title,
                    const char * param,
                    const char * action )
    {
        std::cout << std::endl << title << std::endl
            << std::setfill('-') << std::setw(90) << "" << std::endl
            << std::setfill(' ')
            << std::setw(10) << param
            << std::setw(14) << action << "#1"
            << std::setw(14) << action << "#2"
            << std::setw(14) << action << "#3"
            << std::setw(16) << "average"
            << std::setw(16) << "deviation" << std::endl;
    }

    //
    // Prints the first cell of a row; flushed so that progress is visible.
    //
    void print_param( size_t param )
    {
        std::cout << std::setw(10) << param << std::flush;
    }

    //
    // Prints one measurement cell; flushed so that progress is visible.
    //
    void print_time( duration_type d )
    {
        std::cout << std::setw(16) << d << std::flush;
    }

    //
    // Prints the average and deviation cells and ends the row.
    //
    void print_avrg( duration_type avrg, duration_type dev )
    {
        std::cout << std::setw(16) << avrg;
        std::cout << std::setw(16) << dev << std::endl;
    }

    //
    // Overwrites the whole cache_reset buffer with zeros. Called before
    // every timed run, presumably to push the working set out of the CPU
    // caches.
    //
    void evict()
    {
        std::fill(cache_reset.begin(),cache_reset.end(),0);
    }

    //
    // Make a cyclic list of length c and step s. Iterating through the
    // list visits the elements of p with step s: first 0, s, 2s, ...; once
    // the end of p is reached the walk continues with 1, 1+s, ... and so
    // on until every element has been visited, and then the last element
    // links back to p[0].
    //
    // p must point to at least max(c,1) elements: p[0].next is written
    // even when c is 0. A step of 0 yields the one-element cycle p -> p.
    //
    void make_cycle( elem_type * const p, const size_t c, const size_t s )
    {
        // A step >= c degenerates to plain sequential order; clamping it
        // keeps every index inside [0, c).
        const size_t stride = std::min( s, c );
        elem_type * h = p;
        for ( size_t i = 0; i < stride; ++i )
            for ( size_t k = i; k < c; k += stride )
            {
                h->next = p+k;
                h = p+k;
            }
        h->next = p;
    }
    //
    // Builds the cycle of length c and step s in p and prints the indices
    // of the list in visiting order, 15 per line.
    //
    void test_print_make_cycle( elem_type * const p, const size_t c, const size_t s )
    {
        std::cout << "list lenght=" << c << " step=" << s << std::endl;
        // Build the list in the array that is walked below.
        make_cycle( p, c, s );

        elem_type * h = p;
        int l = 0;
        while ( h->next != p )
        {
            std::cout << "[" << std::setw(2) << h->next - p << "] ";
            l = (l+1) % 15;
            if ( l == 0 )
                std::cout << std::endl;
            h = h->next;
        }
        std::cout << "[" << std::setw(2) << h->next - p << "]"
                  << std::endl << std::endl;
    }
    //
    // Prints a few sample cycles for visual inspection. Uses up to 32
    // elements of mem, so the working set must hold at least that many.
    //
    void test_make_cycle()
    {
        test_print_make_cycle( mem, 16, 1 );
        test_print_make_cycle( mem, 16, 2 );
        test_print_make_cycle( mem, 16, 4 );
        test_print_make_cycle( mem, 32, 4 );
        test_print_make_cycle( mem, 16, 8 );
        test_print_make_cycle( mem, 16, 16 );
        test_print_make_cycle( mem, 16, 17 );
        test_print_make_cycle( mem, 1, 17 );
        test_print_make_cycle( mem, 1, 1 );
    }
    //
    // Calculate the average of the test durations and the mean absolute
    // deviation from that average (both with integer division).
    // Returns (average, deviation); (0, 0) for an empty series.
    //
    std::pair<duration_type,duration_type>
    average( const duration_type * test, const size_t count )
    {
        if ( count == 0 )
            return std::make_pair( duration_type(0), duration_type(0) );
        // Divide in the signed type; dividing by a size_t would convert
        // the sum to unsigned.
        const duration_type n = static_cast<duration_type>( count );
        //
        // Calc average duration.
        //
        duration_type avrg = 0;
        for ( size_t i = 0; i < count; ++i )
            avrg += test[i];
        avrg /= n;
        //
        // Calc deviation from average duration.
        //
        duration_type dev = 0;
        for ( size_t i = 0; i < count; ++i )
            dev += std::abs( avrg - test[i] );
        dev /= n;
        //
        return std::make_pair(avrg,dev);
    }

    //
    // Follows cnt links starting at p and returns the elapsed time in
    // nanoseconds. Kept out of line so every call site measures the same
    // code.
    //
#if defined(_MSC_VER)
    __declspec(noinline)
#elif defined(__GNUC__)
    __attribute__((noinline))
#endif
    duration_type scan_read( elem_type const * const p, size_t cnt )
    {
        auto b = std::chrono::high_resolution_clock::now();
        //
        elem_type const * h = p;
        for ( size_t c = cnt; c; --c )
            h = h->next;
        //
        // Prevents optimisation of the loop: the pointer object itself is
        // volatile and is written on every call, so the final value of h
        // has to be computed.
        //
        elem_type const * volatile no_optimization = h;
        (void)no_optimization;
        //
        auto e = std::chrono::high_resolution_clock::now();
        return std::chrono::duration_cast<std::chrono::nanoseconds>(e-b).count();
    }

    //
    // Measures the read time for step s (in elements) over working sets of
    // 8, 16, 32, ... elements up to cnt, prints one table row per working
    // set and appends the averages to res; working sets smaller than the
    // step are recorded as -1. Throws std::invalid_argument if s < 1.
    //
    void benchmark_read_time(int s)
    {
        // A non-positive step converted to size_t would never be reached
        // by the working-set loop.
        if ( s < 1 )
            throw std::invalid_argument( "benchmark_read_time: step must be positive" );
        run_read_benchmark( static_cast<size_t>(s) );
    }

    //
    // Runs the measurement for every power-of-two step from 1 to cnt and
    // then prints a summary table: one row per working set, one column per
    // step. res is rebuilt from scratch on every call.
    //
    void benchmark_read_time()
    {
        // Results of earlier runs would break the row/column arithmetic.
        res.clear();

        size_t const se = cnt;
        size_t je = 0;                  // Number of steps (= columns).
        for ( size_t s = 1; s <= se; s <<=1 )
        {
            run_read_benchmark(s);
            ++je;
        }
        //
        // Print header.
        //
        std::cout << std::endl << "Read time test." << std::endl
            << std::setfill('-') << std::setw(90) << "" << std::endl
            << std::setfill(' ')
            << std::setw(10) << "wset";
        for ( size_t s = 1; s <= se; s <<=1 )
        {
            std::stringstream ss;
            ss << "s" << s*sizeof(elem_type);
            std::cout << std::setw(16) << ss.str();
        }
        std::cout << std::endl;
        //
        // Print results. res holds ie values per step, one step after
        // another, so row i consists of the elements i, i+ie, i+2*ie, ...
        //
        size_t const ie = je ? res.size()/je : 0;
        for ( size_t i = 0, c = 8; i < ie; ++i, c <<=1 )
        {
            std::cout << std::setw(10) << c*sizeof(elem_type);
            for ( size_t j = i; j < res.size(); j+=ie )
                std::cout << std::setw(16) << res[j];
            std::cout << std::endl;
        }
    }

private:

    //
    // Implementation of benchmark_read_time(int) for a validated step.
    //
    void run_read_benchmark( const size_t s )
    {
        std::stringstream ss;
        ss << "Memory continuous read. Step=" << s*sizeof(elem_type);
        print_hdr( ss.str().c_str(), "wset", "read" );

        const int trys = 3;
        duration_type test[trys];
        size_t c = 8;
        // Working sets smaller than the step cannot be measured.
        for ( ; c < s && c <= cnt; c <<=1 )
            res.push_back( -1 );
        for ( ; c <= cnt; c <<=1 )
        {
            print_param( c*sizeof(elem_type) );
            make_cycle( mem, c, s );

            for ( int t = 0; t < trys; ++t )
            {
                evict();
                // Always follows cnt links, whatever the working set is.
                test[t] = scan_read( mem, cnt );
                print_time( test[t] );
            }

            auto a = average( test, trys );
            res.push_back( a.first );
            print_avrg( a.first, a.second );
        }
    }

};
|
|
|
|
|
|
int main ( void )
|
|
{try{
|
|
|
|
tests t;
|
|
//t.test_make_cycle();
|
|
t.benchmark_read_time();
|
|
|
|
return 0;
|
|
}
|
|
catch ( const std::exception& e )
|
|
{
|
|
std::cerr << std::endl
|
|
<< "std::exception(\"" << e.what() << "\")." << std::endl;
|
|
return 2;
|
|
}
|
|
catch ( ... )
|
|
{
|
|
std::cerr << std::endl
|
|
<< "unknown exception." << std::endl;
|
|
return 1;
|
|
}}
|
|
|