Reorganizing some files.

This commit is contained in:
2015-02-08 23:37:56 +04:00
parent 7a53e5a3a1
commit 12af9d1b7f
22 changed files with 0 additions and 270 deletions

View File

@@ -0,0 +1,667 @@
/*
* SYNOPSYS CONFIDENTIAL - This is an unpublished, proprietary work of Synopsys,
* Inc., and is fully protected under copyright and trade secret laws. You may
* not view, use, disclose, copy, or distribute this file or any information
* contained herein except pursuant to a valid written license from Synopsys.
*/
//
// The purpose of this file is to define SSE2 data types that abstract away
// compiler-specific constructs. Currently the target compilers are GCC and MS VC 2005.
//
#ifndef _SSE2_CMPL_ABSTRACTION_MSC_PCKFLOAT_H_
#define _SSE2_CMPL_ABSTRACTION_MSC_PCKFLOAT_H_
//
// Namespace sse2
//
namespace sse2
{
//
/// class ps (packed single precision)
//
class ps
{
public:
//
/// The type.
//
typedef rxmm128s my_rxmm;
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
//
/// Packed double arithmetic
//
/*!
r0 := a0 + b0
r1 := a1 + b1
r2 := a2 + b2
r3 := a3 + b3
*/
static inline rxmm128s add( rxmm128s a, rxmm128s b )
{
return _mm_add_ps( a, b );
}
/*!
r0 := a0 - b0
r1 := a1 - b1
r2 := a2 - b2
r3 := a3 - b3
*/
static inline rxmm128s sub( rxmm128s a, rxmm128s b )
{
return _mm_sub_ps( a, b );
}
/*!
r0 := a0 * b0
r1 := a1 * b1
r2 := a2 * b2
r3 := a3 * b3
*/
static inline rxmm128s mul( rxmm128s a, rxmm128s b )
{
return _mm_mul_ps( a, b );
}
/*!
r0 := a0 / b0
r1 := a1 / b1
r2 := a2 / b2
r3 := a3 / b3
*/
static inline rxmm128s div( rxmm128s a, rxmm128s b )
{
return _mm_div_ps( a, b );
}
/*!
r0 := max(a0, b0)
r1 := max(a1, b1)
r2 := max(a2, b2)
r3 := max(a3, b3)
*/
static inline rxmm128s max( rxmm128s a, rxmm128s b )
{
return _mm_max_ps( a, b );
}
/*!
r0 := min(a0, b0)
r1 := min(a1, b1)
r2 := min(a2, b2)
r3 := min(a3, b3)
*/
static inline rxmm128s min( rxmm128s a, rxmm128s b )
{
return _mm_min_ps( a, b );
}
/*!
r0 := sqrt(a0)
r1 := sqrt(a1)
r2 := sqrt(a2)
r3 := sqrt(a3)
*/
static inline rxmm128s sqrt( rxmm128s a )
{
return _mm_sqrt_ps( a );
}
/*!
r0 := recip(a0)
r1 := recip(a1)
r2 := recip(a2)
r3 := recip(a3)
*/
static inline rxmm128s rcp( rxmm128s a )
{
return _mm_rcp_ps( a );
}
/*!
r0 := recip(sqrt(a0))
r1 := recip(sqrt(a1))
r2 := recip(sqrt(a2))
r3 := recip(sqrt(a3))
*/
static inline rxmm128s rsqrt( rxmm128s a )
{
return _mm_rsqrt_ps( a );
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
//
/// Packed double logic
//
/*!
r0 := ~a0 & b0
r1 := ~a1 & b1
r2 := ~a2 & b2
r3 := ~a3 & b3
*/
static inline rxmm128s andnot( rxmm128s a, rxmm128s b )
{
return _mm_andnot_ps( a, b );
}
/*!
r0 := a0 & b0
r1 := a1 & b1
*/
static inline XMM_TYPE and( rxmm128s a, rxmm128s b )
{
return _mm_and_ps( a, b );
}
/*!
r0 := a0 | b0
r1 := a1 | b1
*/
static inline XMM_TYPE or( rxmm128s a, rxmm128s b )
{
return _mm_or_ps( a, b );
}
/*!
r0 := a0 ^ b0
r1 := a1 ^ b1
r2 := a2 ^ b2
r3 := a3 ^ b3
*/
static inline XMM_TYPE xor( rxmm128s a, rxmm128s b )
{
return _mm_xor_ps( a, b );
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
//
/// Packed double comparision
//
/*!
r0 := (a0 == b0) ? 0xffffffff : 0x0
r1 := (a1 == b1) ? 0xffffffff : 0x0
r2 := (a2 == b2) ? 0xffffffff : 0x0
r3 := (a3 == b3) ? 0xffffffff : 0x0
*/
static inline rxmm128s cmp_eq( rxmm128s a, rxmm128s b )
{
BOOST_STATIC_ASSERT( false );
return _mm_cmpeq_ps( a, b );
}
/*!
r0 := (a0 != b0) ? 0xffffffff : 0x0
r1 := (a1 != b1) ? 0xffffffff : 0x0
r2 := (a2 != b2) ? 0xffffffff : 0x0
r3 := (a3 != b3) ? 0xffffffff : 0x0
*/
static inline rxmm128s cmp_neq( rxmm128s a, rxmm128s b )
{
return _mm_cmpneq_ps( a, b );
}
/*!
r0 := (a0 < b0) ? 0xffffffff : 0x0
r1 := (a1 < b1) ? 0xffffffff : 0x0
r2 := (a2 < b2) ? 0xffffffff : 0x0
r3 := (a3 < b3) ? 0xffffffff : 0x0
*/
static inline rxmm128s cmp_lt( rxmm128s a, rxmm128s b )
{
return _mm_cmplt_ps( a, b );
}
/*!
r0 := (a0 <= b0) ? 0xffffffff : 0x0
r1 := (a1 <= b1) ? 0xffffffff : 0x0
r2 := (a2 <= b2) ? 0xffffffff : 0x0
r3 := (a3 <= b3) ? 0xffffffff : 0x0
*/
static inline rxmm128s cmp_le( rxmm128s a, rxmm128s b )
{
BOOST_STATIC_ASSERT( false );
return _mm_cmple_ps( a, b );
}
/*!
r0 := (a0 > b0) ? 0xffffffff : 0x0
r1 := (a1 > b1) ? 0xffffffff : 0x0
r2 := (a2 > b2) ? 0xffffffff : 0x0
r3 := (a3 > b3) ? 0xffffffff : 0x0
*/
static inline rxmm128s cmp_gt( rxmm128s a, rxmm128s b )
{
BOOST_STATIC_ASSERT( false );
return _mm_cmpgt_ps( a, b );
}
/*!
r0 := (a0 >= b0) ? 0xffffffff : 0x0
r1 := (a1 >= b1) ? 0xffffffff : 0x0
r2 := (a2 >= b2) ? 0xffffffff : 0x0
r3 := (a3 >= b3) ? 0xffffffff : 0x0
*/
static inline rxmm128s cmp_ge( rxmm128s a, rxmm128s b )
{
return _mm_cmpge_ps( a, b );
}
/*!
r0 := (a0 ord b0) ? 0xffffffff : 0x0
r1 := (a1 ord b1) ? 0xffffffff : 0x0
r2 := (a2 ord b2) ? 0xffffffff : 0x0
r3 := (a3 ord b3) ? 0xffffffff : 0x0
*/
static inline rxmm128s cmp_ord( rxmm128s a, rxmm128s b )
{
return _mm_cmpord_ps( a, b );
}
/*!
r0 := (a0 unord b0) ? 0xffffffff : 0x0
r1 := (a1 unord b1) ? 0xffffffff : 0x0
r2 := (a2 unord b2) ? 0xffffffff : 0x0
r3 := (a3 unord b3) ? 0xffffffff : 0x0
*/
static inline rxmm128s cmp_unord( rxmm128s a, rxmm128s b )
{
return _mm_cmpunord_ps( a, b );
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
//
/// Packed double load
//
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
// Misc
/*!
r0 := a2
r1 := b2
r2 := a3
r3 := b3
*/
static inline rxmm128s unpckh( rxmm128s a, rxmm128s b )
{
return _mm_unpackhi_ps( a, b );
}
/*!
r0 := a0
r1 := b0
r2 := a1
r3 := b1
*/
static inline rxmm128s unpckl( rxmm128s a, rxmm128s b )
{
return _mm_unpacklo_ps( a, b );
}
/*!
r := sign(a3)<<3 | sign(a2)<<2 | sign(a1)<<1 | sign(a0)
*/
static inline int movmsk( rxmm128s a, rxmm128s b )
{
return _mm_movemask_ps( a, b );
}
/*!
r0 := (i0 == 1) ? b0 : a0
r1 := (i1 == 1) ? b1 : a1
r2 := (i2 == 1) ? b2 : a2
r3 := (i3 == 1) ? b3 : a3
\sa movmsk
*/
static inline int shuffle( rxmm128s a, rxmm128s b, int i )
{
return _mm_shuffle_ps( a, b, i );
}
/*!
r3 := a3
r2 := a2
r1 := b3
r0 := b2
*/
static inline rxmm128s move_hl( rxmm128s a, rxmm128s b )
{
return mm_movehl_ps( a0 );
}
/*!
r3 := b1
r2 := b0
r1 := a1
r0 := a0
*/
static inline rxmm128s move_lh( rxmm128s a, rxmm128s b )
{
return _mm_movelh_ps( a0 );
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
// Memory load
/*!
The address \arg p must be 16-byte aligned.
r0 := p[0]
r1 := p[1]
r2 := p[2]
r3 := p[3]
*/
static inline rxmm128s load( float * p )
{
return _mm_load_ps( p );
}
/*!
The address \arg p must be 16-byte aligned.
r0 := p[3]
r1 := p[2]
r2 := p[1]
r3 := p[0]
*/
static inline rxmm128s load_reverse( float * p )
{
return _mm_loadr_ps( p );
}
/*!
The address \arg p does not need to be 16-byte aligned.
r0 := p[0]
r1 := p[1]
r2 := p[2]
r3 := p[3]
*/
static inline rxmm128s load_unaligned( float * p )
{
return _mm_loadu_ps( p );
}
/*!
The address \arg p does not need to be 16-byte aligned.
r0 := *p
r1 := *p
r2 := *p
r3 := *p
*/
static inline rxmm128s load_both( float * p )
{
return _mm_load1_ps( p );
}
/*!
The address \arg p does not need to be 16-byte aligned.
r0 := *p
r1 := 0.0
r2 := 0.0
r3 := 0.0
*/
static inline rxmm128s load_s( float * p )
{
return _mm_load_ss( p );
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
// Memory store
/*!
The address \arg p must be 16-byte aligned.
p[0] := a0
p[1] := a1
p[2] := a2
p[3] := a3
*/
static inline void store( float * p, rxmm128s a )
{
_mm_store_ps( p, a );
}
/*!
The address \arg p must be 16-byte aligned.
p[0] := a3
p[1] := a2
p[2] := a1
p[3] := a0
*/
static inline void store_reverse( float * p, rxmm128s a )
{
_mm_storer_ps( p, a );
}
/*!
The address \arg p does not need to be 16-byte aligned.
p[0] := a0
p[1] := a1
p[2] := a2
p[3] := a3
*/
static inline void store_unaligned(float * p, rxmm128s a )
{
_mm_storeu_ps( p, a );
}
/*!
The address \arg p does not need to be 16-byte aligned.
p[0] := a0
p[1] := a0
*/
static inline void store_both( float * p, rxmm128s a )
{
return _mm_store1_ps( p );
}
/*!
The address \arg p does not need to be 16-byte aligned.
*p := a0
*/
static inline void store_s( float * p, rxmm128s a )
{
return _mm_store_ss( p );
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
// Memory set
/*!
r0 := a0
r1 := a1
r2 := a2
r3 := a3
*/
static inline rxmm128s set( float a3, float a2, float a1, float a0 )
{
return _mm_set_ps( a3, a2, a1, a0 );
}
/*!
r0 := 0.0
r1 := 0.0
r2 := 0.0
r3 := 0.0
*/
static inline rxmm128s set_zero()
{
return _mm_setzero_ps( a0 );
}
/*!
r0 := a0
r1 := a0
r2 := a0
r3 := a0
*/
static inline rxmm128s set_both( float a0 )
{
return _mm_set1_ps( a0 );
}
/*!
The address \arg p does not need to be 16-byte aligned.
r0 := a0
r1 := 0.0
r2 := 0.0
r3 := 0.0
*/
static inline rxmm128s set_s( float a0 )
{
return _mm_set_ss( a0 );
}
/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/
//
/// Packed double convertion
//
/*!
r0 := (float) a0
r1 := (float) a1
r2 := 0.0
r3 := 0.0
*/
static inline rxmm128s cvtpd2ps( rxmm128s a )
{
return _mm_cvtpd_ps( a );
}
/*!
r0 := (double) a0
r1 := (double) a1
*/
static inline rxmm128s cvtps2pd( rxmm128s a )
{
return _mm_cvtps_ps( a );
}
/*!
r0 := (int) a0
r1 := (int) a1
r2 := 0.0
r3 := 0.0
*/
static inline rxmm128l cvtpd2dq( rxmm128s a )
{
return _mm_cvtpd_epi32( a );
}
/*!
r0 := (double) a0
r1 := (double) a1
*/
static inline rxmm128s cvtdq2pd( rxmm128l a )
{
return _mm_cvtepi32_ps( a );
}
/*!
r := (int) a0
*/
static inline int cvtsd2si( rxmm128s a )
{
return _mm_cvtsd_si32( a );
}
/*!
r0 := (float) b0
r1 := a1
r2 := a2
r3 := a3
*/
static inline rxmm128s cvtsd2ss( rxmm128l a, rxmm128s b )
{
return _mm_cvtsd_ss( a, b );
}
/*!
r0 := (double) b
r1 := a1
*/
static inline rxmm128s cvtsi2sd( rxmm128s a, int b )
{
return _mm_cvtsi32_sd( a, b );
}
/*!
r0 := (double) b0
r1 := a1
*/
static inline rxmm128s cvtss2sd( rxmm128s a, rxmm128s b )
{
return _mm_cvtss_sd( a, b );
}
/*!
using truncate
r0 := (int) a0
r1 := (int) a1
r2 := 0x0
r3 := 0x0
*/
static inline rxmm128l cvttpd2dq( rxmm128s a )
{
return _mm_cvttpd_epi32( a );
}
/*!
using truncate
r := (int) a0
*/
static inline int cvttsd2si( rxmm128s a )
{
return _mm_cvttsd_si32( a );
}
/*!
r0 := (float) a0
r1 := (float) a1
r2 := (float) a2
r3 := (float) a3
*/
static inline rxmm128s cvtdq2ps( rxmm128l a )
{
return _mm_cvtepi32_ps( a );
}
/*!
r0 := (int) a0
r1 := (int) a1
r2 := (int) a2
r3 := (int) a3
*/
static inline rxmm128l cvtps2dq( rxmm128s a )
{
return _mm_cvtps_epi32( a );
}
/*!
uses trancate
r0 := (int) a0
r1 := (int) a1
r2 := (int) a2
r3 := (int) a3
*/
static inline rxmm128l cvttps2dq( rxmm128s a )
{
return _mm_cvttps_epi32( a );
}
//
// class ps
//
};
//
// Namespace sse2
//
}
#endif/*_SSE2_CMPL_ABSTRACTION_MSC_PCKFLOAT_H_*/