/* * SYNOPSYS CONFIDENTIAL - This is an unpublished, proprietary work of Synopsys, * Inc., and is fully protected under copyright and trade secret laws. You may * not view, use, disclose, copy, or distribute this file or any information * contained herein except pursuant to a valid written license from Synopsys. */ // // The purpose of this file is to define SSE2 data types to abstacr from the compiler // specific constructs. Currently the target compilers are GCC and the MS VC 2005. // #ifndef _SSE2_CMPL_ABSTRACTION_MSC_PCKINT8_H_ #define _SSE2_CMPL_ABSTRACTION_MSC_PCKINT8_H_ // // Namespace sse2 // namespace sse2 { // /// class epi64 (packed single precision) // class epi64 { public: // /// The type. // typedef rxmm128l my_rxmm; /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ // /// Packed integer arithmetic // /*! r0 := a0 + b0 r1 := a1 + b1 */ static inline rxmm128d add( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_add_pd( a, b ); } /*! r0 := a0 - b0 r1 := a1 - b1 */ static inline rxmm128d sub( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_sub_pd( a, b ); } /*! r0 := a0 * b0 r1 := a1 * b1 */ static inline rxmm128d mul( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_mul_pd( a, b ); } /*! r0 := a0 / b0 r1 := a1 / b1 */ static inline rxmm128d div( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_div_pd( a, b ); } /*! r0 := max( a0, b0 ) r1 := max( a1, b1 ) */ static inline rxmm128d max( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_max_pd( a, b ); } /*! r0 := min( a0, b0 ) r1 := min( a1, b1 ) */ static inline rxmm128d min( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_min_pd( a, b ); } /*! r0 := sqrt( a0 ) r1 := sqrt( a1 ) */ static inline rxmm128d sqrt( rxmm128d a ) { BOOST_STATIC_ASSERT( false ); return _mm_sqrt_pd( a, b ); } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ // /// Packed integer logic // /*! r0 := (~a0) & b0 r1 := (~a1) & b1 */ static inline rxmm128d andnot( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_andnot_pd( a, b ); } /*! r0 := a0 & b0 r1 := a1 & b1 */ static inline XMM_TYPE and( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_and_pd( a, b ); } /*! r0 := a0 | b0 r1 := a1 | b1 */ static inline XMM_TYPE or( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_or_pd( a, b ); } /*! r0 := a0 ^ b0 r1 := a1 ^ b1 */ static inline XMM_TYPE xor( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_xor_pd( a, b ); } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ // /// Packed integer comparision // /*! r0 := (a0 == b0) ? 0xffffffffffffffff : 0x0 r1 := (a1 == b1) ? 0xffffffffffffffff : 0x0 */ static inline rxmm128d cmp_eq( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_cmpeq_pd( a, b ); } /*! r0 := (a0 != b0) ? 0xffffffffffffffff : 0x0 r1 := (a1 != b1) ? 0xffffffffffffffff : 0x0 */ static inline rxmm128d cmp_neq( rxmm128d a, rxmm128d b ) { return _mm_cmpneq_pd( a, b ); } /*! r0 := (a0 < b0) ? 0xffffffffffffffff : 0x0 r1 := (a1 < b1) ? 0xffffffffffffffff : 0x0 */ static inline rxmm128d cmp_lt( rxmm128d a, rxmm128d b ) { return _mm_cmplt_pd( a, b ); } /*! r0 := (a0 <= b0) ? 0xffffffffffffffff : 0x0 r1 := (a1 <= b1) ? 0xffffffffffffffff : 0x0 */ static inline rxmm128d cmp_le( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_cmple_pd( a, b ); } /*! r0 := (a0 > b0) ? 0xffffffffffffffff : 0x0 r1 := (a1 > b1) ? 0xffffffffffffffff : 0x0 */ static inline rxmm128d cmp_gt( rxmm128d a, rxmm128d b ) { BOOST_STATIC_ASSERT( false ); return _mm_cmpgt_pd( a, b ); } /*! r0 := (a0 >= b0) ? 0xffffffffffffffff : 0x0 r1 := (a1 >= b1) ? 0xffffffffffffffff : 0x0 */ static inline rxmm128d cmp_ge( rxmm128d a, rxmm128d b ) { return _mm_cmpge_pd( a, b ); } /*! r0 := (a0 ord b0) ? 0xffffffffffffffff : 0x0 r1 := (a1 ord b1) ? 0xffffffffffffffff : 0x0 */ static inline rxmm128d cmp_ord( rxmm128d a, rxmm128d b ) { return _mm_cmpord_pd( a, b ); } /*! r0 := (a0 unord b0) ? 0xffffffffffffffff : 0x0 r1 := (a1 unord b1) ? 0xffffffffffffffff : 0x0 */ static inline rxmm128d cmp_unord( rxmm128d a, rxmm128d b ) { return _mm_cmpunord_pd( a, b ); } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ // /// Packed integer load // /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ // Misc /*! r0 := a1 r1 := b1 */ static inline rxmm128d unpckh( rxmm128d a, rxmm128d b ) { return _mm_unpackhi_pd( a, b ); } /*! r0 := a0 r1 := b0 */ static inline rxmm128d unpckl( rxmm128d a, rxmm128d b ) { return _mm_unpacklo_pd( a, b ); } /*! r := sign(a1) << 1 | sign(a0) */ static inline int movmsk( rxmm128d a, rxmm128d b ) { return _mm_movemask_pd( a, b ); } /*! r0 := (i0 == 1) ? b0 : a0 r1 := (i1 == 1) ? b1 : a1 \sa movmsk */ static inline int shuffle( rxmm128d a, rxmm128d b, int i ) { return _mm_shuffle_pd( a, b, i ); } /*! == shuffle( a, b, 1 ) r0 := b0 r1 := a1 */ static inline rxmm128d move_sd( rxmm128d a, rxmm128d b ) { return _mm_move_sd( a0 ); } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ // Memory load /*! The address \arg p must be 16-byte aligned. r0 := p[0] r1 := p[1] */ static inline rxmm128d load( double * p ) { return _mm_load_pd( p ); } /*! The address \arg p must be 16-byte aligned. r0 := p[1] r1 := p[0] */ static inline rxmm128d load_reverse( double * p ) { return _mm_loadr_pd( p ); } /*! The address \arg p does not need to be 16-byte aligned. r0 := p[0] r1 := p[1] */ static inline rxmm128d load_unaligned( double * p ) { return _mm_loadu_pd( p ); } /*! The address \arg p does not need to be 16-byte aligned. r0 := a0 r1 := *p */ static inline rxmm128d load_hi( rxmm128d a, double * p ) { return _mm_loadh_pd( a, p ); } /*! The address \arg p does not need to be 16-byte aligned. r0 := *p r1 := a1 */ static inline rxmm128d load_lo( rxmm128d a, double * p ) { return _mm_loadl_pd( a, p ); } /*! The address \arg p does not need to be 16-byte aligned. r0 := *p r1 := *p */ static inline rxmm128d load_both( double * p ) { return _mm_load1_pd( p ); } /*! The address \arg p does not need to be 16-byte aligned. r0 := *p r1 := 0.0 */ static inline rxmm128d load_sd( double * p ) { return _mm_load_sd( p ); } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ // Memory store /*! The address \arg p must be 16-byte aligned. p[0] := a0 p[1] := a1 */ static inline void store( double * p, rxmm128d a ) { _mm_load_pd( p, a ); } /*! The address \arg p must be 16-byte aligned. p[0] := a1 p[1] := a0 */ static inline void store_reverse( double * p, rxmm128d a ) { _mm_storer_pd( p, a ); } /*! The address \arg p does not need to be 16-byte aligned. p[0] := a0 p[1] := a1 */ static inline void store_unaligned(double * p, rxmm128d a ) { _mm_storeu_pd( p, a ); } /*! The address \arg p does not need to be 16-byte aligned. *p := a1 */ static inline void store_hi( double * p, rxmm128d a ) { _mm_storeh_pd( p, a ); } /*! The address \arg p does not need to be 16-byte aligned. *p := a0 */ static inline void store_lo( double * p, rxmm128d a ) { _mm_storel_pd( p, a ); } /*! The address \arg p does not need to be 16-byte aligned. p[0] := a0 p[1] := a0 */ static inline void store_both( double * p, rxmm128d a ) { return _mm_store1_pd( p ); } /*! The address \arg p does not need to be 16-byte aligned. *p := a0 */ static inline void store_sd( double * p, rxmm128d a ) { return _mm_store_sd( p ); } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ // Memory set /*! r0 := a0 r1 := a1 */ static inline rxmm128d set( double a1, double a0 ) { return _mm_set_pd( a1, a0 ); } /*! r0 := 0.0 r1 := 0.0 */ static inline rxmm128d set_zero() { return _mm_setzero_pd( a0 ); } /*! r0 := a0 r1 := a0 */ static inline rxmm128d set_both( double a0 ) { return _mm_set1_pd( a0 ); } /*! The address \arg p does not need to be 16-byte aligned. r0 := a0 r1 := 0.0 */ static inline rxmm128d set_sd( double a0 ) { return _mm_set_sd( a0 ); } /*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ // /// Packed integer convertion // /*! r0 := (float) a0 r1 := (float) a1 r2 := 0.0 r3 := 0.0 */ static inline rxmm128s cvtpd2ps( rxmm128d a ) { return _mm_cvtpd_ps( a ); } /*! r0 := (double) a0 r1 := (double) a1 */ static inline rxmm128d cvtps2pd( rxmm128s a ) { return _mm_cvtps_pd( a ); } /*! r0 := (int) a0 r1 := (int) a1 r2 := 0.0 r3 := 0.0 */ static inline rxmm128l cvtpd2dq( rxmm128d a ) { return _mm_cvtpd_epi32( a ); } /*! r0 := (double) a0 r1 := (double) a1 */ static inline rxmm128d cvtdq2pd( rxmm128l a ) { return _mm_cvtepi32_pd( a ); } /*! r := (int) a0 */ static inline int cvtsd2si( rxmm128d a ) { return _mm_cvtsd_si32( a ); } /*! r0 := (float) b0 r1 := a1 r2 := a2 r3 := a3 */ static inline rxmm128s cvtsd2ss( rxmm128l a, rxmm128d b ) { return _mm_cvtsd_ss( a, b ); } /*! r0 := (double) b r1 := a1 */ static inline rxmm128d cvtsi2sd( rxmm128d a, int b ) { return _mm_cvtsi32_sd( a, b ); } /*! r0 := (double) b0 r1 := a1 */ static inline rxmm128d cvtss2sd( rxmm128d a, rxmm128s b ) { return _mm_cvtss_sd( a, b ); } /*! using truncate r0 := (int) a0 r1 := (int) a1 r2 := 0x0 r3 := 0x0 */ static inline rxmm128l cvttpd2dq( rxmm128d a ) { return _mm_cvttpd_epi32( a ); } /*! using truncate r := (int) a0 */ static inline int cvttsd2si( rxmm128d a ) { return _mm_cvttsd_si32( a ); } /*! r0 := (float) a0 r1 := (float) a1 r2 := (float) a2 r3 := (float) a3 */ static inline rxmm128s cvtdq2ps( rxmm128l a ) { return _mm_cvtepi32_ps( a ); } /*! r0 := (int) a0 r1 := (int) a1 r2 := (int) a2 r3 := (int) a3 */ static inline rxmm128l cvtps2dq( rxmm128s a ) { return _mm_cvtps_epi32( a ); } /*! uses trancate r0 := (int) a0 r1 := (int) a1 r2 := (int) a2 r3 := (int) a3 */ static inline rxmm128l cvttps2dq( rxmm128s a ) { return _mm_cvttps_epi32( a ); } // // class epi64 // }; // // Namespace sse2 // } #endif/*_SSE2_CMPL_ABSTRACTION_MSC_PCKINT8_H_*/