1#ifndef AMPGEN_AVXd_TYPES
2#define AMPGEN_AVXd_TYPES 1
// External vector sincos routine operating on four doubles at once. Judging by
// the call site in this header, the second and third arguments are vectors of
// 64-bit addresses telling the routine where to write each lane's sine and
// cosine respectively.
15extern "C" void _ZGVdN4vvv_sincos(__m256d x, __m256i ptrs, __m256i ptrc);
// Declares the 4-lane double-precision libmvec routine for `function_name` and
// wraps it as an inline overload that takes and returns real_v.
//
// The AVX2 ('d') symbol variant is bound here, matching the _ZGVdN4vvv_sincos
// declaration used elsewhere in this header. The rest of the header already
// relies on AVX2-only intrinsics, so binding to the plain-AVX ('c') variant
// was inconsistent with the sincos path.
#define libmvec_alias( function_name) \
  extern "C" __m256d _ZGVdN4v_##function_name(__m256d x); \
  inline real_v function_name( const real_v& v ){ return _ZGVdN4v_##function_name (v) ; }
// Scalar fallback: defines F(real_v) by calling std::F on each of the four
// elements read through to_ptr() and packing the results into a new real_v.
#define libmvec_alias( F )                   \
  inline real_v F( const real_v& v )         \
  {                                          \
    auto lanes = v.to_ptr();                 \
    return real_v( std::F( lanes[0] ),       \
                   std::F( lanes[1] ),       \
                   std::F( lanes[2] ),       \
                   std::F( lanes[3] ) );     \
  }
// Number of double-precision elements held by one vector.
32 static constexpr unsigned size = 4;
// Builds a vector from four scalars. _mm256_set_pd takes its arguments from the
// highest element down, so passing (x3,x2,x1,x0) places x0 in element 0 and x3
// in element 3.
37 real_v(
const double& x0,
const double& x1,
const double& x2,
const double& x3 )
39 data = _mm256_set_pd(x3,x2,x1,x0);
41 explicit real_v(
const double* f ) :
data( _mm256_loadu_pd( f ) ) {}
42 real_v(
const std::array<double,4> f ) :
data( _mm256_loadu_pd( f.
data() ) ) {}
43 void store(
double* ptr )
const { _mm256_storeu_pd( ptr,
data ); }
44 const double*
to_ptr()
const {
return reinterpret_cast<const double*
>( &
data ) ; }
45 double*
to_ptr() {
return reinterpret_cast<double*
>( &
data ) ; }
46 std::array<double, 4>
to_array()
const { std::array<double, 4> b;
store( &b[0] );
return b; }
47 double at(
const unsigned i)
const {
return to_ptr()[i]; }
48 operator __m256d()
const {
return data ; }
// Converts each element to a 64-bit integer.
// Step 1: round every element towards negative infinity.
// Step 2: add the constant 0x0018000000000000 (the integer 1.5 * 2^52, converted
//         to double by _mm256_set1_pd), reinterpret the sum as 64-bit integers,
//         and subtract the bit pattern of that same constant.
// NOTE(review): this looks like the usual magic-number double -> int64 trick,
// which is presumably only exact for magnitudes below 2^51 - confirm the
// expected input range with callers.
56 auto xr = _mm256_round_pd(
data, _MM_FROUND_TO_NEG_INF);
57 return _mm256_sub_epi64(_mm256_castpd_si256(_mm256_add_pd(xr, _mm256_set1_pd(0x0018000000000000))),
58 _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000)));
79 inline real_v abs (
const real_v& v ) {
return _mm256_andnot_pd(_mm256_set1_pd(-0.), v); }
80 inline real_v real_v::operator+=(
const real_v& rhs ){ *
this = *
this + rhs;
return *
this; }
81 inline real_v real_v::operator-=(
const real_v& rhs ){ *
this = *
this - rhs;
return *
this; }
82 inline real_v real_v::operator*=(
const real_v& rhs ){ *
this = *
this * rhs;
return *
this; }
83 inline real_v real_v::operator/=(
const real_v& rhs ){ *
this = *
this / rhs;
return *
this; }
88 inline
real_v log( const
real_v& v ){
return real_v( std::log(v.at(0)), std::log(v.at(1)), std::log(v.at(2)), std::log(v.at(3))) ; }
// Build one destination address per element: the address of s (resp. c)
// broadcast to all four 64-bit slots, plus byte offsets 0, 8, 16, 24 - i.e.
// consecutive doubles inside the output object. The external vector routine
// then writes the sines through sp and the cosines through cp.
92 __m256i sp = _mm256_add_epi64(_mm256_set1_epi64x((uint64_t)&s),_mm256_set_epi64x(24,16,8,0));
93 __m256i cp = _mm256_add_epi64(_mm256_set1_epi64x((uint64_t)&c),_mm256_set_epi64x(24,16,8,0));
94 _ZGVdN4vvv_sincos(v,sp,cp);
// Copies the four 64-bit integer elements of v into a std::array.
// The destination is declared alignas(32) because _mm256_store_si256 is the
// aligned store and requires a 32-byte aligned address.
// NOTE(review): the return statement is outside the lines shown here;
// presumably rt is returned - confirm.
100 inline std::array<int64_t, real_v::size>
store(
const __m256i& v )
102 alignas(32) std::array<int64_t, real_v::size> rt;
103 _mm256_store_si256( (__m256i*)&rt[0], v);
// Evaluates sincos once and collects both outputs in a pair. Going by the
// parameter names of sincos(v, s, c), first presumably holds the sine and
// second the cosine - confirm against the sincos definition.
109 std::pair<real_v, real_v> rt;
110 sincos( v, rt.first, rt.second );
// Element-wise atan2(y, x): read both operands through to_ptr() and call the
// scalar std::atan2 once per element, with y as the first argument.
125 const double* bx = x.
to_ptr();
126 const double* by = y.
to_ptr();
127 return real_v (std::atan2(by[0], bx[0]), std::atan2( by[1], bx[1]), std::atan2( by[2], bx[2]), std::atan2( by[3], bx[3]) );
// Gathers four doubles from base_addr. The offsets are converted to 64-bit
// integers with to_int() and scaled by sizeof(double), so each offset is an
// element index into base_addr rather than a byte offset.
131 return _mm256_i64gather_pd(base_addr, offsets.
to_int(),
sizeof(
double));
// Fused multiply-add on every element: a * b + c in a single instruction.
135 return _mm256_fmadd_pd(a, b, c);
// Per element: yields r where a > 0 and -r otherwise, assuming
// select(mask, x, y) picks x where the mask is set - TODO confirm against the
// select definition. The computation of r is outside the lines shown here.
141 return select( a > 0., r, -r );
// Writes the four elements to the stream in index order, each followed by a
// single space (so the output ends with a trailing space).
146 for(
unsigned i = 0 ; i != 4; ++i ) os << data[i] <<
" ";
// Register '+' as a user-defined OpenMP reduction for real_v and complex_v so
// these types can appear in reduction(+: ...) clauses; partial results are
// combined with the types' own operator+.
156#pragma omp declare reduction(+: real_v: \
157 omp_out = omp_out + omp_in)
158 #pragma omp declare reduction(+: complex_v: \
159 omp_out = omp_out + omp_in)
real_v operator<(const real_v &lhs, const real_v &rhs)
real_v operator+(const real_v &lhs, const real_v &rhs)
real_v log(const real_v &v)
real_v fmadd(const real_v &a, const real_v &b, const real_v &c)
real_v operator&(const real_v &lhs, const real_v &rhs)
real_v select(const real_v &mask, const real_v &a, const real_v &b)
real_v operator<=(const real_v &lhs, const real_v &rhs)
std::ostream & operator<<(std::ostream &os, const real_v &obj)
real_v abs(const real_v &v)
real_v operator>(const real_v &lhs, const real_v &rhs)
real_v operator^(const real_v &lhs, const real_v &rhs)
real_v operator!(const real_v &x)
real_v cos(const real_v &v)
real_v operator/(const real_v &lhs, const real_v &rhs)
real_v operator|(const real_v &lhs, const real_v &rhs)
real_v operator-(const real_v &lhs, const real_v &rhs)
real_v sqrt(const real_v &v)
real_v operator&&(const real_v &lhs, const real_v &rhs)
real_v exp(const real_v &v)
real_v operator>=(const real_v &lhs, const real_v &rhs)
real_v remainder(const real_v &a, const real_v &b)
std::array< int64_t, real_v::size > store(const __m256i &v)
void sincos(const real_v &v, real_v &s, real_v &c)
real_v sign(const real_v &v)
real_v fmod(const real_v &a, const real_v &b)
real_v operator==(const real_v &lhs, const real_v &rhs)
real_v atan2(const real_v &y, const real_v &x)
real_v conj(const real_v &v)
real_v operator||(const real_v &lhs, const real_v &rhs)
real_v gather(const double *base_addr, const real_v &offsets)
real_v operator*(const real_v &lhs, const real_v &rhs)
real_v tan(const real_v &v)
real_v sin(const real_v &v)
Complex< real_v > complex_v
void store(double *ptr) const
real_v operator-=(const real_v &rhs)
real_v(const std::array< double, 4 > f)
static constexpr unsigned size
real_v operator*=(const real_v &rhs)
real_v operator/=(const real_v &rhs)
real_v operator+=(const real_v &rhs)
real_v(const double &x0, const double &x1, const double &x2, const double &x3)
double at(const unsigned i) const
const double * to_ptr() const
std::array< double, 4 > to_array() const