1#ifndef AMPGEN_AVXd_TYPES
2#define AMPGEN_AVXd_TYPES 1
13 #define stl_fallback( x ) \
14 inline real_v x( const real_v& v ){ auto a = v.to_array(); return real_v( std::x(a[0]), std::x(a[1]), std::x(a[2]), std::x(a[3]), std::x(a[4]), std::x(a[5]), std::x(a[6]), std::x(a[7]) ) ; }
18 static constexpr unsigned size = 8;
24 const double& x0,
const double& x1,
const double& x2,
const double& x3,
25 const double& x4,
const double& x5,
const double& x6,
const double& x7)
27 double tmp[8] = {x0,x1,x2,x3,x4,x5,x6,x7};
28 data = _mm512_loadu_pd(tmp);
30 real_v(
const double* f ) :
data( _mm512_loadu_pd( f ) ) {}
31 void store(
double* ptr )
const { _mm512_storeu_pd( ptr,
data ); }
32 std::array<double, 8>
to_array()
const { std::array<double, 8> b;
store( &b[0] );
return b; }
33 double at(
const unsigned i)
const {
return to_array()[i] ; }
34 operator __m512d()
const {
return data ; }
52 inline __mmask8
operator<(
const real_v& lhs,
const real_v& rhs ) {
return _mm512_cmp_pd_mask( lhs, rhs, _CMP_LT_OS ); }
53 inline __mmask8
operator>(
const real_v& lhs,
const real_v& rhs ) {
return _mm512_cmp_pd_mask( lhs, rhs, _CMP_GT_OS ); }
54 inline __mmask8
operator==(
const real_v& lhs,
const real_v& rhs ){
return _mm512_cmp_pd_mask( lhs, rhs, _CMP_EQ_OS ); }
56 inline real_v abs (
const real_v& v ) {
return _mm512_andnot_pd(_mm512_set1_pd(-0.), v); }
67 std::atan2(by[0], bx[0]) , std::atan2( by[1], bx[1]), std::atan2( by[2], bx[2]), std::atan2( by[3], bx[3]) ,
68 std::atan2(by[4], bx[4]) , std::atan2( by[5], bx[5]), std::atan2( by[6], bx[6]), std::atan2( by[7], bx[7]) );
72 auto xr = _mm512_roundscale_pd(x, _MM_FROUND_TO_ZERO);
74 return _mm512_sub_epi64(_mm512_castpd_si512(_mm512_add_pd(xr, _mm512_set1_pd(0x0018000000000000))),
75 _mm512_castpd_si512(_mm512_set1_pd(0x0018000000000000)));
79 return _mm512_i64gather_pd(
double_to_int(offsets), base_addr,
sizeof(
double));
84 auto arg_as_int = _mm512_castpd_si512(value);
85 static const real_v offset(4503599627370496.0 + 1022.0);
86 static const __m512i pow2_52_i = _mm512_set1_epi64(0x4330000000000000);
87 auto b = _mm512_srl_epi64(arg_as_int, _mm_cvtsi32_si128(52));
88 auto c = _mm512_or_si512( b , pow2_52_i);
89 exponent =
real_v( _mm512_castsi512_pd(c) ) - offset;
90 mant = _mm512_castsi512_pd(_mm512_or_si512(_mm512_and_si512 (arg_as_int, _mm512_set1_epi64(0x000FFFFFFFFFFFFFll) ), _mm512_set1_epi64(0x3FE0000000000000ll)));
95 return _mm512_fmadd_pd(a, b, c);
99 static const real_v corr = 0.693147180559945286226764;
100 static const real_v CL15 = 0.148197055177935105296783;
101 static const real_v CL13 = 0.153108178020442575739679;
102 static const real_v CL11 = 0.181837339521549679055568;
103 static const real_v CL9 = 0.22222194152736701733275;
104 static const real_v CL7 = 0.285714288030134544449368;
105 static const real_v CL5 = 0.399999999989941956712869;
106 static const real_v CL3 = 0.666666666666685503450651;
107 static const real_v CL1 = 2.0;
109 frexp(arg, mant, exponent);
110 auto x = (mant - 1.) / (mant + 1.);
112 auto p =
fmadd(CL15, x2, CL13);
113 p =
fmadd(p, x2, CL11);
114 p =
fmadd(p, x2, CL9);
115 p =
fmadd(p, x2, CL7);
116 p =
fmadd(p, x2, CL5);
117 p =
fmadd(p, x2, CL3);
118 p =
fmadd(p, x2, CL1);
119 p =
fmadd(p, x, corr * exponent);
130 return select( a > 0., r, -r );
135 for(
unsigned i = 0 ; i != 8; ++i ) os << buffer[i] <<
" ";
155 auto [s,c] = sincos( v.imag());
168 inline std::ostream&
operator<<( std::ostream& os,
const complex_v& obj ) {
return os <<
"( "<< obj.real() <<
") (" << obj.imag() <<
")"; }
169 #pragma omp declare reduction(+: real_v: \
170 omp_out = omp_out + omp_in)
171 #pragma omp declare reduction(+: complex_v: \
172 omp_out = omp_out + omp_in)
real_v abs(const real_v &v)
real_v operator||(const real_v &lhs, const real_v &rhs)
real_v sqrt(const real_v &v)
real_v atan2(const real_v &y, const real_v &x)
real_v gather(const double *base_addr, const real_v &offsets)
real_v fmadd(const real_v &a, const real_v &b, const real_v &c)
real_v tan(const real_v &v)
real_v operator-(const real_v &lhs, const real_v &rhs)
real_v cos(const real_v &v)
__mmask8 operator>(const real_v &lhs, const real_v &rhs)
real_v operator|(const real_v &lhs, const real_v &rhs)
real_v fmod(const real_v &a, const real_v &b)
real_v log(const real_v &arg)
real_v operator-=(real_v &lhs, const real_v &rhs)
__m512i double_to_int(const real_v &x)
real_v select(const __mmask8 &mask, const real_v &a, const real_v &b)
real_v operator!(const real_v &x)
void frexp(const real_v &value, real_v &mant, real_v &exponent)
std::complex< real_v > complex_v
__mmask8 operator<(const real_v &lhs, const real_v &rhs)
real_v operator^(const real_v &lhs, const real_v &rhs)
real_v operator/(const real_v &lhs, const real_v &rhs)
real_v sign(const real_v &v)
std::ostream & operator<<(std::ostream &os, const real_v &obj)
real_v sin(const real_v &v)
real_v operator&&(const real_v &lhs, const real_v &rhs)
real_v operator&(const real_v &lhs, const real_v &rhs)
real_v remainder(const real_v &a, const real_v &b)
__mmask8 operator==(const real_v &lhs, const real_v &rhs)
real_v operator*=(real_v &lhs, const real_v &rhs)
real_v operator/=(real_v &lhs, const real_v &rhs)
real_v operator+=(real_v &lhs, const real_v &rhs)
real_v operator+(const real_v &lhs, const real_v &rhs)
real_v norm(const complex_v &v)
real_v exp(const real_v &v)
real_v operator*(const real_v &lhs, const real_v &rhs)
std::array< double, 8 > to_array() const
void store(double *ptr) const
double at(const unsigned i) const
real_v(const double &x0, const double &x1, const double &x2, const double &x3, const double &x4, const double &x5, const double &x6, const double &x7)
static constexpr unsigned size