AmpGen 2.1
Loading...
Searching...
No Matches
avx2d_types.h
Go to the documentation of this file.
1#ifndef AMPGEN_AVXd_TYPES
2#define AMPGEN_AVXd_TYPES 1
3
4#include <immintrin.h>
5#include <array>
6#include <iostream>
7#include "AmpGen/Complex.h"
8// #include <complex>
9#ifdef _OPENMP
10#include <omp.h>
11#endif
12#include <cmath>
13
// Bindings to the libmvec vector math routines, selected by USE_MVEC.
//
// With USE_MVEC set:
//   * _ZGVdN4vvv_sincos is declared with C linkage; it receives the argument
//     vector and two integer vectors (see sincos() for how they are filled).
//   * libmvec_alias(F) declares the 4-lane symbol _ZGVcN4v_F and defines
//     real_v F(const real_v&) as a direct forward to it.
// Otherwise libmvec_alias(F) defines real_v F(const real_v&) by calling the
// scalar std::F on each of the four lanes and repacking the results.
#if USE_MVEC
extern "C" void _ZGVdN4vvv_sincos(__m256d x, __m256i ptrs, __m256i ptrc);
#define libmvec_alias( F ) \
  extern "C" __m256d _ZGVcN4v_##F(__m256d x); \
  inline real_v F( const real_v& v ){ return _ZGVcN4v_##F (v) ; }
#else
#define libmvec_alias( F ) \
  inline real_v F( const real_v& v ){ const double* lane = v.to_ptr(); return real_v( std::F(lane[0]), std::F(lane[1]), std::F(lane[2]), std::F(lane[3])) ; }
#endif
26
27namespace AmpGen {
28 namespace AVX2d {
29
// Four double-precision values packed in one AVX register.
struct real_v {
  __m256d data;                        // the packed lanes
  static constexpr unsigned size = 4;  // number of lanes
  using scalar_type = double;

  // Defaulted: `data` receives no initial value here.
  real_v() = default;
  // Wrap an existing register value.
  real_v(__m256d data ) : data(data) {}
  // Broadcast one scalar to all four lanes.
  real_v(const double& f ) : data( _mm256_set1_pd( f )) {}
  // Build from four scalars; the arguments are handed to _mm256_set_pd in
  // reverse so that x0 ends up in lane 0.
  real_v(const double& x0, const double& x1, const double& x2, const double& x3 )
    : data( _mm256_set_pd(x3,x2,x1,x0) ) {}
  // Unaligned load of four consecutive doubles starting at f.
  explicit real_v(const double* f ) : data( _mm256_loadu_pd( f ) ) {}
  // Unaligned load from the array's storage.
  real_v(const std::array<double,4> f ) : data( _mm256_loadu_pd( f.data() ) ) {}

  // Unaligned store of the four lanes to ptr[0..3].
  void store( double* ptr ) const { _mm256_storeu_pd( ptr, data ); }
  // View of the register contents as an array of doubles (points into this object).
  const double* to_ptr() const { return reinterpret_cast<const double*>( &data ) ; }
  double* to_ptr() { return reinterpret_cast<double*>( &data ) ; }
  // Copy of the lanes as a std::array.
  std::array<double, 4> to_array() const
  {
    std::array<double, 4> lanes;
    store( lanes.data() );
    return lanes;
  }
  // Lane i, read through to_ptr(); the index is not range-checked.
  double at(const unsigned i) const { return to_ptr()[i]; }
  operator __m256d() const { return data ; }

  // Compound assignments; defined out of line after the binary operators.
  inline real_v operator+=(const real_v& rhs );
  inline real_v operator-=(const real_v& rhs );
  inline real_v operator*=(const real_v& rhs );
  inline real_v operator/=(const real_v& rhs );

  // Round each lane towards negative infinity and return the results as
  // 64-bit integers.
  inline __m256i to_int() const
  {
    // based on: https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx
    // The integer literal converts to the double 2^52 + 2^51; adding it and then
    // subtracting its bit pattern leaves the integer value in the low bits.
    const __m256d magic   = _mm256_set1_pd(0x0018000000000000);
    const __m256d floored = _mm256_round_pd(data, _MM_FROUND_TO_NEG_INF);
    const __m256d shifted = _mm256_add_pd(floored, magic);
    return _mm256_sub_epi64(_mm256_castpd_si256(shifted), _mm256_castpd_si256(magic));
  }
};
61
62 inline real_v operator+( const real_v& lhs, const real_v& rhs ) { return _mm256_add_pd(lhs, rhs); }
63 inline real_v operator-( const real_v& lhs, const real_v& rhs ) { return _mm256_sub_pd(lhs, rhs); }
64 inline real_v operator*( const real_v& lhs, const real_v& rhs ) { return _mm256_mul_pd(lhs, rhs); }
65 inline real_v operator/( const real_v& lhs, const real_v& rhs ) { return _mm256_div_pd(lhs, rhs); }
66 inline real_v operator-( const real_v& x ) { return -1.f * x; }
67 inline real_v operator&( const real_v& lhs, const real_v& rhs ) { return _mm256_and_pd( lhs, rhs ); }
68 inline real_v operator|( const real_v& lhs, const real_v& rhs ) { return _mm256_or_pd( lhs, rhs ); }
69 inline real_v operator^( const real_v& lhs, const real_v& rhs ) { return _mm256_xor_pd( lhs, rhs ); }
70 inline real_v operator&&( const real_v& lhs, const real_v& rhs ) { return _mm256_and_pd( lhs, rhs ); }
71 inline real_v operator||( const real_v& lhs, const real_v& rhs ) { return _mm256_or_pd( lhs, rhs ); }
72 inline real_v operator!( const real_v& x ) { return x ^ _mm256_castsi256_pd( _mm256_set1_epi32( -1 ) ); }
73 inline real_v operator<( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_pd( lhs, rhs, _CMP_LT_OS ); }
74 inline real_v operator>( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_pd( lhs, rhs, _CMP_GT_OS ); }
75 inline real_v operator<=( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_pd( lhs, rhs, _CMP_LE_OS ); }
76 inline real_v operator>=( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_pd( lhs, rhs, _CMP_GE_OS ); }
77 inline real_v operator==( const real_v& lhs, const real_v& rhs ){ return _mm256_cmp_pd( lhs, rhs, _CMP_EQ_OS ); }
78 inline real_v sqrt( const real_v& v ) { return _mm256_sqrt_pd(v); }
79 inline real_v abs ( const real_v& v ) { return _mm256_andnot_pd(_mm256_set1_pd(-0.), v); }
80 inline real_v real_v::operator+=(const real_v& rhs ){ *this = *this + rhs; return *this; }
81 inline real_v real_v::operator-=(const real_v& rhs ){ *this = *this - rhs; return *this; }
82 inline real_v real_v::operator*=(const real_v& rhs ){ *this = *this * rhs; return *this; }
83 inline real_v real_v::operator/=(const real_v& rhs ){ *this = *this / rhs; return *this; }
87 // libmvec_alias( log )
88 inline real_v log( const real_v& v ){ return real_v( std::log(v.at(0)), std::log(v.at(1)), std::log(v.at(2)), std::log(v.at(3))) ; }
89 inline void sincos( const real_v& v, real_v& s, real_v& c )
90 {
91#if USE_MVEC
92 __m256i sp = _mm256_add_epi64(_mm256_set1_epi64x((uint64_t)&s),_mm256_set_epi64x(24,16,8,0));
93 __m256i cp = _mm256_add_epi64(_mm256_set1_epi64x((uint64_t)&c),_mm256_set_epi64x(24,16,8,0));
94 _ZGVdN4vvv_sincos(v,sp,cp);
95#else
96 s = sin(v);
97 c = cos(v);
98#endif
99 }
100 inline std::array<int64_t, real_v::size> store( const __m256i& v )
101 {
102 alignas(32) std::array<int64_t, real_v::size> rt;
103 _mm256_store_si256( (__m256i*)&rt[0], v);
104 return rt;
105 }
106
107 inline std::pair<real_v, real_v> sincos( const real_v& v )
108 {
109 std::pair<real_v, real_v> rt;
110 sincos( v, rt.first, rt.second );
111 return rt;
112 }
113 inline real_v tan( const real_v& v )
114 {
115 auto [s,c] = sincos( v );
116 return s / c ;
117 }
118
119 inline real_v select(const real_v& mask, const real_v& a, const real_v& b ) { return _mm256_blendv_pd( b, a, mask ); }
120 inline real_v select(const bool& mask , const real_v& a, const real_v& b ) { return mask ? a : b; }
121 inline real_v sign ( const real_v& v){ return select( v > 0., +1., -1. ); }
122 inline real_v conj( const real_v& v ) { return v; }
123
124 inline real_v atan2( const real_v& y, const real_v& x ){
125 const double* bx = x.to_ptr();
126 const double* by = y.to_ptr();
127 return real_v (std::atan2(by[0], bx[0]), std::atan2( by[1], bx[1]), std::atan2( by[2], bx[2]), std::atan2( by[3], bx[3]) );
128 }
129 inline real_v gather( const double* base_addr, const real_v& offsets)
130 {
131 return _mm256_i64gather_pd(base_addr, offsets.to_int(),sizeof(double));
132 }
133 inline real_v fmadd( const real_v& a, const real_v& b, const real_v& c )
134 {
135 return _mm256_fmadd_pd(a, b, c);
136 }
137 inline real_v remainder( const real_v& a, const real_v& b ){ return a - real_v(_mm256_round_pd(a/b, _MM_FROUND_TO_NEG_INF)) * b; }
138 inline real_v fmod( const real_v& a, const real_v& b )
139 {
140 auto r = remainder( abs(a), abs(b) );
141 return select( a > 0., r, -r );
142 }
143
144 inline std::ostream& operator<<( std::ostream& os, const real_v& obj ) {
145 auto data = obj.to_ptr();
146 for( unsigned i = 0 ; i != 4; ++i ) os << data[i] << " ";
147 return os;
148 }
149
151 inline complex_v select(const real_v& mask, const complex_v& a, const complex_v& b ) { return complex_v( select(mask, a.real(), b.real()), select(mask, a.imag(), b.imag() ) ) ; }
152 inline complex_v select(const real_v& mask, const real_v& a, const complex_v& b ) { return complex_v( select(mask, a , b.real()), select(mask, real_v(0.), b.imag()) ); }
153 inline complex_v select(const real_v& mask, const complex_v& a, const real_v& b ) { return complex_v( select(mask, a.real(), b ) , select(mask, a.imag(), real_v(0.)) ); }
154 inline complex_v select(const bool& mask , const complex_v& a, const complex_v& b ) { return mask ? a : b; }
155
// User-defined OpenMP '+' reductions for the vector types.
// An explicit initializer is supplied for each: without one the private copies
// are default-initialised, and real_v's defaulted constructor gives `data` no
// value, so the partial sums would start from an indeterminate state. Zero is
// the identity for '+'.
#pragma omp declare reduction(+: real_v: \
    omp_out = omp_out + omp_in) \
    initializer(omp_priv = real_v(0.))
#pragma omp declare reduction(+: complex_v: \
    omp_out = omp_out + omp_in) \
    initializer(omp_priv = complex_v(real_v(0.), real_v(0.)))
160
161 }
162}
163
164#endif
#define libmvec_alias(F)
real_v operator<(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:73
real_v operator+(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:62
real_v log(const real_v &v)
Definition avx2d_types.h:88
real_v fmadd(const real_v &a, const real_v &b, const real_v &c)
real_v operator&(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:67
real_v select(const real_v &mask, const real_v &a, const real_v &b)
real_v operator<=(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:75
std::ostream & operator<<(std::ostream &os, const real_v &obj)
real_v abs(const real_v &v)
Definition avx2d_types.h:79
real_v operator>(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:74
real_v operator^(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:69
real_v operator!(const real_v &x)
Definition avx2d_types.h:72
real_v cos(const real_v &v)
Definition avx2d_types.h:85
real_v operator/(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:65
real_v operator|(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:68
real_v operator-(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:63
real_v sqrt(const real_v &v)
Definition avx2d_types.h:78
real_v operator&&(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:70
real_v exp(const real_v &v)
Definition avx2d_types.h:86
real_v operator>=(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:76
real_v remainder(const real_v &a, const real_v &b)
std::array< int64_t, real_v::size > store(const __m256i &v)
void sincos(const real_v &v, real_v &s, real_v &c)
Definition avx2d_types.h:89
real_v sign(const real_v &v)
real_v fmod(const real_v &a, const real_v &b)
real_v operator==(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:77
real_v atan2(const real_v &y, const real_v &x)
real_v conj(const real_v &v)
real_v operator||(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:71
real_v gather(const double *base_addr, const real_v &offsets)
real_v operator*(const real_v &lhs, const real_v &rhs)
Definition avx2d_types.h:64
real_v tan(const real_v &v)
real_v sin(const real_v &v)
Definition avx2d_types.h:84
Complex< real_v > complex_v
AVX::real_v real_v
Definition utils.h:46
void store(double *ptr) const
Definition avx2d_types.h:43
real_v operator-=(const real_v &rhs)
Definition avx2d_types.h:81
real_v(const std::array< double, 4 > f)
Definition avx2d_types.h:42
static constexpr unsigned size
Definition avx2d_types.h:32
real_v(__m256d data)
Definition avx2d_types.h:35
real_v operator*=(const real_v &rhs)
Definition avx2d_types.h:82
real_v(const double *f)
Definition avx2d_types.h:41
real_v operator/=(const real_v &rhs)
Definition avx2d_types.h:83
real_v operator+=(const real_v &rhs)
Definition avx2d_types.h:80
real_v(const double &x0, const double &x1, const double &x2, const double &x3)
Definition avx2d_types.h:37
double at(const unsigned i) const
Definition avx2d_types.h:47
const double * to_ptr() const
Definition avx2d_types.h:44
std::array< double, 4 > to_array() const
Definition avx2d_types.h:46
real_v(const double &f)
Definition avx2d_types.h:36
__m256i to_int() const
Definition avx2d_types.h:53
real_t real() const
Definition Complex.h:24
real_t imag() const
Definition Complex.h:25