AmpGen 2.1
Loading...
Searching...
No Matches
avx2f_types.h
Go to the documentation of this file.
1#ifndef AMPGEN_AVX_TYPES
2#define AMPGEN_AVX_TYPES 1
3
4#include <immintrin.h>
5#include <array>
6#include <iostream>
7#include <complex>
8#include <omp.h>
9#include <cmath>
10#include "AmpGen/Complex.h"
11
// libmvec_alias(F, O) defines `inline real_v O(const real_v&)`, an element-wise
// math function named O built from the scalar/vector routine named F.
#if USE_MVEC
// Declaration of a vectorised sincos symbol (not referenced elsewhere in this header).
extern "C" void _ZGVdN8vvv_sincos(__m256 x, __m256i ptrs, __m256i ptrc);
// Forward O to the external 8-lane symbol `_ZGVcN8v_<F>`.
// NOTE(review): presumably supplied by glibc's libmvec - confirm the mangled
// prefix matches the ISA this header is built for.
#define libmvec_alias(F, O)                     \
  extern "C" __m256 _ZGVcN8v_##F(__m256 x);     \
  inline real_v O( const real_v& v )            \
  {                                             \
    return real_v( _ZGVcN8v_##F( v.data ) );    \
  }
#else
// Fallback without a vector math library: apply std::F to each of the eight lanes.
#define libmvec_alias(F, O)                                                   \
  inline real_v O( const real_v& v )                                          \
  {                                                                           \
    const auto* lanes = v.to_ptr();                                           \
    return real_v( std::F(lanes[0]), std::F(lanes[1]), std::F(lanes[2]),      \
                   std::F(lanes[3]), std::F(lanes[4]), std::F(lanes[5]),      \
                   std::F(lanes[6]), std::F(lanes[7]) );                      \
  }
#endif
23
24namespace AmpGen {
25 namespace AVX2f {
/// Eight single-precision floats packed into one 256-bit AVX register.
struct real_v {
  using scalar_type = float;
  static constexpr unsigned size = 8;

  __m256 data;

  /// Leaves `data` uninitialised.
  real_v() = default;
  /// Wraps an existing register value.
  real_v(__m256 data ) : data(data) {}
  /// Broadcasts an integer (converted to float) to every lane.
  real_v(const int& f ) : real_v(static_cast<scalar_type>(f)) {}
  /// Broadcasts a float to every lane.
  real_v(const scalar_type& f ) : data( _mm256_set1_ps(f) ) {}
  /// Broadcasts a double (narrowed to float) to every lane.
  real_v(const double& f ) : data( _mm256_set1_ps( static_cast<scalar_type>(f) ) ) {}
  /// Loads eight consecutive floats from `f`; no alignment is required.
  explicit real_v(const scalar_type* f ) : data( _mm256_loadu_ps( f ) ) {}
  /// Builds a vector lane by lane: x0 ends up in lane 0, x7 in lane 7.
  real_v(const scalar_type& x0, const scalar_type& x1, const scalar_type& x2, const scalar_type& x3,
         const scalar_type& x4, const scalar_type& x5, const scalar_type& x6, const scalar_type& x7)
    : data( _mm256_setr_ps(x0, x1, x2, x3, x4, x5, x6, x7) )
  {
  }

  /// Writes the eight lanes to `ptr`; no alignment is required.
  void store( scalar_type* ptr ) const { _mm256_storeu_ps( ptr, data ); }
  /// Returns a copy of the lanes as a std::array.
  std::array<scalar_type, size> to_array() const
  {
    std::array<scalar_type, size> lanes;
    store( lanes.data() );
    return lanes;
  }
  /// Views the register storage as an array of floats (read-only).
  const scalar_type* to_ptr() const { return reinterpret_cast<const scalar_type*>( &data ); }
  /// Views the register storage as an array of floats (writable).
  scalar_type* to_ptr() { return reinterpret_cast<scalar_type*>( &data ); }
  /// Value of lane `i`; the index is not range-checked.
  scalar_type at(const unsigned i) const { return *( to_ptr() + i ); }
  /// Implicit conversion so a real_v can be passed straight to intrinsics.
  operator __m256() const { return data; }

  // Compound assignments are defined out of line, after the binary operators.
  inline real_v operator+=(const real_v& rhs );
  inline real_v operator-=(const real_v& rhs );
  inline real_v operator*=(const real_v& rhs );
  inline real_v operator/=(const real_v& rhs );

  /// Converts every lane to a 32-bit integer via _mm256_cvtps_epi32.
  inline __m256i to_int() const { return _mm256_cvtps_epi32(data); }
};
54
55 inline real_v operator+( const real_v& lhs, const real_v& rhs ) { return _mm256_add_ps(lhs, rhs); }
56 inline real_v operator-( const real_v& lhs, const real_v& rhs ) { return _mm256_sub_ps(lhs, rhs); }
57 inline real_v operator*( const real_v& lhs, const real_v& rhs ) { return _mm256_mul_ps(lhs, rhs); }
58 inline real_v operator/( const real_v& lhs, const real_v& rhs ) { return _mm256_div_ps(lhs, rhs); }
59 inline real_v operator-( const real_v& x ) { return -1.f * x; }
60 inline real_v operator&( const real_v& lhs, const real_v& rhs ) { return _mm256_and_ps( lhs, rhs ); }
61 inline real_v operator|( const real_v& lhs, const real_v& rhs ) { return _mm256_or_ps( lhs, rhs ); }
62 inline real_v operator^( const real_v& lhs, const real_v& rhs ) { return _mm256_xor_ps( lhs, rhs ); }
63 inline real_v operator&&( const real_v& lhs, const real_v& rhs ) { return _mm256_and_ps( lhs, rhs ); }
64 inline real_v operator||( const real_v& lhs, const real_v& rhs ) { return _mm256_or_ps( lhs, rhs ); }
65 inline real_v operator!( const real_v& x ) { return x ^ _mm256_castsi256_ps( _mm256_set1_epi32( -1 ) ); }
66 inline real_v operator<( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_ps( lhs, rhs, _CMP_LT_OS ); }
67 inline real_v operator>( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_ps( lhs, rhs, _CMP_GT_OS ); }
68 inline real_v operator<=( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_ps( lhs, rhs, _CMP_LE_OS ); }
69 inline real_v operator>=( const real_v& lhs, const real_v& rhs ) { return _mm256_cmp_ps( lhs, rhs, _CMP_GE_OS ); }
70 inline real_v operator==( const real_v& lhs, const real_v& rhs ){ return _mm256_cmp_ps( lhs, rhs, _CMP_EQ_OS ); }
71 inline real_v sqrt( const real_v& v ) { return _mm256_sqrt_ps(v); }
72 inline real_v real_v::operator+=(const real_v& rhs ){ *this = *this + rhs; return *this; }
73 inline real_v real_v::operator-=(const real_v& rhs ){ *this = *this - rhs; return *this; }
74 inline real_v real_v::operator*=(const real_v& rhs ){ *this = *this * rhs; return *this; }
75 inline real_v real_v::operator/=(const real_v& rhs ){ *this = *this / rhs; return *this; }
80 inline std::array<int32_t, real_v::size> store( const __m256i& v )
81 {
82 alignas(32) std::array<int32_t, real_v::size> rt;
83 _mm256_store_si256( (__m256i*)&rt[0], v);
84 return rt;
85 }
86
87 inline void sincos( const real_v& v, real_v& s, real_v& c )
88 {
89 s = sin(v);
90 c = cos(v);
91 }
92 inline std::pair<real_v, real_v> sincos( const real_v& v )
93 {
94 std::pair<real_v, real_v> rt;
95 sincos( v, rt.first, rt.second );
96 return rt;
97 }
98 inline real_v tan( const real_v& v )
99 {
100 auto [s,c] = sincos( v );
101 return s / c ;
102 }
103
104 inline real_v abs ( const real_v& v ) { return v & _mm256_castsi256_ps( _mm256_set1_epi32( 0x7FFFFFFF ) ); }
105 inline real_v select(const real_v& mask, const real_v& a, const real_v& b ) { return _mm256_blendv_ps( b, a, mask ); }
106 inline real_v select(const bool& mask , const real_v& a, const real_v& b ) { return mask ? a : b; }
107 inline real_v sign ( const real_v& v){ return select( v > 0., +1., -1. ); }
108 inline real_v fmadd ( const real_v& a, const real_v& b, const real_v& c ) { return _mm256_fmadd_ps(a, b, c); }
109 inline real_v remainder( const real_v& a, const real_v& b ){ return a - real_v(_mm256_round_ps(a/b, _MM_FROUND_TO_NEG_INF)) * b; }
110 inline real_v atan2( const real_v& y, const real_v& x ){
111 const auto* bx = x.to_ptr();
112 const auto* by = y.to_ptr();
113 real_v rt;
114 for( unsigned i = 0 ; i != real_v::size ; ++i ) rt.to_ptr()[i] = std::atan2( by[i] , bx[i] );
115 return rt;
116 }
117 inline real_v gather( const double* base_addr, const real_v& offsets)
118 {
120 std::array<float, real_v::size> tmp;
121 auto ptr = store( offsets.to_int() );
122 // int32_t* ptr = (int32_t*)(&ints);
123 for( int i = 0 ; i != real_v::size; ++i ) tmp[i] = real_v::scalar_type( base_addr[ptr[i]] );
124 return real_v( tmp.data() );
125 }
126
127 inline real_v fmod( const real_v& a, const real_v& b )
128 {
129 auto r = remainder( abs(a), abs(b) );
130 return select( a > 0., r, -r );
131 }
132
133 inline std::ostream& operator<<( std::ostream& os, const real_v& obj ) {
134 auto buffer = obj.to_array();
135 for( unsigned i = 0 ; i != real_v::size; ++i ) os << buffer[i] << " ";
136 return os;
137 }
138
140 inline complex_v select(const real_v& mask, const complex_v& a, const complex_v& b ) { return complex_v( select(mask, a.real(), b.real()), select(mask, a.imag(), b.imag() ) ) ; }
141 inline complex_v select(const real_v& mask, const real_v& a, const complex_v& b ) { return complex_v( select(mask, a , b.real()), select(mask, 0.f, b.imag()) ); }
142 inline complex_v select(const real_v& mask, const complex_v& a, const real_v& b ) { return complex_v( select(mask, a.real(), b ) , select(mask, a.imag(), 0.f) ); }
143 inline complex_v select(const bool& mask , const complex_v& a, const complex_v& b ) { return mask ? a : b; }
// Teach OpenMP how to combine partial results in `reduction(+: ...)` clauses
// over the SIMD vector types: partial values are merged with operator+.
#pragma omp declare reduction(+: real_v: omp_out = omp_out + omp_in)
#pragma omp declare reduction(+: complex_v: omp_out = omp_out + omp_in)
148 }
149}
150
151#endif
#define libmvec_alias(F)
real_v operator&(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:60
real_v operator-(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:56
real_v sqrt(const real_v &v)
Definition avx2f_types.h:71
real_v operator+(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:55
real_v operator||(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:64
real_v abs(const real_v &v)
void sincos(const real_v &v, real_v &s, real_v &c)
Definition avx2f_types.h:87
real_v operator==(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:70
real_v fmadd(const real_v &a, const real_v &b, const real_v &c)
real_v select(const real_v &mask, const real_v &a, const real_v &b)
real_v operator/(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:58
real_v sin(const real_v &v)
Definition avx2f_types.h:76
real_v fmod(const real_v &a, const real_v &b)
Complex< real_v > complex_v
real_v tan(const real_v &v)
Definition avx2f_types.h:98
real_v operator>=(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:69
real_v operator!(const real_v &x)
Definition avx2f_types.h:65
real_v operator^(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:62
std::array< int32_t, real_v::size > store(const __m256i &v)
Definition avx2f_types.h:80
real_v operator*(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:57
std::ostream & operator<<(std::ostream &os, const real_v &obj)
real_v exp(const real_v &v)
Definition avx2f_types.h:78
real_v operator|(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:61
real_v cos(const real_v &v)
Definition avx2f_types.h:77
real_v operator>(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:67
real_v log(const real_v &v)
Definition avx2f_types.h:79
real_v gather(const double *base_addr, const real_v &offsets)
real_v operator<(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:66
real_v remainder(const real_v &a, const real_v &b)
real_v operator&&(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:63
real_v sign(const real_v &v)
real_v atan2(const real_v &y, const real_v &x)
real_v operator<=(const real_v &lhs, const real_v &rhs)
Definition avx2f_types.h:68
AVX::real_v real_v
Definition utils.h:46
real_v(__m256 data)
Definition avx2f_types.h:31
real_v operator/=(const real_v &rhs)
Definition avx2f_types.h:75
real_v operator+=(const real_v &rhs)
Definition avx2f_types.h:72
const scalar_type * to_ptr() const
Definition avx2f_types.h:44
real_v(const scalar_type &f)
Definition avx2f_types.h:33
real_v(const scalar_type &x0, const scalar_type &x1, const scalar_type &x2, const scalar_type &x3, const scalar_type &x4, const scalar_type &x5, const scalar_type &x6, const scalar_type &x7)
Definition avx2f_types.h:36
real_v(const double &f)
Definition avx2f_types.h:34
scalar_type at(const unsigned i) const
Definition avx2f_types.h:46
void store(scalar_type *ptr) const
Definition avx2f_types.h:42
scalar_type * to_ptr()
Definition avx2f_types.h:45
std::array< scalar_type, 8 > to_array() const
Definition avx2f_types.h:43
real_v operator*=(const real_v &rhs)
Definition avx2f_types.h:74
real_v(const scalar_type *f)
Definition avx2f_types.h:35
real_v operator-=(const real_v &rhs)
Definition avx2f_types.h:73
__m256i to_int() const
Definition avx2f_types.h:52
static constexpr unsigned size
Definition avx2f_types.h:28
real_v(const int &f)
Definition avx2f_types.h:32
real_t real() const
Definition Complex.h:24
real_t imag() const
Definition Complex.h:25