AmpGen 2.1
Loading...
Searching...
No Matches
arm128d_types.h
Go to the documentation of this file.
1#ifndef AMPGEN_ARM128d_TYPES
2#define AMPGEN_ARM128d_TYPES 1
3
4#include <arm_neon.h>
5#include <array>
6#include <iostream>
7#include "AmpGen/Complex.h"
8#ifdef _OPENMP
9#include <omp.h>
10#endif
11#include <cmath>
12
13
// libmvec_alias(F) defines `real_v F(const real_v&)`.  It has to be expanded at
// a point where real_v is already declared.
#if USE_MVEC
// Vectorised libm path.  The earlier declarations here used the x86 AVX types
// (__m256d / __m256i) and the four-lane `_ZGVcN4v_` symbols, which do not exist
// for a NEON target.  The AArch64 vector function ABI names the two-lane,
// unmasked AdvSIMD variant `_ZGVnN2v_<name>`.
// NOTE(review): presumably needs a glibc that ships libmvec for AArch64 --
// confirm before enabling USE_MVEC on this platform.
#define libmvec_alias( function_name) \
  extern "C" float64x2_t _ZGVnN2v_##function_name(float64x2_t x); \
  inline real_v function_name( const real_v& v ){ return _ZGVnN2v_##function_name (v) ; }
#else
// Portable path: apply the scalar std:: function to each of the two lanes.
#define libmvec_alias( F ) \
  inline real_v F( const real_v& v ){ auto arr = v.to_ptr(); return real_v( std::F(arr[0]), std::F(arr[1]) ); }
#endif
23
24namespace AmpGen {
25 namespace ARM128d {
26
/// Two double-precision lanes held in a single NEON float64x2_t.
struct real_v {
  float64x2_t data;                       ///< the packed lanes
  static constexpr unsigned size = 2;     ///< number of lanes
  typedef double scalar_type;             ///< type of one lane
  real_v() = default;
  /// Wraps an existing NEON vector.
  real_v(float64x2_t data ) : data(data) {}
  /// Converts unsigned 64-bit integer lanes to double lanes.
  explicit real_v( uint64x2_t&& data ) : data( vcvtq_f64_u64(data) ) {}
  /// Converts signed 64-bit integer lanes to double lanes.
  explicit real_v( int64x2_t&& data ) : data( vcvtq_f64_s64(data) ) {}
  /// Broadcasts f into both lanes.
  real_v(const scalar_type& f ) : data(vmovq_n_f64(f)) {}
  /// Builds the vector {x0, x1}.
  /// Lane 0 is seeded by broadcasting x0 so that vsetq_lane_f64 never reads an
  /// uninitialised member (the previous version passed `data` before it had
  /// been given any value).
  real_v(const scalar_type& x0, const scalar_type& x1 ) : data( vmovq_n_f64(x0) )
  {
    data = vsetq_lane_f64(x1, data, 1 );
  }
  /// Loads `size` consecutive doubles starting at f.
  explicit real_v(const scalar_type* f ) : data( vld1q_f64( f ) ) {}
  /// Loads the lanes from a fixed-size array.
  real_v(const std::array<scalar_type, size> f ) : data( vld1q_f64( f.data() ) ) {}
  /// Writes both lanes to ptr[0] and ptr[1].
  void store( scalar_type* ptr ) const { vst1q_f64(ptr, data); }
  /// Views the vector storage as an array of scalars (read-only).
  const scalar_type* to_ptr() const { return reinterpret_cast<const scalar_type*>( &data ) ; }
  /// Views the vector storage as an array of scalars (mutable).
  scalar_type* to_ptr() { return reinterpret_cast<scalar_type*>( &data ) ; }
  /// Copies the lanes out into a std::array.
  std::array<scalar_type, size> to_array() const { std::array<scalar_type, size> b; store( &b[0] ); return b; }
  /// Converts each lane to a signed 64-bit integer via vcvtq_s64_f64.
  int64x2_t to_int() const { return vcvtq_s64_f64(data); }
  /// Returns lane i; no bounds check is performed.
  double at(const unsigned i) const { return to_ptr()[i]; }
  /// Implicit access to the raw NEON vector, so intrinsics accept a real_v.
  operator float64x2_t() const { return data ; }
  // Compound assignments are defined after the free arithmetic operators.
  inline real_v operator+=(const real_v& rhs );
  inline real_v operator-=(const real_v& rhs );
  inline real_v operator*=(const real_v& rhs );
  inline real_v operator/=(const real_v& rhs );
};
/// Lane mask / integer vector: a thin wrapper around a NEON uint64x2_t.
struct int_v {
  uint64x2_t data;                                   ///< raw 64-bit lanes
  /// Takes the (temporary) result of a NEON intrinsic.
  int_v( uint64x2_t&& bits ) : data( bits ) {}
  /// Implicit access to the raw vector, so intrinsics accept an int_v.
  operator uint64x2_t() const { return data; }
};
56
57
/// Lane-wise sum of lhs and rhs.
inline real_v operator+( const real_v& lhs, const real_v& rhs )
{
  return real_v( vaddq_f64( lhs.data, rhs.data ) );
}
/// Lane-wise difference lhs - rhs.
inline real_v operator-( const real_v& lhs, const real_v& rhs )
{
  return real_v( vsubq_f64( lhs.data, rhs.data ) );
}
/// Lane-wise product of lhs and rhs.
inline real_v operator*( const real_v& lhs, const real_v& rhs )
{
  return real_v( vmulq_f64( lhs.data, rhs.data ) );
}
/// Lane-wise quotient lhs / rhs.
inline real_v operator/( const real_v& lhs, const real_v& rhs )
{
  return real_v( vdivq_f64( lhs.data, rhs.data ) );
}
/// Unary minus, implemented as multiplication of every lane by -1.
inline real_v operator-( const real_v& x )
{
  const real_v minus_one( -1. );
  return minus_one * x;
}
63
64 // inline real_v operator&( const real_v& lhs, const real_v& rhs ) { return real_v( vceqq_f64( lhs, rhs ) ); }
65 // inline real_v operator|( const real_v& lhs, const real_v& rhs ) { return _mm256_or_pd( lhs, rhs ); }
66 // inline real_v operator^( const real_v& lhs, const real_v& rhs ) { return _mm256_xor_pd( lhs, rhs ); }
/// Combines two masks with a bitwise AND (vandq_u64).
inline int_v operator&&( const int_v& lhs, const int_v& rhs )
{
  return int_v( vandq_u64( lhs.data, rhs.data ) );
}
/// Combines two masks with a bitwise OR (vorrq_u64).
inline int_v operator||( const int_v& lhs, const int_v& rhs )
{
  return int_v( vorrq_u64( lhs.data, rhs.data ) );
}
69 // inline real_v operator!( const real_v& x ) { return x ^ _mm256_castsi256_pd( _mm256_set1_epi32( -1 ) ); }
70
/// Lane-wise lhs < rhs; returns the mask produced by vcltq_f64.
inline int_v operator<( const real_v& lhs, const real_v& rhs )
{
  return int_v( vcltq_f64( lhs.data, rhs.data ) );
}
/// Lane-wise lhs > rhs; returns the mask produced by vcgtq_f64.
inline int_v operator>( const real_v& lhs, const real_v& rhs )
{
  return int_v( vcgtq_f64( lhs.data, rhs.data ) );
}
/// Lane-wise lhs <= rhs; returns the mask produced by vcleq_f64.
inline int_v operator<=( const real_v& lhs, const real_v& rhs )
{
  return int_v( vcleq_f64( lhs.data, rhs.data ) );
}
/// Lane-wise lhs >= rhs; returns the mask produced by vcgeq_f64.
/// This used to call vcleq_f64 (the <= test), so `a >= b` was evaluated as
/// `a <= b`.
inline int_v operator>=( const real_v& lhs, const real_v& rhs ){ return vcgeq_f64( lhs, rhs ); }
/// Lane-wise equality test; returns the mask produced by vceqq_f64.
inline int_v operator==( const real_v& lhs, const real_v& rhs )
{
  return int_v( vceqq_f64( lhs.data, rhs.data ) );
}
/// Lane-wise square root (vsqrtq_f64).
inline real_v sqrt( const real_v& v )
{
  return real_v( vsqrtq_f64( v.data ) );
}
/// Lane-wise absolute value (vabsq_f64).
inline real_v abs( const real_v& v )
{
  return real_v( vabsq_f64( v.data ) );
}
/// Adds rhs to this vector in place and returns a copy of the updated value.
inline real_v real_v::operator+=(const real_v& rhs )
{
  return *this = *this + rhs;
}
/// Subtracts rhs from this vector in place and returns a copy of the updated value.
inline real_v real_v::operator-=(const real_v& rhs )
{
  return *this = *this - rhs;
}
/// Multiplies this vector by rhs in place and returns a copy of the updated value.
inline real_v real_v::operator*=(const real_v& rhs )
{
  return *this = *this * rhs;
}
/// Divides this vector by rhs in place and returns a copy of the updated value.
inline real_v real_v::operator/=(const real_v& rhs )
{
  return *this = *this / rhs;
}
/// Writes sin(v) into s and cos(v) into c.
/// The sine is assigned before the cosine is evaluated; that order is kept
/// because s or c may refer to the same object as v.
inline void sincos( const real_v& v, real_v& s, real_v& c )
{
  s = sin( v );
  c = cos( v );
}
/// Returns {sin(v), cos(v)} as a pair, delegating to the out-parameter overload.
inline std::pair<real_v, real_v> sincos( const real_v& v )
{
  real_v sine;
  real_v cosine;
  sincos( v, sine, cosine );
  return std::pair<real_v, real_v>( sine, cosine );
}
/// Lane-wise tangent, computed as sin(v) / cos(v).
inline real_v tan( const real_v& v )
{
  const std::pair<real_v, real_v> sc = sincos( v );
  return sc.first / sc.second;
}
/// Copies the lanes of an integer/mask vector into a std::array.
inline std::array<uint64_t, real_v::size> store( const int_v& v )
{
  std::array<uint64_t, real_v::size> lanes;
  vst1q_u64( lanes.data(), v.data );
  return lanes;
}
108
/// Blends a and b under mask using the NEON bitwise select (vbslq_f64):
/// a is the operand chosen where the mask is set, b where it is clear.
inline real_v select(const int_v& mask, const real_v& a, const real_v& b )
{
  return real_v( vbslq_f64( mask.data, a.data, b.data ) );
}
/// Scalar-condition overload: returns a when mask is true, otherwise b.
inline real_v select(const bool& mask , const real_v& a, const real_v& b )
{
  if( mask ) return a;
  return b;
}
/// Returns +1 in every lane where v > 0 and -1 in all other lanes
/// (so a lane holding exactly zero yields -1).
inline real_v sign ( const real_v& v)
{
  const real_v zero( 0. );
  return select( v > zero, real_v( +1. ), real_v( -1. ) );
}
/// Lane-wise atan2(y, x), evaluated with the scalar std::atan2 on each lane.
inline real_v atan2( const real_v& y, const real_v& x )
{
  const double lane0 = std::atan2( y.at(0), x.at(0) );
  const double lane1 = std::atan2( y.at(1), x.at(1) );
  return real_v( lane0, lane1 );
}
/// Loads base_addr[offsets[0]] and base_addr[offsets[1]] into a vector.
/// The offsets are converted from double to signed 64-bit integers by
/// real_v::to_int(); no bounds checking is done on the resulting indices.
inline real_v gather( const double* base_addr, const real_v& offsets)
{
  const int64x2_t indices = offsets.to_int();
  const int64_t first  = vgetq_lane_s64( indices, 0 );
  const int64_t second = vgetq_lane_s64( indices, 1 );
  return real_v( base_addr[first], base_addr[second] );
}
/// Fused multiply-add: returns a * b + c in every lane.
/// vfmaq_f64(acc, x, y) computes acc + x * y, so c is passed as the
/// accumulator.  The previous vmlaq_f64(a, b, c) call evaluated a + b * c,
/// i.e. it treated the first argument as the addend.
inline real_v fmadd( const real_v& a, const real_v& b, const real_v& c )
{
  return vfmaq_f64(c, a, b);
}
/// Returns a - b * q, where q is a / b converted to an unsigned 64-bit integer
/// (vcvtq_u64_f64) and back to double.
/// NOTE(review): the quotient goes through an unsigned conversion, so this
/// looks like it is only meaningful when a / b is non-negative -- confirm
/// against callers.
inline real_v remainder( const real_v& a, const real_v& b )
{
  const real_v whole = real_v( vcvtq_u64_f64( a / b ) );
  return a - b * whole;
}
/// Computes remainder(|a|, |b|) and multiplies the result by sign(a).
inline real_v fmod( const real_v& a, const real_v& b )
{
  const real_v magnitude = remainder( abs(a), abs(b) );
  return magnitude * sign(a);
}
129
/// Streams every lane of obj, each followed by a single space.
/// The loop bound is real_v::size; it was previously hard-coded to 4, which
/// read two doubles past the end of the two-lane vector.
inline std::ostream& operator<<( std::ostream& os, const real_v& obj ) {
  const auto lanes = obj.to_ptr();
  for( unsigned i = 0 ; i != real_v::size; ++i ) os << lanes[i] << " ";
  return os;
}
/// Identity "conjugate": returns a copy of its argument unchanged.
template<typename T>
inline T conj( const T& arg )
{
  return arg;
}
137
/// Complex blend: applies the real-valued select to the real and imaginary
/// parts of a and b separately.
inline complex_v select(const int_v& mask, const complex_v& a, const complex_v& b )
{
  return complex_v( select( mask, a.re, b.re ),
                    select( mask, a.im, b.im ) );
}
/// Blend of a real value with a complex one: a supplies the real part and a
/// zero imaginary part in the positions taken from the first operand.
inline complex_v select(const int_v& mask, const real_v& a, const complex_v& b )
{
  return complex_v( select( mask, a, b.re ),
                    select( mask, 0.f, b.im ) );
}
/// Blend of a complex value with a real one: b supplies the real part and a
/// zero imaginary part in the positions taken from the second operand.
inline complex_v select(const int_v& mask, const complex_v& a, const real_v& b )
{
  return complex_v( select( mask, a.re, b ),
                    select( mask, a.im, 0.f ) );
}
/// Scalar-condition overload for complex values: a when mask is true, else b.
inline complex_v select(const bool& mask , const complex_v& a, const complex_v& b )
{
  if( mask ) return a;
  return b;
}
142 }
143}
144
145#endif
#define libmvec_alias(F)
Complex< real_v > complex_v
void sincos(const real_v &v, real_v &s, real_v &c)
real_v tan(const real_v &v)
int_v operator==(const real_v &lhs, const real_v &rhs)
real_v operator+(const real_v &lhs, const real_v &rhs)
int_v operator>(const real_v &lhs, const real_v &rhs)
real_v sin(const real_v &v)
std::array< uint64_t, real_v::size > store(const int_v &v)
int_v operator<=(const real_v &lhs, const real_v &rhs)
real_v atan2(const real_v &y, const real_v &x)
real_v remainder(const real_v &a, const real_v &b)
int_v operator&&(const int_v &lhs, const int_v &rhs)
real_v operator/(const real_v &lhs, const real_v &rhs)
real_v conj(const real_v &arg)
real_v operator*(const real_v &lhs, const real_v &rhs)
real_v fmod(const real_v &a, const real_v &b)
real_v sign(const real_v &v)
real_v cos(const real_v &v)
real_v operator-(const real_v &lhs, const real_v &rhs)
real_v log(const real_v &v)
real_v gather(const double *base_addr, const real_v &offsets)
real_v select(const int_v &mask, const real_v &a, const real_v &b)
int_v operator<(const real_v &lhs, const real_v &rhs)
int_v operator>=(const real_v &lhs, const real_v &rhs)
real_v fmadd(const real_v &a, const real_v &b, const real_v &c)
real_v sqrt(const real_v &v)
real_v abs(const real_v &v)
std::ostream & operator<<(std::ostream &os, const real_v &obj)
int_v operator||(const int_v &lhs, const int_v &rhs)
real_v exp(const real_v &v)
AVX::real_v real_v
Definition utils.h:46
int_v(uint64x2_t &&data)
real_v operator+=(const real_v &rhs)
scalar_type * to_ptr()
const scalar_type * to_ptr() const
real_v(const scalar_type &f)
real_v(const scalar_type &x0, const scalar_type &x1)
void store(scalar_type *ptr) const
int64x2_t to_int() const
real_v operator/=(const real_v &rhs)
real_v(int64x2_t &&data)
real_v(float64x2_t data)
real_v operator-=(const real_v &rhs)
static constexpr unsigned size
real_v operator*=(const real_v &rhs)
real_v(const std::array< scalar_type, size > f)
real_v(const scalar_type *f)
std::array< scalar_type, size > to_array() const
double at(const unsigned i) const
real_v(uint64x2_t &&data)
real_t re
Definition Complex.h:9