1#ifndef AMPGEN_SIMD_UTILS_H
2#define AMPGEN_SIMD_UTILS_H
8#define INSTRUCTION_SET_SCALAR 0
9#define INSTRUCTION_SET_AVX2f 1
10#define INSTRUCTION_SET_AVX2d 2
11#define INSTRUCTION_SET_AVX512d 3
12#define INSTRUCTION_SET_ARM128d 10
15#if INSTRUCTION_SET == INSTRUCTION_SET_SCALAR
17#elif INSTRUCTION_SET == INSTRUCTION_SET_AVX2f
20#elif INSTRUCTION_SET == INSTRUCTION_SET_AVX2d
23#elif INSTRUCTION_SET == INSTRUCTION_SET_AVX512d
26#elif INSTRUCTION_SET == INSTRUCTION_SET_ARM128d
29 #pragma message("Unrecognised instruction set")
34#if INSTRUCTION_SET == INSTRUCTION_SET_AVX512d
35 namespace AVX = AVX512d;
36#elif INSTRUCTION_SET == INSTRUCTION_SET_AVX2d
37 namespace AVX = AVX2d;
38#elif INSTRUCTION_SET == INSTRUCTION_SET_AVX2f
39 namespace AVX = AVX2f;
40#elif INSTRUCTION_SET == INSTRUCTION_SET_SCALAR
42#elif INSTRUCTION_SET == INSTRUCTION_SET_ARM128d
43 namespace AVX = ARM128d;
/// Compile-time `value` associated with a type.
///
/// The primary template yields 1 for any type that has no explicit
/// specialization; specializations provide a different `value`.
template <class T>
struct size
{
  static constexpr unsigned value = 1;
};
55 #if INSTRUCTION_SET != 0
57 template <>
struct is_vector_type <
real_v > : std::true_type {};
58 template <>
struct size <
complex_v>{
static constexpr unsigned value = real_v::size; };
59 template <>
struct size <
real_v> {
static constexpr unsigned value = real_v::size; };
61 #if INSTRUCTION_SET == INSTRUCTION_SET_ARM128d
62 template <>
struct size <AVX::int_v> {
static constexpr unsigned value = 2; };
63 template <>
struct is_vector_type <AVX::int_v> : std::true_type {};
// gather: builds a simd_type from simd_type::size consecutive container entries.
//
// For each lane k, the value is functor(container[offset + k]) while
// offset + k is inside the container. Lanes past the end are padded; two
// padding loops are visible: one repeats functor(last element), the other
// uses the default value `df`. The values are staged in a std::array and the
// result is constructed from its data pointer.
//
// NOTE(review): the condition that selects between the two padding loops is
// not visible in this excerpt - confirm against the full file.
// NOTE(review): the "repeat last element" loop indexes container.size()-1,
// which is out of range for an empty container - confirm callers never pass one.
65 template <
typename simd_type,
typename container_type,
typename functor_type> simd_type
gather(
66 const container_type& container,
const functor_type& functor,
unsigned offset=0,
typename simd_type::scalar_type df =0.)
68 std::array<typename simd_type::scalar_type, simd_type::size> rv;
70 for(
unsigned k = 0 ; k != simd_type::size; ++k ) rv[k] = offset + k < container.size() ? functor(container[offset+k]) : functor(container[container.size()-1]);
72 for(
unsigned k = 0 ; k != simd_type::size; ++k ) rv[k] = offset + k < container.size() ? functor(container[offset+k]) : df;
73 return simd_type( rv.data() );
65 template <
typename simd_type,
typename container_type,
typename functor_type> simd_type
gather( {
…}
// aligned_size: takes an unaligned element count by const reference and
// returns a size_t, parameterised on the SIMD type.
//
// NOTE(review): the body is elided in this excerpt. The name suggests the
// count is rounded up to a multiple of the lane count of simd_type - confirm
// against the full file before relying on that.
76 template <
typename simd_type>
size_t aligned_size(
const size_t& unaligned_size ) {
76 template <
typename simd_type>
size_t aligned_size(
const size_t& unaligned_size ) {
…}
// sum_elements: takes a SIMD object by const reference; the return type is
// deduced (auto). The only visible body line obtains the lanes through
// obj.to_ptr().
//
// NOTE(review): the rest of the body is elided here. Presumably the lanes are
// added together and the scalar total returned - confirm against the full file.
79 template <
typename simd_type>
auto sum_elements(
const simd_type& obj )
83 const auto arr = obj.to_ptr();
79 template <
typename simd_type>
auto sum_elements(
const simd_type& obj ) {
…}
// all_of (single argument): tests a SIMD mask/value across all lanes.
//
// Visible branches:
//  - AVX2d: compares _mm256_movemask_pd(obj) against 0xF (four low bits set).
//  - AVX2f: compares _mm256_movemask_ps(obj) against 0xFF (eight low bits set).
// The movemask intrinsics collect one bit per lane, so the function returns
// true only when that bit is set in every lane.
//
// NOTE(review): branches for the other instruction sets are elided in this
// excerpt - confirm their behaviour against the full file.
90 template <
typename simd_type>
bool all_of(
const simd_type& obj)
93 #if INSTRUCTION_SET == INSTRUCTION_SET_AVX2d
94 return _mm256_movemask_pd( obj ) == 0xF;
95 #elif INSTRUCTION_SET == INSTRUCTION_SET_AVX2f
96 return _mm256_movemask_ps( obj ) == 0xFF;
90 template <
typename simd_type>
bool all_of(
const simd_type& obj) {
…}
// all_of (two arguments): forwards the result of `obj == v` to the
// single-argument all_of and returns its verdict.
//
// NOTE(review): what `obj == v` yields depends on the operator== of
// simd_type, which is not visible here - presumably a per-lane mask.
100 template <
typename simd_type,
typename value_type>
bool all_of(
const simd_type& obj,
const value_type& v )
102 return all_of( obj == v );
100 template <
typename simd_type,
typename value_type>
bool all_of(
const simd_type& obj,
const value_type& v ) {
…}
/// Builds a std::complex from a real and an imaginary part of the same type.
///
/// `T&&` is a forwarding reference, so passing lvalues deduces `T` as a
/// reference type (e.g. `double&`). The element type of the result is
/// therefore taken from the decayed `T`, so lvalues, const lvalues and
/// rvalues all produce `std::complex<U>` for the underlying value type `U`
/// instead of a complex of references.
///
/// @param re real part
/// @param im imaginary part
/// @return std::complex<std::decay_t<T>> holding copies of (re, im)
template <typename T>
auto make_complex( T&& re, T&& im )
{
  using value_type = std::decay_t<T>;
  return std::complex<value_type>( re, im );
}
// get<p>: extracts something at compile-time index p (default 0) from v;
// the return type is deduced.
//
// Visible branch: when vtype is complex_v, get<p> is applied separately to
// v.real() and v.imag() and the two results are combined into a std::complex.
//
// NOTE(review): this branch starts with `else if constexpr`, so at least one
// earlier branch exists but is elided in this excerpt - confirm against the
// full file.
105 template <
unsigned p=0,
typename vtype>
auto get( vtype v )
109 else if constexpr ( std::is_same<vtype, complex_v>::value )
return std::complex(
get<p>(v.real()),
get<p>(v.imag()) );
105 template <
unsigned p=0,
typename vtype>
auto get( vtype v ) {
…}
// at: run-time counterpart of an indexed lane read; p defaults to 0 and the
// return type is deduced.
//
// Visible branches:
//  - vtype is real_v:    returns v.at(p).
//  - vtype is complex_v: applies at(.., p) to v.real() and v.imag() and
//                        returns the pair as a std::complex.
//
// NOTE(review): the opening lines of the body are elided in this excerpt, so
// handling of other types or instruction sets cannot be seen - confirm
// against the full file.
112 template <
typename vtype>
auto at( vtype v,
const unsigned p=0 )
116 if constexpr ( std::is_same<vtype, real_v>::value )
return v.at(p);
117 if constexpr ( std::is_same<vtype, complex_v>::value )
return std::complex(
at( v.real(), p),
at( v.imag(), p) );
112 template <
typename vtype>
auto at( vtype v,
const unsigned p=0 ) {
…}
// norm: takes its argument by forwarding reference; the return type is
// deduced. The visible fallback (`else`) branch returns value.norm().
//
// NOTE(review): the preceding `if` branch and its condition are elided in
// this excerpt - confirm which types bypass the member call.
124 template <
typename T>
inline auto norm( T&& value ){
126 else {
return value.norm(); }
124 template <
typename T>
inline auto norm( T&& value ) {
…}
// store: takes a destination pointer `container` and a value `v`, and
// returns nothing. The only visible body line obtains the contents of v via
// v.to_ptr().
//
// NOTE(review): the copy into `container` is elided in this excerpt.
// Presumably each lane is written to consecutive elements of `container` -
// confirm against the full file, including any alignment requirement.
129 template <
typename type,
typename store_type>
void store( store_type* container,
const type& v)
133 auto arr = v.to_ptr();
129 template <
typename type,
typename store_type>
void store( store_type* container,
const type& v) {
…}
auto make_complex(T &&re, T &&im)
auto sum_elements(const simd_type &obj)
simd_type gather(const container_type &container, const functor_type &functor, unsigned offset=0, typename simd_type::scalar_type df=0.)
bool all_of(const simd_type &obj)
void store(store_type *container, const type &v)
auto at(vtype v, const unsigned p=0)
size_t aligned_size(const size_t &unaligned_size)
std::complex< double > complex_v
static constexpr unsigned value