// Portability shims that let the same sources build against different Thrust
// device systems (selected through THRUST_DEVICE_SYSTEM).
// Visible on the next line:
//   - _Pragma(x) is remapped onto __pragma(x);
//   - when THRUST_DEVICE_SYSTEM is not THRUST_DEVICE_SYSTEM_CUDA, an empty inline
//     cudaDeviceSynchronize() is provided so callers need no conditional code.
// NOTE(review): the guard around the _Pragma remap (presumably an MSVC check) and
// the matching #endif lines are not visible in this excerpt - confirm against the
// full header.
3 #include <thrust/detail/config.h> 4 #include <thrust/system_error.h> 6 #define _USE_MATH_DEFINES 15 #define _Pragma(x) __pragma(x) 19 #if THRUST_DEVICE_SYSTEM != THRUST_DEVICE_SYSTEM_CUDA 21 inline void cudaDeviceSynchronize() {}
// Two families of MEMCPY / MEMCPY_TO_SYMBOL / MEMCPY_FROM_SYMBOL / GET_FUNCTION_ADDR
// are defined on the next line:
//   - the first forwards to cudaMemcpy / cudaMemcpyToSymbol / cudaMemcpyFromSymbol
//     and defines RO_CACHE(x) as __ldg(&x);
//   - the second uses plain memcpy with byte offsets applied through (char *) casts;
//     its `direction` argument is accepted but unused, and GET_FUNCTION_ADDR simply
//     assigns the function's address to host_fcn_ptr.
// Both GET_FUNCTION_ADDR variants log the function name and call site via GOOFIT_DEBUG.
// NOTE(review): the #if/#else that selects between the two families is not visible here.
//
// Thread-indexing aliases:
//   - OMP and TBB device systems: THREADIDX / BLOCKDIM map to omp_get_thread_num() /
//     omp_get_num_threads(), THREAD_SYNCH to an "omp barrier" pragma;
//   - CUDA: THREADIDX / BLOCKDIM / BLOCKIDX map to threadIdx.x / blockDim.x / blockIdx.x,
//     THREAD_SYNCH to __syncthreads().
// NOTE(review): the CUDA THREAD_SYNCH carries its own trailing ';' while the OMP form
// does not, and RO_CACHE does not parenthesize its argument - worth confirming that
// every call site tolerates both.
//
// A two-value cudaError_t enum (cudaSuccess, cudaErrorMemoryAllocation) follows the
// <driver_types.h> include. NOTE(review): presumably the stand-in for non-CUDA builds;
// the separating #else is not visible in this excerpt.
28 #define MEMCPY(target, source, count, direction) cudaMemcpy(target, source, count, direction) 29 #define MEMCPY_TO_SYMBOL(target, source, count, offset, direction) \ 30 cudaMemcpyToSymbol(target, source, count, offset, direction) 31 #define GET_FUNCTION_ADDR(fname) \ 33 cudaMemcpyFromSymbol((void **)&host_fcn_ptr, fname, sizeof(void *)); \ 34 GOOFIT_DEBUG("Using function {} in {}, {}:{}", #fname, __func__, __FILE__, __LINE__); \ 36 #define MEMCPY_FROM_SYMBOL(target, source, count, offset, direction) \ 37 cudaMemcpyFromSymbol(target, source, count, offset, direction) 41 #include <generics/ldg.h> 42 #define RO_CACHE(x) __ldg(&x) 46 #define MEMCPY(target, source, count, direction) memcpy((char *)target, source, count) 47 #define MEMCPY_TO_SYMBOL(target, source, count, offset, direction) memcpy(((char *)target) + offset, source, count) 48 #define MEMCPY_FROM_SYMBOL(target, source, count, offset, direction) \ 49 memcpy((char *)target, ((char *)source) + offset, count) 50 #define GET_FUNCTION_ADDR(fname) \ 52 host_fcn_ptr = (void *)fname; \ 53 GOOFIT_DEBUG("Using function {} in {}, {}:{}", #fname, __func__, __FILE__, __LINE__); \ 62 #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_OMP || THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_TBB 63 #define THREADIDX (omp_get_thread_num()) 64 #define BLOCKDIM (omp_get_num_threads()) 66 #define THREAD_SYNCH _Pragma("omp barrier") 68 #elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CPP 74 #elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA 75 #define THREADIDX (threadIdx.x) 76 #define BLOCKDIM (blockDim.x) 77 #define BLOCKIDX (blockIdx.x) 78 #define THREAD_SYNCH __syncthreads(); 83 #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA 84 #include <driver_types.h> 86 enum cudaError_t { cudaSuccess, cudaErrorMemoryAllocation };
/// Declaration only; the definition is not part of this section of the file.
///
/// @param target  Address of the pointer variable the function fills in.
/// @param bytes   Size argument, in bytes.
/// @return        A cudaError_t status code.
///
/// NOTE(review): presumably allocates `bytes` bytes on the active backend and
/// stores the address in `*target` - confirm against the definition.
cudaError_t gooMalloc(void **target, size_t bytes);
// Numeric constants and small arithmetic helpers.
//
// root2 (sqrt(2)) and invRootPi (1/sqrt(pi)) are defined twice: once as double
// literals and once as float literals (f suffix). The first pair follows
// `#ifndef GOOFIT_SINGLES`.
// NOTE(review): the #else/#endif separating the two pairs is not visible in this
// excerpt - confirm against the full header.
//
// POW2(x) / POW3(x) expand to x multiplied by itself two / three times. The argument
// is evaluated once per factor, so do not pass expressions with side effects.
//
// rsqrt<T>(val): reciprocal square root computed as 1.0 / sqrt(val), usable from both
// host and device code. It is compiled only when __CUDA_ARCH__ is undefined or below 350.
// NOTE(review): the function's closing brace and the matching #endif are not visible
// in this excerpt.
94 #ifndef GOOFIT_SINGLES 97 #define root2 1.4142135623730951 98 #define invRootPi 0.5641895835477563 103 #define root2 1.4142135623730951f 104 #define invRootPi 0.5641895835477563f 110 #define POW2(x) ((x) * (x)) 111 #define POW3(x) ((x) * (x) * (x)) 114 #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ < 350) 115 template <
typename T>
116 __host__ __device__ T
rsqrt(T val) {
// NOTE(review): 1.0 is a double literal, so for T = float the division is carried out
// in double precision and the quotient is converted back to T on return.
117 return 1.0 / sqrt(val);
122 #if defined(__CUDACC__) && __CUDACC_VER_MAJOR__ < 8
cudaError_t gooFree(void *ptr)
__host__ __device__ T rsqrt(T val)
cudaError_t gooMalloc(void **target, size_t bytes)