GooFit  v2.1.3
Classes | Macros | Functions
ThrustOverride.h File Reference
#include <thrust/detail/config.h>
#include <thrust/detail/seq.h>
#include <thrust/detail/temporary_array.h>
#include <thrust/detail/type_traits.h>
#include <thrust/execution_policy.h>
#include <thrust/reduce.h>
#include <thrust/system/cuda/detail/bulk.h>
#include <thrust/system/cuda/detail/decomposition.h>
#include <thrust/system/cuda/detail/execute_on_stream.h>
#include <thrust/system/cuda/detail/execution_policy.h>
#include <goofit/detail/ThrustOverrideConfig.h>

Go to the source code of this file.

Classes

struct  goofit_policy
 

Macros

#define OR_GROUPSIZE   128
 
#define OR_GRAINSIZE   7
 

Functions

template<typename InputIterator , typename UnaryFunction , typename OutputType , typename BinaryFunction >
__host__ __device__ OutputType transform_reduce (goofit_policy &exec, InputIterator first, InputIterator last, UnaryFunction unary_op, OutputType init, BinaryFunction binary_op)
 
template<typename InputIterator , typename OutputType , typename BinaryFunction >
__host__ __device__ OutputType reduce (goofit_policy &exec, InputIterator first, InputIterator last, OutputType init, BinaryFunction binary_op)
 

Macro Definition Documentation

◆ OR_GRAINSIZE

#define OR_GRAINSIZE   7

Definition at line 25 of file ThrustOverride.h.

Referenced by reduce().

◆ OR_GROUPSIZE

#define OR_GROUPSIZE   128

Definition at line 21 of file ThrustOverride.h.

Referenced by reduce().

Function Documentation

◆ reduce()

template<typename InputIterator , typename OutputType , typename BinaryFunction >
__host__ __device__ OutputType reduce ( goofit_policy & exec,
InputIterator  first,
InputIterator  last,
OutputType  init,
BinaryFunction  binary_op 
)

Definition at line 47 of file ThrustOverride.h.

References OR_GRAINSIZE and OR_GROUPSIZE.

Referenced by getData(), main(), and transform_reduce().

47  {
48  typedef typename thrust::iterator_difference<InputIterator>::type size_type;
49 
50  const size_type n = last - first;
51 
52  if(n <= 0)
53  return init;
54 
55  // We are no longer expecting a default stream to be passed
56  cudaStream_t s = stream(thrust::detail::derived_cast(exec));
57 
58  // OR_GROUPSIZE=128
59  // OR_GRAINSIZE=7
60  // printf ("groupsize:%i grainsize:%i\n", OR_GROUPSIZE, OR_GRAINSIZE);
61  const size_type groupsize = OR_GROUPSIZE;
62  // const size_type groupsize = 128;
63  // const size_type grainsize = 7;
64  const size_type grainsize = OR_GRAINSIZE;
65  const size_type tile_size = groupsize * grainsize;
66  const size_type num_tiles = (n + tile_size - 1) / tile_size;
67  const size_type subscription = 10;
68 
69  thrust::system::cuda::detail::bulk_::concurrent_group<thrust::system::cuda::detail::bulk_::agent<grainsize>,
70  groupsize>
71  g;
72 
73  const size_type num_groups = thrust::min<size_type>(subscription * g.hardware_concurrency(), num_tiles);
74 
75  thrust::system::cuda::detail::aligned_decomposition<size_type> decomp(n, num_groups, tile_size);
76 
77  thrust::detail::temporary_array<OutputType, goofit_policy> partial_sums(exec, decomp.size());
78 
79  // reduce into partial sums
80  thrust::system::cuda::detail::bulk_::async(thrust::system::cuda::detail::bulk_::par(s, g, decomp.size()),
81  thrust::system::cuda::detail::reduce_detail::reduce_partitions(),
82  thrust::system::cuda::detail::bulk_::root.this_exec,
83  first,
84  decomp,
85  partial_sums.begin(),
86  init,
87  binary_op)
88  .wait();
89 
90  if(partial_sums.size() > 1) {
91  // reduce the partial sums
92  thrust::system::cuda::detail::bulk_::async(thrust::system::cuda::detail::bulk_::par(s, g, 1),
93  thrust::system::cuda::detail::reduce_detail::reduce_partitions(),
94  thrust::system::cuda::detail::bulk_::root.this_exec,
95  partial_sums.begin(),
96  partial_sums.end(),
97  partial_sums.begin(),
98  binary_op);
99  } // end if
100 
101  return get_value(exec, &partial_sums[0]);
102 } // end goofit_reduce()
#define OR_GRAINSIZE   7
#define OR_GROUPSIZE   128

◆ transform_reduce()

template<typename InputIterator , typename UnaryFunction , typename OutputType , typename BinaryFunction >
__host__ __device__ OutputType transform_reduce ( goofit_policy & exec,
InputIterator  first,
InputIterator  last,
UnaryFunction  unary_op,
OutputType  init,
BinaryFunction  binary_op 
)

Definition at line 32 of file ThrustOverride.h.

References reduce().

37  {
38  // printf ("goofit transform_reduce\n");
39  thrust::transform_iterator<UnaryFunction, InputIterator, OutputType> xfrm_first(first, unary_op);
40  thrust::transform_iterator<UnaryFunction, InputIterator, OutputType> xfrm_last(last, unary_op);
41 
42  return thrust::reduce(exec, xfrm_first, xfrm_last, init, binary_op);
43 } // end transform_reduce()
__host__ __device__ OutputType reduce(goofit_policy &exec, InputIterator first, InputIterator last, OutputType init, BinaryFunction binary_op)