2 #include <goofit/PDFs/basic/InterHistPdf.h>
3 #include <goofit/Variable.h>
7 __constant__ fptype *dev_base_interhists[100]; // Multiple histograms for the case of multiple PDFs
8 #define OBS_CODE 4242424242
9 // This number is presumably so high that it will never collide
10 // with an actual parameter index. It indicates that this dimension
11 // is an event observable.
13 // dev_powi is implemented in SmoothHistogramPdf.cu.
15 __device__ fptype device_InterHistogram(fptype *evt, fptype *p, unsigned int *indices) {
17 // nP totalHistograms (idx1 limit1 step1 bins1) (idx2 limit2 step2 bins2) nO o1 o2
18 // where limit and step are indices into functorConstants.
20 int numVars = (indices[0] - 1) / 4;
23 int myHistogramIndex = indices[1];
24 fptype binDistances[10]; // Ten dimensions should be more than enough!
25 // Distance from bin center in units of bin width in each dimension.
27 unsigned int observablesSeen = 0;
29 for(int i = 0; i < numVars; ++i) {
30 fptype currVariable = 0;
31 unsigned int varIndex = indices[2 + 4 * i];
33 if(varIndex == OBS_CODE) {
34 // Interpret this number as observable index.
35 // Notice that this if does not cause a fork
36 // - all threads will hit the same index and
37 // make the same decision.
38 currVariable = evt[indices[indices[0] + 2 + observablesSeen++]];
40 // Interpret as parameter index.
41 currVariable = p[varIndex];
44 int lowerBoundIdx = 3 + 4 * i;
45 fptype lowerBound = functorConstants[indices[lowerBoundIdx + 0]];
46 fptype step = functorConstants[indices[lowerBoundIdx + 1]];
48 currVariable -= lowerBound;
51 auto localBin = static_cast<int>(floor(currVariable));
52 binDistances[i] = currVariable - localBin - fptype(0.5);
53 globalBin += previous * localBin;
54 previous *= indices[lowerBoundIdx + 2];
56 if(0 == THREADIDX + BLOCKIDX)
57 printf("Variable %i: %f %f %i\n", i, currVariable, currVariable * step + lowerBound, localBin);
60 fptype *myHistogram = dev_base_interhists[myHistogramIndex];
73 fptype totalWeight = 0;
74 int totalBins = dev_powi(3, numVars);
76 for(int i = 0; i < totalBins; ++i) {
77 int currBin = globalBin;
78 int localPrevious = 1;
79 int trackingBin = globalBin;
80 bool offSomeAxis = false;
81 fptype currentWeight = 0;
83 // Loop over vars to get offset for each one.
84 for(int v = 0; v < numVars; ++v) {
85 int localNumBins = indices[4 * (v + 1) + 1];
86 int offset = ((i / dev_powi(3, v)) % 3) - 1;
88 currBin += offset * localPrevious;
89 localPrevious *= localNumBins;
91 int currVarBin = trackingBin % localNumBins;
92 trackingBin /= localNumBins;
94 if(currVarBin + offset < 0)
97 if(currVarBin + offset >= localNumBins)
100 fptype currDist = binDistances[v];
102 currentWeight += currDist * currDist;
104 if(0 == THREADIDX + BLOCKIDX)
105 printf("%i, %i: %f %f %f %i %s\n",
112 offSomeAxis ? "off" : "on");
115 // Only interpolate the four closest boxes (in two dimensions; more in three dimensions).
116 currentWeight = currentWeight > 0
117 ? (currentWeight <= sqrt(static_cast<fptype>(numVars)) ? 1 / sqrt(currentWeight) : 0)
119 fptype currentEntry = offSomeAxis ? 0 : myHistogram[currBin];
120 ret += currentWeight * currentEntry;
121 totalWeight += currentWeight;
123 if(0 == THREADIDX + BLOCKIDX)
125 "Adding bin content %i %f with weight %f for total %f.\n", currBin, currentEntry, currentWeight, ret);
128 if(0 == THREADIDX + BLOCKIDX)
129 printf("%f %f %f %i %f\n", ret, totalWeight, evt[0], indices[6], p[indices[6]]);
135 __device__ device_function_ptr ptr_to_InterHistogram = device_InterHistogram;
138 InterHistPdf::InterHistPdf(std::string n, BinnedDataSet *x, std::vector<Variable> params, std::vector<Observable> obses)
140 , numVars(x->numVariables()) {
141 int numConstants = 2 * numVars;
142 registerConstants(numConstants);
143 static unsigned int totalHistograms = 0;
144 host_constants = new fptype[numConstants];
147 std::vector<unsigned int> pindices;
148 pindices.push_back(totalHistograms);
152 for(Observable var : x->getObservables()) {
153 if(std::find(obses.begin(), obses.end(), var) != obses.end()) {
154 registerObservable(var);
155 pindices.push_back(OBS_CODE);
158 pindices.push_back(cIndex + 2 * varIndex + 0);
159 pindices.push_back(cIndex + 2 * varIndex + 1);
160 pindices.push_back(var.getNumBins());
162 // NB, do not put cIndex here, it is accounted for by the offset in MEMCPY_TO_SYMBOL below.
163 host_constants[2 * varIndex + 0] = var.getLowerLimit();
164 host_constants[2 * varIndex + 1] = var.getBinSize();
168 unsigned int numbins = x->getNumBins();
169 thrust::host_vector<fptype> host_histogram;
171 for(unsigned int i = 0; i < numbins; ++i) {
172 fptype curr = x->getBinContent(i);
173 host_histogram.push_back(curr);
177 MEMCPY_TO_SYMBOL(functorConstants,
179 numConstants * sizeof(fptype),
180 cIndex * sizeof(fptype),
181 cudaMemcpyHostToDevice);
183 dev_base_histogram = new thrust::device_vector<fptype>(host_histogram);
184 static fptype *dev_address[1];
185 dev_address[0] = (&((*dev_base_histogram)[0])).get();
187 dev_base_interhists, dev_address, sizeof(fptype *), totalHistograms * sizeof(fptype *), cudaMemcpyHostToDevice);
188 GET_FUNCTION_ADDR(ptr_to_InterHistogram);
189 initialize(pindices);
193 } // namespace GooFit