GooFit  v2.1.3
GooPdf.cu
Go to the documentation of this file.
1 #include <goofit/GlobalCudaDefines.h>
2 #include <goofit/PDFs/GooPdf.h>
3 #include <goofit/detail/ThrustOverride.h>
4 
5 #include <goofit/BinnedDataSet.h>
6 #include <goofit/Error.h>
7 #include <goofit/FitControl.h>
8 #include <goofit/Log.h>
9 #include <goofit/UnbinnedDataSet.h>
10 #include <goofit/Variable.h>
11 
12 #include <thrust/device_vector.h>
13 #include <thrust/host_vector.h>
14 #include <thrust/iterator/constant_iterator.h>
15 #include <thrust/iterator/zip_iterator.h>
16 #include <thrust/sequence.h>
17 #include <thrust/transform.h>
18 #include <thrust/transform_reduce.h>
19 
20 #ifdef ROOT_FOUND
21 #include <TH1D.h>
22 #endif
23 
24 #ifdef GOOFIT_MPI
25 #include <mpi.h>
26 #endif
27 
28 namespace GooFit {
29 
30 // These variables are either function-pointer related (thus specific to this implementation)
31 // or constrained to be in the CUDAglob translation unit by nvcc limitations; otherwise they
32 // would be in PdfBase.
33 
34 // Device-side, translation-unit constrained.
35 
36 __constant__ fptype cudaArray[maxParams];
37 __constant__ unsigned int paramIndices[maxParams];
38 __constant__ fptype functorConstants[maxParams];
39 __constant__ fptype normalisationFactors[maxParams];
40 
41 // For debugging
42 
43 __constant__ int callnumber;
44 __constant__ int gpuDebug;
45 __constant__ unsigned int debugParamIndex;
46 __device__ int internalDebug1 = -1;
47 __device__ int internalDebug2 = -1;
48 __device__ int internalDebug3 = -1;
49 int cpuDebug = 0;
50 
51 #ifdef PROFILING
52 __device__ fptype timeHistogram[10000];
53 fptype host_timeHist[10000];
54 #endif
55 
56 // Function-pointer related.
57 __device__ void *device_function_table[200];
58 // Not clear why this cannot be __constant__, but it causes crashes to declare it so.
59 
60 void *host_function_table[200];
61 unsigned int num_device_functions = 0;
62 std::map<void *, int> functionAddressToDeviceIndexMap;
63 
64 // For use in debugging memory issues
65 void printMemoryStatus(std::string file, int line) {
66  size_t memfree = 0;
67  size_t memtotal = 0;
68  cudaDeviceSynchronize();
69 
70 #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
71  cudaMemGetInfo(&memfree, &memtotal);
72 #endif
73  cudaDeviceSynchronize();
74  std::cout << "Memory status " << file << " " << line << " Free " << memfree << " Total " << memtotal << " Used "
75  << (memtotal - memfree) << std::endl;
76 }
77 
78 __device__ fptype calculateEval(fptype rawPdf, fptype *evtVal, unsigned int par) {
79  // Just return the raw PDF value, for use in (eg) normalisation.
80  return rawPdf;
81 }
82 
83 __device__ fptype calculateNLL(fptype rawPdf, fptype *evtVal, unsigned int par) {
84  // if ((10 > callnumber) && (THREADIDX < 10) && (BLOCKIDX == 0)) cuPrintf("calculateNll %i %f %f %f\n", callnumber,
85  // rawPdf, normalisationFactors[par], rawPdf*normalisationFactors[par]);
86  // if (THREADIDX < 50) printf("Thread %i %f %f\n", THREADIDX, rawPdf, normalisationFactors[par]);
87  rawPdf *= normalisationFactors[par];
88  return rawPdf > 0 ? -log(rawPdf) : 0;
89 }
90 
91 __device__ fptype calculateProb(fptype rawPdf, fptype *evtVal, unsigned int par) {
92  // Return probability, ie normalized PDF value.
93  return rawPdf * normalisationFactors[par];
94 }
95 
96 __device__ fptype calculateBinAvg(fptype rawPdf, fptype *evtVal, unsigned int par) {
97  rawPdf *= normalisationFactors[par];
98  rawPdf *= evtVal[1]; // Bin volume
99 
100  // Log-likelihood of numEvents with expectation of exp is (-exp + numEvents*ln(exp) - ln(numEvents!)).
101  // The last is constant, so we drop it; and then multiply by minus one to get the negative log-likelihood.
102  if(rawPdf > 0) {
103  fptype expEvents = functorConstants[0] * rawPdf;
104  return (expEvents - evtVal[0] * log(expEvents));
105  }
106 
107  return 0;
108 }
109 
110 __device__ fptype calculateBinWithError(fptype rawPdf, fptype *evtVal, unsigned int par) {
111  // In this case interpret the rawPdf as just a number, not a number of events.
112  // Do not divide by integral over phase space, do not multiply by bin volume,
113  // and do not collect 200 dollars. evtVal should have the structure (bin entry, bin error).
114  // printf("[%i, %i] ((%f - %f) / %f)^2 = %f\n", BLOCKIDX, THREADIDX, rawPdf, evtVal[0], evtVal[1], pow((rawPdf -
115  // evtVal[0]) / evtVal[1], 2));
116  rawPdf -= evtVal[0]; // Subtract observed value.
117  rawPdf /= evtVal[1]; // Divide by error.
118  rawPdf *= rawPdf;
119  return rawPdf;
120 }
121 
122 __device__ fptype calculateChisq(fptype rawPdf, fptype *evtVal, unsigned int par) {
123  rawPdf *= normalisationFactors[par];
124  rawPdf *= evtVal[1]; // Bin volume
125 
126  return POW2(rawPdf * functorConstants[0] - evtVal[0]) / (evtVal[0] > 1 ? evtVal[0] : 1);
127 }
128 
129 __device__ device_metric_ptr ptr_to_Eval = calculateEval;
130 __device__ device_metric_ptr ptr_to_NLL = calculateNLL;
131 __device__ device_metric_ptr ptr_to_Prob = calculateProb;
132 __device__ device_metric_ptr ptr_to_BinAvg = calculateBinAvg;
133 __device__ device_metric_ptr ptr_to_BinWithError = calculateBinWithError;
134 __device__ device_metric_ptr ptr_to_Chisq = calculateChisq;
135 
136 void *host_fcn_ptr = nullptr;
137 
138 void *getMetricPointer(std::string name) {
139 #define CHOOSE_PTR(ptrname) \
140  if(name == #ptrname) \
141  GET_FUNCTION_ADDR(ptrname);
142  host_fcn_ptr = nullptr;
143  CHOOSE_PTR(ptr_to_Eval);
144  CHOOSE_PTR(ptr_to_NLL);
145  CHOOSE_PTR(ptr_to_Prob);
146  CHOOSE_PTR(ptr_to_BinAvg);
147  CHOOSE_PTR(ptr_to_BinWithError);
148  CHOOSE_PTR(ptr_to_Chisq);
149 
150  if(host_fcn_ptr == nullptr)
151  throw GooFit::GeneralError("host_fcn_ptr is nullptr");
152 
153  return host_fcn_ptr;
154 #undef CHOOSE_PTR
155 }
156 
157 void *getMetricPointer(EvalFunc val) { return getMetricPointer(evalfunc_to_string(val)); }
158 
159 __host__ int GooPdf::findFunctionIdx(void *dev_functionPtr) {
160  // Code specific to function-pointer implementation
161  auto localPos = functionAddressToDeviceIndexMap.find(dev_functionPtr);
162 
163  if(localPos != functionAddressToDeviceIndexMap.end()) {
164  return (*localPos).second;
165  }
166 
167  int fIdx = num_device_functions;
168  host_function_table[num_device_functions] = dev_functionPtr;
169  functionAddressToDeviceIndexMap[dev_functionPtr] = num_device_functions;
170  num_device_functions++;
171  MEMCPY_TO_SYMBOL(
172  device_function_table, host_function_table, num_device_functions * sizeof(void *), 0, cudaMemcpyHostToDevice);
173 
174 #ifdef PROFILING
175  host_timeHist[fIdx] = 0;
176  MEMCPY_TO_SYMBOL(timeHistogram, host_timeHist, 10000 * sizeof(fptype), 0);
177 #endif
178 
179  return fIdx;
180 }
181 
182 __host__ void GooPdf::initialize(std::vector<unsigned int> pindices, void *dev_functionPtr) {
183  if(!fitControl)
184  setFitControl(std::make_shared<UnbinnedNllFit>());
185 
186  // MetricTaker must be created after PdfBase initialisation is done.
187  PdfBase::initializeIndices(pindices);
188 
189  functionIdx = findFunctionIdx(dev_functionPtr);
190  setMetrics();
191 }
192 
193 __host__ void GooPdf::setDebugMask(int mask, bool setSpecific) const {
194  cpuDebug = mask;
195 #if THRUST_DEVICE_SYSTEM != THRUST_DEVICE_SYSTEM_CUDA
196  gpuDebug = cpuDebug;
197 
198  if(setSpecific)
199  debugParamIndex = parameters;
200 
201 #else
202  MEMCPY_TO_SYMBOL(gpuDebug, &cpuDebug, sizeof(int), 0, cudaMemcpyHostToDevice);
203 
204  if(setSpecific)
205  MEMCPY_TO_SYMBOL(debugParamIndex, &parameters, sizeof(unsigned int), 0, cudaMemcpyHostToDevice);
206 
207 #endif
208 }
209 
210 __host__ void GooPdf::setMetrics() {
211  logger = std::make_shared<MetricTaker>(this, getMetricPointer(fitControl->getMetric()));
212 }
213 
214 __host__ double GooPdf::sumOfNll(int numVars) const {
215  static thrust::plus<double> cudaPlus;
216  thrust::constant_iterator<int> eventSize(numVars);
217  thrust::constant_iterator<fptype *> arrayAddress(dev_event_array);
218  double dummy = 0;
219 
220  // if (host_callnumber >= 2) GooFit::abort(__FILE__, __LINE__, getName() + " debug abort", this);
221  thrust::counting_iterator<int> eventIndex(0);
222 
223  double ret;
224 #ifdef GOOFIT_MPI
225 #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
226  goofit_policy my_policy;
227  double r = thrust::transform_reduce(
228  my_policy,
229  thrust::make_zip_iterator(thrust::make_tuple(eventIndex, arrayAddress, eventSize)),
230  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + m_iEventsPerTask, arrayAddress, eventSize)),
231  *logger,
232  dummy,
233  cudaPlus);
234 #else
235  double r = thrust::transform_reduce(
236  thrust::make_zip_iterator(thrust::make_tuple(eventIndex, arrayAddress, eventSize)),
237  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + m_iEventsPerTask, arrayAddress, eventSize)),
238  *logger,
239  dummy,
240  cudaPlus);
241 #endif
242 
243  MPI_Allreduce(&r, &ret, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
244 #else
245 #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
246  goofit_policy my_policy;
247  ret = thrust::transform_reduce(
248  my_policy,
249  thrust::make_zip_iterator(thrust::make_tuple(eventIndex, arrayAddress, eventSize)),
250  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, arrayAddress, eventSize)),
251  *logger,
252  dummy,
253  cudaPlus);
254 #else
255  ret = thrust::transform_reduce(
256  thrust::make_zip_iterator(thrust::make_tuple(eventIndex, arrayAddress, eventSize)),
257  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, arrayAddress, eventSize)),
258  *logger,
259  dummy,
260  cudaPlus);
261 #endif
262 #endif
263  return ret;
264 }
265 
266 __host__ double GooPdf::calculateNLL() const {
267  // if (cpuDebug & 1) std::cout << getName() << " entering calculateNLL (" << host_callnumber << ")" << std::endl;
268 
269  // MEMCPY_TO_SYMBOL(callnumber, &host_callnumber, sizeof(int));
270  // int oldMask = cpuDebug;
271  // if (0 == host_callnumber) setDebugMask(0, false);
272  // std::cout << "Start norm " << getName() << std::endl;
273  normalize();
274  // std::cout << "Norm done\n";
275  // if ((0 == host_callnumber) && (1 == oldMask)) setDebugMask(1, false);
276 
277  // if (cpuDebug & 1) {
278  // std::cout << "Norm factors: ";
279  // for (int i = 0; i < totalParams; ++i) std::cout << host_normalisation[i] << " ";
280  // std::cout << std::endl;
281  //}
282 
283  if(host_normalisation[parameters] <= 0)
284  GooFit::abort(__FILE__, __LINE__, getName() + " non-positive normalisation", this);
285 
286  MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
287  cudaDeviceSynchronize(); // Ensure normalisation integrals are finished
288 
289  int numVars = observables.size();
290 
291  if(fitControl->binnedFit()) {
292  numVars += 2;
293  numVars *= -1;
294  }
295 
296  fptype ret = sumOfNll(numVars);
297 
298  if(0 == ret)
299  GooFit::abort(__FILE__, __LINE__, getName() + " zero NLL", this);
300 
301  // if (cpuDebug & 1) std::cout << "Full NLL " << host_callnumber << " : " << 2*ret << std::endl;
302  // setDebugMask(0);
303 
304  // if ((cpuDebug & 1) && (host_callnumber >= 1)) GooFit::abort(__FILE__, __LINE__, getName() + " debug abort",
305  // this);
306  return 2 * ret;
307 }
308 
309 __host__ std::vector<fptype> GooPdf::evaluateAtPoints(Observable var) {
310  copyParams();
311  normalize();
312  MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
313  UnbinnedDataSet tempdata(observables);
314 
315  double step = var.getBinSize();
316 
317  for(int i = 0; i < var.getNumBins(); ++i) {
318  var.setValue(var.getLowerLimit() + (i + 0.5) * step);
319  tempdata.addEvent();
320  }
321 
322  auto old = getData();
323  setData(&tempdata);
324 
325  thrust::counting_iterator<int> eventIndex(0);
326  thrust::constant_iterator<int> eventSize(observables.size());
327  thrust::constant_iterator<fptype *> arrayAddress(dev_event_array);
328  thrust::device_vector<fptype> results(var.getNumBins());
329 
330  MetricTaker evalor(this, getMetricPointer("ptr_to_Eval"));
331 #ifdef GOOFIT_MPI
332  thrust::transform(
333  thrust::make_zip_iterator(thrust::make_tuple(eventIndex, arrayAddress, eventSize)),
334  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + m_iEventsPerTask, arrayAddress, eventSize)),
335  results.begin(),
336  evalor);
337 #else
338  thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(eventIndex, arrayAddress, eventSize)),
339  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, arrayAddress, eventSize)),
340  results.begin(),
341  evalor);
342 #endif
343 
344  // Note, This is not fully realized with MPI. We need to copy each 'results' buffer to each other 'MPI_Scatterv',
345  // then we can do the rest.
346  thrust::host_vector<fptype> h_results = results;
347  std::vector<fptype> res;
348  res.resize(var.getNumBins());
349 
350  for(int i = 0; i < var.getNumBins(); ++i) {
351  res[i] = h_results[i] * host_normalisation[parameters];
352  }
353 
354  setData(old);
355  return res;
356 }
357 
358 __host__ void GooPdf::scan(Observable var, std::vector<fptype> &values) {
359  fptype step = var.getUpperLimit();
360  step -= var.getLowerLimit();
361  step /= var.getNumBins();
362  values.clear();
363 
364  for(fptype v = var.getLowerLimit() + 0.5 * step; v < var.getUpperLimit(); v += step) {
365  var.setValue(v);
366  copyParams();
367  fptype curr = calculateNLL();
368  values.push_back(curr);
369  }
370 }
371 
372 // TODO: is this needed?
373 __host__ void GooPdf::setParameterConstantness(bool constant) {
374  std::vector<Variable> pars = getParameters();
375 
376  for(Variable &p : pars) {
377  p.setFixed(constant);
378  }
379 }
380 
381 __host__ fptype GooPdf::getValue(EvalFunc evalfunc) {
382  // Returns the value of the PDF at a single point.
383  // Execute redundantly in all threads for OpenMP multiGPU case
384  copyParams();
385  normalize();
386  MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
387 
388  UnbinnedDataSet point(observables);
389  point.addEvent();
390  auto old = getData();
391  setData(&point);
392 
393  thrust::counting_iterator<int> eventIndex(0);
394  thrust::constant_iterator<int> eventSize(observables.size());
395  thrust::constant_iterator<fptype *> arrayAddress(dev_event_array);
396  thrust::device_vector<fptype> results(1);
397 
398  MetricTaker evalor(this, getMetricPointer(evalfunc));
399  thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(eventIndex, arrayAddress, eventSize)),
400  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + 1, arrayAddress, eventSize)),
401  results.begin(),
402  evalor);
403 
404  setData(old);
405  return results[0];
406 }
407 
408 __host__ fptype GooPdf::normalize() const {
409  if(!fitControl->metricIsPdf()) {
410  GOOFIT_TRACE("{}: metricIsPdf, returning 1", getName());
411  host_normalisation[parameters] = 1.0;
412  return 1.0;
413  }
414 
415  fptype ret = 1;
416 
417  if(hasAnalyticIntegral()) {
418  // Loop goes only over observables of this PDF.
419  for(const Observable &v : observables) {
420  GOOFIT_TRACE("{}: Analytically integrating over {}", getName(), v.getName());
421  ret *= integrate(v.getLowerLimit(), v.getUpperLimit());
422  }
423 
424  host_normalisation[parameters] = 1.0 / ret;
425  GOOFIT_TRACE("{}: Param {} integral is = {}", getName(), parameters, ret);
426 
427  return ret;
428  }
429 
430  GOOFIT_TRACE("{}, Computing integral without analytic help", getName());
431 
432  int totalBins = 1;
433 
434  for(const Observable &v : observables) {
435  ret *= v.getUpperLimit() - v.getLowerLimit();
436  totalBins *= integrationBins > 0 ? integrationBins : v.getNumBins();
437 
438  GOOFIT_TRACE("Total bins {} due to {} {} {}", totalBins, v.getName(), integrationBins, v.getNumBins());
439  }
440 
441  ret /= totalBins;
442 
443  fptype dummy = 0;
444  static thrust::plus<fptype> cudaPlus;
445  thrust::constant_iterator<fptype *> arrayAddress(normRanges);
446  thrust::constant_iterator<int> eventSize(observables.size());
447  thrust::counting_iterator<int> binIndex(0);
448 
449  fptype sum;
450 #ifdef GOOFIT_MPI
451 #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
452  goofit_policy my_policy;
453  fptype s = thrust::transform_reduce(
454  my_policy,
455  thrust::make_zip_iterator(thrust::make_tuple(binIndex, eventSize, arrayAddress)),
456  thrust::make_zip_iterator(thrust::make_tuple(binIndex + totalBins, eventSize, arrayAddress)),
457  *logger,
458  dummy,
459  cudaPlus);
460 #else
461  fptype s = thrust::transform_reduce(
462  thrust::make_zip_iterator(thrust::make_tuple(binIndex, eventSize, arrayAddress)),
463  thrust::make_zip_iterator(thrust::make_tuple(binIndex + totalBins, eventSize, arrayAddress)),
464  *logger,
465  dummy,
466  cudaPlus);
467 #endif
468 
469  MPI_Allreduce(&s, &sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
470 #else
471 #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA
472  goofit_policy my_policy;
473  sum = thrust::transform_reduce(
474  my_policy,
475  thrust::make_zip_iterator(thrust::make_tuple(binIndex, eventSize, arrayAddress)),
476  thrust::make_zip_iterator(thrust::make_tuple(binIndex + totalBins, eventSize, arrayAddress)),
477  *logger,
478  dummy,
479  cudaPlus);
480 #else
481  sum = thrust::transform_reduce(
482  thrust::make_zip_iterator(thrust::make_tuple(binIndex, eventSize, arrayAddress)),
483  thrust::make_zip_iterator(thrust::make_tuple(binIndex + totalBins, eventSize, arrayAddress)),
484  *logger,
485  dummy,
486  cudaPlus);
487 #endif
488 #endif
489 
490  if(std::isnan(sum)) {
491  GooFit::abort(__FILE__, __LINE__, getName() + " NaN in normalisation", this);
492  } else if(0 >= sum) {
493  GooFit::abort(__FILE__, __LINE__, "Non-positive normalisation", this);
494  }
495 
496  ret *= sum;
497 
498  if(0 == ret)
499  GooFit::abort(__FILE__, __LINE__, "Zero integral");
500 
501  GOOFIT_TRACE("{}: Param {} integral is ~= {}", getName(), parameters, ret);
502  host_normalisation[parameters] = 1.0 / ret;
503  return ret;
504 }
505 
506 #ifdef PROFILING
507 __constant__ fptype conversion = (1.0 / CLOCKS_PER_SEC);
508 __device__ fptype callFunction(fptype *eventAddress, unsigned int functionIdx, unsigned int paramIdx) {
509  clock_t start = clock();
510  fptype ret = (*(reinterpret_cast<device_function_ptr>(device_function_table[functionIdx])))(
511  eventAddress, cudaArray, paramIndices + paramIdx);
512  clock_t stop = clock();
513 
514  if((0 == THREADIDX + BLOCKIDX) && (stop > start)) {
515  // Avoid issue when stop overflows and start doesn't.
516  timeHistogram[functionIdx * 100 + paramIdx] += ((stop - start) * conversion);
517  // printf("Clock: %li %li %li | %u %f\n", (long) start, (long) stop, (long) (stop - start), functionIdx,
518  // timeHistogram[functionIdx]);
519  }
520 
521  return ret;
522 }
523 #else
524 __device__ fptype callFunction(fptype *eventAddress, unsigned int functionIdx, unsigned int paramIdx) {
525  return (*(reinterpret_cast<device_function_ptr>(device_function_table[functionIdx])))(
526  eventAddress, cudaArray, paramIndices + paramIdx);
527 }
528 #endif
529 
530 __host__ std::vector<std::vector<fptype>> GooPdf::getCompProbsAtDataPoints() {
531  copyParams();
532  // double overall =
533  normalize();
534  MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
535 
536  int numVars = observables.size();
537 
538  if(fitControl->binnedFit()) {
539  numVars += 2;
540  numVars *= -1;
541  }
542 
543  thrust::device_vector<fptype> results(numEntries);
544  thrust::constant_iterator<int> eventSize(numVars);
545  thrust::constant_iterator<fptype *> arrayAddress(dev_event_array);
546  thrust::counting_iterator<int> eventIndex(0);
547  MetricTaker evalor(this, getMetricPointer("ptr_to_Prob"));
548  thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(eventIndex, arrayAddress, eventSize)),
549  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, arrayAddress, eventSize)),
550  results.begin(),
551  evalor);
552  std::vector<std::vector<fptype>> values;
553  values.resize(components.size() + 1);
554  thrust::host_vector<fptype> host_results = results;
555 
556  for(unsigned int i = 0; i < host_results.size(); ++i) {
557  values[0].push_back(host_results[i]);
558  }
559 
560  for(unsigned int i = 0; i < components.size(); ++i) {
561  MetricTaker compevalor(components[i], getMetricPointer("ptr_to_Prob"));
562  thrust::counting_iterator<int> ceventIndex(0);
563  thrust::transform(
564  thrust::make_zip_iterator(thrust::make_tuple(ceventIndex, arrayAddress, eventSize)),
565  thrust::make_zip_iterator(thrust::make_tuple(ceventIndex + numEntries, arrayAddress, eventSize)),
566  results.begin(),
567  compevalor);
568  host_results = results;
569 
570  for(unsigned int j = 0; j < host_results.size(); ++j) {
571  values[1 + i].push_back(host_results[j]);
572  }
573  }
574  return values;
575 }
576 
577 __host__ UnbinnedDataSet GooPdf::makeGrid() {
578  std::vector<Observable> ret = getObservables();
579 
580  UnbinnedDataSet grid{ret};
581  grid.fillWithGrid();
582 
583  return grid;
584 }
585 
586 // still need to add OpenMP/multi-GPU code here
587 __host__ void GooPdf::transformGrid(fptype *host_output) {
588  generateNormRange();
589  // normalize();
590  int totalBins = 1;
591 
592  for(const Observable &v : observables) {
593  totalBins *= v.getNumBins();
594  }
595 
596  thrust::constant_iterator<fptype *> arrayAddress(normRanges);
597  thrust::constant_iterator<int> eventSize(observables.size());
598  thrust::counting_iterator<int> binIndex(0);
599  thrust::device_vector<fptype> d_vec;
600  d_vec.resize(totalBins);
601 
602  thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(binIndex, eventSize, arrayAddress)),
603  thrust::make_zip_iterator(thrust::make_tuple(binIndex + totalBins, eventSize, arrayAddress)),
604  d_vec.begin(),
605  *logger);
606 
607  thrust::host_vector<fptype> h_vec = d_vec;
608 
609  for(unsigned int i = 0; i < totalBins; ++i)
610  host_output[i] = h_vec[i];
611 }
612 
613 __host__ void GooPdf::setFitControl(std::shared_ptr<FitControl> fc) {
614  for(auto &component : components) {
615  component->setFitControl(fc);
616  }
617 
618  fitControl = fc;
619 
620  setMetrics();
621 }
622 
623 #ifdef ROOT_FOUND
624 __host__ TH1D *GooPdf::plotToROOT(Observable var, double normFactor, std::string name) {
625  if(name.empty())
626  name = getName() + "_hist";
627 
628  auto ret = new TH1D(name.c_str(), "", var.getNumBins(), var.getLowerLimit(), var.getUpperLimit());
629  std::vector<fptype> binValues = evaluateAtPoints(var);
630 
631  double pdf_int = 0;
632 
633  for(int i = 0; i < var.getNumBins(); ++i) {
634  pdf_int += binValues[i];
635  }
636 
637  for(int i = 0; i < var.getNumBins(); ++i)
638  ret->SetBinContent(i + 1, binValues[i] * normFactor / pdf_int / var.getBinSize());
639  return ret;
640 }
641 #endif
642 } // namespace GooFit