GooFit  v2.1.3
DP4Pdf.cu
Go to the documentation of this file.
1 /*
2 04/05/2016 Christoph Hasse
3 DISCLAIMER:
4 
5 This code is not sufficently tested yet and still under heavy development!
6 
7 TODO:
8 - Test lineshapes, only done for BW_DP and BW_MINT so far
9 - Check and implement more SF
10 - Currently no check if the event is even allowed in phasespace is done. This should preferably be done outside of this
11 class.
12 
13 - Some things could be implemented differently maybe, performance should be compared for both cases.
14  -For example the way Spinfactors are stored in the same array as the Lineshape values.
15  Is this really worth the memory we lose by using a complex to store the SF?
16 */
17 
18 #include <mcbooster/Evaluate.h>
19 #include <mcbooster/EvaluateArray.h>
20 #include <mcbooster/GContainers.h>
21 #include <mcbooster/GFunctional.h>
22 #include <mcbooster/GTypes.h>
23 #include <mcbooster/Generate.h>
24 #include <mcbooster/Vector4R.h>
25 
26 #include <goofit/Error.h>
27 #include <goofit/PDFs/physics/DP4Pdf.h>
28 #include <goofit/PDFs/physics/EvalVar.h>
29 
30 namespace GooFit {
31 
32 // The function of this array is to hold all the cached waves; specific
33 // waves are recalculated when the corresponding resonance mass or width
34 // changes. Note that in a multithread environment each thread needs its
35 // own cache, hence the '10'. Ten threads should be enough for anyone!
36 __device__ fpcomplex *cResSF[10];
37 __device__ fpcomplex *Amps_DP[10];
38 /*
39 Constant memory array to hold specific info for amplitude calculation.
40 First entries are the starting points in array, necessary, because number of Lineshapes(LS) or Spinfactors(SF) can vary
41 |start of each Amplitude| #Linshapes | #Spinfactors | LS-indices | SF-indices|
42 | 1 entry per Amplitude | 1 per Amp | 1 per Amp | #LS in Amp| #SF in Amp|
43 */
44 __constant__ unsigned int AmpIndices[500];
45 
46 // This function gets called by the GooFit framework to get the value of the PDF.
47 __device__ fptype device_DP(fptype *evt, fptype *p, unsigned int *indices) {
48  // printf("DalitzPlot evt %i zero: %i %i %f (%f, %f).\n", evtNum, numResonances, effFunctionIdx, eff, totalAmp.real,
49  // totalAmp.imag);
50 
51  auto evtNum = static_cast<int>(floor(0.5 + evt[indices[7 + indices[0]]]));
52  // printf("%i\n",evtNum );
53  fpcomplex totalAmp(0, 0);
54  unsigned int cacheToUse = indices[2];
55  unsigned int numAmps = indices[5];
56 
57  for(int i = 0; i < numAmps; ++i) {
58  fpcomplex amp{p[indices[6 + 2 * i]], p[indices[7 + 2 * i]]};
59 
60  fpcomplex matrixelement((Amps_DP[cacheToUse][evtNum * numAmps + i]).real(),
61  (Amps_DP[cacheToUse][evtNum * numAmps + i]).imag());
62 
63  totalAmp += matrixelement * amp;
64  }
65 
66  fptype ret = thrust::norm(totalAmp);
67  int effFunctionIdx = 6 + 2 * indices[3] + 2 * indices[4] + 2 * indices[5];
68  fptype eff = callFunction(evt, indices[effFunctionIdx], indices[effFunctionIdx + 1]);
69  ret *= eff;
70 
71  // printf("result %.7g\n", ret);
72  return ret;
73 }
74 
75 __device__ device_function_ptr ptr_to_DP = device_DP;
76 
77 __host__ DPPdf::DPPdf(
78  std::string n, std::vector<Observable> observables, DecayInfo4 decay, GooPdf *efficiency, unsigned int MCeventsNorm)
79  : GooPdf(n)
80  , decayInfo(decay)
81  , totalEventSize(observables.size()) // number of observables plus eventnumber
82 {
83  for(auto &observable : observables) {
84  registerObservable(observable);
85  }
86 
87  // registerObservable(eventNumber);
88 
89  std::vector<fptype> decayConstants;
90  decayConstants.push_back(decayInfo.meson_radius);
91 
92  for(double &particle_masse : decayInfo.particle_masses) {
93  decayConstants.push_back(particle_masse);
94  }
95 
96  std::vector<unsigned int> pindices;
97  pindices.push_back(registerConstants(decayConstants.size()));
98  MEMCPY_TO_SYMBOL(functorConstants,
99  &decayConstants[0],
100  decayConstants.size() * sizeof(fptype),
101  cIndex * sizeof(fptype),
102  cudaMemcpyHostToDevice);
103  static int cacheCount = 0;
104  cacheToUse = cacheCount++;
105  pindices.push_back(cacheToUse);
106  pindices.push_back(0); //#LS
107  pindices.push_back(0); //#SF
108  pindices.push_back(0); //#AMP
109 
110  // This is the start of reading in the amplitudes and adding the lineshapes and Spinfactors to this PDF
111  // This is done in this way so we don't have multiple copies of one lineshape in one pdf.
112  std::vector<unsigned int> ampidx;
113  std::vector<unsigned int> nPermVec;
114  std::vector<unsigned int> ampidxstart;
115 
116  for(auto &amplitude : decayInfo.amplitudes) {
117  AmpMap[amplitude->_uniqueDecayStr] = std::make_pair(std::vector<unsigned int>(0), std::vector<unsigned int>(0));
118 
119  auto LSvec = amplitude->_LS;
120 
121  for(auto &LSIT : LSvec) {
122  auto found = std::find_if(components.begin(), components.end(), [&LSIT](const PdfBase *L) {
123  return (*LSIT) == *(dynamic_cast<const Lineshape *>(L));
124  });
125 
126  if(found != components.end()) {
127  AmpMap[amplitude->_uniqueDecayStr].first.push_back(std::distance(components.begin(), found));
128  } else {
129  components.push_back(LSIT);
130  AmpMap[amplitude->_uniqueDecayStr].first.push_back(components.size() - 1);
131  }
132  }
133 
134  auto SFvec = amplitude->_SF;
135 
136  for(auto &SFIT : SFvec) {
137  auto found = std::find_if(
138  SpinFactors.begin(), SpinFactors.end(), [&SFIT](const SpinFactor *S) { return (*SFIT) == (*S); });
139 
140  if(found != SpinFactors.end()) {
141  AmpMap[amplitude->_uniqueDecayStr].second.push_back(std::distance(SpinFactors.begin(), found));
142  } else {
143  SpinFactors.push_back(SFIT);
144  AmpMap[amplitude->_uniqueDecayStr].second.push_back(SpinFactors.size() - 1);
145  }
146  }
147 
148  nPermVec.push_back(amplitude->_nPerm);
149  pindices.push_back(registerParameter(amplitude->_ar));
150  pindices.push_back(registerParameter(amplitude->_ai));
151 
152  ampidxstart.push_back(ampidx.size());
153  std::vector<unsigned int> ls = AmpMap[amplitude->_uniqueDecayStr].first;
154  std::vector<unsigned int> sf = AmpMap[amplitude->_uniqueDecayStr].second;
155  ampidx.push_back(ls.size());
156  ampidx.push_back(sf.size());
157  ampidx.push_back(amplitude->_nPerm);
158  ampidx.insert(ampidx.end(), ls.begin(), ls.end());
159  ampidx.insert(ampidx.end(), sf.begin(), sf.end());
160  }
161 
162  MEMCPY_TO_SYMBOL(
163  AmpIndices, &(ampidxstart[0]), ampidxstart.size() * sizeof(unsigned int), 0, cudaMemcpyHostToDevice);
164  MEMCPY_TO_SYMBOL(AmpIndices,
165  &(ampidx[0]),
166  ampidx.size() * sizeof(unsigned int),
167  ampidxstart.size() * sizeof(unsigned int),
168  cudaMemcpyHostToDevice);
169 
170  pindices[2] = components.size();
171  pindices[3] = SpinFactors.size();
172  pindices[4] = AmpMap.size();
173 
174  for(auto &component : components) {
175  reinterpret_cast<Lineshape *>(component)->setConstantIndex(cIndex);
176  pindices.push_back(reinterpret_cast<Lineshape *>(component)->getFunctionIndex());
177  pindices.push_back(reinterpret_cast<Lineshape *>(component)->getParameterIndex());
178  }
179 
180  for(auto &SpinFactor : SpinFactors) {
181  pindices.push_back(SpinFactor->getFunctionIndex());
182  pindices.push_back(SpinFactor->getParameterIndex());
183  SpinFactor->setConstantIndex(cIndex);
184  }
185 
186  pindices.push_back(efficiency->getFunctionIndex());
187  pindices.push_back(efficiency->getParameterIndex());
188  components.push_back(efficiency);
189 
190  GET_FUNCTION_ADDR(ptr_to_DP);
191  initialize(pindices);
192 
193  Integrator = new NormIntegrator(parameters);
194  redoIntegral = new bool[components.size() - 1];
195  cachedMasses = new fptype[components.size() - 1];
196  cachedWidths = new fptype[components.size() - 1];
197 
198  for(int i = 0; i < components.size() - 1; ++i) {
199  redoIntegral[i] = true;
200  cachedMasses[i] = -1;
201  cachedWidths[i] = -1;
202  lscalculators.push_back(new LSCalculator(parameters, i));
203  }
204 
205  for(int i = 0; i < SpinFactors.size(); ++i) {
206  sfcalculators.push_back(new SFCalculator(parameters, i));
207  }
208 
209  for(int i = 0; i < AmpMap.size(); ++i) {
210  AmpCalcs.push_back(new AmpCalc(ampidxstart[i], parameters, nPermVec[i]));
211  }
212 
213  // fprintf(stderr,"#Amp's %i, #LS %i, #SF %i \n", AmpMap.size(), components.size()-1, SpinFactors.size() );
214 
215  std::vector<mcbooster::GReal_t> masses(decayInfo.particle_masses.begin() + 1, decayInfo.particle_masses.end());
216  mcbooster::PhaseSpace phsp(decayInfo.particle_masses[0], masses, MCeventsNorm);
217  phsp.Generate(mcbooster::Vector4R(decayInfo.particle_masses[0], 0.0, 0.0, 0.0));
218  phsp.Unweight();
219 
220  auto nAcc = phsp.GetNAccepted();
221  mcbooster::BoolVector_d flags = phsp.GetAccRejFlags();
222  auto d1 = phsp.GetDaughters(0);
223  auto d2 = phsp.GetDaughters(1);
224  auto d3 = phsp.GetDaughters(2);
225  auto d4 = phsp.GetDaughters(3);
226 
227  // auto zip_begin = thrust::make_zip_iterator(thrust::make_tuple(d1.begin(), d2.begin(), d3.begin(), d4.begin()));
228  // auto zip_end = zip_begin + d1.size();
229  // auto new_end = thrust::remove_if(zip_begin, zip_end, flags.begin(), thrust::logical_not<bool>());
230 
231  printf("After accept-reject we will keep %.i Events for normalization.\n", static_cast<int>(nAcc));
232  d1.shrink_to_fit();
233  d2.shrink_to_fit();
234  d3.shrink_to_fit();
235  d4.shrink_to_fit();
236 
237  mcbooster::ParticlesSet_d pset(4);
238  pset[0] = &d1;
239  pset[1] = &d2;
240  pset[2] = &d3;
241  pset[3] = &d4;
242 
243  norm_M12 = mcbooster::RealVector_d(nAcc);
244  norm_M34 = mcbooster::RealVector_d(nAcc);
245  norm_CosTheta12 = mcbooster::RealVector_d(nAcc);
246  norm_CosTheta34 = mcbooster::RealVector_d(nAcc);
247  norm_phi = mcbooster::RealVector_d(nAcc);
248 
249  mcbooster::VariableSet_d VarSet(5);
250  VarSet[0] = &norm_M12;
251  VarSet[1] = &norm_M34;
252  VarSet[2] = &norm_CosTheta12;
253  VarSet[3] = &norm_CosTheta34;
254  VarSet[4] = &norm_phi;
255 
256  Dim5 eval = Dim5();
257  mcbooster::EvaluateArray<Dim5>(eval, pset, VarSet);
258 
259  norm_SF = mcbooster::RealVector_d(nAcc * SpinFactors.size());
260  norm_LS = mcbooster::mc_device_vector<fpcomplex>(nAcc * (components.size() - 1));
261  MCevents = nAcc;
262 
263  addSpecialMask(PdfBase::ForceSeparateNorm);
264 }
265 
266 // makes the arrays to chache the lineshape values and spinfactors in CachedResSF and the values of the amplitudes in
267 // cachedAMPs
268 // I made the choice to have spinfactors next to the values of the lineshape in memory. I waste memory by doing this
269 // because a spinfactor is saved as complex
270 // It would be nice to test if this is better than having the spinfactors stored seperately.
271 __host__ void DPPdf::setDataSize(unsigned int dataSize, unsigned int evtSize) {
272  // Default 3 is m12, m13, evtNum for DP 2dim, 4-body decay has 5 independent vars plus evtNum = 6
273  totalEventSize = evtSize;
274  if(totalEventSize < 3)
275  throw GooFit::GeneralError("totalEventSize {} should be 3 or more", totalEventSize);
276 
277  if(cachedResSF)
278  delete cachedResSF;
279 
280  if(cachedAMPs)
281  delete cachedAMPs;
282 
283  numEntries = dataSize;
284  cachedResSF = new thrust::device_vector<fpcomplex>(
285  dataSize * (components.size() + SpinFactors.size() - 1)); // -1 because 1 component is efficiency
286  void *dummy = thrust::raw_pointer_cast(cachedResSF->data());
287  MEMCPY_TO_SYMBOL(cResSF, &dummy, sizeof(fpcomplex *), cacheToUse * sizeof(fpcomplex *), cudaMemcpyHostToDevice);
288 
289  cachedAMPs = new thrust::device_vector<fpcomplex>(dataSize * (AmpCalcs.size()));
290  void *dummy2 = thrust::raw_pointer_cast(cachedAMPs->data());
291  MEMCPY_TO_SYMBOL(Amps_DP, &dummy2, sizeof(fpcomplex *), cacheToUse * sizeof(fpcomplex *), cudaMemcpyHostToDevice);
292 
293  setForceIntegrals();
294 }
295 
296 // this is where the actual magic happens. This function does all the calculations!
297 __host__ fptype DPPdf::normalize() const {
298  // fprintf(stderr, "start normalize\n");
299  recursiveSetNormalisation(1); // Not going to normalize efficiency,
300  // so set normalisation factor to 1 so it doesn't get multiplied by zero.
301  // Copy at this time to ensure that the SpecialResonanceCalculators, which need the efficiency,
302  // don't get zeroes through multiplying by the normFactor.
303  MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
304 
305  // check if MINUIT changed any parameters and if so remember that so we know
306  // we need to recalculate that lineshape and every amp, that uses that lineshape
307  for(unsigned int i = 0; i < components.size() - 1; ++i) {
308  redoIntegral[i] = forceRedoIntegrals;
309 
310  if(!(components[i]->parametersChanged()))
311  continue;
312 
313  redoIntegral[i] = true;
314  }
315 
316  SpinsCalculated = !forceRedoIntegrals;
317  forceRedoIntegrals = false;
318 
319  // just some thrust iterators for the calculation.
320  thrust::constant_iterator<fptype *> dataArray(dev_event_array);
321  thrust::constant_iterator<int> eventSize(totalEventSize);
322  thrust::counting_iterator<int> eventIndex(0);
323 
324  // Calculate spinfactors only once for normalisation events and real events
325  // strided_range is a template implemented in DalitsPlotHelpers.hh
326  // it basically goes through the array by increasing the pointer by a certain amount instead of just one step.
327  if(!SpinsCalculated) {
328  for(int i = 0; i < SpinFactors.size(); ++i) {
329  unsigned int offset = components.size() - 1;
330  thrust::transform(
331  thrust::make_zip_iterator(thrust::make_tuple(eventIndex, dataArray, eventSize)),
332  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, dataArray, eventSize)),
333  strided_range<thrust::device_vector<fpcomplex>::iterator>(
334  cachedResSF->begin() + offset + i, cachedResSF->end(), (components.size() + SpinFactors.size() - 1))
335  .begin(),
336  *(sfcalculators[i]));
337 
338  if(!generation_no_norm) {
339  thrust::transform(
340  thrust::make_zip_iterator(thrust::make_tuple(norm_M12.begin(),
341  norm_M34.begin(),
342  norm_CosTheta12.begin(),
343  norm_CosTheta34.begin(),
344  norm_phi.begin())),
345  thrust::make_zip_iterator(thrust::make_tuple(
346  norm_M12.end(), norm_M34.end(), norm_CosTheta12.end(), norm_CosTheta34.end(), norm_phi.end())),
347  (norm_SF.begin() + (i * MCevents)),
348  NormSpinCalculator(parameters, i));
349  }
350  }
351 
352  SpinsCalculated = true;
353  }
354 
355  // this calculates the values of the lineshapes and stores them in the array. It is recalculated every time
356  // parameters change.
357  for(int i = 0; i < components.size() - 1; ++i) {
358  if(redoIntegral[i]) {
359  thrust::transform(
360  thrust::make_zip_iterator(thrust::make_tuple(eventIndex, dataArray, eventSize)),
361  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, dataArray, eventSize)),
362  strided_range<thrust::device_vector<fpcomplex>::iterator>(
363  cachedResSF->begin() + i, cachedResSF->end(), (components.size() + SpinFactors.size() - 1))
364  .begin(),
365  *(lscalculators[i]));
366  }
367  }
368 
369  // this is a little messy but it basically checks if the amplitude includes one of the recalculated lineshapes and
370  // if so recalculates that amplitude
371  auto AmpMapIt = AmpMap.begin();
372 
373  for(int i = 0; i < AmpCalcs.size(); ++i) {
374  std::vector<unsigned int> redoidx((*AmpMapIt).second.first);
375  bool redo = false;
376 
377  for(unsigned int j : redoidx) {
378  if(!redoIntegral[j])
379  continue;
380 
381  redo = true;
382  break;
383  }
384 
385  if(redo) {
386  thrust::transform(eventIndex,
387  eventIndex + numEntries,
388  strided_range<thrust::device_vector<fpcomplex>::iterator>(
389  cachedAMPs->begin() + i, cachedAMPs->end(), AmpCalcs.size())
390  .begin(),
391  *(AmpCalcs[i]));
392  }
393  }
394 
395  // lineshape value calculation for the normalisation, also recalculated every time parameter change
396  if(!generation_no_norm) {
397  for(int i = 0; i < components.size() - 1; ++i) {
398  if(!redoIntegral[i])
399  continue;
400 
401  thrust::transform(
402  thrust::make_zip_iterator(thrust::make_tuple(norm_M12.begin(),
403  norm_M34.begin(),
404  norm_CosTheta12.begin(),
405  norm_CosTheta34.begin(),
406  norm_phi.begin())),
407  thrust::make_zip_iterator(thrust::make_tuple(
408  norm_M12.end(), norm_M34.end(), norm_CosTheta12.end(), norm_CosTheta34.end(), norm_phi.end())),
409  (norm_LS.begin() + (i * MCevents)),
410  NormLSCalculator(parameters, i));
411  }
412  }
413 
414  thrust::constant_iterator<fptype *> normSFaddress(thrust::raw_pointer_cast(norm_SF.data()));
415  thrust::constant_iterator<fpcomplex *> normLSaddress(thrust::raw_pointer_cast(norm_LS.data()));
416  thrust::constant_iterator<int> NumNormEvents(MCevents);
417 
418  // this does the rest of the integration with the cached lineshape and spinfactor values for the normalization
419  // events
420  fptype ret = 1.0;
421 
422  if(!generation_no_norm) {
423  fptype sumIntegral = 0;
424  sumIntegral += thrust::transform_reduce(
425  thrust::make_zip_iterator(thrust::make_tuple(eventIndex, NumNormEvents, normSFaddress, normLSaddress)),
426  thrust::make_zip_iterator(
427  thrust::make_tuple(eventIndex + MCevents, NumNormEvents, normSFaddress, normLSaddress)),
428  *Integrator,
429  0.,
430  thrust::plus<fptype>());
431  // MCevents is the number of normalisation events.
432  sumIntegral /= MCevents;
433  ret = sumIntegral;
434  }
435 
436  if(std::isnan(ret))
437  GooFit::abort(__FILE__, __LINE__, getName() + " NAN normalization in DPPdf", this);
438 
439  host_normalisation[parameters] = 1.0 / ret;
440  // printf("end of normalize %f\n", ret);
441  return ret;
442 }
443 
444 __host__
445  std::tuple<mcbooster::ParticlesSet_h, mcbooster::VariableSet_h, mcbooster::RealVector_h, mcbooster::RealVector_h>
446  DPPdf::GenerateSig(unsigned int numEvents) {
447  std::vector<mcbooster::GReal_t> masses(decayInfo.particle_masses.begin() + 1, decayInfo.particle_masses.end());
448  mcbooster::PhaseSpace phsp(decayInfo.particle_masses[0], masses, numEvents, generation_offset);
449  phsp.Generate(mcbooster::Vector4R(decayInfo.particle_masses[0], 0.0, 0.0, 0.0));
450 
451  auto d1 = phsp.GetDaughters(0);
452  auto d2 = phsp.GetDaughters(1);
453  auto d3 = phsp.GetDaughters(2);
454  auto d4 = phsp.GetDaughters(3);
455 
456  mcbooster::ParticlesSet_d pset(4);
457  pset[0] = &d1;
458  pset[1] = &d2;
459  pset[2] = &d3;
460  pset[3] = &d4;
461 
462  auto SigGen_M12_d = mcbooster::RealVector_d(numEvents);
463  auto SigGen_M34_d = mcbooster::RealVector_d(numEvents);
464  auto SigGen_CosTheta12_d = mcbooster::RealVector_d(numEvents);
465  auto SigGen_CosTheta34_d = mcbooster::RealVector_d(numEvents);
466  auto SigGen_phi_d = mcbooster::RealVector_d(numEvents);
467 
468  mcbooster::VariableSet_d VarSet_d(5);
469  VarSet_d[0] = &SigGen_M12_d;
470  VarSet_d[1] = &SigGen_M34_d;
471  VarSet_d[2] = &SigGen_CosTheta12_d;
472  VarSet_d[3] = &SigGen_CosTheta34_d;
473  VarSet_d[4] = &SigGen_phi_d;
474 
475  Dim5 eval = Dim5();
476  mcbooster::EvaluateArray<Dim5>(eval, pset, VarSet_d);
477 
478  auto h1 = new mcbooster::Particles_h(d1);
479  auto h2 = new mcbooster::Particles_h(d2);
480  auto h3 = new mcbooster::Particles_h(d3);
481  auto h4 = new mcbooster::Particles_h(d4);
482 
483  mcbooster::ParticlesSet_h ParSet(4);
484  ParSet[0] = h1;
485  ParSet[1] = h2;
486  ParSet[2] = h3;
487  ParSet[3] = h4;
488 
489  auto SigGen_M12_h = new mcbooster::RealVector_h(SigGen_M12_d);
490  auto SigGen_M34_h = new mcbooster::RealVector_h(SigGen_M34_d);
491  auto SigGen_CosTheta12_h = new mcbooster::RealVector_h(SigGen_CosTheta12_d);
492  auto SigGen_CosTheta34_h = new mcbooster::RealVector_h(SigGen_CosTheta34_d);
493  auto SigGen_phi_h = new mcbooster::RealVector_h(SigGen_phi_d);
494 
495  mcbooster::VariableSet_h VarSet(5);
496  VarSet[0] = SigGen_M12_h;
497  VarSet[1] = SigGen_M34_h;
498  VarSet[2] = SigGen_CosTheta12_h;
499  VarSet[3] = SigGen_CosTheta34_h;
500  VarSet[4] = SigGen_phi_h;
501 
502  mcbooster::RealVector_d weights(phsp.GetWeights());
503  phsp.FreeResources();
504 
505  auto DS = new mcbooster::RealVector_d(6 * numEvents);
506  thrust::counting_iterator<int> eventNumber(0);
507 
508 #pragma unroll
509 
510  for(int i = 0; i < 5; ++i) {
511  mcbooster::strided_range<mcbooster::RealVector_d::iterator> sr(DS->begin() + i, DS->end(), 6);
512  thrust::copy(VarSet_d[i]->begin(), VarSet_d[i]->end(), sr.begin());
513  }
514 
515  mcbooster::strided_range<mcbooster::RealVector_d::iterator> sr(DS->begin() + 5, DS->end(), 6);
516  thrust::copy(eventNumber, eventNumber + numEvents, sr.begin());
517 
518  dev_event_array = thrust::raw_pointer_cast(DS->data());
519  setDataSize(numEvents, 6);
520 
521  generation_no_norm = true; // we need no normalization for generation, but we do need to make sure that norm = 1;
522  SigGenSetIndices();
523  copyParams();
524  normalize();
525  setForceIntegrals();
526  MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
527 
528  thrust::device_vector<fptype> results(numEvents);
529  thrust::constant_iterator<int> eventSize(6);
530  thrust::constant_iterator<fptype *> arrayAddress(dev_event_array);
531  thrust::counting_iterator<int> eventIndex(0);
532 
533  MetricTaker evalor(this, getMetricPointer("ptr_to_Prob"));
534  thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(eventIndex, arrayAddress, eventSize)),
535  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEvents, arrayAddress, eventSize)),
536  results.begin(),
537  evalor);
538  cudaDeviceSynchronize();
539  gooFree(dev_event_array);
540 
541  thrust::transform(
542  results.begin(), results.end(), weights.begin(), weights.begin(), thrust::multiplies<mcbooster::GReal_t>());
543  mcbooster::BoolVector_d flags(numEvents);
544 
545  thrust::counting_iterator<mcbooster::GLong_t> first(0);
546  thrust::counting_iterator<mcbooster::GLong_t> last = first + numEvents;
547 
548  auto max = thrust::max_element(weights.begin(), weights.end());
549  thrust::transform(first, last, weights.begin(), flags.begin(), mcbooster::FlagAcceptReject((fptype)*max));
550 
551  auto weights_h = mcbooster::RealVector_h(weights);
552  auto results_h = mcbooster::RealVector_h(results);
553  auto flags_h = mcbooster::BoolVector_h(flags);
554  cudaDeviceSynchronize();
555 
556  return std::make_tuple(ParSet, VarSet, weights_h, flags_h);
557 }
558 
559 SFCalculator::SFCalculator(int pIdx, unsigned int sf_idx)
560  : _spinfactor_i(sf_idx)
561  , _parameters(pIdx) {}
562 
563 __device__ fpcomplex SFCalculator::operator()(thrust::tuple<int, fptype *, int> t) const {
564  int evtNum = thrust::get<0>(t);
565  fptype *evt = thrust::get<1>(t) + (evtNum * thrust::get<2>(t));
566 
567  unsigned int *indices = paramIndices + _parameters; // Jump to DALITZPLOT position within parameters array
568  int parameter_i = 6 + (2 * indices[5]) + (indices[3] * 2)
569  + (_spinfactor_i * 2); // Find position of this resonance relative to DALITZPLOT start
570  unsigned int functn_i = indices[parameter_i];
571  unsigned int params_i = indices[parameter_i + 1];
572 
573  fptype m12 = evt[indices[2 + indices[0]]];
574  fptype m34 = evt[indices[3 + indices[0]]];
575  fptype cos12 = evt[indices[4 + indices[0]]];
576  fptype cos34 = evt[indices[5 + indices[0]]];
577  fptype phi = evt[indices[6 + indices[0]]];
578 
579  fptype vecs[16];
580  get4Vecs(vecs, indices[1], m12, m34, cos12, cos34, phi);
581  // printf("%i, %i, %f, %f, %f, %f, %f \n",evtNum, thrust::get<2>(t), m12, m34, cos12, cos34, phi );
582  // printf("vec%i %f, %f, %f, %f\n",0, vecs[0], vecs[1], vecs[2], vecs[3]);
583  // printf("vec%i %f, %f, %f, %f\n",1, vecs[4], vecs[5], vecs[6], vecs[7]);
584  // printf("vec%i %f, %f, %f, %f\n",2, vecs[8], vecs[9], vecs[10], vecs[11]);
585  // printf("vec%i %f, %f, %f, %f\n",3, vecs[12], vecs[13], vecs[14], vecs[15]);
586 
587  auto func = reinterpret_cast<spin_function_ptr>(device_function_table[functn_i]);
588  fptype sf = (*func)(vecs, paramIndices + params_i);
589  // printf("SpinFactors %i : %.7g\n",evtNum, sf );
590  return {sf, 0.};
591 }
592 
593 NormSpinCalculator::NormSpinCalculator(int pIdx, unsigned int sf_idx)
594  : _spinfactor_i(sf_idx)
595  , _parameters(pIdx) {}
596 
597 __device__ fptype NormSpinCalculator::operator()(
598  thrust::tuple<mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t> t)
599  const {
600  unsigned int *indices = paramIndices + _parameters; // Jump to DALITZPLOT position within parameters array
601  unsigned int numLS = indices[3];
602  unsigned int numAmps = indices[5];
603  int parameter_i = 6 + (2 * numAmps) + (numLS * 2)
604  + (_spinfactor_i * 2); // Find position of this resonance relative to DALITZPLOT start
605  unsigned int functn_i = indices[parameter_i];
606  unsigned int params_i = indices[parameter_i + 1];
607 
608  fptype m12 = (thrust::get<0>(t));
609  fptype m34 = (thrust::get<1>(t));
610  fptype cos12 = (thrust::get<2>(t));
611  fptype cos34 = (thrust::get<3>(t));
612  fptype phi = (thrust::get<4>(t));
613 
614  fptype vecs[16];
615  get4Vecs(vecs, indices[1], m12, m34, cos12, cos34, phi);
616 
617  // printf("evt %i vec%i %.5g, %.5g, %.5g, %.5g\n", evtNum,0, vecs[0], vecs[1], vecs[2], vecs[3]);
618  // printf("evt %i vec%i %.5g, %.5g, %.5g, %.5g\n", evtNum,1, vecs[4], vecs[5], vecs[6], vecs[7]);
619  // printf("evt %i vec%i %.5g, %.5g, %.5g, %.5g\n", evtNum,2, vecs[8], vecs[9], vecs[10], vecs[11]);
620  // printf("evt %i vec%i %.5g, %.5g, %.5g, %.5g\n", evtNum,3, vecs[12], vecs[13], vecs[14], vecs[15]);
621  // // }
622  auto func = reinterpret_cast<spin_function_ptr>(device_function_table[functn_i]);
623  fptype sf = (*func)(vecs, paramIndices + params_i);
624 
625  // printf("NormSF evt:%.5g, %.5g, %.5g, %.5g, %.5g\n", m12, m34, cos12, cos34, phi);
626  // printf("NormSF %i, %.5g\n",_spinfactor_i, sf );
627  // THREAD_SYNCH
628  return sf;
629 }
630 
631 LSCalculator::LSCalculator(int pIdx, unsigned int res_idx)
632  : _resonance_i(res_idx)
633  , _parameters(pIdx) {}
634 
635 __device__ fpcomplex LSCalculator::operator()(thrust::tuple<int, fptype *, int> t) const {
636  // Calculates the BW values for a specific resonance.
637  fpcomplex ret;
638 
639  int evtNum = thrust::get<0>(t);
640  fptype *evt = thrust::get<1>(t) + (evtNum * thrust::get<2>(t));
641 
642  unsigned int *indices = paramIndices + _parameters; // Jump to DALITZPLOT position within parameters array
643  int parameter_i
644  = 6 + (2 * indices[5]) + (_resonance_i * 2); // Find position of this resonance relative to DALITZPLOT start
645  unsigned int functn_i = indices[parameter_i];
646  unsigned int params_i = indices[parameter_i + 1];
647  unsigned int pair = (paramIndices + params_i)[5];
648 
649  fptype m1 = functorConstants[indices[1] + 2];
650  fptype m2 = functorConstants[indices[1] + 3];
651  fptype m3 = functorConstants[indices[1] + 4];
652  fptype m4 = functorConstants[indices[1] + 5];
653 
654  fptype m12 = evt[indices[2 + indices[0]]];
655  fptype m34 = evt[indices[3 + indices[0]]];
656  fptype cos12 = evt[indices[4 + indices[0]]];
657  fptype cos34 = evt[indices[5 + indices[0]]];
658  fptype phi = evt[indices[6 + indices[0]]];
659 
660  if(pair < 2) {
661  fptype mres = pair == 0 ? m12 : m34;
662  fptype d1 = pair == 0 ? m1 : m3;
663  fptype d2 = pair == 0 ? m2 : m4;
664  ret = getResonanceAmplitude(mres, d1, d2, functn_i, params_i);
665  // printf("LS %i: mass:%f, %f i%f\n",_resonance_i, mres, ret.real, ret.imag );
666  } else {
667  fptype vecs[16];
668  get4Vecs(vecs, indices[1], m12, m34, cos12, cos34, phi);
669  fptype d1, d2;
670  fptype mres = getmass(pair, d1, d2, vecs, m1, m2, m3, m4);
671  ret = getResonanceAmplitude(mres, d1, d2, functn_i, params_i);
672  // printf("LS_m_calc %i: mass:%f, %f i%f\n",_resonance_i, mres, ret.real, ret.imag );
673  }
674 
675  // if (!inDalitz(m12, m13, motherMass, daug1Mass, daug2Mass, daug3Mass)) return ret;
676  // printf("m12 %f \n", m12); // %f %f %f (%f, %f)\n ", m12, m13, m23, ret.real, ret.imag);
677  // printf("#Parameters %i, #LS %i, #SF %i, #AMP %i \n", indices[0], indices[3], indices[4], indices[5]);
678  // printf("%i mass: %.5g, BW_%i : %f %f\n",evtNum, massstore, _resonance_i, ret.real, ret.imag);
679 
680  return ret;
681 }
682 
683 NormLSCalculator::NormLSCalculator(int pIdx, unsigned int res_idx)
684  : _resonance_i(res_idx)
685  , _parameters(pIdx) {}
686 
687 __device__ fpcomplex NormLSCalculator::operator()(
688  thrust::tuple<mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t> t)
689  const {
690  // Calculates the BW values for a specific resonance.
691  fpcomplex ret;
692 
693  unsigned int *indices = paramIndices + _parameters; // Jump to DALITZPLOT position within parameters array
694  unsigned int numAmps = indices[5];
695  int parameter_i
696  = 6 + (2 * numAmps) + (_resonance_i * 2); // Find position of this resonance relative to DALITZPLOT start
697  unsigned int functn_i = indices[parameter_i];
698  unsigned int params_i = indices[parameter_i + 1];
699  unsigned int pair = (paramIndices + params_i)[5];
700 
701  fptype m1 = functorConstants[indices[1] + 2];
702  fptype m2 = functorConstants[indices[1] + 3];
703  fptype m3 = functorConstants[indices[1] + 4];
704  fptype m4 = functorConstants[indices[1] + 5];
705 
706  fptype m12 = (thrust::get<0>(t));
707  fptype m34 = (thrust::get<1>(t));
708  fptype cos12 = (thrust::get<2>(t));
709  fptype cos34 = (thrust::get<3>(t));
710  fptype phi = (thrust::get<4>(t));
711 
712  if(pair < 2) {
713  fptype mres = pair == 0 ? m12 : m34;
714  fptype d1 = pair == 0 ? m1 : m3;
715  fptype d2 = pair == 0 ? m2 : m4;
716  ret = getResonanceAmplitude(mres, d1, d2, functn_i, params_i);
717  } else {
718  fptype vecs[16];
719  get4Vecs(vecs, indices[1], m12, m34, cos12, cos34, phi);
720  fptype d1, d2;
721  fptype mres = getmass(pair, d1, d2, vecs, m1, m2, m3, m4);
722  ret = getResonanceAmplitude(mres, d1, d2, functn_i, params_i);
723  }
724 
725  // printf("NormLS %f, %f, %f, %f, %f \n",m12, m34, cos12, cos34, phi );
726  // printf("%i, %i, %i, %i, %i \n",numLS, numSF, numAmps, offset, evtNum );
727  // printf("NLS %i, %f, %f\n",_resonance_i,ret.real, ret.imag);
728 
729  // printf("m12 %f \n", m12); // %f %f %f (%f, %f)\n ", m12, m13, m23, ret.real, ret.imag);
730  // printf("#Parameters %i, #LS %i, #SF %i, #AMP %i \n", indices[0], indices[3], indices[4], indices[5]);
731  // THREAD_SYNCH
732  return ret;
733 }
734 
735 AmpCalc::AmpCalc(unsigned int AmpIdx, unsigned int pIdx, unsigned int nPerm)
736  : _nPerm(nPerm)
737  , _AmpIdx(AmpIdx)
738  , _parameters(pIdx) {}
739 
740 __device__ fpcomplex AmpCalc::operator()(thrust::tuple<int, fptype *, int> t) const {
741  unsigned int *indices = paramIndices + _parameters;
742  unsigned int cacheToUse = indices[2];
743  unsigned int totalLS = indices[3];
744  unsigned int totalSF = indices[4];
745  unsigned int totalAMP = indices[5];
746  unsigned int offset = totalLS + totalSF;
747  unsigned int numLS = AmpIndices[totalAMP + _AmpIdx];
748  unsigned int numSF = AmpIndices[totalAMP + _AmpIdx + 1];
749  unsigned int evtNum = thrust::get<0>(t);
750 
751  fpcomplex returnVal(0, 0);
752  unsigned int SF_step = numSF / _nPerm;
753  unsigned int LS_step = numLS / _nPerm;
754 
755  for(int i = 0; i < _nPerm; ++i) {
756  fpcomplex ret(1, 0);
757  fpcomplex tmp(1, 0);
758 
759  for(int j = i * LS_step; j < (i + 1) * LS_step; ++j) {
760  tmp = (cResSF[cacheToUse][evtNum * offset + AmpIndices[totalAMP + _AmpIdx + 3 + j]]);
761  ret *= tmp;
762  // printf("Lineshape = (%.7g, %.7g)\n", tmp.real, tmp.imag);
763  }
764 
765  // printf("Lineshape Product = (%.7g, %.7g)\n", ret.real, ret.imag);
766  for(int j = i * SF_step; j < (i + 1) * SF_step; ++j) {
767  tmp = (cResSF[cacheToUse][evtNum * offset + totalLS + AmpIndices[totalAMP + _AmpIdx + 3 + numLS + j]]
768  .real());
769  ret *= tmp;
770  // printf("SF = (%.7g, %.7g)\n", tmp.real, tmp.imag);
771  }
772 
773  // printf("Lineshape Product * SF = (%.7g, %.7g)\n", ret.real, ret.imag);
774 
775  returnVal += ret;
776  }
777 
778  returnVal *= (1 / sqrt(static_cast<fptype>(_nPerm)));
779  // printf("Amplitude Value = (%.7g, %.7g)\n", returnVal.real, returnVal.imag);
780  return returnVal;
781 }
782 
783 NormIntegrator::NormIntegrator(unsigned int pIdx)
784  : _parameters(pIdx) {}
785 
786 __device__ fptype NormIntegrator::operator()(thrust::tuple<int, int, fptype *, fpcomplex *> t) const {
787  unsigned int *indices = paramIndices + _parameters;
788  unsigned int totalAMP = indices[5];
789 
790  unsigned int evtNum = thrust::get<0>(t);
791  unsigned int MCevents = thrust::get<1>(t);
792  fptype *SFnorm = thrust::get<2>(t) + evtNum;
793  fpcomplex *LSnorm = thrust::get<3>(t) + evtNum;
794 
795  fpcomplex returnVal(0, 0);
796 
797  for(int amp = 0; amp < totalAMP; ++amp) {
798  unsigned int ampidx = AmpIndices[amp];
799  unsigned int numLS = AmpIndices[totalAMP + ampidx];
800  unsigned int numSF = AmpIndices[totalAMP + ampidx + 1];
801  unsigned int nPerm = AmpIndices[totalAMP + ampidx + 2];
802  unsigned int SF_step = numSF / nPerm;
803  unsigned int LS_step = numLS / nPerm;
804  fpcomplex ret2(0, 0);
805  // printf("%i, %i, %i, %i, %i, %i, %i, %i, %i, %f\n",ampidx, amp, numLS, numSF, nPerm,AmpIndices[totalAMP +
806  // ampidx + 3 + 0], AmpIndices[totalAMP + ampidx + 3 + 1], AmpIndices[totalAMP + ampidx + 3 + 2],
807  // AmpIndices[totalAMP + ampidx + 3 + 3], (1/sqrt((fptype)(nPerm))) );
808 
809  for(int j = 0; j < nPerm; ++j) {
810  fpcomplex ret(1, 0);
811 
812  for(int i = j * LS_step; i < (j + 1) * LS_step; ++i) {
813  fpcomplex matrixelement(LSnorm[AmpIndices[totalAMP + ampidx + 3 + i] * MCevents]);
814  // printf("Norm BW %i, %.5g, %.5g\n",AmpIndices[totalAMP + ampidx + 3 + i] , matrixelement.real,
815  // matrixelement.imag);
816  ret *= matrixelement;
817  }
818 
819  for(int i = j * SF_step; i < (j + 1) * SF_step; ++i) {
820  fptype matrixelement = (SFnorm[AmpIndices[totalAMP + ampidx + 3 + numLS + i] * MCevents]);
821  // printf("Norm SF %i, %.5g\n",AmpIndices[totalAMP + ampidx + 3 + i] , matrixelement);
822  ret *= matrixelement;
823  }
824 
825  ret2 += ret;
826  }
827 
828  fpcomplex amp_C{cudaArray[indices[2 * amp + 6]], cudaArray[indices[2 * amp + 7]]};
829  ret2 *= (1 / sqrt(static_cast<fptype>(nPerm)));
830  // printf("Result Amplitude %i, %.5g, %.5g\n",amp, ret2.real, ret2.imag);
831  returnVal += ret2 * amp_C;
832  }
833 
834  return thrust::norm(returnVal);
835 }
836 } // namespace GooFit