2 04/05/2016 Christoph Hasse
5 This code is not sufficently tested yet and still under heavy development!
8 - Test lineshapes, only done for BW_DP and BW_MINT so far
9 - Check and implement more SF
10 - Currently no check if the event is even allowed in phasespace is done. This should preferably be done outside of this
13 - Some things could be implemented differently maybe, performance should be compared for both cases.
14 -For example the way Spinfactors are stored in the same array as the Lineshape values.
15 Is this really worth the memory we lose by using a complex to store the SF?
18 #include <mcbooster/Evaluate.h>
19 #include <mcbooster/EvaluateArray.h>
20 #include <mcbooster/GContainers.h>
21 #include <mcbooster/GFunctional.h>
22 #include <mcbooster/GTypes.h>
23 #include <mcbooster/Generate.h>
24 #include <mcbooster/Vector4R.h>
26 #include <goofit/Error.h>
27 #include <goofit/PDFs/physics/DP4Pdf.h>
28 #include <goofit/PDFs/physics/EvalVar.h>
32 // The function of this array is to hold all the cached waves; specific
33 // waves are recalculated when the corresponding resonance mass or width
34 // changes. Note that in a multithread environment each thread needs its
35 // own cache, hence the '10'. Ten threads should be enough for anyone!
36 __device__ fpcomplex *cResSF[10];
37 __device__ fpcomplex *Amps_DP[10];
39 Constant memory array to hold specific info for amplitude calculation.
40 First entries are the starting points in array, necessary, because number of Lineshapes(LS) or Spinfactors(SF) can vary
41 |start of each Amplitude| #Linshapes | #Spinfactors | LS-indices | SF-indices|
42 | 1 entry per Amplitude | 1 per Amp | 1 per Amp | #LS in Amp| #SF in Amp|
44 __constant__ unsigned int AmpIndices[500];
46 // This function gets called by the GooFit framework to get the value of the PDF.
47 __device__ fptype device_DP(fptype *evt, fptype *p, unsigned int *indices) {
48 // printf("DalitzPlot evt %i zero: %i %i %f (%f, %f).\n", evtNum, numResonances, effFunctionIdx, eff, totalAmp.real,
51 auto evtNum = static_cast<int>(floor(0.5 + evt[indices[7 + indices[0]]]));
52 // printf("%i\n",evtNum );
53 fpcomplex totalAmp(0, 0);
54 unsigned int cacheToUse = indices[2];
55 unsigned int numAmps = indices[5];
57 for(int i = 0; i < numAmps; ++i) {
58 fpcomplex amp{p[indices[6 + 2 * i]], p[indices[7 + 2 * i]]};
60 fpcomplex matrixelement((Amps_DP[cacheToUse][evtNum * numAmps + i]).real(),
61 (Amps_DP[cacheToUse][evtNum * numAmps + i]).imag());
63 totalAmp += matrixelement * amp;
66 fptype ret = thrust::norm(totalAmp);
67 int effFunctionIdx = 6 + 2 * indices[3] + 2 * indices[4] + 2 * indices[5];
68 fptype eff = callFunction(evt, indices[effFunctionIdx], indices[effFunctionIdx + 1]);
71 // printf("result %.7g\n", ret);
75 __device__ device_function_ptr ptr_to_DP = device_DP;
77 __host__ DPPdf::DPPdf(
78 std::string n, std::vector<Observable> observables, DecayInfo4 decay, GooPdf *efficiency, unsigned int MCeventsNorm)
81 , totalEventSize(observables.size()) // number of observables plus eventnumber
83 for(auto &observable : observables) {
84 registerObservable(observable);
87 // registerObservable(eventNumber);
89 std::vector<fptype> decayConstants;
90 decayConstants.push_back(decayInfo.meson_radius);
92 for(double &particle_masse : decayInfo.particle_masses) {
93 decayConstants.push_back(particle_masse);
96 std::vector<unsigned int> pindices;
97 pindices.push_back(registerConstants(decayConstants.size()));
98 MEMCPY_TO_SYMBOL(functorConstants,
100 decayConstants.size() * sizeof(fptype),
101 cIndex * sizeof(fptype),
102 cudaMemcpyHostToDevice);
103 static int cacheCount = 0;
104 cacheToUse = cacheCount++;
105 pindices.push_back(cacheToUse);
106 pindices.push_back(0); //#LS
107 pindices.push_back(0); //#SF
108 pindices.push_back(0); //#AMP
110 // This is the start of reading in the amplitudes and adding the lineshapes and Spinfactors to this PDF
111 // This is done in this way so we don't have multiple copies of one lineshape in one pdf.
112 std::vector<unsigned int> ampidx;
113 std::vector<unsigned int> nPermVec;
114 std::vector<unsigned int> ampidxstart;
116 for(auto &litude : decayInfo.amplitudes) {
117 AmpMap[amplitude->_uniqueDecayStr] = std::make_pair(std::vector<unsigned int>(0), std::vector<unsigned int>(0));
119 auto LSvec = amplitude->_LS;
121 for(auto &LSIT : LSvec) {
122 auto found = std::find_if(components.begin(), components.end(), [&LSIT](const PdfBase *L) {
123 return (*LSIT) == *(dynamic_cast<const Lineshape *>(L));
126 if(found != components.end()) {
127 AmpMap[amplitude->_uniqueDecayStr].first.push_back(std::distance(components.begin(), found));
129 components.push_back(LSIT);
130 AmpMap[amplitude->_uniqueDecayStr].first.push_back(components.size() - 1);
134 auto SFvec = amplitude->_SF;
136 for(auto &SFIT : SFvec) {
137 auto found = std::find_if(
138 SpinFactors.begin(), SpinFactors.end(), [&SFIT](const SpinFactor *S) { return (*SFIT) == (*S); });
140 if(found != SpinFactors.end()) {
141 AmpMap[amplitude->_uniqueDecayStr].second.push_back(std::distance(SpinFactors.begin(), found));
143 SpinFactors.push_back(SFIT);
144 AmpMap[amplitude->_uniqueDecayStr].second.push_back(SpinFactors.size() - 1);
148 nPermVec.push_back(amplitude->_nPerm);
149 pindices.push_back(registerParameter(amplitude->_ar));
150 pindices.push_back(registerParameter(amplitude->_ai));
152 ampidxstart.push_back(ampidx.size());
153 std::vector<unsigned int> ls = AmpMap[amplitude->_uniqueDecayStr].first;
154 std::vector<unsigned int> sf = AmpMap[amplitude->_uniqueDecayStr].second;
155 ampidx.push_back(ls.size());
156 ampidx.push_back(sf.size());
157 ampidx.push_back(amplitude->_nPerm);
158 ampidx.insert(ampidx.end(), ls.begin(), ls.end());
159 ampidx.insert(ampidx.end(), sf.begin(), sf.end());
163 AmpIndices, &(ampidxstart[0]), ampidxstart.size() * sizeof(unsigned int), 0, cudaMemcpyHostToDevice);
164 MEMCPY_TO_SYMBOL(AmpIndices,
166 ampidx.size() * sizeof(unsigned int),
167 ampidxstart.size() * sizeof(unsigned int),
168 cudaMemcpyHostToDevice);
170 pindices[2] = components.size();
171 pindices[3] = SpinFactors.size();
172 pindices[4] = AmpMap.size();
174 for(auto &component : components) {
175 reinterpret_cast<Lineshape *>(component)->setConstantIndex(cIndex);
176 pindices.push_back(reinterpret_cast<Lineshape *>(component)->getFunctionIndex());
177 pindices.push_back(reinterpret_cast<Lineshape *>(component)->getParameterIndex());
180 for(auto &SpinFactor : SpinFactors) {
181 pindices.push_back(SpinFactor->getFunctionIndex());
182 pindices.push_back(SpinFactor->getParameterIndex());
183 SpinFactor->setConstantIndex(cIndex);
186 pindices.push_back(efficiency->getFunctionIndex());
187 pindices.push_back(efficiency->getParameterIndex());
188 components.push_back(efficiency);
190 GET_FUNCTION_ADDR(ptr_to_DP);
191 initialize(pindices);
193 Integrator = new NormIntegrator(parameters);
194 redoIntegral = new bool[components.size() - 1];
195 cachedMasses = new fptype[components.size() - 1];
196 cachedWidths = new fptype[components.size() - 1];
198 for(int i = 0; i < components.size() - 1; ++i) {
199 redoIntegral[i] = true;
200 cachedMasses[i] = -1;
201 cachedWidths[i] = -1;
202 lscalculators.push_back(new LSCalculator(parameters, i));
205 for(int i = 0; i < SpinFactors.size(); ++i) {
206 sfcalculators.push_back(new SFCalculator(parameters, i));
209 for(int i = 0; i < AmpMap.size(); ++i) {
210 AmpCalcs.push_back(new AmpCalc(ampidxstart[i], parameters, nPermVec[i]));
213 // fprintf(stderr,"#Amp's %i, #LS %i, #SF %i \n", AmpMap.size(), components.size()-1, SpinFactors.size() );
215 std::vector<mcbooster::GReal_t> masses(decayInfo.particle_masses.begin() + 1, decayInfo.particle_masses.end());
216 mcbooster::PhaseSpace phsp(decayInfo.particle_masses[0], masses, MCeventsNorm);
217 phsp.Generate(mcbooster::Vector4R(decayInfo.particle_masses[0], 0.0, 0.0, 0.0));
220 auto nAcc = phsp.GetNAccepted();
221 mcbooster::BoolVector_d flags = phsp.GetAccRejFlags();
222 auto d1 = phsp.GetDaughters(0);
223 auto d2 = phsp.GetDaughters(1);
224 auto d3 = phsp.GetDaughters(2);
225 auto d4 = phsp.GetDaughters(3);
227 // auto zip_begin = thrust::make_zip_iterator(thrust::make_tuple(d1.begin(), d2.begin(), d3.begin(), d4.begin()));
228 // auto zip_end = zip_begin + d1.size();
229 // auto new_end = thrust::remove_if(zip_begin, zip_end, flags.begin(), thrust::logical_not<bool>());
231 printf("After accept-reject we will keep %.i Events for normalization.\n", static_cast<int>(nAcc));
237 mcbooster::ParticlesSet_d pset(4);
243 norm_M12 = mcbooster::RealVector_d(nAcc);
244 norm_M34 = mcbooster::RealVector_d(nAcc);
245 norm_CosTheta12 = mcbooster::RealVector_d(nAcc);
246 norm_CosTheta34 = mcbooster::RealVector_d(nAcc);
247 norm_phi = mcbooster::RealVector_d(nAcc);
249 mcbooster::VariableSet_d VarSet(5);
250 VarSet[0] = &norm_M12;
251 VarSet[1] = &norm_M34;
252 VarSet[2] = &norm_CosTheta12;
253 VarSet[3] = &norm_CosTheta34;
254 VarSet[4] = &norm_phi;
257 mcbooster::EvaluateArray<Dim5>(eval, pset, VarSet);
259 norm_SF = mcbooster::RealVector_d(nAcc * SpinFactors.size());
260 norm_LS = mcbooster::mc_device_vector<fpcomplex>(nAcc * (components.size() - 1));
263 addSpecialMask(PdfBase::ForceSeparateNorm);
266 // makes the arrays to chache the lineshape values and spinfactors in CachedResSF and the values of the amplitudes in
268 // I made the choice to have spinfactors next to the values of the lineshape in memory. I waste memory by doing this
269 // because a spinfactor is saved as complex
270 // It would be nice to test if this is better than having the spinfactors stored seperately.
271 __host__ void DPPdf::setDataSize(unsigned int dataSize, unsigned int evtSize) {
272 // Default 3 is m12, m13, evtNum for DP 2dim, 4-body decay has 5 independent vars plus evtNum = 6
273 totalEventSize = evtSize;
274 if(totalEventSize < 3)
275 throw GooFit::GeneralError("totalEventSize {} should be 3 or more", totalEventSize);
283 numEntries = dataSize;
284 cachedResSF = new thrust::device_vector<fpcomplex>(
285 dataSize * (components.size() + SpinFactors.size() - 1)); // -1 because 1 component is efficiency
286 void *dummy = thrust::raw_pointer_cast(cachedResSF->data());
287 MEMCPY_TO_SYMBOL(cResSF, &dummy, sizeof(fpcomplex *), cacheToUse * sizeof(fpcomplex *), cudaMemcpyHostToDevice);
289 cachedAMPs = new thrust::device_vector<fpcomplex>(dataSize * (AmpCalcs.size()));
290 void *dummy2 = thrust::raw_pointer_cast(cachedAMPs->data());
291 MEMCPY_TO_SYMBOL(Amps_DP, &dummy2, sizeof(fpcomplex *), cacheToUse * sizeof(fpcomplex *), cudaMemcpyHostToDevice);
296 // this is where the actual magic happens. This function does all the calculations!
297 __host__ fptype DPPdf::normalize() const {
298 // fprintf(stderr, "start normalize\n");
299 recursiveSetNormalisation(1); // Not going to normalize efficiency,
300 // so set normalisation factor to 1 so it doesn't get multiplied by zero.
301 // Copy at this time to ensure that the SpecialResonanceCalculators, which need the efficiency,
302 // don't get zeroes through multiplying by the normFactor.
303 MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
305 // check if MINUIT changed any parameters and if so remember that so we know
306 // we need to recalculate that lineshape and every amp, that uses that lineshape
307 for(unsigned int i = 0; i < components.size() - 1; ++i) {
308 redoIntegral[i] = forceRedoIntegrals;
310 if(!(components[i]->parametersChanged()))
313 redoIntegral[i] = true;
316 SpinsCalculated = !forceRedoIntegrals;
317 forceRedoIntegrals = false;
319 // just some thrust iterators for the calculation.
320 thrust::constant_iterator<fptype *> dataArray(dev_event_array);
321 thrust::constant_iterator<int> eventSize(totalEventSize);
322 thrust::counting_iterator<int> eventIndex(0);
324 // Calculate spinfactors only once for normalisation events and real events
325 // strided_range is a template implemented in DalitsPlotHelpers.hh
326 // it basically goes through the array by increasing the pointer by a certain amount instead of just one step.
327 if(!SpinsCalculated) {
328 for(int i = 0; i < SpinFactors.size(); ++i) {
329 unsigned int offset = components.size() - 1;
331 thrust::make_zip_iterator(thrust::make_tuple(eventIndex, dataArray, eventSize)),
332 thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, dataArray, eventSize)),
333 strided_range<thrust::device_vector<fpcomplex>::iterator>(
334 cachedResSF->begin() + offset + i, cachedResSF->end(), (components.size() + SpinFactors.size() - 1))
336 *(sfcalculators[i]));
338 if(!generation_no_norm) {
340 thrust::make_zip_iterator(thrust::make_tuple(norm_M12.begin(),
342 norm_CosTheta12.begin(),
343 norm_CosTheta34.begin(),
345 thrust::make_zip_iterator(thrust::make_tuple(
346 norm_M12.end(), norm_M34.end(), norm_CosTheta12.end(), norm_CosTheta34.end(), norm_phi.end())),
347 (norm_SF.begin() + (i * MCevents)),
348 NormSpinCalculator(parameters, i));
352 SpinsCalculated = true;
355 // this calculates the values of the lineshapes and stores them in the array. It is recalculated every time
356 // parameters change.
357 for(int i = 0; i < components.size() - 1; ++i) {
358 if(redoIntegral[i]) {
360 thrust::make_zip_iterator(thrust::make_tuple(eventIndex, dataArray, eventSize)),
361 thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, dataArray, eventSize)),
362 strided_range<thrust::device_vector<fpcomplex>::iterator>(
363 cachedResSF->begin() + i, cachedResSF->end(), (components.size() + SpinFactors.size() - 1))
365 *(lscalculators[i]));
369 // this is a little messy but it basically checks if the amplitude includes one of the recalculated lineshapes and
370 // if so recalculates that amplitude
371 auto AmpMapIt = AmpMap.begin();
373 for(int i = 0; i < AmpCalcs.size(); ++i) {
374 std::vector<unsigned int> redoidx((*AmpMapIt).second.first);
377 for(unsigned int j : redoidx) {
386 thrust::transform(eventIndex,
387 eventIndex + numEntries,
388 strided_range<thrust::device_vector<fpcomplex>::iterator>(
389 cachedAMPs->begin() + i, cachedAMPs->end(), AmpCalcs.size())
395 // lineshape value calculation for the normalisation, also recalculated every time parameter change
396 if(!generation_no_norm) {
397 for(int i = 0; i < components.size() - 1; ++i) {
402 thrust::make_zip_iterator(thrust::make_tuple(norm_M12.begin(),
404 norm_CosTheta12.begin(),
405 norm_CosTheta34.begin(),
407 thrust::make_zip_iterator(thrust::make_tuple(
408 norm_M12.end(), norm_M34.end(), norm_CosTheta12.end(), norm_CosTheta34.end(), norm_phi.end())),
409 (norm_LS.begin() + (i * MCevents)),
410 NormLSCalculator(parameters, i));
414 thrust::constant_iterator<fptype *> normSFaddress(thrust::raw_pointer_cast(norm_SF.data()));
415 thrust::constant_iterator<fpcomplex *> normLSaddress(thrust::raw_pointer_cast(norm_LS.data()));
416 thrust::constant_iterator<int> NumNormEvents(MCevents);
418 // this does the rest of the integration with the cached lineshape and spinfactor values for the normalization
422 if(!generation_no_norm) {
423 fptype sumIntegral = 0;
424 sumIntegral += thrust::transform_reduce(
425 thrust::make_zip_iterator(thrust::make_tuple(eventIndex, NumNormEvents, normSFaddress, normLSaddress)),
426 thrust::make_zip_iterator(
427 thrust::make_tuple(eventIndex + MCevents, NumNormEvents, normSFaddress, normLSaddress)),
430 thrust::plus<fptype>());
431 // MCevents is the number of normalisation events.
432 sumIntegral /= MCevents;
437 GooFit::abort(__FILE__, __LINE__, getName() + " NAN normalization in DPPdf", this);
439 host_normalisation[parameters] = 1.0 / ret;
440 // printf("end of normalize %f\n", ret);
445 std::tuple<mcbooster::ParticlesSet_h, mcbooster::VariableSet_h, mcbooster::RealVector_h, mcbooster::RealVector_h>
446 DPPdf::GenerateSig(unsigned int numEvents) {
447 std::vector<mcbooster::GReal_t> masses(decayInfo.particle_masses.begin() + 1, decayInfo.particle_masses.end());
448 mcbooster::PhaseSpace phsp(decayInfo.particle_masses[0], masses, numEvents, generation_offset);
449 phsp.Generate(mcbooster::Vector4R(decayInfo.particle_masses[0], 0.0, 0.0, 0.0));
451 auto d1 = phsp.GetDaughters(0);
452 auto d2 = phsp.GetDaughters(1);
453 auto d3 = phsp.GetDaughters(2);
454 auto d4 = phsp.GetDaughters(3);
456 mcbooster::ParticlesSet_d pset(4);
462 auto SigGen_M12_d = mcbooster::RealVector_d(numEvents);
463 auto SigGen_M34_d = mcbooster::RealVector_d(numEvents);
464 auto SigGen_CosTheta12_d = mcbooster::RealVector_d(numEvents);
465 auto SigGen_CosTheta34_d = mcbooster::RealVector_d(numEvents);
466 auto SigGen_phi_d = mcbooster::RealVector_d(numEvents);
468 mcbooster::VariableSet_d VarSet_d(5);
469 VarSet_d[0] = &SigGen_M12_d;
470 VarSet_d[1] = &SigGen_M34_d;
471 VarSet_d[2] = &SigGen_CosTheta12_d;
472 VarSet_d[3] = &SigGen_CosTheta34_d;
473 VarSet_d[4] = &SigGen_phi_d;
476 mcbooster::EvaluateArray<Dim5>(eval, pset, VarSet_d);
478 auto h1 = new mcbooster::Particles_h(d1);
479 auto h2 = new mcbooster::Particles_h(d2);
480 auto h3 = new mcbooster::Particles_h(d3);
481 auto h4 = new mcbooster::Particles_h(d4);
483 mcbooster::ParticlesSet_h ParSet(4);
489 auto SigGen_M12_h = new mcbooster::RealVector_h(SigGen_M12_d);
490 auto SigGen_M34_h = new mcbooster::RealVector_h(SigGen_M34_d);
491 auto SigGen_CosTheta12_h = new mcbooster::RealVector_h(SigGen_CosTheta12_d);
492 auto SigGen_CosTheta34_h = new mcbooster::RealVector_h(SigGen_CosTheta34_d);
493 auto SigGen_phi_h = new mcbooster::RealVector_h(SigGen_phi_d);
495 mcbooster::VariableSet_h VarSet(5);
496 VarSet[0] = SigGen_M12_h;
497 VarSet[1] = SigGen_M34_h;
498 VarSet[2] = SigGen_CosTheta12_h;
499 VarSet[3] = SigGen_CosTheta34_h;
500 VarSet[4] = SigGen_phi_h;
502 mcbooster::RealVector_d weights(phsp.GetWeights());
503 phsp.FreeResources();
505 auto DS = new mcbooster::RealVector_d(6 * numEvents);
506 thrust::counting_iterator<int> eventNumber(0);
510 for(int i = 0; i < 5; ++i) {
511 mcbooster::strided_range<mcbooster::RealVector_d::iterator> sr(DS->begin() + i, DS->end(), 6);
512 thrust::copy(VarSet_d[i]->begin(), VarSet_d[i]->end(), sr.begin());
515 mcbooster::strided_range<mcbooster::RealVector_d::iterator> sr(DS->begin() + 5, DS->end(), 6);
516 thrust::copy(eventNumber, eventNumber + numEvents, sr.begin());
518 dev_event_array = thrust::raw_pointer_cast(DS->data());
519 setDataSize(numEvents, 6);
521 generation_no_norm = true; // we need no normalization for generation, but we do need to make sure that norm = 1;
526 MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
528 thrust::device_vector<fptype> results(numEvents);
529 thrust::constant_iterator<int> eventSize(6);
530 thrust::constant_iterator<fptype *> arrayAddress(dev_event_array);
531 thrust::counting_iterator<int> eventIndex(0);
533 MetricTaker evalor(this, getMetricPointer("ptr_to_Prob"));
534 thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(eventIndex, arrayAddress, eventSize)),
535 thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEvents, arrayAddress, eventSize)),
538 cudaDeviceSynchronize();
539 gooFree(dev_event_array);
542 results.begin(), results.end(), weights.begin(), weights.begin(), thrust::multiplies<mcbooster::GReal_t>());
543 mcbooster::BoolVector_d flags(numEvents);
545 thrust::counting_iterator<mcbooster::GLong_t> first(0);
546 thrust::counting_iterator<mcbooster::GLong_t> last = first + numEvents;
548 auto max = thrust::max_element(weights.begin(), weights.end());
549 thrust::transform(first, last, weights.begin(), flags.begin(), mcbooster::FlagAcceptReject((fptype)*max));
551 auto weights_h = mcbooster::RealVector_h(weights);
552 auto results_h = mcbooster::RealVector_h(results);
553 auto flags_h = mcbooster::BoolVector_h(flags);
554 cudaDeviceSynchronize();
556 return std::make_tuple(ParSet, VarSet, weights_h, flags_h);
559 SFCalculator::SFCalculator(int pIdx, unsigned int sf_idx)
560 : _spinfactor_i(sf_idx)
561 , _parameters(pIdx) {}
563 __device__ fpcomplex SFCalculator::operator()(thrust::tuple<int, fptype *, int> t) const {
564 int evtNum = thrust::get<0>(t);
565 fptype *evt = thrust::get<1>(t) + (evtNum * thrust::get<2>(t));
567 unsigned int *indices = paramIndices + _parameters; // Jump to DALITZPLOT position within parameters array
568 int parameter_i = 6 + (2 * indices[5]) + (indices[3] * 2)
569 + (_spinfactor_i * 2); // Find position of this resonance relative to DALITZPLOT start
570 unsigned int functn_i = indices[parameter_i];
571 unsigned int params_i = indices[parameter_i + 1];
573 fptype m12 = evt[indices[2 + indices[0]]];
574 fptype m34 = evt[indices[3 + indices[0]]];
575 fptype cos12 = evt[indices[4 + indices[0]]];
576 fptype cos34 = evt[indices[5 + indices[0]]];
577 fptype phi = evt[indices[6 + indices[0]]];
580 get4Vecs(vecs, indices[1], m12, m34, cos12, cos34, phi);
581 // printf("%i, %i, %f, %f, %f, %f, %f \n",evtNum, thrust::get<2>(t), m12, m34, cos12, cos34, phi );
582 // printf("vec%i %f, %f, %f, %f\n",0, vecs[0], vecs[1], vecs[2], vecs[3]);
583 // printf("vec%i %f, %f, %f, %f\n",1, vecs[4], vecs[5], vecs[6], vecs[7]);
584 // printf("vec%i %f, %f, %f, %f\n",2, vecs[8], vecs[9], vecs[10], vecs[11]);
585 // printf("vec%i %f, %f, %f, %f\n",3, vecs[12], vecs[13], vecs[14], vecs[15]);
587 auto func = reinterpret_cast<spin_function_ptr>(device_function_table[functn_i]);
588 fptype sf = (*func)(vecs, paramIndices + params_i);
589 // printf("SpinFactors %i : %.7g\n",evtNum, sf );
593 NormSpinCalculator::NormSpinCalculator(int pIdx, unsigned int sf_idx)
594 : _spinfactor_i(sf_idx)
595 , _parameters(pIdx) {}
597 __device__ fptype NormSpinCalculator::operator()(
598 thrust::tuple<mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t> t)
600 unsigned int *indices = paramIndices + _parameters; // Jump to DALITZPLOT position within parameters array
601 unsigned int numLS = indices[3];
602 unsigned int numAmps = indices[5];
603 int parameter_i = 6 + (2 * numAmps) + (numLS * 2)
604 + (_spinfactor_i * 2); // Find position of this resonance relative to DALITZPLOT start
605 unsigned int functn_i = indices[parameter_i];
606 unsigned int params_i = indices[parameter_i + 1];
608 fptype m12 = (thrust::get<0>(t));
609 fptype m34 = (thrust::get<1>(t));
610 fptype cos12 = (thrust::get<2>(t));
611 fptype cos34 = (thrust::get<3>(t));
612 fptype phi = (thrust::get<4>(t));
615 get4Vecs(vecs, indices[1], m12, m34, cos12, cos34, phi);
617 // printf("evt %i vec%i %.5g, %.5g, %.5g, %.5g\n", evtNum,0, vecs[0], vecs[1], vecs[2], vecs[3]);
618 // printf("evt %i vec%i %.5g, %.5g, %.5g, %.5g\n", evtNum,1, vecs[4], vecs[5], vecs[6], vecs[7]);
619 // printf("evt %i vec%i %.5g, %.5g, %.5g, %.5g\n", evtNum,2, vecs[8], vecs[9], vecs[10], vecs[11]);
620 // printf("evt %i vec%i %.5g, %.5g, %.5g, %.5g\n", evtNum,3, vecs[12], vecs[13], vecs[14], vecs[15]);
622 auto func = reinterpret_cast<spin_function_ptr>(device_function_table[functn_i]);
623 fptype sf = (*func)(vecs, paramIndices + params_i);
625 // printf("NormSF evt:%.5g, %.5g, %.5g, %.5g, %.5g\n", m12, m34, cos12, cos34, phi);
626 // printf("NormSF %i, %.5g\n",_spinfactor_i, sf );
631 LSCalculator::LSCalculator(int pIdx, unsigned int res_idx)
632 : _resonance_i(res_idx)
633 , _parameters(pIdx) {}
635 __device__ fpcomplex LSCalculator::operator()(thrust::tuple<int, fptype *, int> t) const {
636 // Calculates the BW values for a specific resonance.
639 int evtNum = thrust::get<0>(t);
640 fptype *evt = thrust::get<1>(t) + (evtNum * thrust::get<2>(t));
642 unsigned int *indices = paramIndices + _parameters; // Jump to DALITZPLOT position within parameters array
644 = 6 + (2 * indices[5]) + (_resonance_i * 2); // Find position of this resonance relative to DALITZPLOT start
645 unsigned int functn_i = indices[parameter_i];
646 unsigned int params_i = indices[parameter_i + 1];
647 unsigned int pair = (paramIndices + params_i)[5];
649 fptype m1 = functorConstants[indices[1] + 2];
650 fptype m2 = functorConstants[indices[1] + 3];
651 fptype m3 = functorConstants[indices[1] + 4];
652 fptype m4 = functorConstants[indices[1] + 5];
654 fptype m12 = evt[indices[2 + indices[0]]];
655 fptype m34 = evt[indices[3 + indices[0]]];
656 fptype cos12 = evt[indices[4 + indices[0]]];
657 fptype cos34 = evt[indices[5 + indices[0]]];
658 fptype phi = evt[indices[6 + indices[0]]];
661 fptype mres = pair == 0 ? m12 : m34;
662 fptype d1 = pair == 0 ? m1 : m3;
663 fptype d2 = pair == 0 ? m2 : m4;
664 ret = getResonanceAmplitude(mres, d1, d2, functn_i, params_i);
665 // printf("LS %i: mass:%f, %f i%f\n",_resonance_i, mres, ret.real, ret.imag );
668 get4Vecs(vecs, indices[1], m12, m34, cos12, cos34, phi);
670 fptype mres = getmass(pair, d1, d2, vecs, m1, m2, m3, m4);
671 ret = getResonanceAmplitude(mres, d1, d2, functn_i, params_i);
672 // printf("LS_m_calc %i: mass:%f, %f i%f\n",_resonance_i, mres, ret.real, ret.imag );
675 // if (!inDalitz(m12, m13, motherMass, daug1Mass, daug2Mass, daug3Mass)) return ret;
676 // printf("m12 %f \n", m12); // %f %f %f (%f, %f)\n ", m12, m13, m23, ret.real, ret.imag);
677 // printf("#Parameters %i, #LS %i, #SF %i, #AMP %i \n", indices[0], indices[3], indices[4], indices[5]);
678 // printf("%i mass: %.5g, BW_%i : %f %f\n",evtNum, massstore, _resonance_i, ret.real, ret.imag);
683 NormLSCalculator::NormLSCalculator(int pIdx, unsigned int res_idx)
684 : _resonance_i(res_idx)
685 , _parameters(pIdx) {}
687 __device__ fpcomplex NormLSCalculator::operator()(
688 thrust::tuple<mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t, mcbooster::GReal_t> t)
690 // Calculates the BW values for a specific resonance.
693 unsigned int *indices = paramIndices + _parameters; // Jump to DALITZPLOT position within parameters array
694 unsigned int numAmps = indices[5];
696 = 6 + (2 * numAmps) + (_resonance_i * 2); // Find position of this resonance relative to DALITZPLOT start
697 unsigned int functn_i = indices[parameter_i];
698 unsigned int params_i = indices[parameter_i + 1];
699 unsigned int pair = (paramIndices + params_i)[5];
701 fptype m1 = functorConstants[indices[1] + 2];
702 fptype m2 = functorConstants[indices[1] + 3];
703 fptype m3 = functorConstants[indices[1] + 4];
704 fptype m4 = functorConstants[indices[1] + 5];
706 fptype m12 = (thrust::get<0>(t));
707 fptype m34 = (thrust::get<1>(t));
708 fptype cos12 = (thrust::get<2>(t));
709 fptype cos34 = (thrust::get<3>(t));
710 fptype phi = (thrust::get<4>(t));
713 fptype mres = pair == 0 ? m12 : m34;
714 fptype d1 = pair == 0 ? m1 : m3;
715 fptype d2 = pair == 0 ? m2 : m4;
716 ret = getResonanceAmplitude(mres, d1, d2, functn_i, params_i);
719 get4Vecs(vecs, indices[1], m12, m34, cos12, cos34, phi);
721 fptype mres = getmass(pair, d1, d2, vecs, m1, m2, m3, m4);
722 ret = getResonanceAmplitude(mres, d1, d2, functn_i, params_i);
725 // printf("NormLS %f, %f, %f, %f, %f \n",m12, m34, cos12, cos34, phi );
726 // printf("%i, %i, %i, %i, %i \n",numLS, numSF, numAmps, offset, evtNum );
727 // printf("NLS %i, %f, %f\n",_resonance_i,ret.real, ret.imag);
729 // printf("m12 %f \n", m12); // %f %f %f (%f, %f)\n ", m12, m13, m23, ret.real, ret.imag);
730 // printf("#Parameters %i, #LS %i, #SF %i, #AMP %i \n", indices[0], indices[3], indices[4], indices[5]);
735 AmpCalc::AmpCalc(unsigned int AmpIdx, unsigned int pIdx, unsigned int nPerm)
738 , _parameters(pIdx) {}
740 __device__ fpcomplex AmpCalc::operator()(thrust::tuple<int, fptype *, int> t) const {
741 unsigned int *indices = paramIndices + _parameters;
742 unsigned int cacheToUse = indices[2];
743 unsigned int totalLS = indices[3];
744 unsigned int totalSF = indices[4];
745 unsigned int totalAMP = indices[5];
746 unsigned int offset = totalLS + totalSF;
747 unsigned int numLS = AmpIndices[totalAMP + _AmpIdx];
748 unsigned int numSF = AmpIndices[totalAMP + _AmpIdx + 1];
749 unsigned int evtNum = thrust::get<0>(t);
751 fpcomplex returnVal(0, 0);
752 unsigned int SF_step = numSF / _nPerm;
753 unsigned int LS_step = numLS / _nPerm;
755 for(int i = 0; i < _nPerm; ++i) {
759 for(int j = i * LS_step; j < (i + 1) * LS_step; ++j) {
760 tmp = (cResSF[cacheToUse][evtNum * offset + AmpIndices[totalAMP + _AmpIdx + 3 + j]]);
762 // printf("Lineshape = (%.7g, %.7g)\n", tmp.real, tmp.imag);
765 // printf("Lineshape Product = (%.7g, %.7g)\n", ret.real, ret.imag);
766 for(int j = i * SF_step; j < (i + 1) * SF_step; ++j) {
767 tmp = (cResSF[cacheToUse][evtNum * offset + totalLS + AmpIndices[totalAMP + _AmpIdx + 3 + numLS + j]]
770 // printf("SF = (%.7g, %.7g)\n", tmp.real, tmp.imag);
773 // printf("Lineshape Product * SF = (%.7g, %.7g)\n", ret.real, ret.imag);
778 returnVal *= (1 / sqrt(static_cast<fptype>(_nPerm)));
779 // printf("Amplitude Value = (%.7g, %.7g)\n", returnVal.real, returnVal.imag);
783 NormIntegrator::NormIntegrator(unsigned int pIdx)
784 : _parameters(pIdx) {}
786 __device__ fptype NormIntegrator::operator()(thrust::tuple<int, int, fptype *, fpcomplex *> t) const {
787 unsigned int *indices = paramIndices + _parameters;
788 unsigned int totalAMP = indices[5];
790 unsigned int evtNum = thrust::get<0>(t);
791 unsigned int MCevents = thrust::get<1>(t);
792 fptype *SFnorm = thrust::get<2>(t) + evtNum;
793 fpcomplex *LSnorm = thrust::get<3>(t) + evtNum;
795 fpcomplex returnVal(0, 0);
797 for(int amp = 0; amp < totalAMP; ++amp) {
798 unsigned int ampidx = AmpIndices[amp];
799 unsigned int numLS = AmpIndices[totalAMP + ampidx];
800 unsigned int numSF = AmpIndices[totalAMP + ampidx + 1];
801 unsigned int nPerm = AmpIndices[totalAMP + ampidx + 2];
802 unsigned int SF_step = numSF / nPerm;
803 unsigned int LS_step = numLS / nPerm;
804 fpcomplex ret2(0, 0);
805 // printf("%i, %i, %i, %i, %i, %i, %i, %i, %i, %f\n",ampidx, amp, numLS, numSF, nPerm,AmpIndices[totalAMP +
806 // ampidx + 3 + 0], AmpIndices[totalAMP + ampidx + 3 + 1], AmpIndices[totalAMP + ampidx + 3 + 2],
807 // AmpIndices[totalAMP + ampidx + 3 + 3], (1/sqrt((fptype)(nPerm))) );
809 for(int j = 0; j < nPerm; ++j) {
812 for(int i = j * LS_step; i < (j + 1) * LS_step; ++i) {
813 fpcomplex matrixelement(LSnorm[AmpIndices[totalAMP + ampidx + 3 + i] * MCevents]);
814 // printf("Norm BW %i, %.5g, %.5g\n",AmpIndices[totalAMP + ampidx + 3 + i] , matrixelement.real,
815 // matrixelement.imag);
816 ret *= matrixelement;
819 for(int i = j * SF_step; i < (j + 1) * SF_step; ++i) {
820 fptype matrixelement = (SFnorm[AmpIndices[totalAMP + ampidx + 3 + numLS + i] * MCevents]);
821 // printf("Norm SF %i, %.5g\n",AmpIndices[totalAMP + ampidx + 3 + i] , matrixelement);
822 ret *= matrixelement;
828 fpcomplex amp_C{cudaArray[indices[2 * amp + 6]], cudaArray[indices[2 * amp + 7]]};
829 ret2 *= (1 / sqrt(static_cast<fptype>(nPerm)));
830 // printf("Result Amplitude %i, %.5g, %.5g\n",amp, ret2.real, ret2.imag);
831 returnVal += ret2 * amp_C;
834 return thrust::norm(returnVal);
836 } // namespace GooFit