1 #include <goofit/Error.h>
2 #include <goofit/PDFs/physics/DalitzPlotPdf.h>
3 #include <goofit/detail/Complex.h>
5 #include <thrust/copy.h>
6 #include <thrust/transform_reduce.h>
10 // Functor used for fit fraction sum
11 struct CoefSumFunctor {
15 CoefSumFunctor(fpcomplex coef_i, fpcomplex coef_j)
19 __device__ fptype operator()(thrust::tuple<fpcomplex, fpcomplex> val) {
20 return (coef_i * thrust::conj<fptype>(coef_j) * thrust::get<0>(val) * thrust::conj<fptype>(thrust::get<1>(val)))
25 constexpr int resonanceOffset_DP = 4; // Offset of the first resonance into the parameter index array
26 // Offset is number of parameters, constant index, number of resonances (not calculable
27 // from nP because we don't know what the efficiency might need), and cache index. Efficiency
28 // parameters are after the resonance information.
30 // The function of this array is to hold all the cached waves; specific
31 // waves are recalculated when the corresponding resonance mass or width
32 // changes. Note that in a multithread environment each thread needs its
33 // own cache, hence the '10'. Ten threads should be enough for anyone!
35 // NOTE: This is does not support ten instances (ten threads) of resoncances now, only one set of resonances.
36 __device__ fpcomplex *cResonances[16];
38 __device__ inline int parIndexFromResIndex_DP(int resIndex) { return resonanceOffset_DP + resIndex * resonanceSize; }
41 device_DalitzPlot_calcIntegrals(fptype m12, fptype m13, int res_i, int res_j, fptype *p, unsigned int *indices) {
42 // Calculates BW_i(m12, m13) * BW_j^*(m12, m13).
43 // This calculation is in a separate function so
44 // it can be cached. Note that this function expects
45 // to be called on a normalisation grid, not on
46 // observed points, that's why it doesn't use
47 // cResonances. No need to cache the values at individual
48 // grid points - we only care about totals.
49 fptype motherMass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 0]);
50 fptype daug1Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 1]);
51 fptype daug2Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 2]);
52 fptype daug3Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 3]);
54 fpcomplex ret{0., 0.};
56 if(!inDalitz(m12, m13, motherMass, daug1Mass, daug2Mass, daug3Mass))
60 = motherMass * motherMass + daug1Mass * daug1Mass + daug2Mass * daug2Mass + daug3Mass * daug3Mass - m12 - m13;
62 int parameter_i = parIndexFromResIndex_DP(res_i);
63 unsigned int functn_i = RO_CACHE(indices[parameter_i + 2]);
64 unsigned int params_i = RO_CACHE(indices[parameter_i + 3]);
65 ret = getResonanceAmplitude(m12, m13, m23, functn_i, params_i);
67 int parameter_j = parIndexFromResIndex_DP(res_j);
68 unsigned int functn_j = RO_CACHE(indices[parameter_j + 2]);
69 unsigned int params_j = RO_CACHE(indices[parameter_j + 3]);
70 ret *= conj(getResonanceAmplitude(m12, m13, m23, functn_j, params_j));
75 __device__ fptype device_DalitzPlot(fptype *evt, fptype *p, unsigned int *indices) {
76 fptype motherMass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 0]);
77 fptype daug1Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 1]);
78 fptype daug2Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 2]);
79 fptype daug3Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 3]);
81 fptype m12 = RO_CACHE(evt[RO_CACHE(indices[2 + RO_CACHE(indices[0])])]);
82 fptype m13 = RO_CACHE(evt[RO_CACHE(indices[3 + RO_CACHE(indices[0])])]);
84 if(!inDalitz(m12, m13, motherMass, daug1Mass, daug2Mass, daug3Mass))
87 fptype evtIndex = RO_CACHE(evt[RO_CACHE(indices[4 + RO_CACHE(indices[0])])]);
89 auto evtNum = static_cast<int>(floor(0.5 + evtIndex));
91 fpcomplex totalAmp(0, 0);
92 unsigned int numResonances = RO_CACHE(indices[2]);
93 // unsigned int cacheToUse = RO_CACHE(indices[3]);
95 for(int i = 0; i < numResonances; ++i) {
96 int paramIndex = parIndexFromResIndex_DP(i);
98 = fpcomplex(RO_CACHE(p[RO_CACHE(indices[paramIndex + 0])]), RO_CACHE(p[RO_CACHE(indices[paramIndex + 1])]));
100 // potential performance improvement by
101 // double2 me = RO_CACHE(reinterpret_cast<double2*> (cResonances[i][evtNum]));
102 fpcomplex me = RO_CACHE(cResonances[i][evtNum]);
104 // fpcomplex matrixelement((cResonances[cacheToUse][evtNum*numResonances + i]).real,
105 // (cResonances[cacheToUse][evtNum*numResonances + i]).imag);
106 // fpcomplex matrixelement (me[0], me[1]);
108 totalAmp += amp * me;
111 fptype ret = thrust::norm(totalAmp);
112 int effFunctionIdx = parIndexFromResIndex_DP(numResonances);
113 fptype eff = callFunction(evt, RO_CACHE(indices[effFunctionIdx]), RO_CACHE(indices[effFunctionIdx + 1]));
116 // printf("DalitzPlot evt %i zero: %i %i %f (%f, %f).\n", evtNum, numResonances, effFunctionIdx, eff, totalAmp.real,
122 __device__ device_function_ptr ptr_to_DalitzPlot = device_DalitzPlot;
124 __host__ DalitzPlotPdf::DalitzPlotPdf(
125 std::string n, Observable m12, Observable m13, EventNumber eventNumber, DecayInfo3 decay, GooPdf *efficiency)
126 : GooPdf(n, m12, m13, eventNumber)
130 , _eventNumber(eventNumber)
131 , dalitzNormRange(nullptr)
134 , forceRedoIntegrals(true)
135 , totalEventSize(3) // Default 3 = m12, m13, evtNum
137 , integrators(nullptr)
138 , calculators(nullptr) {
139 fptype decayConstants[5];
141 for(auto &cachedWave : cachedWaves)
142 cachedWave = nullptr;
144 std::vector<unsigned int> pindices;
145 pindices.push_back(registerConstants(5));
146 decayConstants[0] = decayInfo.motherMass;
147 decayConstants[1] = decayInfo.daug1Mass;
148 decayConstants[2] = decayInfo.daug2Mass;
149 decayConstants[3] = decayInfo.daug3Mass;
150 decayConstants[4] = decayInfo.meson_radius;
152 functorConstants, decayConstants, 5 * sizeof(fptype), cIndex * sizeof(fptype), cudaMemcpyHostToDevice);
154 pindices.push_back(decayInfo.resonances.size());
155 static int cacheCount = 0;
156 cacheToUse = cacheCount++;
157 pindices.push_back(cacheToUse);
159 for(auto &resonance : decayInfo.resonances) {
160 pindices.push_back(registerParameter(resonance->amp_real));
161 pindices.push_back(registerParameter(resonance->amp_imag));
162 pindices.push_back(resonance->getFunctionIndex());
163 pindices.push_back(resonance->getParameterIndex());
164 resonance->setConstantIndex(cIndex);
165 components.push_back(resonance);
168 pindices.push_back(efficiency->getFunctionIndex());
169 pindices.push_back(efficiency->getParameterIndex());
170 components.push_back(efficiency);
172 GET_FUNCTION_ADDR(ptr_to_DalitzPlot);
173 initialize(pindices);
175 redoIntegral = new bool[decayInfo.resonances.size()];
176 cachedMasses = new fptype[decayInfo.resonances.size()];
177 cachedWidths = new fptype[decayInfo.resonances.size()];
178 integrals = new fpcomplex **[decayInfo.resonances.size()];
179 integrators = new SpecialResonanceIntegrator **[decayInfo.resonances.size()];
180 calculators = new SpecialResonanceCalculator *[decayInfo.resonances.size()];
182 for(int i = 0; i < decayInfo.resonances.size(); ++i) {
183 redoIntegral[i] = true;
184 cachedMasses[i] = -1;
185 cachedWidths[i] = -1;
186 integrators[i] = new SpecialResonanceIntegrator *[decayInfo.resonances.size()];
187 calculators[i] = new SpecialResonanceCalculator(parameters, i);
188 integrals[i] = new fpcomplex *[decayInfo.resonances.size()];
190 for(int j = 0; j < decayInfo.resonances.size(); ++j) {
191 integrals[i][j] = new fpcomplex(0, 0);
192 integrators[i][j] = new SpecialResonanceIntegrator(parameters, i, j);
196 addSpecialMask(PdfBase::ForceSeparateNorm);
199 __host__ void DalitzPlotPdf::setDataSize(unsigned int dataSize, unsigned int evtSize) {
200 // Default 3 is m12, m13, evtNum
201 totalEventSize = evtSize;
202 if(totalEventSize < 3)
203 throw GooFit::GeneralError("totalEventSize {} must be 3 or more", totalEventSize);
205 // if (cachedWaves) delete cachedWaves;
207 for(auto &cachedWave : cachedWaves) {
209 cachedWave = nullptr;
213 numEntries = dataSize;
215 for(int i = 0; i < 16; i++) {
217 cachedWaves[i] = new thrust::device_vector<fpcomplex>(m_iEventsPerTask);
219 cachedWaves[i] = new thrust::device_vector<fpcomplex>(dataSize);
221 void *dummy = thrust::raw_pointer_cast(cachedWaves[i]->data());
222 MEMCPY_TO_SYMBOL(cResonances, &dummy, sizeof(fpcomplex *), i * sizeof(fpcomplex *), cudaMemcpyHostToDevice);
228 __host__ fptype DalitzPlotPdf::normalize() const {
229 recursiveSetNormalisation(1); // Not going to normalize efficiency,
230 // so set normalisation factor to 1 so it doesn't get multiplied by zero.
231 // Copy at this time to ensure that the SpecialResonanceCalculators, which need the efficiency,
232 // don't get zeroes through multiplying by the normFactor.
233 MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
235 int totalBins = _m12.getNumBins() * _m13.getNumBins();
237 if(!dalitzNormRange) {
238 gooMalloc((void **)&dalitzNormRange, 6 * sizeof(fptype));
241 // This line runs once
242 static std::array<fptype, 6> host_norms{{0, 0, 0, 0, 0, 0}};
244 std::array<fptype, 6> current_host_norms{{_m12.getLowerLimit(),
245 _m12.getUpperLimit(),
246 static_cast<fptype>(_m12.getNumBins()),
247 _m13.getLowerLimit(),
248 _m13.getUpperLimit(),
249 static_cast<fptype>(_m13.getNumBins())}};
251 if(host_norms != current_host_norms) {
252 host_norms = current_host_norms;
253 MEMCPY(dalitzNormRange, host_norms.data(), 6 * sizeof(fptype), cudaMemcpyHostToDevice);
256 for(unsigned int i = 0; i < decayInfo.resonances.size(); ++i) {
257 redoIntegral[i] = forceRedoIntegrals;
259 if(!(decayInfo.resonances[i]->parametersChanged()))
262 redoIntegral[i] = true;
265 forceRedoIntegrals = false;
267 // Only do this bit if masses or widths have changed.
268 thrust::constant_iterator<fptype *> arrayAddress(dalitzNormRange);
269 thrust::counting_iterator<int> binIndex(0);
271 // NB, SpecialResonanceCalculator assumes that fit is unbinned!
272 // And it needs to know the total event size, not just observables
273 // for this particular PDF component.
274 thrust::constant_iterator<fptype *> dataArray(dev_event_array);
275 thrust::constant_iterator<int> eventSize(totalEventSize);
276 thrust::counting_iterator<int> eventIndex(0);
278 for(int i = 0; i < decayInfo.resonances.size(); ++i) {
279 if(redoIntegral[i]) {
282 thrust::make_zip_iterator(thrust::make_tuple(eventIndex, dataArray, eventSize)),
283 thrust::make_zip_iterator(thrust::make_tuple(eventIndex + m_iEventsPerTask, arrayAddress, eventSize)),
284 strided_range<thrust::device_vector<fpcomplex>::iterator>(
285 cachedWaves[i]->begin(), cachedWaves[i]->end(), 1)
290 thrust::make_zip_iterator(thrust::make_tuple(eventIndex, dataArray, eventSize)),
291 thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, arrayAddress, eventSize)),
292 strided_range<thrust::device_vector<fpcomplex>::iterator>(
293 cachedWaves[i]->begin(), cachedWaves[i]->end(), 1)
299 // Possibly this can be done more efficiently by exploiting symmetry?
300 for(int j = 0; j < decayInfo.resonances.size(); ++j) {
301 if((!redoIntegral[i]) && (!redoIntegral[j]))
304 fpcomplex dummy(0, 0);
305 thrust::plus<fpcomplex> complexSum;
306 (*(integrals[i][j])) = thrust::transform_reduce(
307 thrust::make_zip_iterator(thrust::make_tuple(binIndex, arrayAddress)),
308 thrust::make_zip_iterator(thrust::make_tuple(binIndex + totalBins, arrayAddress)),
309 *(integrators[i][j]),
315 // End of time-consuming integrals.
316 fpcomplex sumIntegral(0, 0);
318 for(unsigned int i = 0; i < decayInfo.resonances.size(); ++i) {
319 int param_i = parameters + resonanceOffset_DP + resonanceSize * i;
320 fpcomplex amplitude_i(host_params[host_indices[param_i]], host_params[host_indices[param_i + 1]]);
322 for(unsigned int j = 0; j < decayInfo.resonances.size(); ++j) {
323 int param_j = parameters + resonanceOffset_DP + resonanceSize * j;
324 fpcomplex amplitude_j(host_params[host_indices[param_j]], -host_params[host_indices[param_j + 1]]);
325 // Notice complex conjugation
326 // printf("%f %f %f %f %f %f\n", amplitude_i.real(), amplitude_i.imag(), amplitude_j.real(),
327 // amplitude_j.imag(), (*(integrals[i][j])).real(), (*(integrals[i][j])).imag() );
328 sumIntegral += amplitude_i * amplitude_j * (*(integrals[i][j]));
332 fptype ret = sumIntegral.real(); // That complex number is a square, so it's fully real
333 double binSizeFactor = 1;
334 binSizeFactor *= _m12.getBinSize();
335 binSizeFactor *= _m13.getBinSize();
336 ret *= binSizeFactor;
338 host_normalisation[parameters] = 1.0 / ret;
339 // printf("%f %f\n", ret, binSizeFactor);
343 __host__ fpcomplex DalitzPlotPdf::sumCachedWave(size_t i) const {
344 const thrust::device_vector<fpcomplex> &vec = getCachedWaveNoCopy(i);
346 fpcomplex ret = thrust::reduce(vec.begin(), vec.end(), fpcomplex(0, 0), thrust::plus<fpcomplex>());
351 __host__ const std::vector<std::complex<fptype>> DalitzPlotPdf::getCachedWave(size_t i) const {
352 auto ret_thrust = getCachedWave(i);
353 std::vector<std::complex<fptype>> ret(ret_thrust.size());
354 thrust::copy(ret_thrust.begin(), ret_thrust.end(), ret.begin());
358 __host__ std::vector<std::vector<fptype>> DalitzPlotPdf::fit_fractions() {
359 GOOFIT_DEBUG("Performing fit fraction calculation, should already have a cache (does not use normalization grid)");
361 size_t n_res = getDecayInfo().resonances.size();
362 size_t nEntries = getCachedWaveNoCopy(0).size();
364 std::vector<fpcomplex> coefs(n_res);
365 std::transform(getDecayInfo().resonances.begin(),
366 getDecayInfo().resonances.end(),
368 [](ResonancePdf *res) { return fpcomplex(res->amp_real.getValue(), res->amp_imag.getValue()); });
370 fptype buffer_all = 0;
374 fpcomplex cached_i_val;
375 fpcomplex cached_j_val;
377 thrust::device_vector<fpcomplex> cached_i;
378 thrust::device_vector<fpcomplex> cached_j;
379 std::vector<std::vector<fptype>> Amps_int(n_res, std::vector<fptype>(n_res));
381 for(size_t i = 0; i < n_res; i++) {
382 for(size_t j = 0; j < n_res; j++) {
384 cached_i = getCachedWaveNoCopy(i);
385 cached_j = getCachedWaveNoCopy(j);
389 buffer += thrust::transform_reduce(
390 thrust::make_zip_iterator(thrust::make_tuple(cached_i.begin(), cached_j.begin())),
391 thrust::make_zip_iterator(thrust::make_tuple(cached_i.end(), cached_j.end())),
392 CoefSumFunctor(coef_i, coef_j),
394 thrust::plus<fptype>());
396 buffer_all += buffer;
397 Amps_int[i][j] = (buffer / nEntries);
401 fptype total_PDF = buffer_all / nEntries;
403 std::vector<std::vector<fptype>> ff(n_res, std::vector<fptype>(n_res));
405 for(size_t i = 0; i < n_res; i++)
406 for(size_t j = 0; j < n_res; j++)
407 ff[i][j] = (Amps_int[i][j] / total_PDF);
412 SpecialResonanceIntegrator::SpecialResonanceIntegrator(int pIdx, unsigned int ri, unsigned int rj)
415 , parameters(pIdx) {}
417 __device__ fpcomplex SpecialResonanceIntegrator::operator()(thrust::tuple<int, fptype *> t) const {
418 // Bin index, base address [lower, upper,getNumBins]
419 // Notice that this is basically MetricTaker::operator (binned) with the special-case knowledge
420 // that event size is two, and that the function to call is dev_DalitzPlot_calcIntegrals.
422 int globalBinNumber = thrust::get<0>(t);
423 fptype lowerBoundM12 = thrust::get<1>(t)[0];
424 fptype upperBoundM12 = thrust::get<1>(t)[1];
425 auto numBinsM12 = static_cast<int>(floor(thrust::get<1>(t)[2] + 0.5));
426 int binNumberM12 = globalBinNumber % numBinsM12;
427 fptype binCenterM12 = upperBoundM12 - lowerBoundM12;
428 binCenterM12 /= numBinsM12;
429 binCenterM12 *= (binNumberM12 + 0.5);
430 binCenterM12 += lowerBoundM12;
432 globalBinNumber /= numBinsM12;
433 fptype lowerBoundM13 = thrust::get<1>(t)[3];
434 fptype upperBoundM13 = thrust::get<1>(t)[4];
435 auto numBinsM13 = static_cast<int>(floor(thrust::get<1>(t)[5] + 0.5));
436 fptype binCenterM13 = upperBoundM13 - lowerBoundM13;
437 binCenterM13 /= numBinsM13;
438 binCenterM13 *= (globalBinNumber + 0.5);
439 binCenterM13 += lowerBoundM13;
441 unsigned int *indices = paramIndices + parameters;
443 = device_DalitzPlot_calcIntegrals(binCenterM12, binCenterM13, resonance_i, resonance_j, cudaArray, indices);
445 fptype fakeEvt[10]; // Need room for many observables in case m12 or m13 were assigned a high index in an
446 // event-weighted fit.
447 fakeEvt[indices[indices[0] + 2 + 0]] = binCenterM12;
448 fakeEvt[indices[indices[0] + 2 + 1]] = binCenterM13;
449 unsigned int numResonances = indices[2];
450 int effFunctionIdx = parIndexFromResIndex_DP(numResonances);
451 fptype eff = callFunction(fakeEvt, indices[effFunctionIdx], indices[effFunctionIdx + 1]);
453 // Multiplication by eff, not sqrt(eff), is correct:
454 // These complex numbers will not be squared when they
455 // go into the integrals. They've been squared already,
458 // printf("ret %f %f %f %f %f\n",binCenterM12, binCenterM13, ret.real, ret.imag, eff );
462 SpecialResonanceCalculator::SpecialResonanceCalculator(int pIdx, unsigned int res_idx)
463 : resonance_i(res_idx)
464 , parameters(pIdx) {}
466 __device__ fpcomplex SpecialResonanceCalculator::operator()(thrust::tuple<int, fptype *, int> t) const {
467 // Calculates the BW values for a specific resonance.
469 int evtNum = thrust::get<0>(t);
470 fptype *evt = thrust::get<1>(t) + (evtNum * thrust::get<2>(t));
472 unsigned int *indices = paramIndices + parameters; // Jump to DALITZPLOT position within parameters array
473 fptype m12 = evt[indices[2 + indices[0]]];
474 fptype m13 = evt[indices[3 + indices[0]]];
476 fptype motherMass = functorConstants[indices[1] + 0];
477 fptype daug1Mass = functorConstants[indices[1] + 1];
478 fptype daug2Mass = functorConstants[indices[1] + 2];
479 fptype daug3Mass = functorConstants[indices[1] + 3];
481 if(!inDalitz(m12, m13, motherMass, daug1Mass, daug2Mass, daug3Mass))
485 = motherMass * motherMass + daug1Mass * daug1Mass + daug2Mass * daug2Mass + daug3Mass * daug3Mass - m12 - m13;
488 = parIndexFromResIndex_DP(resonance_i); // Find position of this resonance relative to DALITZPLOT start
490 unsigned int functn_i = indices[parameter_i + 2];
491 unsigned int params_i = indices[parameter_i + 3];
493 ret = getResonanceAmplitude(m12, m13, m23, functn_i, params_i);
494 // printf("Amplitude %f %f %f (%f, %f)\n ", m12, m13, m23, ret.real, ret.imag);
498 } // namespace GooFit