1 #include <goofit/Error.h>
2 #include <goofit/PDFs/physics/TddpPdf.h>
4 #include <thrust/transform_reduce.h>
12 const int resonanceOffset = 8; // Offset of the first resonance into the parameter index array
13 // Offset is number of parameters, constant index, indices for tau, xmix, and ymix, index
14 // of resolution function, and finally number of resonances (not calculable from nP
15 // because we don't know what the efficiency and time resolution might need). Efficiency
16 // and time-resolution parameters are after the resonance information.
17 const unsigned int SPECIAL_RESOLUTION_FLAG = 999999999;
19 // The function of this array is to hold all the cached waves; specific
20 // waves are recalculated when the corresponding resonance mass or width
21 // changes. Note that in a multithread environment each thread needs its
22 // own cache, hence the '10'. Ten threads should be enough for anyone!
24 // NOTE: only one set of wave holders is supported currently.
25 __device__ WaveHolder_s *cWaves[16];
27 __device__ inline int parIndexFromResIndex(int resIndex) { return resonanceOffset + resIndex * resonanceSize; }
30 getResonanceAmplitude(fptype m12, fptype m13, fptype m23, unsigned int functionIdx, unsigned int pIndex) {
31 auto func = reinterpret_cast<resonance_function_ptr>(device_function_table[functionIdx]);
32 return (*func)(m12, m13, m23, paramIndices + pIndex);
35 __device__ ThreeComplex
36 device_Tddp_calcIntegrals(fptype m12, fptype m13, int res_i, int res_j, fptype *p, unsigned int *indices) {
37 // For calculating Dalitz-plot integrals. What's needed is the products
38 // AiAj*, AiBj*, and BiBj*, where
39 // Ai = BW_i(x, y) + BW_i(y, x)
40 // and Bi reverses the sign of the second BW.
41 // This function returns the above values at a single point.
42 // NB: Multiplication by efficiency is done by the calling function.
43 // Note that this function expects
44 // to be called on a normalisation grid, not on
45 // observed points, that's why it doesn't use
46 // cWaves. No need to cache the values at individual
47 // grid points - we only care about totals.
49 fptype motherMass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 0]);
50 fptype daug1Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 1]);
51 fptype daug2Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 2]);
52 fptype daug3Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 3]);
56 if(!inDalitz(m12, m13, motherMass, daug1Mass, daug2Mass, daug3Mass))
60 = motherMass * motherMass + daug1Mass * daug1Mass + daug2Mass * daug2Mass + daug3Mass * daug3Mass - m12 - m13;
62 int parameter_i = parIndexFromResIndex(res_i);
63 int parameter_j = parIndexFromResIndex(res_j);
65 // fptype amp_real = p[indices[parameter_i+0]];
66 // fptype amp_imag = p[indices[parameter_i+1]];
67 unsigned int functn_i = RO_CACHE(indices[parameter_i + 2]);
68 unsigned int params_i = RO_CACHE(indices[parameter_i + 3]);
69 fpcomplex ai = getResonanceAmplitude(m12, m13, m23, functn_i, params_i);
70 fpcomplex bi = getResonanceAmplitude(m13, m12, m23, functn_i, params_i);
72 unsigned int functn_j = RO_CACHE(indices[parameter_j + 2]);
73 unsigned int params_j = RO_CACHE(indices[parameter_j + 3]);
74 fpcomplex aj = conj(getResonanceAmplitude(m12, m13, m23, functn_j, params_j));
75 fpcomplex bj = conj(getResonanceAmplitude(m13, m12, m23, functn_j, params_j));
78 (ai * aj).real(), (ai * aj).imag(), (ai * bj).real(), (ai * bj).imag(), (bi * bj).real(), (bi * bj).imag());
82 __device__ fptype device_Tddp(fptype *evt, fptype *p, unsigned int *indices) {
83 fptype motherMass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 0]);
84 fptype daug1Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 1]);
85 fptype daug2Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 2]);
86 fptype daug3Mass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 3]);
88 fptype m12 = RO_CACHE(evt[RO_CACHE(indices[4 + RO_CACHE(indices[0])])]);
89 fptype m13 = RO_CACHE(evt[RO_CACHE(indices[5 + RO_CACHE(indices[0])])]);
91 if(!inDalitz(m12, m13, motherMass, daug1Mass, daug2Mass, daug3Mass))
94 auto evtNum = static_cast<int>(floor(0.5 + RO_CACHE(evt[indices[6 + RO_CACHE(indices[0])]])));
96 fpcomplex sumWavesA(0, 0);
97 fpcomplex sumWavesB(0, 0);
98 fpcomplex sumRateAA(0, 0);
99 fpcomplex sumRateAB(0, 0);
100 fpcomplex sumRateBB(0, 0);
102 unsigned int numResonances = RO_CACHE(indices[6]);
103 // unsigned int cacheToUse = RO_CACHE(indices[7]);
105 for(int i = 0; i < numResonances; ++i) {
106 int paramIndex = parIndexFromResIndex(i);
107 fpcomplex amp{RO_CACHE(p[RO_CACHE(indices[paramIndex + 0])]), RO_CACHE(p[RO_CACHE(indices[paramIndex + 1])])};
109 // fpcomplex matrixelement(thrust::get<0>(cWaves[cacheToUse][evtNum*numResonances + i]),
110 // thrust::get<1>(cWaves[cacheToUse][evtNum*numResonances + i]));
111 // Note, to make this more efficient we should change it to only an array of fptype's, and read double2 at a
113 fpcomplex ai{RO_CACHE(cWaves[i][evtNum].ai_real), RO_CACHE(cWaves[i][evtNum].ai_imag)};
114 fpcomplex bi{RO_CACHE(cWaves[i][evtNum].bi_real), RO_CACHE(cWaves[i][evtNum].bi_imag)};
116 fpcomplex matrixelement = ai * amp;
117 sumWavesA += matrixelement;
119 // matrixelement = fpcomplex(thrust::get<2>(cWaves[cacheToUse][evtNum*numResonances + i]),
120 // thrust::get<3>(cWaves[cacheToUse][evtNum*numResonances + i]));
121 matrixelement = bi * amp;
122 sumWavesB += matrixelement;
125 fptype _tau = RO_CACHE(p[RO_CACHE(indices[2])]);
126 fptype _xmixing = RO_CACHE(p[RO_CACHE(indices[3])]);
127 fptype _ymixing = RO_CACHE(p[RO_CACHE(indices[4])]);
129 fptype _time = RO_CACHE(evt[RO_CACHE(indices[2 + RO_CACHE(indices[0])])]);
130 fptype _sigma = RO_CACHE(evt[RO_CACHE(indices[3 + RO_CACHE(indices[0])])]);
132 // if ((gpuDebug & 1) && (0 == BLOCKIDX) && (0 == THREADIDX))
133 // if (0 == evtNum) printf("TDDP: (%f, %f) (%f, %f)\n", sumWavesA.real, sumWavesA.imag, sumWavesB.real,
135 // printf("TDDP: %f %f %f %f | %f %f %i\n", m12, m13, _time, _sigma, _xmixing, _tau, evtNum);
139 ret += (norm2(sumWavesA) + norm2(sumWavesB))*cosh(_ymixing * _time);
140 ret += (norm2(sumWavesA) - norm2(sumWavesB))*cos (_xmixing * _time);
141 sumWavesA *= conj(sumWavesB);
142 ret -= 2*sumWavesA.real * sinh(_ymixing * _time);
143 ret -= 2*sumWavesA.imag * sin (_xmixing * _time); // Notice sign difference wrt to Mikhail's code, because I have
148 fptype term1 = thrust::norm(sumWavesA) + thrust::norm(sumWavesB);
149 fptype term2 = thrust::norm(sumWavesA) - thrust::norm(sumWavesB);
150 sumWavesA *= conj(sumWavesB);
151 // printf("(%i, %i) TDDP: %f %f %f %f %f %f %f\n", BLOCKIDX, THREADIDX, term1, term2, sumWavesA.real,
152 // sumWavesA.imag, m12, m13, _tau);
154 // Cannot use callFunction on resolution function.
155 int effFunctionIdx = parIndexFromResIndex(numResonances);
156 int resFunctionIdx = RO_CACHE(indices[5]);
157 int resFunctionPar = 2 + effFunctionIdx;
161 if(resFunctionIdx == SPECIAL_RESOLUTION_FLAG) {
162 // In this case there are multiple resolution functions, they are stored after the efficiency function,
163 // and which one we use depends on the measured mother-particle mass.
165 fptype massd0 = RO_CACHE(evt[indices[7 + RO_CACHE(indices[0])]]);
166 fptype minMass = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 6]);
167 fptype md0Step = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 7]);
168 int res_to_use = (massd0 <= minMass) ? 0 : static_cast<int>(floor((massd0 - minMass) / md0Step));
169 int maxFcn = RO_CACHE(indices[2 + effFunctionIdx]);
171 if(res_to_use > maxFcn)
174 // Now calculate index of resolution function.
175 // At the end of the array are indices efficiency_function, efficiency_parameters, maxFcn, res_function_1,
176 // res_function_1_nP, par1, par2 ... res_function_2, res_function_2_nP, ...
177 res_to_use = 3 + effFunctionIdx + res_to_use * (2 + RO_CACHE(indices[effFunctionIdx + 4]));
178 // NB this assumes all resolution functions have the same number of parameters. The number
179 // of parameters in the first resolution function is stored in effFunctionIdx+3; add one to
180 // account for the index of the resolution function itself in the device function table, one
181 // to account for the number-of-parameters index, and this is the space taken up by each
182 // resolution function. Multiply by res_to_use to get the number of spaces to skip to get to
185 resFunctionIdx = RO_CACHE(indices[res_to_use]);
186 resFunctionPar = res_to_use + 1;
189 ret = (*(reinterpret_cast<device_resfunction_ptr>(device_function_table[resFunctionIdx])))(term1,
202 // For the reversed (mistagged) fraction, we make the
203 // interchange A <-> B. So term1 stays the same,
204 // term2 changes sign, and AB* becomes BA*.
205 // Efficiency remains the same for the mistagged part,
206 // because it depends on the momenta of the pi+ and pi-,
207 // which don't change even though we tagged a D0 as D0bar.
209 fptype mistag = RO_CACHE(functorConstants[RO_CACHE(indices[1]) + 5]);
211 if(mistag > 0) { // This should be either true or false for all events, so no branch is caused.
212 // See header file for explanation of 'mistag' variable - it is actually the probability
213 // of having the correct sign, given that we have a correctly reconstructed D meson.
214 mistag = RO_CACHE(evt[RO_CACHE(indices[md0_offset + 7 + RO_CACHE(indices[0])])]);
217 * (*(reinterpret_cast<device_resfunction_ptr>(device_function_table[resFunctionIdx])))(
228 &(indices[resFunctionPar]));
231 fptype eff = callFunction(evt, RO_CACHE(indices[effFunctionIdx]), RO_CACHE(indices[effFunctionIdx + 1]));
232 // internalDebug = 0;
238 __device__ device_function_ptr ptr_to_Tddp = device_Tddp;
240 __host__ TddpPdf::TddpPdf(std::string n,
245 EventNumber eventNumber,
247 MixingTimeResolution *r,
250 : GooPdf(n, _dtime, _sigmat, m12, m13, eventNumber)
255 , totalEventSize(5) // Default 5 = m12, m13, time, sigma_t, evtNum
257 for(auto &cachedWave : cachedWaves)
258 cachedWave = nullptr;
260 fptype decayConstants[6];
261 decayConstants[5] = 0;
264 registerObservable(*mistag);
266 decayConstants[5] = 1; // Flags existence of mistag
269 std::vector<unsigned int> pindices;
270 pindices.push_back(registerConstants(6));
271 decayConstants[0] = decayInfo.motherMass;
272 decayConstants[1] = decayInfo.daug1Mass;
273 decayConstants[2] = decayInfo.daug2Mass;
274 decayConstants[3] = decayInfo.daug3Mass;
275 decayConstants[4] = decayInfo.meson_radius;
277 functorConstants, decayConstants, 6 * sizeof(fptype), cIndex * sizeof(fptype), cudaMemcpyHostToDevice);
279 pindices.push_back(registerParameter(decayInfo._tau));
280 pindices.push_back(registerParameter(decayInfo._xmixing));
281 pindices.push_back(registerParameter(decayInfo._ymixing));
282 if(resolution->getDeviceFunction() < 0)
283 throw GooFit::GeneralError("The resolution device function index {} must be more than 0",
284 resolution->getDeviceFunction());
285 pindices.push_back(static_cast<unsigned int>(resolution->getDeviceFunction()));
286 pindices.push_back(decayInfo.resonances.size());
288 static int cacheCount = 0;
289 cacheToUse = cacheCount++;
290 pindices.push_back(cacheToUse);
292 for(auto &resonance : decayInfo.resonances) {
293 pindices.push_back(registerParameter(resonance->amp_real));
294 pindices.push_back(registerParameter(resonance->amp_imag));
295 pindices.push_back(resonance->getFunctionIndex());
296 pindices.push_back(resonance->getParameterIndex());
297 resonance->setConstantIndex(cIndex);
298 components.push_back(resonance);
301 pindices.push_back(efficiency->getFunctionIndex());
302 pindices.push_back(efficiency->getParameterIndex());
303 components.push_back(efficiency);
305 resolution->createParameters(pindices, this);
306 GET_FUNCTION_ADDR(ptr_to_Tddp);
307 initialize(pindices);
309 redoIntegral = new bool[decayInfo.resonances.size()];
310 cachedMasses = new fptype[decayInfo.resonances.size()];
311 cachedWidths = new fptype[decayInfo.resonances.size()];
312 integrals = new ThreeComplex **[decayInfo.resonances.size()];
313 integrators = new SpecialDalitzIntegrator **[decayInfo.resonances.size()];
314 calculators = new SpecialWaveCalculator *[decayInfo.resonances.size()];
316 for(int i = 0; i < decayInfo.resonances.size(); ++i) {
317 redoIntegral[i] = true;
318 cachedMasses[i] = -1;
319 cachedWidths[i] = -1;
320 integrators[i] = new SpecialDalitzIntegrator *[decayInfo.resonances.size()];
321 calculators[i] = new SpecialWaveCalculator(parameters, i);
322 integrals[i] = new ThreeComplex *[decayInfo.resonances.size()];
324 for(int j = 0; j < decayInfo.resonances.size(); ++j) {
325 integrals[i][j] = new ThreeComplex(0, 0, 0, 0, 0, 0);
326 integrators[i][j] = new SpecialDalitzIntegrator(parameters, i, j);
330 addSpecialMask(PdfBase::ForceSeparateNorm);
333 __host__ TddpPdf::TddpPdf(std::string n,
338 EventNumber eventNumber,
340 std::vector<MixingTimeResolution *> &r,
344 : GooPdf(n, _dtime, _sigmat, m12, m13, eventNumber, md0)
349 r[0]) // Only used for normalisation, which only depends on x and y - it doesn't matter which one we use.
350 , totalEventSize(6) // This case adds the D0 mass by default.
352 for(auto &cachedWave : cachedWaves)
353 cachedWave = nullptr;
355 fptype decayConstants[8];
356 decayConstants[5] = 0;
357 decayConstants[6] = md0.getLowerLimit();
358 decayConstants[7] = (md0.getUpperLimit() - md0.getLowerLimit()) / r.size();
361 registerObservable(*mistag);
363 decayConstants[5] = 1; // Flags existence of mistag
366 std::vector<unsigned int> pindices;
367 pindices.push_back(registerConstants(8));
368 decayConstants[0] = decayInfo.motherMass;
369 decayConstants[1] = decayInfo.daug1Mass;
370 decayConstants[2] = decayInfo.daug2Mass;
371 decayConstants[3] = decayInfo.daug3Mass;
372 decayConstants[4] = decayInfo.meson_radius;
374 functorConstants, decayConstants, 8 * sizeof(fptype), cIndex * sizeof(fptype), cudaMemcpyHostToDevice);
376 pindices.push_back(registerParameter(decayInfo._tau));
377 pindices.push_back(registerParameter(decayInfo._xmixing));
378 pindices.push_back(registerParameter(decayInfo._ymixing));
379 pindices.push_back(SPECIAL_RESOLUTION_FLAG); // Flag existence of multiple resolution functions.
380 pindices.push_back(decayInfo.resonances.size());
382 static int cacheCount = 0;
383 cacheToUse = cacheCount++;
384 pindices.push_back(cacheToUse);
386 for(auto &resonance : decayInfo.resonances) {
387 pindices.push_back(registerParameter(resonance->amp_real));
388 pindices.push_back(registerParameter(resonance->amp_imag));
389 pindices.push_back(resonance->getFunctionIndex());
390 pindices.push_back(resonance->getParameterIndex());
391 resonance->setConstantIndex(cIndex);
392 components.push_back(resonance);
395 pindices.push_back(efficiency->getFunctionIndex());
396 pindices.push_back(efficiency->getParameterIndex());
397 components.push_back(efficiency);
399 pindices.push_back(r.size() - 1); // Highest index, not number of functions.
402 if(i->getDeviceFunction() < 0)
403 throw GooFit::GeneralError("Device function index {} must be more than 0", i->getDeviceFunction());
404 pindices.push_back(static_cast<unsigned int>(i->getDeviceFunction()));
405 i->createParameters(pindices, this);
408 GET_FUNCTION_ADDR(ptr_to_Tddp);
409 initialize(pindices);
411 redoIntegral = new bool[decayInfo.resonances.size()];
412 cachedMasses = new fptype[decayInfo.resonances.size()];
413 cachedWidths = new fptype[decayInfo.resonances.size()];
414 integrals = new ThreeComplex **[decayInfo.resonances.size()];
415 integrators = new SpecialDalitzIntegrator **[decayInfo.resonances.size()];
416 calculators = new SpecialWaveCalculator *[decayInfo.resonances.size()];
418 for(int i = 0; i < decayInfo.resonances.size(); ++i) {
419 redoIntegral[i] = true;
420 cachedMasses[i] = -1;
421 cachedWidths[i] = -1;
422 integrators[i] = new SpecialDalitzIntegrator *[decayInfo.resonances.size()];
423 calculators[i] = new SpecialWaveCalculator(parameters, i);
424 integrals[i] = new ThreeComplex *[decayInfo.resonances.size()];
426 for(int j = 0; j < decayInfo.resonances.size(); ++j) {
427 integrals[i][j] = new ThreeComplex(0, 0, 0, 0, 0, 0);
428 integrators[i][j] = new SpecialDalitzIntegrator(parameters, i, j);
432 addSpecialMask(PdfBase::ForceSeparateNorm);
435 __host__ void TddpPdf::setDataSize(unsigned int dataSize, unsigned int evtSize) {
436 // Default 5 is m12, m13, time, sigma_t, evtNum
437 totalEventSize = evtSize;
438 if(totalEventSize < 5)
439 throw GooFit::GeneralError("totalEventSize {} must be 5 or more", totalEventSize);
442 for(auto &cachedWave : cachedWaves)
446 numEntries = dataSize;
448 // Ideally this would not be required, this would be called AFTER setData which will set m_iEventsPerTask
451 MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
452 MPI_Comm_rank(MPI_COMM_WORLD, &myId);
454 int perTask = numEntries / numProcs;
456 int *counts = new int[numProcs];
458 for(int i = 0; i < numProcs - 1; i++)
461 counts[numProcs - 1] = numEntries - perTask * (numProcs - 1);
463 setNumPerTask(this, counts[myId]);
468 for(int i = 0; i < 16; i++) {
470 cachedWaves[i] = new thrust::device_vector<WaveHolder_s>(m_iEventsPerTask);
472 cachedWaves[i] = new thrust::device_vector<WaveHolder_s>(dataSize);
474 void *dummy = thrust::raw_pointer_cast(cachedWaves[i]->data());
475 MEMCPY_TO_SYMBOL(cWaves, &dummy, sizeof(WaveHolder_s *), i * sizeof(WaveHolder_s *), cudaMemcpyHostToDevice);
481 __host__ fptype TddpPdf::normalize() const {
482 recursiveSetNormalisation(1); // Not going to normalize efficiency,
483 // so set normalisation factor to 1 so it doesn't get multiplied by zero.
484 // Copy at this time to ensure that the SpecialWaveCalculators, which need the efficiency,
485 // don't get zeroes through multiplying by the normFactor.
486 MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
487 // std::cout << "TDDP normalisation " << getName() << std::endl;
489 int totalBins = _m12.getNumBins() * _m13.getNumBins();
491 if(!dalitzNormRange) {
492 gooMalloc((void **)&dalitzNormRange, 6 * sizeof(fptype));
494 auto *host_norms = new fptype[6];
495 host_norms[0] = _m12.getLowerLimit();
496 host_norms[1] = _m12.getUpperLimit();
497 host_norms[2] = _m12.getNumBins();
498 host_norms[3] = _m13.getLowerLimit();
499 host_norms[4] = _m13.getUpperLimit();
500 host_norms[5] = _m13.getNumBins();
501 MEMCPY(dalitzNormRange, host_norms, 6 * sizeof(fptype), cudaMemcpyHostToDevice);
505 for(unsigned int i = 0; i < decayInfo.resonances.size(); ++i) {
506 redoIntegral[i] = forceRedoIntegrals;
508 if(!(decayInfo.resonances[i]->parametersChanged()))
511 redoIntegral[i] = true;
514 forceRedoIntegrals = false;
516 // Only do this bit if masses or widths have changed.
517 thrust::constant_iterator<fptype *> arrayAddress(dalitzNormRange);
518 thrust::counting_iterator<int> binIndex(0);
520 // NB, SpecialWaveCalculator assumes that fit is unbinned!
521 // And it needs to know the total event size, not just observables
522 // for this particular PDF component.
523 thrust::constant_iterator<fptype *> dataArray(dev_event_array);
524 thrust::constant_iterator<int> eventSize(totalEventSize);
525 thrust::counting_iterator<int> eventIndex(0);
527 static int normCall = 0;
530 for(int i = 0; i < decayInfo.resonances.size(); ++i) {
531 if(redoIntegral[i]) {
534 thrust::make_zip_iterator(thrust::make_tuple(eventIndex, dataArray, eventSize)),
535 thrust::make_zip_iterator(thrust::make_tuple(eventIndex + m_iEventsPerTask, arrayAddress, eventSize)),
536 strided_range<thrust::device_vector<WaveHolder_s>::iterator>(
537 cachedWaves[i]->begin(), cachedWaves[i]->end(), 1)
542 thrust::make_zip_iterator(thrust::make_tuple(eventIndex, dataArray, eventSize)),
543 thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, arrayAddress, eventSize)),
544 strided_range<thrust::device_vector<WaveHolder_s>::iterator>(
545 cachedWaves[i]->begin(), cachedWaves[i]->end(), 1)
549 // std::cout << "Integral for resonance " << i << " " << numEntries << " " << totalEventSize << std::endl;
552 // Possibly this can be done more efficiently by exploiting symmetry?
553 for(int j = 0; j < decayInfo.resonances.size(); ++j) {
554 if((!redoIntegral[i]) && (!redoIntegral[j]))
557 ThreeComplex dummy(0, 0, 0, 0, 0, 0);
558 SpecialComplexSum complexSum;
559 (*(integrals[i][j])) = thrust::transform_reduce(
560 thrust::make_zip_iterator(thrust::make_tuple(binIndex, arrayAddress)),
561 thrust::make_zip_iterator(thrust::make_tuple(binIndex + totalBins, arrayAddress)),
562 *(integrators[i][j]),
566 std::cout << "With resonance " << j << ": "
567 << thrust::get<0>(*(integrals[i][j])) << " "
568 << thrust::get<1>(*(integrals[i][j])) << " "
569 << thrust::get<2>(*(integrals[i][j])) << " "
570 << thrust::get<3>(*(integrals[i][j])) << " "
571 << thrust::get<4>(*(integrals[i][j])) << " "
572 << thrust::get<5>(*(integrals[i][j])) << std::endl;
577 // End of time-consuming integrals.
579 fpcomplex integralA_2(0, 0);
580 fpcomplex integralB_2(0, 0);
581 fpcomplex integralABs(0, 0);
583 for(unsigned int i = 0; i < decayInfo.resonances.size(); ++i) {
584 int param_i = parameters + resonanceOffset + resonanceSize * i;
585 fpcomplex amplitude_i(host_params[host_indices[param_i]], host_params[host_indices[param_i + 1]]);
587 for(unsigned int j = 0; j < decayInfo.resonances.size(); ++j) {
588 int param_j = parameters + resonanceOffset + resonanceSize * j;
589 fpcomplex amplitude_j(host_params[host_indices[param_j]],
590 -host_params[host_indices[param_j + 1]]); // Notice complex conjugation
592 integralA_2 += (amplitude_i * amplitude_j
593 * fpcomplex(thrust::get<0>(*(integrals[i][j])), thrust::get<1>(*(integrals[i][j]))));
594 integralABs += (amplitude_i * amplitude_j
595 * fpcomplex(thrust::get<2>(*(integrals[i][j])), thrust::get<3>(*(integrals[i][j]))));
596 integralB_2 += (amplitude_i * amplitude_j
597 * fpcomplex(thrust::get<4>(*(integrals[i][j])), thrust::get<5>(*(integrals[i][j]))));
601 int idx = i * decayInfo.resonances.size() + j;
602 if (0 == host_callnumber) std::cout << "Integral contribution " << i << ", " << j << " " << idx << " : "
603 << amplitude_i << " "
604 << amplitude_j << " ("
605 << real(amplitude_i * amplitude_j * complex<fptype>(thrust::get<0>(*(integrals[i][j])),
606 thrust::get<1>(*(integrals[i][j])))) << ", "
607 << imag(amplitude_i * amplitude_j * complex<fptype>(thrust::get<0>(*(integrals[i][j])),
608 thrust::get<1>(*(integrals[i][j])))) << ") ("
609 << real(amplitude_i * amplitude_j * complex<fptype>(thrust::get<2>(*(integrals[i][j])),
610 thrust::get<3>(*(integrals[i][j])))) << ", "
611 << imag(amplitude_i * amplitude_j * complex<fptype>(thrust::get<2>(*(integrals[i][j])),
612 thrust::get<3>(*(integrals[i][j])))) << ") ("
613 << real(amplitude_i * amplitude_j * complex<fptype>(thrust::get<4>(*(integrals[i][j])),
614 thrust::get<5>(*(integrals[i][j])))) << ", "
615 << imag(amplitude_i * amplitude_j * complex<fptype>(thrust::get<4>(*(integrals[i][j])),
616 thrust::get<5>(*(integrals[i][j])))) << ") "
617 << thrust::get<0>(*(integrals[i][j])) << ", "
618 << thrust::get<1>(*(integrals[i][j])) << ") ("
619 << thrust::get<2>(*(integrals[i][j])) << ", "
620 << thrust::get<3>(*(integrals[i][j])) << ") ("
621 << thrust::get<4>(*(integrals[i][j])) << ", "
622 << thrust::get<5>(*(integrals[i][j])) << ") ("
623 << real(integralA_2) << ", " << imag(integralA_2) << ") "
630 double dalitzIntegralOne = integralA_2.real(); // Notice that this is already the abs2, so it's real by
631 // construction; but the compiler doesn't know that.
632 double dalitzIntegralTwo = integralB_2.real();
633 double dalitzIntegralThr = integralABs.real();
634 double dalitzIntegralFou = integralABs.imag();
636 fptype tau = host_params[host_indices[parameters + 2]];
637 fptype xmixing = host_params[host_indices[parameters + 3]];
638 fptype ymixing = host_params[host_indices[parameters + 4]];
640 fptype ret = resolution->normalisation(
641 dalitzIntegralOne, dalitzIntegralTwo, dalitzIntegralThr, dalitzIntegralFou, tau, xmixing, ymixing);
643 double binSizeFactor = 1;
644 binSizeFactor *= ((_m12.getUpperLimit() - _m12.getLowerLimit()) / _m12.getNumBins());
645 binSizeFactor *= ((_m13.getUpperLimit() - _m13.getLowerLimit()) / _m13.getNumBins());
646 ret *= binSizeFactor;
648 host_normalisation[parameters] = 1.0 / ret;
649 // std::cout << "End of TDDP normalisation: " << ret << " " << host_normalisation[parameters] << " " <<
650 // binSizeFactor << std::endl;
655 SpecialDalitzIntegrator::SpecialDalitzIntegrator(int pIdx, unsigned int ri, unsigned int rj)
658 , parameters(pIdx) {}
660 __device__ ThreeComplex SpecialDalitzIntegrator::operator()(thrust::tuple<int, fptype *> t) const {
661 // Bin index, base address [lower, upper,getNumBins]
662 // Notice that this is basically MetricTaker::operator (binned) with the special-case knowledge
663 // that event size is two, and that the function to call is dev_Tddp_calcIntegrals.
665 int globalBinNumber = thrust::get<0>(t);
666 fptype lowerBoundM12 = thrust::get<1>(t)[0];
667 fptype upperBoundM12 = thrust::get<1>(t)[1];
668 auto numBinsM12 = static_cast<int>(floor(thrust::get<1>(t)[2] + 0.5));
669 int binNumberM12 = globalBinNumber % numBinsM12;
670 fptype binCenterM12 = upperBoundM12 - lowerBoundM12;
671 binCenterM12 /= numBinsM12;
672 binCenterM12 *= (binNumberM12 + 0.5);
673 binCenterM12 += lowerBoundM12;
675 globalBinNumber /= numBinsM12;
676 fptype lowerBoundM13 = thrust::get<1>(t)[3];
677 fptype upperBoundM13 = thrust::get<1>(t)[4];
678 auto numBinsM13 = static_cast<int>(floor(thrust::get<1>(t)[5] + 0.5));
679 fptype binCenterM13 = upperBoundM13 - lowerBoundM13;
680 binCenterM13 /= numBinsM13;
681 binCenterM13 *= (globalBinNumber + 0.5);
682 binCenterM13 += lowerBoundM13;
684 // if (0 == THREADIDX) cuPrintf("%i %i %i %f %f operator\n", thrust::get<0>(t), thrust::get<0>(t) % numBinsM12,
685 // globalBinNumber, binCenterM12, binCenterM13);
686 unsigned int *indices = paramIndices + parameters;
688 = device_Tddp_calcIntegrals(binCenterM12, binCenterM13, resonance_i, resonance_j, cudaArray, indices);
690 fptype fakeEvt[10]; // Need room for many observables in case m12 or m13 were assigned a high index in an
691 // event-weighted fit.
692 fakeEvt[RO_CACHE(indices[RO_CACHE(indices[0]) + 2 + 2])] = binCenterM12;
693 fakeEvt[RO_CACHE(indices[RO_CACHE(indices[0]) + 2 + 3])] = binCenterM13;
694 unsigned int numResonances = indices[6];
695 int effFunctionIdx = parIndexFromResIndex(numResonances);
696 // if (thrust::get<0>(t) == 19840) {internalDebug1 = BLOCKIDX; internalDebug2 = THREADIDX;}
697 // fptype eff = (*(reinterpret_cast<device_function_ptr>(device_function_table[indices[effFunctionIdx]])))(fakeEvt,
698 // cudaArray, paramIndices + indices[effFunctionIdx + 1]);
699 fptype eff = callFunction(fakeEvt, RO_CACHE(indices[effFunctionIdx]), RO_CACHE(indices[effFunctionIdx + 1]));
700 // if (thrust::get<0>(t) == 19840) {
701 // internalDebug1 = -1;
702 // internalDebug2 = -1;
703 // printf("Efficiency: %i %f %f %f %i\n", thrust::get<0>(t), binCenterM12, binCenterM13, eff, effFunctionIdx);
704 // printf("Efficiency: %f %f %f %f %f %i %i\n", fakeEvt[0], fakeEvt[1], fakeEvt[2], fakeEvt[3], fakeEvt[4],
705 // indices[indices[0] + 2 + 2], indices[indices[0] + 2 + 3]);
708 // Multiplication by eff, not sqrt(eff), is correct:
709 // These complex numbers will not be squared when they
710 // go into the integrals. They've been squared already,
712 thrust::get<0>(ret) *= eff;
713 thrust::get<1>(ret) *= eff;
714 thrust::get<2>(ret) *= eff;
715 thrust::get<3>(ret) *= eff;
716 thrust::get<4>(ret) *= eff;
717 thrust::get<5>(ret) *= eff;
721 SpecialWaveCalculator::SpecialWaveCalculator(int pIdx, unsigned int res_idx)
722 : resonance_i(res_idx)
723 , parameters(pIdx) {}
725 __device__ WaveHolder_s SpecialWaveCalculator::operator()(thrust::tuple<int, fptype *, int> t) const {
726 // Calculates the BW values for a specific resonance.
727 // The 'A' wave stores the value at each point, the 'B'
728 // at the opposite (reversed) point.
736 int evtNum = thrust::get<0>(t);
737 fptype *evt = thrust::get<1>(t) + (evtNum * thrust::get<2>(t));
739 unsigned int *indices = paramIndices + parameters; // Jump to TDDP position within parameters array
740 fptype m12 = RO_CACHE(evt[indices[4 + indices[0]]]);
741 fptype m13 = RO_CACHE(evt[indices[5 + indices[0]]]);
743 fptype motherMass = functorConstants[indices[1] + 0];
744 fptype daug1Mass = functorConstants[indices[1] + 1];
745 fptype daug2Mass = functorConstants[indices[1] + 2];
746 fptype daug3Mass = functorConstants[indices[1] + 3];
748 if(!inDalitz(m12, m13, motherMass, daug1Mass, daug2Mass, daug3Mass))
752 = motherMass * motherMass + daug1Mass * daug1Mass + daug2Mass * daug2Mass + daug3Mass * daug3Mass - m12 - m13;
754 int parameter_i = parIndexFromResIndex(resonance_i); // Find position of this resonance relative to TDDP start
755 unsigned int functn_i = indices[parameter_i + 2];
756 unsigned int params_i = indices[parameter_i + 3];
758 fpcomplex ai = getResonanceAmplitude(m12, m13, m23, functn_i, params_i);
759 fpcomplex bi = getResonanceAmplitude(m13, m12, m23, functn_i, params_i);
761 // printf("Amplitudes %f, %f => (%f %f) (%f %f)\n", m12, m13, ai.real, ai.imag, bi.real, bi.imag);
763 ret.ai_real = ai.real();
764 ret.ai_imag = ai.imag();
765 ret.bi_real = bi.real();
766 ret.bi_imag = bi.imag();
771 } // namespace GooFit