1 #include <goofit/Error.h>
2 #include <goofit/PDFs/physics/IncoherentSumPdf.h>
3 #include <goofit/PDFs/physics/ResonancePdf.h>
5 #include <thrust/transform_reduce.h>
9 const int resonanceOffset_incoherent = 4; // Offset of the first resonance into the parameter index array.
10 // Notice that this is different from the TddpPdf case because there's no time information.
11 // In particular the offset consists of nP, constant index, number of resonances, and cache index.
13 __device__ fpcomplex *cResonanceValues[10];
15 __device__ inline int parIndexFromResIndex_incoherent(int resIndex) {
16 return resonanceOffset_incoherent + resIndex * resonanceSize;
19 __device__ fptype device_incoherent(fptype *evt, fptype *p, unsigned int *indices) {
20 // Calculates the incoherent sum over the resonances.
21 auto evtNum = static_cast<int>(floor(0.5 + evt[indices[4 + indices[0]]]));
24 unsigned int numResonances = indices[2];
25 unsigned int cacheToUse = indices[3];
27 for(int i = 0; i < numResonances; ++i) {
28 int paramIndex = parIndexFromResIndex_incoherent(i);
29 fptype amplitude = p[indices[paramIndex + 0]];
31 fpcomplex matrixelement = cResonanceValues[cacheToUse][evtNum * numResonances + i];
32 ret += amplitude * thrust::norm(matrixelement);
35 // Multiply by efficiency
36 int effFunctionIdx = parIndexFromResIndex_incoherent(numResonances);
37 fptype eff = callFunction(evt, indices[effFunctionIdx], indices[effFunctionIdx + 1]);
44 __device__ device_function_ptr ptr_to_incoherent = device_incoherent;
46 __host__ IncoherentSumPdf::IncoherentSumPdf(
47 std::string n, Observable m12, Observable m13, EventNumber eventNumber, DecayInfo3 decay, GooPdf *eff)
48 : GooPdf(n, m12, m13, eventNumber)
52 , dalitzNormRange(nullptr)
53 , cachedResonances(nullptr)
55 , forceRedoIntegrals(true)
56 , totalEventSize(3) // Default 3 = m12, m13, evtNum. Will likely be overridden.
59 , integrators(nullptr)
60 , calculators(nullptr) {
61 std::vector<unsigned int> pindices;
62 pindices.push_back(registerConstants(5));
63 fptype decayConstants[5];
64 decayConstants[0] = decayInfo.motherMass;
65 decayConstants[1] = decayInfo.daug1Mass;
66 decayConstants[2] = decayInfo.daug2Mass;
67 decayConstants[3] = decayInfo.daug3Mass;
68 decayConstants[4] = decayInfo.meson_radius;
70 functorConstants, decayConstants, 5 * sizeof(fptype), cIndex * sizeof(fptype), cudaMemcpyHostToDevice);
72 pindices.push_back(decayInfo.resonances.size());
73 static int cacheCount = 0;
74 cacheToUse = cacheCount++;
75 pindices.push_back(cacheToUse);
77 for(auto &resonance : decayInfo.resonances) {
78 pindices.push_back(registerParameter(resonance->amp_real));
79 pindices.push_back(registerParameter(resonance->amp_real));
80 // Not going to use amp_imag, but need a dummy index so the resonance size will be consistent.
81 pindices.push_back(resonance->getFunctionIndex());
82 pindices.push_back(resonance->getParameterIndex());
83 resonance->setConstantIndex(cIndex);
84 components.push_back(resonance);
87 pindices.push_back(efficiency->getFunctionIndex());
88 pindices.push_back(efficiency->getParameterIndex());
89 components.push_back(efficiency);
91 GET_FUNCTION_ADDR(ptr_to_incoherent);
94 redoIntegral = new bool[decayInfo.resonances.size()];
95 cachedMasses = new fptype[decayInfo.resonances.size()];
96 cachedWidths = new fptype[decayInfo.resonances.size()];
97 integrals = new double[decayInfo.resonances.size()];
99 for(int i = 0; i < decayInfo.resonances.size(); ++i) {
100 redoIntegral[i] = true;
101 cachedMasses[i] = -1;
102 cachedWidths[i] = -1;
106 integrators = new SpecialIncoherentIntegrator *[decayInfo.resonances.size()];
107 calculators = new SpecialIncoherentResonanceCalculator *[decayInfo.resonances.size()];
109 for(int i = 0; i < decayInfo.resonances.size(); ++i) {
110 integrators[i] = new SpecialIncoherentIntegrator(parameters, i);
111 calculators[i] = new SpecialIncoherentResonanceCalculator(parameters, i);
114 addSpecialMask(PdfBase::ForceSeparateNorm);
117 __host__ void IncoherentSumPdf::setDataSize(unsigned int dataSize, unsigned int evtSize) {
118 // Default 3 is m12, m13, evtNum
119 totalEventSize = evtSize;
120 if(totalEventSize < 3)
121 throw GooFit::GeneralError("totalEventSize {} must be 3 or more", totalEventSize);
123 if(cachedResonances) {
124 delete cachedResonances;
127 numEntries = dataSize;
128 cachedResonances = new thrust::device_vector<fpcomplex>(dataSize * decayInfo.resonances.size());
129 void *dummy = thrust::raw_pointer_cast(cachedResonances->data());
131 cResonanceValues, &dummy, sizeof(fpcomplex *), cacheToUse * sizeof(fpcomplex *), cudaMemcpyHostToDevice);
135 __host__ fptype IncoherentSumPdf::normalize() const {
136 recursiveSetNormalisation(1); // Not going to normalize efficiency,
137 // so set normalisation factor to 1 so it doesn't get multiplied by zero.
138 // Copy at this time to ensure that the SpecialCalculators, which need the efficiency,
139 // don't get zeroes through multiplying by the normFactor.
140 MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
142 int totalBins = _m12.getNumBins() * _m13.getNumBins();
144 if(!dalitzNormRange) {
145 gooMalloc((void **)&dalitzNormRange, 6 * sizeof(fptype));
147 auto *host_norms = new fptype[6];
148 host_norms[0] = _m12.getLowerLimit();
149 host_norms[1] = _m12.getUpperLimit();
150 host_norms[2] = _m12.getNumBins();
151 host_norms[3] = _m13.getLowerLimit();
152 host_norms[4] = _m13.getUpperLimit();
153 host_norms[5] = _m13.getNumBins();
154 MEMCPY(dalitzNormRange, host_norms, 6 * sizeof(fptype), cudaMemcpyHostToDevice);
158 // Check if efficiency changes force redoing the integrals.
159 if(efficiency->parametersChanged()) {
160 forceRedoIntegrals = true;
163 // Check for changed masses or forced integral redo.
164 for(unsigned int i = 0; i < decayInfo.resonances.size(); ++i) {
165 redoIntegral[i] = forceRedoIntegrals;
167 if(!(decayInfo.resonances[i]->parametersChanged()))
170 redoIntegral[i] = true;
173 forceRedoIntegrals = false;
175 thrust::constant_iterator<fptype *> arrayAddress(dalitzNormRange);
176 thrust::counting_iterator<int> binIndex(0);
178 // NB, SpecialIncoherentResonanceCalculator assumes that fit is unbinned!
179 // And it needs to know the total event size, not just observables
180 // for this particular PDF component.
181 thrust::constant_iterator<fptype *> dataArray(dev_event_array);
182 thrust::constant_iterator<int> eventSize(totalEventSize);
183 thrust::counting_iterator<int> eventIndex(0);
185 for(int i = 0; i < decayInfo.resonances.size(); ++i) {
186 if(redoIntegral[i]) {
188 thrust::make_zip_iterator(thrust::make_tuple(eventIndex, dataArray, eventSize)),
189 thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, arrayAddress, eventSize)),
190 strided_range<thrust::device_vector<fpcomplex>::iterator>(
191 cachedResonances->begin() + i, cachedResonances->end(), decayInfo.resonances.size())
196 static thrust::plus<fptype> cudaPlus;
197 integrals[i] = thrust::transform_reduce(
198 thrust::make_zip_iterator(thrust::make_tuple(binIndex, arrayAddress)),
199 thrust::make_zip_iterator(thrust::make_tuple(binIndex + totalBins, arrayAddress)),
206 // End of time-consuming integrals and caching of BWs over Dalitz plot.
210 for(unsigned int i = 0; i < decayInfo.resonances.size(); ++i) {
211 int param_i = parameters + resonanceOffset_incoherent + resonanceSize * i;
212 fptype amplitude = host_params[host_indices[param_i]];
213 ret += amplitude * integrals[i];
216 double binSizeFactor = 1;
217 binSizeFactor *= _m12.getBinSize();
218 binSizeFactor *= _m13.getBinSize();
219 ret *= binSizeFactor;
221 host_normalisation[parameters] = 1.0 / ret;
225 SpecialIncoherentIntegrator::SpecialIncoherentIntegrator(int pIdx, unsigned int ri)
227 , parameters(pIdx) {}
229 __device__ fptype SpecialIncoherentIntegrator::operator()(thrust::tuple<int, fptype *> t) const {
230 // Returns integral of specific BW over Dalitz plot, to be cached and
231 // multiplied by rapidly-changing amplitude.
233 // Bin index, base address [lower, upper,getNumBins]
234 // Notice that this is basically MetricTaker::operator (binned) with the special-case knowledge
235 // that event size is two, and that the function to call is getResonanceAmplitude.
237 int globalBinNumber = thrust::get<0>(t);
238 fptype lowerBoundM12 = thrust::get<1>(t)[0];
239 fptype upperBoundM12 = thrust::get<1>(t)[1];
240 auto numBinsM12 = static_cast<int>(floor(thrust::get<1>(t)[2] + 0.5));
241 int binNumberM12 = globalBinNumber % numBinsM12;
242 fptype binCenterM12 = upperBoundM12 - lowerBoundM12;
243 binCenterM12 /= numBinsM12;
244 binCenterM12 *= (binNumberM12 + 0.5);
245 binCenterM12 += lowerBoundM12;
247 globalBinNumber /= numBinsM12;
248 fptype lowerBoundM13 = thrust::get<1>(t)[3];
249 fptype upperBoundM13 = thrust::get<1>(t)[4];
250 auto numBinsM13 = static_cast<int>(floor(thrust::get<1>(t)[5] + 0.5));
251 fptype binCenterM13 = upperBoundM13 - lowerBoundM13;
252 binCenterM13 /= numBinsM13;
253 binCenterM13 *= (globalBinNumber + 0.5);
254 binCenterM13 += lowerBoundM13;
256 unsigned int *indices = paramIndices + parameters;
257 fptype motherMass = functorConstants[indices[1] + 0];
258 fptype daug1Mass = functorConstants[indices[1] + 1];
259 fptype daug2Mass = functorConstants[indices[1] + 2];
260 fptype daug3Mass = functorConstants[indices[1] + 3];
262 if(!inDalitz(binCenterM12, binCenterM13, motherMass, daug1Mass, daug2Mass, daug3Mass))
266 = parIndexFromResIndex_incoherent(resonance_i); // Find position of this resonance relative to TDDP start
267 unsigned int functn_i = indices[parameter_i + 2];
268 unsigned int params_i = indices[parameter_i + 3];
269 fptype m23 = motherMass * motherMass + daug1Mass * daug1Mass + daug2Mass * daug2Mass + daug3Mass * daug3Mass
270 - binCenterM12 - binCenterM13;
271 fpcomplex ret = getResonanceAmplitude(binCenterM12, binCenterM13, m23, functn_i, params_i);
273 unsigned int numResonances = indices[2];
274 fptype fakeEvt[10]; // Need room for many observables in case m12 or m13 were assigned a high index in an
275 // event-weighted fit.
276 fakeEvt[indices[indices[0] + 2 + 0]] = binCenterM12;
277 fakeEvt[indices[indices[0] + 2 + 1]] = binCenterM13;
278 int effFunctionIdx = parIndexFromResIndex_incoherent(numResonances);
279 fptype eff = callFunction(fakeEvt, indices[effFunctionIdx], indices[effFunctionIdx + 1]);
281 return thrust::norm(ret) * eff;
284 SpecialIncoherentResonanceCalculator::SpecialIncoherentResonanceCalculator(int pIdx, unsigned int res_idx)
285 : resonance_i(res_idx)
286 , parameters(pIdx) {}
288 __device__ fpcomplex SpecialIncoherentResonanceCalculator::operator()(thrust::tuple<int, fptype *, int> t) const {
289 // Returns the BW, or other resonance function, for a specific resonance.
290 // Is special because the value is expected to change slowly, so it's
291 // useful to cache the result.
292 int evtNum = thrust::get<0>(t);
293 fptype *evt = thrust::get<1>(t) + (evtNum * thrust::get<2>(t));
295 unsigned int *indices = paramIndices + parameters; // Jump to TDDP position within parameters array
296 fptype m12 = evt[indices[2 + indices[0]]];
297 fptype m13 = evt[indices[3 + indices[0]]];
298 fptype motherMass = functorConstants[indices[1] + 0];
299 fptype daug1Mass = functorConstants[indices[1] + 1];
300 fptype daug2Mass = functorConstants[indices[1] + 2];
301 fptype daug3Mass = functorConstants[indices[1] + 3];
303 if(!inDalitz(m12, m13, motherMass, daug1Mass, daug2Mass, daug3Mass))
307 = motherMass * motherMass + daug1Mass * daug1Mass + daug2Mass * daug2Mass + daug3Mass * daug3Mass - m12 - m13;
310 = parIndexFromResIndex_incoherent(resonance_i); // Find position of this resonance relative to TDDP start
311 unsigned int functn_i = indices[parameter_i + 2];
312 unsigned int params_i = indices[parameter_i + 3];
313 fpcomplex ret = getResonanceAmplitude(m12, m13, m23, functn_i, params_i);
318 } // namespace GooFit