GooFit  v2.1.3
IncoherentSumPdf.cu
Go to the documentation of this file.
1 #include <goofit/Error.h>
2 #include <goofit/PDFs/physics/IncoherentSumPdf.h>
3 #include <goofit/PDFs/physics/ResonancePdf.h>
4 
5 #include <thrust/transform_reduce.h>
6 
7 namespace GooFit {
8 
9 const int resonanceOffset_incoherent = 4; // Offset of the first resonance into the parameter index array.
10 // Notice that this is different from the TddpPdf case because there's no time information.
11 // In particular the offset consists of nP, constant index, number of resonances, and cache index.
12 
13 __device__ fpcomplex *cResonanceValues[10];
14 
15 __device__ inline int parIndexFromResIndex_incoherent(int resIndex) {
16  return resonanceOffset_incoherent + resIndex * resonanceSize;
17 }
18 
19 __device__ fptype device_incoherent(fptype *evt, fptype *p, unsigned int *indices) {
20  // Calculates the incoherent sum over the resonances.
21  auto evtNum = static_cast<int>(floor(0.5 + evt[indices[4 + indices[0]]]));
22 
23  fptype ret = 0;
24  unsigned int numResonances = indices[2];
25  unsigned int cacheToUse = indices[3];
26 
27  for(int i = 0; i < numResonances; ++i) {
28  int paramIndex = parIndexFromResIndex_incoherent(i);
29  fptype amplitude = p[indices[paramIndex + 0]];
30 
31  fpcomplex matrixelement = cResonanceValues[cacheToUse][evtNum * numResonances + i];
32  ret += amplitude * thrust::norm(matrixelement);
33  }
34 
35  // Multiply by efficiency
36  int effFunctionIdx = parIndexFromResIndex_incoherent(numResonances);
37  fptype eff = callFunction(evt, indices[effFunctionIdx], indices[effFunctionIdx + 1]);
38 
39  ret *= eff;
40 
41  return ret;
42 }
43 
44 __device__ device_function_ptr ptr_to_incoherent = device_incoherent;
45 
46 __host__ IncoherentSumPdf::IncoherentSumPdf(
47  std::string n, Observable m12, Observable m13, EventNumber eventNumber, DecayInfo3 decay, GooPdf *eff)
48  : GooPdf(n, m12, m13, eventNumber)
49  , decayInfo(decay)
50  , _m12(m12)
51  , _m13(m13)
52  , dalitzNormRange(nullptr)
53  , cachedResonances(nullptr)
54  , integrals(nullptr)
55  , forceRedoIntegrals(true)
56  , totalEventSize(3) // Default 3 = m12, m13, evtNum. Will likely be overridden.
57  , cacheToUse(0)
58  , efficiency(eff)
59  , integrators(nullptr)
60  , calculators(nullptr) {
61  std::vector<unsigned int> pindices;
62  pindices.push_back(registerConstants(5));
63  fptype decayConstants[5];
64  decayConstants[0] = decayInfo.motherMass;
65  decayConstants[1] = decayInfo.daug1Mass;
66  decayConstants[2] = decayInfo.daug2Mass;
67  decayConstants[3] = decayInfo.daug3Mass;
68  decayConstants[4] = decayInfo.meson_radius;
69  MEMCPY_TO_SYMBOL(
70  functorConstants, decayConstants, 5 * sizeof(fptype), cIndex * sizeof(fptype), cudaMemcpyHostToDevice);
71 
72  pindices.push_back(decayInfo.resonances.size());
73  static int cacheCount = 0;
74  cacheToUse = cacheCount++;
75  pindices.push_back(cacheToUse);
76 
77  for(auto &resonance : decayInfo.resonances) {
78  pindices.push_back(registerParameter(resonance->amp_real));
79  pindices.push_back(registerParameter(resonance->amp_real));
80  // Not going to use amp_imag, but need a dummy index so the resonance size will be consistent.
81  pindices.push_back(resonance->getFunctionIndex());
82  pindices.push_back(resonance->getParameterIndex());
83  resonance->setConstantIndex(cIndex);
84  components.push_back(resonance);
85  }
86 
87  pindices.push_back(efficiency->getFunctionIndex());
88  pindices.push_back(efficiency->getParameterIndex());
89  components.push_back(efficiency);
90 
91  GET_FUNCTION_ADDR(ptr_to_incoherent);
92  initialize(pindices);
93 
94  redoIntegral = new bool[decayInfo.resonances.size()];
95  cachedMasses = new fptype[decayInfo.resonances.size()];
96  cachedWidths = new fptype[decayInfo.resonances.size()];
97  integrals = new double[decayInfo.resonances.size()];
98 
99  for(int i = 0; i < decayInfo.resonances.size(); ++i) {
100  redoIntegral[i] = true;
101  cachedMasses[i] = -1;
102  cachedWidths[i] = -1;
103  integrals[i] = 0;
104  }
105 
106  integrators = new SpecialIncoherentIntegrator *[decayInfo.resonances.size()];
107  calculators = new SpecialIncoherentResonanceCalculator *[decayInfo.resonances.size()];
108 
109  for(int i = 0; i < decayInfo.resonances.size(); ++i) {
110  integrators[i] = new SpecialIncoherentIntegrator(parameters, i);
111  calculators[i] = new SpecialIncoherentResonanceCalculator(parameters, i);
112  }
113 
114  addSpecialMask(PdfBase::ForceSeparateNorm);
115 }
116 
117 __host__ void IncoherentSumPdf::setDataSize(unsigned int dataSize, unsigned int evtSize) {
118  // Default 3 is m12, m13, evtNum
119  totalEventSize = evtSize;
120  if(totalEventSize < 3)
121  throw GooFit::GeneralError("totalEventSize {} must be 3 or more", totalEventSize);
122 
123  if(cachedResonances) {
124  delete cachedResonances;
125  }
126 
127  numEntries = dataSize;
128  cachedResonances = new thrust::device_vector<fpcomplex>(dataSize * decayInfo.resonances.size());
129  void *dummy = thrust::raw_pointer_cast(cachedResonances->data());
130  MEMCPY_TO_SYMBOL(
131  cResonanceValues, &dummy, sizeof(fpcomplex *), cacheToUse * sizeof(fpcomplex *), cudaMemcpyHostToDevice);
132  setForceIntegrals();
133 }
134 
135 __host__ fptype IncoherentSumPdf::normalize() const {
136  recursiveSetNormalisation(1); // Not going to normalize efficiency,
137  // so set normalisation factor to 1 so it doesn't get multiplied by zero.
138  // Copy at this time to ensure that the SpecialCalculators, which need the efficiency,
139  // don't get zeroes through multiplying by the normFactor.
140  MEMCPY_TO_SYMBOL(normalisationFactors, host_normalisation, totalParams * sizeof(fptype), 0, cudaMemcpyHostToDevice);
141 
142  int totalBins = _m12.getNumBins() * _m13.getNumBins();
143 
144  if(!dalitzNormRange) {
145  gooMalloc((void **)&dalitzNormRange, 6 * sizeof(fptype));
146 
147  auto *host_norms = new fptype[6];
148  host_norms[0] = _m12.getLowerLimit();
149  host_norms[1] = _m12.getUpperLimit();
150  host_norms[2] = _m12.getNumBins();
151  host_norms[3] = _m13.getLowerLimit();
152  host_norms[4] = _m13.getUpperLimit();
153  host_norms[5] = _m13.getNumBins();
154  MEMCPY(dalitzNormRange, host_norms, 6 * sizeof(fptype), cudaMemcpyHostToDevice);
155  delete[] host_norms;
156  }
157 
158  // Check if efficiency changes force redoing the integrals.
159  if(efficiency->parametersChanged()) {
160  forceRedoIntegrals = true;
161  }
162 
163  // Check for changed masses or forced integral redo.
164  for(unsigned int i = 0; i < decayInfo.resonances.size(); ++i) {
165  redoIntegral[i] = forceRedoIntegrals;
166 
167  if(!(decayInfo.resonances[i]->parametersChanged()))
168  continue;
169 
170  redoIntegral[i] = true;
171  }
172 
173  forceRedoIntegrals = false;
174 
175  thrust::constant_iterator<fptype *> arrayAddress(dalitzNormRange);
176  thrust::counting_iterator<int> binIndex(0);
177 
178  // NB, SpecialIncoherentResonanceCalculator assumes that fit is unbinned!
179  // And it needs to know the total event size, not just observables
180  // for this particular PDF component.
181  thrust::constant_iterator<fptype *> dataArray(dev_event_array);
182  thrust::constant_iterator<int> eventSize(totalEventSize);
183  thrust::counting_iterator<int> eventIndex(0);
184 
185  for(int i = 0; i < decayInfo.resonances.size(); ++i) {
186  if(redoIntegral[i]) {
187  thrust::transform(
188  thrust::make_zip_iterator(thrust::make_tuple(eventIndex, dataArray, eventSize)),
189  thrust::make_zip_iterator(thrust::make_tuple(eventIndex + numEntries, arrayAddress, eventSize)),
190  strided_range<thrust::device_vector<fpcomplex>::iterator>(
191  cachedResonances->begin() + i, cachedResonances->end(), decayInfo.resonances.size())
192  .begin(),
193  *(calculators[i]));
194 
195  fptype dummy = 0;
196  static thrust::plus<fptype> cudaPlus;
197  integrals[i] = thrust::transform_reduce(
198  thrust::make_zip_iterator(thrust::make_tuple(binIndex, arrayAddress)),
199  thrust::make_zip_iterator(thrust::make_tuple(binIndex + totalBins, arrayAddress)),
200  *(integrators[i]),
201  dummy,
202  cudaPlus);
203  }
204  }
205 
206  // End of time-consuming integrals and caching of BWs over Dalitz plot.
207 
208  fptype ret = 0;
209 
210  for(unsigned int i = 0; i < decayInfo.resonances.size(); ++i) {
211  int param_i = parameters + resonanceOffset_incoherent + resonanceSize * i;
212  fptype amplitude = host_params[host_indices[param_i]];
213  ret += amplitude * integrals[i];
214  }
215 
216  double binSizeFactor = 1;
217  binSizeFactor *= _m12.getBinSize();
218  binSizeFactor *= _m13.getBinSize();
219  ret *= binSizeFactor;
220 
221  host_normalisation[parameters] = 1.0 / ret;
222  return ret;
223 }
224 
225 SpecialIncoherentIntegrator::SpecialIncoherentIntegrator(int pIdx, unsigned int ri)
226  : resonance_i(ri)
227  , parameters(pIdx) {}
228 
229 __device__ fptype SpecialIncoherentIntegrator::operator()(thrust::tuple<int, fptype *> t) const {
230  // Returns integral of specific BW over Dalitz plot, to be cached and
231  // multiplied by rapidly-changing amplitude.
232 
233  // Bin index, base address [lower, upper,getNumBins]
234  // Notice that this is basically MetricTaker::operator (binned) with the special-case knowledge
235  // that event size is two, and that the function to call is getResonanceAmplitude.
236 
237  int globalBinNumber = thrust::get<0>(t);
238  fptype lowerBoundM12 = thrust::get<1>(t)[0];
239  fptype upperBoundM12 = thrust::get<1>(t)[1];
240  auto numBinsM12 = static_cast<int>(floor(thrust::get<1>(t)[2] + 0.5));
241  int binNumberM12 = globalBinNumber % numBinsM12;
242  fptype binCenterM12 = upperBoundM12 - lowerBoundM12;
243  binCenterM12 /= numBinsM12;
244  binCenterM12 *= (binNumberM12 + 0.5);
245  binCenterM12 += lowerBoundM12;
246 
247  globalBinNumber /= numBinsM12;
248  fptype lowerBoundM13 = thrust::get<1>(t)[3];
249  fptype upperBoundM13 = thrust::get<1>(t)[4];
250  auto numBinsM13 = static_cast<int>(floor(thrust::get<1>(t)[5] + 0.5));
251  fptype binCenterM13 = upperBoundM13 - lowerBoundM13;
252  binCenterM13 /= numBinsM13;
253  binCenterM13 *= (globalBinNumber + 0.5);
254  binCenterM13 += lowerBoundM13;
255 
256  unsigned int *indices = paramIndices + parameters;
257  fptype motherMass = functorConstants[indices[1] + 0];
258  fptype daug1Mass = functorConstants[indices[1] + 1];
259  fptype daug2Mass = functorConstants[indices[1] + 2];
260  fptype daug3Mass = functorConstants[indices[1] + 3];
261 
262  if(!inDalitz(binCenterM12, binCenterM13, motherMass, daug1Mass, daug2Mass, daug3Mass))
263  return 0;
264 
265  int parameter_i
266  = parIndexFromResIndex_incoherent(resonance_i); // Find position of this resonance relative to TDDP start
267  unsigned int functn_i = indices[parameter_i + 2];
268  unsigned int params_i = indices[parameter_i + 3];
269  fptype m23 = motherMass * motherMass + daug1Mass * daug1Mass + daug2Mass * daug2Mass + daug3Mass * daug3Mass
270  - binCenterM12 - binCenterM13;
271  fpcomplex ret = getResonanceAmplitude(binCenterM12, binCenterM13, m23, functn_i, params_i);
272 
273  unsigned int numResonances = indices[2];
274  fptype fakeEvt[10]; // Need room for many observables in case m12 or m13 were assigned a high index in an
275  // event-weighted fit.
276  fakeEvt[indices[indices[0] + 2 + 0]] = binCenterM12;
277  fakeEvt[indices[indices[0] + 2 + 1]] = binCenterM13;
278  int effFunctionIdx = parIndexFromResIndex_incoherent(numResonances);
279  fptype eff = callFunction(fakeEvt, indices[effFunctionIdx], indices[effFunctionIdx + 1]);
280 
281  return thrust::norm(ret) * eff;
282 }
283 
284 SpecialIncoherentResonanceCalculator::SpecialIncoherentResonanceCalculator(int pIdx, unsigned int res_idx)
285  : resonance_i(res_idx)
286  , parameters(pIdx) {}
287 
288 __device__ fpcomplex SpecialIncoherentResonanceCalculator::operator()(thrust::tuple<int, fptype *, int> t) const {
289  // Returns the BW, or other resonance function, for a specific resonance.
290  // Is special because the value is expected to change slowly, so it's
291  // useful to cache the result.
292  int evtNum = thrust::get<0>(t);
293  fptype *evt = thrust::get<1>(t) + (evtNum * thrust::get<2>(t));
294 
295  unsigned int *indices = paramIndices + parameters; // Jump to TDDP position within parameters array
296  fptype m12 = evt[indices[2 + indices[0]]];
297  fptype m13 = evt[indices[3 + indices[0]]];
298  fptype motherMass = functorConstants[indices[1] + 0];
299  fptype daug1Mass = functorConstants[indices[1] + 1];
300  fptype daug2Mass = functorConstants[indices[1] + 2];
301  fptype daug3Mass = functorConstants[indices[1] + 3];
302 
303  if(!inDalitz(m12, m13, motherMass, daug1Mass, daug2Mass, daug3Mass))
304  return {0., 0.};
305 
306  fptype m23
307  = motherMass * motherMass + daug1Mass * daug1Mass + daug2Mass * daug2Mass + daug3Mass * daug3Mass - m12 - m13;
308 
309  int parameter_i
310  = parIndexFromResIndex_incoherent(resonance_i); // Find position of this resonance relative to TDDP start
311  unsigned int functn_i = indices[parameter_i + 2];
312  unsigned int params_i = indices[parameter_i + 3];
313  fpcomplex ret = getResonanceAmplitude(m12, m13, m23, functn_i, params_i);
314 
315  return ret;
316 }
317 
318 } // namespace GooFit