Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooNLLVarNew.cxx
Go to the documentation of this file.
1/// \cond ROOFIT_INTERNAL
2
3/*
4 * Project: RooFit
5 * Authors:
6 * Jonas Rembser, CERN 2021
7 * Emmanouil Michalainas, CERN 2021
8 *
9 * Copyright (c) 2021, CERN
10 *
11 * Redistribution and use in source and binary forms,
12 * with or without modification, are permitted according to the terms
13 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
14 */
15
16/**
17\file RooNLLVarNew.cxx
18\class RooNLLVarNew
19\ingroup Roofitcore
20
21This is a simple class designed to produce the nll values needed by the fitter.
22This class calls functions from `RooBatchCompute` library to provide faster
23computation times.
24**/
25
27
28#include <RooHistPdf.h>
29#include <RooBatchCompute.h>
30#include <RooDataHist.h>
31#include <RooNaNPacker.h>
32#include <RooConstVar.h>
33#include <RooRealVar.h>
34#include <RooSetProxy.h>
36
37#include "RooFitImplHelpers.h"
38
39#include <ROOT/StringUtils.hxx>
40
41#include <TClass.h>
42#include <TMath.h>
43#include <Math/Util.h>
44
45#include <numeric>
46#include <stdexcept>
47#include <vector>
48
49ClassImp(RooFit::Detail::RooNLLVarNew);
50
51namespace RooFit {
52namespace Detail {
53
54// Declare constexpr static members to make them available if odr-used in C++14.
55constexpr const char *RooNLLVarNew::weightVarName;
56constexpr const char *RooNLLVarNew::weightVarNameSumW2;
57
58namespace {
59
60// Use RooConstVar for dummies such that they don't get included in getParameters().
61std::unique_ptr<RooConstVar> dummyVar(const char *name)
62{
63 return std::make_unique<RooConstVar>(name, name, 1.0);
64}
65
66// Helper class to represent a template pdf based on the fit dataset.
67class RooOffsetPdf : public RooAbsPdf {
68public:
69 RooOffsetPdf(const char *name, const char *title, RooArgSet const &observables, RooAbsReal &weightVar)
70 : RooAbsPdf(name, title),
71 _observables("!observables", "List of observables", this),
72 _weightVar{"!weightVar", "weightVar", this, weightVar, true, false}
73 {
74 for (RooAbsArg *obs : observables) {
75 _observables.add(*obs);
76 }
77 }
78 RooOffsetPdf(const RooOffsetPdf &other, const char *name = nullptr)
80 _observables("!servers", this, other._observables),
81 _weightVar{"!weightVar", this, other._weightVar}
82 {
83 }
84 TObject *clone(const char *newname) const override { return new RooOffsetPdf(*this, newname); }
85
86 void doEval(RooFit::EvalContext &ctx) const override
87 {
88 std::span<double> output = ctx.output();
89 std::size_t nEvents = output.size();
90
91 std::span<const double> weights = ctx.at(_weightVar);
92
93 // Create the template histogram from the data. This operation is very
94 // expensive, but since the offset only depends on the observables it
95 // only has to be done once.
96
97 RooDataHist dataHist{"data", "data", _observables};
98 // Loop over events to fill the histogram
99 for (std::size_t i = 0; i < nEvents; ++i) {
100 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
101 var->setVal(ctx.at(var)[i]);
102 }
103 dataHist.add(_observables, weights[weights.size() == 1 ? 0 : i]);
104 }
105
106 // Lookup bin weights via RooHistPdf
107 RooHistPdf pdf{"offsetPdf", "offsetPdf", _observables, dataHist};
108 for (std::size_t i = 0; i < nEvents; ++i) {
109 for (auto *var : static_range_cast<RooRealVar *>(_observables)) {
110 var->setVal(ctx.at(var)[i]);
111 }
112 output[i] = pdf.getVal(_observables);
113 }
114 }
115
116private:
117 double evaluate() const override { return 0.0; } // should never be called
118
119 RooSetProxy _observables;
121};
122
123} // namespace
124
125/** Construct a RooNLLVarNew
126\param name the name
127\param title the title
128\param pdf The pdf for which the nll is computed for
129\param observables The observabes of the pdf
130\param isExtended Set to true if this is an extended fit
131**/
132RooNLLVarNew::RooNLLVarNew(const char *name, const char *title, RooAbsPdf &pdf, RooArgSet const &observables,
133 bool isExtended, RooFit::OffsetMode offsetMode)
134 : RooAbsReal(name, title),
135 _pdf{"pdf", "pdf", this, pdf},
136 _weightVar{"weightVar", "weightVar", this, dummyVar(weightVarName)},
138 _binnedL{pdf.getAttribute("BinnedLikelihoodActive")}
139{
140 RooArgSet obs;
141 pdf.getObservables(&observables, obs);
142
143 // In the "BinnedLikelihoodActiveYields" mode, the pdf values can directly
144 // be interpreted as yields and don't need to be multiplied by the bin
145 // widths. That's why we don't need to even fill them in this case.
146 if (_binnedL && !pdf.getAttribute("BinnedLikelihoodActiveYields")) {
148 }
149
150 if (isExtended && !_binnedL) {
151 std::unique_ptr<RooAbsReal> expectedEvents = pdf.createExpectedEventsFunc(&obs);
152 if (expectedEvents) {
153 _expectedEvents =
154 std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", "expectedEvents", this, *expectedEvents);
155 addOwnedComponents(std::move(expectedEvents));
156 }
157 }
158
160 enableOffsetting(offsetMode == RooFit::OffsetMode::Initial);
161 enableBinOffsetting(offsetMode == RooFit::OffsetMode::Bin);
162
163 // In the binned likelihood code path, we directly use that data weights for
164 // the offsetting.
165 if (!_binnedL && _doBinOffset) {
166 auto offsetPdf = std::make_unique<RooOffsetPdf>("_offset_pdf", "_offset_pdf", obs, *_weightVar);
167 _offsetPdf = std::make_unique<RooTemplateProxy<RooAbsPdf>>("offsetPdf", "offsetPdf", this, *offsetPdf);
168 addOwnedComponents(std::move(offsetPdf));
169 }
170}
171
172RooNLLVarNew::RooNLLVarNew(const RooNLLVarNew &other, const char *name)
174 _pdf{"pdf", this, other._pdf},
175 _weightVar{"weightVar", this, other._weightVar},
176 _weightSquaredVar{"weightSquaredVar", this, other._weightSquaredVar},
179 _doOffset{other._doOffset},
180 _simCount{other._simCount},
181 _prefix{other._prefix},
182 _binw{other._binw}
183{
184 if (other._expectedEvents) {
185 _expectedEvents = std::make_unique<RooTemplateProxy<RooAbsReal>>("expectedEvents", this, *other._expectedEvents);
186 }
187}
188
189void RooNLLVarNew::fillBinWidthsFromPdfBoundaries(RooAbsReal const &pdf, RooArgSet const &observables)
190{
191 // Check if the bin widths were already filled
192 if (!_binw.empty()) {
193 return;
194 }
195
196 if (observables.size() != 1) {
197 throw std::runtime_error("BinnedPdf optimization only works with a 1D pdf.");
198 } else {
199 auto *var = static_cast<RooRealVar *>(observables.first());
200 std::list<double> *boundaries = pdf.binBoundaries(*var, var->getMin(), var->getMax());
201 std::list<double>::iterator biter = boundaries->begin();
202 _binw.resize(boundaries->size() - 1);
203 double lastBound = (*biter);
204 ++biter;
205 int ibin = 0;
206 while (biter != boundaries->end()) {
207 _binw[ibin] = (*biter) - lastBound;
208 lastBound = (*biter);
209 ibin++;
210 ++biter;
211 }
212 }
213}
214
215void RooNLLVarNew::doEvalBinnedL(RooFit::EvalContext &ctx, std::span<const double> preds,
216 std::span<const double> weights) const
217{
220
221 const bool predsAreYields = _binw.empty();
222
223 for (std::size_t i = 0; i < preds.size(); ++i) {
224
225 // Calculate log(Poisson(N|mu) for this bin
226 double N = weights[i];
227 double mu = preds[i];
228 if (!predsAreYields) {
229 mu *= _binw[i];
230 }
231
232 if (mu <= 0 && N > 0) {
233 // Catch error condition: data present where zero events are predicted
234 logEvalError(Form("Observed %f events in bin %lu with zero event yield", N, (unsigned long)i));
235 } else {
236 result += RooFit::Detail::MathFuncs::nll(mu, N, true, _doBinOffset);
238 }
239 }
240
242}
243
244void RooNLLVarNew::doEval(RooFit::EvalContext &ctx) const
245{
246 std::span<const double> weights = ctx.at(_weightVar);
247 std::span<const double> weightsSumW2 = ctx.at(_weightSquaredVar);
248
249 if (_binnedL) {
250 return doEvalBinnedL(ctx, ctx.at(&*_pdf), _weightSquared ? weightsSumW2 : weights);
251 }
252
253 auto config = ctx.config(this);
254
255 auto probas = ctx.at(_pdf);
256
257 _sumWeight = weights.size() == 1 ? weights[0] * probas.size()
258 : RooBatchCompute::reduceSum(config, weights.data(), weights.size());
259 if (_expectedEvents && _weightSquared && _sumWeight2 == 0.0) {
260 _sumWeight2 = weights.size() == 1 ? weightsSumW2[0] * probas.size()
261 : RooBatchCompute::reduceSum(config, weightsSumW2.data(), weightsSumW2.size());
262 }
263
264 auto nllOut = RooBatchCompute::reduceNLL(config, probas, _weightSquared ? weightsSumW2 : weights,
265 _doBinOffset ? ctx.at(*_offsetPdf) : std::span<const double>{});
266
267 if (nllOut.nInfiniteValues > 0) {
268 oocoutW(&*_pdf, Eval) << "RooAbsPdf::getLogVal(" << _pdf->GetName()
269 << ") WARNING: top-level pdf has some infinite values" << std::endl;
270 }
271 for (std::size_t i = 0; i < nllOut.nNonPositiveValues; ++i) {
272 _pdf->logEvalError("getLogVal() top-level p.d.f not greater than zero");
273 }
274 for (std::size_t i = 0; i < nllOut.nNaNValues; ++i) {
275 _pdf->logEvalError("getLogVal() top-level p.d.f evaluates to NaN");
276 }
277
278 if (_expectedEvents) {
279 std::span<const double> expected = ctx.at(*_expectedEvents);
280 nllOut.nllSum += _pdf->extendedTerm(_sumWeight, expected[0], _weightSquared ? _sumWeight2 : 0.0, _doBinOffset);
281 }
282
283 finalizeResult(ctx, {nllOut.nllSum, nllOut.nllSumCarry}, _sumWeight);
284}
285
286////////////////////////////////////////////////////////////////////////////////
287/// Sets the prefix for the special variables of this NLL, like weights or bin
288/// volumes.
289/// \param[in] prefix The prefix to add to the observables and weight names.
290void RooNLLVarNew::setPrefix(std::string const &prefix)
291{
292 _prefix = prefix;
293
295}
296
297void RooNLLVarNew::resetWeightVarNames()
298{
299 _weightVar->SetName((_prefix + weightVarName).c_str());
300 _weightSquaredVar->SetName((_prefix + weightVarNameSumW2).c_str());
301 if (_offsetPdf) {
302 (*_offsetPdf)->SetName((_prefix + "_offset_pdf").c_str());
303 }
304}
305
306////////////////////////////////////////////////////////////////////////////////
307/// Toggles the weight square correction.
308void RooNLLVarNew::applyWeightSquared(bool flag)
309{
311}
312
313void RooNLLVarNew::enableOffsetting(bool flag)
314{
315 _doOffset = flag;
317}
318
319void RooNLLVarNew::finalizeResult(RooFit::EvalContext &ctx, ROOT::Math::KahanSum<double> result, double weightSum) const
320{
321 // If part of simultaneous PDF normalize probability over
322 // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
323 // If we do bin-by bin offsetting, we don't do this because it cancels out
324 if (!_doBinOffset && _simCount > 1) {
325 result += weightSum * std::log(static_cast<double>(_simCount));
326 }
327
328 // Check if value offset flag is set.
329 if (_doOffset) {
330
331 // If no offset is stored enable this feature now
332 if (_offset.Sum() == 0 && _offset.Carry() == 0 && (result.Sum() != 0 || result.Carry() != 0)) {
333 _offset = result;
334 }
335 }
336 ctx.setOutputWithOffset(this, result, _offset);
337}
338
339void RooNLLVarNew::translate(RooFit::Detail::CodeSquashContext &ctx) const
340{
341 if (_binnedL && !_pdf->getAttribute("BinnedLikelihoodActiveYields")) {
342 std::stringstream errorMsg;
343 errorMsg << "RooNLLVarNew::translate(): binned likelihood optimization is only supported when raw pdf "
344 "values can be interpreted as yields."
345 << " This is not the case for HistFactory models written with ROOT versions before 6.26.00";
346 coutE(InputArguments) << errorMsg.str() << std::endl;
347 throw std::runtime_error(errorMsg.str());
348 }
349
350 std::string weightSumName = RooFit::Detail::makeValidVarName(GetName()) + "WeightSum";
351 std::string resName = RooFit::Detail::makeValidVarName(GetName()) + "Result";
352 ctx.addResult(this, resName);
353 ctx.addToGlobalScope("double " + weightSumName + " = 0.0;\n");
354 ctx.addToGlobalScope("double " + resName + " = 0.0;\n");
355
356 const bool needWeightSum = _expectedEvents || _simCount > 1;
357
358 if (needWeightSum) {
359 auto scope = ctx.beginLoop(this);
360 ctx.addToCodeBody(weightSumName + " += " + ctx.getResult(*_weightVar) + ";\n");
361 }
362 if (_simCount > 1) {
363 std::string simCountStr = std::to_string(static_cast<double>(_simCount));
364 ctx.addToCodeBody(resName + " += " + weightSumName + " * std::log(" + simCountStr + ");\n");
365 }
366
367 // Begin loop scope for the observables and weight variable. If the weight
368 // is a scalar, the context will ignore it for the loop scope. The closing
369 // brackets of the loop is written at the end of the scopes lifetime.
370 {
371 auto scope = ctx.beginLoop(this);
372 std::string term = ctx.buildCall("RooFit::Detail::MathFuncs::nll", _pdf, _weightVar, _binnedL, 0);
373 ctx.addToCodeBody(this, resName + " += " + term + ";");
374 }
375 if (_expectedEvents) {
376 std::string expected = ctx.getResult(**_expectedEvents);
377 ctx.addToCodeBody(resName + " += " + expected + " - " + weightSumName + " * std::log(" + expected + ");\n");
378 }
379}
380} // namespace Detail
381} // namespace RooFit
382
383/// \endcond
ROOT::RRangeCast< T, false, Range_t > static_range_cast(Range_t &&coll)
#define oocoutW(o, a)
#define coutE(a)
#define ClassImp(name)
Definition Rtypes.h:382
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
char name[80]
Definition TGX11.cxx:110
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2489
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
Common abstract base class for objects that represent a value and a "shape" in RooFit.
Definition RooAbsArg.h:79
RooFit::OwningPtr< RooArgSet > getObservables(const RooArgSet &set, bool valueOnly=true) const
Given a set of possible observables, return the observables that this PDF depends on.
bool getAttribute(const Text_t *name) const
Check if a named attribute is set. By default, all attributes are unset.
Storage_t::size_type size() const
RooAbsArg * first() const
Abstract interface for all probability density functions.
Definition RooAbsPdf.h:40
virtual std::unique_ptr< RooAbsReal > createExpectedEventsFunc(const RooArgSet *nset) const
Returns an object that represents the expected number of events for a given normalization set,...
Abstract base class for objects that represent a real value and implements functionality common to al...
Definition RooAbsReal.h:59
virtual std::list< double > * binBoundaries(RooAbsRealLValue &obs, double xlo, double xhi) const
Retrieve bin boundaries if this distribution is binned in obs.
RooArgSet is a container object that can hold multiple RooAbsArg objects.
Definition RooArgSet.h:24
Container class to hold N-dimensional binned data.
Definition RooDataHist.h:40
void add(const RooArgSet &row, double wgt=1.0) override
Add wgt to the bin content enclosed by the coordinates passed in row.
Definition RooDataHist.h:72
A class to maintain the context for squashing of RooFit models into code.
std::string buildCall(std::string const &funcname, Args_t const &...args)
Build the code to call the function with name funcname, passing some arguments.
void addResult(RooAbsArg const *key, std::string const &value)
A function to save an expression that includes/depends on the result of the input node.
void addToCodeBody(RooAbsArg const *klass, std::string const &in)
Adds the input string to the squashed code body.
std::string const & getResult(RooAbsArg const &arg)
Gets the result for the given node using the node name.
void addToGlobalScope(std::string const &str)
Adds the given string to the string block that will be emitted at the top of the squashed function.
std::unique_ptr< LoopScope > beginLoop(RooAbsArg const *in)
Create a RAII scope for iterating over vector observables.
std::span< const double > at(RooAbsArg const *arg, RooAbsArg const *caller=nullptr)
std::span< double > output()
RooBatchCompute::Config config(RooAbsArg const *arg) const
void setOutputWithOffset(RooAbsArg const *arg, ROOT::Math::KahanSum< double > val, ROOT::Math::KahanSum< double > const &offset)
Sets the output value with an offset.
A propability density function sampled from a multidimensional histogram.
Definition RooHistPdf.h:30
Variable that can be changed from the outside.
Definition RooRealVar.h:37
Mother of all ROOT objects.
Definition TObject.h:41
double reduceSum(Config cfg, InputArr input, size_t n)
ReduceNLLOutput reduceNLL(Config cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas)
double nll(double pdf, double weight, int binnedL, int doBinOffset)
Definition MathFuncs.h:377
std::string makeValidVarName(std::string const &in)
The namespace RooFit contains mostly switches that change the behaviour of functions of PDFs (or othe...
Definition JSONIO.h:26
OffsetMode
For setting the offset mode with the Offset() command argument to RooAbsPdf::fitTo()
void evaluate(typename Architecture_t::Tensor_t &A, EActivationFunction f)
Apply the given activation function to each value in the given tensor A.
Definition Functions.h:98
void probas(TString dataset, TString fin="TMVA.root", Bool_t useTMVAStyle=kTRUE)
static void output()