Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFActionHelpers.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
12#include "ROOT/RDF/Utils.hxx" // CacheLineStep
13#include "ROOT/RNTuple.hxx" // ValidateSnapshotRNTupleOutput
14
15namespace ROOT {
16namespace Internal {
17namespace RDF {
18
19CountHelper::CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots)
20 : fResultCount(resultCount), fCounts(nSlots, 0)
21{
22}
23
24void CountHelper::Exec(unsigned int slot)
25{
26 fCounts[slot]++;
27}
28
29void CountHelper::Finalize()
30{
31 *fResultCount = 0;
32 for (auto &c : fCounts) {
33 *fResultCount += c;
34 }
35}
36
37ULong64_t &CountHelper::PartialUpdate(unsigned int slot)
38{
39 return fCounts[slot];
40}
41
42void BufferedFillHelper::UpdateMinMax(unsigned int slot, double v)
43{
44 auto &thisMin = fMin[slot * CacheLineStep<BufEl_t>()];
45 auto &thisMax = fMax[slot * CacheLineStep<BufEl_t>()];
46 thisMin = std::min(thisMin, v);
47 thisMax = std::max(thisMax, v);
48}
49
50BufferedFillHelper::BufferedFillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots)
51 : fResultHist(h), fNSlots(nSlots), fBufSize(fgTotalBufSize / nSlots), fPartialHists(fNSlots),
52 fMin(nSlots * CacheLineStep<BufEl_t>(), std::numeric_limits<BufEl_t>::max()),
53 fMax(nSlots * CacheLineStep<BufEl_t>(), std::numeric_limits<BufEl_t>::lowest())
54{
55 fBuffers.reserve(fNSlots);
56 fWBuffers.reserve(fNSlots);
57 for (unsigned int i = 0; i < fNSlots; ++i) {
58 Buf_t v;
59 v.reserve(fBufSize);
60 fBuffers.emplace_back(v);
61 fWBuffers.emplace_back(v);
62 }
63}
64
65void BufferedFillHelper::Exec(unsigned int slot, double v)
66{
68 fBuffers[slot].emplace_back(v);
69}
70
71void BufferedFillHelper::Exec(unsigned int slot, double v, double w)
72{
74 fBuffers[slot].emplace_back(v);
75 fWBuffers[slot].emplace_back(w);
76}
77
78Hist_t &BufferedFillHelper::PartialUpdate(unsigned int slot)
79{
81 // TODO it is inefficient to re-create the partial histogram everytime the callback is called
82 // ideally we could incrementally fill it with the latest entries in the buffers
83 partialHist = std::make_unique<Hist_t>(*fResultHist);
84 auto weights = fWBuffers[slot].empty() ? nullptr : fWBuffers[slot].data();
85 partialHist->FillN(fBuffers[slot].size(), fBuffers[slot].data(), weights);
86 return *partialHist;
87}
88
89void BufferedFillHelper::Finalize()
90{
91 for (unsigned int i = 0; i < fNSlots; ++i) {
92 if (!fWBuffers[i].empty() && fBuffers[i].size() != fWBuffers[i].size()) {
93 throw std::runtime_error("Cannot fill weighted histogram with values in containers of different sizes.");
94 }
95 }
96
97 BufEl_t globalMin = *std::min_element(fMin.begin(), fMin.end());
98 BufEl_t globalMax = *std::max_element(fMax.begin(), fMax.end());
99
100 if (fResultHist->CanExtendAllAxes() && globalMin != std::numeric_limits<BufEl_t>::max() &&
101 globalMax != std::numeric_limits<BufEl_t>::lowest()) {
102 fResultHist->SetBins(fResultHist->GetNbinsX(), globalMin, globalMax);
103 }
104
105 for (unsigned int i = 0; i < fNSlots; ++i) {
106 auto weights = fWBuffers[i].empty() ? nullptr : fWBuffers[i].data();
107 fResultHist->FillN(fBuffers[i].size(), fBuffers[i].data(), weights);
108 }
109}
110
111MeanHelper::MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
113{
114}
115
116void MeanHelper::Exec(unsigned int slot, double v)
117{
118 fCounts[slot]++;
119 // Kahan Sum:
120 double y = v - fCompensations[slot];
121 double t = fSums[slot] + y;
122 fCompensations[slot] = (t - fSums[slot]) - y;
123 fSums[slot] = t;
124}
125
126void MeanHelper::Finalize()
127{
128 double sumOfSums = 0;
129 // Kahan Sum:
130 double compensation(0);
131 double y(0);
132 double t(0);
133 for (auto &m : fSums) {
134 y = m - compensation;
135 t = sumOfSums + y;
136 compensation = (t - sumOfSums) - y;
137 sumOfSums = t;
138 }
140 for (auto &c : fCounts)
141 sumOfCounts += c;
143}
144
145double &MeanHelper::PartialUpdate(unsigned int slot)
146{
147 fPartialMeans[slot] = fSums[slot] / fCounts[slot];
148 return fPartialMeans[slot];
149}
150
151StdDevHelper::StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
152 : fNSlots(nSlots), fResultStdDev(meanVPtr), fCounts(nSlots, 0), fMeans(nSlots, 0), fDistancesfromMean(nSlots, 0)
153{
154}
155
156void StdDevHelper::Exec(unsigned int slot, double v)
157{
158 // Applies the Welford's algorithm to the stream of values received by the thread
159 auto count = ++fCounts[slot];
160 auto delta = v - fMeans[slot];
161 auto mean = fMeans[slot] + delta / count;
162 auto delta2 = v - mean;
163 auto distance = fDistancesfromMean[slot] + delta * delta2;
164
165 fCounts[slot] = count;
166 fMeans[slot] = mean;
168}
169
170void StdDevHelper::Finalize()
171{
172 // Evaluates and merges the partial result of each set of data to get the overall standard deviation.
173 double totalElements = 0;
174 for (auto c : fCounts) {
175 totalElements += c;
176 }
177 if (totalElements == 0 || totalElements == 1) {
178 // Std deviation is not defined for 1 element.
179 *fResultStdDev = 0;
180 return;
181 }
182
183 double overallMean = 0;
184 for (unsigned int i = 0; i < fNSlots; ++i) {
185 overallMean += fCounts[i] * fMeans[i];
186 }
188
189 double variance = 0;
190 for (unsigned int i = 0; i < fNSlots; ++i) {
191 if (fCounts[i] == 0) {
192 continue;
193 }
194 auto setVariance = fDistancesfromMean[i] / (fCounts[i]);
195 variance += (fCounts[i]) * (setVariance + std::pow((fMeans[i] - overallMean), 2));
196 }
197
199 *fResultStdDev = std::sqrt(variance);
200}
201
202// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
203#if __GNUC__ > 5
213#endif
214
216 const std::string &fileName)
217{
218 TString fileMode = opts.fMode;
219 fileMode.ToLower();
220 if (fileMode != "update")
221 return;
222
223 // output file opened in "update" mode: must check whether output TTree is already present in file
224 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
225 if (!outFile || outFile->IsZombie())
226 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
227
228 TObject *outTree = outFile->Get(treeName.c_str());
229 if (outTree == nullptr)
230 return;
231
232 // object called treeName is already present in the file
233 if (opts.fOverwriteIfExists) {
234 if (outTree->InheritsFrom("TTree")) {
235 static_cast<TTree *>(outTree)->Delete("all");
236 } else {
237 outFile->Delete(treeName.c_str());
238 }
239 } else {
240 const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName +
241 "\". If you want to delete the original tree and write another, please set "
242 "RSnapshotOptions::fOverwriteIfExists to true.";
243 throw std::invalid_argument(msg);
244 }
245}
246
248 const std::string &fileName)
249{
250 TString fileMode = opts.fMode;
251 fileMode.ToLower();
252 if (fileMode != "update")
253 return;
254
255 // output file opened in "update" mode: must check whether output RNTuple is already present in file
256 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
257 if (!outFile || outFile->IsZombie())
258 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
259
260 auto *outNTuple = outFile->Get<ROOT::RNTuple>(ntupleName.c_str());
261
262 if (outNTuple) {
263 if (opts.fOverwriteIfExists) {
264 outFile->Delete((ntupleName + ";*").c_str());
265 return;
266 } else {
267 const std::string msg = "Snapshot: RNTuple \"" + ntupleName + "\" already present in file \"" + fileName +
268 "\". If you want to delete the original ntuple and write another, please set "
269 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
270 throw std::invalid_argument(msg);
271 }
272 }
273
274 // Also check if there is any object other than an RNTuple with the provided ntupleName.
275 TObject *outObj = outFile->Get(ntupleName.c_str());
276
277 if (!outObj)
278 return;
279
280 // An object called ntupleName is already present in the file.
281 if (opts.fOverwriteIfExists) {
282 if (auto tree = dynamic_cast<TTree *>(outObj)) {
283 tree->Delete("all");
284 } else {
285 outFile->Delete((ntupleName + ";*").c_str());
286 }
287 } else {
288 const std::string msg = "Snapshot: object \"" + ntupleName + "\" already present in file \"" + fileName +
289 "\". If you want to delete the original object and write a new RNTuple, please set "
290 "the 'fOverwriteIfExists' option to true in RSnapshotOptions.";
291 throw std::invalid_argument(msg);
292 }
293}
294
295} // end NS RDF
296} // end NS Internal
297} // end NS ROOT
298
299namespace {
300void CreateCStyleArrayBranch(TTree *inputTree, TTree &outputTree, ROOT::Internal::RDF::RBranchSet &outputBranches,
301 const std::string &inputBranchName, const std::string &outputBranchName, int basketSize)
302{
303 TBranch *inputBranch = nullptr;
304 if (inputTree) {
305 inputBranch = inputTree->GetBranch(inputBranchName.c_str());
306 if (!inputBranch) // try harder
307 inputBranch = inputTree->FindBranch(inputBranchName.c_str());
308 }
309 if (!inputBranch)
310 return;
311 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
312 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
313 return;
314 // must construct the leaflist for the output branch and create the branch in the output tree
315 const auto *leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
316 if (!leaf)
317 return;
318 const auto bname = leaf->GetName();
319 auto *sizeLeaf = leaf->GetLeafCount();
320 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
321
322 // We proceed only if branch is a fixed-or-variable-sized array
323 if (sizeLeaf || leaf->GetLenStatic() > 1) {
324 if (sizeLeaf && !outputBranches.Get(sizeLeafName)) {
325 // The output array branch `bname` has dynamic size stored in leaf `sizeLeafName`, but that leaf has not been
326 // added to the output tree yet. However, the size leaf has to be available for the creation of the array
327 // branch to be successful. So we create the size leaf here.
329 // Use Original basket size for Existing Branches otherwise use Custom basket Size.
330 const auto bufSize = (basketSize > 0) ? basketSize : sizeLeaf->GetBranch()->GetBasketSize();
331 auto *outputBranch = outputTree.Branch(sizeLeafName.c_str(), static_cast<void *>(nullptr),
332 (sizeLeafName + '/' + sizeTypeStr).c_str(), bufSize);
334 }
335
336 const auto btype = leaf->GetTypeName();
338 if (rootbtype == ' ') {
339 Warning("Snapshot",
340 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. The "
341 "leaf is of type '%s'. This column will not be written out.",
342 bname, btype);
343 return;
344 }
345
346 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
347 // Use original basket size for existing branches and new basket size for new branches
348 const auto bufSize = (basketSize > 0) ? basketSize : inputBranch->GetBasketSize();
349 auto *outputBranch =
350 outputTree.Branch(outputBranchName.c_str(), static_cast<void *>(nullptr), leaflist.c_str(), bufSize);
351 outputBranch->SetTitle(inputBranch->GetTitle());
353 }
354}
355} // namespace
356
357void ROOT::Internal::RDF::SetEmptyBranchesHelper(TTree *inputTree, TTree &outputTree,
358 ROOT::Internal::RDF::RBranchSet &outputBranches,
359 const std::string &inputBranchName,
360 const std::string &outputBranchName, const std::type_info &typeInfo,
361 int basketSize)
362{
363 const auto bufSize = (basketSize > 0) ? basketSize : 32000;
365 if (!classPtr) {
366 // Case of a leaflist of fundamental type, logic taken from
367 // TTree::BranchImpRef(const char* branchname, TClass* ptrClass, EDataType datatype, void* addobj, Int_t bufsize,
368 // Int_t splitlevel)
371 if (rootTypeChar == ' ') {
372 Warning(
373 "Snapshot",
374 "RDataFrame::Snapshot: could not correctly construct a leaflist for fundamental type in column %s. This "
375 "column will not be written out.",
376 outputBranchName.c_str());
377 return;
378 }
379 std::string leafList{outputBranchName + '/' + rootTypeChar};
380 auto *outputBranch =
381 outputTree.Branch(outputBranchName.c_str(), static_cast<void *>(nullptr), leafList.c_str(), bufSize);
383 return;
384 }
385
386 // Find if there is an input branch, check for cases where we need a leaflist (e.g. C-style arrays)
388
389 // General case
391 auto *outputBranch = outputTree.Branch(outputBranchName.c_str(), classPtr->GetName(), nullptr, bufSize);
393 }
394}
#define c(i)
Definition RSha256.hxx:101
#define h(i)
Definition RSha256.hxx:106
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
Definition RtypesCore.h:70
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:229
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:65
A TTree is a list of TBranches.
Definition TBranch.h:93
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3074
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4131
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
Mother of all ROOT objects.
Definition TObject.h:41
Basic string class.
Definition TString.h:139
A TTree represents a columnar dataset.
Definition TTree.h:84
Double_t y[n]
Definition legend1.C:17
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition RDFUtils.cxx:263
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:123
constexpr std::size_t CacheLineStep()
Stepping through CacheLineStep<T> values in a vector<T> brings you to a new cache line.
Definition Utils.hxx:224
void EnsureValidSnapshotRNTupleOutput(const RSnapshotOptions &opts, const std::string &ntupleName, const std::string &fileName)
void EnsureValidSnapshotTTreeOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
A collection of options to steer the creation of the dataset on file.
TMarker m
Definition textangle.C:8