Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RTTreeDS.cxx
Go to the documentation of this file.
1#include <sstream>
2
3#include <ROOT/InternalTreeUtils.hxx> // GetTopLevelBranchNames
4#include <ROOT/RDataFrame.hxx>
6#include <ROOT/RTTreeDS.hxx>
7#include <ROOT/RDF/RLoopManager.hxx> // GetBranchNames
9#include <ROOT/RDF/Utils.hxx> // GetBranchOrLeafTypeName
10
11#include <TClassEdit.h>
12#include <TFile.h>
13#include <TTree.h>
14#include <TTreeReader.h>
15
16#ifdef R__USE_IMT
17#include <TROOT.h>
18#include <TEntryList.h>
20#include <ROOT/RSlotStack.hxx>
21#endif
22
23namespace {
25{
26 switch (entryStatus) {
27 case TTreeReader::kEntryValid: return true;
28 case TTreeReader::kIndexedFriendNoMatch: return true;
30 default: return false;
31 }
32}
33
34std::tuple<bool, std::string, ROOT::Internal::RDF::RTreeUntypedArrayColumnReader::ECollectionType>
35GetCollectionInfo(const std::string &typeName)
36{
37 const auto beginType = typeName.substr(0, typeName.find_first_of('<') + 1);
38
39 // Find TYPE from ROOT::RVec<TYPE>
40 if (auto pos = beginType.find("RVec<"); pos != std::string::npos) {
41 const auto begin = typeName.find_first_of('<', pos) + 1;
42 const auto end = typeName.find_last_of('>');
43 const auto innerTypeName = typeName.substr(begin, end - begin);
44 if (innerTypeName.find("bool") != std::string::npos)
46 else
48 }
49
50 // Find TYPE from std::array<TYPE,N>
51 if (auto pos = beginType.find("array<"); pos != std::string::npos) {
52 const auto begin = typeName.find_first_of('<', pos) + 1;
53 const auto end = typeName.find_last_of('>');
54 const auto arrTemplArgs = typeName.substr(begin, end - begin);
55 const auto lastComma = arrTemplArgs.find_last_of(',');
56 return {true, arrTemplArgs.substr(0, lastComma),
58 }
59
61}
62} // namespace
63
64// Destructor is defined here, where the data member types are actually available
66
68{
69 fTree = tree;
70
71 if (friendInfo) {
73 for (std::size_t i = 0ul; i < fFriends.size(); i++) {
74 const auto &thisFriendAlias = friendInfo->fFriendNames[i].second;
75 fTree->AddFriend(fFriends[i].get(), thisFriendAlias.c_str());
76 }
77 }
78
83 if (fTopLevelBranchNames.empty())
85}
86
87ROOT::Internal::RDF::RTTreeDS::RTTreeDS(std::shared_ptr<TTree> tree)
88{
89 assert(tree && "No tree passed to the constructor of RTTreeDS!");
90 Setup(std::move(tree));
91}
92
94{
95 assert(tree && "No tree passed to the constructor of RTTreeDS!");
96 Setup(std::move(tree), &friendInfo);
97}
98
100{
101 if (!dirPtr) {
102 throw std::runtime_error("RDataFrame: invalid TDirectory when constructing the data source.");
103 }
104 const std::string treeNameInt(treeName);
105 auto tree = dirPtr->Get<TTree>(treeName.data());
106 if (!tree) {
107 throw std::runtime_error("RDataFrame: TTree dataset '" + std::string(treeName) + "' cannot be found in '" +
108 dirPtr->GetName() + "'.");
109 }
111}
112
114{
115 std::string treeNameInt{treeName};
116 std::string fileNameGlobInt{fileNameGlob};
118 chain->Add(fileNameGlobInt.c_str());
119
120 Setup(std::move(chain));
121}
122
123ROOT::Internal::RDF::RTTreeDS::RTTreeDS(std::string_view treeName, const std::vector<std::string> &fileNameGlobs)
124{
125 std::string treeNameInt(treeName);
127 for (auto &&f : fileNameGlobs)
128 chain->Add(f.c_str());
129
130 Setup(std::move(chain));
131}
132
134{
135 return ROOT::RDataFrame(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(treeName, fileNameGlob));
136}
137
139ROOT::Internal::RDF::FromTTree(std::string_view treeName, const std::vector<std::string> &fileNameGlobs)
140{
141 return ROOT::RDataFrame(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(treeName, fileNameGlobs));
142}
143
145 const std::unordered_map<std::string, ROOT::RDF::Experimental::RSample *> &sampleMap) const
146{
147 // one GetTree to retrieve the TChain, another to retrieve the underlying TTree
148 auto *tree = fTreeReader->GetTree()->GetTree();
149 // tree might be missing e.g. when a file in a chain does not exist
150 if (!tree)
151 return ROOT::RDF::RSampleInfo{};
152
153 const std::string treename = ROOT::Internal::TreeUtils::GetTreeFullPaths(*tree)[0];
154 auto *file = tree->GetCurrentFile();
155 const std::string fname = file != nullptr ? file->GetName() : "#inmemorytree#";
156
157 std::pair<Long64_t, Long64_t> range = fTreeReader->GetEntriesRange();
158 R__ASSERT(range.first >= 0);
159 if (range.second == -1) {
160 range.second = tree->GetEntries(); // convert '-1', i.e. 'until the end', to the actual entry number
161 }
162 // If the tree is stored in a subdirectory, treename will be the full path to it starting with the root directory '/'
163 const std::string &id = fname + (treename.rfind('/', 0) == 0 ? "" : "/") + treename;
164 if (sampleMap.empty()) {
165 return RSampleInfo(id, range);
166 } else {
167 if (sampleMap.find(id) == sampleMap.end())
168 throw std::runtime_error("Full sample identifier '" + id + "' cannot be found in the available samples.");
169 return RSampleInfo(id, range, sampleMap.at(id));
170 }
171}
172
174{
175#ifdef R__USE_IMT
177 std::atomic<ULong64_t> entryCount(0ull);
178
179 const auto &entryList = fTree->GetEntryList() ? *fTree->GetEntryList() : TEntryList();
180 const auto &suppressErrorsForMissingBranches = lm.GetSuppressErrorsForMissingBranches();
181 auto tp{fGlobalEntryRange.has_value()
182 ? std::make_unique<ROOT::TTreeProcessorMT>(*fTree, fNSlots, fGlobalEntryRange.value(),
184 : std::make_unique<ROOT::TTreeProcessorMT>(*fTree, entryList, fNSlots, suppressErrorsForMissingBranches)};
185
186 tp->Process([&lm, &slotStack, &entryCount](TTreeReader &treeReader) {
187 lm.TTreeThreadTask(treeReader, slotStack, entryCount);
188 });
189
190 if (fGlobalEntryRange.has_value()) {
191 auto &&[begin, end] = fGlobalEntryRange.value();
192 auto &&processedEntries = entryCount.load();
193 if ((end - begin) > processedEntries) {
194 Warning("RDataFrame::Run",
195 "RDataFrame stopped processing after %lld entries, whereas an entry range (begin=%lld,end=%lld) was "
196 "requested. Consider adjusting the end value of the entry range to a maximum of %lld.",
197 processedEntries, begin, end, begin + processedEntries);
198 }
199 }
200#else
201 (void)lm;
202#endif
203}
204
206{
207 assert(fTree && "The internal TTree is not available, something went wrong.");
208 if (dynamic_cast<TChain *>(fTree.get()))
210
211 return fTree->GetCurrentFile() ? 1 : 0;
212}
213
215{
216 assert(fTree && "The internal TTree is not available, something went wrong.");
217 const auto treeName = fTree->GetName();
218 const auto isTChain = dynamic_cast<TChain *>(fTree.get()) ? true : false;
219 const auto treeType = isTChain ? "TChain" : "TTree";
220 const auto isInMemory = !isTChain && !fTree->GetCurrentFile() ? true : false;
222 const auto hasFriends = friendInfo.fFriendNames.empty() ? false : true;
223 std::stringstream ss;
224 ss << "Dataframe from " << treeType;
225 if (*treeName != 0) {
226 ss << " " << treeName;
227 }
228 if (isInMemory) {
229 ss << " (in-memory)";
230 } else {
232 const auto numFiles = files.size();
233 if (numFiles == 1) {
234 ss << " in file " << files[0];
235 } else {
236 ss << " in files\n";
237 for (auto i = 0u; i < numFiles; i++) {
238 ss << " " << files[i];
239 if (i < numFiles - 1)
240 ss << '\n';
241 }
242 }
243 }
244 if (hasFriends) {
245 const auto numFriends = friendInfo.fFriendNames.size();
246 if (numFriends == 1) {
247 ss << "\nwith friend\n";
248 } else {
249 ss << "\nwith friends\n";
250 }
251 for (auto i = 0u; i < numFriends; i++) {
252 const auto nameAlias = friendInfo.fFriendNames[i];
253 const auto files = friendInfo.fFriendFileNames[i];
254 const auto numFiles = files.size();
255 const auto subnames = friendInfo.fFriendChainSubNames[i];
256 ss << " " << nameAlias.first;
257 if (nameAlias.first != nameAlias.second)
258 ss << " (" << nameAlias.second << ")";
259 // case: TTree as friend
260 if (numFiles == 1) {
261 ss << " " << files[0];
262 }
263 // case: TChain as friend
264 else {
265 ss << '\n';
266 for (auto j = 0u; j < numFiles; j++) {
267 ss << " " << subnames[j] << " " << files[j];
268 if (j < numFiles - 1)
269 ss << '\n';
270 }
271 }
272 if (i < numFriends - 1)
273 ss << '\n';
274 }
275 }
276 return ss.str();
277}
278
279std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
280ROOT::Internal::RDF::RTTreeDS::CreateColumnReader(unsigned int /*slot*/, std::string_view col, const std::type_info &ti,
282{
283 // In a single thread run, use the TTreeReader data member.
284 if (fTreeReader) {
285 treeReader = fTreeReader.get();
286 }
287
288 // The TTreeReader might still not be available if CreateColumnReader was called before the start of the computation
289 // graph execution, e.g. in AddDSColumns.
290 if (!treeReader)
291 return nullptr;
292
293 if (ti == typeid(void))
294 return std::make_unique<ROOT::Internal::RDF::RTreeOpaqueColumnReader>(*treeReader, col);
295
296 const auto typeName = ROOT::Internal::RDF::TypeID2TypeName(ti);
297 if (auto &&[toConvert, innerTypeName, collType] = GetCollectionInfo(typeName); toConvert)
298 return std::make_unique<ROOT::Internal::RDF::RTreeUntypedArrayColumnReader>(*treeReader, col, innerTypeName,
299 collType);
300 else
301 return std::make_unique<ROOT::Internal::RDF::RTreeUntypedValueColumnReader>(*treeReader, col, typeName);
302}
303
305{
306 // The first entry of each tree in a chain is read in GetEntryRanges, we avoid repeating it here
307 if (fTreeReader->GetCurrentEntry() != static_cast<Long64_t>(entry))
308 fTreeReader->SetEntry(entry);
309 return ValidRead(fTreeReader->GetEntryStatus());
310}
311
313{
316 std::vector<std::string> split;
317 int dummy;
318 TClassEdit::GetSplit(colTypeName.c_str(), split, dummy);
319 auto &valueType = split[1];
320 colTypeName = "ROOT::VecOps::RVec<" + valueType + ">";
321 }
322 return colTypeName;
323}
324
325std::string ROOT::Internal::RDF::RTTreeDS::GetTypeName(std::string_view colName) const
326{
329 std::vector<std::string> split;
330 int dummy;
331 TClassEdit::GetSplit(colTypeName.c_str(), split, dummy);
332 auto &valueType = split[1];
333 colTypeName = "ROOT::VecOps::RVec<" + valueType + ">";
334 }
335 return colTypeName;
336}
337
338std::vector<std::pair<ULong64_t, ULong64_t>> ROOT::Internal::RDF::RTTreeDS::GetTTreeEntryRange(TTree &tree)
339{
340 // Restrict the range to the global range if available
341 const ULong64_t rangeBegin = fGlobalEntryRange.has_value() ? std::max(0ull, fGlobalEntryRange->first) : 0ull;
342 const ULong64_t rangeEnd = fGlobalEntryRange.has_value()
343 ? std::min(static_cast<ULong64_t>(tree.GetEntries()), fGlobalEntryRange->second)
344 : static_cast<ULong64_t>(tree.GetEntries());
345 return std::vector<std::pair<ULong64_t, ULong64_t>>{{rangeBegin, rangeEnd}};
346}
347
348std::vector<std::pair<ULong64_t, ULong64_t>> ROOT::Internal::RDF::RTTreeDS::GetTChainEntryRange(TChain &chain)
349{
350 // We are either at a complete new beginning (entry == -1) or at the
351 // end of processing of the previous tree in the chain. Go to the next
352 // entry, which should always be the first entry in a tree. This allows
353 // to get the proper tree offset for the range.
354 fTreeReader->Next();
355 if (!ValidRead(fTreeReader->GetEntryStatus()))
356 return {};
357 auto treeOffsets = chain.GetTreeOffset();
358 auto treeNumber = chain.GetTreeNumber();
361 // Restrict the range to the global range if available
362 const ULong64_t rangeBegin =
363 fGlobalEntryRange.has_value() ? std::max(thisTreeBegin, fGlobalEntryRange->first) : thisTreeBegin;
364 const ULong64_t rangeEnd =
365 fGlobalEntryRange.has_value() ? std::min(thisTreeEnd, fGlobalEntryRange->second) : thisTreeEnd;
366 return std::vector<std::pair<ULong64_t, ULong64_t>>{{rangeBegin, rangeEnd}};
367}
368
369std::vector<std::pair<ULong64_t, ULong64_t>> ROOT::Internal::RDF::RTTreeDS::GetEntryRanges()
370{
371 assert(fTreeReader && "TTreeReader is not available, this should never happen.");
372 auto treeOrChain = fTreeReader->GetTree();
373 assert(treeOrChain && "Could not retrieve TTree from TTreeReader, something went wrong.");
374
375 // End of dataset or entry range
376 if (fTreeReader->GetCurrentEntry() >= treeOrChain->GetEntriesFast() - 1 ||
377 (fGlobalEntryRange.has_value() &&
378 (static_cast<ULong64_t>(fTreeReader->GetCurrentEntry()) >= fGlobalEntryRange->first &&
379 static_cast<ULong64_t>(fTreeReader->GetCurrentEntry()) == fGlobalEntryRange->second - 1))) {
380 // Place the TTreeReader beyond the end of the dataset, so RunFinalChecks can work properly
381 fTreeReader->Next();
382 return {};
383 }
384
385 if (auto chain = dynamic_cast<TChain *>(treeOrChain)) {
386 return GetTChainEntryRange(*chain);
387 } else {
388 return GetTTreeEntryRange(*treeOrChain);
389 }
390}
391
393{
394 // At the end of the event loop, reset the TTreeReader to be ready for
395 // a possible new run.
396 if (fTreeReader)
397 fTreeReader.reset();
398}
399
401{
402 if (fNSlots == 1) {
403 assert(!fTreeReader);
404 fTreeReader = std::make_unique<TTreeReader>(fTree.get(), fTree->GetEntryList(), /*warnAboutLongerFriends*/ true);
405 if (fGlobalEntryRange.has_value() && fGlobalEntryRange->first <= std::numeric_limits<Long64_t>::max() &&
406 fGlobalEntryRange->second <= std::numeric_limits<Long64_t>::max() && fTreeReader &&
407 fTreeReader->SetEntriesRange(fGlobalEntryRange->first, fGlobalEntryRange->second) !=
409 throw std::logic_error("Something went wrong in initializing the TTreeReader.");
410 }
411 }
412}
413
415{
416 Initialize();
417 if (fTreeReader)
418 fTreeReader->SetSuppressErrorsForMissingBranches(suppressErrorsForMissingBranches);
419}
420
422{
423 if (fTreeReader->GetEntryStatus() != TTreeReader::kEntryBeyondEnd && nodesLeftNotRun) {
424 // something went wrong in the TTreeReader event loop
425 throw std::runtime_error("An error was encountered while processing the data. TTreeReader status code is: " +
426 std::to_string(fTreeReader->GetEntryStatus()));
427 }
428}
429
431{
432 assert(fTree);
433 return fTree.get();
434}
#define f(i)
Definition RSha256.hxx:104
unsigned long long ULong64_t
Definition RtypesCore.h:70
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:229
The head node of a RDF computation graph.
std::vector< std::pair< ULong64_t, ULong64_t > > GetTTreeEntryRange(TTree &tree)
Definition RTTreeDS.cxx:338
std::vector< std::string > fBranchNamesWithoutDuplicates
Definition RTTreeDS.hxx:60
std::size_t GetNFiles() const final
Returns the number of files from which the dataset is constructed.
Definition RTTreeDS.cxx:205
std::string DescribeDataset() final
Definition RTTreeDS.cxx:214
void Setup(std::shared_ptr< TTree > &&tree, const ROOT::TreeUtils::RFriendInfo *friendInfo=nullptr)
Definition RTTreeDS.cxx:67
std::vector< std::unique_ptr< TChain > > fFriends
Definition RTTreeDS.hxx:67
void Initialize() final
Convenience method called before starting an event-loop.
Definition RTTreeDS.cxx:400
void Finalize() final
Convenience method called after concluding an event-loop.
Definition RTTreeDS.cxx:392
bool SetEntry(unsigned int, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition RTTreeDS.cxx:304
void InitializeWithOpts(const std::set< std::string > &suppressErrorsForMissingBranches) final
Definition RTTreeDS.cxx:414
std::vector< std::string > fTopLevelBranchNames
Definition RTTreeDS.hxx:61
std::vector< std::pair< ULong64_t, ULong64_t > > GetTChainEntryRange(TChain &chain)
Definition RTTreeDS.cxx:348
std::vector< std::string > fBranchNamesWithDuplicates
Definition RTTreeDS.hxx:59
RTTreeDS(std::shared_ptr< TTree > tree)
Definition RTTreeDS.cxx:87
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
Definition RTTreeDS.cxx:369
std::shared_ptr< TTree > fTree
Definition RTTreeDS.hxx:63
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
Definition RTTreeDS.cxx:325
A thread-safe list of N indexes (0 to size - 1).
friend std::string ROOT::Internal::RDF::GetTypeNameWithOpts(const RDataSource &, std::string_view, bool)
friend ROOT::RDF::RSampleInfo ROOT::Internal::RDF::CreateSampleInfo(const ROOT::RDF::RDataSource &, const std::unordered_map< std::string, ROOT::RDF::Experimental::RSample * > &)
friend std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > ROOT::Internal::RDF::CreateColumnReader(ROOT::RDF::RDataSource &, unsigned int, std::string_view, const std::type_info &, TTreeReader *)
friend void ROOT::Internal::RDF::ProcessMT(RDataSource &, ROOT::Detail::RDF::RLoopManager &)
friend void ROOT::Internal::RDF::RunFinalChecks(const ROOT::RDF::RDataSource &, bool)
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, ...
const_iterator end() const
A chain is a collection of files containing TTree objects.
Definition TChain.h:33
Describe directory structure in memory.
Definition TDirectory.h:45
A List of entry numbers in a TTree or TChain.
Definition TEntryList.h:26
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
@ kIndexedFriendNoMatch
A friend with TTreeIndex doesn't have an entry for this index.
@ kMissingBranchWhenSwitchingTree
A branch was not found when switching to the next TTree in the chain.
@ kEntryBeyondEnd
last entry loop has reached its end
@ kEntryValid
data read okay
A TTree represents a columnar dataset.
Definition TTree.h:84
auto MakeAliasedSharedPtr(T *rawPtr)
Definition Utils.hxx:324
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:123
ROOT::RDataFrame FromTTree(std::string_view treeName, std::string_view fileNameGlob)
Definition RTTreeDS.cxx:133
std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
Return the typename of object colName stored in t, if any.
Definition RDFUtils.cxx:176
std::vector< std::string > GetTreeFullPaths(const TTree &tree)
std::vector< std::string > GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
std::unique_ptr< TChain > MakeChainForMT(const std::string &name="", const std::string &title="")
Create a TChain object with options that avoid common causes of thread contention.
std::vector< std::unique_ptr< TChain > > MakeFriends(const ROOT::TreeUtils::RFriendInfo &finfo)
Create friends from the main TTree.
ROOT::TreeUtils::RFriendInfo GetFriendInfo(const TTree &tree, bool retrieveEntries=false)
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
@ kSTLvector
Definition ESTLType.h:30
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
int GetSplit(const char *type, std::vector< std::string > &output, int &nestedLoc, EModType mode=TClassEdit::kNone)
Stores in output (after emptying it) the split type.
Information about friend trees of a certain TTree or TChain object.