30 default:
return false;
34std::tuple<bool, std::string, ROOT::Internal::RDF::RTreeUntypedArrayColumnReader::ECollectionType>
35GetCollectionInfo(
const std::string &typeName)
37 const auto beginType = typeName.substr(0, typeName.find_first_of(
'<') + 1);
40 if (
auto pos =
beginType.find(
"RVec<"); pos != std::string::npos) {
41 const auto begin = typeName.find_first_of(
'<', pos) + 1;
42 const auto end = typeName.find_last_of(
'>');
43 const auto innerTypeName = typeName.substr(begin, end - begin);
51 if (
auto pos =
beginType.find(
"array<"); pos != std::string::npos) {
52 const auto begin = typeName.find_first_of(
'<', pos) + 1;
53 const auto end = typeName.find_last_of(
'>');
54 const auto arrTemplArgs = typeName.substr(begin, end - begin);
73 for (std::size_t i = 0
ul; i <
fFriends.size(); i++) {
89 assert(tree &&
"No tree passed to the constructor of RTTreeDS!");
90 Setup(std::move(tree));
95 assert(tree &&
"No tree passed to the constructor of RTTreeDS!");
102 throw std::runtime_error(
"RDataFrame: invalid TDirectory when constructing the data source.");
107 throw std::runtime_error(
"RDataFrame: TTree dataset '" + std::string(
treeName) +
"' cannot be found in '" +
108 dirPtr->GetName() +
"'.");
120 Setup(std::move(
chain));
130 Setup(std::move(
chain));
145 const std::unordered_map<std::string, ROOT::RDF::Experimental::RSample *> &
sampleMap)
const
148 auto *tree = fTreeReader->GetTree()->GetTree();
154 auto *file = tree->GetCurrentFile();
155 const std::string
fname = file !=
nullptr ? file->GetName() :
"#inmemorytree#";
157 std::pair<Long64_t, Long64_t>
range = fTreeReader->GetEntriesRange();
159 if (
range.second == -1) {
160 range.second = tree->GetEntries();
168 throw std::runtime_error(
"Full sample identifier '" +
id +
"' cannot be found in the available samples.");
181 auto tp{fGlobalEntryRange.has_value()
182 ? std::make_unique<ROOT::TTreeProcessorMT>(*fTree, fNSlots, fGlobalEntryRange.value(),
190 if (fGlobalEntryRange.has_value()) {
191 auto &&[begin, end] = fGlobalEntryRange.value();
195 "RDataFrame stopped processing after %lld entries, whereas an entry range (begin=%lld,end=%lld) was "
196 "requested. Consider adjusting the end value of the entry range to a maximum of %lld.",
207 assert(fTree &&
"The internal TTree is not available, something went wrong.");
208 if (
dynamic_cast<TChain *
>(fTree.get()))
211 return fTree->GetCurrentFile() ? 1 : 0;
216 assert(fTree &&
"The internal TTree is not available, something went wrong.");
217 const auto treeName = fTree->GetName();
218 const auto isTChain =
dynamic_cast<TChain *
>(fTree.get()) ?
true :
false;
223 std::stringstream
ss;
229 ss <<
" (in-memory)";
247 ss <<
"\nwith friend\n";
249 ss <<
"\nwith friends\n";
279std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
293 if (
ti ==
typeid(
void))
294 return std::make_unique<ROOT::Internal::RDF::RTreeOpaqueColumnReader>(*
treeReader, col);
301 return std::make_unique<ROOT::Internal::RDF::RTreeUntypedValueColumnReader>(*
treeReader, col, typeName);
307 if (fTreeReader->GetCurrentEntry() !=
static_cast<Long64_t>(
entry))
308 fTreeReader->SetEntry(
entry);
309 return ValidRead(fTreeReader->GetEntryStatus());
316 std::vector<std::string> split;
329 std::vector<std::string> split;
341 const ULong64_t rangeBegin = fGlobalEntryRange.has_value() ? std::max(0ull, fGlobalEntryRange->first) : 0ull;
343 ? std::min(
static_cast<ULong64_t>(tree.GetEntries()), fGlobalEntryRange->second)
344 :
static_cast<ULong64_t>(tree.GetEntries());
355 if (!
ValidRead(fTreeReader->GetEntryStatus()))
371 assert(fTreeReader &&
"TTreeReader is not available, this should never happen.");
373 assert(
treeOrChain &&
"Could not retrieve TTree from TTreeReader, something went wrong.");
376 if (fTreeReader->GetCurrentEntry() >=
treeOrChain->GetEntriesFast() - 1 ||
377 (fGlobalEntryRange.has_value() &&
378 (
static_cast<ULong64_t>(fTreeReader->GetCurrentEntry()) >= fGlobalEntryRange->first &&
379 static_cast<ULong64_t>(fTreeReader->GetCurrentEntry()) == fGlobalEntryRange->second - 1))) {
386 return GetTChainEntryRange(*
chain);
404 fTreeReader = std::make_unique<TTreeReader>(fTree.get(), fTree->GetEntryList(),
true);
405 if (fGlobalEntryRange.has_value() && fGlobalEntryRange->first <= std::numeric_limits<Long64_t>::max() &&
406 fGlobalEntryRange->second <= std::numeric_limits<Long64_t>::max() && fTreeReader &&
407 fTreeReader->SetEntriesRange(fGlobalEntryRange->first, fGlobalEntryRange->second) !=
409 throw std::logic_error(
"Something went wrong in initializing the TTreeReader.");
425 throw std::runtime_error(
"An error was encountered while processing the data. TTreeReader status code is: " +
426 std::to_string(fTreeReader->GetEntryStatus()));
unsigned long long ULong64_t
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
The head node of a RDF computation graph.
std::vector< std::pair< ULong64_t, ULong64_t > > GetTTreeEntryRange(TTree &tree)
std::vector< std::string > fBranchNamesWithoutDuplicates
std::size_t GetNFiles() const final
Returns the number of files from which the dataset is constructed.
std::string DescribeDataset() final
void Setup(std::shared_ptr< TTree > &&tree, const ROOT::TreeUtils::RFriendInfo *friendInfo=nullptr)
std::vector< std::unique_ptr< TChain > > fFriends
void Initialize() final
Convenience method called before starting an event-loop.
void Finalize() final
Convenience method called after concluding an event-loop.
bool SetEntry(unsigned int, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
void InitializeWithOpts(const std::set< std::string > &suppressErrorsForMissingBranches) final
std::vector< std::string > fTopLevelBranchNames
std::vector< std::pair< ULong64_t, ULong64_t > > GetTChainEntryRange(TChain &chain)
std::vector< std::string > fBranchNamesWithDuplicates
RTTreeDS(std::shared_ptr< TTree > tree)
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
std::shared_ptr< TTree > fTree
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
A thread-safe list of N indexes (0 to size - 1).
friend std::string ROOT::Internal::RDF::GetTypeNameWithOpts(const RDataSource &, std::string_view, bool)
friend ROOT::RDF::RSampleInfo ROOT::Internal::RDF::CreateSampleInfo(const ROOT::RDF::RDataSource &, const std::unordered_map< std::string, ROOT::RDF::Experimental::RSample * > &)
friend std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > ROOT::Internal::RDF::CreateColumnReader(ROOT::RDF::RDataSource &, unsigned int, std::string_view, const std::type_info &, TTreeReader *)
friend void ROOT::Internal::RDF::ProcessMT(RDataSource &, ROOT::Detail::RDF::RLoopManager &)
friend void ROOT::Internal::RDF::RunFinalChecks(const ROOT::RDF::RDataSource &, bool)
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
const_iterator end() const
A chain is a collection of files containing TTree objects.
Describe directory structure in memory.
A List of entry numbers in a TTree or TChain.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
@ kIndexedFriendNoMatch
A friend with TTreeIndex doesn't have an entry for this index.
@ kMissingBranchWhenSwitchingTree
A branch was not found when switching to the next TTree in the chain.
@ kEntryBeyondEnd
last entry loop has reached its end
@ kEntryValid
data read okay
A TTree represents a columnar dataset.
auto MakeAliasedSharedPtr(T *rawPtr)
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
ROOT::RDataFrame FromTTree(std::string_view treeName, std::string_view fileNameGlob)
std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
Return the typename of object colName stored in t, if any.
std::vector< std::string > GetTreeFullPaths(const TTree &tree)
std::vector< std::string > GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
std::unique_ptr< TChain > MakeChainForMT(const std::string &name="", const std::string &title="")
Create a TChain object with options that avoid common causes of thread contention.
std::vector< std::unique_ptr< TChain > > MakeFriends(const ROOT::TreeUtils::RFriendInfo &finfo)
Create friends from the main TTree.
ROOT::TreeUtils::RFriendInfo GetFriendInfo(const TTree &tree, bool retrieveEntries=false)
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
int GetSplit(const char *type, std::vector< std::string > &output, int &nestedLoc, EModType mode=TClassEdit::kNone)
Stores in output (after emptying it) the split type.
Information about friend trees of a certain TTree or TChain object.