11#include "RConfigure.h"
32#include <nlohmann/json.hpp>
48using TypeInfoRef = std::reference_wrapper<const std::type_info>;
49struct TypeInfoRefHash {
53struct TypeInfoRefEqualComp {
70 {
"char",
typeid(char)},
71 {
"Char_t",
typeid(char)},
72 {
"unsigned char",
typeid(
unsigned char)},
73 {
"UChar_t",
typeid(
unsigned char)},
75 {
"Int_t",
typeid(
int)},
76 {
"unsigned",
typeid(
unsigned int)},
77 {
"unsigned int",
typeid(
unsigned int)},
78 {
"UInt_t",
typeid(
unsigned int)},
79 {
"short",
typeid(short)},
80 {
"short int",
typeid(short)},
81 {
"Short_t",
typeid(short)},
82 {
"unsigned short",
typeid(
unsigned short)},
83 {
"unsigned short int",
typeid(
unsigned short)},
84 {
"UShort_t",
typeid(
unsigned short)},
85 {
"long",
typeid(long)},
86 {
"long int",
typeid(long)},
87 {
"Long_t",
typeid(long)},
88 {
"unsigned long",
typeid(
unsigned long)},
89 {
"unsigned long int",
typeid(
unsigned long)},
90 {
"ULong_t",
typeid(
unsigned long)},
91 {
"double",
typeid(
double)},
92 {
"Double_t",
typeid(
double)},
93 {
"float",
typeid(float)},
94 {
"Float_t",
typeid(float)},
95 {
"long long",
typeid(
long long)},
96 {
"long long int",
typeid(
long long)},
97 {
"Long64_t",
typeid(
long long)},
98 {
"unsigned long long",
typeid(
unsigned long long)},
99 {
"unsigned long long int",
typeid(
unsigned long long)},
100 {
"ULong64_t",
typeid(
unsigned long long)},
101 {
"bool",
typeid(
bool)},
102 {
"Bool_t",
typeid(
bool)}};
105 return it->second.get();
108 if (!
c->GetTypeInfo()) {
109 throw std::runtime_error(
"Cannot extract type_info of type " +
name +
".");
111 return *
c->GetTypeInfo();
114 throw std::runtime_error(
"Cannot extract type_info of type " +
name +
".");
125 const static std::unordered_map<TypeInfoRef, std::string, TypeInfoRefHash, TypeInfoRefEqualComp>
typeID2TypeNameMap{
126 {
typeid(char),
"char"}, {
typeid(
unsigned char),
"unsigned char"},
127 {
typeid(
int),
"int"}, {
typeid(
unsigned int),
"unsigned int"},
128 {
typeid(short),
"short"}, {
typeid(
unsigned short),
"unsigned short"},
129 {
typeid(long),
"long"}, {
typeid(
unsigned long),
"unsigned long"},
130 {
typeid(
double),
"double"}, {
typeid(float),
"float"},
132 {
typeid(
bool),
"bool"}};
146 return "ROOT::VecOps::RVec<" +
valueType +
">";
154 throw std::runtime_error(
"Could not deduce type of leaf " +
colName);
155 if (
leaf->GetLeafCount() !=
nullptr &&
leaf->GetLenStatic() == 1) {
158 }
else if (
leaf->GetLeafCount() ==
nullptr &&
leaf->GetLenStatic() > 1) {
161 }
else if (
leaf->GetLeafCount() !=
nullptr &&
leaf->GetLenStatic() > 1) {
163 throw std::runtime_error(
"TTree leaf " +
colName +
164 " has both a leaf count and a static length. This is not supported.");
213 return be->GetTypeName();
215 return be->GetClassName();
220 leaf =
static_cast<TLeaf *
>(
branch->GetListOfLeaves()->UncheckedAt(0));
226 return std::string();
246 std::vector<std::string> split;
255 throw std::runtime_error(
"Column \"" +
colName +
256 "\" is not in a dataset and is not a custom column been defined.");
267 {
"unsigned char",
'b'},
272 {
"unsigned int",
'i'},
277 {
"unsigned short",
's'},
278 {
"unsigned short int",
's'},
283 {
"unsigned long",
'g'},
284 {
"unsigned long int",
'g'},
291 {
"long long int",
'L'},
293 {
"unsigned long long",
'l'},
294 {
"unsigned long long int",
'l'},
322 const auto dotPos = col.find(
'.');
325 std::replace(col.begin(), col.end(),
'.',
'_');
327 throw std::runtime_error(
"Column " +
oldName +
" would be written as " + col +
328 " but this column already exists. Please use Alias to select a new name for " +
330 Info(
"Snapshot",
"Column %s will be saved as %s",
oldName.c_str(), col.c_str());
343 "\nRDataFrame: An error occurred during just-in-time compilation. The lines above might indicate the cause of "
344 "the crash\n All RDF objects that have not run an event loop yet should be considered in an invalid state.\n";
345 throw std::runtime_error(
msg);
361 std::string
msg =
"\nAn error occurred during just-in-time compilation";
362 if (!context.empty())
363 msg +=
" in " + context;
365 ". The lines above might indicate the cause of the crash\nAll RDF objects that have not run their event "
366 "loop yet should be considered in an invalid state.\n";
367 throw std::runtime_error(
msg);
376 for (std::size_t i = 0
u; i < 1000u &&
substr_end != std::string::npos; ++i) {
388 const auto str =
colName.data();
390 (
'r' == str[0] ||
't' == str[0]) &&
391 0 ==
strncmp(
"df", str + 1, 2);
398 for (
const auto&
name : names) {
425 std::string
errMsg =
"RDataFrame: type mismatch: column \"" +
colName +
"\" is being used as ";
428 errMsg +=
" (extracted from type info)";
432 errMsg +=
" but the Define or Vary node advertises it as ";
436 errMsg +=
" (extracted from type info)";
440 throw std::runtime_error(
errMsg);
446 return std::find(
vec.cbegin(),
vec.cend(), str) !=
vec.cend();
452 std::shared_lock
l{fMutex};
453 if (
auto it = fStrings.find(
string); it != fStrings.end())
459 std::unique_lock
l{fMutex};
460 if (
auto it = fStrings.find(
string); it != fStrings.end())
463 return fStrings.insert(
string).first;
468 const nlohmann::ordered_json
fullData = nlohmann::ordered_json::parse(std::ifstream(
jsonFile));
470 throw std::runtime_error(
471 R
"(The input specification does not contain any samples. Please provide the samples in the specification like:
475 "trees": ["tree1", "tree2"],
476 "files": ["file1.root", "file2.root"],
477 "metadata": {"lumi": 1.0, }
480 "trees": ["tree3", "tree4"],
481 "files": ["file3.root", "file4.root"],
482 "metadata": {"lumi": 0.5, }
495 if (!
sample.contains(
"trees")) {
496 throw std::runtime_error(
"A list of tree names must be provided for sample " +
sampleName +
".");
499 if (!
sample.contains(
"files")) {
500 throw std::runtime_error(
"A list of files must be provided for sample " +
sampleName +
".");
503 if (!
sample.contains(
"metadata")) {
507 for (
const auto &metadata :
sample[
"metadata"].items()) {
508 const auto &val = metadata.value();
510 m.Add(metadata.key(), val.get<std::string>());
511 else if (val.is_number_integer())
512 m.Add(metadata.key(), val.get<
int>());
513 else if (val.is_number_float())
514 m.Add(metadata.key(), val.get<
double>());
516 throw std::logic_error(
"The metadata keys can only be of type [string|int|double].");
524 std::vector<std::string>
trees =
friends.value()[
"trees"];
525 std::vector<std::string>
files =
friends.value()[
"files"];
527 throw std::runtime_error(
"Mismatch between trees and files in a friend.");
535 if (
range.size() == 1)
537 else if (
range.size() == 2)
571 return ds.DescribeDataset();
576 const std::unordered_map<std::string, ROOT::RDF::Experimental::RSample *> &
sampleMap)
591std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
600 return std::move(
spec.fSamples);
#define R__LOG_DEBUG(DEBUGLEVEL,...)
unsigned long long ULong64_t
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
TRObject operator()(const T1 &t1) const
std::string GetTypeName() const
The head node of a RDF computation graph.
auto Insert(const std::string &string) -> decltype(fStrings)::const_iterator
Inserts the input string in the cache and returns an iterator to the cached string.
The dataset specification for RDataFrame.
Class representing a sample which is a grouping of trees and their fileglobs, and,...
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual const std::vector< std::string > & GetColumnNamesNoDuplicates() const
virtual std::string GetTypeNameWithOpts(std::string_view colName, bool) const
virtual const std::vector< std::string > & GetTopLevelFieldNames() const
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
A log configuration for a channel, e.g.
const_iterator begin() const
const_iterator end() const
A Branch for the case of an object.
TClassRef is used to implement a permanent reference to a TClass object.
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
virtual TBranch * FindBranch(const char *name)
Return the branch that corresponds to the path 'branchname', which can include the name of the tree or...
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
virtual TLeaf * GetLeaf(const char *branchname, const char *leafname)
Return pointer to the first leaf named name in any branch of this tree or any branch in the list of friend trees.
virtual TLeaf * FindLeaf(const char *name)
Find leaf.
ROOT::RLogChannel & RDFLogChannel()
void RunFinalChecks(const ROOT::RDF::RDataSource &ds, bool nodesLeftNotRun)
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each of the given column names.
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
ROOT::RDF::Experimental::RDatasetSpec RetrieveSpecFromJson(const std::string &jsonFile)
Function to retrieve RDatasetSpec from JSON file provided.
std::string ComposeRVecTypeName(const std::string &valueType)
void CallInitializeWithOpts(ROOT::RDF::RDataSource &ds, const std::set< std::string > &suppressErrorsForMissingColumns)
std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
const std::vector< std::string > & GetTopLevelFieldNames(const ROOT::RDF::RDataSource &ds)
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g. "unsigned int") to the corresponding single-character ROOT type code (e.g. 'i').
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between columns.
std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
Return the typename of object colName stored in t, if any.
std::string DescribeDataset(ROOT::RDF::RDataSource &ds)
ROOT::RDF::RSampleInfo CreateSampleInfo(const ROOT::RDF::RDataSource &ds, const std::unordered_map< std::string, ROOT::RDF::Experimental::RSample * > &sampleMap)
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(ROOT::RDF::RDataSource &ds, unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *treeReader)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
void InterpreterCalc(const std::string &code, const std::string &context="")
Jit code in the interpreter with TInterpreter::Calc, throw in case of errors.
void CheckReaderTypeMatches(const std::type_info &colType, const std::type_info &requestedType, const std::string &colName)
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
std::vector< ROOT::RDF::Experimental::RSample > MoveOutSamples(ROOT::RDF::Experimental::RDatasetSpec &spec)
void ProcessMT(ROOT::RDF::RDataSource &ds, ROOT::Detail::RDF::RLoopManager &lm)
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
const std::vector< std::string > & GetColumnNamesNoDuplicates(const ROOT::RDF::RDataSource &ds)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
UInt_t GetThreadPoolSize()
Returns the size of ROOT's thread pool.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
int GetSplit(const char *type, std::vector< std::string > &output, int &nestedLoc, EModType mode=TClassEdit::kNone)
Stores in output (after emptying it) the split type.