Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df037_TTreeEventMatching.py
Go to the documentation of this file.
# \file
# \ingroup tutorial_dataframe
# \notebook -nodraw
#
# This example shows processing of a TTree-based dataset with horizontal
# concatenations (friends) and event matching (based on TTreeIndex). In case
# the current event being processed does not match one (or more) of the friend
# datasets, one can use the FilterAvailable and DefaultValueFor functionalities
# to act upon the situation.
#
# \macro_code
# \macro_output
#
# \date September 2024
# \author Vincenzo Eduardo Padulano (CERN)
16import array
17import os
18
19import ROOT
20
21
class DatasetContext:
    """A helper class to create the dataset for the tutorial below.

    Instantiating the class writes three ROOT files in the current working
    directory: one with the main tree and two with the auxiliary trees. Every
    tree has an integer 'idx' branch plus one integer payload branch
    ('x', 'y' or 'z').

    The class is meant to be used as a context manager: leaving the 'with'
    block removes the files created by the constructor.
    """

    main_file = "df037_TTreeEventMatching_py_main.root"
    aux_file_1 = "df037_TTreeEventMatching_py_aux_1.root"
    aux_file_2 = "df037_TTreeEventMatching_py_aux_2.root"
    main_tree_name = "events"
    aux_tree_name_1 = "auxdata_1"
    aux_tree_name_2 = "auxdata_2"

    def __init__(self):
        """Create the main file and the two auxiliary files of the dataset."""
        with ROOT.TFile(self.main_file, "RECREATE"):
            main_tree = ROOT.TTree(self.main_tree_name, self.main_tree_name)
            idx = array.array("i", [0])  # any array can also be a numpy array
            x = array.array("i", [0])
            main_tree.Branch("idx", idx, "idx/I")
            main_tree.Branch("x", x, "x/I")

            # The branches read from the one-element buffers above: set the
            # values for an entry, then call Fill() to store that entry.
            for index, value in ((1, 1), (2, 2), (3, 3)):
                idx[0] = index
                x[0] = value
                main_tree.Fill()

            main_tree.Write()

        # The first auxiliary file has matching indices 1 and 2, but not 3
        with ROOT.TFile(self.aux_file_1, "RECREATE"):
            aux_tree_1 = ROOT.TTree(self.aux_tree_name_1, self.aux_tree_name_1)
            idx = array.array("i", [0])  # any array can also be a numpy array
            y = array.array("i", [0])
            aux_tree_1.Branch("idx", idx, "idx/I")
            aux_tree_1.Branch("y", y, "y/I")

            for index, value in ((1, 4), (2, 5)):
                idx[0] = index
                y[0] = value
                aux_tree_1.Fill()

            aux_tree_1.Write()

        # The second auxiliary file has matching indices 1 and 3, but not 2
        with ROOT.TFile(self.aux_file_2, "RECREATE"):
            aux_tree_2 = ROOT.TTree(self.aux_tree_name_2, self.aux_tree_name_2)
            idx = array.array("i", [0])  # any array can also be a numpy array
            z = array.array("i", [0])
            aux_tree_2.Branch("idx", idx, "idx/I")
            aux_tree_2.Branch("z", z, "z/I")

            for index, value in ((1, 6), (3, 7)):
                idx[0] = index
                z[0] = value
                aux_tree_2.Fill()

            aux_tree_2.Write()

    def __enter__(self):
        """Return the context itself so that file and tree names can be read from it."""
        return self

    def __exit__(self, *_):
        """Remove the files created by the constructor."""
        for file_name in (self.main_file, self.aux_file_1, self.aux_file_2):
            try:
                os.remove(file_name)
            except FileNotFoundError:
                # Already gone: keep going so the remaining files are still
                # cleaned up and no unrelated error escapes from the cleanup.
                pass
93
94
def df037_TTreeEventMatching(dataset: DatasetContext):
    """Run the three event-matching examples on the tutorial dataset.

    Builds one chain per tree of the dataset, attaches the two auxiliary
    chains as friends of the main one, and prints the result of three
    RDataFrame computations that handle entries without a match in the
    auxiliary trees in different ways.

    Args:
        dataset: Provides the file names and tree names of the main tree and
            of the two auxiliary trees.
    """
    # The input dataset has one main TTree and two auxiliary. The 'idx' branch
    # is used as the index to match events between the trees.
    # - The main tree has 3 entries, with 'idx' values(1, 2, 3).
    # - The first auxiliary tree has 2 entries, with 'idx' values(1, 2).
    # - The second auxiliary tree has 2 entries, with 'idx' values(1, 3).
    # The two auxiliary trees are concatenated horizontally with the main one.
    main_chain = ROOT.TChain(dataset.main_tree_name)
    main_chain.Add(dataset.main_file)

    # Index the auxiliary chains on 'idx' so that their entries are matched to
    # the main tree by index value rather than by entry number.
    aux_chain_1 = ROOT.TChain(dataset.aux_tree_name_1)
    aux_chain_1.Add(dataset.aux_file_1)
    aux_chain_1.BuildIndex("idx")

    aux_chain_2 = ROOT.TChain(dataset.aux_tree_name_2)
    aux_chain_2.Add(dataset.aux_file_2)
    aux_chain_2.BuildIndex("idx")

    main_chain.AddFriend(aux_chain_1)
    main_chain.AddFriend(aux_chain_2)

    # Create an RDataFrame to process the input dataset. The DefaultValueFor and
    # FilterAvailable functionalities can be used to decide what to do for
    # the events that do not match entirely according to the index column 'idx'
    df = ROOT.RDataFrame(main_chain)

    aux_tree_1_colidx = dataset.aux_tree_name_1 + ".idx"
    aux_tree_1_coly = dataset.aux_tree_name_1 + ".y"
    aux_tree_2_colidx = dataset.aux_tree_name_2 + ".idx"
    aux_tree_2_colz = dataset.aux_tree_name_2 + ".z"

    # Columns shown by the examples that display the full, joined dataset
    all_columns = ("idx", aux_tree_1_colidx, aux_tree_2_colidx, "x", aux_tree_1_coly, aux_tree_2_colz)

    default_value = ROOT.std.numeric_limits[int].min()

    # Example 1: provide default values for all columns in case there was no
    # match
    display_1 = (
        df.DefaultValueFor(aux_tree_1_colidx, default_value)
        .DefaultValueFor(aux_tree_1_coly, default_value)
        .DefaultValueFor(aux_tree_2_colidx, default_value)
        .DefaultValueFor(aux_tree_2_colz, default_value)
        .Display(all_columns)
    )

    # Example 2: skip the entire entry when there was no match for a column
    # in the first auxiliary tree, but keep the entries when there is no match
    # in the second auxiliary tree and provide a default value for those
    display_2 = (
        df.DefaultValueFor(aux_tree_2_colidx, default_value)
        .DefaultValueFor(aux_tree_2_colz, default_value)
        .FilterAvailable(aux_tree_1_coly)
        .Display(all_columns)
    )

    # Example 3: Keep entries from the main tree for which there is no
    # corresponding match in entries of the first auxiliary tree
    display_3 = df.FilterMissing(aux_tree_1_colidx).Display(("idx", "x"))

    print("Example 1: provide default values for all columns")
    display_1.Print()
    print("Example 2: always skip the entry when there is no match")
    display_2.Print()
    print("Example 3: keep entries from the main tree for which there is no match in the auxiliary tree")
    display_3.Print()
158
159
if __name__ == "__main__":
    # Create the input files, run the tutorial on them, and let the context
    # manager take care of the cleanup when the block is left.
    with DatasetContext() as dataset:
        df037_TTreeEventMatching(dataset)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...