Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
df036_missingBranches.py
Go to the documentation of this file.
1# \file
2# \ingroup tutorial_dataframe
3# \notebook -nodraw
4#
5# This example shows how to process a dataset where entries might be
6# incomplete due to one or more missing branches in one or more of the files
7# in the dataset. It shows usage of the FilterAvailable and DefaultValueFor
8# RDataFrame functionalities to act upon the missing entries.
9#
10# \macro_code
11# \macro_output
12#
13# \date September 2024
14# \author Vincenzo Eduardo Padulano (CERN)
15import array
16import os
17
18import ROOT
19
20
class DatasetContext:
    """Create the three input ROOT files used by the tutorial and clean them up.

    Constructing an instance writes one file per entry of ``filenames``, each
    holding a single TTree named after the matching entry of ``treenames``:

    * file 1 / ``tree_1``: branches ``x`` and ``y``
    * file 2 / ``tree_2``: branch ``y`` only
    * file 3 / ``tree_3``: branch ``x`` only

    Used as a context manager, the instance deletes those files on exit.
    """

    filenames = [
        "df036_missingBranches_py_file_1.root",
        "df036_missingBranches_py_file_2.root",
        "df036_missingBranches_py_file_3.root",
    ]
    treenames = ["tree_1", "tree_2", "tree_3"]
    nentries = 5

    def __init__(self):
        # One mapping per output file: branch name -> factor applied to the
        # 1-based entry number to obtain the value stored in that branch.
        # Insertion order is the order in which the branches are created.
        branch_layouts = (
            {"x": 1, "y": 2},
            {"y": 3},
            {"x": 4},
        )
        for index, layout in enumerate(branch_layouts):
            self._write_tree(self.filenames[index], self.treenames[index], layout, self.nentries)

    @staticmethod
    def _write_tree(filename, treename, layout, nentries):
        """Write a TTree with one integer branch per key of ``layout`` into ``filename``."""
        with ROOT.TFile(filename, "RECREATE"):
            tree = ROOT.TTree(treename, treename)

            # One single-element buffer per branch; any array can also be a
            # numpy array. The buffers are kept in this dict for as long as
            # the tree is being filled.
            buffers = {branch: array.array("i", [0]) for branch in layout}
            for branch, buffer in buffers.items():
                tree.Branch(branch, buffer, branch + "/I")

            for entry in range(1, nentries + 1):
                for branch, factor in layout.items():
                    buffers[branch][0] = factor * entry
                tree.Fill()

            tree.Write()

    def __enter__(self):
        """Return the instance itself so it can be bound with ``with ... as``."""
        return self

    def __exit__(self, *_):
        """Delete every file listed in ``filenames`` when the context ends."""
        for path in self.filenames:
            os.remove(path)
80
81
def df036_missingBranches(dataset: DatasetContext) -> None:
    """Show three ways of handling branches that are missing in part of a chain.

    Args:
        dataset: Provides ``filenames`` and ``treenames``, two equally long
            sequences naming the input files and the tree stored in each.

    The function prints a heading followed by the corresponding table for each
    of the three examples.
    """
    # The input dataset contains three files, with one TTree each.
    # The first contains branches (x, y), the second only branch y, the third
    # only branch x. The TChain will process the three files, encountering a
    # different missing branch when switching to the next tree.
    chain = ROOT.TChain()
    for fname, tname in zip(dataset.filenames, dataset.treenames):
        chain.Add(fname + "?#" + tname)

    df = ROOT.RDataFrame(chain)

    # Sentinel written in place of a missing value: the minimum of the
    # numeric type that `int` maps to, so it is easy to spot in the output.
    default_value = ROOT.std.numeric_limits[int].min()

    # Example 1: provide a default value for all missing branches
    display_1 = (
        df.DefaultValueFor("x", default_value)
        .DefaultValueFor("y", default_value)
        .Display(columnList=("x", "y"), nRows=15)
    )

    # Example 2: provide a default value for branch y, but skip events where
    # branch x is missing
    display_2 = (
        df.DefaultValueFor("y", default_value)
        .FilterAvailable("x")
        .Display(columnList=("x", "y"), nRows=15)
    )

    # Example 3: only keep events where branch y is missing and display values for branch x
    display_3 = df.FilterMissing("y").Display(columnList=("x",), nRows=15)

    # All three displays are booked above, before any of them is printed.
    # Each heading must be followed by its table; without the Print() calls
    # the booked results would never be shown.
    print("Example 1: provide a default value for all missing branches")
    display_1.Print()
    print("Example 2: provide a default value for branch y, but skip events where branch x is missing")
    display_2.Print()
    print("Example 3: only keep events where branch y is missing and display values for branch x")
    display_3.Print()
115
116
if __name__ == "__main__":
    # Create the input files, run the tutorial on them, and let the context
    # manager delete the files again once the tutorial has finished.
    with DatasetContext() as tutorial_dataset:
        df036_missingBranches(tutorial_dataset)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...