Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
TMVA_SOFIE_GNN_Application.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_ml
3/// \notebook -nodraw
4// Macro evaluating a GNN model which was generated with the Parser macro
5//
6// \macro_code
7
8// need to add include path to find generated model file
9#ifdef __CLING__
12#endif
13
14#include "encoder.hxx"
15#include "core.hxx"
16#include "decoder.hxx"
17#include "output_transform.hxx"
18
19#include "TMVA/SOFIE_common.hxx"
20#include "TRandom3.h"
21#include "TH1.h"
22#include "TCanvas.h"
23#include "TFile.h"
24#include "TTree.h"
25#include "TSystem.h"
26#include "ROOT/RDataFrame.hxx"
27
28using namespace TMVA::Experimental;
29using namespace TMVA::Experimental::SOFIE;
30
31double check_mem(std::string s = ""){
33 printf("%s - ",s.c_str());
35 printf(" Rmem = %8.3f MB, Vmem = %8.f3 MB \n",
36 p.fMemResident /1024., /// convert memory from kB to MB
37 p.fMemVirtual /1024.
38 );
39 return p.fMemResident / 1024.;
40}
41
42
43template<class T>
45 std::cout << " shape : " << ConvertShapeToString(t.GetShape()) << " size : " << t.GetSize() << "\n";
46 auto & shape = t.GetShape();
47 auto p = t.GetData();
48 size_t nrows = (shape.size() > 1) ? shape[0] : 1;
49 size_t ncols = (shape.size() > 1) ? t.GetStrides()[0] : shape[0];
50 for (size_t i = 0; i < nrows; i++) {
51 for (size_t j = 0; j < ncols; j++) {
52 if (j==ncols-1) {
53 if (j>10) std::cout << "... ";
54 std::cout << *p << std::endl;
55 }
56 else if (j<10)
57 std::cout << *p << ", ";
58 p++;
59 }
60 }
61 std::cout << std::endl;
62}
63void Print(GNN_Data & d, std::string txt = "") {
64 if (!txt.empty()) std::cout << std::endl << txt << std::endl;
65 std::cout << "node data:"; PrintTensor(d.node_data);
66 std::cout << "edge data:"; PrintTensor(d.edge_data);
67 std::cout << "global data:"; PrintTensor(d.global_data);
68 std::cout << "edge index:"; PrintTensor(d.edge_index);
69}
70
71struct SOFIE_GNN {
72 bool verbose = false;
73 TMVA_SOFIE_encoder::Session encoder;
74 TMVA_SOFIE_core::Session core;
75 TMVA_SOFIE_decoder::Session decoder;
76 TMVA_SOFIE_output_transform::Session output_transform;
77
78 std::vector<GNN_Data> Infer(const GNN_Data & data, int nsteps) {
79 // infer function
80 auto input_data = Copy(data);
81 if (verbose) Print(input_data,"input_data");
82 encoder.infer(input_data);
83 // latent0 is result of encoder. Need to copy because this stays the same
84 auto latent0 = Copy(input_data);
85 GNN_Data latent = input_data; // this can be a view
86 std::vector<GNN_Data> outputData;
87 for (int i = 0; i < nsteps; i++) {
88 if (verbose) Print(latent0,"input decoded data");
89 if (verbose) Print(latent,"latent data");
91 if (verbose) Print(core_input, "after concatenate");
92 core.infer(core_input);
93 if (verbose) Print(core_input, "after core inference");
94 // here I need to copy
96 decoder.infer(core_input);
98 outputData.push_back(Copy(core_input));
99 }
100 return outputData;
101 }
102
103 SOFIE_GNN(bool v = false) : verbose(v) {}
104
105};
106
107const int num_max_nodes = 10;
108const int num_max_edges = 30;
109const int NODE_FEATURE_SIZE = 4;
110const int EDGE_FEATURE_SIZE = 4;
112
113std::vector<GNN_Data> GenerateData(int nevts, int seed) {
114 TRandom3 r(seed);
115 std::vector<GNN_Data> dataSet;
116 dataSet.reserve(nevts);
117 for (int i = 0; i < nevts; i++) {
118 // generate first number of nodes and edges
119 // size_t num_nodes = num_max_nodes;//r.Integer(num_max_nodes-2) + 2;
120 // size_t num_edges = num_max_edges; //r.Integer(num_max_edges-1) + 1;
121 size_t num_nodes = r.Integer(num_max_nodes-2) + 2;
122 size_t num_edges = r.Integer(num_max_edges-1) + 1;
123 GNN_Data gd;
124 gd.node_data = RTensor<float>({num_nodes, NODE_FEATURE_SIZE});
125 gd.edge_data = RTensor<float>({num_edges, EDGE_FEATURE_SIZE});
126 gd.global_data = RTensor<float>({1, GLOBAL_FEATURE_SIZE});
127 gd.edge_index = RTensor<int>({2, num_edges});
128
129 auto genValue = [&]() { return r.Rndm()*10 -5; };
130 auto genLink = [&] () { return r.Integer(num_nodes);};
131 std::generate(gd.node_data.begin(), gd.node_data.end(), genValue);
132 std::generate(gd.edge_data.begin(), gd.edge_data.end(), genValue);
133 std::generate(gd.global_data.begin(), gd.global_data.end(), genValue);
134 std::generate(gd.edge_index.begin(), gd.edge_index.end(), genLink);
135 dataSet.emplace_back(gd);
136 }
137 return dataSet;
138}
139
140std::vector<GNN_Data> ReadData(std::string treename, std::string filename) {
141 bool verbose = false;
143 auto ndata = df.Take<ROOT::RVec<float>>("node_data");
144 auto edata = df.Take<ROOT::RVec<float>>("edge_data");
145 auto gdata = df.Take<ROOT::RVec<float>>("global_data");
146 auto rdata = df.Take<ROOT::RVec<int>>("receivers");
147 auto sdata = df.Take<ROOT::RVec<int>>("senders");
148 int nevts = ndata.GetPtr()->size();
149 std::vector<GNN_Data> dataSet;
150 dataSet.reserve(nevts);
151 for (int i = 0; i < nevts; i++) {
152 GNN_Data gd;
153 auto & n = (*(ndata.GetPtr()))[i];
154 size_t num_nodes = n.size()/NODE_FEATURE_SIZE;
155 auto & e = (*(edata.GetPtr()))[i];
156 size_t num_edges = e.size()/EDGE_FEATURE_SIZE;
157 auto & g = (*(gdata.GetPtr()))[i];
158 gd.node_data = RTensor<float>(n.data(), {num_nodes, NODE_FEATURE_SIZE});
159 gd.edge_data = RTensor<float>(e.data(), {num_edges, EDGE_FEATURE_SIZE});
160 gd.global_data = RTensor<float>(g.data(), {1, GLOBAL_FEATURE_SIZE});
161 gd.edge_index = RTensor<int>({2, num_edges});
162 auto & r = (*(rdata.GetPtr()))[i];
163 auto & s = (*(sdata.GetPtr()))[i];
164 // need to copy receivers/senders in edge_index tensor
165 std::copy(r.begin(), r.end(), gd.edge_index.GetData());
166 std::copy(s.begin(), s.end(), gd.edge_index.GetData()+num_edges);
167
168 dataSet.emplace_back(Copy(gd)); // need to copy data in vector to own
169 if (i < 1 && verbose) Print(dataSet[i],"Input for Event" + std::to_string(i));
170 }
171 return dataSet;
172}
173
174
175void TMVA_SOFIE_GNN_Application (bool verbose = false)
176{
177 check_mem("Initial memory");
178 SOFIE_GNN gnn;
179 check_mem("After creating GNN");
180
181
182 const int seed = 111;
183 const int nproc_steps = 5;
184 // generate the input data
185
186 int nevts;
187 //std::cout << "generating data\n";
188 //nevts = 100;
189 //auto inputData = GenerateData(nevts, seed);
190
191 std::cout << "reading data\n";
192 auto inputData = ReadData("gdata","graph_data.root");
193 nevts = inputData.size();
194
195 //std::cout << "padding data\n";
196 //PadData(inputData) ;
197
198 auto h1 = new TH1D("h1","SOFIE Node data",40,1,0);
199 auto h2 = new TH1D("h2","SOFIE Edge data",40,1,0);
200 auto h3 = new TH1D("h3","SOFIE Global data",40,1,0);
201 std::cout << "doing inference...\n";
202
203
204 check_mem("Before evaluating");
205 TStopwatch w; w.Start();
206 for (int i = 0; i < nevts; i++) {
207 auto result = gnn.Infer(inputData[i], nproc_steps);
208 // compute resulting mean and plot them
209 auto & lr = result.back();
210 if (i < 1 && verbose) Print(lr,"Output for Event" + std::to_string(i));
211 h1->Fill(TMath::Mean(lr.node_data.begin(), lr.node_data.end()));
212 h2->Fill(TMath::Mean(lr.edge_data.begin(), lr.edge_data.end()));
213 h3->Fill(TMath::Mean(lr.global_data.begin(), lr.global_data.end()));
214 }
215 w.Stop();
216 w.Print();
217 check_mem("End evaluation");
218 auto c1 = new TCanvas("c1","SOFIE Results");
219 c1->Divide(1,3);
220 c1->cd(1); h1->Draw();
221 c1->cd(2); h2->Draw();
222 c1->cd(3); h3->Draw();
223
224
225 // compare with the reference
226 auto c2 = new TCanvas("c2","Reference Results");
227 auto file = TFile::Open("graph_data.root");
228 auto o1 = file->Get("h1");
229 auto o2 = file->Get("h2");
230 auto o3 = file->Get("h3");
231 c2->Divide(1,3);
232 c2->cd(1); o1->Draw();
233 c2->cd(2); o2->Draw();
234 c2->cd(3); o3->Draw();
235
236}
237
238int main () {
239
241}
242
243
#define d(i)
Definition RSha256.hxx:102
#define g(i)
Definition RSha256.hxx:105
#define e(i)
Definition RSha256.hxx:103
#define R__ADD_INCLUDE_PATH(PATH)
Definition Rtypes.h:473
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
void Print(GNN_Data &d, std::string txt="")
const int EDGE_FEATURE_SIZE
const int GLOBAL_FEATURE_SIZE
const int num_max_edges
std::vector< GNN_Data > ReadData(std::string treename, std::string filename)
void PrintTensor(RTensor< T > &t)
const int num_max_nodes
void TMVA_SOFIE_GNN_Application(bool verbose=false)
const int NODE_FEATURE_SIZE
double check_mem(std::string s="")
std::vector< GNN_Data > GenerateData(int nevts, int seed)
R__EXTERN TSystem * gSystem
Definition TSystem.h:572
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1530
The Canvas class.
Definition TCanvas.h:23
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4131
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:925
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition TH1.cxx:3316
void Draw(Option_t *option="") override
Draw this histogram with options.
Definition TH1.cxx:3038
Random number generator class based on M.
Definition TRandom3.h:27
Stopwatch class.
Definition TStopwatch.h:28
virtual int GetProcInfo(ProcInfo_t *info) const
Returns cpu and memory used by this process into the ProcInfo_t structure.
Definition TSystem.cxx:2501
return c1
Definition legend1.C:41
const Int_t n
Definition legend1.C:16
TH1F * h1
Definition legend1.C:5
return c2
Definition legend2.C:14
std::string ConvertShapeToString(std::vector< size_t > shape)
TMVA::Experimental::RTensor< T > Concatenate(TMVA::Experimental::RTensor< T > &t1, TMVA::Experimental::RTensor< T > &t2, int axis=0)
GNN_Data Copy(const GNN_Data &data)
Double_t Mean(Long64_t n, const T *a, const Double_t *w=nullptr)
Returns the weighted mean of an array a with length n.
Definition TMath.h:1169
TMVA_SOFIE_core::Session core
TMVA_SOFIE_output_transform::Session output_transform
TMVA_SOFIE_decoder::Session decoder
TMVA_SOFIE_encoder::Session encoder
std::vector< GNN_Data > Infer(const GNN_Data &data, int nsteps)