TMVA_CNN_Classification.py
## \file
## \ingroup tutorial_ml
## \notebook
## TMVA Classification Example Using a Convolutional Neural Network
##
## This is an example of using a CNN in TMVA. We do classification using a toy image data set
## that is generated when running the example macro
##
## \macro_image
## \macro_output
## \macro_code
##
## \author Harshal Shende


# TMVA Classification Example Using a Convolutional Neural Network


## Helper function to create the input image data:
## we create signal and background 2D histograms from 2D Gaussians
## whose location (the means in X and Y) differs event by event.
## The difference between signal and background lies in the Gaussian width:
## the background width is slightly larger than the signal width, by a few percent.

import ROOT

import os
import importlib.util

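# Each entry of opt below enables (non-zero) or disables (0) one of the methods, in this order:
# [TMVA CNN, Keras CNN, TMVA DNN, TMVA BDT, PyTorch CNN]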
opt = [1, 1, 1, 1, 1]
useTMVACNN = opt[0] if len(opt) > 0 else False
useKerasCNN = opt[1] if len(opt) > 1 else False
useTMVADNN = opt[2] if len(opt) > 2 else False
useTMVABDT = opt[3] if len(opt) > 3 else False
usePyTorchCNN = opt[4] if len(opt) > 4 else False

TMVA = ROOT.TMVA
TFile = ROOT.TFile


def MakeImagesTree(n, nh, nw):
    # image size (nh x nw)
    ntot = nh * nw
    fileOutName = "images_data_16x16.root"
    nRndmEvts = 10000  # number of events we use to fill each image
    delta_sigma = 0.1  # difference in the Gaussian width (sigma) between signal and background
    pixelNoise = 5

    sX1 = 3
    sY1 = 3
    sX2 = sX1 + delta_sigma
    sY2 = sY1 - delta_sigma
    h1 = ROOT.TH2D("h1", "h1", nh, 0, 10, nw, 0, 10)
    h2 = ROOT.TH2D("h2", "h2", nh, 0, 10, nw, 0, 10)
    f1 = ROOT.TF2("f1", "xygaus")
    f2 = ROOT.TF2("f2", "xygaus")
    sgn = ROOT.TTree("sig_tree", "signal_tree")
    bkg = ROOT.TTree("bkg_tree", "background_tree")

    f = TFile(fileOutName, "RECREATE")
    x1 = ROOT.std.vector["float"](ntot)
    x2 = ROOT.std.vector["float"](ntot)

    # create signal and background trees with a single branch:
    # an std::vector<float> of size nh x nw containing the image data
    bkg.Branch("vars", "std::vector<float>", x1)
    sgn.Branch("vars", "std::vector<float>", x2)

    f1.SetParameters(1, 5, sX1, 5, sY1)
    f2.SetParameters(1, 5, sX2, 5, sY2)
    ROOT.Info("TMVA_CNN_Classification", "Filling ROOT tree \n")
    for i in range(n):
        if i % 1000 == 0:
            print("Generating image event ...", i)

        h1.Reset()
        h2.Reset()
        # generate random means in range [3,7] to stay away from the histogram border
        f1.SetParameter(1, ROOT.gRandom.Uniform(3, 7))
        f1.SetParameter(3, ROOT.gRandom.Uniform(3, 7))
        f2.SetParameter(1, ROOT.gRandom.Uniform(3, 7))
        f2.SetParameter(3, ROOT.gRandom.Uniform(3, 7))

        h1.FillRandom(f1, nRndmEvts)
        h2.FillRandom(f2, nRndmEvts)

        for k in range(nh):
            for l in range(nw):
                m = k * nw + l
                # add some noise in each bin
                x1[m] = h1.GetBinContent(k + 1, l + 1) + ROOT.gRandom.Gaus(0, pixelNoise)
                x2[m] = h2.GetBinContent(k + 1, l + 1) + ROOT.gRandom.Gaus(0, pixelNoise)

        sgn.Fill()
        bkg.Fill()

    sgn.Write()
    bkg.Write()

    print("Signal and background trees with image data written to the file %s" % f.GetName())
    sgn.Print()
    bkg.Print()
    f.Close()

hasGPU = "tmva-gpu" in ROOT.gROOT.GetConfigFeatures()
hasCPU = "tmva-cpu" in ROOT.gROOT.GetConfigFeatures()
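# The full feature list of this ROOT build can be inspected with, for example:
# print(ROOT.gROOT.GetConfigFeatures())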

nevt = 1000  # use a larger value to get better results

if not hasCPU and not hasGPU:
    ROOT.Warning(
        "TMVA_CNN_Classification",
        "ROOT does not support tmva-cpu or tmva-gpu - skipping TMVA-DNN and TMVA-CNN",
    )
    useTMVACNN = False
    useTMVADNN = False

if "tmva-pymva" not in ROOT.gROOT.GetConfigFeatures():
    useKerasCNN = False
    usePyTorchCNN = False
else:
    TMVA.PyMethodBase.PyInitialize()

if not useTMVACNN:
    ROOT.Warning(
        "TMVA_CNN_Classification",
        "TMVA is not built with GPU or CPU multi-thread support. Cannot use TMVA Deep Learning for CNN",
    )

writeOutputFile = True

num_threads = 4  # use at most 4 threads
max_epochs = 10  # maximum number of epochs used for training


# enable implicit multi-threading if ROOT was built with MT support
if "imt" in ROOT.gROOT.GetConfigFeatures():
    ROOT.EnableImplicitMT(num_threads)
    ROOT.gSystem.Setenv("OMP_NUM_THREADS", "1")  # switch off MT in OpenBLAS
    print("Running with nthreads = {}".format(ROOT.GetThreadPoolSize()))
else:
    print("Running in serial mode since ROOT does not support MT")


outputFile = None
if writeOutputFile:
    outputFile = TFile.Open("TMVA_CNN_ClassificationOutput.root", "RECREATE")


## Create TMVA Factory

# Create the Factory class. Later you can choose the methods
# whose performance you'd like to investigate.

# The factory is the main TMVA object you have to interact with. Here is the list of parameters you need to pass:

# - The first argument is the base of the name of all the output
#   weight files in the directory weights/ that will be created with the
#   method parameters

# - The second argument is the output file for the training results

# - The third argument is a string option defining some general configuration for the TMVA session.
#   For example, all TMVA output can be suppressed by enabling the Silent option (here we keep it disabled).

# - Note that we disable any pre-transformation of the input variables and we avoid computing correlations between
#   the input variables

factory = TMVA.Factory(
    "TMVA_CNN_Classification",
    outputFile,
    V=False,
    ROC=True,
    Silent=False,
    Color=True,
    AnalysisType="Classification",
    Transformations=None,
    Correlations=False,
)


## Declare DataLoader(s)

# The next step is to declare the DataLoader class that deals with the input variables

# Define the input variables that shall be used for the MVA training;
# note that you may also use variable expressions, which can be parsed by TTree::Draw( "expression" )

# In this case the input data consist of 16x16 pixel images, stored as a single vector branch in a ROOT TTree

loader = TMVA.DataLoader("dataset")


## Setup Dataset(s)

# Define the input data file and the signal and background trees


imgSize = 16 * 16
inputFileName = "images_data_16x16.root"

# if the input file does not exist, create it
if ROOT.gSystem.AccessPathName(inputFileName):
    MakeImagesTree(nevt, 16, 16)

inputFile = TFile.Open(inputFileName)
if inputFile is None:
    raise FileNotFoundError("Error opening input file %s" % inputFileName)


# inputFileName = "tmva_class_example.root"


# --- Register the training and test trees

signalTree = inputFile.Get("sig_tree")
backgroundTree = inputFile.Get("bkg_tree")

nEventsSig = signalTree.GetEntries()
nEventsBkg = backgroundTree.GetEntries()

# global event weights per tree (see below for setting event-wise weights)
signalWeight = 1.0
backgroundWeight = 1.0

# You can add an arbitrary number of signal or background trees
loader.AddSignalTree(signalTree, signalWeight)
loader.AddBackgroundTree(backgroundTree, backgroundWeight)

## add event variables (image)
## use the new method (available since ROOT 6.20) to add a variable array for all image data
loader.AddVariablesArray("vars", imgSize)
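# Before AddVariablesArray was available, each pixel had to be registered individually via a variable
# expression. An equivalent (illustrative, commented out) alternative would be:
# for i in range(imgSize):
#     loader.AddVariable("vars[{}]".format(i), "F")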

# Set individual event weights (the variables must exist in the original TTree)
# for signal    : loader.SetSignalWeightExpression("weight1*weight2")
# for background: loader.SetBackgroundWeightExpression("weight1*weight2")
# loader.SetBackgroundWeightExpression("weight")

# Apply additional cuts on the signal and background samples (can be different)
mycuts = ""  # for example: mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"
mycutb = ""  # for example: mycutb = "abs(var1)<0.5"

# Tell the factory how to use the training and testing events
# If no numbers of events are given, half of the events in the tree are used
# for training, and the other half for testing:
#     loader.PrepareTrainingAndTestTree(mycut, "SplitMode=random:!V")
# It is also possible to specify the number of training and testing events;
# note that we disable the computation of the correlation matrix of the input variables

nTrainSig = 0.8 * nEventsSig
nTrainBkg = 0.8 * nEventsBkg

# build the options for DataLoader::PrepareTrainingAndTestTree
loader.PrepareTrainingAndTestTree(
    mycuts,
    mycutb,
    nTrain_Signal=nTrainSig,
    nTrain_Background=nTrainBkg,
    SplitMode="Random",
    SplitSeed=100,
    NormMode="NumEvents",
    V=False,
    CalcCorrelations=False,
)


# Example DataSetInfo output:
#   DataSetInfo : [dataset] : Added class "Signal"
#               : Add Tree sig_tree of type Signal with 10000 events
#   DataSetInfo : [dataset] : Added class "Background"
#               : Add Tree bkg_tree of type Background with 10000 events

# signalTree.Print()

## Booking Methods

# Here we book the TMVA methods. We book a Boosted Decision Tree method (BDT)


# Boosted Decision Trees
if useTMVABDT:
    factory.BookMethod(
        loader,
        TMVA.Types.kBDT,
        "BDT",
        V=False,
        NTrees=400,
        MinNodeSize="2.5%",
        MaxDepth=2,
        BoostType="AdaBoost",
        AdaBoostBeta=0.5,
        UseBaggedBoost=True,
        BaggedSampleFraction=0.5,
        SeparationType="GiniIndex",
        nCuts=20,
    )


#### Booking Deep Neural Network

# Here we book the DNN of TMVA. See the example TMVA_Higgs_Classification.C for a detailed description of the
# options

if useTMVADNN:
    layoutString = ROOT.TString(
        "DENSE|100|RELU,BNORM,DENSE|100|RELU,BNORM,DENSE|100|RELU,BNORM,DENSE|100|RELU,DENSE|1|LINEAR"
    )

    # Training strategies
    # one can concatenate several training strings with different parameters (e.g. learning rates or regularization
    # parameters); the training strings must be concatenated with the `|` delimiter
    trainingString1 = ROOT.TString(
        "LearningRate=1e-3,Momentum=0.9,Repetitions=1,"
        "ConvergenceSteps=5,BatchSize=100,TestRepetitions=1,"
        "WeightDecay=1e-4,Regularization=None,"
        "Optimizer=ADAM,DropConfig=0.0+0.0+0.0+0.0"
    )  # + "|" + trainingString2 + ...
    trainingString1 += ",MaxEpochs=" + str(max_epochs)

    # Build now the full DNN option string
    dnnMethodName = "TMVA_DNN_CPU"

    # use GPU if available
    dnnOptions = "CPU"
    if hasGPU:
        dnnOptions = "GPU"
        dnnMethodName = "TMVA_DNN_GPU"

    factory.BookMethod(
        loader,
        TMVA.Types.kDL,
        dnnMethodName,
        H=False,
        V=True,
        ErrorStrategy="CROSSENTROPY",
        VarTransform=None,
        WeightInitialization="XAVIER",
        Layout=layoutString,
        TrainingStrategy=trainingString1,
        Architecture=dnnOptions,
    )


### Book Convolutional Neural Network in TMVA

# For building a CNN one needs to define

# - Input Layout : number of channels (in this case = 1) | image height | image width
# - Batch Layout : batch size | number of channels | image size = (height*width)

# Then one adds convolutional layers and max-pooling layers.

# - For a convolutional layer the option string has to be:
#   - CONV | number of units | filter height | filter width | stride height | stride width | padding height | padding
#     width | activation function

#   - note that in this case we use a 3x3 filter with padding=1 and stride=1, so the output dimension of the
#     conv layer is equal to the input

#   - note that we use a batch normalization layer after the first convolutional layer. This seems to help the
#     convergence significantly

# - For the max-pooling layer:
#   - MAXPOOL | pool height | pool width | stride height | stride width

# The RESHAPE layer is needed to flatten the output before the Dense layer

# Note that running the CNN requires CPU or GPU support

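# As a concrete reading of the Layout string used in the booking below (descriptive only, not executed):
#   CONV|10|3|3|1|1|1|1|RELU : 10 filters of size 3x3, stride 1x1, padding 1x1, ReLU activation
#   BNORM                    : batch normalization layer
#   MAXPOOL|2|2|1|1          : 2x2 max pooling with stride 1x1
#   RESHAPE|FLAT             : flatten the feature maps before the dense layers
#   DENSE|100|RELU           : fully connected hidden layer with 100 units
#   DENSE|1|LINEAR           : single linear output unit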
if useTMVACNN:
    # Training strategies.
    trainingString1 = ROOT.TString(
        "LearningRate=1e-3,Momentum=0.9,Repetitions=1,"
        "ConvergenceSteps=5,BatchSize=100,TestRepetitions=1,"
        "WeightDecay=1e-4,Regularization=None,"
        "Optimizer=ADAM,DropConfig=0.0+0.0+0.0+0.0"
    )
    trainingString1 += ",MaxEpochs=" + str(max_epochs)

    ## New DL (CNN)
    cnnMethodName = "TMVA_CNN_CPU"
    cnnOptions = "CPU"
    # use GPU if available
    if hasGPU:
        cnnOptions = "GPU"
        cnnMethodName = "TMVA_CNN_GPU"

    factory.BookMethod(
        loader,
        TMVA.Types.kDL,
        cnnMethodName,
        H=False,
        V=True,
        ErrorStrategy="CROSSENTROPY",
        VarTransform=None,
        WeightInitialization="XAVIER",
        InputLayout="1|16|16",
        Layout="CONV|10|3|3|1|1|1|1|RELU,BNORM,CONV|10|3|3|1|1|1|1|RELU,MAXPOOL|2|2|1|1,RESHAPE|FLAT,DENSE|100|RELU,DENSE|1|LINEAR",
        TrainingStrategy=trainingString1,
        Architecture=cnnOptions,
    )


### Book Convolutional Neural Network in PyTorch using a generated model


if usePyTorchCNN:
    ROOT.Info("TMVA_CNN_Classification", "Using Convolutional PyTorch Model")
    pyTorchFileName = str(ROOT.gROOT.GetTutorialDir())
    pyTorchFileName += "/machine_learning/PyTorch_Generate_CNN_Model.py"
    # check that PyTorch can be imported and that the file defining the model exists
    torch_spec = importlib.util.find_spec("torch")
    if torch_spec is not None and os.path.exists(pyTorchFileName):
        # cmd = str(ROOT.TMVA.Python_Executable()) + " " + pyTorchFileName
        # os.system(cmd)
        # import PyTorch_Generate_CNN_Model
        ROOT.Info("TMVA_CNN_Classification", "Booking PyTorch CNN model")
        factory.BookMethod(
            loader,
            TMVA.Types.kPyTorch,
            "PyTorch",
            H=True,
            V=False,
            VarTransform=None,
            FilenameModel="PyTorchModelCNN.pt",
            FilenameTrainedModel="PyTorchTrainedModelCNN.pt",
            NumEpochs=max_epochs,
            BatchSize=100,
            UserCode=str(pyTorchFileName),
        )
    else:
        ROOT.Warning(
            "TMVA_CNN_Classification",
            "PyTorch is not installed or the model building file does not exist - skip using PyTorch",
        )

if useKerasCNN:
    ROOT.Info("TMVA_CNN_Classification", "Building convolutional Keras model")
    # build the Keras model directly here:
    # 2 Conv2D layers + MaxPooling + Dense
    import tensorflow
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.optimizers import Adam

    # from keras.initializers import TruncatedNormal
    # from keras import initializations
    from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Reshape

    # from keras.callbacks import ReduceLROnPlateau
    model = Sequential()
    model.add(Reshape((16, 16, 1), input_shape=(256,)))
    model.add(Conv2D(10, kernel_size=(3, 3), kernel_initializer="TruncatedNormal", activation="relu", padding="same"))
    model.add(Conv2D(10, kernel_size=(3, 3), kernel_initializer="TruncatedNormal", activation="relu", padding="same"))
    # stride for maxpool is equal to pool size
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(64, activation="tanh"))
    # model.add(Dropout(0.2))
    model.add(Dense(2, activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.001), weighted_metrics=["accuracy"])
    model.save("model_cnn.h5")

    if not os.path.exists("model_cnn.h5"):
        raise FileNotFoundError("Error creating Keras model file model_cnn.h5")
    else:
        # book the PyKeras method only if the Keras model could be created
        ROOT.Info("TMVA_CNN_Classification", "Booking convolutional Keras model")
        factory.BookMethod(
            loader,
            TMVA.Types.kPyKeras,
            "PyKeras",
            H=True,
            V=False,
            VarTransform=None,
            FilenameModel="model_cnn.h5",
            FilenameTrainedModel="trained_model_cnn.h5",
            NumEpochs=max_epochs,
            BatchSize=100,
            GpuOptions="allow_growth=True",  # needed for RTX NVidia cards and to avoid TF allocating all GPU memory
        )


## Train Methods

factory.TrainAllMethods()

## Test and Evaluate Methods

factory.TestAllMethods()

factory.EvaluateAllMethods()

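# Illustrative sketch (commented out, not part of the tutorial): after training, a stored method could be
# applied to new data with the experimental RReader interface. The weight file name below assumes the
# Factory job name used above and the "BDT" method; adjust it to the method you want to apply.
# reader = ROOT.TMVA.Experimental.RReader("dataset/weights/TMVA_CNN_Classification_BDT.weights.xml")
# x = ROOT.std.vector["float"](imgSize)  # fill with the 256 pixel values of one event
# score = reader.Compute(x)[0]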
## Plot ROC Curve

c1 = factory.GetROCCurve(loader)
c1.Draw()
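# Optionally, the ROC integral (AUC) of a booked method can be retrieved from the factory, e.g. for the BDT
# (illustrative, commented out since it only applies if the method was booked and trained):
# if useTMVABDT:
#     print("BDT ROC integral:", factory.GetROCIntegral(loader, "BDT"))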

# close the output file to save the results
outputFile.Close()