for (UInt_t i=0; i<fEventSample.size(); i++) delete fEventSample[i];
for (UInt_t i=0; i<fForest.size(); i++)      delete fForest[i];

DeclareOptionRef(fGDTau=-1,             "GDTau",          "Gradient-directed (GD) path: default fit cut-off");
DeclareOptionRef(fGDTauPrec=0.01,       "GDTauPrec",      "GD path: precision of tau");
DeclareOptionRef(fGDPathStep=0.01,      "GDStep",         "GD path: step size");
DeclareOptionRef(fGDNPathSteps=10000,   "GDNSteps",       "GD path: number of steps");
DeclareOptionRef(fGDErrScale=1.1,       "GDErrScale",     "Stop scan when error > scale*errmin");
DeclareOptionRef(fLinQuantile,          "LinQuantile",    "Quantile of linear terms (removes outliers)");
DeclareOptionRef(fGDPathEveFrac=0.5,    "GDPathEveFrac",  "Fraction of events used for the path search");
DeclareOptionRef(fGDValidEveFrac=0.5,   "GDValidEveFrac", "Fraction of events used for the validation");

DeclareOptionRef(fMinFracNEve=0.1,      "fEventsMin",     "Minimum fraction of events in a splittable node");
DeclareOptionRef(fMaxFracNEve=0.9,      "fEventsMax",     "Maximum fraction of events in a splittable node");
DeclareOptionRef(fNTrees=20,            "nTrees",         "Number of trees in forest.");

DeclareOptionRef(fForestTypeS="AdaBoost", "ForestType",   "Method to use for forest generation (AdaBoost or RandomForest)");
AddPreDefVal(TString("AdaBoost"));
AddPreDefVal(TString("Random"));

DeclareOptionRef(fRuleMinDist=0.001,    "RuleMinDist",    "Minimum distance between rules");
DeclareOptionRef(fMinimp=0.01,          "MinImp",         "Minimum rule importance accepted");

DeclareOptionRef(fModelTypeS="ModRuleLinear", "Model",    "Model to be used");
AddPreDefVal(TString("ModRule"));
AddPreDefVal(TString("ModRuleLinear"));
AddPreDefVal(TString("ModLinear"));

DeclareOptionRef(fRuleFitModuleS="RFTMVA", "RuleFitModule", "Which RuleFit module to use");
AddPreDefVal(TString("RFTMVA"));
AddPreDefVal(TString("RFFriedman"));

DeclareOptionRef(fRFWorkDir="./rulefit", "RFWorkDir",    "Friedman's RuleFit module (RFF): working dir");
 
DeclareOptionRef(fRFNrules=2000,        "RFNrules",     "RFF: Maximum number of rules");
 
DeclareOptionRef(fRFNendnodes=4,        "RFNendnodes",  "RFF: Average number of end nodes");
 
 
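As a usage illustration of the option keys declared above, booking the method through the standard TMVA Factory might look like the sketch below. The file, dataloader, and variable handling are hypothetical placeholders; the option values shown are simply the defaults declared above.

#include "TFile.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Factory.h"
#include "TMVA/Types.h"

void BookRuleFitSketch() {
   // Hypothetical output file and dataloader; add variables and input trees as usual.
   TFile* outFile = TFile::Open("TMVA_RuleFit.root", "RECREATE");
   TMVA::Factory factory("TMVAClassification", outFile, "AnalysisType=Classification");
   TMVA::DataLoader loader("dataset");
   // loader.AddVariable("var1", 'F'); loader.AddSignalTree(...); ...

   // Option keys are those declared in DeclareOptions(); values are the declared defaults.
   factory.BookMethod(&loader, TMVA::Types::kRuleFit, "RuleFit",
                      "Model=ModRuleLinear:RuleFitModule=RFTMVA:ForestType=AdaBoost:"
                      "nTrees=20:fEventsMin=0.1:fEventsMax=0.9:RuleMinDist=0.001:MinImp=0.01:"
                      "GDTau=-1:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.1");
   // factory.TrainAllMethods(); factory.TestAllMethods(); factory.EvaluateAllMethods();
}
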
if (IgnoreEventsWithNegWeightsInTraining()) {
   Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
         << GetMethodTypeName()
         << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
         << Endl;
}
fRuleFitModuleS.ToLower();
if      (fRuleFitModuleS == "rftmva")     fUseRuleFitJF = kFALSE;
else if (fRuleFitModuleS == "rffriedman") fUseRuleFitJF = kTRUE;
else                                      fUseRuleFitJF = kTRUE;
 
else if (fSepTypeS == "giniindex")              fSepType = new GiniIndex();
else if (fSepTypeS == "crossentropy")           fSepType = new CrossEntropy();
 
fModelTypeS.ToLower();
if      (fModelTypeS == "modlinear" ) fRuleFit.SetModelLinear();
else if (fModelTypeS == "modrule" )   fRuleFit.SetModelRules();
else                                  fRuleFit.SetModelFull();

fPruneMethodS.ToLower();

fForestTypeS.ToLower();
if      (fForestTypeS == "random" )   fUseBoost = kFALSE;
else if (fForestTypeS == "adaboost" ) fUseBoost = kTRUE;
else                                  fUseBoost = kTRUE;
 
if (fUseBoost && (!fUseRuleFitJF)) fTreeEveFrac = 1.0;

if (fTreeEveFrac<=0) {
   fTreeEveFrac = min( 0.5, (100.0 +6.0*sqrt(n))/n);
}
 
VerifyRange(Log(), "nTrees",        fNTrees,0,100000,20);
VerifyRange(Log(), "MinImp",        fMinimp,0.0,1.0,0.0);
VerifyRange(Log(), "GDTauPrec",     fGDTauPrec,1e-5,5e-1);
VerifyRange(Log(), "GDTauMin",      fGDTauMin,0.0,1.0);
VerifyRange(Log(), "GDTauMax",      fGDTauMax,fGDTauMin,1.0);
VerifyRange(Log(), "GDPathStep",    fGDPathStep,0.0,100.0,0.01);
VerifyRange(Log(), "GDErrScale",    fGDErrScale,1.0,100.0,1.1);
VerifyRange(Log(), "GDPathEveFrac", fGDPathEveFrac,0.01,0.9,0.5);
VerifyRange(Log(), "GDValidEveFrac",fGDValidEveFrac,0.01,1.0-fGDPathEveFrac,1.0-fGDPathEveFrac);
VerifyRange(Log(), "fEventsMin",    fMinFracNEve,0.0,1.0);
VerifyRange(Log(), "fEventsMax",    fMaxFracNEve,fMinFracNEve,1.0);
 
fRuleFit.GetRuleEnsemblePtr()->SetLinQuantile(fLinQuantile);
fRuleFit.GetRuleFitParamsPtr()->SetGDTauRange(fGDTauMin,fGDTauMax);
fRuleFit.GetRuleFitParamsPtr()->SetGDTau(fGDTau);
fRuleFit.GetRuleFitParamsPtr()->SetGDTauPrec(fGDTauPrec);
fRuleFit.GetRuleFitParamsPtr()->SetGDTauScan(fGDTauScan);
fRuleFit.GetRuleFitParamsPtr()->SetGDPathStep(fGDPathStep);
fRuleFit.GetRuleFitParamsPtr()->SetGDNPathSteps(fGDNPathSteps);
fRuleFit.GetRuleFitParamsPtr()->SetGDErrScale(fGDErrScale);
fRuleFit.SetImportanceCut(fMinimp);
fRuleFit.SetRuleMinDist(fRuleMinDist);
 
Log() << kINFO << "" << Endl;
Log() << kINFO << "--------------------------------------" << Endl;
Log() << kINFO << "Friedman's RuleFit module is selected." << Endl;
Log() << kINFO << "Only the following options are used:" << Endl;
Log() << kINFO << Endl;
Log() << kINFO << "--------------------------------------" << Endl;
Log() << kINFO << Endl;
 
fRuleFit.UseImportanceVisHists();

fRuleFit.SetMsgType( Log().GetMinType() );

if (HasTrainingTree()) InitEventSample();
 
 
fMonitorNtuple = new TTree("MonitorNtuple_RuleFit","RuleFit variables");
fMonitorNtuple->Branch("importance",&fNTImportance,"importance/D");
fMonitorNtuple->Branch("support",&fNTSupport,"support/D");
fMonitorNtuple->Branch("coefficient",&fNTCoefficient,"coefficient/D");
fMonitorNtuple->Branch("ncuts",&fNTNcuts,"ncuts/I");
fMonitorNtuple->Branch("nvars",&fNTNvars,"nvars/I");
fMonitorNtuple->Branch("type",&fNTType,"type/I");
fMonitorNtuple->Branch("ptag",&fNTPtag,"ptag/D");
fMonitorNtuple->Branch("pss",&fNTPss,"pss/D");
fMonitorNtuple->Branch("psb",&fNTPsb,"psb/D");
fMonitorNtuple->Branch("pbs",&fNTPbs,"pbs/D");
fMonitorNtuple->Branch("pbb",&fNTPbb,"pbb/D");
fMonitorNtuple->Branch("soversb",&fNTSSB,"soversb/D");
 
 
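For illustration, the monitoring ntuple booked above can be inspected after training roughly as follows. This is only a sketch: the output file name and the in-file directory depend on your job and dataloader names, since the ntuple is written to the method's directory (BaseDir()).

#include "TFile.h"
#include "TTree.h"

void InspectRuleFitMonitorSketch() {
   // Hypothetical file and in-file path; adjust to your own output.
   TFile* f = TFile::Open("TMVA_RuleFit.root");
   TTree* mon = nullptr;
   f->GetObject("dataset/Method_RuleFit/RuleFit/MonitorNtuple_RuleFit", mon);
   if (mon) {
      // e.g. rule importance versus support, using the branches defined above
      mon->Draw("importance:support", "", "colz");
   }
}
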
SetSignalReferenceCut( 0.0 );

fLinQuantile   = 0.025;
fSepTypeS      = "GiniIndex";
fPruneMethodS  = "NONE";
fPruneStrength = 3.5;

if (Data()->GetNEvents()==0) Log() << kFATAL << "<Init> Data().TrainingTree() is zero pointer" << Endl;
 
fEventSample.push_back( new Event(*ev));

if (fTreeEveFrac<=0) {
   fTreeEveFrac = min( 0.5, (100.0 +6.0*sqrt(n))/n);
}
if (fTreeEveFrac>1.0) fTreeEveFrac=1.0;

std::shuffle(fEventSample.begin(), fEventSample.end(), std::default_random_engine{});

Log() << kDEBUG << "Set sub-sample fraction to " << fTreeEveFrac << Endl;
 
 
if (!IsSilentFile()) InitMonitorNtuple();

this->InitEventSample();

fRuleFit.GetRuleEnsemblePtr()->ClearRuleMap();
 
 
if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with RuleFit; "
                          << "please remove the option from the configuration string, or "
                          << "use \"!Normalise\""
                          << Endl;

fRuleFit.Initialize( this );
 
Log() << kDEBUG << "Fitting rule coefficients ..." << Endl;
fRuleFit.FitCoefficients();

Log() << kDEBUG << "Computing rule and variable importance" << Endl;
fRuleFit.CalcImportance();

fRuleFit.GetRuleEnsemblePtr()->Print();

Log() << kDEBUG << "Filling rule ntuple" << Endl;
UInt_t nrules = fRuleFit.GetRuleEnsemble().GetRulesConst().size();

rule            = fRuleFit.GetRuleEnsemble().GetRulesConst(i);
fNTImportance   = rule->GetRelImportance();
fNTSupport      = rule->GetSupport();
fNTCoefficient  = rule->GetCoefficient();
fNTType         = (rule->IsSignalRule() ? 1:-1 );
fNTNvars        = rule->GetRuleCut()->GetNvars();
fNTNcuts        = rule->GetRuleCut()->GetNcuts();
fNTPtag         = fRuleFit.GetRuleEnsemble().GetRulePTag(i);
fNTPss          = fRuleFit.GetRuleEnsemble().GetRulePSS(i);
fNTPsb          = fRuleFit.GetRuleEnsemble().GetRulePSB(i);
fNTPbs          = fRuleFit.GetRuleEnsemble().GetRulePBS(i);
fNTPbb          = fRuleFit.GetRuleEnsemble().GetRulePBB(i);
fNTSSB          = rule->GetSSB();
fMonitorNtuple->Fill();

fRuleFit.MakeVisHists();
fRuleFit.MakeDebugHists();

Log() << kDEBUG << "Training done" << Endl;
 
 
fRuleFit.InitPtrs( this );

std::vector<const TMVA::Event*> tmp;
tmp.push_back(event);
fRuleFit.SetTrainingEvents( tmp );

rfAPI->WelcomeMessage();

Log() << kINFO << "Training ..." << Endl;
rfAPI->TrainRuleFit();

Log() << kDEBUG << "reading model summary from rf_go.exe output" << Endl;
rfAPI->ReadModelSum();

Log() << kDEBUG << "calculating rule and variable importance" << Endl;
fRuleFit.CalcImportance();

fRuleFit.GetRuleEnsemblePtr()->Print();

if (!IsSilentFile()) fRuleFit.MakeVisHists();

Log() << kDEBUG << "done training" << Endl;
 
 
fRanking = new Ranking( GetName(), "Importance" );

fRanking->AddRank( Rank( GetInputLabel(ivar), fRuleFit.GetRuleEnsemble().GetVarImportance(ivar) ) );

fRuleFit.GetRuleEnsemble().AddXMLTo( parent );

fRuleFit.GetRuleEnsemblePtr()->ReadRaw( istr );

fRuleFit.GetRuleEnsemblePtr()->ReadFromXML( wghtnode );
 
 
return fRuleFit.EvalEvent( *GetEvent() );
 
 
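As a usage sketch, the trained classifier evaluated here by GetMvaValue() is typically applied to new data through TMVA::Reader. The variable names and the weight-file path below are hypothetical and must match your own training setup.

#include "TMVA/Reader.h"

void EvaluateRuleFitSketch() {
   TMVA::Reader reader("!Color:!Silent");
   Float_t var1 = 0.f, var2 = 0.f;
   reader.AddVariable("var1", &var1);          // must match the training variables
   reader.AddVariable("var2", &var2);
   reader.BookMVA("RuleFit", "dataset/weights/TMVAClassification_RuleFit.weights.xml");

   var1 = 1.2f; var2 = -0.4f;                  // fill with one event's values
   double mva = reader.EvaluateMVA("RuleFit"); // corresponds to fRuleFit.EvalEvent(...)
   (void)mva;
}
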
Log() << kINFO << "Write monitoring ntuple to file: " << BaseDir()->GetPath() << Endl;
fMonitorNtuple->Write();
 
 
fout << "   // not implemented for class: \"" << className << "\"" << std::endl;
fout << "};" << std::endl;
fout << "void   " << className << "::Initialize(){}" << std::endl;
fout << "void   " << className << "::Clear(){}" << std::endl;
fout << "double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const {" << std::endl;
fout << "   double rval=" << std::setprecision(10) << fRuleFit.GetRuleEnsemble().GetOffset() << ";" << std::endl;
MakeClassRuleCuts(fout);
MakeClassLinear(fout);
fout << "   return rval;" << std::endl;
fout << "}" << std::endl;
fout << std::setprecision(dp);
 
 
if (!fRuleFit.GetRuleEnsemble().DoRules()) {
   fout << "   //" << std::endl;
   fout << "   // ==> MODEL CONTAINS NO RULES <==" << std::endl;
   fout << "   //" << std::endl;

const std::vector< Rule* > *rules = &(rens->GetRulesConst());

std::list< std::pair<Double_t,Int_t> > sortedRules;

fout << "   //" << std::endl;
fout << "   // here follow all rules ordered by importance (most important first)" << std::endl;
fout << "   // at the end of each line, the relative importance of the rule is given" << std::endl;
fout << "   //" << std::endl;

for ( std::list< std::pair<double,int> >::reverse_iterator itpair = sortedRules.rbegin();

   fout << "   if (" << std::flush;
      if (ic>0) fout << "&&" << std::flush;
         fout << "(" << std::setprecision(10) << valmin << std::flush;
         fout << "<inputValues[" << sel << "])" << std::flush;
         fout << "(inputValues[" << sel << "]" << std::flush;
         fout << "<" << std::setprecision(10) << valmax << ")" << std::flush;
   fout << ") rval+=" << std::setprecision(10) << (*rules)[ir]->GetCoefficient() << ";" << std::flush;

fout << std::setprecision(dp);
 
 
if (!fRuleFit.GetRuleEnsemble().DoLinear()) {
   fout << "   //" << std::endl;
   fout << "   // ==> MODEL CONTAINS NO LINEAR TERMS <==" << std::endl;
   fout << "   //" << std::endl;

fout << "   //" << std::endl;
fout << "   // here follow all linear terms" << std::endl;
fout << "   // at the end of each line, the relative importance of the term is given" << std::endl;
fout << "   //" << std::endl;

   if (rens->IsLinTermOK(il)) {

        << std::setprecision(10) << rens->GetLinCoefficients(il)*norm
        << "*std::min( double(" << std::setprecision(10) << rens->GetLinDP(il)
        << "), std::max( double(inputValues[" << il << "]), double(" << std::setprecision(10) << rens->GetLinDM(il) << ")));"
 
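To show what MakeClassRuleCuts() and MakeClassLinear() emit, the standalone response written to fout has roughly the shape sketched below. This is an illustration only, not TMVA's verbatim output: the offset, cut values, coefficient, and quantile boundaries are made-up numbers.

#include <algorithm>
#include <vector>

// Illustrative stand-in for the generated GetMvaValue__() of a RuleFit classifier.
double RuleFitResponseSketch(const std::vector<double>& inputValues) {
   double rval = 0.1234567890;                          // ensemble offset
   // a rule: add its coefficient when all cuts are satisfied
   if ((0.5 < inputValues[0]) && (inputValues[1] < 2.25)) rval += 0.4321;
   // a linear term: the input is truncated at the lower/upper quantile boundaries (DM/DP)
   rval += 0.0500 * std::min(double(2.75), std::max(double(inputValues[0]), double(-2.75)));
   return rval;
}
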
Log() << col << "--- Short description:" << colres << Endl;

Log() << "This method uses a collection of so called rules to create a" << Endl;
Log() << "discriminating scoring function. Each rule consists of a series" << Endl;
Log() << "of cuts in parameter space. The ensemble of rules is created" << Endl;
Log() << "from a forest of decision trees, trained using the training data." << Endl;
Log() << "Each node (apart from the root) corresponds to one rule." << Endl;
Log() << "The scoring function is then obtained by linearly combining" << Endl;
Log() << "the rules. A fitting procedure is applied to find the optimum" << Endl;
Log() << "set of coefficients. The goal is to find a model with few rules" << Endl;
Log() << "but with a strong discriminating power." << Endl;
 
Log() << col << "--- Performance optimisation:" << colres << Endl;

Log() << "There are two important considerations to make when optimising:" << Endl;
Log() << "  1. Topology of the decision tree forest" << brk << Endl;
Log() << "  2. Fitting of the coefficients" << Endl;

Log() << "The maximum complexity of the rules is defined by the size of" << Endl;
Log() << "the trees. Large trees will yield many complex rules and capture" << Endl;
Log() << "higher order correlations. On the other hand, small trees will" << Endl;
Log() << "lead to a smaller ensemble with simple rules, only capable of" << Endl;
Log() << "modeling simple structures." << Endl;
Log() << "Several parameters exist for controlling the complexity of the" << Endl;
Log() << "rule ensemble." << Endl;
 
Log() << "The fitting procedure searches for a minimum using a gradient" << Endl;
Log() << "directed path. Apart from step size and number of steps, the" << Endl;
Log() << "evolution of the path is defined by a cut-off parameter, tau." << Endl;
Log() << "This parameter is unknown and depends on the training data." << Endl;
Log() << "A large value will tend to give large weights to a few rules." << Endl;
Log() << "Similarly, a small value will lead to a large set of rules" << Endl;
Log() << "with similar weights." << Endl;

Log() << "A final point is the model used; rules and/or linear terms." << Endl;
Log() << "For a given training sample, the result may improve by adding" << Endl;
Log() << "linear terms. If best performance is obtained using only linear" << Endl;
Log() << "terms, it is very likely that the Fisher discriminant would be" << Endl;
Log() << "a better choice. Ideally the fitting procedure should be able to" << Endl;
Log() << "make this choice by giving appropriate weights for either terms." << Endl;
 
Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;

Log() << "I.  TUNING OF RULE ENSEMBLE:" << Endl;

Log() << "   " << col << "ForestType  " << colres
      << ": Recommended is to use the default \"AdaBoost\"." << brk << Endl;
Log() << "   " << col << "nTrees      " << colres
      << ": More trees lead to more rules but also slower" << Endl;
Log() << "                 performance. With too few trees the risk is" << Endl;
Log() << "                 that the rule ensemble becomes too simple." << brk << Endl;
Log() << "   " << col << "fEventsMin  " << colres << brk << Endl;
Log() << "   " << col << "fEventsMax  " << colres
      << ": With a lower min, more large trees will be generated" << Endl;
Log() << "                 leading to more complex rules." << Endl;
Log() << "                 With a higher max, more small trees will be" << Endl;
Log() << "                 generated leading to more simple rules." << Endl;
Log() << "                 By changing this range, the average complexity" << Endl;
Log() << "                 of the rule ensemble can be controlled." << brk << Endl;
Log() << "   " << col << "RuleMinDist " << colres
      << ": By increasing the minimum distance between" << Endl;
Log() << "                 rules, fewer and more diverse rules will remain." << Endl;
Log() << "                 Initially it is a good idea to keep this small" << Endl;
Log() << "                 or zero and let the fitting do the selection of" << Endl;
Log() << "                 rules. In order to reduce the ensemble size," << Endl;
Log() << "                 the value can then be increased." << Endl;
 
Log() << "II. TUNING OF THE FITTING:" << Endl;

Log() << "   " << col << "GDPathEveFrac " << colres
      << ": fraction of events in path evaluation" << Endl;
Log() << "                 Increasing this fraction will improve the path" << Endl;
Log() << "                 finding. However, too high a value will give few" << Endl;
Log() << "                 unique events available for error estimation." << Endl;
Log() << "                 It is recommended to use the default = 0.5." << brk << Endl;
Log() << "   " << col << "GDTau         " << colres
      << ": cutoff parameter tau" << Endl;
Log() << "                 By default this value is set to -1.0." << Endl;
Log() << "                 This means that the cut-off parameter is" << Endl;
Log() << "                 automatically estimated. In most cases" << Endl;
Log() << "                 this should be fine. However, you may want" << Endl;
Log() << "                 to fix this value if you already know it" << Endl;
Log() << "                 and want to reduce the training time." << brk << Endl;
Log() << "   " << col << "GDTauPrec     " << colres
      << ": precision of estimated tau" << Endl;
Log() << "                 Increase this precision to find a more" << Endl;
Log() << "                 optimal cut-off parameter." << brk << Endl;
Log() << "   " << col << "GDNStep       " << colres
      << ": number of steps in path search" << Endl;
Log() << "                 If the number of steps is too small, then" << Endl;
Log() << "                 the program will give a warning message." << Endl;
 
Log() << "III. WARNING MESSAGES" << Endl;

Log() << col << "Risk(i+1)>=Risk(i) in path" << colres << brk << Endl;
Log() << col << "Chaotic behaviour of risk evolution." << colres << Endl;

Log() << "                 The error rate was still decreasing at the end" << Endl;
Log() << "                 By construction the Risk should always decrease." << Endl;
Log() << "                 However, if the training sample is too small or" << Endl;
Log() << "                 the model is overtrained, such warnings can" << Endl;
Log() << "                 occur." << Endl;
Log() << "                 The warnings can safely be ignored if only a" << Endl;
Log() << "                 few (<3) occur. If more warnings are generated," << Endl;
Log() << "                 the fitting fails." << Endl;
Log() << "                 A remedy may be to increase the value" << brk << Endl;
Log() << "                 "
      << col << "GDValidEveFrac" << colres
      << " to 1.0 (or a larger value)." << brk << Endl;
Log() << "                 In addition, if "
      << col << "GDPathEveFrac" << colres
      << " is too high" << Endl;
Log() << "                 the same warnings may occur since the events" << Endl;
Log() << "                 used for error estimation are also used for" << Endl;
Log() << "                 path estimation." << Endl;
Log() << "                 Another possibility is to modify the model - " << Endl;
Log() << "                 See above on tuning the rule ensemble." << Endl;

Log() << col << "The error rate was still decreasing at the end of the path"
      << colres << Endl;
Log() << "                 Too few steps in path! Increase "
      << col << "GDNSteps" << colres << "." << Endl;

Log() << col << "Reached minimum early in the search" << colres << Endl;

Log() << "                 Minimum was found early in the fitting. This" << Endl;
Log() << "                 may indicate that the used step size "
Log() << "                 was too large. Reduce it and rerun." << Endl;
Log() << "                 If the results are still not OK, modify the" << Endl;
Log() << "                 model either by modifying the rule ensemble" << Endl;
Log() << "                 or add/remove linear terms." << Endl;
 
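Following the advice in the help text above, a user who already knows a suitable cut-off could fix tau (and shorten the path search) with a booking string along these lines. This is only a sketch reusing the hypothetical factory and loader from the earlier example; the value 0.6 and the reduced step count are purely illustrative.

// Hypothetical booking string: fix GDTau instead of letting it be estimated (GDTau=-1),
// and reduce the number of path steps accordingly.
factory.BookMethod(&loader, TMVA::Types::kRuleFit, "RuleFit_FixedTau",
                   "Model=ModRuleLinear:GDTau=0.6:GDTauPrec=0.01:GDStep=0.01:GDNSteps=5000");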
 