27#ifndef TMVA_DNN_RMSPROP 
   28#define TMVA_DNN_RMSPROP 
   43template <
typename Architecture_t, 
typename Layer_t = VGeneralLayer<Architecture_t>,
 
   44          typename DeepNet_t = TDeepNet<Architecture_t, Layer_t>>
 
   47   using Matrix_t = 
typename Architecture_t::Matrix_t;
 
   48   using Scalar_t = 
typename Architecture_t::Scalar_t;
 
   54   std::vector<std::vector<Matrix_t>>
 
   56   std::vector<std::vector<Matrix_t>>
 
   61   std::vector<std::vector<Matrix_t>>
 
   63   std::vector<std::vector<Matrix_t>>
 
   65   std::vector<std::vector<Matrix_t>>
 
   67   std::vector<std::vector<Matrix_t>>
 
   71   void UpdateWeights(
size_t layerIndex, std::vector<Matrix_t> &weights, 
const std::vector<Matrix_t> &weightGradients);
 
   74   void UpdateBiases(
size_t layerIndex, std::vector<Matrix_t> &biases, 
const std::vector<Matrix_t> &biasGradients);
 
  106template <
typename Architecture_t, 
typename Layer_t, 
typename DeepNet_t>
 
  109   : 
VOptimizer<Architecture_t, Layer_t, DeepNet_t>(learningRate, deepNet), fMomentum(momentum), fRho(rho),
 
  112   std::vector<Layer_t *> &layers = deepNet.
GetLayers();
 
  113   const size_t layersNSlices = layers.size();
 
  123   for (
size_t i = 0; i < layersNSlices; i++) {
 
  124      const size_t weightsNSlices = (layers[i]->GetWeights()).
size();
 
  127      Architecture_t::CreateWeightTensors(
fWeightUpdates[i], layers[i]->GetWeights());
 
  129      for (
size_t j = 0; j < weightsNSlices; j++) {
 
  134      const size_t biasesNSlices = (layers[i]->GetBiases()).
size();
 
  137      Architecture_t::CreateWeightTensors( 
fBiasUpdates[i], layers[i]->GetBiases()); 
 
  139      for (
size_t j = 0; j < biasesNSlices; j++) {
 
  144      Architecture_t::CreateWeightTensors(
fWorkBiasTensor1[i], layers[i]->GetBiases());
 
  146      Architecture_t::CreateWeightTensors(
fWorkBiasTensor2[i], layers[i]->GetBiases());
 
  151template <
typename Architecture_t, 
typename Layer_t, 
typename DeepNet_t>
 
  153                                                                 const std::vector<Matrix_t> &weightGradients) -> 
void 
  155   std::vector<Matrix_t> &currentLayerPastSquaredWeightGradients = this->GetPastSquaredWeightGradientsAt(layerIndex);
 
  156   std::vector<Matrix_t> &currentLayerWeightUpdates = this->GetWeightUpdatesAt(layerIndex);
 
  158   for (
size_t k = 0; k < currentLayerPastSquaredWeightGradients.size(); k++) {
 
  161      auto &accumulation = fWorkWeightTensor1[layerIndex][k];
 
  162      auto &currentSquaredWeightGradients = fWorkWeightTensor2[layerIndex][k];
 
  167      Architecture_t::Copy(currentSquaredWeightGradients, weightGradients[k]);
 
  168      Architecture_t::SquareElementWise(currentSquaredWeightGradients);
 
  169      Architecture_t::ScaleAdd(accumulation, currentLayerPastSquaredWeightGradients[k], this->GetRho());
 
  170      Architecture_t::ScaleAdd(accumulation, currentSquaredWeightGradients, 1 - (this->GetRho()));
 
  171      Architecture_t::Copy(currentLayerPastSquaredWeightGradients[k], accumulation);
 
  175      auto &dummy = fWorkWeightTensor2[layerIndex][k]; 
 
  176      Architecture_t::Copy(dummy, currentLayerPastSquaredWeightGradients[k]);
 
  177      Architecture_t::ConstAdd(dummy, this->GetEpsilon());
 
  178      Architecture_t::SqrtElementWise(dummy);
 
  179      Architecture_t::ReciprocalElementWise(dummy);
 
  180      Architecture_t::Hadamard(dummy, weightGradients[k]);
 
  182      Architecture_t::ScaleAdd(accumulation, currentLayerWeightUpdates[k], this->GetMomentum());
 
  183      Architecture_t::ScaleAdd(accumulation, dummy, this->GetLearningRate());
 
  184      Architecture_t::Copy(currentLayerWeightUpdates[k], accumulation);
 
  189   for (
size_t i = 0; i < weights.size(); i++) {
 
  190      Architecture_t::ScaleAdd(weights[i], currentLayerWeightUpdates[i], -1.0);
 
  195template <
typename Architecture_t, 
typename Layer_t, 
typename DeepNet_t>
 
  197                                                                const std::vector<Matrix_t> &biasGradients) -> 
void 
  199   std::vector<Matrix_t> &currentLayerPastSquaredBiasGradients = this->GetPastSquaredBiasGradientsAt(layerIndex);
 
  200   std::vector<Matrix_t> &currentLayerBiasUpdates = this->GetBiasUpdatesAt(layerIndex);
 
  202   for (
size_t k = 0; k < currentLayerPastSquaredBiasGradients.size(); k++) {
 
  205      auto &accumulation = fWorkBiasTensor1[layerIndex][k];
 
  206      auto &currentSquaredBiasGradients = fWorkBiasTensor2[layerIndex][k];
 
  210      Architecture_t::Copy(currentSquaredBiasGradients, biasGradients[k]);
 
  211      Architecture_t::SquareElementWise(currentSquaredBiasGradients);
 
  212      Architecture_t::ScaleAdd(accumulation, currentLayerPastSquaredBiasGradients[k], this->GetRho());
 
  213      Architecture_t::ScaleAdd(accumulation, currentSquaredBiasGradients, 1 - (this->GetRho()));
 
  214      Architecture_t::Copy(currentLayerPastSquaredBiasGradients[k], accumulation);
 
  218      auto &dummy = fWorkBiasTensor2[layerIndex][k]; 
 
  220      Architecture_t::Copy(dummy, currentLayerPastSquaredBiasGradients[k]);
 
  221      Architecture_t::ConstAdd(dummy, this->GetEpsilon());
 
  222      Architecture_t::SqrtElementWise(dummy);
 
  223      Architecture_t::ReciprocalElementWise(dummy);
 
  224      Architecture_t::Hadamard(dummy, biasGradients[k]);
 
  226      Architecture_t::ScaleAdd(accumulation, currentLayerBiasUpdates[k], this->GetMomentum());
 
  227      Architecture_t::ScaleAdd(accumulation, dummy, this->GetLearningRate());
 
  228      Architecture_t::Copy(currentLayerBiasUpdates[k], accumulation);
 
  233   for (
size_t i = 0; i < biases.size(); i++) {
 
  234      Architecture_t::ScaleAdd(biases[i], currentLayerBiasUpdates[i], -1.0);
 
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Scalar_t fRho
The Rho constant used by the optimizer.
typename Architecture_t::Scalar_t Scalar_t
void UpdateWeights(size_t layerIndex, std::vector< Matrix_t > &weights, const std::vector< Matrix_t > &weightGradients)
Update the weights, given the current weight gradients.
~TRMSProp()=default
Destructor.
std::vector< Matrix_t > & GetPastSquaredWeightGradientsAt(size_t i)
std::vector< std::vector< Matrix_t > > fWorkBiasTensor2
working tensor used to keep a temporary copy of bias or bias gradients
std::vector< std::vector< Matrix_t > > fPastSquaredWeightGradients
The sum of the square of the past weight gradients associated with the deep net.
std::vector< std::vector< Matrix_t > > & GetBiasUpdates()
std::vector< std::vector< Matrix_t > > fWorkWeightTensor2
working tensor used to keep a temporary copy of weights or weight gradients
Scalar_t GetEpsilon() const
std::vector< std::vector< Matrix_t > > fWorkBiasTensor1
working tensor used to keep a temporary copy of bias or bias gradients
Scalar_t fMomentum
The momentum used for training.
std::vector< std::vector< Matrix_t > > & GetPastSquaredBiasGradients()
Scalar_t fEpsilon
The Smoothing term used to avoid division by zero.
TRMSProp(DeepNet_t &deepNet, Scalar_t learningRate=0.001, Scalar_t momentum=0.0, Scalar_t rho=0.9, Scalar_t epsilon=1e-7)
Constructor.
std::vector< std::vector< Matrix_t > > fPastSquaredBiasGradients
The sum of the square of the past bias gradients associated with the deep net.
std::vector< std::vector< Matrix_t > > fWeightUpdates
The accumulation of the past Weights for performing updates.
typename Architecture_t::Matrix_t Matrix_t
void UpdateBiases(size_t layerIndex, std::vector< Matrix_t > &biases, const std::vector< Matrix_t > &biasGradients)
Update the biases, given the current bias gradients.
std::vector< Matrix_t > & GetBiasUpdatesAt(size_t i)
std::vector< std::vector< Matrix_t > > & GetWeightUpdates()
std::vector< std::vector< Matrix_t > > fWorkWeightTensor1
working tensor used to keep a temporary copy of weights or weight gradients
std::vector< Matrix_t > & GetWeightUpdatesAt(size_t i)
std::vector< std::vector< Matrix_t > > & GetPastSquaredWeightGradients()
std::vector< std::vector< Matrix_t > > fBiasUpdates
The accumulation of the past Biases for performing updates.
Scalar_t GetMomentum() const
Getters.
std::vector< Matrix_t > & GetPastSquaredBiasGradientsAt(size_t i)
std::vector< Layer_t * > & GetLayers()
create variable transformations