36   float alpha = 1.0, beta = 0.0;
 
   39   cudaStream_t s = 
output.GetComputeStream();
 
   40   cublasSetStream(
input.GetCublasHandle(), s);
 
   41   cublasSgemm(
input.GetCublasHandle(),
 
   42               CUBLAS_OP_N, CUBLAS_OP_T,
 
   60   double alpha = 1.0, beta = 0.0;
 
   63   cudaStream_t s = 
output.GetComputeStream();
 
   64   cublasSetStream(
input.GetCublasHandle(), s);
 
   65   cublasDgemm(
input.GetCublasHandle(),
 
   66               CUBLAS_OP_N, CUBLAS_OP_T,
 
   75template<
typename AFloat>
 
   82   ::TMVA::DNN::Cuda::AddRowWise<<<gridDims, blockDims, 0, s>>>(
 
   90template<
typename AFloat>
 
  109   if (activation_gradients_backward.
GetSize() > 0) {
 
  111      Matrix_t  activation_gradients_backward_m = activation_gradients_backward.
GetMatrix();
 
  128template<
typename AFloat>
 
  135                   m * 
n * 
sizeof(AFloat), cudaMemcpyDeviceToDevice, 0);
 
  139template<
typename AFloat>
 
  146      n * 
sizeof(AFloat), cudaMemcpyDeviceToDevice, 0);
 
  150template<
typename AFloat>
 
  153   size_t temp = imgDim - fltDim + 2 * padding;
 
  154   if (temp % stride || temp + stride <= 0) {
 
  155      Fatal(
"calculateDimension", 
"Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride)" 
  156            " %zu , %zu , %zu , %zu", imgDim, fltDim, padding, stride);
 
  158   return temp / stride + 1;
 
  182template<
typename AFloat>
 
  191                           size_t zeroPaddingHeight,
 
  192                           size_t zeroPaddingWidth)
 
  201                                                            fltHeight, fltWidth, strideRows, strideCols,
 
  202                                                            zeroPaddingHeight, zeroPaddingWidth);
 
  206template<
typename AFloat>
 
  219                                                                   filterHeight, filterWidth, numFilters);
 
  223template <
typename AFloat>
 
  235template <
typename AFloat>
 
  254   for(
size_t event = 0; 
event < 
input.GetFirstSize(); 
event++) {
 
  261      MultiplyTranspose(output_m, weights, inputPrime_m);
 
  262      AddConvBiases(output_m, biases);
 
  269   Copy(inputActivationFunc, 
output);
 
  275template<
typename AFloat>
 
  302   ActivationFunctionBackward(df, outputTensor, activationGradients, inputActivationFunc,
 
  310   CalculateConvActivationGradients(activationGradientsBackward, df, weights, batchSize, inputHeight, inputWidth, depth,
 
  311                                     height, 
width, filterDepth, filterHeight, filterWidth);
 
  315   CalculateConvWeightGradients(weightGradients, df, activationBackward, batchSize, inputHeight, inputWidth, depth,
 
  316                                 height, 
width, filterDepth, filterHeight, filterWidth, nLocalViews);
 
  319   CalculateConvBiasGradients(biasGradients, df, batchSize, depth, nLocalViews);
 
  323template<
typename AFloat>
 
  338   if (activationGradientsBackward.
GetSize() == 0) 
return;
 
  341   RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.
GetNrows());
 
  344   size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - 
height + filterHeight - 1) / 2));
 
  345   size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - 
width + filterWidth - 1) / 2));
 
  348   size_t tempNLocalViews = inputHeight * inputWidth;
 
  349   size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
 
  352   size_t tempStrideRows = 1;
 
  353   size_t tempStrideCols = 1;
 
  358   for(
size_t event = 0; 
event < batchSize; 
event++) {
 
  359      Im2col(dfPrime, df.
At(
event).GetMatrix(), 
height, 
width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
 
  360             tempZeroPaddingHeight, tempZeroPaddingWidth);
 
  363      MultiplyTranspose(agb_m, rotWeights, dfPrime);
 
  368template<
typename AFloat>
 
  384    weightGradients.
Zero();
 
  386    const size_t filterSize = filterHeight * filterWidth;
 
  387    const size_t nLocalViewPixels = filterDepth * filterSize;
 
  394    const size_t tempStrideRows = 1;
 
  395    const size_t tempStrideCols = 1;
 
  398    const size_t tempZeroPaddingHeight = (
height - inputHeight + filterHeight - 1) / 2;
 
  399    const size_t tempZeroPaddingWidth = (
width - inputWidth + filterWidth - 1) / 2;
 
  404    for(
size_t event = 0; 
event < batchSize; 
event++) {
 
  405        Im2col(activationsPrime, activationsBackward.
At(
event).GetMatrix(), inputHeight, inputWidth, filterHeight, filterWidth,
 
  406               tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);
 
  408        Multiply(resPrime, df.
At(
event).GetMatrix(), activationsPrime);
 
  415template<
typename AFloat>
 
  422    biasGradients.
Zero();
 
  424    for (
size_t event = 0; 
event < batchSize; 
event++) {
 
  431template<
typename AFloat>
 
  437    cudaStream_t s = 
output.GetComputeStream();
 
  438    ::TMVA::DNN::Cuda::AddBiases<<<gridDims, blockDims, 0, s>>>(
 
  467template<
typename AFloat>
 
  480   size_t depth = C.GetCSize();
 
  481   size_t bsize = C.GetFirstSize();
 
  487   for(
size_t event = 0; 
event < bsize; 
event++) {
 
  493                                                                 C.GetDataPointerAt(
event), depth, imgHeight, imgWidth,
 
  494                                                                 fltHeight, fltWidth, strideRows, strideCols);
 
  498template<
typename AFloat>
 
  514   size_t depth = activationGradientsBackward.
GetCSize();
 
  520                    activationGradientsBackward.
GetWSize());
 
  523   for(
size_t event = 0; 
event < bsize; 
event++) {
 
  525      ::TMVA::DNN::Cuda::MaxPoolBackward<<<gridDims, blockDims, 0, s>>>(activationGradientsBackward.
GetDataPointerAt(
event),
 
  528                                                                     depth, imgHeight, imgWidth, fltHeight, fltWidth,
 
  529                                                                     strideRows, strideCols);
 
  534template<
typename AFloat>
 
  547template <
typename AReal>
 
  591template<
typename AFloat>
 
  669template<
typename AFloat>
 
  728template <
typename AFloat>
 
  742template <
typename AFloat>
 
  753template <
typename AFloat>
 
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
cudaStream_t GetComputeStream() const
size_t GetNoElements() const
const AFloat * GetDataPointer() const
TCudaTensor< AFloat > At(size_t i) const
const AFloat * GetDataPointerAt(size_t i) const
const Shape_t & GetShape() const
cudaStream_t GetComputeStream() const
MemoryLayout GetLayout() const
TCudaMatrix< AFloat > GetMatrix() const
const AFloat * GetDataPointer() const
size_t GetFirstSize() const
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams &params, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void SumRows(Matrix_t &B, const Matrix_t &A)
extra functions defined only for CPU architecture !!!
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B such that each matrix is stretched into one row, resulting in a matrix A.
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into the first m elements in B.
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void PrepareInternals(Tensor_t &)
Dummy placeholder - preparation is currently only required for the CUDA architecture.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void Copy(Matrix_t &B, const Matrix_t &A)
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill the B x T x D tensor out from the T x B x D tensor in.
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements in matrix B scaled by beta to the elements in the matrix A.
static dim3 BlockDims2D()
static dim3 GridDims2D(int nrows, int ncols)
EActivationFunction
Enum that represents layer activation functions.
create variable transformations
size_t strideRows
The number of row pixels to slide the filter each step.
size_t filterHeight
The height of the filter.
size_t inputHeight
The height of the previous layer or input.
size_t paddingWidth
The number of zero layers left and right of the input.
size_t filterWidth
The width of the filter.
size_t paddingHeight
The number of zero layers added top and bottom of the input.
size_t inputWidth
The width of the previous layer or input.
size_t strideCols
The number of column pixels to slide the filter each step.