36   float alpha = 1.0, beta = 0.0;
 
   38   cudaStream_t s = A.GetComputeStream();
 
   45               A.GetDataPointer(), 
m,   
 
   46               B.GetDataPointer(), k,   
 
   48               C.GetDataPointer(), 
m);  
 
   50   C.SetComputeStream(s);
 
 
   63   double alpha = 1.0, beta = 0.0;
 
   65   cudaStream_t s = A.GetComputeStream();
 
   72               A.GetDataPointer(), 
m,   
 
   73               B.GetDataPointer(), k,   
 
   75               C.GetDataPointer(), 
m);  
 
   77   C.SetComputeStream(s);
 
 
   85                                      float alpha, 
float beta)
 
   93   cudaStream_t s = A.GetComputeStream();
 
  100               A.GetDataPointer(), k,     
 
  101               B.GetDataPointer(), k,     
 
  103               C.GetDataPointer(), 
m);    
 
  105   C.SetComputeStream(s);
 
 
  112                                      double alpha, 
double beta)
 
  120   cudaStream_t s = A.GetComputeStream();
 
  127               A.GetDataPointer(), k,     
 
  128               B.GetDataPointer(), k,     
 
  130               C.GetDataPointer(), 
m);    
 
  132   C.SetComputeStream(s);
 
 
  136template<
typename AFloat>
 
  142   cudaStream_t s = A.GetComputeStream();
 
  143   ::TMVA::DNN::Cuda::Hadamard<<<gridDims, blockDims, 0, s>>>(B.GetDataPointer(),
 
  147   B.SetComputeStream(s);
 
  150template<
typename AFloat>
 
  155   int ncols = A.GetFirstSize();   
 
  156   int nrows = A.GetFirstStride(); 
 
  158      ncols = A.GetWSize(); 
 
  159      nrows = A.GetHSize(); 
 
  163   cudaStream_t s = A.GetComputeStream();
 
  164   ::TMVA::DNN::Cuda::Hadamard<<<gridDims, blockDims, 0, s>>>(B.GetDataPointer(),
 
  167   B.SetComputeStream(s);
 
  171template<
typename AFloat>
 
  176   cudaStream_t s = A.GetComputeStream();
 
  179   ::TMVA::DNN::Cuda::ReduceMatrix<<<gridDims, blockDims, 0, s>>>(
 
 
  191                              float alpha, 
float beta)
 
  198   cudaStream_t s = A.GetComputeStream();
 
  204               A.GetDataPointer(), 
m,             
 
  206               & beta, B.GetDataPointer(), 1);    
 
  208   B.SetComputeStream(s);
 
 
  215                               double alpha, 
double beta)
 
  222   cudaStream_t s = A.GetComputeStream();
 
  228               A.GetDataPointer(), 
m,             
 
  230               & beta, B.GetDataPointer(), 1);    
 
  232   B.SetComputeStream(s);
 
 
  242    float alpha = 1.0, beta = 0.0;
 
  244    cudaStream_t s = A.GetComputeStream();
 
  250                A.GetDataPointer(), 
m,             
 
  252                & beta, B.GetDataPointer(), 1);    
 
  254    B.SetComputeStream(s);
 
 
  265    double alpha = 1.0, beta = 0.0;
 
  267    cudaStream_t s = A.GetComputeStream();
 
  273                A.GetDataPointer(), 
m,             
 
  275                & beta, B.GetDataPointer(), 1);    
 
  277    B.SetComputeStream(s);
 
 
  290template<
typename AFloat>
 
  293    if (A.GetNrows() != B.GetNrows() || A.GetNcols() != B.GetNcols()) {
 
  294        Fatal(
"AlmostEquals", 
"The passed matrices have unequal shapes.");
 
  299    cudaStream_t s = A.GetComputeStream();
 
  305    ::TMVA::DNN::Cuda::AlmostEquals<<<gridDims, blockDims, 0, s>>>(
dResult, A.GetDataPointer(), B.GetDataPointer(),
 
  306                                                                   epsilon, A.GetNrows(), A.GetNcols());
 
 
  323   cublasSaxpy(A.GetCublasHandle(), A.GetNoElements(), &alpha,
 
  324               A.GetDataPointer(), 1,
 
  325               B.GetDataPointer(), 1);
 
 
  336   cublasDaxpy(A.GetCublasHandle(), A.GetNoElements(), &alpha,
 
  337               A.GetDataPointer(), 1,
 
  338               B.GetDataPointer(), 1);
 
 
  342template<
typename AFloat>
 
  348   for (
size_t i = 0; i < A.GetFirstSize(); ++i) {
 
  351      ScaleAdd(
B_m, 
A_m, alpha);
 
  356template<
typename AFloat>
 
  361   cudaStream_t s = A.GetComputeStream();
 
  362   ::TMVA::DNN::Cuda::ConstAdd<<<gridDims, blockDims, 0, s>>>(
 
 
  370template<
typename AFloat>
 
  375   cudaStream_t s = A.GetComputeStream();
 
  376   ::TMVA::DNN::Cuda::ConstMult<<<gridDims, blockDims, 0, s>>>(
 
 
  384template<
typename AFloat>
 
  389   cudaStream_t s = A.GetComputeStream();
 
  390   ::TMVA::DNN::Cuda::ReciprocalElementWise<<<gridDims, blockDims, 0, s>>>(
 
 
  397template<
typename AFloat>
 
  402   cudaStream_t s = A.GetComputeStream();
 
  403   ::TMVA::DNN::Cuda::SquareElementWise<<<gridDims, blockDims, 0, s>>>(
 
 
  410template<
typename AFloat>
 
  415   cudaStream_t s = A.GetComputeStream();
 
  416   ::TMVA::DNN::Cuda::SqrtElementWise<<<gridDims, blockDims, 0, s>>>(
 
 
  424template<
typename AFloat>
 
  429   cudaStream_t s = A.GetComputeStream();
 
  430   ::TMVA::DNN::Cuda::AdamUpdate<<<gridDims, blockDims, 0, s>>>(
 
 
  440template<
typename AFloat>
 
  445   cudaStream_t s = A.GetComputeStream();
 
  446   ::TMVA::DNN::Cuda::AdamUpdateFirstMom<<<gridDims, blockDims, 0, s>>>(
 
  450       (
int) A.GetNcols(), beta);
 
 
  454template<
typename AFloat>
 
  459   cudaStream_t s = A.GetComputeStream();
 
  460   ::TMVA::DNN::Cuda::AdamUpdateSecondMom<<<gridDims, blockDims, 0, s>>>(
 
  464       (
int) A.GetNcols(), beta);
 
 
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
static AFloat GetDeviceReturn()
Transfer the value in the device return buffer to the host.
static AFloat * GetDeviceReturnPointer()
Return device pointer to the device return buffer.
static void ResetDeviceReturn(AFloat value=0.0)
Set the return buffer on the device to the specified value.
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
static void SqrtElementWise(Matrix_t &A)
Take the square root of each element of the matrix A and write the result into A.
static void SumRows(Matrix_t &B, const Matrix_t &A)
extra functions defined only for CPU architecture !!!
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum columns of (m x n) matrix A and write the results into B.
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
static void ReciprocalElementWise(Matrix_t &A)
Take the reciprocal of each element of the matrix A and write the result into A.
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
static Scalar_t Sum(const Matrix_t &A)
Compute the sum of all elements in A.
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply all the elements of matrix A by the constant beta and write the result into A.
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C (in the signature above these appear to correspond to input, Weights and output, respectively).
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements of matrix B, scaled by beta, to the elements of matrix A.
static dim3 BlockDims2D()
static dim3 GridDims2D(int nrows, int ncols)
create variable transformations