ROOT Reference Guide
ROperator_ConvTranspose.icc
#ifndef TMVA_SOFIE_ROPERATOR_CONVTRANSPOSE_I
#define TMVA_SOFIE_ROPERATOR_CONVTRANSPOSE_I

#include <memory>
#include <sstream>
#include <algorithm>
#include <stdexcept>
#include <vector>
#include <cassert>

#include <TMVA/SOFIE_common.hxx>

namespace TMVA {
namespace Experimental {
namespace SOFIE {

template <typename T>
auto ROperator_ConvTranspose<T>::ShapeInference(std::vector<std::vector<size_t>> input)
   -> std::vector<std::vector<size_t>>
{
   const std::vector<size_t> &inputShape = input[0];
   const std::vector<size_t> &weightShape = input[1];
   size_t size = inputShape.size();
   // Dimension of the conv transpose op
   fDim = size - 2;
   // Number of groups
   if (fAttrGroup == 0)
      fAttrGroup = 1;
   if (fAttrStrides.empty()) {
      fAttrStrides = std::vector<size_t>(fDim, 1);
   }
   if (fAttrDilations.empty()) {
      fAttrDilations = std::vector<size_t>(fDim, 1);
   }
   // The shape of the kernel is kw for a 1d image, kh x kw for a 2d image and kd x kh x kw for a 3d image
   if (fAttrKernelShape.empty()) {
      fAttrKernelShape.resize(fDim);
      for (size_t i = 0; i < fDim; i++)
         fAttrKernelShape[i] = fShapeW[i + 2] + (fAttrDilations[i] - 1) * (fShapeW[i + 2] - 1);
   }
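   // Illustrative note: fAttrKernelShape stores the *dilated* kernel extent.
   // For example, a kernel of size 3 with dilation 2 gives an effective extent of
   // 3 + (2 - 1) * (3 - 1) = 5 along that axis.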
   if (fAttrOutputPadding.empty())
      fAttrOutputPadding = std::vector<size_t>(fDim, 0);

   // The shape of the output is batch_size x out_channel x out_w for a 1d image,
   // batch_size x out_channel x out_h x out_w for a 2d image and
   // batch_size x out_channel x out_d x out_h x out_w for a 3d image
   // where out_channel = weight_shape[1] * group
   std::vector<size_t> outShape(size);
   outShape[0] = inputShape[0];
   outShape[1] = weightShape[1] * fAttrGroup;

   // Generate the padding
   if (fAttrPads.empty()) {
      fAttrPads = std::vector<size_t>(2 * fDim, 0);
      if (fAttrOutputShape.size() == fDim) {
         // LM: to be checked...
         // for the time being not supported
         throw
            std::runtime_error("ConvTranspose with output_shape explicitly set not yet supported.");
         /*
         std::vector<size_t> totalPadding(fDim, 1);
         for (size_t i = 0; i < fDim; i++) {
            size_t j = i + 2;
            totalPadding[i] =
               fAttrStrides[i] * (fAttrOutputShape[i] - 1) + fAttrOutputPadding[i] + fAttrKernelShape[i] - fShapeX[j];
         }

         for (size_t i = 0; i < fDim; i++) {
            size_t end_i = i + fDim;
            if (fAttrAutopad == "SAME_UPPER") {
               fAttrPads[i] = totalPadding[i] / 2;
               fAttrPads[end_i] = totalPadding[i] - fAttrPads[i];
            } else {
               fAttrPads[end_i] = totalPadding[i] / 2;
               fAttrPads[i] = totalPadding[i] - fAttrPads[end_i];
            }
         }
         */
      }
      if (fAttrAutopad != "NOTSET") {
         throw
            std::runtime_error("ConvTranspose with padding SAME_UPPER or SAME_LOWER not supported");
      }
   }
   if (fAttrOutputShape.empty()) {
      fAttrOutputShape.resize(fDim);
      for (size_t i = 0; i < fDim; i++) {
         size_t j = i + 2;
         fAttrOutputShape[i] = fAttrStrides[i] * (inputShape[j] - 1) + fAttrKernelShape[i] + fAttrOutputPadding[i] - fAttrPads[i] - fAttrPads[fDim + i];
      }
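      // Illustrative example: with stride 2, an input extent of 4, a (dilated) kernel extent of 3,
      // no output padding and zero pads, the formula above gives 2 * (4 - 1) + 3 + 0 - 0 - 0 = 9
      // output elements along that axis.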
   } else {
      // The shape of the output is explicitly set
      // TODO Generate the padding from the output shape and the input shape
      throw
         std::runtime_error("ConvTranspose with output_shape explicitly set not yet supported.");
   }

   for (size_t i = 0; i < fDim; i++)
      outShape[i + 2] = fAttrOutputShape[i];
   std::vector<std::vector<size_t>> ret({outShape});
   return ret;
}

template <typename T>
void ROperator_ConvTranspose<T>::Initialize(RModel &model)
{
   fUseSession = model.UseSession();
   if (!model.CheckIfTensorAlreadyExist(fNX)) {
      throw std::runtime_error("TMVA SOFIE Conv Transpose op Input Tensor " + fNX + " is not found in model");
   }
   fShapeX = model.GetTensorShape(fNX);
   if (fShapeX.size() < 3 || fShapeX.size() > 5) {
      std::cout << fNX << " : " << ConvertShapeToString(fShapeX) << std::endl;
      throw std::runtime_error("TMVA SOFIE Conv Transpose Op input data tensor " + fNX +
                               " is not of 3,4 or 5 dimensions");
   }
   fDim = fShapeX.size() - 2;
   if (!model.CheckIfTensorAlreadyExist(fNW)) {
      throw std::runtime_error("TMVA SOFIE Conv Transpose op Input weight Tensor " + fNW + " is not found in model");
   }
   fShapeW = model.GetTensorShape(fNW);
   if (fShapeW.size() < 3 || fShapeW.size() > 5) {
      std::cout << fNW << " : " << ConvertShapeToString(fShapeW) << std::endl;
      throw std::runtime_error("TMVA SOFIE Conv Transpose Op input weight tensor " + fNW +
                               " is not of 3,4 or 5 dimensions");
   }
   fShapeY = ShapeInference({fShapeX, fShapeW})[0];

   model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShapeY);
   if (fNB != "") {
      if (!model.CheckIfTensorAlreadyExist(fNB)) {
         throw std::runtime_error("TMVA SOFIE ConvTrans op Input Tensor " + fNB + " is not found in model");
      }
      fShapeB = model.GetTensorShape(fNB);
      if (fShapeB.size() < 1)
         throw std::runtime_error("TMVA SOFIE ConvTrans op: Bias Tensor has empty shape");

      size_t bsize = ConvertShapeToLength(fShapeB);
      size_t ysize = ConvertShapeToLength(fShapeY);
      // broadcasting is needed if the total length of B differs from that of Y
      bool broadcast_needed = (bsize != ysize);
      // Broadcast the bias B
      if (broadcast_needed) {
         // we assume the bias tensor size is equal to the number of filters, i.e. the second dimension
         // of the output tensor
         if (bsize != fShapeY[1])
            throw std::runtime_error("TMVA SOFIE ConvTrans op: Bias Tensor has wrong shape: " +
                                     ConvertShapeToString(fShapeB));

         auto original_data = model.GetInitializedTensorData(fNB);

         if (fType != "float")
            throw std::runtime_error("TMVA SOFIE ConvTrans op: Broadcasting for non-float type tensors is not supported");
         // here the actual broadcasting
         if (!fUseSession) {
            // Broadcast B from M to N x M x Od x Oh x Ow
            std::shared_ptr<void> new_data_ptr(
               UTILITY::BroadcastConvBias<float>(static_cast<float *>(original_data.get()), bsize, fShapeY),
               std::default_delete<float[]>());

            model.UpdateInitializedTensor(fNB, model.GetTensorType(fNB), fShapeY, new_data_ptr);
            fShapeB = model.GetTensorShape(fNB);
            fNBroadcastedB = fNB; // use same name
         } else {
            // In case of session add broadcasting code in Session constructor and in GenerateInitCode
            // we need to add a new intermediate tensor for the broadcasted bias tensor
            fNBroadcastedB = "Broadcasted" + fNB;
            model.AddIntermediateTensor(fNBroadcastedB, model.GetTensorType(fNB), fShapeY);
         }
      } else {
         // bias tensor already has the correct shape, no need to broadcast
         if (fShapeY != fShapeB)
            throw std::runtime_error("TMVA SOFIE ConvTrans op: Broadcasting is not needed but bias has wrong shape " +
                                     ConvertShapeToString(fShapeB));
         fNBroadcastedB = fNB;
      }
   }

   size_t kernelSize = 1;
   size_t inputSize = 1;
   for (size_t i = 0; i < fDim; i++) {
      inputSize *= fShapeX[2 + i];
      kernelSize *= fAttrKernelShape[i];
   }

   std::vector<size_t> shape1 = {fShapeW[0], fShapeW[1], kernelSize};
   std::vector<size_t> shape2 = {fShapeW[1], kernelSize, inputSize};
   model.AddIntermediateTensor(fNX + "_f", ConvertStringToType(fType), shape1);
   model.AddIntermediateTensor(fNX + "_xcol", ConvertStringToType(fType), shape2);
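   // Note on the two intermediate tensors added above (illustrative description): the "_f" tensor
   // holds the kernels flattened (and dilated) into a matrix of shape [C_in][C_out/group * kernel_size],
   // while the "_xcol" tensor holds the GEMM result of shape [C_out/group * kernel_size][input spatial size]
   // that is later folded into the output image by col2im.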
   fConvK = fNX + "_f";
   fImcol = fNX + "_xcol";
   fOutputTensorNames.emplace_back(fConvK);
   fOutputTensorNames.emplace_back(fImcol);
}

template <typename T>
std::string ROperator_ConvTranspose<T>::GenerateInitCode()
{
   std::stringstream out;
   // generate initialization code for broadcasting of bias tensor
   size_t bsize = ConvertShapeToLength(fShapeB);
   size_t ysize = ConvertShapeToLength(fShapeY);
   if (bsize != ysize && !fNBroadcastedB.empty()) {
      // include a separate scope to avoid defining unique operator temp variables
      out << SP << "{\n";
      out << SP << SP << "float * data = TMVA::Experimental::SOFIE::UTILITY::BroadcastConvBias<float>(tensor_"
          << fNB << ", " << bsize << ", " << ConvertShapeToString(fShapeY) << ");\n";
      out << SP << SP << "std::copy(data, data + " << ConvertShapeToLength(fShapeY) << ", tensor_" << fNBroadcastedB << ");\n";
      out << SP << SP << "delete[] data;\n";
      out << SP << "}\n";
   }
   return out.str();
}

template <typename T>
std::string ROperator_ConvTranspose<T>::Generate(std::string OpName)
{
   OpName = "op_" + OpName;

   if (fShapeX.empty() || fShapeW.empty() || (fNB != "" && fShapeB.empty()) || fShapeY.empty()) {
      throw std::runtime_error("TMVA SOFIE Conv Transpose Op called to Generate without being initialized first");
   }

   std::stringstream out;

   size_t bsize = fShapeX[0];
   size_t kDepth = (fDim > 2) ? fShapeW[2] : 1;     // kernel depth
   size_t kHeight = (fDim > 1) ? fShapeW[fDim] : 1; // kernel height
   size_t kWidth = fShapeW[fDim + 1];               // kernel width

   size_t iDepth = (fDim > 2) ? fShapeX[2] : 1;     // input depth
   size_t iHeight = (fDim > 1) ? fShapeX[fDim] : 1; // input height
   size_t iWidth = fShapeX[fDim + 1];               // input width

   size_t oDepth = (fDim > 2) ? fShapeY[2] : 1;     // output depth
   size_t oHeight = (fDim > 1) ? fShapeY[fDim] : 1; // output height
   size_t oWidth = fShapeY[fDim + 1];               // output width

   out << "\n//---- operator ConvTranspose " << OpName << "\n";

   // create the first matrix with the convolution kernels
   if (!fUseSession) {
      size_t kernelSize = fAttrKernelShape[0];
      if (fDim > 1)
         kernelSize *= fAttrKernelShape[1];
      out << SP << fType << " tensor_" << fNX << "_f[" << fShapeW[0] * fShapeW[1] * kernelSize << "] = {0};\n";
   }

   // vectorize the (dilated) convolution kernels into a matrix
   // The shape of the kernel is W for a 1d image, H x W for a 2d image and D x H x W
   // for a 3d image
   size_t id = (fDim > 2) ? fDim - 3 : 2;
   size_t ih = (fDim > 1) ? fDim - 2 : 1;
   size_t iw = fDim - 1;
   size_t wstrideDil = fAttrDilations[iw];
   size_t hstride = kWidth;
   // stride dilated in the height
   size_t hstrideDil = fAttrKernelShape[iw];
   if (fDim > 1)
      hstrideDil *= fAttrDilations[ih];
   size_t dstride = kHeight * kWidth;
   size_t dstrideDil = fAttrKernelShape[iw];
   if (fDim > 1)
      dstrideDil *= fAttrKernelShape[ih];
   if (fDim > 2)
      dstrideDil *= fAttrDilations[id];
   size_t icstride = kHeight * kWidth * kDepth;
   size_t icstrideDil = 1;
   for (size_t i = 0; i < fDim; i++)
      icstrideDil *= fAttrKernelShape[i];
   size_t ocstride = fShapeW[1] * icstride;
   size_t ocstrideDil = fShapeW[1] * icstrideDil;
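   // Illustrative note: the *Dil strides index the flattened, dilated kernel matrix.
   // E.g. for a 2d 3x3 kernel with dilation 2, fAttrKernelShape is 5x5, so wstrideDil = 2,
   // hstrideDil = 5 * 2 = 10 and icstrideDil = 5 * 5 = 25; kernel element (kh, kw) is written
   // at flat offset kh * 10 + kw * 2 of its 5x5 dilated block.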

   // The matrix f has shape [C_in, (C_out/group) * kernel_size] and holds the flattened (dilated) kernels
   out << SP << "for (std::size_t ic = 0; ic < " << fShapeW[0] << "; ic++) {\n";
   out << SP << SP << "for (std::size_t oc = 0; oc < " << fShapeW[1] << "; oc++) {\n";
   // out << SP << SP << SP << "size_t kIndex = 0;\n"; // filter index
   if (fDim > 2)
      out << SP << SP << SP << "for (std::size_t kd = 0; kd < " << kDepth << "; kd++) {\n";
   if (fDim > 1)
      out << SP << SP << SP << "for (std::size_t kh = 0; kh < " << kHeight << "; kh++) {\n";
   out << SP << SP << SP << SP << "for (std::size_t kw = 0; kw < " << kWidth << "; kw++) {\n";

   out << SP << SP << SP << SP << SP << "tensor_" << fNX << "_f[ic * " << ocstrideDil << " + oc * " << icstrideDil;
   if (fDim > 2)
      out << " + kd * " << dstrideDil;
   if (fDim > 1)
      out << " + kh * " << hstrideDil;
   out << " + kw * " << wstrideDil << " ] = tensor_" << fNW << "[ic * " << ocstride << " + oc * " << icstride;

   if (fDim > 2)
      out << " + kd * " << dstride;
   if (fDim > 1)
      out << " + kh * " << hstride;
   out << " + kw ];\n";

   // here we would rotate the input kernel, transforming 0,1,2,...,N-1 into N-1,N-2,...,2,1,0
   // out << " + " << icstride - 1 << " - kIndex ];\n"; // transform 1,2,3,4 into 4,3,2,1
   // out << SP << SP << SP << SP << SP << "kIndex++;\n"; // update input filter index

   out << SP << SP << SP << SP << "}\n";
   if (fDim > 1)
      out << SP << SP << SP << "}\n";
   if (fDim > 2)
      out << SP << SP << SP << "}\n";

   out << SP << SP << "}\n";
   out << SP << "}\n";

   out << SP << "char " << OpName << "_transA = 'N';\n";
   out << SP << "char " << OpName << "_transB = 'T';\n";
   out << SP << "int " << OpName << "_m = " << iHeight * iWidth * iDepth << ";\n";
   out << SP << "int " << OpName << "_n = " << icstrideDil * fShapeW[1] << ";\n"; // (dilated) kernel size * output channels per group
   out << SP << "int " << OpName << "_k = " << fShapeW[0] << ";\n"; // input channels
   out << SP << "float " << OpName << "_alpha = 1.0;\n";
   out << SP << "float " << OpName << "_beta = 0.0;\n";
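   // Explanatory note (illustrative, derived from the parameters above): in row-major terms the
   // generated sgemm calls below compute, per batch element, xcol = f^T * X, i.e.
   // xcol[(C_out/group) * kernel_size][H_in * W_in] from f[C_in][(C_out/group) * kernel_size] and
   // X[C_in][H_in * W_in]; col2im then accumulates these columns into the output image.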

   if (!fUseSession) {
      out << SP << fType << " tensor_" << fNX << "_xcol[" << fShapeW[0] * icstrideDil * oDepth * oHeight * oWidth << "] = {0};\n";
   }

   // Loop on the batch size
   out << SP << "for (size_t n = 0; n < " << bsize << "; n++) {\n";

   // For the transposed convolution the order of operations is reversed with respect to Conv:
   // a GEMM first multiplies the flattened kernels with the input, producing the matrix op_xcol
   // of shape (output channels/group * filter_h * filter_w , input_h * input_w);
   // col2im (the inverse of the caffe-style im2col trick) then folds and accumulates the
   // overlapping columns into the output image. Doing it this way keeps memory reads and
   // writes consecutive, which is the reason for this formulation.
   if (fDim == 1) {
      if (fAttrPads[0] != fAttrPads[1]) {
         std::cout << "TMVA SOFIE Operator ConvTranspose: asymmetric padding not supported. Assuming an average padding"
                   << std::endl;
         fAttrPads[0] = (fAttrPads[0] + fAttrPads[1]) / 2;
      }
      fAttrPads[1] = 0;
   }
   if (fDim == 2) {
      if (fAttrPads[0] != fAttrPads[2] || fAttrPads[1] != fAttrPads[3]) {
         std::cout << "TMVA SOFIE Operator ConvTranspose: asymmetric padding not supported. Assuming an average padding"
                   << std::endl;
         fAttrPads[0] = (fAttrPads[0] + fAttrPads[2]) / 2;
         fAttrPads[1] = (fAttrPads[1] + fAttrPads[3]) / 2;
      }
   }
   if (fDim == 3) {
      if (fAttrPads[0] != fAttrPads[3] || fAttrPads[1] != fAttrPads[4] || fAttrPads[2] != fAttrPads[5]) {
         std::cout << "TMVA SOFIE Operator ConvTranspose: asymmetric padding not supported. Assuming an average padding"
                   << std::endl;
         fAttrPads[0] = (fAttrPads[0] + fAttrPads[3]) / 2;
         fAttrPads[1] = (fAttrPads[1] + fAttrPads[4]) / 2;
         fAttrPads[2] = (fAttrPads[2] + fAttrPads[5]) / 2;
      }
   }
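   // Illustrative note: asymmetric pads are symmetrized by integer averaging, e.g. 2d pads
   // {top=1, left=0, bottom=2, right=0} become pad_h = (1 + 2) / 2 = 1 and pad_w = 0, so the
   // generated code may differ slightly from the ONNX semantics in that case.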

   if (fAttrGroup == 1) {
      out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iDepth * iHeight * iWidth << ";\n";
      out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oDepth * oHeight * oWidth << ";\n";

      // For ConvTranspose the BLAS gemm is done before col2im
      out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
          << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, "
          << "tensor_" << fNX << " + x_offset, &" << OpName << "_m,\n"; // use m if op_xcol is not transposed, otherwise k
      out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_n, &" << OpName << "_beta, tensor_"
          << fNX << "_xcol, &" << OpName << "_m);\n";

      // when using im2col the resulting matrix is transposed, i.e. (input_c * filter_h * filter_w, output_h *
      // output_w); before using col2im the matrix would need to be transposed
      if (fDim < 3) {
         out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::col2im<float>(tensor_" << fNX << "_xcol,"
             // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
             // dilation_w,
             << fShapeY[1] << "," << oHeight << "," << oWidth << ",";
         if (fDim == 1)
            out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
                << fAttrDilations[0];
         else // fDim == 2
            out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
                << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
                << fAttrDilations[1];
         out << ", tensor_" << fNY << " + out_offset);\n\n ";
      } else {
         // 3d : needs a col2im for 3d
         throw std::runtime_error("TMVA SOFIE 3D Conv Transpose not yet supported");
         out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
             << " + x_offset,"
             // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
             // dilation_d, dilation_h, dilation_w,
             //
             << fShapeX[1] << "," << oDepth << "," << oHeight << "," << oWidth << "," << fAttrKernelShape[0] << ","
             << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1] << ","
             << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
             << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] <<
             ",tensor_" << fNX << "_xcol);\n\n ";
      }
      // // BLAS
      // out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
      //     << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, tensor_" << fNX << "_xcol, &" << OpName
      //     << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
      // out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
      //     << " + out_offset, &" << OpName << "_m);\n";
   } else {
      // case of grouped transposed convolution:
      // make a loop on the groups and repeat the operations (GEMM + col2im) for each group
      out << SP << SP << "for (size_t g = 0; g < " << fAttrGroup << "; g++) {\n";
      out << SP << SP << "size_t x_offset = n * " << fShapeX[1] * iHeight * iWidth << " + g * "
          << fShapeX[1] * iHeight * iWidth / fAttrGroup << ";\n ";
      out << SP << SP << "size_t out_offset = n * " << fShapeY[1] * oHeight * oWidth << " + g * "
          << fShapeY[1] * oHeight * oWidth / fAttrGroup << ";\n ";

      // do BLAS here (LM: probably need an offset for op_f, the kernels)
      out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
          << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, "
          << "tensor_" << fNX << " + x_offset, &" << OpName
          << "_m,\n"; // use m if op_xcol is not transposed, otherwise k
      out << SP << SP << SP << "tensor_" << fNX << "_f, &" << OpName << "_n, &" << OpName
          << "_beta, tensor_" << fNX << "_xcol , &" << OpName << "_m);\n";

      if (fDim < 3) {
         out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::col2im<float>(tensor_" << fNX << "_xcol,"
             // channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
             // dilation_w,
             << fShapeY[1] << "," << oHeight << "," << oWidth << ",";
         if (fDim == 1)
            out << "1, " << fAttrKernelShape[0] << ",0," << fAttrPads[0] << ",1," << fAttrStrides[0] << ",1,"
                << fAttrDilations[0];
         else // fDim == 2
            out << fAttrKernelShape[0] << "," << fAttrKernelShape[1] << "," << fAttrPads[0] << "," << fAttrPads[1]
                << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrDilations[0] << ","
                << fAttrDilations[1];
         out << ", tensor_" << fNY << " + out_offset);\n\n ";
      } else {
         // 3d im2col
         throw std::runtime_error("TMVA SOFIE 3D Conv Transpose not yet supported");

         out << SP << SP << "TMVA::Experimental::SOFIE::UTILITY::Im2col_3d<float>(tensor_" << fNX
             << " + x_offset,"
             // channels, d, h, w, k_d, k_h, k_w, pad_d, pad_h, pad_w, stride_d, stride_h, stride_w,
             // dilation_d, dilation_h, dilation_w,
             //
             << fShapeX[1] << "," << oDepth << "," << oHeight << "," << oWidth << "," << fAttrKernelShape[0] << ","
             << fAttrKernelShape[1] << "," << fAttrKernelShape[2] << "," << fAttrPads[0] << "," << fAttrPads[1] << ","
             << fAttrPads[2] << "," << fAttrStrides[0] << "," << fAttrStrides[1] << "," << fAttrStrides[2] << ","
             << fAttrDilations[0] << "," << fAttrDilations[1] << "," << fAttrDilations[2] << "," << "tensor_" << fNX
             << "_xcol);\n\n ";
      }

      // // BLAS
      // // offset g must be g * k * n
      // out << SP << SP << SP << "size_t offset_f = g * " << fShapeW[0] * fShapeW[1] * icstrideDil / fAttrGroup << ";\n";
      // out << SP << SP << "BLAS::sgemm_(&" << OpName << "_transA, &" << OpName << "_transB, &" << OpName << "_m, &"
      //     << OpName << "_n, &" << OpName << "_k, &" << OpName << "_alpha, tensor_" << fNX << "_xcol, &" << OpName
      //     << "_m,\n"; // use m if op_xcol is not transpose , otherwise k
      // out << SP << SP << SP << "tensor_" << fNX << "_f + offset_f, &" << OpName << "_k, &" << OpName << "_beta, tensor_" << fNY
      //     << " + out_offset"
      //     << ", &" << OpName << "_m);\n";

      out << SP << SP << "}\n"; // end of group loop
   }

   out << SP << "}\n"; // end of batch size loop

   if (fNBroadcastedB != "") {
      out << SP << "int " << OpName << "_size = " << fShapeY[0] * fShapeY[1] * oDepth * oHeight * oWidth << ";\n";
      out << SP << "float " << OpName << "_gamma = 1.0;\n";
      out << SP << "int " << OpName << "_incx = 1;\n";
      out << SP << "int " << OpName << "_incy = 1;\n";

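      // Note: saxpy computes y = alpha * x + y, so the generated call below adds the
      // (already broadcast) bias tensor element-wise to the output tensor in a single pass.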
      out << SP << "BLAS::saxpy_(&" << OpName << "_size, &" << OpName << "_gamma, tensor_" << fNBroadcastedB << ", &"
          << OpName << "_incx, tensor_" << fNY << ", &" << OpName << "_incy);\n";
   }

   return out.str();
}

} // namespace SOFIE
} // namespace Experimental
} // namespace TMVA

#endif