36#define BEGIN blockDim.x *blockIdx.x + threadIdx.x 
   37#define STEP blockDim.x *gridDim.x 
   51   for (
int pdf = 1; pdf < nPdfs; pdf++)
 
   58   Batch m = batches[0], m0 = batches[1], 
c = batches[2], 
p = batches[3];
 
   60      const double t = 
m[i] / m0[i];
 
   61      const double u = 1 - t * t;
 
   74   Batch coef0 = batches[0];
 
   75   Batch coef1 = batches[1];
 
   76   Batch tagFlav = batches[2];
 
   77   Batch delMistag = batches[3];
 
   78   Batch mixState = batches[4];
 
   79   Batch mistag = batches[5];
 
   83         coef0[i] * (1.0 - tagFlav[i] * delMistag[0]) + coef1[i] * (mixState[i] * (1.0 - 2.0 * mistag[0]));
 
   90   const int degree = nCoef - 1;
 
   93   Batch xData = batches[0];
 
   96   double binomial = 1.0;
 
   97   for (
int k = 0; k < nCoef; k++) {
 
   99      binomial = (binomial * (degree - k)) / (k + 1);
 
  105         powX[i] = pow_1_X[i] = 1.0;
 
  112      for (
int k = 2; k <= degree; k += 2)
 
  114            pow_1_X[i] *= _1_X[i] * _1_X[i];
 
  118            pow_1_X[i] *= _1_X[i];
 
  122         _1_X[i] = 1 / _1_X[i];
 
  124      for (
int k = 0; k < nCoef; k++)
 
  130            pow_1_X[i] *= _1_X[i];
 
  136         double powX = 1.0, pow_1_X = 1.0;
 
  137         for (
int k = 1; k <= degree; k++)
 
  139         const double _1_X = 1 / (1 - X);
 
  140         for (
int k = 0; k < nCoef; k++) {
 
  149   for (
int k = 0; k < nCoef; k++) {
 
  151      binomial = (binomial * (degree - k)) / (k + 1);
 
  157   Batch X = batches[0], M = batches[1], SL = batches[2], SR = batches[3];
 
  159      double arg = X[i] - M[i];
 
  170   Batch X = batches[0], M = batches[1], W = batches[2];
 
  172      const double arg = X[i] - M[i];
 
  173      batches.
_output[i] = 1 / (arg * arg + 0.25 * W[i] * W[i]);
 
  179   Batch X = batches[0], XP = batches[1], SP = batches[2], XI = batches[3], 
R1 = batches[4], 
R2 = batches[5];
 
  180   const double r3 = log(2.0);
 
  181   const double r6 = exp(-6.0);
 
  182   const double r7 = 2 * sqrt(2 * log(2.0));
 
  185      const double r1 = XI[i] * 
fast_isqrt(XI[i] * XI[i] + 1);
 
  186      const double r4 = 1 / 
fast_isqrt(XI[i] * XI[i] + 1);
 
  187      const double hp = 1 / (SP[i] * r7);
 
  188      const double x1 = XP[i] + 0.5 * SP[i] * r7 * (r1 - 1);
 
  189      const double x2 = XP[i] + 0.5 * SP[i] * r7 * (r1 + 1);
 
  192      if (XI[i] > r6 || XI[i] < -r6)
 
  195      double factor = 1, 
y = X[i] - 
x1, Yp = XP[i] - 
x1, yi = r4 - XI[i], rho = 
R1[i];
 
  204      batches.
_output[i] = rho * 
y * 
y / Yp / Yp - r3 + factor * 4 * r3 * 
y * hp * r5 * r4 / yi / yi;
 
  205      if (X[i] >= 
x1 && X[i] < 
x2) {
 
  207            fast_log(1 + 4 * XI[i] * r4 * (X[i] - XP[i]) * hp) / 
fast_log(1 + 2 * XI[i] * (XI[i] - r4));
 
  210      if (X[i] >= 
x1 && X[i] < 
x2 && XI[i] < r6 && XI[i] > -r6)
 
  211         batches.
_output[i] = -4 * r3 * (X[i] - XP[i]) * (X[i] - XP[i]) * hp * hp;
 
  219   Batch M = batches[0], M0 = batches[1], S = batches[2], A = batches[3], 
N = batches[4];
 
  221      const double t = (M[i] - M0[i]) / S[i];
 
  222      if ((A[i] > 0 && t >= -A[i]) || (A[i] < 0 && -t >= A[i]))
 
  223         batches.
_output[i] = -0.5 * t * t;
 
  225         batches.
_output[i] = 
N[i] / (
N[i] - A[i] * A[i] - A[i] * t);
 
  228         batches.
_output[i] -= 0.5 * A[i] * A[i];
 
  237   Batch xData = batches[0];
 
  248         prev[i][0] = batches.
_output[i] = 1.0;
 
  251      for (
int k = 0; k < nCoef; k++)
 
  256            const double next = 2 * X[i] * prev[i][1] - prev[i][0];
 
  257            prev[i][0] = prev[i][1];
 
  262         double prev0 = 1.0, prev1 = 2 * (xData[i] - 0.5 * (
xmax + 
xmin)) / (
xmax - 
xmin), X = prev1;
 
  264         for (
int k = 0; k < nCoef; k++) {
 
  268            const double next = 2 * X * prev1 - prev0;
 
  277   Batch X = batches[0];
 
  278   const double ndof = batches.
extraArg(0);
 
  279   const double gamma = 1 / std::tgamma(ndof / 2.0);
 
  283   constexpr double ln2 = 0.693147180559945309417232121458;
 
  285      double arg = (ndof - 2) * 
fast_log(X[i]) - X[i] - ndof * ln2;
 
  293      batches.
_output[i] = 0.0 + (batches[0][i] == 1.0);
 
  299   Batch DM = batches[0], DM0 = batches[1], C = batches[2], A = batches[3], B = batches[4];
 
  301      const double ratio = DM[i] / DM0[i];
 
  302      const double arg1 = (DM0[i] - DM[i]) / C[i];
 
  303      const double arg2 = A[i] * 
fast_log(ratio);
 
  314   Batch x = batches[0], 
c = batches[1];
 
  321   Batch X = batches[0], 
G = batches[1], B = batches[2], M = batches[3];
 
  322   double gamma = -std::lgamma(
G[0]);
 
  325         batches.
_output[i] = (
G[i] == 1.0) / B[i];
 
  326      else if (
G.isItVector())
 
  327         batches.
_output[i] = -std::lgamma(
G[i]);
 
  333         const double invBeta = 1 / B[i];
 
  334         double arg = (X[i] - M[i]) * invBeta;
 
  337         batches.
_output[i] += arg * (
G[i] - 1);
 
  345   const double root2 = std::sqrt(2.);
 
  346   const double root2pi = std::sqrt(2. * std::atan2(0., -1.));
 
  348   const bool isMinus = batches.
extraArg(0) < 0.0;
 
  349   const bool isPlus = batches.
extraArg(0) > 0.0;
 
  353      const double x = batches[0][i];
 
  354      const double mean = batches[1][i] * batches[2][i];
 
  355      const double sigma = batches[3][i] * batches[4][i];
 
  356      const double tau = batches[5][i];
 
  360         double xprime = (
x - mean) / 
sigma;
 
  361         double result = std::exp(-0.5 * xprime * xprime) / (
sigma * root2pi);
 
  362         if (!isMinus && !isPlus)
 
  364         batches._output[i] = 
result;
 
  367         const double xprime = (
x - mean) / tau;
 
  368         const double c = 
sigma / (root2 * tau);
 
  369         const double u = xprime / (2 * 
c);
 
  376         batches._output[i] = 
result;
 
  384   auto mean = batches[1];
 
  385   auto sigma = batches[2];
 
  387      const double arg = 
x[i] - mean[i];
 
  388      const double halfBySigmaSq = -0.5 / (
sigma[i] * 
sigma[i]);
 
  396      batches.
_output[i] = batches[0][i];
 
  407         batches.
_output[i] *= batches[1][i];
 
  412   Batch mass = batches[0], mu = batches[1], lambda = batches[2], gamma = batches[3], delta = batches[4];
 
  414   const double massThreshold = batches.
extraArg(0);
 
  417      const double arg = (mass[i] - mu[i]) / lambda[i];
 
  421      const double asinh_arg = asinh(arg);
 
  423      const double expo = gamma[i] + delta[i] * asinh_arg;
 
  425         delta[i] * 
fast_exp(-0.5 * expo * expo) * 
fast_isqrt(1. + arg * arg) / (sqrtTwoPi * lambda[i]);
 
  427      const double passThrough = mass[i] >= massThreshold;
 
  438   auto case0 = [](
double x) {
 
  439      const double a1[3] = {0.04166666667, -0.01996527778, 0.02709538966};
 
  441      return 0.3989422803 * 
fast_exp(-1 / u - 0.5 * (
x + 1)) * (1 + (a1[0] + (a1[1] + a1[2] * u) * u) * u);
 
  443   auto case1 = [](
double x) {
 
  444      constexpr double p1[5] = {0.4259894875, -0.1249762550, 0.03984243700, -0.006298287635, 0.001511162253};
 
  445      constexpr double q1[5] = {1.0, -0.3388260629, 0.09594393323, -0.01608042283, 0.003778942063};
 
  447      return fast_exp(-u - 0.5 * (
x + 1)) * (p1[0] + (p1[1] + (p1[2] + (p1[3] + p1[4] * 
x) * 
x) * 
x) * 
x) /
 
  448             (q1[0] + (q1[1] + (q1[2] + (q1[3] + q1[4] * 
x) * 
x) * 
x) * 
x);
 
  450   auto case2 = [](
double x) {
 
  451      constexpr double p2[5] = {0.1788541609, 0.1173957403, 0.01488850518, -0.001394989411, 0.0001283617211};
 
  452      constexpr double q2[5] = {1.0, 0.7428795082, 0.3153932961, 0.06694219548, 0.008790609714};
 
  453      return (p2[0] + (p2[1] + (p2[2] + (p2[3] + p2[4] * 
x) * 
x) * 
x) * 
x) /
 
  454             (q2[0] + (q2[1] + (q2[2] + (q2[3] + q2[4] * 
x) * 
x) * 
x) * 
x);
 
  456   auto case3 = [](
double x) {
 
  457      constexpr double p3[5] = {0.1788544503, 0.09359161662, 0.006325387654, 0.00006611667319, -0.000002031049101};
 
  458      constexpr double q3[5] = {1.0, 0.6097809921, 0.2560616665, 0.04746722384, 0.006957301675};
 
  459      return (p3[0] + (p3[1] + (p3[2] + (p3[3] + p3[4] * 
x) * 
x) * 
x) * 
x) /
 
  460             (q3[0] + (q3[1] + (q3[2] + (q3[3] + q3[4] * 
x) * 
x) * 
x) * 
x);
 
  462   auto case4 = [](
double x) {
 
  463      constexpr double p4[5] = {0.9874054407, 118.6723273, 849.2794360, -743.7792444, 427.0262186};
 
  464      constexpr double q4[5] = {1.0, 106.8615961, 337.6496214, 2016.712389, 1597.063511};
 
  465      const double u = 1 / 
x;
 
  466      return u * u * (p4[0] + (p4[1] + (p4[2] + (p4[3] + p4[4] * u) * u) * u) * u) /
 
  467             (q4[0] + (q4[1] + (q4[2] + (q4[3] + q4[4] * u) * u) * u) * u);
 
  469   auto case5 = [](
double x) {
 
  470      constexpr double p5[5] = {1.003675074, 167.5702434, 4789.711289, 21217.86767, -22324.94910};
 
  471      constexpr double q5[5] = {1.0, 156.9424537, 3745.310488, 9834.698876, 66924.28357};
 
  472      const double u = 1 / 
x;
 
  473      return u * u * (p5[0] + (p5[1] + (p5[2] + (p5[3] + p5[4] * u) * u) * u) * u) /
 
  474             (q5[0] + (q5[1] + (q5[2] + (q5[3] + q5[4] * u) * u) * u) * u);
 
  476   auto case6 = [](
double x) {
 
  477      constexpr double p6[5] = {1.000827619, 664.9143136, 62972.92665, 475554.6998, -5743609.109};
 
  478      constexpr double q6[5] = {1.0, 651.4101098, 56974.73333, 165917.4725, -2815759.939};
 
  479      const double u = 1 / 
x;
 
  480      return u * u * (p6[0] + (p6[1] + (p6[2] + (p6[3] + p6[4] * u) * u) * u) * u) /
 
  481             (q6[0] + (q6[1] + (q6[2] + (q6[3] + q6[4] * u) * u) * u) * u);
 
  483   auto case7 = [](
double x) {
 
  484      const double a2[2] = {-1.845568670, -4.284640743};
 
  486      return u * u * (1 + (a2[0] + a2[1] * u) * u);
 
  489   Batch X = batches[0], M = batches[1], S = batches[2];
 
  492      batches.
_output[i] = (X[i] - M[i]) / S[i];
 
  497      else if (batches.
_output[i] < -5.5)
 
  499      else if (batches.
_output[i] < -1.0)
 
  501      else if (batches.
_output[i] < 1.0)
 
  503      else if (batches.
_output[i] < 5.0)
 
  505      else if (batches.
_output[i] < 12.0)
 
  507      else if (batches.
_output[i] < 50.0)
 
  509      else if (batches.
_output[i] < 300.)
 
  517   Batch X = batches[0], M0 = batches[1], K = batches[2];
 
  518   const double rootOf2pi = 2.506628274631000502415765284811;
 
  520      double lnxOverM0 = 
fast_log(X[i] / M0[i]);
 
  524      double arg = lnxOverM0 / lnk;
 
  532   auto rawVal = batches[0];
 
  533   auto normVal = batches[1];
 
  535   int nEvalErrorsType0 = 0;
 
  536   int nEvalErrorsType1 = 0;
 
  537   int nEvalErrorsType2 = 0;
 
  542      if (normVal[i] < 0. || (normVal[i] == 0. && rawVal[i] != 0)) {
 
  546      } 
else if (rawVal[i] < 0.) {
 
  550      } 
else if (std::isnan(rawVal[i])) {
 
  555         out = (rawVal[i] == 0. && normVal[i] == 0.) ? 0. : rawVal[i] / normVal[i];
 
  560   if (nEvalErrorsType0 > 0)
 
  562   if (nEvalErrorsType1 > 1)
 
  564   if (nEvalErrorsType2 > 2)
 
  578   Batch X = batches[0], P = batches[1], W = batches[2], T = batches[3];
 
  579   constexpr double xi = 2.3548200450309494; 
 
  581      double argasinh = 0.5 * xi * T[i];
 
  582      double argln = argasinh + 1 / 
fast_isqrt(argasinh * argasinh + 1);
 
  585      double argln2 = 1 - (X[i] - P[i]) * T[i] / W[i];
 
  587      batches.
_output[i] = ln / asinh;
 
  589      batches.
_output[i] -= 2.0 / xi / xi * asinh * asinh;
 
  599   Batch x = batches[0], mean = batches[1];
 
  600   bool protectNegative = batches.
extraArg(0);
 
  601   bool noRounding = batches.
extraArg(1);
 
  603      const double x_i = noRounding ? 
x[i] : floor(
x[i]);
 
  604      batches.
_output[i] = std::lgamma(x_i + 1.);
 
  608      const double x_i = noRounding ? 
x[i] : floor(
x[i]);
 
  609      const double logMean = 
fast_log(mean[i]);
 
  610      const double logPoisson = x_i * logMean - mean[i] - batches.
_output[i];
 
  619      if (protectNegative && mean[i] < 0)
 
  626   const int nCoef = batches.
extraArg(0);
 
  627   const std::size_t nEvents = batches.
getNEvents();
 
  630   for (
size_t i = 
BEGIN; i < nEvents; i += 
STEP) {
 
  631      batches.
_output[i] = batches[nCoef - 1][i];
 
  636   for (
int k = nCoef - 2; k >= 0; k--) {
 
  637      for (
size_t i = 
BEGIN; i < nEvents; i += 
STEP) {
 
  645   const int nPdfs = batches.
extraArg(0);
 
  649   for (
int pdf = 0; pdf < nPdfs; pdf++) {
 
  651         batches.
_output[i] *= batches[pdf][i];
 
  659      batches.
_output[i] = batches[0][i] / batches[1][i];
 
  666   const bool isMinus = batches.
extraArg(0) < 0.0;
 
  667   const bool isPlus = batches.
extraArg(0) > 0.0;
 
  669      double x = batches[0][i];
 
  671      const bool isOutOfSign = (isMinus && 
x > 0.0) || (isPlus && 
x < 0.0);
 
  672      batches.
_output[i] = isOutOfSign ? 0.0 : 
fast_exp(-std::abs(
x) / batches[1][i]);
 
  678   const bool isMinus = batches.
extraArg(0) < 0.0;
 
  679   const bool isPlus = batches.
extraArg(0) > 0.0;
 
  681      double x = batches[0][i];
 
  683      const bool isOutOfSign = (isMinus && 
x > 0.0) || (isPlus && 
x < 0.0);
 
  690   const bool isMinus = batches.
extraArg(0) < 0.0;
 
  691   const bool isPlus = batches.
extraArg(0) > 0.0;
 
  693      double x = batches[0][i];
 
  695      const bool isOutOfSign = (isMinus && 
x > 0.0) || (isPlus && 
x < 0.0);
 
  702   const bool isMinus = batches.
extraArg(0) < 0.0;
 
  703   const bool isPlus = batches.
extraArg(0) > 0.0;
 
  705      double x = batches[0][i];
 
  707      const bool isOutOfSign = (isMinus && 
x > 0.0) || (isPlus && 
x < 0.0);
 
  711         const double tscaled = std::abs(
x) / batches[1][i];
 
  719   const bool isMinus = batches.
extraArg(0) < 0.0;
 
  720   const bool isPlus = batches.
extraArg(0) > 0.0;
 
  722      double x = batches[0][i];
 
  724      const bool isOutOfSign = (isMinus && 
x > 0.0) || (isPlus && 
x < 0.0);
 
  728         const double tscaled = std::abs(
x) / batches[1][i];
 
  736   const bool isMinus = batches.
extraArg(0) < 0.0;
 
  737   const bool isPlus = batches.
extraArg(0) > 0.0;
 
  739      double x = batches[0][i];
 
  741      const bool isOutOfSign = (isMinus && 
x > 0.0) || (isPlus && 
x < 0.0);
 
  742      batches.
_output[i] = isOutOfSign ? 0.0 : 
fast_exp(-std::abs(
x) / batches[1][i]) * sinh(
x * batches[2][i] * 0.5);
 
  748   const bool isMinus = batches.
extraArg(0) < 0.0;
 
  749   const bool isPlus = batches.
extraArg(0) > 0.0;
 
  751      double x = batches[0][i];
 
  753      const bool isOutOfSign = (isMinus && 
x > 0.0) || (isPlus && 
x < 0.0);
 
  754      batches.
_output[i] = isOutOfSign ? 0.0 : 
fast_exp(-std::abs(
x) / batches[1][i]) * cosh(
x * batches[2][i] * .5);
 
  760   Batch X = batches[0], M = batches[1], W = batches[2], S = batches[3];
 
  761   const double invSqrt2 = 0.707106781186547524400844362105;
 
  763      const double arg = (X[i] - M[i]) * (X[i] - M[i]);
 
  764      if (S[i] == 0.0 && W[i] == 0.0)
 
  766      else if (S[i] == 0.0)
 
  767         batches.
_output[i] = 1 / (arg + 0.25 * W[i] * W[i]);
 
  768      else if (W[i] == 0.0)
 
  771         batches.
_output[i] = invSqrt2 / S[i];
 
  775      if (S[i] != 0.0 && W[i] != 0.0) {
 
  778         const double factor = W[i] > 0.0 ? 0.5 : -0.5;
 
  779         RooHeterogeneousMath::STD::complex<double> z(batches.
_output[i] * (X[i] - M[i]),
 
  780                                                      factor * batches.
_output[i] * W[i]);
 
winID h TVirtualViewer3D TVirtualGLPainter p
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
 
Option_t Option_t TPoint TPoint const char x2
 
Option_t Option_t TPoint TPoint const char x1
 
__roodevice__ std::size_t getNEvents() const
 
__roodevice__ std::size_t getNExtraArgs() const
 
__roodevice__ void setExtraArg(std::size_t i, double val)
 
__roodevice__ double extraArg(std::size_t i) const
 
__rooglobal__ void computeTruthModelCosBasis(BatchesHandle batches)
 
__rooglobal__ void computeExponential(BatchesHandle batches)
 
__rooglobal__ void computeDstD0BG(BatchesHandle batches)
 
__rooglobal__ void computeLandau(BatchesHandle batches)
 
__rooglobal__ void computeArgusBG(BatchesHandle batches)
 
__rooglobal__ void computeBukin(BatchesHandle batches)
 
__rooglobal__ void computeNovosibirsk(BatchesHandle batches)
 
__rooglobal__ void computeNormalizedPdf(BatchesHandle batches)
 
__rooglobal__ void computePolynomial(BatchesHandle batches)
 
__rooglobal__ void computeTruthModelSinhBasis(BatchesHandle batches)
 
__rooglobal__ void computePoisson(BatchesHandle batches)
 
__rooglobal__ void computeTruthModelCoshBasis(BatchesHandle batches)
 
__rooglobal__ void computeBifurGauss(BatchesHandle batches)
 
__rooglobal__ void computeTruthModelSinBasis(BatchesHandle batches)
 
__rooglobal__ void computeRatio(BatchesHandle batches)
 
__rooglobal__ void computeAddPdf(BatchesHandle batches)
 
__rooglobal__ void computeTruthModelLinBasis(BatchesHandle batches)
 
__rooglobal__ void computeChiSquare(BatchesHandle batches)
 
__rooglobal__ void computeTruthModelQuadBasis(BatchesHandle batches)
 
__rooglobal__ void computeDeltaFunction(BatchesHandle batches)
 
__rooglobal__ void computeChebychev(BatchesHandle batches)
 
__rooglobal__ void computeIdentity(BatchesHandle batches)
 
__rooglobal__ void computeLognormal(BatchesHandle batches)
 
__rooglobal__ void computeGaussModelExpBasis(BatchesHandle batches)
 
__rooglobal__ void computeVoigtian(BatchesHandle batches)
 
__rooglobal__ void computeTruthModelExpBasis(BatchesHandle batches)
 
__rooglobal__ void computeGaussian(BatchesHandle batches)
 
__rooglobal__ void computeBernstein(BatchesHandle batches)
 
__rooglobal__ void computeNegativeLogarithms(BatchesHandle batches)
 
__rooglobal__ void computeGamma(BatchesHandle batches)
 
std::vector< void(*)(BatchesHandle)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
 
__rooglobal__ void computeJohnson(BatchesHandle batches)
 
__rooglobal__ void computeBreitWigner(BatchesHandle batches)
 
__rooglobal__ void computeProdPdf(BatchesHandle batches)
 
__rooglobal__ void computeCBShape(BatchesHandle batches)
 
__rooglobal__ void computeBMixDecay(BatchesHandle batches)
 
Namespace for dispatching RooFit computations to various backends.
 
__roodevice__ double fast_exp(double x)
 
__roodevice__ double fast_sin(double x)
 
constexpr std::size_t bufferSize
 
__roodevice__ double fast_log(double x)
 
__roodevice__ double fast_cos(double x)
 
__roodevice__ double fast_isqrt(double x)
 
__roodevice__ __roohost__ STD::complex< double > faddeeva(STD::complex< double > z)
 
__roohost__ __roodevice__ STD::complex< double > evalCerf(double swt, double u, double c)
 
constexpr Double_t TwoPi()
 
#define R1(v, w, x, y, z, i)
 
#define R2(v, w, x, y, z, i)
 
__roodevice__ static __roohost__ double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.