36#error "RF_ARCH should always be defined" 
   46   batches._extraArgs = extraArgs.data();
 
   47   batches._nEvents = nEvents;
 
   48   batches._nBatches = nBatches;
 
   49   batches._nExtraArgs = extraArgs.size();
 
   53void fillArrays(std::vector<Batch> &arrays, 
const VarVector &vars, 
double *buffer)
 
   56   arrays.resize(vars.size());
 
   57   for (
size_t i = 0; i < vars.size(); i++) {
 
   58      const std::span<const double> &span = vars[i];
 
   61         ss << 
"The span number " << i << 
" passed to Batches::Batches() is empty!";
 
   62         throw std::runtime_error(ss.str());
 
   63      } 
else if (span.size() > 1)
 
   64         arrays[i].set(span.data(), 
true);
 
   78class RooBatchComputeClass : 
public RooBatchComputeInterface {
 
   94#error "It's unexpected that _QUOTEVAL_ is defined at this point!" 
   96#define _QUOTEVAL_(x) _QUOTE_(x) 
   99      std::transform(out.begin(), out.end(), out.begin(), [](
unsigned char c) { return std::tolower(c); });
 
  116      static std::vector<double> buffer;
 
  123         std::size_t nEventsPerThread = nEvents / nThreads + (nEvents % nThreads > 0);
 
  126         nThreads = nEvents / nEventsPerThread + (nEvents % nEventsPerThread > 0);
 
  128         auto task = [&](std::size_t idx) -> 
int {
 
  132            std::vector<Batch> arrays;
 
  133            fillBatches(batches, 
output, nEventsPerThread, vars.size(), extraArgs);
 
  134            fillArrays(arrays, vars, buffer.data());
 
  135            batches.
_arrays = arrays.data();
 
  139            if (idx == nThreads - 1) {
 
  155         std::vector<std::size_t> indices(nThreads);
 
  156         for (
unsigned int i = 1; i < nThreads; i++) {
 
  159         ex.Map(task, indices);
 
  164         std::vector<Batch> arrays;
 
  165         fillBatches(batches, 
output, nEvents, vars.size(), extraArgs);
 
  166         fillArrays(arrays, vars, buffer.data());
 
  167         batches.
_arrays = arrays.data();
 
  183                             std::span<const double> offsetProbas) 
override;
 
  188inline std::pair<double, double> getLog(
double prob, 
ReduceNLLOutput &out)
 
  190   if (std::abs(prob) > 1e6) {
 
  195      out.nNonPositiveValues++;
 
  196      return {std::log(prob), -prob};
 
  199   if (std::isnan(prob)) {
 
  204   return {std::log(prob), 0.0};
 
  215                                                std::span<const double> weights, std::span<const double> offsetProbas)
 
  219   double badness = 0.0;
 
  223   for (std::size_t i = 0; i < 
probas.size(); ++i) {
 
  225      const double eventWeight = weights.size() > 1 ? weights[i] : weights[0];
 
  227      if (0. == eventWeight)
 
  230      std::pair<double, double> logOut = getLog(probas[i], out);
 
  231      double term = logOut.first;
 
  232      badness += logOut.second;
 
  234      if (!offsetProbas.empty()) {
 
  235         term -= std::log(offsetProbas[i]);
 
  238      term *= -eventWeight;
 
  243   out.nllSum = nllSum.
Sum();
 
  249      out.nllSumCarry = 0.0;
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
These classes encapsulate the necessary data for the computations.
This class implements the interface to execute the same task multiple times, sequentially or in paral...
unsigned GetPoolSize() const
Return the number of pooled workers.
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
static KahanSum< T, N > Accumulate(Iterator begin, Iterator end, T initialValue=T{})
Iterate over a range and return an instance of a KahanSum.
void Add(T x)
Single-element accumulation. Will not vectorise.
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute librar...
__roodevice__ std::size_t getNEvents() const
void advance(std::size_t nEvents)
void setNEvents(std::size_t n)
This class overrides some RooBatchComputeInterface functions, for the purpose of providing a cuda spe...
void compute(Config const &, Computer computer, RestrictArr output, size_t nEvents, const VarVector &vars, ArgVector &extraArgs) override
Compute multiple values using optimized functions.
std::string architectureName() const override
const std::vector< void(*)(BatchesHandle)> _computeFunctions
ReduceNLLOutput reduceNLL(RooBatchCompute::Config const &cfg, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas) override
double reduceSum(Config const &, InputArr input, size_t n) override
Return the sum of an input array.
double reduceSum(RooBatchCompute::Config const &cfg, InputArr input, size_t n) override
Return the sum of an input array.
Architecture architecture() const override
ReduceNLLOutput reduceNLL(Config const &, std::span< const double > probas, std::span< const double > weights, std::span< const double > offsetProbas) override
void(off) SmallVectorTemplateBase< T
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
static RooBatchComputeClass computeObj
Static object to trigger the constructor which overwrites the dispatch pointer.
std::vector< void(*)(BatchesHandle)> getFunctions()
Returns a std::vector of pointers to the compute functions in this file.
Namespace for dispatching RooFit computations to various backends.
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loade...
std::vector< std::span< const double > > VarVector
constexpr std::size_t bufferSize
const double *__restrict InputArr
std::vector< double > ArgVector
double *__restrict RestrictArr
void probas(TString dataset, TString fin="TMVA.root", Bool_t useTMVAStyle=kTRUE)
__roodevice__ static __roohost__ double packFloatIntoNaN(float payload)
Pack float into mantissa of a NaN.
static float unpackNaN(double val)
If val is NaN and a this NaN has been tagged as containing a payload, unpack the float from the manti...