40#pragma GCC diagnostic push 
   41#pragma GCC diagnostic ignored "-Wshadow" 
   42#pragma GCC diagnostic ignored "-Wunused-parameter" 
   44#include <arrow/table.h> 
   47#pragma GCC diagnostic pop 
   62#define ROOT_ARROW_STL_CONVERSION(c_type, ArrowType_)  \ 
   64   struct RootConversionTraits<c_type> {               \ 
   65   using ArrowType = ::arrow::ArrowType_;              \ 
   86   bool fCachedBool{
false}; 
 
  102      using ArrayType = 
typename arrow::TypeTraits<ArrowType>::ArrayType;
 
  103      auto values = 
reinterpret_cast<ArrayType *
>(array.values().get());
 
  104      auto offset = array.value_offset(entry);
 
  107      RVec<T> tmp(
reinterpret_cast<T *
>((
void *)values->raw_values()) + 
offset, array.value_length(entry));
 
  108      std::swap(cache, tmp);
 
  109      return (
void *)(&cache);
 
  118   virtual arrow::Status 
Visit(arrow::Int32Array 
const &array) 
final 
  120      *fResult = (
void *)(array.raw_values() + fCurrentEntry);
 
  121      return arrow::Status::OK();
 
  124   virtual arrow::Status 
Visit(arrow::Int64Array 
const &array) 
final 
  126      *fResult = (
void *)(array.raw_values() + fCurrentEntry);
 
  127      return arrow::Status::OK();
 
  131   virtual arrow::Status 
Visit(arrow::UInt32Array 
const &array) 
final 
  133      *fResult = (
void *)(array.raw_values() + fCurrentEntry);
 
  134      return arrow::Status::OK();
 
  137   virtual arrow::Status 
Visit(arrow::UInt64Array 
const &array) 
final 
  139      *fResult = (
void *)(array.raw_values() + fCurrentEntry);
 
  140      return arrow::Status::OK();
 
  143   virtual arrow::Status 
Visit(arrow::FloatArray 
const &array) 
final 
  145      *fResult = (
void *)(array.raw_values() + fCurrentEntry);
 
  146      return arrow::Status::OK();
 
  149   virtual arrow::Status 
Visit(arrow::DoubleArray 
const &array) 
final 
  151      *fResult = (
void *)(array.raw_values() + fCurrentEntry);
 
  152      return arrow::Status::OK();
 
  155   virtual arrow::Status 
Visit(arrow::BooleanArray 
const &array) 
final 
  157      fCachedBool = array.Value(fCurrentEntry);
 
  158      *fResult = 
reinterpret_cast<void *
>(&fCachedBool);
 
  159      return arrow::Status::OK();
 
  162   virtual arrow::Status 
Visit(arrow::StringArray 
const &array) 
final 
  164      fCachedString = array.GetString(fCurrentEntry);
 
  165      *fResult = 
reinterpret_cast<void *
>(&fCachedString);
 
  166      return arrow::Status::OK();
 
  169   virtual arrow::Status 
Visit(arrow::ListArray 
const &array) 
final 
  171      switch (array.value_type()->id()) {
 
  172      case arrow::Type::FLOAT: {
 
  173         *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecFloat);
 
  174         return arrow::Status::OK();
 
  176      case arrow::Type::DOUBLE: {
 
  177         *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecDouble);
 
  178         return arrow::Status::OK();
 
  180      case arrow::Type::UINT32: {
 
  181         *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecUInt);
 
  182         return arrow::Status::OK();
 
  184      case arrow::Type::UINT64: {
 
  185         *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecULong64);
 
  186         return arrow::Status::OK();
 
  188      case arrow::Type::INT32: {
 
  189         *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecInt);
 
  190         return arrow::Status::OK();
 
  192      case arrow::Type::INT64: {
 
  193         *fResult = getTypeErasedPtrFrom(array, fCurrentEntry, fCachedRVecLong64);
 
  194         return arrow::Status::OK();
 
  196      default: 
return arrow::Status::TypeError(
"Type not supported");
 
  200   using ::arrow::ArrayVisitor::Visit;
 
  223      for (
auto &chunk : chunks) {
 
  225         next += chunk->length();
 
  236      std::vector<void *> 
result;
 
  256      for (
size_t ce = 
fChunkIndex.size(); ci != ce; ++ci) {
 
  272         std::string msg = 
"Could not get pointer for slot ";
 
  273         msg += std::to_string(slot) + 
" looking at entry " + std::to_string(entry);
 
  274         throw std::runtime_error(msg);
 
  302   arrow::Status 
Visit(
const arrow::Int64Type &)
 override 
  305      return arrow::Status::OK();
 
  307   arrow::Status 
Visit(
const arrow::Int32Type &)
 override 
  310      return arrow::Status::OK();
 
  312   arrow::Status 
Visit(
const arrow::UInt64Type &)
 override 
  315      return arrow::Status::OK();
 
  317   arrow::Status 
Visit(
const arrow::UInt32Type &)
 override 
  320      return arrow::Status::OK();
 
  322   arrow::Status 
Visit(
const arrow::FloatType &)
 override 
  325      return arrow::Status::OK();
 
  327   arrow::Status 
Visit(
const arrow::DoubleType &)
 override 
  330      return arrow::Status::OK();
 
  332   arrow::Status 
Visit(
const arrow::StringType &)
 override 
  335      return arrow::Status::OK();
 
  337   arrow::Status 
Visit(
const arrow::BooleanType &)
 override 
  340      return arrow::Status::OK();
 
  342   arrow::Status 
Visit(
const arrow::ListType &
l)
 override 
  348      fTypeName.push_back(
"ROOT::VecOps::RVec<%s>");
 
  349      return l.value_type()->Accept(
this);
 
  354      std::string 
result = 
"%s";
 
  356      for (
size_t i = 0; i < 
fTypeName.size(); ++i) {
 
  363   using ::arrow::TypeVisitor::Visit;
 
  370   virtual arrow::Status 
Visit(
const arrow::Int64Type &)
 override { 
return arrow::Status::OK(); }
 
  371   virtual arrow::Status 
Visit(
const arrow::UInt64Type &)
 override { 
return arrow::Status::OK(); }
 
  372   virtual arrow::Status 
Visit(
const arrow::Int32Type &)
 override { 
return arrow::Status::OK(); }
 
  373   virtual arrow::Status 
Visit(
const arrow::UInt32Type &)
 override { 
return arrow::Status::OK(); }
 
  374   virtual arrow::Status 
Visit(
const arrow::FloatType &)
 override { 
return arrow::Status::OK(); }
 
  375   virtual arrow::Status 
Visit(
const arrow::DoubleType &)
 override { 
return arrow::Status::OK(); }
 
  376   virtual arrow::Status 
Visit(
const arrow::StringType &)
 override { 
return arrow::Status::OK(); }
 
  377   virtual arrow::Status 
Visit(
const arrow::BooleanType &)
 override { 
return arrow::Status::OK(); }
 
  378   virtual arrow::Status 
Visit(
const arrow::ListType &)
 override { 
return arrow::Status::OK(); }
 
  380   using ::arrow::TypeVisitor::Visit;
 
  389   : fTable{inTable}, fColumnNames{inColumns}
 
  396   auto filterWantedColumns = [&columnNames, &table]() {
 
  397      if (columnNames.empty()) {
 
  398         for (
auto &field : table->schema()->fields()) {
 
  399            columnNames.push_back(field->name());
 
  405   using ColumnType = 
decltype(
fTable->column(0));
 
  407   auto getRecordsFirstColumn = [&columnNames, &table]() {
 
  408      if (columnNames.empty()) {
 
  409         throw std::runtime_error(
"At least one column required");
 
  411      const auto name = columnNames.front();
 
  412      const auto columnIdx = table->schema()->GetFieldIndex(
name);
 
  413      return table->column(columnIdx)->length();
 
  417   auto verifyColumnSize = [&table](ColumnType column, 
int columnIdx, 
int nRecords) {
 
  418      if (column->length() != nRecords) {
 
  419         std::string msg = 
"Column ";
 
  420         msg += table->schema()->field(columnIdx)->name() + 
" has a different number of entries.";
 
  421         throw std::runtime_error(msg);
 
  426   auto verifyColumnType = [&table](ColumnType column, 
int columnIdx) {
 
  427      auto verifyType = std::make_unique<VerifyValidColumnType>();
 
  428      auto result = column->type()->Accept(verifyType.get());
 
  429      if (
result.ok() == 
false) {
 
  430         std::string msg = 
"Column ";
 
  431         msg += table->schema()->field(columnIdx)->name() + 
" contains an unsupported type.";
 
  432         throw std::runtime_error(msg);
 
  438   auto addColumnToGetterIndex = [&
index](
int columnId) { 
index.push_back(std::make_pair(columnId, 
index.size())); };
 
  442   auto resetGetterIndex = [&
index]() { 
index.clear(); };
 
  445   filterWantedColumns();
 
  447   auto nRecords = getRecordsFirstColumn();
 
  449      auto columnIdx = 
fTable->schema()->GetFieldIndex(columnName);
 
  450      addColumnToGetterIndex(columnIdx);
 
  452      auto column = 
fTable->column(columnIdx);
 
  453      verifyColumnSize(column, columnIdx, nRecords);
 
  454      verifyColumnType(column, columnIdx);
 
  477   auto field = 
fTable->schema()->GetFieldByName(std::string(colName));
 
  479      std::string msg = 
"The dataset does not have column ";
 
  481      throw std::runtime_error(msg);
 
  484   auto status = field->type()->Accept(&typeGetter);
 
  485   if (status.ok() == 
false) {
 
  486      std::string msg = 
"RArrowDS does not support a column of type ";
 
  487      msg += field->type()->name();
 
  488      throw std::runtime_error(msg);
 
  490   return typeGetter.
result();
 
  495   auto field = 
fTable->schema()->GetFieldByName(std::string(colName));
 
  506      getter->SetEntry(slot, entry);
 
  515      getter->UncachedSlotLookup(slot, entry);
 
  519void splitInEqualRanges(std::vector<std::pair<ULong64_t, ULong64_t>> &ranges, 
int nRecords, 
unsigned int nSlots)
 
  522   const auto chunkSize = nRecords / nSlots;
 
  523   const auto remainder = 1U == nSlots ? 0 : nRecords % nSlots;
 
  529      ranges.emplace_back(start, end);
 
  532   ranges.back().second += remainder;
 
  535int getNRecords(std::shared_ptr<arrow::Table> &table, std::vector<std::string> &columnNames)
 
  537   auto index = table->schema()->GetFieldIndex(columnNames.front());
 
  538   return table->column(
index)->length();
 
  548std::shared_ptr<arrow::ChunkedArray>
 
  549getData<std::shared_ptr<arrow::ChunkedArray>>(std::shared_ptr<arrow::ChunkedArray> 
p)
 
  556   assert(0U == 
fNSlots && 
"Setting the number of slots even if the number of slots is different from zero.");
 
  562   for (
size_t ci = 0; ci != nColumns; ++ci) {
 
  564      fValueGetters.emplace_back(std::make_unique<ROOT::Internal::RDF::TValueGetter>(nSlots, chunkedArray->chunks()));
 
  573   auto findGetterIndex = [&
index](
unsigned int column) {
 
  574      for (
auto &entry : 
index) {
 
  575         if (entry.first == column) {
 
  579      throw std::runtime_error(
"No column found at index " + std::to_string(column));
 
  582   const int columnIdx = 
fTable->schema()->GetFieldIndex(std::string(colName));
 
  583   const int getterIdx = findGetterIndex(columnIdx);
 
  584   assert(getterIdx != -1);
 
#define ROOT_ARROW_STL_CONVERSION(c_type, ArrowType_)
unsigned long long ULong64_t
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
ULong64_t fCurrentEntry
The entry in the array which should be looked up.
std::string fCachedString
void ** fResult
The pointer to update.
virtual arrow::Status Visit(arrow::StringArray const &array) final
virtual arrow::Status Visit(arrow::ListArray const &array) final
virtual arrow::Status Visit(arrow::Int64Array const &array) final
virtual arrow::Status Visit(arrow::DoubleArray const &array) final
RVec< UInt_t > fCachedRVecUInt
RVec< ULong64_t > fCachedRVecULong64
RVec< Int_t > fCachedRVecInt
void SetEntry(ULong64_t entry)
virtual arrow::Status Visit(arrow::Int32Array const &array) final
Check if we are asking the same entry as before.
virtual arrow::Status Visit(arrow::BooleanArray const &array) final
RVec< Long64_t > fCachedRVecLong64
RVec< float > fCachedRVecFloat
virtual arrow::Status Visit(arrow::UInt32Array const &array) final
Check if we are asking the same entry as before.
virtual arrow::Status Visit(arrow::FloatArray const &array) final
void * getTypeErasedPtrFrom(arrow::ListArray const &array, int32_t entry, RVec< T > &cache)
ArrayPtrVisitor(void **result)
RVec< double > fCachedRVecDouble
virtual arrow::Status Visit(arrow::UInt64Array const &array) final
Helper class which keeps track for each slot where to get the entry.
std::vector< ULong64_t > fLastChunkPerSlot
std::vector< ArrayPtrVisitor > fArrayVisitorPerSlot
std::vector< ULong64_t > fFirstEntryPerChunk
std::vector< ULong64_t > fLastEntryPerSlot
std::vector< void * > SlotPtrs()
This returns the ptr to the ptr to actual data.
TValueGetter(size_t slots, arrow::ArrayVector chunks)
arrow::ArrayVector fChunks
void SetEntry(unsigned int slot, ULong64_t entry)
Set the current entry to be retrieved.
std::vector< ULong64_t > fChunkIndex
Since data can be chunked in different arrays we need to construct an index which contains the first ...
void UncachedSlotLookup(unsigned int slot, ULong64_t entry)
std::vector< void * > fValuesPtrPerSlot
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
RArrowDS(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columns)
Constructor to create an Arrow RDataSource for RDataFrame.
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e.
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
void InitSlot(unsigned int slot, ULong64_t firstEntry) final
Convenience method called at the start of the data processing associated to a slot.
std::string GetLabel() final
Return a string representation of the datasource type.
void Initialize() final
Convenience method called before starting an event-loop.
std::shared_ptr< arrow::Table > fTable
std::vector< std::pair< size_t, size_t > > fGetterIndex
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
std::vector< void * > GetColumnReadersImpl(std::string_view name, const std::type_info &type) final
This needs to return a pointer to the pointer each value getter will point to.
std::vector< std::unique_ptr< ROOT::Internal::RDF::TValueGetter > > fValueGetters
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
std::vector< std::string > fColumnNames
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
Helper to get the contents of a given column.
arrow::Status Visit(const arrow::DoubleType &) override
arrow::Status Visit(const arrow::BooleanType &) override
arrow::Status Visit(const arrow::UInt32Type &) override
arrow::Status Visit(const arrow::ListType &l) override
arrow::Status Visit(const arrow::FloatType &) override
arrow::Status Visit(const arrow::UInt64Type &) override
arrow::Status Visit(const arrow::StringType &) override
arrow::Status Visit(const arrow::Int64Type &) override
std::vector< std::string > fTypeName
arrow::Status Visit(const arrow::Int32Type &) override
Helper to determine if a given Column is a supported type.
virtual arrow::Status Visit(const arrow::BooleanType &) override
virtual arrow::Status Visit(const arrow::StringType &) override
virtual arrow::Status Visit(const arrow::Int32Type &) override
virtual arrow::Status Visit(const arrow::DoubleType &) override
virtual arrow::Status Visit(const arrow::Int64Type &) override
virtual arrow::Status Visit(const arrow::UInt32Type &) override
virtual arrow::Status Visit(const arrow::ListType &) override
virtual arrow::Status Visit(const arrow::FloatType &) override
virtual arrow::Status Visit(const arrow::UInt64Type &) override
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
A "std::vector"-like collection of values implementing handy operation to analyse them.
void splitInEqualRanges(std::vector< std::pair< ULong64_t, ULong64_t > > &ranges, int nRecords, unsigned int nSlots)
int getNRecords(std::shared_ptr< arrow::Table > &table, std::vector< std::string > &columnNames)
RDataFrame MakeArrowDataFrame(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columnNames)
Factory method to create an Apache Arrow RDataFrame.
std::shared_ptr< arrow::ChunkedArray > getData(T p)
RDataFrame FromArrow(std::shared_ptr< arrow::Table > table, std::vector< std::string > const &columnNames)
Factory method to create an Apache Arrow RDataFrame.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
TSeq< unsigned int > TSeqU