97   return "CSV data source";
 
  // Lookup table from the one-character column type code to the name of the
  // C++ type used for that column: 'O' -> "bool", 'D' -> "double",
  // 'L' -> "Long64_t", 'T' -> "std::string".
  108const std::unordered_map<RCsvDS::ColType_t, std::string>
 
  109   RCsvDS::fgColTypeMap({{
'O', 
"bool"}, {
'D', 
"double"}, {
'L', 
"Long64_t"}, {
'T', 
"std::string"}});
 
  115   for (
auto &col : columns) {
 
  126   for (
auto &col : columns) {
 
  131         record.emplace_back(
new double((col != 
"nan") ? std::stod(col) : std::numeric_limits<double>::quiet_NaN()));
 
  136            record.emplace_back(
new Long64_t(std::stoll(col)));
 
  139            record.emplace_back(
new Long64_t(0));
 
  145         record.emplace_back(
b);
 
  147            std::istringstream(col) >> std::boolalpha >> *
b;
 
  155         record.emplace_back(
new std::string(col));
 
  166   for (
size_t i = 0u; i < 
size; ++i) {
 
  167      fHeaders.push_back(
"Col" + std::to_string(i));
 
  173   const auto colType = 
GetType(colName);
 
  175   if ((colType == 
'D' && 
typeid(
double) != ti) || (colType == 
'L' && 
typeid(
Long64_t) != ti) ||
 
  176       (colType == 
'T' && 
typeid(std::string) != ti) || (colType == 
'O' && 
typeid(
bool) != ti)) {
 
  177      std::string err = 
"The type selected for column \"";
 
  179      err += 
"\" does not correspond to column type, which is ";
 
  181      throw std::runtime_error(err);
 
  185   const auto index = std::distance(colNames.begin(), std::find(colNames.begin(), colNames.end(), colName));
 
  186   std::vector<void *> ret(
fNSlots);
 
  189      if (ti == 
typeid(
double)) {
 
  191      } 
else if (ti == 
typeid(
Long64_t)) {
 
  193      } 
else if (ti == 
typeid(std::string)) {
 
  207         std::string msg = 
"There is no column with name \"" + col.first + 
"\".";
 
  209            msg += 
"\nSince the input csv file does not contain headers, valid column names";
 
  210            msg += 
" are [\"Col0\", ..., \"Col" + std::to_string(columns.size() - 1) + 
"\"].";
 
  212         throw std::runtime_error(msg);
 
  214      if (std::string(
"ODLT").find(col.second) == std::string::npos) {
 
  215         std::string msg = 
"Type alias '" + std::string(1, col.second) + 
"' is not supported.\n";
 
  216         msg += 
"Supported type aliases are 'O' for boolean, 'D' for double, 'L' for Long64_t, 'T' for std::string.";
 
  217         throw std::runtime_error(msg);
 
  224   const auto second_line = 
fCsvFile->GetFilePos();
 
  226   for (
auto i = 0u; i < columns.size(); ++i) {
 
  228      if (userSpecifiedType != 
fColTypes.end()) {
 
  234      for (
auto extraRowsRead = 0u; extraRowsRead < 10u && columns[i] == 
"nan"; ++extraRowsRead) {
 
  239         if (temp_columns[i] != 
"nan")
 
  240            columns[i] = temp_columns[i]; 
 
  245      if (columns[i] == 
"nan") {
 
  278   std::vector<std::string> columns;
 
  280   for (
size_t i = 0; i < 
line.size(); ++i) {
 
  291   const size_t prevPos = i; 
 
  293   for (; i < 
line.size(); ++i) {
 
  296      } 
else if (
line[i] == 
'"') {
 
  298         if (
line[i + 1] != 
'"') {
 
  308   if (prevPos == i || val == 
"nan" || val == 
"NaN") 
 
  309      columns.emplace_back(
"nan");
 
  311      columns.emplace_back(std::move(val));
 
  316      columns.emplace_back(
"nan");
 
  332               std::unordered_map<std::string, char> &&colTypes)
 
  333   : fReadHeaders(readHeaders), fCsvFile(
ROOT::Internal::
RRawFile::Create(fileName)), fDelimiter(delimiter),
 
  334     fLinesChunkSize(linesChunkSize), fColTypes(std::move(colTypes))
 
  343         std::string msg = 
"Error reading headers of CSV file ";
 
  345         throw std::runtime_error(msg);
 
  353   } 
while (
line.empty() && !eof);
 
  371      std::string msg = 
"Could not infer column types of CSV file ";
 
  373      throw std::runtime_error(msg);
 
  380      for (
size_t i = 0; i < record.size(); ++i) {
 
  385            delete static_cast<double *
>(
p);
 
  393            delete static_cast<bool *
>(
p);
 
  397            delete static_cast<std::string *
>(
p);
 
  434      if (
line.empty()) 
continue; 
 
  441      std::string msg = 
"";
 
  444         msg += 
"Column \"" + col + 
"\" of type " + colT + 
" contains empty cell(s) or NaN(s).\n";
 
  445         msg += 
"There is no `nan` equivalent for type " + colT + 
", hence ";
 
  446         msg += std::string(colT == 
"Long64_t" ? 
"`0`" : 
"`false`") + 
" is stored.\n";
 
  448      msg += 
"Please manually set the column type to `double` (with `D`) in `FromCSV` to read NaNs instead.\n";
 
  449      Warning(
"RCsvDS", 
"%s", msg.c_str());
 
  454         Info(
"GetEntryRanges", 
"Attempted to read entire CSV file into memory, %zu lines read", 
fRecords.size());
 
  456         Info(
"GetEntryRanges", 
"Attempted to read chunk of %lld lines of CSV file into memory, %zu lines read", 
fLinesChunkSize, 
fRecords.size());
 
  460   std::vector<std::pair<ULong64_t, ULong64_t>> entryRanges;
 
  461   const auto nRecords = 
fRecords.size();
 
  465   const auto chunkSize = nRecords / 
fNSlots;
 
  473      entryRanges.emplace_back(start, end);
 
  476   entryRanges.back().second += remainder;
 
  487      std::string msg = 
"The dataset does not have column ";
 
  489      throw std::runtime_error(msg);
 
  509   const auto recordPos = entry - 
offset;
 
  512      auto dataPtr = 
fRecords[recordPos][colIndex];
 
  538   assert(0U == 
fNSlots && 
"Setting the number of slots even if the number of slots is different from zero.");
 
  542   const auto nColumns = 
fHeaders.size();
 
  559                   std::unordered_map<std::string, char> &&colTypes)
 
  562      std::make_unique<RCsvDS>(fileName, readHeaders, delimiter, linesChunkSize, std::move(colTypes)));
 
  567                            std::unordered_map<std::string, char> &&colTypes)
 
  569   return FromCSV(fileName, readHeaders, delimiter, linesChunkSize, std::move(colTypes));
 
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
The RRawFile provides read-only access to local and remote files.
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g. "double" or "Long64_t".
void FillRecord(const std::string &, Record_t &)
void Finalize() final
Convenience method called after concluding an event-loop.
ColType_t GetType(std::string_view colName) const
std::vector< std::vector< double > > fDoubleEvtValues
void InferType(const std::string &, unsigned int)
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
static const std::unordered_map< ColType_t, std::string > fgColTypeMap
size_t ParseValue(const std::string &, std::vector< std::string > &, size_t)
static const TRegexp fgTrueRegex
void GenerateHeaders(size_t)
std::vector< std::vector< void * > > fColAddresses
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
const Long64_t fLinesChunkSize
std::string AsString() final
std::vector< std::string > fHeaders
ULong64_t fEntryRangesRequested
ULong64_t fProcessedLines
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
void InferColTypes(std::vector< std::string > &)
std::unordered_map< std::string, ColType_t > fColTypes
std::vector< std::vector< Long64_t > > fLong64EvtValues
static const TRegexp fgDoubleRegex2
std::vector< Record_t > fRecords
RCsvDS(std::string_view fileName, bool readHeaders=true, char delimiter=',', Long64_t linesChunkSize=-1LL, std::unordered_map< std::string, char > &&colTypes={})
Constructor to create a CSV RDataSource for RDataFrame.
std::set< std::string > fColContainingEmpty
static const TRegexp fgFalseRegex
static const TRegexp fgDoubleRegex3
void ValidateColTypes(std::vector< std::string > &) const
static const TRegexp fgIntRegex
std::vector< std::string > ParseColumns(const std::string &)
void FillHeaders(const std::string &)
std::unique_ptr< ROOT::Internal::RRawFile > fCsvFile
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
std::string GetLabel() final
Return a string representation of the datasource type.
std::vector< void * > GetColumnReadersImpl(std::string_view, const std::type_info &) final
type-erased vector of pointers to pointers to column values - one per slot
static const TRegexp fgDoubleRegex1
std::vector< std::vector< std::string > > fStringEvtValues
std::vector< std::deque< bool > > fBoolEvtValues
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
std::list< ColType_t > fColTypesList
std::vector< void * > Record_t
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree, CSV and other data formats, in C++ or Python.
Regular expression class.
Ssiz_t Index(const TString &str, Ssiz_t *len, Ssiz_t start=0) const
Find the first occurrence of the regexp in string and return the position, or -1 if there is no match...
RDataFrame MakeCsvDataFrame(std::string_view fileName, bool readHeaders, char delimiter, Long64_t linesChunkSize, std::unordered_map< std::string, char > &&colTypes)
RDataFrame FromCSV(std::string_view fileName, bool readHeaders=true, char delimiter=',', Long64_t linesChunkSize=-1LL, std::unordered_map< std::string, char > &&colTypes={})
Factory method to create a CSV RDataFrame.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
TSeq< unsigned int > TSeqU