95std::string RCsvDS::AsString()
97 return "CSV data source";
101const TRegexp RCsvDS::fgIntRegex(
"^[-+]?[0-9]+$");
102const TRegexp RCsvDS::fgDoubleRegex1(
"^[-+]?[0-9]+\\.[0-9]*$");
103const TRegexp RCsvDS::fgDoubleRegex2(
"^[-+]?[0-9]*\\.[0-9]+$");
104const TRegexp RCsvDS::fgDoubleRegex3(
"^[-+]?[0-9]*\\.[0-9]+[eEdDqQ][-+]?[0-9]+$");
105const TRegexp RCsvDS::fgTrueRegex(
"^true$");
106const TRegexp RCsvDS::fgFalseRegex(
"^false$");
108const std::unordered_map<RCsvDS::ColType_t, std::string>
109 RCsvDS::fgColTypeMap({{
'O',
"bool"}, {
'D',
"double"}, {
'L',
"Long64_t"}, {
'T',
"std::string"}});
111void RCsvDS::FillHeaders(
const std::string &
line)
131 record.emplace_back(
new double((col !=
"nan") ? std::stod(col) : std::numeric_limits<double>::quiet_NaN()));
147 std::istringstream(col) >> std::boolalpha >> *
b;
155 record.emplace_back(
new std::string(col));
163void RCsvDS::GenerateHeaders(
size_t size)
166 for (
size_t i = 0
u; i <
size; ++i) {
167 fHeaders.push_back(
"Col" + std::to_string(i));
171std::vector<void *> RCsvDS::GetColumnReadersImpl(std::string_view
colName,
const std::type_info &
ti)
176 (
colType ==
'T' &&
typeid(std::string) !=
ti) || (
colType ==
'O' &&
typeid(
bool) !=
ti)) {
177 std::string err =
"The type selected for column \"";
179 err +=
"\" does not correspond to column type, which is ";
181 throw std::runtime_error(err);
189 if (
ti ==
typeid(
double)) {
193 }
else if (
ti ==
typeid(std::string)) {
203void RCsvDS::ValidateColTypes(std::vector<std::string> &
columns)
const
207 std::string
msg =
"There is no column with name \"" + col.first +
"\".";
209 msg +=
"\nSince the input csv file does not contain headers, valid column names";
210 msg +=
" are [\"Col0\", ..., \"Col" + std::to_string(
columns.size() - 1) +
"\"].";
212 throw std::runtime_error(
msg);
214 if (std::string(
"ODLT").find(col.second) == std::string::npos) {
215 std::string
msg =
"Type alias '" + std::string(1, col.second) +
"' is not supported.\n";
216 msg +=
"Supported type aliases are 'O' for boolean, 'D' for double, 'L' for Long64_t, 'T' for std::string.";
217 throw std::runtime_error(
msg);
222void RCsvDS::InferColTypes(std::vector<std::string> &
columns)
226 for (
auto i = 0
u; i <
columns.size(); ++i) {
255void RCsvDS::InferType(
const std::string &col,
unsigned int idxCol)
276std::vector<std::string> RCsvDS::ParseColumns(
const std::string &
line)
278 std::vector<std::string>
columns;
280 for (
size_t i = 0; i <
line.size(); ++i) {
287size_t RCsvDS::ParseValue(
const std::string &
line, std::vector<std::string> &
columns,
size_t i)
293 for (; i <
line.size(); ++i) {
296 }
else if (
line[i] ==
'"') {
298 if (
line[i + 1] !=
'"') {
308 if (
prevPos == i || val ==
"nan" || val ==
"NaN")
311 columns.emplace_back(std::move(val));
332 std::unordered_map<std::string, char> &&
colTypes)
343 std::string
msg =
"Error reading headers of CSV file ";
345 throw std::runtime_error(
msg);
353 }
while (
line.empty() && !
eof);
371 std::string
msg =
"Could not infer column types of CSV file ";
373 throw std::runtime_error(
msg);
377void RCsvDS::FreeRecords()
380 for (
size_t i = 0; i <
record.size(); ++i) {
385 delete static_cast<double *
>(
p);
393 delete static_cast<bool *
>(
p);
397 delete static_cast<std::string *
>(
p);
413void RCsvDS::Finalize()
421const std::vector<std::string> &RCsvDS::GetColumnNames()
const
426std::vector<std::pair<ULong64_t, ULong64_t>> RCsvDS::GetEntryRanges()
434 if (
line.empty())
continue;
441 std::string
msg =
"";
444 msg +=
"Column \"" + col +
"\" of type " +
colT +
" contains empty cell(s) or NaN(s).\n";
445 msg +=
"There is no `nan` equivalent for type " +
colT +
", hence ";
446 msg += std::string(
colT ==
"Long64_t" ?
"`0`" :
"`false`") +
" is stored.\n";
448 msg +=
"Please manually set the column type to `double` (with `D`) in `FromCSV` to read NaNs instead.\n";
454 Info(
"GetEntryRanges",
"Attempted to read entire CSV file into memory, %zu lines read",
fRecords.size());
456 Info(
"GetEntryRanges",
"Attempted to read chunk of %lld lines of CSV file into memory, %zu lines read",
fLinesChunkSize,
fRecords.size());
460 std::vector<std::pair<ULong64_t, ULong64_t>>
entryRanges;
487 std::string
msg =
"The dataset does not have column ";
489 throw std::runtime_error(
msg);
495std::string RCsvDS::GetTypeName(std::string_view
colName)
const
500bool RCsvDS::HasColumn(std::string_view
colName)
const
536void RCsvDS::SetNSlots(
unsigned int nSlots)
538 assert(0U ==
fNSlots &&
"Setting the number of slots even if the number of slots is different from zero.");
553std::string RCsvDS::GetLabel()
559 std::unordered_map<std::string, char> &&
colTypes)
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Info(const char *location, const char *msgfmt,...)
Use this function for informational messages.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
winID h TVirtualViewer3D TVirtualGLPainter p
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
The RRawFile provides read-only access to local and remote files.
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
void FillRecord(const std::string &, Record_t &)
ColType_t GetType(std::string_view colName) const
std::vector< std::vector< double > > fDoubleEvtValues
void InferType(const std::string &, unsigned int)
static const std::unordered_map< ColType_t, std::string > fgColTypeMap
size_t ParseValue(const std::string &, std::vector< std::string > &, size_t)
static const TRegexp fgTrueRegex
void GenerateHeaders(size_t)
std::vector< std::vector< void * > > fColAddresses
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
const Long64_t fLinesChunkSize
std::vector< std::string > fHeaders
ULong64_t fEntryRangesRequested
ULong64_t fProcessedLines
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
void InferColTypes(std::vector< std::string > &)
std::unordered_map< std::string, ColType_t > fColTypes
std::vector< std::vector< Long64_t > > fLong64EvtValues
static const TRegexp fgDoubleRegex2
std::vector< Record_t > fRecords
std::set< std::string > fColContainingEmpty
static const TRegexp fgFalseRegex
static const TRegexp fgDoubleRegex3
void ValidateColTypes(std::vector< std::string > &) const
static const TRegexp fgIntRegex
std::vector< std::string > ParseColumns(const std::string &)
void FillHeaders(const std::string &)
std::unique_ptr< ROOT::Internal::RRawFile > fCsvFile
static const TRegexp fgDoubleRegex1
std::vector< std::vector< std::string > > fStringEvtValues
std::vector< std::deque< bool > > fBoolEvtValues
std::list< ColType_t > fColTypesList
std::vector< void * > Record_t
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
const_iterator begin() const
const_iterator end() const
Regular expression class.
RDataFrame FromCSV(std::string_view fileName, bool readHeaders=true, char delimiter=',', Long64_t linesChunkSize=-1LL, std::unordered_map< std::string, char > &&colTypes={})
Factory method to create a CSV RDataFrame.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
TSeq< unsigned int > TSeqU