Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RFieldUtils.cxx
Go to the documentation of this file.
1/// \file RFieldUtils.cxx
2/// \ingroup NTuple
3/// \author Jonas Hahnfeld <jonas.hahnfeld@cern.ch>
4/// \date 2024-11-19
5
7
8#include <ROOT/RField.hxx>
9#include <ROOT/RLogger.hxx>
10#include <ROOT/RNTupleUtil.hxx>
11
12#include <TClass.h>
13#include <TClassEdit.h>
14#include <TDictAttributeMap.h>
15
16#include <algorithm>
17#include <charconv>
18#include <limits>
19#include <string>
20#include <string_view>
21#include <system_error>
22#include <unordered_map>
23#include <utility>
24#include <vector>
25
26namespace {
27
/// Translation table from ROOT typedef names (e.g. `Int_t`) and unqualified standard type names (e.g. `string`,
/// `int32_t`) to the spelling used when building RNTuple type names.
/// Keys and values are views onto string literals, so they remain valid for the whole program run.
const std::unordered_map<std::string_view, std::string_view> typeTranslationMap{
   {"Bool_t", "bool"},
   {"Float_t", "float"},
   {"Double_t", "double"},
   {"string", "std::string"},

   {"byte", "std::byte"},
   {"Char_t", "char"},
   {"int8_t", "std::int8_t"},
   {"UChar_t", "unsigned char"},
   {"uint8_t", "std::uint8_t"},

   {"Short_t", "short"},
   {"int16_t", "std::int16_t"},
   {"UShort_t", "unsigned short"},
   {"uint16_t", "std::uint16_t"},

   {"Int_t", "int"},
   {"int32_t", "std::int32_t"},
   {"UInt_t", "unsigned int"},
   {"unsigned", "unsigned int"},
   {"uint32_t", "std::uint32_t"},

   // Long_t and ULong_t follow the platform's size of long and unsigned long: They are 64 bit on 64-bit Linux and
   // macOS, but 32 bit on 32-bit platforms and Windows (regardless of pointer size).
   {"Long_t", "long"},
   {"ULong_t", "unsigned long"},

   {"Long64_t", "long long"},
   {"int64_t", "std::int64_t"},
   {"ULong64_t", "unsigned long long"},
   {"uint64_t", "std::uint64_t"}};
60
// Natively supported types drop the default template arguments and the CV qualifiers in template arguments.
// Any types used as a template argument of user classes will keep [U]Long64_t template arguments for the type alias,
// e.g. MyClass<std::vector<Long64_t>> will normalize to `MyClass<std::vector<std::int64_t>>` but keep the original
// spelling in the type alias.

/// Tells whether a type name is treated as a user class.
/// \param typeName the type name to classify
/// \return false if the name starts with "std::" or with "ROOT::VecOps::RVec<", true otherwise (including for an
/// empty name)
bool IsUserClass(const std::string &typeName)
{
   // rfind(prefix, 0) can only match at position 0, so comparing its result with 0 is a "starts with" test.
   const bool isStdType = typeName.rfind("std::", 0) == 0;
   const bool isRVec = typeName.rfind("ROOT::VecOps::RVec<", 0) == 0;
   return !isStdType && !isRVec;
}
69
70// Recursively normalizes a template argument using the regular type name normalizer F as a helper.
// Parameters, as used by the visible statements:
//  - arg: the template argument as text; asserted to be non-empty.
//  - keepQualifier: if false, the result is simply fnTypeNormalizer(arg); if true, a "const " and/or "volatile "
//    qualifier detected at the front of arg is prepended to fnTypeNormalizer(arg).
//  - fnTypeNormalizer: callable invoked as fnTypeNormalizer(arg).
71template <typename F>
72std::string GetNormalizedTemplateArg(const std::string &arg, bool keepQualifier, F fnTypeNormalizer)
73{
74 R__ASSERT(!arg.empty());
75
76 if (std::isdigit(arg[0]) || arg[0] == '-') {
77 // Integer template argument
// NOTE(review): listing line 78 is absent from this extracted copy, so the statement that handles integer arguments
// is not visible here; presumably it returns the normalized integer -- confirm against the upstream file.
79 }
80
81 if (!keepQualifier)
82 return fnTypeNormalizer(arg);
83
84 std::string qualifier;
85 // Type name template argument; template arguments must keep their CV qualifier
// "const " is accepted either at the very front or right after a leading 9-character "volatile ". The length check
// (> 14, i.e. at least 6 + 9 characters) keeps the start position of substr(9, 6) inside the string.
86 if (arg.substr(0, 6) == "const " || (arg.length() > 14 && arg.substr(9, 6) == "const "))
87 qualifier += "const ";
// Same idea for "volatile ": at the front, or right after a leading 6-character "const ".
88 if (arg.substr(0, 9) == "volatile " || (arg.length() > 14 && arg.substr(6, 9) == "volatile "))
89 qualifier += "volatile ";
// The qualifiers are always emitted in the order "const volatile ", whatever their order in arg.
90 return qualifier + fnTypeNormalizer(arg);
91}
92
93using AnglePos = std::pair<std::string::size_type, std::string::size_type>;
94std::vector<AnglePos> FindTemplateAngleBrackets(const std::string &typeName)
95{
96 std::vector<AnglePos> result;
97 std::string::size_type currentPos = 0;
98 while (currentPos < typeName.size()) {
99 const auto posOpen = typeName.find('<', currentPos);
100 if (posOpen == std::string::npos) {
101 // If there are no more templates, the function is done.
102 break;
103 }
104
105 auto posClose = posOpen + 1;
106 int level = 1;
107 while (posClose < typeName.size()) {
108 const auto c = typeName[posClose];
109 if (c == '<') {
110 level++;
111 } else if (c == '>') {
112 if (level == 1) {
113 break;
114 }
115 level--;
116 }
117 posClose++;
118 }
119 // We should have found a closing angle bracket at the right level.
120 R__ASSERT(posClose < typeName.size());
121 result.emplace_back(posOpen, posClose);
122
123 // If we are not at the end yet, the following two characeters should be :: for nested types.
124 if (posClose < typeName.size() - 1) {
125 R__ASSERT(typeName.substr(posClose + 1, 2) == "::");
126 }
127 currentPos = posClose + 1;
128 }
129
130 return result;
131}
132
// NOTE(review): the function signature (listing lines 134 and 136) and several statements (listing lines 144, 147,
// 150, 152, 161, 163) are absent from this extracted copy. The name NormalizeTemplateArguments is inferred from a
// call further down in this file -- confirm against the upstream source before relying on anything below.
// What the visible statements show: the function iterates over the (open, close) bracket pairs in `angleBrackets`,
// cuts the text between each pair out of `templatedTypeName` as the argument list, accumulates output in
// `normName`, and after each argument list overwrites the last character of `normName` with '>'.
133template <typename F>
135{
137 R__ASSERT(!angleBrackets.empty());
138
139 std::string normName;
// Start of the part of templatedTypeName that has not been processed yet.
140 std::string::size_type currentPos = 0;
141 for (std::size_t i = 0; i < angleBrackets.size(); i++) {
142 const auto [posOpen, posClose] = angleBrackets[i];
143 // Append the type prefix until the open angle bracket.
145
// Text strictly between '<' and '>' of this bracket pair.
146 const auto argList = templatedTypeName.substr(posOpen + 1, posClose - posOpen - 1);
148 R__ASSERT(!templateArgs.empty());
149
151 for (const auto &a : templateArgs) {
153 }
154
// Replaces the final character written so far by the closing bracket; presumably that character is a separator
// appended after the last argument by the (missing) loop body -- TODO confirm.
155 normName[normName.size() - 1] = '>';
156 currentPos = posClose + 1;
157 }
158
159 // Append the rest of the type from the last closing angle bracket.
160 const auto lastClosePos = angleBrackets.back().second;
162
164}
165
166} // namespace
167
// Builds the canonical type name prefix for `typeName`. Steps visible in this listing:
//  1. clean the name with TClassEdit::CleanType (mode 1) and strip one leading "struct ", "enum " or "::";
//  2. from the first '<' onwards, drop every blank that directly precedes a '>';
//  3. prepend "std::" to a fixed list of unqualified standard templates and rewrite "ROOT::RVec<" to
//     "ROOT::VecOps::RVec<";
//  4. replace the whole name by its entry in typeTranslationMap, if there is one;
//  5. map fundamental integer spellings to stdint names.
// NOTE(review): the statements inside the branches of step 5 (listing lines 241, 243, 246, ...) are absent from this
// extracted copy, so the actual replacement values are not visible here -- confirm against the upstream file.
168std::string ROOT::Internal::GetCanonicalTypePrefix(const std::string &typeName)
169{
170 std::string canonicalType{TClassEdit::CleanType(typeName.c_str(), /*mode=*/1)};
// The branches are mutually exclusive, so at most one of the three prefixes is removed.
171 if (canonicalType.substr(0, 7) == "struct ") {
172 canonicalType.erase(0, 7);
173 } else if (canonicalType.substr(0, 5) == "enum ") {
174 canonicalType.erase(0, 5);
175 } else if (canonicalType.substr(0, 2) == "::") {
176 canonicalType.erase(0, 2);
177 }
178
179 // TClassEdit::CleanType inserts blanks between closing angle brackets, as they were required before C++11. We want
180 // to remove them for RNTuple.
181 auto angle = canonicalType.find('<');
182 if (angle != std::string::npos) {
// In-place compaction: `src` reads every character, `dst` marks where the next kept character is written.
183 auto dst = canonicalType.begin() + angle;
184 auto end = canonicalType.end();
185 for (auto src = dst; src != end; ++src) {
186 if (*src == ' ') {
187 auto next = src + 1;
188 if (next != end && *next == '>') {
189 // Skip this space before a closing angle bracket.
190 continue;
191 }
192 }
193 *(dst++) = *src;
194 }
// Cut off the leftover tail behind the last kept character.
195 canonicalType.erase(dst, end);
196 }
197
198 if (canonicalType.substr(0, 6) == "array<") {
199 canonicalType = "std::" + canonicalType;
200 } else if (canonicalType.substr(0, 7) == "atomic<") {
201 canonicalType = "std::" + canonicalType;
202 } else if (canonicalType.substr(0, 7) == "bitset<") {
203 canonicalType = "std::" + canonicalType;
204 } else if (canonicalType.substr(0, 4) == "map<") {
205 canonicalType = "std::" + canonicalType;
206 } else if (canonicalType.substr(0, 9) == "multimap<") {
207 canonicalType = "std::" + canonicalType;
208 } else if (canonicalType.substr(0, 9) == "multiset<") {
209 canonicalType = "std::" + canonicalType;
210 }
// A second, separate if-chain starts here. After a match above the name begins with "std::", which none of the
// prefixes tested below matches, so splitting the chain does not change the result.
211 if (canonicalType.substr(0, 5) == "pair<") {
212 canonicalType = "std::" + canonicalType;
213 } else if (canonicalType.substr(0, 4) == "set<") {
214 canonicalType = "std::" + canonicalType;
215 } else if (canonicalType.substr(0, 6) == "tuple<") {
216 canonicalType = "std::" + canonicalType;
217 } else if (canonicalType.substr(0, 11) == "unique_ptr<") {
218 canonicalType = "std::" + canonicalType;
219 } else if (canonicalType.substr(0, 14) == "unordered_map<") {
220 canonicalType = "std::" + canonicalType;
221 } else if (canonicalType.substr(0, 19) == "unordered_multimap<") {
222 canonicalType = "std::" + canonicalType;
223 } else if (canonicalType.substr(0, 19) == "unordered_multiset<") {
224 canonicalType = "std::" + canonicalType;
225 } else if (canonicalType.substr(0, 14) == "unordered_set<") {
226 canonicalType = "std::" + canonicalType;
227 } else if (canonicalType.substr(0, 8) == "variant<") {
228 canonicalType = "std::" + canonicalType;
229 } else if (canonicalType.substr(0, 7) == "vector<") {
230 canonicalType = "std::" + canonicalType;
231 } else if (canonicalType.substr(0, 11) == "ROOT::RVec<") {
// Keep everything after the 11-character "ROOT::RVec<" and put the fully qualified spelling in front.
232 canonicalType = "ROOT::VecOps::RVec<" + canonicalType.substr(11);
233 }
234
// Only an exact match of the complete name is translated; names with template arguments are not found in the map.
235 if (auto it = typeTranslationMap.find(canonicalType); it != typeTranslationMap.end()) {
236 canonicalType = it->second;
237 }
238
239 // Map fundamental integer types to stdint integer types (e.g. int --> std::int32_t)
// NOTE(review): every branch body of the chain below is missing from this extracted copy (see header note).
240 if (canonicalType == "signed char") {
242 } else if (canonicalType == "unsigned char") {
244 } else if (canonicalType == "short" || canonicalType == "short int" || canonicalType == "signed short" ||
245 canonicalType == "signed short int") {
247 } else if (canonicalType == "unsigned short" || canonicalType == "unsigned short int") {
249 } else if (canonicalType == "int" || canonicalType == "signed" || canonicalType == "signed int") {
251 } else if (canonicalType == "unsigned" || canonicalType == "unsigned int") {
253 } else if (canonicalType == "long" || canonicalType == "long int" || canonicalType == "signed long" ||
254 canonicalType == "signed long int") {
256 } else if (canonicalType == "unsigned long" || canonicalType == "unsigned long int") {
258 } else if (canonicalType == "long long" || canonicalType == "long long int" || canonicalType == "signed long long" ||
259 canonicalType == "signed long long int") {
261 } else if (canonicalType == "unsigned long long" || canonicalType == "unsigned long long int") {
263 }
264
265 return canonicalType;
266}
267
269{
// NOTE(review): the function signature (listing line 268) and the statement defining `canonicalTypePrefix` (listing
// line 270) are absent from this extracted copy. Judging by the symbol summary at the end of this page, this body
// presumably belongs to ROOT::Internal::GetRenormalizedTypeName -- confirm against the upstream file.
// Visible behaviour: "Double32_t" yields "double"; a name without '<' is returned as is; otherwise a copy of the
// name is handed to NormalizeTemplateArguments together with GetRenormalizedTypeName and the modified copy is
// returned.
271 // RNTuple resolves Double32_t for the normalized type name but keeps Double32_t for the type alias
272 // (also in template parameters)
273 if (canonicalTypePrefix == "Double32_t")
274 return "double";
275
276 if (canonicalTypePrefix.find('<') == std::string::npos) {
277 // If there are no templates, the function is done.
278 return canonicalTypePrefix;
279 }
280
// normName is passed to the helper and returned afterwards, so the helper is expected to modify it in place.
281 std::string normName{canonicalTypePrefix};
282 NormalizeTemplateArguments(normName, GetRenormalizedTypeName);
283
284 return normName;
285}
286
289{
// NOTE(review): the function signature and several statements (listing lines 287-288, 290, 297, 305, 307, 313-314)
// are absent from this extracted copy. Judging by the symbol summary at the end of this page, this body presumably
// belongs to ROOT::Internal::NeedsMetaNameAsAlias -- confirm against the upstream file.
// Visible behaviour: returns false right away for names without '<'; otherwise returns `result`, which the lambda
// below may set to true.
291 if (canonicalTypePrefix.find('<') == std::string::npos) {
292 // If there are no templates, the function is done.
293 return false;
294 }
295
296 bool result = false;
// The lambda captures by reference so it can set `result`; it returns a type name string for `arg`.
298 auto fnCheckLong64 = [&](const std::string &arg) -> std::string {
// A [U]Long64_t argument below a templated user class sets the result and is returned unchanged.
299 if ((arg == "Long64_t" || arg == "ULong64_t") && hasTemplatedUserClassParent) {
300 result = true;
301 return arg;
302 }
303
304 std::string renormalizedArgAlias;
// NOTE(review): the condition guarding the next assignment (listing line 305) and the statement after it (listing
// line 307) are missing from this copy.
306 result = true;
308 }
309
310 return GetRenormalizedTypeName(arg);
311 };
312
315
316 return result;
317}
318
320{
// NOTE(review): the function signature and many statements are absent from this extracted copy (gaps in the listing
// numbers: 319, 321-323, 325-326, 333, 337, 344, 351, 359, 361, 363, 365-367, 371, 382). Judging by the symbol
// summary at the end of this page, this body presumably belongs to ROOT::Internal::GetNormalizedUnresolvedTypeName
// -- confirm against the upstream file.
// Visible behaviour: a name without '<' is returned as is. Otherwise the function walks over the bracket pairs in
// `angleBrackets`, tokenizes each argument list with TokenizeTypeList, accumulates output in `normName`, for user
// classes consults TClass::GetClass for additional template arguments, and closes each list with '>'.
324 std::string canonicalTypePrefix;
327
328 if (canonicalTypePrefix.find('<') == std::string::npos) {
329 // If there are no templates, the function is done.
330 return canonicalTypePrefix;
331 }
332
334 R__ASSERT(!angleBrackets.empty());
335
336 // For user-defined class types, we will need to get the default-initialized template arguments.
338
339 std::string normName;
// Start of the part of canonicalTypePrefix that has not been processed yet.
340 std::string::size_type currentPos = 0;
341 for (std::size_t i = 0; i < angleBrackets.size(); i++) {
342 const auto [posOpen, posClose] = angleBrackets[i];
343 // Append the type prefix until the open angle bracket.
345
// Text strictly between '<' and '>' of this bracket pair, split into individual template arguments.
346 const auto argList = canonicalTypePrefix.substr(posOpen + 1, posClose - posOpen - 1);
347 const auto templateArgs = TokenizeTypeList(argList);
348 R__ASSERT(!templateArgs.empty());
349
350 for (const auto &a : templateArgs) {
352 }
353
354 // For user-defined classes, append default-initialized template arguments.
355 if (isUserClass) {
// Looks up the class by the name up to and including the current closing bracket; a null result skips this step.
356 const auto cl = TClass::GetClass(canonicalTypePrefix.substr(0, posClose + 1).c_str());
357 if (cl) {
358 const std::string expandedName = cl->GetName();
360 // We can have fewer pairs than angleBrackets, for example in case of type aliases.
362
364 const auto expandedArgList =
368
// Starts at templateArgs.size(), i.e. only visits arguments of the expanded name beyond those spelled in the input.
369 for (std::size_t j = templateArgs.size(); j < expandedTemplateArgs.size(); ++j) {
370 normName +=
372 }
373 }
374 }
375
// Replaces the final character written so far by the closing bracket; presumably that character is a separator
// appended by the (missing) loop bodies above -- TODO confirm.
376 normName[normName.size() - 1] = '>';
377 currentPos = posClose + 1;
378 }
379
380 // Append the rest of the type from the last closing angle bracket.
381 const auto lastClosePos = angleBrackets.back().second;
383
384 return normName;
385}
386
387std::string ROOT::Internal::GetNormalizedInteger(long long val)
388{
389 return std::to_string(val);
390}
391
392std::string ROOT::Internal::GetNormalizedInteger(unsigned long long val)
393{
394 if (val > std::numeric_limits<std::int64_t>::max())
395 return std::to_string(val) + "u";
396 return std::to_string(val);
397}
398
406
407long long ROOT::Internal::ParseIntTypeToken(const std::string &intToken)
408{
409 std::size_t nChars = 0;
410 long long res = std::stoll(intToken, &nChars);
411 if (nChars == intToken.size())
412 return res;
413
414 assert(nChars < intToken.size());
415 if (nChars == 0) {
416 throw RException(R__FAIL("invalid integer type token: " + intToken));
417 }
418
419 auto suffix = intToken.substr(nChars);
420 std::transform(suffix.begin(), suffix.end(), suffix.begin(), ::toupper);
421 if (suffix == "L" || suffix == "LL")
422 return res;
423 if (res >= 0 && (suffix == "U" || suffix == "UL" || suffix == "ULL"))
424 return res;
425
426 throw RException(R__FAIL("invalid integer type token: " + intToken));
427}
428
429unsigned long long ROOT::Internal::ParseUIntTypeToken(const std::string &uintToken)
430{
431 std::size_t nChars = 0;
432 unsigned long long res = std::stoull(uintToken, &nChars);
433 if (nChars == uintToken.size())
434 return res;
435
436 assert(nChars < uintToken.size());
437 if (nChars == 0) {
438 throw RException(R__FAIL("invalid integer type token: " + uintToken));
439 }
440
441 auto suffix = uintToken.substr(nChars);
442 std::transform(suffix.begin(), suffix.end(), suffix.begin(), ::toupper);
443 if (suffix == "U" || suffix == "L" || suffix == "LL" || suffix == "UL" || suffix == "ULL")
444 return res;
445
446 throw RException(R__FAIL("invalid integer type token: " + uintToken));
447}
448
450{
// NOTE(review): the function signature (listing line 449) is absent from this extracted copy. Judging by the symbol
// summary at the end of this page, this body presumably belongs to
// ROOT::Internal::GetRNTupleSerializationMode(TClass *cl) -- confirm against the upstream file.
// Reads the "rntuple.streamerMode" class attribute: "TRUE" selects kForceStreamerMode, "FALSE" selects
// kForceNativeMode (both compared case-insensitively), a missing attribute yields kUnset, and any other value logs
// a warning and yields kUnset.
// `cl` is dereferenced without a null check.
451 auto am = cl->GetAttributeMap();
452 if (!am || !am->HasKey("rntuple.streamerMode"))
453 return ERNTupleSerializationMode::kUnset;
454
455 std::string value = am->GetPropertyAsString("rntuple.streamerMode");
// Upper-case the value so that the comparisons below are case-insensitive.
456 std::transform(value.begin(), value.end(), value.begin(), ::toupper);
457 if (value == "TRUE") {
458 return ERNTupleSerializationMode::kForceStreamerMode;
459 } else if (value == "FALSE") {
460 return ERNTupleSerializationMode::kForceNativeMode;
461 } else {
// The attribute is read again for the message, so the warning shows the original spelling, not the upper-cased copy.
462 R__LOG_WARNING(ROOT::Internal::NTupleLog()) << "invalid setting for 'rntuple.streamerMode' class attribute: "
463 << am->GetPropertyAsString("rntuple.streamerMode");
464 return ERNTupleSerializationMode::kUnset;
465 }
466}
467
468std::tuple<std::string, std::vector<std::size_t>> ROOT::Internal::ParseArrayType(const std::string &typeName)
469{
470 std::vector<std::size_t> sizeVec;
471
472 // Only parse outer array definition, i.e. the right `]` should be at the end of the type name
473 std::string prefix{typeName};
474 while (prefix.back() == ']') {
475 auto posRBrace = prefix.size() - 1;
476 auto posLBrace = prefix.rfind('[', posRBrace);
477 if (posLBrace == std::string_view::npos) {
478 throw RException(R__FAIL(std::string("invalid array type: ") + typeName));
479 }
480
481 const std::size_t size = ParseUIntTypeToken(prefix.substr(posLBrace + 1, posRBrace - posLBrace - 1));
482 if (size == 0) {
483 throw RException(R__FAIL(std::string("invalid array size: ") + typeName));
484 }
485
486 sizeVec.insert(sizeVec.begin(), size);
487 prefix.resize(posLBrace);
488 }
489 return std::make_tuple(prefix, sizeVec);
490}
491
492std::vector<std::string> ROOT::Internal::TokenizeTypeList(std::string_view templateType)
493{
494 std::vector<std::string> result;
495 if (templateType.empty())
496 return result;
497
498 const char *eol = templateType.data() + templateType.length();
499 const char *typeBegin = templateType.data();
500 const char *typeCursor = templateType.data();
501 unsigned int nestingLevel = 0;
502 while (typeCursor != eol) {
503 switch (*typeCursor) {
504 case '<': ++nestingLevel; break;
505 case '>': --nestingLevel; break;
506 case ',':
507 if (nestingLevel == 0) {
508 result.push_back(std::string(typeBegin, typeCursor - typeBegin));
509 typeBegin = typeCursor + 1;
510 }
511 break;
512 }
513 typeCursor++;
514 }
515 result.push_back(std::string(typeBegin, typeCursor - typeBegin));
516 return result;
517}
518
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define R__LOG_WARNING(...)
Definition RLogger.hxx:358
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint angle
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Classes with dictionaries that can be inspected by TClass.
Definition RField.hxx:288
const_iterator begin() const
const_iterator end() const
TClass instances represent classes, structs and namespaces in the ROOT type system.
Definition TClass.h:84
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3074
TDictAttributeMap * GetAttributeMap() const
ERNTupleSerializationMode
Possible settings for the "rntuple.streamerMode" class attribute in the dictionary.
std::tuple< std::string, std::vector< std::size_t > > ParseArrayType(const std::string &typeName)
Parse a type name of the form T[n][m]... and return the base type T and a vector that contains,...
ROOT::RLogChannel & NTupleLog()
Log channel for RNTuple diagnostics.
unsigned long long ParseUIntTypeToken(const std::string &uintToken)
std::string GetNormalizedInteger(const std::string &intTemplateArg)
Appends 'll' or 'ull' where necessary and strips the suffix if not needed.
bool NeedsMetaNameAsAlias(const std::string &metaNormalizedName, std::string &renormalizedAlias, bool isArgInTemplatedUserClass=false)
Checks if the meta normalized name is different from the RNTuple normalized name in a way that would ...
ERNTupleSerializationMode GetRNTupleSerializationMode(TClass *cl)
std::string GetCanonicalTypePrefix(const std::string &typeName)
Applies RNTuple specific type name normalization rules (see specs) that help the string parsing in RF...
std::string GetNormalizedUnresolvedTypeName(const std::string &origName)
Applies all RNTuple type normalization rules except typedef resolution.
std::string GetRenormalizedDemangledTypeName(const std::type_info &ti)
Given a type info ask ROOT meta to demangle it, then renormalize the resulting type name for RNTuple.
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple. E.g., insert std::prefix.
std::vector< std::string > TokenizeTypeList(std::string_view templateType)
Used in RFieldBase::Create() in order to get the comma-separated list of template types E....
long long ParseIntTypeToken(const std::string &intToken)
std::string GetDemangledTypeName(const std::type_info &t)
std::string CleanType(const char *typeDesc, int mode=0, const char **tail=nullptr)
Cleanup type description, redundant blanks removed and redundant tail ignored return *tail = pointer ...
@ kDropComparator
Definition TClassEdit.h:84
@ kDropStlDefault
Definition TClassEdit.h:83