Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleDescriptor.cxx
Go to the documentation of this file.
1/// \file RNTupleDescriptor.cxx
2/// \ingroup NTuple
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
5/// \date 2018-10-04
6
7/*************************************************************************
8 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
9 * All rights reserved. *
10 * *
11 * For the licensing terms see $ROOTSYS/LICENSE. *
12 * For the list of contributors see $ROOTSYS/README/CREDITS. *
13 *************************************************************************/
14
15#include <ROOT/RError.hxx>
16#include <ROOT/RFieldBase.hxx>
17#include <ROOT/RNTuple.hxx>
19#include <ROOT/RNTupleModel.hxx>
20#include <ROOT/RNTupleUtil.hxx>
21#include <ROOT/RPage.hxx>
22#include <string_view>
23
24#include <RZip.h>
25#include <TError.h>
27
28#include <algorithm>
29#include <cstdint>
30#include <deque>
31#include <functional>
32#include <iostream>
33#include <set>
34#include <utility>
35
37
39{
40 return fFieldId == other.fFieldId && fFieldVersion == other.fFieldVersion && fTypeVersion == other.fTypeVersion &&
41 fFieldName == other.fFieldName && fFieldDescription == other.fFieldDescription &&
42 fTypeName == other.fTypeName && fTypeAlias == other.fTypeAlias && fNRepetitions == other.fNRepetitions &&
43 fStructure == other.fStructure && fParentId == other.fParentId &&
44 fProjectionSourceId == other.fProjectionSourceId && fLinkIds == other.fLinkIds &&
45 fLogicalColumnIds == other.fLogicalColumnIds && other.fTypeChecksum == other.fTypeChecksum;
46}
47
49{
50 RFieldDescriptor clone;
51 clone.fFieldId = fFieldId;
52 clone.fFieldVersion = fFieldVersion;
53 clone.fTypeVersion = fTypeVersion;
54 clone.fFieldName = fFieldName;
55 clone.fFieldDescription = fFieldDescription;
56 clone.fTypeName = fTypeName;
57 clone.fTypeAlias = fTypeAlias;
58 clone.fNRepetitions = fNRepetitions;
59 clone.fStructure = fStructure;
60 clone.fParentId = fParentId;
61 clone.fProjectionSourceId = fProjectionSourceId;
62 clone.fLinkIds = fLinkIds;
63 clone.fColumnCardinality = fColumnCardinality;
64 clone.fLogicalColumnIds = fLogicalColumnIds;
65 clone.fTypeChecksum = fTypeChecksum;
66 return clone;
67}
68
69std::unique_ptr<ROOT::RFieldBase>
71{
72 if (GetStructure() == ROOT::ENTupleStructure::kStreamer) {
73 auto streamerField = std::make_unique<ROOT::RStreamerField>(GetFieldName(), GetTypeName());
74 streamerField->SetOnDiskId(fFieldId);
75 return streamerField;
76 }
77
78 // The structure may be unknown if the descriptor comes from a deserialized field with an unknown structural role.
79 // For forward compatibility, we allow this case and return an InvalidField.
80 if (GetStructure() == ROOT::ENTupleStructure::kUnknown) {
81 if (options.GetReturnInvalidOnError()) {
82 auto invalidField = std::make_unique<ROOT::RInvalidField>(GetFieldName(), GetTypeName(), "",
84 invalidField->SetOnDiskId(fFieldId);
85 return invalidField;
86 } else {
87 throw RException(R__FAIL("unexpected on-disk field structure value for field \"" + GetFieldName() + "\""));
88 }
89 }
90
91 // Untyped records and collections
92 if (GetTypeName().empty()) {
93 switch (GetStructure()) {
95 std::vector<std::unique_ptr<ROOT::RFieldBase>> memberFields;
96 memberFields.reserve(fLinkIds.size());
97 for (auto id : fLinkIds) {
98 const auto &memberDesc = ntplDesc.GetFieldDescriptor(id);
99 auto field = memberDesc.CreateField(ntplDesc, options);
101 return field;
102 memberFields.emplace_back(std::move(field));
103 }
104 auto recordField = std::make_unique<ROOT::RRecordField>(GetFieldName(), std::move(memberFields));
105 recordField->SetOnDiskId(fFieldId);
106 return recordField;
107 }
109 if (fLinkIds.size() != 1) {
110 throw RException(R__FAIL("unsupported untyped collection for field \"" + GetFieldName() + "\""));
111 }
112 auto itemField = ntplDesc.GetFieldDescriptor(fLinkIds[0]).CreateField(ntplDesc, options);
114 return itemField;
115 auto collectionField = ROOT::RVectorField::CreateUntyped(GetFieldName(), std::move(itemField));
116 collectionField->SetOnDiskId(fFieldId);
117 return collectionField;
118 }
119 default: throw RException(R__FAIL("unsupported untyped field structure for field \"" + GetFieldName() + "\""));
120 }
121 }
122
123 try {
124 const auto &fieldName = GetFieldName();
125 const auto &typeName = GetTypeAlias().empty() ? GetTypeName() : GetTypeAlias();
126 // NOTE: Unwrap() here may throw an exception, hence the try block.
127 // If options.fReturnInvalidOnError is false we just rethrow it, otherwise we return an InvalidField wrapping the
128 // error.
129 auto field = ROOT::Internal::CallFieldBaseCreate(fieldName, typeName, options, &ntplDesc, fFieldId).Unwrap();
130 field->SetOnDiskId(fFieldId);
131
132 for (auto &subfield : *field) {
133 const auto subfieldId = ntplDesc.FindFieldId(subfield.GetFieldName(), subfield.GetParent()->GetOnDiskId());
134 subfield.SetOnDiskId(subfieldId);
136 auto &invalidField = static_cast<ROOT::RInvalidField &>(subfield);
137 // A subfield being invalid "infects" its entire ancestry.
138 return invalidField.Clone(fieldName);
139 }
140 }
141
142 return field;
143 } catch (const RException &ex) {
144 if (options.GetReturnInvalidOnError())
145 return std::make_unique<ROOT::RInvalidField>(GetFieldName(), GetTypeName(), ex.GetError().GetReport(),
147 else
148 throw ex;
149 }
150}
151
153{
155 return false;
156
157 // Skip untyped structs
158 if (fTypeName.empty())
159 return false;
160
161 if (fStructure == ROOT::ENTupleStructure::kRecord) {
162 if (fTypeName.compare(0, 10, "std::pair<") == 0)
163 return false;
164 if (fTypeName.compare(0, 11, "std::tuple<") == 0)
165 return false;
166 }
167
168 return true;
169}
170
171////////////////////////////////////////////////////////////////////////////////
172
174{
175 return fLogicalColumnId == other.fLogicalColumnId && fPhysicalColumnId == other.fPhysicalColumnId &&
176 fBitsOnStorage == other.fBitsOnStorage && fType == other.fType && fFieldId == other.fFieldId &&
177 fIndex == other.fIndex && fRepresentationIndex == other.fRepresentationIndex &&
178 fValueRange == other.fValueRange;
179}
180
182{
183 RColumnDescriptor clone;
184 clone.fLogicalColumnId = fLogicalColumnId;
185 clone.fPhysicalColumnId = fPhysicalColumnId;
186 clone.fBitsOnStorage = fBitsOnStorage;
187 clone.fType = fType;
188 clone.fFieldId = fFieldId;
189 clone.fIndex = fIndex;
190 clone.fFirstElementIndex = fFirstElementIndex;
191 clone.fRepresentationIndex = fRepresentationIndex;
192 clone.fValueRange = fValueRange;
193 return clone;
194}
195
196////////////////////////////////////////////////////////////////////////////////
197
200{
201 const auto N = fCumulativeNElements.size();
202 R__ASSERT(N > 0);
203 R__ASSERT(N == fPageInfos.size());
204
205 std::size_t left = 0;
206 std::size_t right = N - 1;
207 std::size_t midpoint = N;
208 while (left <= right) {
209 midpoint = (left + right) / 2;
210 if (fCumulativeNElements[midpoint] <= idxInCluster) {
211 left = midpoint + 1;
212 continue;
213 }
214
215 if ((midpoint == 0) || (fCumulativeNElements[midpoint - 1] <= idxInCluster))
216 break;
217
218 right = midpoint - 1;
219 }
221
222 auto pageInfo = fPageInfos[midpoint];
223 decltype(idxInCluster) firstInPage = (midpoint == 0) ? 0 : fCumulativeNElements[midpoint - 1];
225 R__ASSERT((firstInPage + pageInfo.GetNElements()) > idxInCluster);
227}
228
229std::size_t
232 std::size_t pageSize)
233{
234 R__ASSERT(fPhysicalColumnId == columnRange.GetPhysicalColumnId());
235 R__ASSERT(!columnRange.IsSuppressed());
236
237 const auto nElements =
238 std::accumulate(fPageInfos.begin(), fPageInfos.end(), 0U,
239 [](std::size_t n, const auto &pageInfo) { return n + pageInfo.GetNElements(); });
240 const auto nElementsRequired = static_cast<std::uint64_t>(columnRange.GetNElements());
241
243 return 0U;
244 R__ASSERT((nElementsRequired > nElements) && "invalid attempt to shrink RPageRange");
245
246 std::vector<RPageInfo> pageInfos;
247 // Synthesize new `RPageInfo`s as needed
248 const std::uint64_t nElementsPerPage = pageSize / element.GetSize();
252 pageInfo.SetNElements(std::min(nElementsPerPage, nRemainingElements));
255 locator.SetNBytesOnStorage(element.GetPackedSize(pageInfo.GetNElements()));
256 pageInfo.SetLocator(locator);
257 pageInfos.emplace_back(pageInfo);
258 nRemainingElements -= pageInfo.GetNElements();
259 }
260
261 pageInfos.insert(pageInfos.end(), std::make_move_iterator(fPageInfos.begin()),
262 std::make_move_iterator(fPageInfos.end()));
263 std::swap(fPageInfos, pageInfos);
265}
266
268{
269 return fClusterId == other.fClusterId && fFirstEntryIndex == other.fFirstEntryIndex &&
270 fNEntries == other.fNEntries && fColumnRanges == other.fColumnRanges && fPageRanges == other.fPageRanges;
271}
272
274{
275 std::uint64_t nbytes = 0;
276 for (const auto &pr : fPageRanges) {
277 for (const auto &pi : pr.second.GetPageInfos()) {
278 nbytes += pi.GetLocator().GetNBytesOnStorage();
279 }
280 }
281 return nbytes;
282}
283
285{
286 RClusterDescriptor clone;
287 clone.fClusterId = fClusterId;
288 clone.fFirstEntryIndex = fFirstEntryIndex;
289 clone.fNEntries = fNEntries;
290 clone.fColumnRanges = fColumnRanges;
291 for (const auto &d : fPageRanges)
292 clone.fPageRanges.emplace(d.first, d.second.Clone());
293 return clone;
294}
295
296////////////////////////////////////////////////////////////////////////////////
297
299{
300 return fContentId == other.fContentId && fTypeName == other.fTypeName && fTypeVersion == other.fTypeVersion;
301}
302
304{
306 clone.fContentId = fContentId;
307 clone.fTypeVersion = fTypeVersion;
308 clone.fTypeName = fTypeName;
309 clone.fContent = fContent;
310 return clone;
311}
312
313////////////////////////////////////////////////////////////////////////////////
314
316{
317 // clang-format off
318 return fName == other.fName &&
319 fDescription == other.fDescription &&
320 fNEntries == other.fNEntries &&
321 fGeneration == other.fGeneration &&
322 fFieldZeroId == other.fFieldZeroId &&
323 fFieldDescriptors == other.fFieldDescriptors &&
324 fColumnDescriptors == other.fColumnDescriptors &&
325 fClusterGroupDescriptors == other.fClusterGroupDescriptors &&
326 fClusterDescriptors == other.fClusterDescriptors;
327 // clang-format on
328}
329
331{
333 for (const auto &cd : fClusterDescriptors) {
334 if (!cd.second.ContainsColumn(physicalColumnId))
335 continue;
336 auto columnRange = cd.second.GetColumnRange(physicalColumnId);
337 result = std::max(result, columnRange.GetFirstElementIndex() + columnRange.GetNElements());
338 }
339 return result;
340}
341
344{
345 std::string leafName(fieldName);
346 auto posDot = leafName.find_last_of('.');
347 if (posDot != std::string::npos) {
348 auto parentName = leafName.substr(0, posDot);
349 leafName = leafName.substr(posDot + 1);
350 parentId = FindFieldId(parentName, parentId);
351 }
352 auto itrFieldDesc = fFieldDescriptors.find(parentId);
353 if (itrFieldDesc == fFieldDescriptors.end())
355 for (const auto linkId : itrFieldDesc->second.GetLinkIds()) {
356 if (fFieldDescriptors.at(linkId).GetFieldName() == leafName)
357 return linkId;
358 }
360}
361
363{
365 return "";
366
367 const auto &fieldDescriptor = fFieldDescriptors.at(fieldId);
368 auto prefix = GetQualifiedFieldName(fieldDescriptor.GetParentId());
369 if (prefix.empty())
370 return fieldDescriptor.GetFieldName();
371 return prefix + "." + fieldDescriptor.GetFieldName();
372}
373
375{
376 std::string typeName = fieldDesc.GetTypeName();
377
378 // ROOT v6.34, with spec versions before 1.0.0.1, did not properly renormalize the type name.
379 R__ASSERT(fVersionEpoch == 1);
380 if (fVersionMajor == 0 && fVersionMinor == 0 && fVersionPatch < 1) {
381 typeName = ROOT::Internal::GetRenormalizedTypeName(typeName);
382 }
383
384 return typeName;
385}
386
388{
389 return FindFieldId(fieldName, GetFieldZeroId());
390}
391
393 std::uint32_t columnIndex,
394 std::uint16_t representationIndex) const
395{
396 auto itr = fFieldDescriptors.find(fieldId);
397 if (itr == fFieldDescriptors.cend())
399 if (columnIndex >= itr->second.GetColumnCardinality())
401 const auto idx = representationIndex * itr->second.GetColumnCardinality() + columnIndex;
402 if (itr->second.GetLogicalColumnIds().size() <= idx)
404 return itr->second.GetLogicalColumnIds()[idx];
405}
406
408 std::uint32_t columnIndex,
409 std::uint16_t representationIndex) const
410{
411 auto logicalId = FindLogicalColumnId(fieldId, columnIndex, representationIndex);
414 return GetColumnDescriptor(logicalId).GetPhysicalId();
415}
416
419{
420 if (GetNClusterGroups() == 0)
422
423 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
424
425 std::size_t cgLeft = 0;
426 std::size_t cgRight = GetNClusterGroups() - 1;
427 while (cgLeft <= cgRight) {
428 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
429 const auto &clusterIds = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]).GetClusterIds();
430 R__ASSERT(!clusterIds.empty());
431
432 const auto &clusterDesc = GetClusterDescriptor(clusterIds.front());
433 // this may happen if the RNTuple has an empty schema
434 if (!clusterDesc.ContainsColumn(physicalColumnId))
436
437 const auto firstElementInGroup = clusterDesc.GetColumnRange(physicalColumnId).GetFirstElementIndex();
439 // Look into the lower half of cluster groups
441 cgRight = cgMidpoint - 1;
442 continue;
443 }
444
445 const auto &lastColumnRange = GetClusterDescriptor(clusterIds.back()).GetColumnRange(physicalColumnId);
446 if ((lastColumnRange.GetFirstElementIndex() + lastColumnRange.GetNElements()) <= index) {
447 // Look into the upper half of cluster groups
448 cgLeft = cgMidpoint + 1;
449 continue;
450 }
451
452 // Binary search in the current cluster group; since we already checked the element range boundaries,
453 // the element must be in that cluster group.
454 std::size_t clusterLeft = 0;
455 std::size_t clusterRight = clusterIds.size() - 1;
456 while (clusterLeft <= clusterRight) {
457 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
459 const auto &columnRange = GetClusterDescriptor(clusterId).GetColumnRange(physicalColumnId);
460
461 if (columnRange.Contains(index))
462 return clusterId;
463
464 if (columnRange.GetFirstElementIndex() > index) {
467 continue;
468 }
469
470 if (columnRange.GetFirstElementIndex() + columnRange.GetNElements() <= index) {
472 continue;
473 }
474 }
475 R__ASSERT(false);
476 }
478}
479
481{
482 if (GetNClusterGroups() == 0)
484
485 // Binary search in the cluster group list, followed by a binary search in the clusters of that cluster group
486
487 std::size_t cgLeft = 0;
488 std::size_t cgRight = GetNClusterGroups() - 1;
489 while (cgLeft <= cgRight) {
490 const std::size_t cgMidpoint = (cgLeft + cgRight) / 2;
491 const auto &cgDesc = GetClusterGroupDescriptor(fSortedClusterGroupIds[cgMidpoint]);
492
493 if (cgDesc.GetMinEntry() > entryIdx) {
495 cgRight = cgMidpoint - 1;
496 continue;
497 }
498
499 if (cgDesc.GetMinEntry() + cgDesc.GetEntrySpan() <= entryIdx) {
500 cgLeft = cgMidpoint + 1;
501 continue;
502 }
503
504 // Binary search in the current cluster group; since we already checked the element range boundaries,
505 // the element must be in that cluster group.
506 const auto &clusterIds = cgDesc.GetClusterIds();
507 R__ASSERT(!clusterIds.empty());
508 std::size_t clusterLeft = 0;
509 std::size_t clusterRight = clusterIds.size() - 1;
510 while (clusterLeft <= clusterRight) {
511 const std::size_t clusterMidpoint = (clusterLeft + clusterRight) / 2;
512 const auto &clusterDesc = GetClusterDescriptor(clusterIds[clusterMidpoint]);
513
514 if (clusterDesc.GetFirstEntryIndex() > entryIdx) {
517 continue;
518 }
519
520 if (clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries() <= entryIdx) {
522 continue;
523 }
524
526 }
527 R__ASSERT(false);
528 }
530}
531
533{
534 // TODO(jblomer): we may want to shortcut the common case and check if clusterId + 1 contains
535 // firstEntryInNextCluster. This shortcut would currently always trigger. We do not want, however, to depend
536 // on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure that the
537 // binary search code path remains tested.
538 const auto &clusterDesc = GetClusterDescriptor(clusterId);
539 const auto firstEntryInNextCluster = clusterDesc.GetFirstEntryIndex() + clusterDesc.GetNEntries();
540 return FindClusterId(firstEntryInNextCluster);
541}
542
544{
545 // TODO(jblomer): we may want to shortcut the common case and check if clusterId - 1 contains
546 // firstEntryInNextCluster. This shortcut would currently always trigger. We do not want, however, to depend
547 // on the linearity of the descriptor IDs, so we should only enable the shortcut if we can ensure that the
548 // binary search code path remains tested.
549 const auto &clusterDesc = GetClusterDescriptor(clusterId);
550 if (clusterDesc.GetFirstEntryIndex() == 0)
552 return FindClusterId(clusterDesc.GetFirstEntryIndex() - 1);
553}
554
555std::vector<ROOT::DescriptorId_t>
557{
558 auto fieldZeroId = desc.GetFieldZeroId();
559
560 std::vector<ROOT::DescriptorId_t> fields;
561 for (const auto fieldId : fFieldIdsOrder) {
562 if (desc.GetFieldDescriptor(fieldId).GetParentId() == fieldZeroId)
563 fields.emplace_back(fieldId);
564 }
565 return fields;
566}
567
573
575 : fNTuple(ntuple)
576{
577 std::deque<ROOT::DescriptorId_t> fieldIdQueue{ntuple.GetFieldZeroId()};
578
579 while (!fieldIdQueue.empty()) {
580 auto currFieldId = fieldIdQueue.front();
581 fieldIdQueue.pop_front();
582
583 const auto &columns = ntuple.GetFieldDescriptor(currFieldId).GetLogicalColumnIds();
584 fColumns.insert(fColumns.end(), columns.begin(), columns.end());
585
586 for (const auto &field : ntuple.GetFieldIterable(currFieldId)) {
587 auto fieldId = field.GetId();
588 fieldIdQueue.push_back(fieldId);
589 }
590 }
591}
592
593std::vector<std::uint64_t> ROOT::RNTupleDescriptor::GetFeatureFlags() const
594{
595 std::vector<std::uint64_t> result;
596 unsigned int base = 0;
597 std::uint64_t flags = 0;
598 for (auto f : fFeatureFlags) {
599 if ((f > 0) && ((f % 64) == 0))
600 throw RException(R__FAIL("invalid feature flag: " + std::to_string(f)));
601 while (f > base + 64) {
602 result.emplace_back(flags);
603 flags = 0;
604 base += 64;
605 }
606 f -= base;
607 flags |= 1 << f;
608 }
609 result.emplace_back(flags);
610 return result;
611}
612
614 std::vector<RClusterDescriptor> &clusterDescs)
615{
617 if (iter == fClusterGroupDescriptors.end())
618 return R__FAIL("invalid attempt to add details of unknown cluster group");
619 if (iter->second.HasClusterDetails())
620 return R__FAIL("invalid attempt to re-populate cluster group details");
621 if (iter->second.GetNClusters() != clusterDescs.size())
622 return R__FAIL("mismatch of number of clusters");
623
624 std::vector<ROOT::DescriptorId_t> clusterIds;
625 for (unsigned i = 0; i < clusterDescs.size(); ++i) {
626 clusterIds.emplace_back(clusterDescs[i].GetId());
627 auto [_, success] = fClusterDescriptors.emplace(clusterIds.back(), std::move(clusterDescs[i]));
628 if (!success) {
629 return R__FAIL("invalid attempt to re-populate existing cluster");
630 }
631 }
633 return fClusterDescriptors[a].GetFirstEntryIndex() < fClusterDescriptors[b].GetFirstEntryIndex();
634 });
636 cgBuilder.AddSortedClusters(clusterIds);
637 iter->second = cgBuilder.MoveDescriptor().Unwrap();
638 return RResult<void>::Success();
639}
640
642{
644 if (iter == fClusterGroupDescriptors.end())
645 return R__FAIL("invalid attempt to drop cluster details of unknown cluster group");
646 if (!iter->second.HasClusterDetails())
647 return R__FAIL("invalid attempt to drop details of cluster group summary");
648
649 for (auto clusterId : iter->second.GetClusterIds())
651 iter->second = iter->second.CloneSummary();
652 return RResult<void>::Success();
653}
654
655std::unique_ptr<ROOT::RNTupleModel> ROOT::RNTupleDescriptor::CreateModel(const RCreateModelOptions &options) const
656{
657 // Collect all top-level fields that have invalid columns (recursively): by default if we find any we throw an
658 // exception; if we are in ForwardCompatible mode, we proceed but skip all of those top-level fields.
659 std::unordered_set<ROOT::DescriptorId_t> invalidFields;
660 for (const auto &colDesc : GetColumnIterable()) {
662 auto fieldId = colDesc.GetFieldId();
663 while (1) {
664 const auto &field = GetFieldDescriptor(fieldId);
665 if (field.GetParentId() == GetFieldZeroId())
666 break;
667 fieldId = field.GetParentId();
668 }
669 invalidFields.insert(fieldId);
670
671 // No need to look for all invalid fields if we're gonna error out anyway
672 if (!options.GetForwardCompatible())
673 break;
674 }
675 }
676
677 if (!options.GetForwardCompatible() && !invalidFields.empty())
679 "cannot create Model: descriptor contains unknown column types. Use 'SetForwardCompatible(true)' on the "
680 "RCreateModelOptions to create a partial model containing only the fields made up by known columns."));
681
682 auto fieldZero = std::make_unique<ROOT::RFieldZero>();
683 fieldZero->SetOnDiskId(GetFieldZeroId());
684 auto model = options.GetCreateBare() ? RNTupleModel::CreateBare(std::move(fieldZero))
685 : RNTupleModel::Create(std::move(fieldZero));
687 createFieldOpts.SetReturnInvalidOnError(options.GetForwardCompatible());
688 createFieldOpts.SetEmulateUnknownTypes(options.GetEmulateUnknownTypes());
689 for (const auto &topDesc : GetTopLevelFields()) {
690 if (invalidFields.count(topDesc.GetId()) > 0) {
691 // Field contains invalid columns: skip it
692 continue;
693 }
694
695 auto field = topDesc.CreateField(*this, createFieldOpts);
696
697 // If we got an InvalidField here, figure out if it's a hard error or if the field must simply be skipped.
698 // The only case where it's not a hard error is if the field has an unknown structure, as that case is
699 // covered by the ForwardCompatible flag (note that if the flag is off we would not get here
700 // in the first place, so we don't need to check for that flag again).
701 if (field->GetTraits() & ROOT::RFieldBase::kTraitInvalidField) {
702 const auto &invalid = static_cast<const RInvalidField &>(*field);
703 const auto cat = invalid.GetCategory();
705 if (mustThrow)
706 throw invalid.GetError();
707
708 // Not a hard error: skip the field and go on.
709 continue;
710 }
711
712 if (options.GetReconstructProjections() && topDesc.IsProjectedField()) {
713 model->AddProjectedField(std::move(field), [this](const std::string &targetName) -> std::string {
714 return GetQualifiedFieldName(GetFieldDescriptor(FindFieldId(targetName)).GetProjectionSourceId());
715 });
716 } else {
717 model->AddField(std::move(field));
718 }
719 }
720 model->Freeze();
721 return model;
722}
723
725{
726 RNTupleDescriptor clone;
727 clone.fName = fName;
732 // OnDiskHeaderSize, OnDiskHeaderXxHash3 not copied because they may come from a merged header + extension header
733 // and therefore not represent the actual source's header.
734 // OnDiskFooterSize not copied because it contains information beyond the schema, for example the clustering.
735
736 for (const auto &d : fFieldDescriptors)
737 clone.fFieldDescriptors.emplace(d.first, d.second.Clone());
738 for (const auto &d : fColumnDescriptors)
739 clone.fColumnDescriptors.emplace(d.first, d.second.Clone());
740
741 for (const auto &d : fExtraTypeInfoDescriptors)
742 clone.fExtraTypeInfoDescriptors.emplace_back(d.Clone());
744 clone.fHeaderExtension = std::make_unique<RHeaderExtension>(*fHeaderExtension);
745
746 return clone;
747}
748
750{
752
757
761 clone.fNEntries = fNEntries;
762 clone.fNClusters = fNClusters;
763 clone.fGeneration = fGeneration;
764 for (const auto &d : fClusterGroupDescriptors)
765 clone.fClusterGroupDescriptors.emplace(d.first, d.second.Clone());
767 for (const auto &d : fClusterDescriptors)
768 clone.fClusterDescriptors.emplace(d.first, d.second.Clone());
769 return clone;
770}
771
772////////////////////////////////////////////////////////////////////////////////
773
775{
776 return fClusterGroupId == other.fClusterGroupId && fClusterIds == other.fClusterIds &&
777 fMinEntry == other.fMinEntry && fEntrySpan == other.fEntrySpan && fNClusters == other.fNClusters;
778}
779
781{
783 clone.fClusterGroupId = fClusterGroupId;
784 clone.fPageListLocator = fPageListLocator;
785 clone.fPageListLength = fPageListLength;
786 clone.fMinEntry = fMinEntry;
787 clone.fEntrySpan = fEntrySpan;
788 clone.fNClusters = fNClusters;
789 return clone;
790}
791
793{
794 RClusterGroupDescriptor clone = CloneSummary();
795 clone.fClusterIds = fClusterIds;
796 return clone;
797}
798
799////////////////////////////////////////////////////////////////////////////////
800
803 std::uint64_t firstElementIndex,
804 std::uint32_t compressionSettings,
806{
807 if (physicalId != pageRange.fPhysicalColumnId)
808 return R__FAIL("column ID mismatch");
809 if (fCluster.fColumnRanges.count(physicalId) > 0)
810 return R__FAIL("column ID conflict");
812 for (const auto &pi : pageRange.fPageInfos) {
813 columnRange.IncrementNElements(pi.GetNElements());
814 }
815 fCluster.fPageRanges[physicalId] = pageRange.Clone();
816 fCluster.fColumnRanges[physicalId] = columnRange;
817 return RResult<void>::Success();
818}
819
822{
823 if (fCluster.fColumnRanges.count(physicalId) > 0)
824 return R__FAIL("column ID conflict");
825
827 columnRange.SetPhysicalColumnId(physicalId);
828 columnRange.SetIsSuppressed(true);
829 fCluster.fColumnRanges[physicalId] = columnRange;
830 return RResult<void>::Success();
831}
832
835{
836 for (auto &[_, columnRange] : fCluster.fColumnRanges) {
837 if (!columnRange.IsSuppressed())
838 continue;
839 R__ASSERT(columnRange.GetFirstElementIndex() == ROOT::kInvalidNTupleIndex);
840
841 const auto &columnDesc = desc.GetColumnDescriptor(columnRange.GetPhysicalColumnId());
842 const auto &fieldDesc = desc.GetFieldDescriptor(columnDesc.GetFieldId());
843 // We expect only few columns and column representations per field, so we do a linear search
844 for (const auto otherColumnLogicalId : fieldDesc.GetLogicalColumnIds()) {
846 if (otherColumnDesc.GetRepresentationIndex() == columnDesc.GetRepresentationIndex())
847 continue;
848 if (otherColumnDesc.GetIndex() != columnDesc.GetIndex())
849 continue;
850
851 // Found corresponding column of a different column representation
852 const auto &otherColumnRange = fCluster.GetColumnRange(otherColumnDesc.GetPhysicalId());
853 if (otherColumnRange.IsSuppressed())
854 continue;
855
856 columnRange.SetFirstElementIndex(otherColumnRange.GetFirstElementIndex());
857 columnRange.SetNElements(otherColumnRange.GetNElements());
858 break;
859 }
860
861 if (columnRange.GetFirstElementIndex() == ROOT::kInvalidNTupleIndex) {
862 return R__FAIL(std::string("cannot find non-suppressed column for column ID ") +
863 std::to_string(columnRange.GetPhysicalColumnId()) +
864 ", cluster ID: " + std::to_string(fCluster.GetId()));
865 }
866 }
867 return RResult<void>::Success();
868}
869
872{
873 /// Carries out a depth-first traversal of a field subtree rooted at `rootFieldId`. For each field, `visitField` is
874 /// called passing the field ID and the number of overall repetitions, taking into account the repetitions of each
875 /// parent field in the hierarchy.
877 const auto &visitField, const auto &enterSubtree) -> void {
879 for (const auto &f : desc.GetFieldIterable(rootFieldId)) {
880 const std::uint64_t nRepetitions = std::max(f.GetNRepetitions(), std::uint64_t{1U}) * nRepetitionsAtThisLevel;
882 }
883 };
884
885 // Extended columns can only be part of the header extension
886 if (!desc.GetHeaderExtension())
887 return *this;
888
889 // Ensure that all columns in the header extension have their associated `R(Column|Page)Range`
890 // Extended columns can be attached both to fields of the regular header and to fields of the extension header
891 for (const auto &topLevelField : desc.GetTopLevelFields()) {
893 topLevelField.GetId(), std::max(topLevelField.GetNRepetitions(), std::uint64_t{1U}),
894 [&](ROOT::DescriptorId_t fieldId, std::uint64_t nRepetitions) {
895 for (const auto &c : desc.GetColumnIterable(fieldId)) {
896 const ROOT::DescriptorId_t physicalId = c.GetPhysicalId();
897 auto &columnRange = fCluster.fColumnRanges[physicalId];
898
899 // Initialize a RColumnRange for `physicalId` if it was not there. Columns that were created during model
900 // extension won't have on-disk metadata for the clusters that were already committed before the model
901 // was extended. Therefore, these need to be synthetically initialized upon reading.
902 if (columnRange.GetPhysicalColumnId() == ROOT::kInvalidDescriptorId) {
903 columnRange.SetPhysicalColumnId(physicalId);
904 columnRange.SetFirstElementIndex(0);
905 columnRange.SetNElements(0);
906 columnRange.SetIsSuppressed(c.IsSuppressedDeferredColumn());
907 }
908 // Fixup the RColumnRange and RPageRange in deferred columns. We know what the first element index and
909 // number of elements should have been if the column was not deferred; fix those and let
910 // `ExtendToFitColumnRange()` synthesize RPageInfos accordingly.
911 // Note that a deferred column (i.e, whose first element index is > 0) already met the criteria of
912 // `ROOT::RFieldBase::EntryToColumnElementIndex()`, i.e. it is a principal column reachable from the
913 // field zero excluding subfields of collection and variant fields.
914 if (c.IsDeferredColumn()) {
915 columnRange.SetFirstElementIndex(fCluster.GetFirstEntryIndex() * nRepetitions);
916 columnRange.SetNElements(fCluster.GetNEntries() * nRepetitions);
917 if (!columnRange.IsSuppressed()) {
918 auto &pageRange = fCluster.fPageRanges[physicalId];
919 pageRange.fPhysicalColumnId = physicalId;
920 const auto element = ROOT::Internal::RColumnElementBase::Generate<void>(c.GetType());
921 pageRange.ExtendToFitColumnRange(columnRange, *element, ROOT::Internal::RPage::kPageZeroSize);
922 }
923 } else if (!columnRange.IsSuppressed()) {
924 fCluster.fPageRanges[physicalId].fPhysicalColumnId = physicalId;
925 }
926 }
927 },
929 }
930 return *this;
931}
932
934{
935 if (fCluster.fClusterId == ROOT::kInvalidDescriptorId)
936 return R__FAIL("unset cluster ID");
937 if (fCluster.fNEntries == 0)
938 return R__FAIL("empty cluster");
939 for (auto &pr : fCluster.fPageRanges) {
940 if (fCluster.fColumnRanges.count(pr.first) == 0) {
941 return R__FAIL("missing column range");
942 }
943 pr.second.fCumulativeNElements.clear();
944 pr.second.fCumulativeNElements.reserve(pr.second.fPageInfos.size());
946 for (const auto &pi : pr.second.fPageInfos) {
947 sum += pi.GetNElements();
948 pr.second.fCumulativeNElements.emplace_back(sum);
949 }
950 }
952 std::swap(result, fCluster);
953 return result;
954}
955
956////////////////////////////////////////////////////////////////////////////////
957
960{
962 builder.ClusterGroupId(clusterGroupDesc.GetId())
963 .PageListLocator(clusterGroupDesc.GetPageListLocator())
964 .PageListLength(clusterGroupDesc.GetPageListLength())
965 .MinEntry(clusterGroupDesc.GetMinEntry())
966 .EntrySpan(clusterGroupDesc.GetEntrySpan())
967 .NClusters(clusterGroupDesc.GetNClusters());
968 return builder;
969}
970
972{
973 if (fClusterGroup.fClusterGroupId == ROOT::kInvalidDescriptorId)
974 return R__FAIL("unset cluster group ID");
976 std::swap(result, fClusterGroup);
977 return result;
978}
979
980////////////////////////////////////////////////////////////////////////////////
981
983{
984 if (fExtraTypeInfo.fContentId == EExtraTypeInfoIds::kInvalid)
985 throw RException(R__FAIL("invalid extra type info content id"));
987 std::swap(result, fExtraTypeInfo);
988 return result;
989}
990
991////////////////////////////////////////////////////////////////////////////////
992
994{
     // Succeeds iff fieldId is a key of fDescriptor.fFieldDescriptors; otherwise returns a failure naming the ID.
995 if (fDescriptor.fFieldDescriptors.count(fieldId) == 0)
996 return R__FAIL("field with id '" + std::to_string(fieldId) + "' doesn't exist");
997 return RResult<void>::Success();
998}
999
1001{
     // Checks descriptor-wide invariants: supported version epoch, valid RNTuple name, and for every
     // field a valid parent link plus complete column representations. Returns the first failure found.
1002 if (fDescriptor.fVersionEpoch != RNTuple::kVersionEpoch) {
1003 return R__FAIL("unset or unsupported RNTuple epoch version");
1004 }
1005
1006 // Reuse field name validity check
     // The RNTuple's own name is validated with the same helper that is used for field names.
     // NOTE(review): the statement inside the `if (!validName)` block is not visible in this listing;
     // presumably it forwards the error of validName -- confirm.
1007 auto validName = ROOT::Internal::EnsureValidNameForRNTuple(fDescriptor.GetName(), "Field");
1008 if (!validName) {
1010 }
1011
1012 for (const auto &[fieldId, fieldDesc] : fDescriptor.fFieldDescriptors) {
1013 // parent not properly set?
     // Only the field whose ID equals GetFieldZeroId() may have an invalid parent ID.
1014 if (fieldId != fDescriptor.GetFieldZeroId() && fieldDesc.GetParentId() == ROOT::kInvalidDescriptorId) {
1015 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has an invalid parent id");
1016 }
1017
1018 // Same number of columns in every column representation?
     // Fields without any column need no further checks.
1019 const auto columnCardinality = fieldDesc.GetColumnCardinality();
1020 if (columnCardinality == 0)
1021 continue;
1022
1023 // In AddColumn, we already checked that all but the last representation are complete.
1024 // Check that the last column representation is complete, i.e. has all columns.
1025 const auto &logicalColumnIds = fieldDesc.GetLogicalColumnIds();
1026 const auto nColumns = logicalColumnIds.size();
1027 // If we have only a single column representation, the following condition is true by construction
     // NOTE(review): for a single complete representation one would expect nColumns == columnCardinality,
     // not nColumns + 1 -- confirm the `+ 1`. The check on the last column below still decides the result
     // whenever this shortcut is not taken.
1028 if ((nColumns + 1) == columnCardinality)
1029 continue;
1030
     // The last attached column must carry the highest column index of a representation.
1031 const auto &lastColumn = fDescriptor.GetColumnDescriptor(logicalColumnIds.back());
1032 if (lastColumn.GetIndex() + 1 != columnCardinality)
1033 return R__FAIL("field with id '" + std::to_string(fieldId) + "' has incomplete column representations");
1034 }
1035
1036 return RResult<void>::Success();
1037}
1038
1040{
     // Finalizes and hands out the descriptor. Throws if EnsureValidDescriptor() reports a failure.
1041 EnsureValidDescriptor().ThrowOnError();
     // Collect all cluster group IDs, then order them by the groups' minimum entry number.
1042 fDescriptor.fSortedClusterGroupIds.reserve(fDescriptor.fClusterGroupDescriptors.size());
1043 for (const auto &[id, _] : fDescriptor.fClusterGroupDescriptors)
1044 fDescriptor.fSortedClusterGroupIds.emplace_back(id);
     // NOTE(review): the comparator's opening line is not visible in this listing; its body compares
     // GetMinEntry() of the groups looked up by the IDs `a` and `b`.
1045 std::sort(fDescriptor.fSortedClusterGroupIds.begin(), fDescriptor.fSortedClusterGroupIds.end(),
1047 return fDescriptor.fClusterGroupDescriptors[a].GetMinEntry() <
1048 fDescriptor.fClusterGroupDescriptors[b].GetMinEntry();
1049 });
     // Swap the finished descriptor into `result` (declaration not visible here) and return it.
1051 std::swap(result, fDescriptor);
1052 return result;
1053}
1054
1056 std::uint16_t versionMinor, std::uint16_t versionPatch)
1057{
     // Stores the four version components in the descriptor.
     // NOTE(review): the condition guarding the throw below is not visible in this listing; judging by
     // the message it rejects an unsupported versionEpoch -- confirm.
1059 throw RException(R__FAIL("unsupported RNTuple epoch version: " + std::to_string(versionEpoch)));
1060 }
1061 fDescriptor.fVersionEpoch = versionEpoch;
1062 fDescriptor.fVersionMajor = versionMajor;
1063 fDescriptor.fVersionMinor = versionMinor;
1064 fDescriptor.fVersionPatch = versionPatch;
1065}
1066
1068{
     // Stamps the descriptor with the version constants compiled into this build (RNTuple::kVersion*).
1069 fDescriptor.fVersionEpoch = RNTuple::kVersionEpoch;
1070 fDescriptor.fVersionMajor = RNTuple::kVersionMajor;
1071 fDescriptor.fVersionMinor = RNTuple::kVersionMinor;
1072 fDescriptor.fVersionPatch = RNTuple::kVersionPatch;
1073}
1074
1076 const std::string_view description)
1077{
     // Copies the given name and description into the descriptor; no validation happens at this point.
1078 fDescriptor.fName = std::string(name);
1079 fDescriptor.fDescription = std::string(description);
1080}
1081
1083{
     // Registers a feature flag in the descriptor's flag set.
     // Flags that are a multiple of 64 (including 0) are rejected with an RException.
     // NOTE(review): the rationale for excluding multiples of 64 is not visible here -- presumably tied
     // to how flags are packed into 64-bit words on disk; confirm against the format specification.
1084 if (flag % 64 == 0)
1085 throw RException(R__FAIL("invalid feature flag: " + std::to_string(flag)));
1086 fDescriptor.fFeatureFlags.insert(flag);
1087}
1088
1090{
     // Validates the column under construction and returns a clone of it on success.
     // Logical ID, physical ID and owning field ID must all be set.
1091 if (fColumn.GetLogicalId() == ROOT::kInvalidDescriptorId)
1092 return R__FAIL("invalid logical column id");
1093 if (fColumn.GetPhysicalId() == ROOT::kInvalidDescriptorId)
1094 return R__FAIL("invalid physical column id");
1095 if (fColumn.GetFieldId() == ROOT::kInvalidDescriptorId)
1096 return R__FAIL("invalid field id, dangling column");
1097
1098 // NOTE: if the column type is unknown we don't want to fail, as we might be reading an RNTuple
1099 // created with a future version of ROOT. In this case we just skip the valid bit range check,
1100 // as we have no idea what the valid range is.
1101 // In general, reading the metadata of an unknown column is fine, it becomes an error only when
1102 // we try to read the actual data contained in it.
1103 if (fColumn.GetType() != ENTupleColumnType::kUnknown) {
     // The on-storage bit width must lie within the inclusive [minBits, maxBits] range of the column type.
1104 const auto [minBits, maxBits] = ROOT::Internal::RColumnElementBase::GetValidBitRange(fColumn.GetType());
1105 if (fColumn.GetBitsOnStorage() < minBits || fColumn.GetBitsOnStorage() > maxBits)
1106 return R__FAIL("invalid column bit width");
1107 }
1108
     // The builder keeps its own copy, so it can be used again after this call.
1109 return fColumn.Clone();
1110}
1111
1119
1122{
     // Populates `fieldDesc` (declaration not visible in this listing) from a live field: versions, name,
     // description, type name and alias, structure and repetition count.
     // Field ID, parent ID and projection source are not set by any of the visible calls.
1124 fieldDesc.FieldVersion(field.GetFieldVersion())
1125 .TypeVersion(field.GetTypeVersion())
1126 .FieldName(field.GetFieldName())
1127 .FieldDescription(field.GetDescription())
1128 .TypeName(field.GetTypeName())
1129 .TypeAlias(field.GetTypeAlias())
1130 .Structure(field.GetStructure())
1131 .NRepetitions(field.GetNRepetitions());
     // NOTE(review): the line preceding the TypeChecksum call is not visible in this listing; presumably
     // it restricts the call to fields that actually provide a type checksum -- confirm.
1133 fieldDesc.TypeChecksum(field.GetTypeChecksum());
1134 return fieldDesc;
1135}
1136
1138{
     // Validates the field under construction and returns a clone of it on success.
     // A field needs a valid ID and a valid structure.
1139 if (fField.GetId() == ROOT::kInvalidDescriptorId) {
1140 return R__FAIL("invalid field id");
1141 }
1142 if (fField.GetStructure() == ROOT::ENTupleStructure::kInvalid) {
1143 return R__FAIL("invalid field structure");
1144 }
1145 // FieldZero is usually named "" and would be a false positive here
     // Name checks are therefore applied only to fields that already have a parent ID assigned.
     // NOTE(review): the statement inside the `if (!validName)` block is not visible in this listing;
     // presumably it forwards the error of validName -- confirm.
1146 if (fField.GetParentId() != ROOT::kInvalidDescriptorId) {
1147 auto validName = ROOT::Internal::EnsureValidNameForRNTuple(fField.GetFieldName(), "Field");
1148 if (!validName) {
1150 }
1151 if (fField.GetFieldName().empty()) {
1152 return R__FAIL("name cannot be empty string \"\"");
1153 }
1154 }
1155 return fField.Clone();
1156}
1157
1159{
     // Stores a clone of fieldDesc under its ID.
     // NOTE(review): emplace() keeps an already existing entry with the same ID, so a duplicate ID is not
     // reported here while MarkExtendedField() below still runs -- confirm callers guarantee unique IDs.
1160 fDescriptor.fFieldDescriptors.emplace(fieldDesc.GetId(), fieldDesc.Clone());
     // Once a header extension has been started, newly added fields are recorded in it.
1161 if (fDescriptor.fHeaderExtension)
1162 fDescriptor.fHeaderExtension->MarkExtendedField(fieldDesc);
     // A field with an empty name and no parent is taken to be the zero (root) field.
1163 if (fieldDesc.GetFieldName().empty() && fieldDesc.GetParentId() == ROOT::kInvalidDescriptorId) {
1164 fDescriptor.fFieldZeroId = fieldDesc.GetId();
1165 }
1166}
1167
1170{
     // Makes the field linkId a child of the field fieldId.
     // Both fields must already exist.
     // NOTE(review): the declaration of `fieldExists` and the statement following the first existence
     // check are not visible in this listing.
1172 if (!(fieldExists = EnsureFieldExists(fieldId)))
1174 if (!(fieldExists = EnsureFieldExists(linkId)))
1175 return R__FAIL("child field with id '" + std::to_string(linkId) + "' doesn't exist in NTuple");
1176
1177 if (linkId == fDescriptor.GetFieldZeroId()) {
1178 return R__FAIL("cannot make FieldZero a child field");
1179 }
1180 // fail if field already has another valid parent
     // NOTE(review): the condition on parentId is not visible in this listing. The message below opens the
     // parent ID with "('" but closes it with ")" only -- the closing quote looks missing.
1181 auto parentId = fDescriptor.fFieldDescriptors.at(linkId).GetParentId();
1183 return R__FAIL("field '" + std::to_string(linkId) + "' already has a parent ('" + std::to_string(parentId) + ")");
1184 }
1185 if (fieldId == linkId) {
1186 return R__FAIL("cannot make field '" + std::to_string(fieldId) + "' a child of itself");
1187 }
     // Record the relationship in both directions: child -> parent and parent -> children.
1188 fDescriptor.fFieldDescriptors.at(linkId).fParentId = fieldId;
1189 fDescriptor.fFieldDescriptors.at(fieldId).fLinkIds.push_back(linkId);
1190 return RResult<void>::Success();
1191}
1192
1195{
     // Marks the field targetId as a projection of the field sourceId.
     // Both fields must already exist.
     // NOTE(review): the declaration of `fieldExists` and the statement following the first existence
     // check are not visible in this listing.
1197 if (!(fieldExists = EnsureFieldExists(sourceId)))
1199 if (!(fieldExists = EnsureFieldExists(targetId)))
1200 return R__FAIL("projected field with id '" + std::to_string(targetId) + "' doesn't exist in NTuple");
1201
1202 if (targetId == fDescriptor.GetFieldZeroId()) {
1203 return R__FAIL("cannot make FieldZero a projected field");
1204 }
1205 if (sourceId == targetId) {
1206 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of itself");
1207 }
     // Chained projections are rejected: the source must not itself be a projected field.
1208 if (fDescriptor.fFieldDescriptors.at(sourceId).IsProjectedField()) {
1209 return R__FAIL("cannot make field '" + std::to_string(targetId) + "' a projection of an already projected field");
1210 }
1211 // fail if target field already has another valid projection source
     // Re-adding the same (source, target) pair passes this check.
     // NOTE(review): the message below opens the source ID with "('" but closes it with ")" only.
1212 auto &targetDesc = fDescriptor.fFieldDescriptors.at(targetId);
1213 if (targetDesc.IsProjectedField() && targetDesc.GetProjectionSourceId() != sourceId) {
1214 return R__FAIL("field '" + std::to_string(targetId) + "' has already a projection source ('" +
1215 std::to_string(targetDesc.GetProjectionSourceId()) + ")");
1216 }
1217 fDescriptor.fFieldDescriptors.at(targetId).fProjectionSourceId = sourceId;
1218 return RResult<void>::Success();
1219}
1220
1222{
     // Attaches a column descriptor to its field after checking that column and representation indexes
     // continue the already registered ones without gaps or clashes.
1223 const auto fieldId = columnDesc.GetFieldId();
1224 const auto columnIndex = columnDesc.GetIndex();
1225 const auto representationIndex = columnDesc.GetRepresentationIndex();
1226
     // NOTE(review): the statement inside the `if (!fieldExists)` block is not visible in this listing;
     // presumably it forwards the error -- confirm.
1227 auto fieldExists = EnsureFieldExists(fieldId);
1228 if (!fieldExists) {
1230 }
1231 auto &fieldDesc = fDescriptor.fFieldDescriptors.find(fieldId)->second;
1232
     // An alias column must have the same type as the column registered under its physical ID.
1233 if (columnDesc.IsAliasColumn()) {
1234 if (columnDesc.GetType() != fDescriptor.GetColumnDescriptor(columnDesc.GetPhysicalId()).GetType())
1235 return R__FAIL("alias column type mismatch");
1236 }
     // The (field, column index, representation index) triple must not be taken yet.
1237 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex, representationIndex) != ROOT::kInvalidDescriptorId) {
1238 return R__FAIL("column index clash");
1239 }
     // Within a representation, columns have to be added in index order: the predecessor must exist.
1240 if (columnIndex > 0) {
1241 if (fDescriptor.FindLogicalColumnId(fieldId, columnIndex - 1, representationIndex) == ROOT::kInvalidDescriptorId)
1242 return R__FAIL("out of bounds column index");
1243 }
1244 if (representationIndex > 0) {
     // Representations have to be added in order as well: the previous one must have its first column.
1245 if (fDescriptor.FindLogicalColumnId(fieldId, 0, representationIndex - 1) == ROOT::kInvalidDescriptorId) {
1246 return R__FAIL("out of bounds representation index");
1247 }
1248 if (columnIndex == 0) {
     // Starting a new representation requires the previous one to contain its last column.
     // NOTE(review): the right-hand side of the comparison below is not visible in this listing.
1249 assert(fieldDesc.fColumnCardinality > 0);
1250 if (fDescriptor.FindLogicalColumnId(fieldId, fieldDesc.fColumnCardinality - 1, representationIndex - 1) ==
1252 return R__FAIL("incomplete column representations");
1253 }
1254 } else {
     // Later representations may not have more columns than the first one.
1255 if (columnIndex >= fieldDesc.fColumnCardinality)
1256 return R__FAIL("irregular column representations");
1257 }
1258 } else {
1259 // This will set the column cardinality to the number of columns of the first representation
1260 fieldDesc.fColumnCardinality = columnIndex + 1;
1261 }
1262
1263 const auto logicalId = columnDesc.GetLogicalId();
1264 fieldDesc.fLogicalColumnIds.emplace_back(logicalId);
1265
     // Alias columns do not count as physical columns.
1266 if (!columnDesc.IsAliasColumn())
1267 fDescriptor.fNPhysicalColumns++;
     // NOTE(review): columnDesc is passed through std::move() to emplace() and read again by
     // MarkExtendedColumn() right after. Confirm that a moved-from RColumnDescriptor still holds the
     // values MarkExtendedColumn() needs, or perform the MarkExtendedColumn() call before the emplace().
1268 fDescriptor.fColumnDescriptors.emplace(logicalId, std::move(columnDesc));
1269 if (fDescriptor.fHeaderExtension)
1270 fDescriptor.fHeaderExtension->MarkExtendedColumn(columnDesc);
1271
1272 return RResult<void>::Success();
1273}
1274
1276{
     // Registers a cluster group under its ID; an already used ID is reported as a failure.
1277 const auto id = clusterGroup.GetId();
1278 if (fDescriptor.fClusterGroupDescriptors.count(id) > 0)
1279 return R__FAIL("cluster group id clash");
     // The total entry count becomes the furthest entry covered by any group; cluster counts add up.
     // Both totals are read from clusterGroup before it is moved into the map.
1280 fDescriptor.fNEntries = std::max(fDescriptor.fNEntries, clusterGroup.GetMinEntry() + clusterGroup.GetEntrySpan());
1281 fDescriptor.fNClusters += clusterGroup.GetNClusters();
1282 fDescriptor.fClusterGroupDescriptors.emplace(id, std::move(clusterGroup));
1283 return RResult<void>::Success();
1284}
1285
1287{
     // Empties name, description, the field/column/cluster/cluster-group maps and drops the header extension.
     // NOTE(review): other descriptor members that the builder updates elsewhere (fNEntries, fNClusters,
     // fNPhysicalColumns, fFeatureFlags, fExtraTypeInfoDescriptors, fSortedClusterGroupIds, fFieldZeroId,
     // the version fields) are not touched here -- confirm that reusing the builder after Reset() does
     // not rely on them being cleared.
1288 fDescriptor.fName = "";
1289 fDescriptor.fDescription = "";
1290 fDescriptor.fFieldDescriptors.clear();
1291 fDescriptor.fColumnDescriptors.clear();
1292 fDescriptor.fClusterDescriptors.clear();
1293 fDescriptor.fClusterGroupDescriptors.clear();
1294 fDescriptor.fHeaderExtension.reset();
1295}
1296
1301
1303{
     // Creates the header extension object if there is none yet; an existing one is kept as is,
     // so repeated calls do not discard what has been recorded.
1304 if (!fDescriptor.fHeaderExtension)
1305 fDescriptor.fHeaderExtension = std::make_unique<RNTupleDescriptor::RHeaderExtension>();
1306}
1307
1309{
     // Adds `offset` to the logical ID of every column whose ID lies in
     // [GetNPhysicalColumns(), GetNLogicalColumns()); each of them is asserted to be an alias column.
1310 if (fDescriptor.GetNLogicalColumns() == 0)
1311 return;
1312 R__ASSERT(fDescriptor.GetNPhysicalColumns() > 0);
1313
     // The assertion above keeps the lower bound at 1 or more, so the unsigned `id` cannot wrap around in
     // `--id` before the loop condition turns false.
     // IDs are visited from highest to lowest; presumably so that a shifted ID never lands on an alias
     // column that has not been moved yet (see the collision assertion below).
1314 for (ROOT::DescriptorId_t id = fDescriptor.GetNLogicalColumns() - 1; id >= fDescriptor.GetNPhysicalColumns(); --id) {
     // Work on a clone: the map entry is erased and re-inserted under the new key.
1315 auto c = fDescriptor.fColumnDescriptors[id].Clone();
1316 R__ASSERT(c.IsAliasColumn());
1317 R__ASSERT(id == c.GetLogicalId());
1318 fDescriptor.fColumnDescriptors.erase(id);
     // Patch the owning field's column list so it refers to the new logical ID.
1319 for (auto &link : fDescriptor.fFieldDescriptors[c.fFieldId].fLogicalColumnIds) {
1320 if (link == c.fLogicalColumnId) {
1321 link += offset;
1322 break;
1323 }
1324 }
1325 c.fLogicalColumnId += offset;
1326 R__ASSERT(fDescriptor.fColumnDescriptors.count(c.fLogicalColumnId) == 0);
1327 fDescriptor.fColumnDescriptors.emplace(c.fLogicalColumnId, std::move(c));
1328 }
1329}
1330
1332{
     // Takes ownership of a cluster descriptor and stores it under its ID; an already used ID is a failure.
1333 auto clusterId = clusterDesc.GetId();
1334 if (fDescriptor.fClusterDescriptors.count(clusterId) > 0)
1335 return R__FAIL("cluster id clash");
1336 fDescriptor.fClusterDescriptors.emplace(clusterId, std::move(clusterDesc));
1337 return RResult<void>::Success();
1338}
1339
1342{
     // Appends an extra type info record unless an equal one (per operator==) is already stored,
     // in which case a failure is returned. The search is a linear scan over the stored records.
1343 // Make sure we have no duplicates
1344 if (std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1345 extraTypeInfoDesc) != fDescriptor.fExtraTypeInfoDescriptors.end()) {
1346 return R__FAIL("extra type info duplicates");
1347 }
1348 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1349 return RResult<void>::Success();
1350}
1351
1353{
     // Overwrites the stored record found by std::find, or appends the record if none is found.
     // NOTE(review): the last argument of std::find is not visible in this listing; presumably the search
     // value is extraTypeInfoDesc -- confirm.
1354 auto it = std::find(fDescriptor.fExtraTypeInfoDescriptors.begin(), fDescriptor.fExtraTypeInfoDescriptors.end(),
1356 if (it != fDescriptor.fExtraTypeInfoDescriptors.end())
1357 *it = std::move(extraTypeInfoDesc);
1358 else
1359 fDescriptor.fExtraTypeInfoDescriptors.emplace_back(std::move(extraTypeInfoDesc));
1360}
1361
1363{
     // Builds and returns `streamerInfoMap` (declaration not visible in this listing) in two passes:
     // a recursive walk over the field tree, then a pass over the extra type info records.
     // Throws RException if a custom class has no TClass or no streamer info for its type version.
1365 const auto &desc = GetDescriptor();
1366
     // std::function is used so that the walker can refer to itself for the recursion.
     // NOTE(review): the line assigning the lambda to fnWalkFieldTree is not visible in this listing.
1367 std::function<void(const RFieldDescriptor &)> fnWalkFieldTree;
1369 if (fieldDesc.IsCustomClass()) {
1370 // Add streamer info for this class to streamerInfoMap
1371 auto cl = TClass::GetClass(fieldDesc.GetTypeName().c_str());
1372 if (!cl) {
1373 throw RException(R__FAIL(std::string("cannot get TClass for ") + fieldDesc.GetTypeName()));
1374 }
     // The streamer info is requested for the type version recorded in the field descriptor.
1375 auto streamerInfo = cl->GetStreamerInfo(fieldDesc.GetTypeVersion());
1376 if (!streamerInfo) {
1377 throw RException(R__FAIL(std::string("cannot get streamerInfo for ") + fieldDesc.GetTypeName()));
1378 }
     // NOTE(review): the statement that stores streamerInfo in the map is not visible in this listing.
1380 }
1381
1382 // Recursively traverse sub fields
     // NOTE(review): the loop body (presumably the recursive call on subFieldDesc) is not visible here.
1383 for (const auto &subFieldDesc : desc.GetFieldIterable(fieldDesc)) {
1385 }
1386 };
1387
     // Start the walk at the zero (root) field.
1388 fnWalkFieldTree(desc.GetFieldZero());
1389
1390 // Add the streamer info records from streamer fields: because of runtime polymorphism we may need to add additional
1391 // types not covered by the type names stored in the field headers
1392 for (const auto &extraTypeInfo : desc.GetExtraTypeInfoIterable()) {
     // Only records whose content id is kStreamerInfo are considered.
1393 if (extraTypeInfo.GetContentId() != EExtraTypeInfoIds::kStreamerInfo)
1394 continue;
1395 // Ideally, we would avoid deserializing the streamer info records of the streamer fields that we just serialized.
1396 // However, this happens only once at the end of writing and only when streamer fields are used, so the
1397 // preference here is for code simplicity.
     // NOTE(review): the statement that merges the record into the map is not visible in this listing.
1399 }
1400
1401 return streamerInfoMap;
1402}
1403
1408
1414
1421
1424{
     // Convenience overload: resolves fieldId to its descriptor and delegates to the descriptor-based overload.
1425 return GetFieldIterable(GetFieldDescriptor(fieldId));
1426}
1427
1434
1436{
     // The top-level fields are the fields iterated for the zero (root) field.
1437 return GetFieldIterable(GetFieldZeroId());
1438}
1439
1441 const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const
1442{
     // Same as the overload without arguments, but passes `comparator` (a predicate over two field IDs)
     // on to GetFieldIterable(); presumably it determines the iteration order -- confirm.
1443 return GetFieldIterable(GetFieldZeroId(), comparator);
1444}
1445
1450
1456
1462
1467
1472
#define R__FORWARD_ERROR(res)
Short-hand to return an RResult<T> in an error state (i.e. after checking)
Definition RError.hxx:303
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:299
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
char name[80]
Definition TGX11.cxx:110
void cd(Int_t id=-1)
#define _(A, B)
Definition cfortran.h:108
A helper class for piece-wise construction of an RClusterDescriptor.
RResult< void > MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId)
Books the given column ID as being suppressed in this cluster.
RResult< void > CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex, std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange)
RClusterDescriptorBuilder & AddExtendedColumnRanges(const RNTupleDescriptor &desc)
Add column and page ranges for columns created during late model extension missing in this cluster.
RResult< void > CommitSuppressedColumnRanges(const RNTupleDescriptor &desc)
Sets the first element index and number of elements for all the suppressed column ranges.
RResult< RClusterDescriptor > MoveDescriptor()
Move out the full cluster descriptor including page locations.
A helper class for piece-wise construction of an RClusterGroupDescriptor.
RClusterGroupDescriptorBuilder & EntrySpan(std::uint64_t entrySpan)
RClusterGroupDescriptorBuilder & PageListLocator(const RNTupleLocator &pageListLocator)
static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc)
RClusterGroupDescriptorBuilder & PageListLength(std::uint64_t pageListLength)
RClusterGroupDescriptorBuilder & MinEntry(std::uint64_t minEntry)
RResult< RClusterGroupDescriptor > MoveDescriptor()
RClusterGroupDescriptorBuilder & ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
RClusterGroupDescriptorBuilder & NClusters(std::uint32_t nClusters)
RResult< RColumnDescriptor > MakeDescriptor() const
Attempt to make a column descriptor.
A column element encapsulates the translation between basic C++ types and their column representation...
static std::pair< std::uint16_t, std::uint16_t > GetValidBitRange(ROOT::ENTupleColumnType type)
Most types have a fixed on-disk bit width.
RResult< RExtraTypeInfoDescriptor > MoveDescriptor()
A helper class for piece-wise construction of an RFieldDescriptor.
RFieldDescriptorBuilder()=default
Make an empty dangling field descriptor.
RResult< RFieldDescriptor > MakeDescriptor() const
Attempt to make a field descriptor.
static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field)
Make a new RFieldDescriptorBuilder based off a live RNTuple field.
void SetNTuple(const std::string_view name, const std::string_view description)
void SetSchemaFromExisting(const RNTupleDescriptor &descriptor)
Copies the "schema" part of descriptor into the builder's descriptor.
RResult< void > AddColumn(RColumnDescriptor &&columnDesc)
RResult< void > AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId)
void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
RResult< void > AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc)
void ShiftAliasColumns(std::uint32_t offset)
Shift column IDs of alias columns by offset
void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor, std::uint16_t versionPatch)
void BeginHeaderExtension()
Mark the beginning of the header extension; any fields and columns added after a call to this functio...
RResult< void > AddCluster(RClusterDescriptor &&clusterDesc)
RResult< void > EnsureValidDescriptor() const
Checks whether invariants hold:
RResult< void > AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId)
void Reset()
Clears so-far stored clusters, fields, and columns and return to a pristine RNTupleDescriptor.
void AddField(const RFieldDescriptor &fieldDesc)
ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const
Get the streamer info records for custom classes. Currently requires the corresponding dictionaries t...
RResult< void > AddClusterGroup(RClusterGroupDescriptor &&clusterGroup)
RResult< void > EnsureFieldExists(ROOT::DescriptorId_t fieldId) const
A helper class for serializing and deserialization of the RNTuple binary format.
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
static RResult< StreamerInfoMap_t > DeserializeStreamerInfos(const std::string &extraTypeInfoContent)
The window of element indexes of a particular column in a particular cluster.
Records the partition of data into pages for a particular column in a particular cluster.
RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const
Find the page in the RPageRange that contains the given element. The element must exist.
std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize)
Extend this RPageRange to fit the given RColumnRange.
Metadata for RNTuple clusters.
ROOT::NTupleSize_t fFirstEntryIndex
Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges.
std::unordered_map< ROOT::DescriptorId_t, RColumnRange > fColumnRanges
ROOT::DescriptorId_t fClusterId
RClusterDescriptor Clone() const
bool operator==(const RClusterDescriptor &other) const
RColumnRangeIterable GetColumnRangeIterable() const
Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
std::unordered_map< ROOT::DescriptorId_t, RPageRange > fPageRanges
std::uint64_t GetNBytesOnStorage() const
Clusters are bundled in cluster groups.
RNTupleLocator fPageListLocator
The page list that corresponds to the cluster group.
RClusterGroupDescriptor Clone() const
std::vector< ROOT::DescriptorId_t > fClusterIds
The cluster IDs can be empty if the corresponding page list is not loaded.
std::uint64_t fMinEntry
The minimum first entry number of the clusters in the cluster group.
std::uint32_t fNClusters
Number of clusters is always known even if the cluster IDs are not (yet) populated.
std::uint64_t fPageListLength
Uncompressed size of the page list.
std::uint64_t fEntrySpan
Number of entries that are (partially for sharded clusters) covered by this cluster group.
bool operator==(const RClusterGroupDescriptor &other) const
RClusterGroupDescriptor CloneSummary() const
Creates a clone without the cluster IDs.
Metadata stored for every column of an RNTuple.
ROOT::DescriptorId_t fPhysicalColumnId
Usually identical to the logical column ID, except for alias columns where it references the shadowed...
bool operator==(const RColumnDescriptor &other) const
ROOT::DescriptorId_t fLogicalColumnId
The actual column identifier, which is the link to the corresponding field.
ROOT::DescriptorId_t fFieldId
Every column belongs to one and only one field.
std::int64_t fFirstElementIndex
The absolute value specifies the index for the first stored element for this column.
std::uint32_t fIndex
A field can be serialized into several columns, which are numbered from zero to $n$.
std::uint16_t fBitsOnStorage
The size in bits of elements of this column.
std::uint16_t fRepresentationIndex
A field may use multiple column representations, which are numbered from zero to $m$.
ROOT::ENTupleColumnType fType
The on-disk column type.
std::optional< RValueRange > fValueRange
Optional value range (used e.g. by quantized real fields)
RColumnDescriptor Clone() const
Get a copy of the descriptor.
Base class for all ROOT issued exceptions.
Definition RError.hxx:79
Field-specific extra type information from the header / extension header.
bool operator==(const RExtraTypeInfoDescriptor &other) const
RExtraTypeInfoDescriptor Clone() const
EExtraTypeInfoIds fContentId
Specifies the meaning of the extra information.
std::string fTypeName
The type name the extra information refers to; empty for RNTuple-wide extra information.
std::string fContent
The content format depends on the content ID and may be binary.
std::uint32_t fTypeVersion
Type version the extra type information is bound to.
A field translates read and write calls from/to underlying columns to/from tree values.
@ kTraitInvalidField
This field is an instance of RInvalidField and can be safely static_cast to it.
@ kTraitTypeChecksum
The TClass checksum is set and valid.
Metadata stored for every field of an RNTuple.
std::unique_ptr< ROOT::RFieldBase > CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options={}) const
In general, we create a field simply from the C++ type name.
std::uint32_t fFieldVersion
The version of the C++-type-to-column translation mechanics.
ROOT::DescriptorId_t fFieldId
RFieldDescriptor Clone() const
Get a copy of the descriptor.
std::uint64_t fNRepetitions
The number of elements per entry for fixed-size arrays.
std::uint32_t fColumnCardinality
The number of columns in the column representations of the field.
ROOT::DescriptorId_t fProjectionSourceId
For projected fields, the source field ID.
bool operator==(const RFieldDescriptor &other) const
std::string fFieldDescription
Free text set by the user.
ROOT::DescriptorId_t fParentId
Establishes sub field relationships, such as classes and collections.
bool IsCustomClass() const
Tells if the field describes a user-defined class rather than a fundamental type, a collection,...
std::string fTypeAlias
A typedef or using directive that resolved to the type name during field creation.
ROOT::ENTupleStructure fStructure
The structural information carried by this field in the data model tree.
std::vector< ROOT::DescriptorId_t > fLinkIds
The pointers in the other direction from parent to children.
std::string fFieldName
The leaf name, not including parent fields.
std::uint32_t fTypeVersion
The version of the C++ type itself.
std::string fTypeName
The C++ type that was used when writing the field.
std::vector< ROOT::DescriptorId_t > fLogicalColumnIds
The ordered list of columns attached to this field: first by representation index then by column inde...
std::optional< std::uint32_t > fTypeChecksum
For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules tha...
Used in RFieldBase::Check() to record field creation failures.
Definition RField.hxx:72
@ kGeneric
Generic unrecoverable error.
@ kUnknownStructure
The field could not be created because its descriptor had an unknown structural role.
Used to loop over all the clusters of an RNTuple (in unspecified order)
Used to loop over all the cluster groups of an RNTuple (in unspecified order)
Used to loop over a field's associated columns.
std::vector< ROOT::DescriptorId_t > fColumns
The descriptor ids of the columns ordered by field, representation, and column index.
RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc)
Used to loop over all the extra type info record of an RNTuple (in unspecified order)
Used to loop over a field's child fields.
std::vector< ROOT::DescriptorId_t > GetTopLevelFields(const RNTupleDescriptor &desc) const
Return a vector containing the IDs of the top-level fields defined in the extension header,...
The on-storage metadata of an RNTuple.
const RColumnDescriptor & GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const
RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const
std::set< unsigned int > fFeatureFlags
std::unordered_map< ROOT::DescriptorId_t, RClusterGroupDescriptor > fClusterGroupDescriptors
const RFieldDescriptor & GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
std::uint64_t fNPhysicalColumns
Updated by the descriptor builder when columns are added.
ROOT::DescriptorId_t fFieldZeroId
Set by the descriptor builder.
std::uint64_t fNEntries
Updated by the descriptor builder when the cluster groups are added.
RClusterGroupDescriptorIterable GetClusterGroupIterable() const
RColumnDescriptorIterable GetColumnIterable() const
bool operator==(const RNTupleDescriptor &other) const
std::uint64_t fOnDiskFooterSize
Like fOnDiskHeaderSize, contains both cluster summaries and page locations.
std::uint16_t fVersionMinor
Set by the descriptor builder when deserialized.
ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const
std::vector< std::uint64_t > GetFeatureFlags() const
ROOT::DescriptorId_t GetFieldZeroId() const
Returns the logical parent of all top-level RNTuple data fields.
std::unique_ptr< ROOT::RNTupleModel > CreateModel(const RCreateModelOptions &options=RCreateModelOptions()) const
Re-create the C++ model from the stored metadata.
std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const
Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type nam...
std::unordered_map< ROOT::DescriptorId_t, RClusterDescriptor > fClusterDescriptors
Potentially a subset of all the available clusters.
ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const
const RHeaderExtension * GetHeaderExtension() const
Return header extension information; if the descriptor does not have a header extension,...
std::uint64_t fNClusters
Updated by the descriptor builder when the cluster groups are added.
std::uint64_t fOnDiskHeaderXxHash3
Set by the descriptor builder when deserialized.
ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const
std::string fName
The RNTuple name needs to be unique in a given storage location (file)
std::uint64_t fOnDiskHeaderSize
Set by the descriptor builder when deserialized.
RResult< void > DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId)
std::uint16_t fVersionMajor
Set by the descriptor builder when deserialized.
std::vector< ROOT::DescriptorId_t > fSortedClusterGroupIds
References cluster groups sorted by entry range and thus allows for binary search.
std::unordered_map< ROOT::DescriptorId_t, RColumnDescriptor > fColumnDescriptors
ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex, std::uint16_t representationIndex) const
std::unordered_map< ROOT::DescriptorId_t, RFieldDescriptor > fFieldDescriptors
ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const
RResult< void > AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector< RClusterDescriptor > &clusterDescs)
Methods to load and drop cluster group details (cluster IDs and page locations)
std::uint16_t fVersionPatch
Set by the descriptor builder when deserialized.
std::string fDescription
Free text from the user.
RFieldDescriptorIterable GetTopLevelFields() const
std::uint16_t fVersionEpoch
Set by the descriptor builder when deserialized.
std::vector< RExtraTypeInfoDescriptor > fExtraTypeInfoDescriptors
RNTupleDescriptor Clone() const
std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const
Walks up the parents of the field ID and returns a field name of the form a.b.c.d In case of invalid ...
RClusterDescriptorIterable GetClusterIterable() const
RNTupleDescriptor CloneSchema() const
Creates a descriptor containing only the schema information about this RNTuple, i....
std::uint64_t fGeneration
The generation of the descriptor.
ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const
std::unique_ptr< RHeaderExtension > fHeaderExtension
Generic information about the physical location of data.
static std::unique_ptr< RNTupleModel > Create()
static std::unique_ptr< RNTupleModel > CreateBare()
Creates a "bare model", i.e. an RNTupleModel with no default entry.
static constexpr std::uint16_t kVersionPatch
Definition RNTuple.hxx:79
static constexpr std::uint16_t kVersionMajor
Definition RNTuple.hxx:77
static constexpr std::uint16_t kVersionEpoch
Definition RNTuple.hxx:76
static constexpr std::uint16_t kVersionMinor
Definition RNTuple.hxx:78
const_iterator begin() const
const_iterator end() const
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:197
static std::unique_ptr< RVectorField > CreateUntyped(std::string_view fieldName, std::unique_ptr< RFieldBase > itemField)
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:3074
struct void * fTypeName
Definition cppyy.h:9
const Int_t n
Definition legend1.C:16
Double_t ex[n]
Definition legend1.C:17
RResult< void > EnsureValidNameForRNTuple(std::string_view name, std::string_view where)
Check whether a given string is a valid name according to the RNTuple specification.
ROOT::RResult< std::unique_ptr< ROOT::RFieldBase > > CallFieldBaseCreate(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId)
std::string GetRenormalizedTypeName(const std::string &metaNormalizedName)
Given a type name normalized by ROOT meta, renormalize it for RNTuple, e.g., insert the std:: prefix.
std::uint64_t DescriptorId_t
Distinguishes elements of the same type within a descriptor, e.g. different fields.
constexpr NTupleSize_t kInvalidNTupleIndex
std::uint64_t NTupleSize_t
Integer type long enough to hold the maximum number of entries in a column.
constexpr DescriptorId_t kInvalidDescriptorId
Additional information about a page in an in-memory RPageRange.
Information about a single page in the context of a cluster's page range.
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345