Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RMiniFile.cxx
Go to the documentation of this file.
1/// \file RMiniFile.cxx
2/// \ingroup NTuple ROOT7
3/// \author Jakob Blomer <jblomer@cern.ch>
4/// \date 2019-12-22
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#include "Rtypes.h"
17#include <ROOT/RConfig.hxx>
18#include <ROOT/RError.hxx>
19#include <ROOT/RMiniFile.hxx>
20#include <ROOT/RRawFile.hxx>
21#include <ROOT/RNTupleZip.hxx>
24
25#include <Byteswap.h>
26#include <TBufferFile.h>
27#include <TDirectory.h>
28#include <TError.h>
29#include <TFile.h>
30#include <TKey.h>
31#include <TObjString.h>
33
34#include <xxhash.h>
35
36#include <algorithm>
37#include <cerrno>
38#include <cstdio>
39#include <cstring>
40#include <memory>
41#include <string>
42#include <chrono>
43
44#ifdef R__LINUX
45#include <fcntl.h>
46#endif
47
48#ifndef R__LITTLE_ENDIAN
49#ifdef R__BYTESWAP
50// `R__BYTESWAP` is defined in RConfig.hxx for little-endian architectures; undefined otherwise
51#define R__LITTLE_ENDIAN 1
52#else
53#define R__LITTLE_ENDIAN 0
54#endif
55#endif /* R__LITTLE_ENDIAN */
56
57namespace {
58
59// The following types are used to read and write the TFile binary format
60
/// Unsigned 16-bit integer that is held in big-endian byte order.
/// Construction and assignment take a host-order value; the conversion operator yields a host-order value.
class RUInt16BE {
public:
   RUInt16BE() = default;
   explicit RUInt16BE(const std::uint16_t hostVal) : fValBE(ConvertEndianness(hostVal)) {}

   RUInt16BE &operator=(const std::uint16_t hostVal)
   {
      fValBE = ConvertEndianness(hostVal);
      return *this;
   }

   operator std::uint16_t() const { return ConvertEndianness(fValBE); }

private:
   /// Reverses the byte order on little-endian hosts and is the identity otherwise;
   /// hence it serves for both directions of the conversion.
   static std::uint16_t ConvertEndianness(std::uint16_t v)
   {
#if R__LITTLE_ENDIAN == 1
      v = RByteSwap<sizeof(v)>::bswap(v);
#endif
      return v;
   }

   /// The stored value, in big-endian representation
   std::uint16_t fValBE = 0;
};
84
/// Unsigned 32-bit integer that is held in big-endian byte order.
/// Construction and assignment take a host-order value; the conversion operator yields a host-order value.
class RUInt32BE {
public:
   RUInt32BE() = default;
   explicit RUInt32BE(const std::uint32_t hostVal) : fValBE(ConvertEndianness(hostVal)) {}

   RUInt32BE &operator=(const std::uint32_t hostVal)
   {
      fValBE = ConvertEndianness(hostVal);
      return *this;
   }

   operator std::uint32_t() const { return ConvertEndianness(fValBE); }

private:
   /// Reverses the byte order on little-endian hosts and is the identity otherwise;
   /// hence it serves for both directions of the conversion.
   static std::uint32_t ConvertEndianness(std::uint32_t v)
   {
#if R__LITTLE_ENDIAN == 1
      v = RByteSwap<sizeof(v)>::bswap(v);
#endif
      return v;
   }

   /// The stored value, in big-endian representation
   std::uint32_t fValBE = 0;
};
108
/// Signed 32-bit integer that is held in big-endian byte order.
/// Construction and assignment take a host-order value; the conversion operator yields a host-order value.
class RInt32BE {
public:
   RInt32BE() = default;
   explicit RInt32BE(const std::int32_t hostVal) : fValBE(ConvertEndianness(hostVal)) {}

   RInt32BE &operator=(const std::int32_t hostVal)
   {
      fValBE = ConvertEndianness(hostVal);
      return *this;
   }

   operator std::int32_t() const { return ConvertEndianness(fValBE); }

private:
   /// Reverses the byte order on little-endian hosts and is the identity otherwise;
   /// hence it serves for both directions of the conversion.
   static std::int32_t ConvertEndianness(std::int32_t v)
   {
#if R__LITTLE_ENDIAN == 1
      v = RByteSwap<sizeof(v)>::bswap(v);
#endif
      return v;
   }

   /// The stored value, in big-endian representation
   std::int32_t fValBE = 0;
};
132
/// Unsigned 64-bit integer that is held in big-endian byte order.
/// Construction and assignment take a host-order value; the conversion operator yields a host-order value.
class RUInt64BE {
public:
   RUInt64BE() = default;
   explicit RUInt64BE(const std::uint64_t hostVal) : fValBE(ConvertEndianness(hostVal)) {}

   RUInt64BE &operator=(const std::uint64_t hostVal)
   {
      fValBE = ConvertEndianness(hostVal);
      return *this;
   }

   operator std::uint64_t() const { return ConvertEndianness(fValBE); }

private:
   /// Reverses the byte order on little-endian hosts and is the identity otherwise;
   /// hence it serves for both directions of the conversion.
   static std::uint64_t ConvertEndianness(std::uint64_t v)
   {
#if R__LITTLE_ENDIAN == 1
      v = RByteSwap<sizeof(v)>::bswap(v);
#endif
      return v;
   }

   /// The stored value, in big-endian representation
   std::uint64_t fValBE = 0;
};
156
157#pragma pack(push, 1)
158/// A name (type, identifies, ...) in the TFile binary format
159struct RTFString {
160 unsigned char fLName{0};
161 char fData[255];
162 RTFString() = default;
163 RTFString(const std::string &str)
164 {
165 // The length of strings with 255 characters and longer are encoded with a 32-bit integer following the first
166 // byte. This is currently not handled.
167 R__ASSERT(str.length() < 255);
168 fLName = str.length();
169 memcpy(fData, str.data(), fLName);
170 }
171 std::size_t GetSize() const
172 {
173 // A length of 255 is special and means that the first byte is followed by a 32-bit integer with the actual
174 // length.
175 R__ASSERT(fLName != 255);
176 return 1 + fLName;
177 }
178};
179
180/// The timestamp format used in TFile; the default constructor initializes with the current time
181struct RTFDatetime {
182 RUInt32BE fDatetime;
183 RTFDatetime()
184 {
185 auto now = std::chrono::system_clock::now();
186 auto tt = std::chrono::system_clock::to_time_t(now);
187 auto tm = *localtime(&tt);
188 fDatetime = (tm.tm_year + 1900 - 1995) << 26 | (tm.tm_mon + 1) << 22 | tm.tm_mday << 17 | tm.tm_hour << 12 |
189 tm.tm_min << 6 | tm.tm_sec;
190 }
191 explicit RTFDatetime(RUInt32BE val) : fDatetime(val) {}
192};
193
194/// The key part of a TFile record excluding the class, object, and title names
195struct RTFKey {
196 RInt32BE fNbytes{0};
197 RUInt16BE fVersion{4};
198 RUInt32BE fObjLen{0};
199 RTFDatetime fDatetime;
200 RUInt16BE fKeyLen{0};
201 RUInt16BE fCycle{1};
202 union {
203 struct {
204 RUInt32BE fSeekKey{0};
205 RUInt32BE fSeekPdir{0};
206 } fInfoShort;
207 struct {
208 RUInt64BE fSeekKey{0};
209 RUInt64BE fSeekPdir{0};
210 } fInfoLong;
211 };
212
213 std::uint32_t fKeyHeaderSize{18 + sizeof(fInfoShort)}; // not part of serialization
214
215 RTFKey() : fInfoShort() {}
216 RTFKey(std::uint64_t seekKey, std::uint64_t seekPdir, const RTFString &clName, const RTFString &objName,
217 const RTFString &titleName, std::size_t szObjInMem, std::size_t szObjOnDisk = 0)
218 {
219 R__ASSERT(szObjInMem <= std::numeric_limits<std::uint32_t>::max());
220 R__ASSERT(szObjOnDisk <= std::numeric_limits<std::uint32_t>::max());
221 fObjLen = szObjInMem;
222 if ((seekKey > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max())) ||
223 (seekPdir > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max()))) {
224 fKeyHeaderSize = 18 + sizeof(fInfoLong);
225 fKeyLen = fKeyHeaderSize + clName.GetSize() + objName.GetSize() + titleName.GetSize();
226 fInfoLong.fSeekKey = seekKey;
227 fInfoLong.fSeekPdir = seekPdir;
228 fVersion = fVersion + 1000;
229 } else {
230 fKeyHeaderSize = 18 + sizeof(fInfoShort);
231 fKeyLen = fKeyHeaderSize + clName.GetSize() + objName.GetSize() + titleName.GetSize();
232 fInfoShort.fSeekKey = seekKey;
233 fInfoShort.fSeekPdir = seekPdir;
234 }
235 fNbytes = fKeyLen + ((szObjOnDisk == 0) ? szObjInMem : szObjOnDisk);
236 }
237
238 void MakeBigKey()
239 {
240 if (fVersion >= 1000)
241 return;
242 std::uint32_t seekKey = fInfoShort.fSeekKey;
243 std::uint32_t seekPdir = fInfoShort.fSeekPdir;
244 fInfoLong.fSeekKey = seekKey;
245 fInfoLong.fSeekPdir = seekPdir;
246 fKeyHeaderSize = fKeyHeaderSize + sizeof(fInfoLong) - sizeof(fInfoShort);
247 fKeyLen = fKeyLen + sizeof(fInfoLong) - sizeof(fInfoShort);
248 fNbytes = fNbytes + sizeof(fInfoLong) - sizeof(fInfoShort);
249 fVersion = fVersion + 1000;
250 }
251
252 std::uint32_t GetSize() const
253 {
254 // Negative size indicates a gap in the file
255 if (fNbytes < 0)
256 return -fNbytes;
257 return fNbytes;
258 }
259
260 std::uint32_t GetHeaderSize() const
261 {
262 if (fVersion >= 1000)
263 return 18 + sizeof(fInfoLong);
264 return 18 + sizeof(fInfoShort);
265 }
266
267 std::uint64_t GetSeekKey() const
268 {
269 if (fVersion >= 1000)
270 return fInfoLong.fSeekKey;
271 return fInfoShort.fSeekKey;
272 }
273};
274
275/// The TFile global header
276struct RTFHeader {
277 char fMagic[4]{'r', 'o', 'o', 't'};
278 RUInt32BE fVersion{(ROOT_VERSION_CODE >> 16) * 10000 + ((ROOT_VERSION_CODE & 0xFF00) >> 8) * 100 +
279 (ROOT_VERSION_CODE & 0xFF)};
280 RUInt32BE fBEGIN{100};
281 union {
282 struct {
283 RUInt32BE fEND{0};
284 RUInt32BE fSeekFree{0};
285 RUInt32BE fNbytesFree{0};
286 RUInt32BE fNfree{1};
287 RUInt32BE fNbytesName{0};
288 unsigned char fUnits{4};
289 RUInt32BE fCompress{0};
290 RUInt32BE fSeekInfo{0};
291 RUInt32BE fNbytesInfo{0};
292 } fInfoShort;
293 struct {
294 RUInt64BE fEND{0};
295 RUInt64BE fSeekFree{0};
296 RUInt32BE fNbytesFree{0};
297 RUInt32BE fNfree{1};
298 RUInt32BE fNbytesName{0};
299 unsigned char fUnits{8};
300 RUInt32BE fCompress{0};
301 RUInt64BE fSeekInfo{0};
302 RUInt32BE fNbytesInfo{0};
303 } fInfoLong;
304 };
305
306 RTFHeader() : fInfoShort() {}
307 RTFHeader(int compression) : fInfoShort() { fInfoShort.fCompress = compression; }
308
309 void SetBigFile()
310 {
311 if (fVersion >= 1000000)
312 return;
313
314 // clang-format off
315 std::uint32_t end = fInfoShort.fEND;
316 std::uint32_t seekFree = fInfoShort.fSeekFree;
317 std::uint32_t nbytesFree = fInfoShort.fNbytesFree;
318 std::uint32_t nFree = fInfoShort.fNfree;
319 std::uint32_t nbytesName = fInfoShort.fNbytesName;
320 std::uint32_t compress = fInfoShort.fCompress;
321 std::uint32_t seekInfo = fInfoShort.fSeekInfo;
322 std::uint32_t nbytesInfo = fInfoShort.fNbytesInfo;
323 fInfoLong.fEND = end;
324 fInfoLong.fSeekFree = seekFree;
325 fInfoLong.fNbytesFree = nbytesFree;
326 fInfoLong.fNfree = nFree;
327 fInfoLong.fNbytesName = nbytesName;
328 fInfoLong.fUnits = 8;
329 fInfoLong.fCompress = compress;
330 fInfoLong.fSeekInfo = seekInfo;
331 fInfoLong.fNbytesInfo = nbytesInfo;
332 fVersion = fVersion + 1000000;
333 // clang-format on
334 }
335
336 bool IsBigFile(std::uint64_t offset = 0) const
337 {
338 return (fVersion >= 1000000) || (offset > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max()));
339 }
340
341 std::uint32_t GetSize() const
342 {
343 std::uint32_t sizeHead = 4 + sizeof(fVersion) + sizeof(fBEGIN);
344 if (IsBigFile())
345 return sizeHead + sizeof(fInfoLong);
346 return sizeHead + sizeof(fInfoShort);
347 }
348
349 std::uint64_t GetEnd() const
350 {
351 if (IsBigFile())
352 return fInfoLong.fEND;
353 return fInfoShort.fEND;
354 }
355
356 void SetEnd(std::uint64_t value)
357 {
358 if (IsBigFile(value)) {
359 SetBigFile();
360 fInfoLong.fEND = value;
361 } else {
362 fInfoShort.fEND = value;
363 }
364 }
365
366 std::uint64_t GetSeekFree() const
367 {
368 if (IsBigFile())
369 return fInfoLong.fSeekFree;
370 return fInfoShort.fSeekFree;
371 }
372
373 void SetSeekFree(std::uint64_t value)
374 {
375 if (IsBigFile(value)) {
376 SetBigFile();
377 fInfoLong.fSeekFree = value;
378 } else {
379 fInfoShort.fSeekFree = value;
380 }
381 }
382
383 void SetNbytesFree(std::uint32_t value)
384 {
385 if (IsBigFile()) {
386 fInfoLong.fNbytesFree = value;
387 } else {
388 fInfoShort.fNbytesFree = value;
389 }
390 }
391
392 void SetNbytesName(std::uint32_t value)
393 {
394 if (IsBigFile()) {
395 fInfoLong.fNbytesName = value;
396 } else {
397 fInfoShort.fNbytesName = value;
398 }
399 }
400
401 std::uint64_t GetSeekInfo() const
402 {
403 if (IsBigFile())
404 return fInfoLong.fSeekInfo;
405 return fInfoShort.fSeekInfo;
406 }
407
408 void SetSeekInfo(std::uint64_t value)
409 {
410 if (IsBigFile(value)) {
411 SetBigFile();
412 fInfoLong.fSeekInfo = value;
413 } else {
414 fInfoShort.fSeekInfo = value;
415 }
416 }
417
418 void SetNbytesInfo(std::uint32_t value)
419 {
420 if (IsBigFile()) {
421 fInfoLong.fNbytesInfo = value;
422 } else {
423 fInfoShort.fNbytesInfo = value;
424 }
425 }
426
427 void SetCompression(std::uint32_t value)
428 {
429 if (IsBigFile()) {
430 fInfoLong.fCompress = value;
431 } else {
432 fInfoShort.fCompress = value;
433 }
434 }
435};
436
437/// A reference to an unused byte-range in a TFile
438struct RTFFreeEntry {
439 RUInt16BE fVersion{1};
440 union {
441 struct {
442 RUInt32BE fFirst{0};
443 RUInt32BE fLast{0};
444 } fInfoShort;
445 struct {
446 RUInt64BE fFirst{0};
447 RUInt64BE fLast{0};
448 } fInfoLong;
449 };
450
451 RTFFreeEntry() : fInfoShort() {}
452 void Set(std::uint64_t first, std::uint64_t last)
453 {
454 if (last > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max())) {
455 fVersion = fVersion + 1000;
456 fInfoLong.fFirst = first;
457 fInfoLong.fLast = last;
458 } else {
459 fInfoShort.fFirst = first;
460 fInfoShort.fLast = last;
461 }
462 }
463 std::uint32_t GetSize() { return (fVersion >= 1000) ? 18 : 10; }
464};
465
466/// The header of the directory key index
467struct RTFKeyList {
468 RUInt32BE fNKeys;
469 std::uint32_t GetSize() const { return sizeof(RTFKeyList); }
470 explicit RTFKeyList(std::uint32_t nKeys) : fNKeys(nKeys) {}
471};
472
473/// A streamed TDirectory (TFile) object
474struct RTFDirectory {
475 RUInt16BE fClassVersion{5};
476 RTFDatetime fDateC;
477 RTFDatetime fDateM;
478 RUInt32BE fNBytesKeys{0};
479 RUInt32BE fNBytesName{0};
480 // The version of the key has to tell whether offsets are 32bit or 64bit long
481 union {
482 struct {
483 RUInt32BE fSeekDir{100};
484 RUInt32BE fSeekParent{0};
485 RUInt32BE fSeekKeys{0};
486 } fInfoShort;
487 struct {
488 RUInt64BE fSeekDir{100};
489 RUInt64BE fSeekParent{0};
490 RUInt64BE fSeekKeys{0};
491 } fInfoLong;
492 };
493
494 RTFDirectory() : fInfoShort() {}
495
496 // In case of a short TFile record (<2G), 3 padding ints are written after the UUID
497 std::uint32_t GetSize() const
498 {
499 if (fClassVersion >= 1000)
500 return sizeof(RTFDirectory);
501 return 18 + sizeof(fInfoShort);
502 }
503
504 std::uint64_t GetSeekKeys() const
505 {
506 if (fClassVersion >= 1000)
507 return fInfoLong.fSeekKeys;
508 return fInfoShort.fSeekKeys;
509 }
510
511 void SetSeekKeys(std::uint64_t seekKeys)
512 {
513 if (seekKeys > static_cast<unsigned int>(std::numeric_limits<std::int32_t>::max())) {
514 std::uint32_t seekDir = fInfoShort.fSeekDir;
515 std::uint32_t seekParent = fInfoShort.fSeekParent;
516 fInfoLong.fSeekDir = seekDir;
517 fInfoLong.fSeekParent = seekParent;
518 fInfoLong.fSeekKeys = seekKeys;
519 fClassVersion = fClassVersion + 1000;
520 } else {
521 fInfoShort.fSeekKeys = seekKeys;
522 }
523 }
524};
525
526/// A zero UUID stored at the end of the TFile record
527struct RTFUUID {
528 RUInt16BE fVersionClass{1};
529 unsigned char fUUID[16] = {0};
530
531 RTFUUID() = default;
532 std::uint32_t GetSize() const { return sizeof(RTFUUID); }
533};
534
535/// A streamed RNTuple class
536///
537/// NOTE: this must be kept in sync with RNTuple.hxx.
538/// Aside ensuring consistency between the two classes' members, you need to make sure
539/// that fVersionClass matches the class version of RNTuple.
540struct RTFNTuple {
541 RUInt32BE fByteCount{0x40000000 | (sizeof(RTFNTuple) - sizeof(fByteCount))};
542 RUInt16BE fVersionClass{2};
543 RUInt16BE fVersionEpoch{0};
544 RUInt16BE fVersionMajor{0};
545 RUInt16BE fVersionMinor{0};
546 RUInt16BE fVersionPatch{0};
547 RUInt64BE fSeekHeader{0};
548 RUInt64BE fNBytesHeader{0};
549 RUInt64BE fLenHeader{0};
550 RUInt64BE fSeekFooter{0};
551 RUInt64BE fNBytesFooter{0};
552 RUInt64BE fLenFooter{0};
553 RUInt64BE fMaxKeySize{0};
554
555 static constexpr std::uint32_t GetSizePlusChecksum() { return sizeof(RTFNTuple) + sizeof(std::uint64_t); }
556
557 RTFNTuple() = default;
558 explicit RTFNTuple(const ROOT::RNTuple &inMemoryAnchor)
559 {
560 fVersionEpoch = inMemoryAnchor.GetVersionEpoch();
561 fVersionMajor = inMemoryAnchor.GetVersionMajor();
562 fVersionMinor = inMemoryAnchor.GetVersionMinor();
563 fVersionPatch = inMemoryAnchor.GetVersionPatch();
564 fSeekHeader = inMemoryAnchor.GetSeekHeader();
565 fNBytesHeader = inMemoryAnchor.GetNBytesHeader();
566 fLenHeader = inMemoryAnchor.GetLenHeader();
567 fSeekFooter = inMemoryAnchor.GetSeekFooter();
568 fNBytesFooter = inMemoryAnchor.GetNBytesFooter();
569 fLenFooter = inMemoryAnchor.GetLenFooter();
570 fMaxKeySize = inMemoryAnchor.GetMaxKeySize();
571 }
572 std::uint32_t GetSize() const { return sizeof(RTFNTuple); }
573 // The byte count and class version members are not checksummed
574 std::uint32_t GetOffsetCkData() { return sizeof(fByteCount) + sizeof(fVersionClass); }
575 std::uint32_t GetSizeCkData() { return GetSize() - GetOffsetCkData(); }
576 unsigned char *GetPtrCkData() { return reinterpret_cast<unsigned char *>(this) + GetOffsetCkData(); }
577};
578
579/// The bare file global header
580struct RBareFileHeader {
581 char fMagic[7]{'r', 'n', 't', 'u', 'p', 'l', 'e'};
582 RUInt32BE fRootVersion{(ROOT_VERSION_CODE >> 16) * 10000 + ((ROOT_VERSION_CODE & 0xFF00) >> 8) * 100 +
583 (ROOT_VERSION_CODE & 0xFF)};
584 RUInt32BE fFormatVersion{1};
585 RUInt32BE fCompress{0};
586 RTFNTuple fNTuple;
587 // followed by the ntuple name
588};
589#pragma pack(pop)
590
591/// The artificial class name shown for opaque RNTuple keys (see TBasket)
592constexpr char const *kBlobClassName = "RBlob";
593/// The class name of the RNTuple anchor
594constexpr char const *kNTupleClassName = "ROOT::RNTuple";
595
596} // anonymous namespace
597
598namespace ROOT {
599namespace Experimental {
600namespace Internal {
601/// If a TFile container is written by a C stream (simple file), on dataset commit, the file header
602/// and the TFile record need to be updated
604 RTFHeader fHeader;
605 RTFDirectory fFileRecord;
606 std::uint64_t fSeekNTuple{0}; // Remember the offset for the keys list
607 std::uint64_t fSeekFileRecord{0};
608};
609
610/// The RKeyBlob writes an invisible key into a TFile. That is, a key that is not indexed in the list of keys,
611/// like a TBasket.
612/// NOTE: out of anonymous namespace because otherwise ClassDefInline fails to compile
613/// on some platforms.
614class RKeyBlob : public TKey {
615public:
616 RKeyBlob() = default;
617
618 explicit RKeyBlob(TFile *file) : TKey(file)
619 {
621 fVersion += 1000;
622 fKeylen = Sizeof();
623 }
624
625 /// Register a new key for a data record of size nbytes
626 void Reserve(size_t nbytes, std::uint64_t *seekKey)
627 {
628 Create(nbytes);
629 *seekKey = fSeekKey;
630 }
631
633};
634
635} // namespace Internal
636} // namespace Experimental
637} // namespace ROOT
638
639// Computes how many chunks we need to fit `nbytes` of payload, considering that the
640// first chunk also needs to house the offsets of the other chunks and no chunk can
641// be bigger than `maxChunkSize`. When saved to a TFile, each chunk is part of a separate TKey.
642static size_t ComputeNumChunks(size_t nbytes, size_t maxChunkSize)
643{
644 constexpr size_t kChunkOffsetSize = sizeof(std::uint64_t);
645
647 size_t nChunks = (nbytes + maxChunkSize - 1) / maxChunkSize;
648 assert(nChunks > 1);
649 size_t nbytesTail = nbytes % maxChunkSize;
650 size_t nbytesExtra = (nbytesTail > 0) * (maxChunkSize - nbytesTail);
653 ++nChunks;
655 }
656
657 // We don't support having more chunkOffsets than what fits in one chunk.
658 // For a reasonable-sized maxKeySize it looks very unlikely that we can have more chunks
659 // than we can fit in the first `maxKeySize` bytes. E.g. for maxKeySize = 1GiB we can fit
660 // 134217728 chunk offsets, making our multi-key blob's capacity exactly 128 PiB.
662
663 return nChunks;
664}
665
667
670{
671 char ident[4];
672 ReadBuffer(ident, 4, 0);
673 if (std::string(ident, 4) == "root")
674 return GetNTupleProper(ntupleName);
675 fIsBare = true;
676 return GetNTupleBare(ntupleName);
677}
678
679/// Searches for a key with the given name and type in the key index of the given directory.
680/// Return 0 if the key was not found.
682 std::string_view keyName,
683 std::string_view typeName)
684{
685 RTFDirectory directory;
687
688 RTFKey key;
689 RUInt32BE nKeys;
690 std::uint64_t offset = directory.GetSeekKeys();
691 ReadBuffer(&key, sizeof(key), offset);
692 offset += key.fKeyLen;
693 ReadBuffer(&nKeys, sizeof(nKeys), offset);
694 offset += sizeof(nKeys);
695
696 for (unsigned int i = 0; i < nKeys; ++i) {
697 ReadBuffer(&key, sizeof(key), offset);
698 auto offsetNextKey = offset + key.fKeyLen;
699
700 offset += key.GetHeaderSize();
701 RTFString name;
702 ReadBuffer(&name, 1, offset);
703 ReadBuffer(&name, name.GetSize(), offset);
704 if (std::string_view(name.fData, name.fLName) != typeName) {
706 continue;
707 }
708 offset += name.GetSize();
709 ReadBuffer(&name, 1, offset);
710 ReadBuffer(&name, name.GetSize(), offset);
711 if (std::string_view(name.fData, name.fLName) == keyName) {
712 return key.GetSeekKey();
713 }
715 }
716
717 // Not found
718 return 0;
719}
720
723{
724 RTFHeader fileHeader;
725 ReadBuffer(&fileHeader, sizeof(fileHeader), 0);
726
727 RTFKey key;
728 RTFString name;
729 ReadBuffer(&key, sizeof(key), fileHeader.fBEGIN);
730 // Skip over the entire key length, including the class name, object name, and title stored in it.
731 std::uint64_t offset = fileHeader.fBEGIN + key.fKeyLen;
732 // Skip over the name and title of the TNamed preceding the TFile (root TDirectory) entry.
733 ReadBuffer(&name, 1, offset);
734 offset += name.GetSize();
735 ReadBuffer(&name, 1, offset);
736 offset += name.GetSize();
737
738 // split ntupleName by '/' character to open datasets in subdirectories.
739 std::string ntuplePathTail(ntuplePath);
740 if (!ntuplePathTail.empty() && ntuplePathTail[0] == '/')
741 ntuplePathTail = ntuplePathTail.substr(1);
742 auto pos = std::string::npos;
743 while ((pos = ntuplePathTail.find('/')) != std::string::npos) {
744 auto directoryName = ntuplePathTail.substr(0, pos);
745 ntuplePathTail.erase(0, pos + 1);
746
747 offset = SearchInDirectory(offset, directoryName, "TDirectory");
748 if (offset == 0) {
749 return R__FAIL("no directory named '" + std::string(directoryName) + "' in file '" + fRawFile->GetUrl() + "'");
750 }
751 ReadBuffer(&key, sizeof(key), offset);
752 offset = key.GetSeekKey() + key.fKeyLen;
753 }
754 // no more '/' delimiter in ntuplePath
756
757 offset = SearchInDirectory(offset, ntupleName, kNTupleClassName);
758 if (offset == 0) {
759 return R__FAIL("no RNTuple named '" + std::string(ntupleName) + "' in file '" + fRawFile->GetUrl() + "'");
760 }
761
762 ReadBuffer(&key, sizeof(key), offset);
763 offset = key.GetSeekKey() + key.fKeyLen;
764
765 // size of a RTFNTuple version 2 (min supported version); future anchor versions can grow.
766 constexpr size_t kMinNTupleSize = 78;
767 static_assert(kMinNTupleSize == RTFNTuple::GetSizePlusChecksum());
768 if (key.fObjLen < kMinNTupleSize) {
769 return R__FAIL("invalid anchor size: " + std::to_string(key.fObjLen) + " < " + std::to_string(sizeof(RTFNTuple)));
770 }
771 // The object length can be smaller than the size of RTFNTuple if it comes from a past RNTuple class version,
772 // or larger than it if it comes from a future RNTuple class version.
773 auto bufAnchor = std::make_unique<unsigned char[]>(std::max<size_t>(key.fObjLen, sizeof(RTFNTuple)));
774 RTFNTuple *ntuple = new (bufAnchor.get()) RTFNTuple;
775
776 auto objNbytes = key.GetSize() - key.fKeyLen;
778 if (objNbytes != key.fObjLen) {
780 decompressor.Unzip(bufAnchor.get(), objNbytes, key.fObjLen);
781 }
782
783 // We require that future class versions only append members and store the checksum in the last 8 bytes
784 // Checksum calculation: strip byte count, class version, fChecksum member
785 auto lenCkData = key.fObjLen - ntuple->GetOffsetCkData() - sizeof(uint64_t);
786 auto ckCalc = XXH3_64bits(ntuple->GetPtrCkData(), lenCkData);
787 uint64_t ckOnDisk;
788
789 RUInt64BE *ckOnDiskPtr = reinterpret_cast<RUInt64BE *>(bufAnchor.get() + key.fObjLen - sizeof(uint64_t));
790 ckOnDisk = static_cast<uint64_t>(*ckOnDiskPtr);
791 if (ckCalc != ckOnDisk) {
792 return R__FAIL("RNTuple anchor checksum mismatch");
793 }
794
795 return CreateAnchor(ntuple->fVersionEpoch, ntuple->fVersionMajor, ntuple->fVersionMinor, ntuple->fVersionPatch,
796 ntuple->fSeekHeader, ntuple->fNBytesHeader, ntuple->fLenHeader, ntuple->fSeekFooter,
797 ntuple->fNBytesFooter, ntuple->fLenFooter, ntuple->fMaxKeySize);
798}
799
802{
803 RBareFileHeader fileHeader;
804 ReadBuffer(&fileHeader, sizeof(fileHeader), 0);
805 RTFString name;
806 auto offset = sizeof(fileHeader);
807 ReadBuffer(&name, 1, offset);
808 ReadBuffer(&name, name.GetSize(), offset);
809 std::string_view foundName(name.fData, name.fLName);
810 if (foundName != ntupleName) {
811 return R__FAIL("expected RNTuple named '" + std::string(ntupleName) + "' but instead found '" +
812 std::string(foundName) + "' in file '" + fRawFile->GetUrl() + "'");
813 }
814 offset += name.GetSize();
815
816 RTFNTuple ntuple;
817 ReadBuffer(&ntuple, sizeof(ntuple), offset);
818 std::uint64_t onDiskChecksum;
820 auto checksum = XXH3_64bits(ntuple.GetPtrCkData(), ntuple.GetSizeCkData());
821 if (checksum != static_cast<uint64_t>(onDiskChecksum))
822 return R__FAIL("RNTuple bare file: anchor checksum mismatch");
823
824 return CreateAnchor(ntuple.fVersionEpoch, ntuple.fVersionMajor, ntuple.fVersionMinor, ntuple.fVersionPatch,
825 ntuple.fSeekHeader, ntuple.fNBytesHeader, ntuple.fLenHeader, ntuple.fSeekFooter,
826 ntuple.fNBytesFooter, ntuple.fLenFooter, ntuple.fMaxKeySize);
827}
828
830{
831 size_t nread;
832 if (fMaxKeySize == 0 || nbytes <= fMaxKeySize) {
833 // Fast path: read single blob
834 nread = fRawFile->ReadAt(buffer, nbytes, offset);
835 } else {
836 // Read chunked blob. See RNTupleFileWriter::WriteBlob() for details.
837 const size_t nChunks = ComputeNumChunks(nbytes, fMaxKeySize);
838 const size_t nbytesChunkOffsets = (nChunks - 1) * sizeof(std::uint64_t);
839 const size_t nbytesFirstChunk = fMaxKeySize - nbytesChunkOffsets;
840 uint8_t *bufCur = reinterpret_cast<uint8_t *>(buffer);
841
842 // Read first chunk
843 nread = fRawFile->ReadAt(bufCur, fMaxKeySize, offset);
844 R__ASSERT(nread == fMaxKeySize);
845 // NOTE: we read the entire chunk in `bufCur`, but we only advance the pointer by `nbytesFirstChunk`,
846 // since the last part of `bufCur` will later be overwritten by the next chunk's payload.
847 // We do this to avoid a second ReadAt to read in the chunk offsets.
850
851 const auto chunkOffsets = std::make_unique<std::uint64_t[]>(nChunks - 1);
853
855 std::uint64_t *curChunkOffset = &chunkOffsets[0];
856
857 do {
858 std::uint64_t chunkOffset;
861
862 const size_t bytesToRead = std::min<size_t>(fMaxKeySize, remainingBytes);
863 // Ensure we don't read outside of the buffer
864 R__ASSERT(static_cast<size_t>(bufCur - reinterpret_cast<uint8_t *>(buffer)) <= nbytes - bytesToRead);
865
866 auto nbytesRead = fRawFile->ReadAt(bufCur, bytesToRead, chunkOffset);
868
872 } while (remainingBytes > 0);
873 }
875}
876
877////////////////////////////////////////////////////////////////////////////////
878
880
882{
883 static_assert(kHeaderBlockSize % kBlockAlign == 0, "invalid header block size");
884 if (bufferSize % kBlockAlign != 0)
885 throw RException(R__FAIL("Buffer size not a multiple of alignment: " + std::to_string(bufferSize)));
886 fBlockSize = bufferSize;
887
888 std::align_val_t blockAlign{kBlockAlign};
889 fHeaderBlock = static_cast<unsigned char *>(::operator new[](kHeaderBlockSize, blockAlign));
890 memset(fHeaderBlock, 0, kHeaderBlockSize);
891 fBlock = static_cast<unsigned char *>(::operator new[](fBlockSize, blockAlign));
892 memset(fBlock, 0, fBlockSize);
893}
894
896{
897 if (fFile)
898 fclose(fFile);
899
900 std::align_val_t blockAlign{kBlockAlign};
901 if (fHeaderBlock)
902 ::operator delete[](fHeaderBlock, blockAlign);
903 if (fBlock)
904 ::operator delete[](fBlock, blockAlign);
905}
906
namespace {
/// Repositions `stream` to `offset` relative to `origin`.
/// Uses fseeko64 where R__SEEK64 is defined and plain fseek otherwise; the result of that call is passed through.
int FSeek64(FILE *stream, std::int64_t offset, int origin)
{
#ifdef R__SEEK64
   const int rv = fseeko64(stream, offset, origin);
#else
   const int rv = fseek(stream, offset, origin);
#endif
   return rv;
}
} // namespace
917
919{
920 // Write the last partially filled block, which may still need appropriate alignment for Direct I/O.
921 // If it is the first block, get the updated header block.
922 if (fBlockOffset == 0) {
923 std::size_t headerBlockSize = kHeaderBlockSize;
924 if (headerBlockSize > fFilePos) {
925 headerBlockSize = fFilePos;
926 }
927 memcpy(fBlock, fHeaderBlock, headerBlockSize);
928 }
929
930 std::size_t retval = FSeek64(fFile, fBlockOffset, SEEK_SET);
931 if (retval)
932 throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno)));
933
934 std::size_t lastBlockSize = fFilePos - fBlockOffset;
935 R__ASSERT(lastBlockSize <= fBlockSize);
936 if (fDirectIO) {
937 // Round up to a multiple of kBlockAlign.
938 lastBlockSize += kBlockAlign - 1;
939 lastBlockSize = (lastBlockSize / kBlockAlign) * kBlockAlign;
940 R__ASSERT(lastBlockSize <= fBlockSize);
941 }
942 retval = fwrite(fBlock, 1, lastBlockSize, fFile);
943 if (retval != lastBlockSize)
944 throw RException(R__FAIL(std::string("write failed: ") + strerror(errno)));
945
946 // Write the (updated) header block, unless it was part of the write above.
947 if (fBlockOffset > 0) {
948 retval = FSeek64(fFile, 0, SEEK_SET);
949 if (retval)
950 throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno)));
951
952 retval = fwrite(fHeaderBlock, 1, kHeaderBlockSize, fFile);
953 if (retval != RFileSimple::kHeaderBlockSize)
954 throw RException(R__FAIL(std::string("write failed: ") + strerror(errno)));
955 }
956
957 retval = fflush(fFile);
958 if (retval)
959 throw RException(R__FAIL(std::string("Flush failed: ") + strerror(errno)));
960}
961
963 std::int64_t offset)
964{
965 R__ASSERT(fFile);
966 size_t retval;
967 if ((offset >= 0) && (static_cast<std::uint64_t>(offset) != fFilePos)) {
968 fFilePos = offset;
969 }
970
971 // Keep header block to overwrite on commit.
972 if (fFilePos < kHeaderBlockSize) {
973 std::size_t headerBytes = nbytes;
974 if (fFilePos + headerBytes > kHeaderBlockSize) {
975 headerBytes = kHeaderBlockSize - fFilePos;
976 }
977 memcpy(fHeaderBlock + fFilePos, buffer, headerBytes);
978 }
979
980 R__ASSERT(fFilePos >= fBlockOffset);
981
982 while (nbytes > 0) {
983 std::uint64_t posInBlock = fFilePos % fBlockSize;
984 std::uint64_t blockOffset = fFilePos - posInBlock;
985 if (blockOffset != fBlockOffset) {
986 // Write the block.
987 retval = FSeek64(fFile, fBlockOffset, SEEK_SET);
988 if (retval)
989 throw RException(R__FAIL(std::string("Seek failed: ") + strerror(errno)));
990
991 retval = fwrite(fBlock, 1, fBlockSize, fFile);
992 if (retval != fBlockSize)
993 throw RException(R__FAIL(std::string("write failed: ") + strerror(errno)));
994
995 // Null the buffer contents for good measure.
996 memset(fBlock, 0, fBlockSize);
997 }
998
999 fBlockOffset = blockOffset;
1000 std::size_t blockSize = nbytes;
1001 if (blockSize > fBlockSize - posInBlock) {
1002 blockSize = fBlockSize - posInBlock;
1003 }
1004 memcpy(fBlock + posInBlock, buffer, blockSize);
1005 buffer = static_cast<const unsigned char *>(buffer) + blockSize;
1006 nbytes -= blockSize;
1007 fFilePos += blockSize;
1008 }
1009}
1010
1012 const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset, std::uint64_t directoryOffset,
1013 const std::string &className, const std::string &objectName, const std::string &title)
1014{
1015 if (offset > 0)
1016 fKeyOffset = offset;
1017 RTFString strClass{className};
1018 RTFString strObject{objectName};
1019 RTFString strTitle{title};
1020
1021 RTFKey key(fKeyOffset, directoryOffset, strClass, strObject, strTitle, len, nbytes);
1022 Write(&key, key.fKeyHeaderSize, fKeyOffset);
1023 Write(&strClass, strClass.GetSize());
1024 Write(&strObject, strObject.GetSize());
1025 Write(&strTitle, strTitle.GetSize());
1026 auto offsetData = fFilePos;
1027 // The next key starts after the data.
1028 fKeyOffset = offsetData + nbytes;
1029 if (buffer)
1030 Write(buffer, nbytes);
1031
1032 return offsetData;
1033}
1034
1035////////////////////////////////////////////////////////////////////////////////
1036
1038 std::int64_t offset)
1039{
1040 fDirectory->GetFile()->Seek(offset);
1041 bool rv = fDirectory->GetFile()->WriteBuffer((char *)(buffer), nbytes);
1042 if (rv)
1043 throw RException(R__FAIL("WriteBuffer failed."));
1044}
1045
// RFileProper::WriteKey: reserves a key in the TFile via RKeyBlob, then writes the key header, the key strings
// and (if `buffer` is not null) the data record at the reserved location.
// Returns the file offset of the data record.
1046std::uint64_t
1048{
1049 std::uint64_t offsetKey;
1050 RKeyBlob keyBlob(fDirectory->GetFile());
1051 // Since it is unknown beforehand if offsetKey is beyond the 2GB limit or not,
1052 // RKeyBlob will always reserve space for a big key (version >= 1000)
1053 keyBlob.Reserve(nbytes, &offsetKey);
1054
// `offset` is the running write position; it starts at the reserved key and advances past each written part.
1055 auto offset = offsetKey;
1056 RTFString strClass{kBlobClassName};
1057 RTFString strObject;
1058 RTFString strTitle;
// NOTE(review): the declaration of keyHeader is not visible in this listing.
1060 // Follow the fact that RKeyBlob is a big key unconditionally (see above)
1061 keyHeader.MakeBigKey();
1062
1063 Write(&keyHeader, keyHeader.fKeyHeaderSize, offset);
1064 offset += keyHeader.fKeyHeaderSize;
1065 Write(&strClass, strClass.GetSize(), offset);
1066 offset += strClass.GetSize();
1067 Write(&strObject, strObject.GetSize(), offset);
1068 offset += strObject.GetSize();
1069 Write(&strTitle, strTitle.GetSize(), offset);
1070 offset += strTitle.GetSize();
// The data record starts right after the key header and the three key strings.
1071 auto offsetData = offset;
1072 if (buffer)
1073 Write(buffer, nbytes, offset);
1074
1075 return offsetData;
1076}
1077
1078////////////////////////////////////////////////////////////////////////////////
1079
// RNTupleFileWriter constructor: stores the RNTuple name and allocates the control block of the simple-file writer.
1081 : fNTupleName(name)
1082{
1083 fFileSimple.fControlBlock = std::make_unique<ROOT::Experimental::Internal::RTFileControlBlock>();
// Look up the streamer info of the RNTuple class.
// NOTE(review): the statements before and after this line (including the one using infoRNTuple) are not
// visible in this listing.
1085 auto infoRNTuple = RNTuple::Class()->GetStreamerInfo();
1087}
1088
1090
// RNTupleFileWriter::Recreate: opens (creating or truncating) the file at `path` as a C file stream and returns a
// writer for a new RNTuple named `ntupleName`. Depending on `containerFormat`, either the TFile skeleton or the
// bare file skeleton is written right away. Buffer size, direct I/O, max key size and default compression are
// taken from `options`.
1091std::unique_ptr<ROOT::Experimental::Internal::RNTupleFileWriter>
1094 const RNTupleWriteOptions &options)
1095{
// Keep only the last path component (after the last '/' or '\') as the file name stored in the writer.
1096 std::string fileName(path);
1097 size_t idxDirSep = fileName.find_last_of("\\/");
1098 if (idxDirSep != std::string::npos) {
1099 fileName.erase(0, idxDirSep + 1);
1100 }
// On Linux the file is opened through open() so that additional flags (O_DIRECT) can be passed.
1101#ifdef R__LINUX
1102 int flags = O_WRONLY | O_CREAT | O_TRUNC;
1103#ifdef O_LARGEFILE
1104 // Add the equivalent flag that is passed by fopen64.
1105 flags |= O_LARGEFILE;
1106#endif
1107 if (options.GetUseDirectIO()) {
1108 flags |= O_DIRECT;
1109 }
// NOTE(review): the return value of open() is not checked in the visible lines; confirm that a failed open is
// caught (e.g. through a check of fileStream) before the stream is used.
1110 int fd = open(std::string(path).c_str(), flags, 0666);
1111 FILE *fileStream = fdopen(fd, "wb");
1112#else
1113#ifdef R__SEEK64
1114 FILE *fileStream = fopen64(std::string(path.data(), path.size()).c_str(), "wb");
1115#else
1116 FILE *fileStream = fopen(std::string(path.data(), path.size()).c_str(), "wb");
1117#endif
1118#endif
1120 // RNTupleFileWriter::RFileSimple does its own buffering, turn off additional buffering from C stdio.
1121 std::setvbuf(fileStream, nullptr, _IONBF, 0);
1122
// The constructor is invoked with `new` rather than std::make_unique.
// NOTE(review): presumably because the constructor is not publicly accessible -- confirm in the header.
1123 auto writer = std::unique_ptr<RNTupleFileWriter>(new RNTupleFileWriter(ntupleName, options.GetMaxKeySize()));
1124 writer->fFileSimple.fFile = fileStream;
1125 writer->fFileSimple.fDirectIO = options.GetUseDirectIO();
1126 writer->fFileSimple.AllocateBuffers(options.GetWriteBufferSize());
1127 writer->fFileName = fileName;
1128
// Write the container-specific file skeleton; an unknown container format is an internal error.
1129 int defaultCompression = options.GetCompression();
1130 switch (containerFormat) {
1131 case EContainerFormat::kTFile: writer->WriteTFileSkeleton(defaultCompression); break;
1132 case EContainerFormat::kBare:
1133 writer->fIsBare = true;
1134 writer->WriteBareFileSkeleton(defaultCompression);
1135 break;
1136 default: R__ASSERT(false && "Internal error: unhandled container format");
1137 }
1138
1139 return writer;
1140}
1141
// RNTupleFileWriter::Append: returns a writer that adds an RNTuple named `ntupleName` to an existing TFile
// through the given directory. Throws an RException if the directory is not backed by a file.
1142std::unique_ptr<ROOT::Experimental::Internal::RNTupleFileWriter>
1144 std::uint64_t maxKeySize)
1145{
1146 TFile *file = fileOrDirectory.GetFile();
1147 if (!file)
1148 throw RException(R__FAIL("invalid attempt to add an RNTuple to a directory that is not backed by a file"));
// Only checked in builds where assert() is active.
1149 assert(file->IsBinary());
1150
// Only the "proper" (TFile-based) writer is set up; fFileSimple stays unused in this mode.
1151 auto writer = std::unique_ptr<RNTupleFileWriter>(new RNTupleFileWriter(ntupleName, maxKeySize));
1152 writer->fFileProper.fDirectory = &fileOrDirectory;
1153 return writer;
1154}
1155
1161
// RNTupleFileWriter::Commit: finalizes the file so that the RNTuple anchor (and through it the header and
// footer) can be found. Three cases are handled:
//  1. TFile-backed writer: the anchor is written as an object and the TFile is written out.
//  2. Bare file: the anchor and its checksum are copied into the header block, which is then flushed.
//  3. TFile container written by a C stream: the remaining TFile records are written and the file header and
//     TFile record in the header block are updated and flushed.
1163{
1164 if (fFileProper) {
1165 // Easy case, the ROOT file header and the RNTuple streaming is taken care of by TFile
1166 fFileProper.fDirectory->WriteObject(&fNTupleAnchor, fNTupleName.c_str());
1167
1168 // Make sure the streamer info records used in the RNTuple are written to the file
// NOTE(review): the declaration of `buf` is not visible in this listing.
1170 buf.SetParent(fFileProper.fDirectory->GetFile());
1171 for (auto [_, info] : fStreamerInfoMap)
1172 buf.TagStreamerInfo(info);
1173
1174 fFileProper.fDirectory->GetFile()->Write();
1175 return;
1176 }
1177
1178 // Writing by C file stream: prepare the container format header and stream the RNTuple anchor object
1179 R__ASSERT(fFileSimple);
1180
1181 if (fIsBare) {
1182 RTFNTuple ntupleOnDisk(fNTupleAnchor);
1183 // Compute the checksum
1184 std::uint64_t checksum = XXH3_64bits(ntupleOnDisk.GetPtrCkData(), ntupleOnDisk.GetSizeCkData());
// Overwrite the placeholder anchor in the header block (at fSeekNTuple) and append the checksum right behind it.
1185 memcpy(fFileSimple.fHeaderBlock + fFileSimple.fControlBlock->fSeekNTuple, &ntupleOnDisk, ntupleOnDisk.GetSize());
1186 memcpy(fFileSimple.fHeaderBlock + fFileSimple.fControlBlock->fSeekNTuple + ntupleOnDisk.GetSize(), &checksum,
1187 sizeof(checksum));
1188 fFileSimple.Flush();
1189 return;
1190 }
1191
// TFile container written by C stream: append the trailing records in this order.
1192 WriteTFileNTupleKey();
1193 WriteTFileKeysList();
1194 WriteTFileStreamerInfo();
1195 WriteTFileFreeList();
1196
1197 // Update header and TFile record
1198 memcpy(fFileSimple.fHeaderBlock, &fFileSimple.fControlBlock->fHeader, fFileSimple.fControlBlock->fHeader.GetSize());
// The TFile record must lie entirely within the header block before it is copied there.
1199 R__ASSERT(fFileSimple.fControlBlock->fSeekFileRecord + fFileSimple.fControlBlock->fFileRecord.GetSize() <
1200 RFileSimple::kHeaderBlockSize);
1201 memcpy(fFileSimple.fHeaderBlock + fFileSimple.fControlBlock->fSeekFileRecord,
1202 &fFileSimple.fControlBlock->fFileRecord, fFileSimple.fControlBlock->fFileRecord.GetSize());
1203
1204 fFileSimple.Flush();
1205}
1206
// RNTupleFileWriter::WriteBlob: writes `nbytes` bytes from `data` as a new record and returns the offset of the
// record. `len` is the object length stored in the key. Payloads larger than the anchor's fMaxKeySize are split
// into several keys; in that case the returned offset is that of the first chunk.
1208{
// Helper that writes one record in whichever mode the writer is in (bare file, simple TFile, proper TFile)
// and returns the offset of the written payload.
1209 auto writeKey = [this](const void *payload, size_t nBytes, size_t length) {
1210 std::uint64_t offset;
1211 if (fFileSimple) {
1212 if (fIsBare) {
// Bare files carry no key header: the payload is written as-is and fKeyOffset is advanced by hand.
1213 offset = fFileSimple.fKeyOffset;
1214 fFileSimple.Write(payload, nBytes);
1215 fFileSimple.fKeyOffset += nBytes;
1216 } else {
1217 offset = fFileSimple.WriteKey(payload, nBytes, length, -1, 100, kBlobClassName);
1218 }
1219 } else {
1220 offset = fFileProper.WriteKey(payload, nBytes, length);
1221 }
1222 return offset;
1223 };
1224
1225 const std::uint64_t maxKeySize = fNTupleAnchor.fMaxKeySize;
1226 R__ASSERT(maxKeySize > 0);
1227 // We don't need the object length except for seeing compression ratios in TFile::Map()
1228 // Make sure that the on-disk object length fits into the TKey header.
1229 if (static_cast<std::uint64_t>(len) > static_cast<std::uint64_t>(std::numeric_limits<std::uint32_t>::max()))
1230 len = nbytes;
1231
1232 if (nbytes <= maxKeySize) {
1233 // Fast path: only write 1 key.
1234 return writeKey(data, nbytes, len);
1235 }
1236
1237 /**
1238 * Writing a key bigger than the max allowed size. In this case we split the payload
1239 * into multiple keys, reserving the end of the first key payload for pointers to the
1240 * next ones. E.g. if a key needs to be split into 3 chunks, the first chunk will have
1241 * the format:
1242 * +--------------------+
1243 * | |
1244 * | Data |
1245 * |--------------------|
1246 * | pointer to chunk 2 |
1247 * | pointer to chunk 3 |
1248 * +--------------------+
1249 */
1250 const size_t nChunks = ComputeNumChunks(nbytes, maxKeySize);
// One 64-bit offset is stored for every chunk except the first one.
1251 const size_t nbytesChunkOffsets = (nChunks - 1) * sizeof(std::uint64_t);
// NOTE(review): the declarations of nbytesFirstChunk and remainingBytes are not visible in this listing.
1253 // Skip writing the first chunk, it will be written last (in the file) below.
1254
1255 const uint8_t *chunkData = reinterpret_cast<const uint8_t *>(data) + nbytesFirstChunk;
1257
1258 const auto chunkOffsetsToWrite = std::make_unique<std::uint64_t[]>(nChunks - 1);
1259 std::uint64_t chunkOffsetIdx = 0;
1260
// Write the chunks after the first one, each at most maxKeySize bytes.
// NOTE(review): the statements that record `offset` and advance chunkData / remainingBytes are not visible in
// this listing.
1261 do {
1262 const size_t bytesNextChunk = std::min<size_t>(remainingBytes, maxKeySize);
1263 const std::uint64_t offset = writeKey(chunkData, bytesNextChunk, bytesNextChunk);
1264
1267
1270
1271 } while (remainingBytes > 0);
1272
1273 // Write the first key, with part of the data and the pointers to (logically) following keys appended.
1274 const std::uint64_t firstOffset = ReserveBlob(maxKeySize, maxKeySize);
1275 WriteIntoReservedBlob(data, nbytesFirstChunk, firstOffset);
// The chunk offsets are placed directly behind the data part of the first chunk.
1276 const std::uint64_t chunkOffsetsOffset = firstOffset + nbytesFirstChunk;
1277 WriteIntoReservedBlob(chunkOffsetsToWrite.get(), nbytesChunkOffsets, chunkOffsetsOffset);
1278
1279 return firstOffset;
1280}
1281
// RNTupleFileWriter::ReserveBlob: reserves space for a record of `nbytes` bytes without writing any payload and
// returns the offset at which the payload is to be written later. `len` is the object length stored in the key.
1283{
1284 // ReserveBlob cannot be used to reserve a multi-key blob
1285 R__ASSERT(nbytes <= fNTupleAnchor.GetMaxKeySize());
1286
1287 std::uint64_t offset;
1288 if (fFileSimple) {
1289 if (fIsBare) {
// Bare file: no key header, simply advance the key offset past the reserved range.
1290 offset = fFileSimple.fKeyOffset;
1291 fFileSimple.fKeyOffset += nbytes;
1292 } else {
// A null buffer makes WriteKey write only the key header and strings.
1293 offset = fFileSimple.WriteKey(/*buffer=*/nullptr, nbytes, len, -1, 100, kBlobClassName);
1294 }
1295 } else {
1296 offset = fFileProper.WriteKey(/*buffer=*/nullptr, nbytes, len);
1297 }
1298 return offset;
1299}
1300
// RNTupleFileWriter::WriteIntoReservedBlob: writes `nbytes` bytes from `buffer` at the absolute file position
// `offset`, using whichever backend (simple C stream or TFile) is active. No range check is performed here.
1302 std::int64_t offset)
1303{
1304 if (fFileSimple) {
1305 fFileSimple.Write(buffer, nbytes, offset);
1306 } else {
1307 fFileProper.Write(buffer, nbytes, offset);
1308 }
1309}
1310
// RNTupleFileWriter::WriteNTupleHeader: writes the header blob and records its location in the anchor.
// `nbytes` is the size of the data written, `lenHeader` is stored as the header length.
// Returns the offset of the written blob.
1311std::uint64_t
1313{
1314 auto offset = WriteBlob(data, nbytes, lenHeader);
1315 fNTupleAnchor.fLenHeader = lenHeader;
1316 fNTupleAnchor.fNBytesHeader = nbytes;
1317 fNTupleAnchor.fSeekHeader = offset;
1318 return offset;
1319}
1320
// RNTupleFileWriter::WriteNTupleFooter: writes the footer blob and records its location in the anchor.
// `nbytes` is the size of the data written, `lenFooter` is stored as the footer length.
// Returns the offset of the written blob.
1321std::uint64_t
1323{
1324 auto offset = WriteBlob(data, nbytes, lenFooter);
1325 fNTupleAnchor.fLenFooter = lenFooter;
1326 fNTupleAnchor.fNBytesFooter = nbytes;
1327 fNTupleAnchor.fSeekFooter = offset;
1328 return offset;
1329}
1330
// RNTupleFileWriter::WriteBareFileSkeleton: writes the start of a bare file: the bare header (carrying the
// default compression), the RNTuple name, and a placeholder anchor plus checksum that Commit() fills in later.
1332{
1333 RBareFileHeader bareHeader;
1334 bareHeader.fCompress = defaultCompression;
// The header goes to file offset 0; the following writes continue at the current file position.
1335 fFileSimple.Write(&bareHeader, sizeof(bareHeader), 0);
1336 RTFString ntupleName{fNTupleName};
1337 fFileSimple.Write(&ntupleName, ntupleName.GetSize());
1338
1339 // Write zero-initialized ntuple to reserve the space; will be overwritten on commit
1340 RTFNTuple ntupleOnDisk;
// Remember where the anchor lives so that Commit() can overwrite it.
1341 fFileSimple.fControlBlock->fSeekNTuple = fFileSimple.fFilePos;
1342 fFileSimple.Write(&ntupleOnDisk, ntupleOnDisk.GetSize());
1343 std::uint64_t checksum = 0;
1344 fFileSimple.Write(&checksum, sizeof(checksum));
// Subsequent records start right behind the placeholder anchor and checksum.
1345 fFileSimple.fKeyOffset = fFileSimple.fFilePos;
1346}
1347
// RNTupleFileWriter::WriteTFileStreamerInfo: streams the list of streamer infos into a buffer and writes it as
// the "StreamerInfo" TList key of the simple TFile container, recording its position and size in the file header.
1349{
1350 // The streamer info record is a TList of TStreamerInfo object. We cannot use
1351 // RNTupleSerializer::SerializeStreamerInfos because that uses TBufferIO::WriteObject.
1352 // This would prepend the streamed TList with self-description information.
1353 // The streamer info record is just the streamed TList.
1354
// NOTE(review): the declaration of streamerInfoList and the loop body that fills it are not visible in this
// listing.
1356 for (auto [_, info] : fStreamerInfoMap) {
1358 }
1359
1360 // We will stream the list with a TBufferFile. When reading the streamer info records back,
1361 // the read buffer includes the key and the streamed list. Therefore, we need to start streaming
1362 // with an offset of the key length. Otherwise, the offset for referencing duplicate objects in the
1363 // buffer will point to the wrong places.
1364
1365 // Figure out key length
1366 RTFString strTList{"TList"};
1367 RTFString strStreamerInfo{"StreamerInfo"};
1368 RTFString strStreamerTitle{"Doubly linked list"};
1369 fFileSimple.fControlBlock->fHeader.SetSeekInfo(fFileSimple.fKeyOffset);
// A temporary key is constructed only to obtain the key length for these strings.
1370 auto keyLen =
1371 RTFKey(fFileSimple.fControlBlock->fHeader.GetSeekInfo(), 100, strTList, strStreamerInfo, strStreamerTitle, 0)
1372 .fKeyLen;
1373
1374 TBufferFile buffer(TBuffer::kWrite, keyLen + 1);
1375 buffer.SetBufferOffset(keyLen);
1376 streamerInfoList.Streamer(buffer);
1377 assert(buffer.Length() > keyLen);
// The payload is the part of the buffer behind the skipped key-length prefix.
1378 const auto bufPayload = buffer.Buffer() + keyLen;
1379 const auto lenPayload = buffer.Length() - keyLen;
1380
// NOTE(review): the statements that compress bufPayload into zipStreamerInfos and define szZipStreamerInfos are
// not visible in this listing.
1382 auto zipStreamerInfos = std::make_unique<unsigned char[]>(lenPayload);
1384
1385 fFileSimple.WriteKey(zipStreamerInfos.get(), szZipStreamerInfos, lenPayload,
1386 fFileSimple.fControlBlock->fHeader.GetSeekInfo(), 100, "TList", "StreamerInfo",
1387 "Doubly linked list");
// The size of the info record is the distance from its start to the current file position.
1388 fFileSimple.fControlBlock->fHeader.SetNbytesInfo(fFileSimple.fFilePos -
1389 fFileSimple.fControlBlock->fHeader.GetSeekInfo());
1390}
1391
// RNTupleFileWriter::WriteTFileKeysList: writes the keys list record of the simple TFile container. The list
// contains exactly one entry, the key of the RNTuple anchor. The record's position and size are stored in the
// TFile record of the control block.
1393{
1394 RTFString strEmpty;
1395 RTFString strRNTupleClass{"ROOT::RNTuple"};
1396 RTFString strRNTupleName{fNTupleName};
1397 RTFString strFileName{fFileName};
1398
// Key describing the RNTuple anchor located at fSeekNTuple; it is embedded in the keys list below.
1399 RTFKey keyRNTuple(fFileSimple.fControlBlock->fSeekNTuple, 100, strRNTupleClass, strRNTupleName, strEmpty,
1400 RTFNTuple::GetSizePlusChecksum());
1401
1402 fFileSimple.fControlBlock->fFileRecord.SetSeekKeys(fFileSimple.fKeyOffset);
1403 RTFKeyList keyList{1};
// Key of the keys list record itself; its payload is the list header plus the full RNTuple key.
1404 RTFKey keyKeyList(fFileSimple.fControlBlock->fFileRecord.GetSeekKeys(), 100, strEmpty, strFileName, strEmpty,
1405 keyList.GetSize() + keyRNTuple.fKeyLen);
// Only the first write is positioned explicitly; the rest continue at the current file position.
1406 fFileSimple.Write(&keyKeyList, keyKeyList.fKeyHeaderSize, fFileSimple.fControlBlock->fFileRecord.GetSeekKeys());
1407 fFileSimple.Write(&strEmpty, strEmpty.GetSize());
1408 fFileSimple.Write(&strFileName, strFileName.GetSize());
1409 fFileSimple.Write(&strEmpty, strEmpty.GetSize());
1410 fFileSimple.Write(&keyList, keyList.GetSize());
1411 fFileSimple.Write(&keyRNTuple, keyRNTuple.fKeyHeaderSize);
1412 // Write class name, object name, and title for this key.
1413 fFileSimple.Write(&strRNTupleClass, strRNTupleClass.GetSize());
1414 fFileSimple.Write(&strRNTupleName, strRNTupleName.GetSize());
1415 fFileSimple.Write(&strEmpty, strEmpty.GetSize());
// The size of the keys record is the distance from its start to the current file position.
1416 fFileSimple.fControlBlock->fFileRecord.fNBytesKeys =
1417 fFileSimple.fFilePos - fFileSimple.fControlBlock->fFileRecord.GetSeekKeys();
1418 fFileSimple.fKeyOffset = fFileSimple.fFilePos;
1419}
1420
// RNTupleFileWriter::WriteTFileFreeList: writes the free list record of the simple TFile container with a
// single free entry, and stores the record's position, its size and the end-of-file position in the file header.
1422{
1423 fFileSimple.fControlBlock->fHeader.SetSeekFree(fFileSimple.fKeyOffset);
1424 RTFString strEmpty;
1425 RTFString strFileName{fFileName};
1426 RTFFreeEntry freeEntry;
// This key object is only used to compute the size of the record; the record itself is written by WriteKey below.
1427 RTFKey keyFreeList(fFileSimple.fControlBlock->fHeader.GetSeekFree(), 100, strEmpty, strFileName, strEmpty,
1428 freeEntry.GetSize());
// The first free byte is the one right behind the free list record.
1429 std::uint64_t firstFree = fFileSimple.fControlBlock->fHeader.GetSeekFree() + keyFreeList.GetSize();
// Second argument: the next multiple of 1,000,000,000 above firstFree, but at least 2,000,000,000.
1430 freeEntry.Set(firstFree, std::max(2000000000ULL, ((firstFree / 1000000000ULL) + 1) * 1000000000ULL));
1431 fFileSimple.WriteKey(&freeEntry, freeEntry.GetSize(), freeEntry.GetSize(),
1432 fFileSimple.fControlBlock->fHeader.GetSeekFree(), 100, "", fFileName, "");
1433 fFileSimple.fControlBlock->fHeader.SetNbytesFree(fFileSimple.fFilePos -
1434 fFileSimple.fControlBlock->fHeader.GetSeekFree());
1435 fFileSimple.fControlBlock->fHeader.SetEnd(fFileSimple.fFilePos);
1436}
1437
// RNTupleFileWriter::WriteTFileNTupleKey: writes the RNTuple anchor followed by its XXH3 checksum as a
// "ROOT::RNTuple" key at the current key offset, and remembers that offset in fSeekNTuple.
1439{
// NOTE(review): the three RTFString locals below are not used in the visible part of this function.
1440 RTFString strRNTupleClass{"ROOT::RNTuple"};
1441 RTFString strRNTupleName{fNTupleName};
1442 RTFString strEmpty;
1443
1444 RTFNTuple ntupleOnDisk(fNTupleAnchor);
1445 RUInt64BE checksum{XXH3_64bits(ntupleOnDisk.GetPtrCkData(), ntupleOnDisk.GetSizeCkData())};
1446 fFileSimple.fControlBlock->fSeekNTuple = fFileSimple.fKeyOffset;
1447
1448 char keyBuf[RTFNTuple::GetSizePlusChecksum()];
1449
1450 // concatenate the RNTuple anchor with its checksum
1451 memcpy(keyBuf, &ntupleOnDisk, sizeof(RTFNTuple));
1452 memcpy(keyBuf + sizeof(RTFNTuple), &checksum, sizeof(checksum));
1453
// The payload is passed with identical on-disk size and object length.
1454 fFileSimple.WriteKey(keyBuf, sizeof(keyBuf), sizeof(keyBuf), fFileSimple.fControlBlock->fSeekNTuple, 100,
1455 "ROOT::RNTuple", fNTupleName, "");
1456}
1457
// RNTupleFileWriter::WriteTFileSkeleton: for a TFile container written by a C file stream, initializes the file
// header in the control block and writes the first record (the TFile object) starting at offset 100. The TFile
// record written here is a placeholder that is overwritten on commit.
1459{
1460 RTFString strTFile{"TFile"};
1461 RTFString strFileName{fFileName};
1462 RTFString strEmpty;
1463
1464 fFileSimple.fControlBlock->fHeader = RTFHeader(defaultCompression);
1465
1466 RTFUUID uuid;
1467
1468 // First record of the file: the TFile object at offset 100
1469 RTFKey keyRoot(100, 0, strTFile, strFileName, strEmpty,
1470 sizeof(RTFDirectory) + strFileName.GetSize() + strEmpty.GetSize() + uuid.GetSize());
// The same nbytesName value is stored in both the TFile record and the file header.
1471 std::uint32_t nbytesName = keyRoot.fKeyLen + strFileName.GetSize() + 1;
1472 fFileSimple.fControlBlock->fFileRecord.fNBytesName = nbytesName;
1473 fFileSimple.fControlBlock->fHeader.SetNbytesName(nbytesName);
1474
// Only the first write is positioned explicitly (offset 100); the rest continue at the current file position.
1475 fFileSimple.Write(&keyRoot, keyRoot.fKeyHeaderSize, 100);
1476 // Write class name, object name, and title for the TFile key.
1477 fFileSimple.Write(&strTFile, strTFile.GetSize());
1478 fFileSimple.Write(&strFileName, strFileName.GetSize());
1479 fFileSimple.Write(&strEmpty, strEmpty.GetSize());
1480 // Write the name and title of the TNamed preceding the TFile entry.
1481 fFileSimple.Write(&strFileName, strFileName.GetSize());
1482 fFileSimple.Write(&strEmpty, strEmpty.GetSize());
1483 // Will be overwritten on commit
// Remember where the TFile record lives so that Commit() can update it in the header block.
1484 fFileSimple.fControlBlock->fSeekFileRecord = fFileSimple.fFilePos;
1485 fFileSimple.Write(&fFileSimple.fControlBlock->fFileRecord, fFileSimple.fControlBlock->fFileRecord.GetSize());
1486 fFileSimple.Write(&uuid, uuid.GetSize());
1487
1488 // Padding bytes to allow the TFile record to grow for a big file
// Three zero-valued RUInt32BE words are appended.
1489 RUInt32BE padding{0};
1490 for (int i = 0; i < 3; ++i)
1491 fFileSimple.Write(&padding, sizeof(padding));
// Data records start right behind the padding.
1492 fFileSimple.fKeyOffset = fFileSimple.fFilePos;
1493}
#define R__FAIL(msg)
Short-hand to return an RResult<T> in an error state; the RError is implicitly converted into RResult...
Definition RError.hxx:290
static size_t ComputeNumChunks(size_t nbytes, size_t maxChunkSize)
#define ROOT_VERSION_CODE
Definition RVersion.hxx:24
#define ClassDefInlineOverride(name, id)
Definition Rtypes.h:358
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t UChar_t len
char name[80]
Definition TGX11.cxx:110
Binding & operator=(OUT(*fun)(void))
void ReadBuffer(char *&buffer) override
T1 fFirst
Definition X11Events.mm:86
#define _(A, B)
Definition cfortran.h:108
The RKeyBlob writes an invisible key into a TFile.
void Reserve(size_t nbytes, std::uint64_t *seekKey)
Register a new key for a data record of size nbytes.
std::uint64_t SearchInDirectory(std::uint64_t &offsetDir, std::string_view keyName, std::string_view typeName)
Searches for a key with the given name and type in the key index of the directory starting at offsetD...
RResult< RNTuple > GetNTupleBare(std::string_view ntupleName)
Used when the file container turns out to be a bare file.
RResult< RNTuple > GetNTuple(std::string_view ntupleName)
Extracts header and footer location for the RNTuple identified by ntupleName.
RResult< RNTuple > GetNTupleProper(std::string_view ntuplePath)
Used when the file turns out to be a TFile container.
void ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset)
Reads a given byte range from the file into the provided memory buffer.
Helper class to compress data blocks in the ROOT compression frame format.
Helper class to uncompress data blocks in the ROOT compression frame format.
Write RNTuple data blocks in a TFile or a bare file container.
RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize)
std::uint64_t WriteBlob(const void *data, size_t nbytes, size_t len)
Writes a new record as an RBlob key into the file.
std::uint64_t WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter)
Writes the compressed footer and registers its location; lenFooter is the size of the uncompressed f...
void WriteTFileKeysList()
Write the TList with the RNTuple key.
void UpdateStreamerInfos(const RNTupleSerializer::StreamerInfoMap_t &streamerInfos)
Ensures that the streamer info records passed as argument are written to the file.
void Commit()
Writes the RNTuple key to the file so that the header and footer keys can be found.
std::uint64_t ReserveBlob(size_t nbytes, size_t len)
Reserves a new record as an RBlob key in the file.
RFileSimple fFileSimple
For simple use cases, survives without libRIO dependency.
void WriteTFileNTupleKey()
The only key that will be visible in file->ls()
void WriteTFileFreeList()
Last record in the file.
EContainerFormat
For testing purposes, RNTuple data can be written into a bare file container instead of a ROOT file.
static std::unique_ptr< RNTupleFileWriter > Recreate(std::string_view ntupleName, std::string_view path, EContainerFormat containerFormat, const RNTupleWriteOptions &options)
Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
static std::unique_ptr< RNTupleFileWriter > Append(std::string_view ntupleName, TDirectory &fileOrDirectory, std::uint64_t maxKeySize)
The directory parameter can also be a TFile object (TFile inherits from TDirectory).
void WriteTFileSkeleton(int defaultCompression)
For a TFile container written by a C file stream, write the header and TFile object.
void WriteBareFileSkeleton(int defaultCompression)
For a bare file, which is necessarily written by a C file stream, write file header.
void WriteTFileStreamerInfo()
Write the compressed streamer info record with the description of the RNTuple class.
std::uint64_t WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader)
Writes the compressed header and registers its location; lenHeader is the size of the uncompressed h...
RNTuple fNTupleAnchor
Header and footer location of the ntuple, written on Commit()
void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset)
Write into a reserved record; the caller is responsible for making sure that the written byte range i...
RNTupleSerializer::StreamerInfoMap_t fStreamerInfoMap
Set of streamer info records that should be written to the file.
static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer)
std::map< Int_t, TVirtualStreamerInfo * > StreamerInfoMap_t
static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val)
Base class for all ROOT issued exceptions.
Definition RError.hxx:78
Common user-tunable settings for storing ntuples.
The class is used as a return type for operations that can fail; wraps a value of type T or an RError...
Definition RError.hxx:194
The RRawFile provides read-only access to local and remote files.
Definition RRawFile.hxx:43
Representation of an RNTuple data set in a ROOT file.
Definition RNTuple.hxx:69
std::uint64_t fMaxKeySize
The maximum size for a TKey payload. Payloads bigger than this size will be written as multiple blobs...
Definition RNTuple.hxx:109
static TClass * Class()
The concrete implementation of TBuffer for writing/reading to/from a ROOT file or socket.
Definition TBufferFile.h:47
void TagStreamerInfo(TVirtualStreamerInfo *info) override
Mark the classindex of the current file as using this TStreamerInfo.
void SetParent(TObject *parent)
Set parent owning this buffer.
Definition TBuffer.cxx:270
@ kWrite
Definition TBuffer.h:73
void SetBufferOffset(Int_t offset=0)
Definition TBuffer.h:93
Int_t Length() const
Definition TBuffer.h:100
char * Buffer() const
Definition TBuffer.h:96
Describe directory structure in memory.
Definition TDirectory.h:45
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
Bool_t IsBinary() const
Definition TFile.h:259
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition TKey.h:28
Int_t Sizeof() const override
Return the size in bytes of the key header structure.
Definition TKey.cxx:1342
Int_t fVersion
Key version identifier.
Definition TKey.h:39
Short_t fKeylen
Number of bytes for the key itself.
Definition TKey.h:43
Long64_t fSeekKey
Location of object on file.
Definition TKey.h:45
virtual void Create(Int_t nbytes, TFile *f=nullptr)
Create a TKey object of specified size.
Definition TKey.cxx:460
TString fClassName
Object Class name.
Definition TKey.h:47
A doubly linked list.
Definition TList.h:38
#define Swap(a, b)
Definition geom.c:201
RNTuple CreateAnchor(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor, std::uint16_t versionPatch, std::uint64_t seekHeader, std::uint64_t nbytesHeader, std::uint64_t lenHeader, std::uint64_t seekFooter, std::uint64_t nbytesFooter, std::uint64_t lenFooter, std::uint64_t maxKeySize)
Definition RNTuple.cxx:64
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Helper templated class for swapping bytes; specializations for N={2,4,8} are provided below.
Definition Byteswap.h:124
void Write(const void *buffer, size_t nbytes, std::int64_t offset)
Low-level writing using a TFile.
std::uint64_t WriteKey(const void *buffer, size_t nbytes, size_t len)
Writes an RBlob opaque key with the provided buffer as data record and returns the offset of the reco...
std::unique_ptr< ROOT::Experimental::Internal::RTFileControlBlock > fControlBlock
Keeps track of TFile control structures, which need to be updated on committing the data set.
void Write(const void *buffer, size_t nbytes, std::int64_t offset=-1)
Writes bytes in the open stream, either at fFilePos or at the given offset.
std::uint64_t WriteKey(const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset=-1, std::uint64_t directoryOffset=100, const std::string &className="", const std::string &objectName="", const std::string &title="")
Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the ...
If a TFile container is written by a C stream (simple file), on dataset commit, the file header and t...
auto * tt
Definition textangle.C:16