Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RNTupleJoinTable.hxx
Go to the documentation of this file.
1/// \file ROOT/RNTupleJoinTable.hxx
2/// \ingroup NTuple
3/// \author Florine de Geus <florine.de.geus@cern.ch>
4/// \date 2024-04-02
5/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6/// is welcome!
7
8/*************************************************************************
9 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
10 * All rights reserved. *
11 * *
12 * For the licensing terms see $ROOTSYS/LICENSE. *
13 * For the list of contributors see $ROOTSYS/README/CREDITS. *
14 *************************************************************************/
15
16#ifndef ROOT_RNTupleJoinTable
17#define ROOT_RNTupleJoinTable
18
19#include <ROOT/RField.hxx>
20
21#include <memory>
22#include <string>
23#include <unordered_map>
24#include <vector>
25
26namespace ROOT {
27namespace Experimental {
28namespace Internal {
29// clang-format off
30/**
31\class ROOT::Experimental::Internal::RNTupleJoinTable
32\ingroup NTuple
33\brief Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples.
34*/
35// clang-format on
37public:
38 using JoinValue_t = std::uint64_t;
39 using PartitionKey_t = std::uint64_t;
41
42private:
43 // clang-format off
44 /**
45 \class ROOT::Experimental::Internal::RNTupleJoinTable::REntryMapping
46 \ingroup NTuple
47 \brief Provides a mapping from one or several join field values to an entry index.
48 */
49 // clang-format on
51 private:
52 //////////////////////////////////////////////////////////////////////////
53 /// Container for the combined hashes of join field values.
55 std::vector<JoinValue_t> fJoinFieldValues;
56
58
59 inline bool operator==(const RCombinedJoinFieldValue &other) const
60 {
61 return other.fJoinFieldValues == fJoinFieldValues;
62 }
63 };
64
65 /////////////////////////////////////////////////////////////////////////////
66 /// Hash combining the individual join field value hashes from RCombinedJoinFieldValue. Uses the implementation
67 /// from `boost::hash_combine`. See
68 /// https://www.boost.org/doc/libs/1_87_0/libs/container_hash/doc/html/hash.html#notes_hash_combine for more
69 /// background. In particular, it mentions: "Several improvements of the 64 bit function have been subsequently
70 /// proposed, by [David Stafford](https://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html), [Pelle
71 /// Evensen](https://mostlymangling.blogspot.com/2019/12/stronger-better-morer-moremur-better.html), and [Jon
72 /// Maiga](http://jonkagstrom.com/mx3/mx3_rev2.html). We currently use Jon Maiga’s function."
73 ///
74 /// \note
75 /// \parblock
76 /// Copyright 2005-2014 Daniel James.
77 /// Copyright 2021, 2022 Peter Dimov.
78 /// Distributed under the Boost Software License, Version 1.0.
79 /// https://www.boost.org/LICENSE_1_0.txt
80 ///
81 /// Based on Peter Dimov's proposal
82 /// http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf
83 /// issue 6.18.
84 ///
85 /// Boost Software License - Version 1.0 - August 17th, 2003
86 ///
87 /// Permission is hereby granted, free of charge, to any person or organization
88 /// obtaining a copy of the software and accompanying documentation covered by
89 /// this license (the "Software") to use, reproduce, display, distribute,
90 /// execute, and transmit the Software, and to prepare derivative works of the
91 /// Software, and to permit third-parties to whom the Software is furnished to
92 /// do so, all subject to the following:
93 ///
94 /// The copyright notices in the Software and this entire statement, including
95 /// the above license grant, this restriction and the following disclaimer,
96 /// must be included in all copies of the Software, in whole or in part, and
97 /// all derivative works of the Software, unless such copies or derivative
98 /// works are solely in the form of machine-executable object code generated by
99 /// a source language processor.
100 ///
101 /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
102 /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
103 /// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
104 /// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
105 /// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
106 /// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
107 /// DEALINGS IN THE SOFTWARE.
108 /// \endparblock
110 inline std::size_t operator()(const RCombinedJoinFieldValue &joinFieldVal) const
111 {
112 std::size_t seed = 0;
113 for (const auto &fieldVal : joinFieldVal.fJoinFieldValues) {
114 seed ^= mix(seed + 0x9e3779b9 + fieldVal);
115 }
116 return seed;
117 }
118
119 inline std::size_t mix(std::size_t init) const
120 {
121 init ^= init >> 32;
122 init *= 0xe9846af9b1a615d;
123 init ^= init >> 32;
124 init *= 0xe9846af9b1a615d;
125 init ^= init >> 28;
126 return init;
127 }
128 };
129
130 /// The mapping itself. Maps field values (or combinations thereof in case the join key is composed of multiple
131 /// fields) to their respective entry numbers.
132 std::unordered_map<RCombinedJoinFieldValue, std::vector<ROOT::NTupleSize_t>, RCombinedJoinFieldValueHash>
134
135 /// Names of the join fields used for the mapping to their respective entry indexes.
136 std::vector<std::string> fJoinFieldNames;
137
138 /// The size (in bytes) for each join field, corresponding to `fJoinFieldNames`. This information is stored to be
139 /// able to properly cast incoming void pointers to the join field values in `GetEntryIndexes`.
140 std::vector<std::size_t> fJoinFieldValueSizes;
141
142 public:
143 //////////////////////////////////////////////////////////////////////////
144 /// \brief Get the entry indexes for this entry mapping.
145 const std::vector<ROOT::NTupleSize_t> *GetEntryIndexes(std::vector<void *> valuePtrs) const;
146
147 //////////////////////////////////////////////////////////////////////////
148 /// \brief Create a new entry mapping.
149 ///
150 /// \param[in] pageSource The page source of the RNTuple with the entries to map.
151 /// \param[in] joinFieldNames Names of the join fields to use in the mapping.
152 REntryMapping(ROOT::Internal::RPageSource &pageSource, const std::vector<std::string> &joinFieldNames);
153 };
154 /// Names of the join fields used for the mapping to their respective entry indexes.
155 std::vector<std::string> fJoinFieldNames;
156
157 /// Partitions of one or multiple entry mappings.
158 std::unordered_map<PartitionKey_t, std::vector<std::unique_ptr<REntryMapping>>> fPartitions;
159
160 /////////////////////////////////////////////////////////////////////////////
161 /// \brief Create an a new RNTupleJoinTable for the RNTuple represented by the provided page source.
162 ///
163 /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are
164 /// allowed.
165 RNTupleJoinTable(const std::vector<std::string> &joinFieldNames) : fJoinFieldNames(joinFieldNames) {}
166
167public:
172 ~RNTupleJoinTable() = default;
173
174 /////////////////////////////////////////////////////////////////////////////
175 /// \brief Create an RNTupleJoinTable from an existing RNTuple.
176 ///
177 /// \param[in] joinFieldNames The names of the join fields to use for the join table. Only integral-type fields are
178 /// allowed.
179 ///
180 /// \return A pointer to the newly-created join table.
181 static std::unique_ptr<RNTupleJoinTable> Create(const std::vector<std::string> &joinFieldNames);
182
183 /////////////////////////////////////////////////////////////////////////////
184 /// \brief Add an entry mapping to the join table.
185 ///
186 ///
187 /// \param[in] pageSource The page source of the RNTuple with the entries to map.
188 /// \param[in] partitionKey Which partition to add the mapping to. If not provided, it will be added to the default
189 /// partition.
190 ///
191 /// \return A reference to the updated join table.
193
194 /////////////////////////////////////////////////////////////////////////////
195 /// \brief Get all entry indexes for the given join field value(s) within a partition.
196 ///
197 /// \param[in] valuePtrs A vector of pointers to the join field values to look up.
198 /// \param[in] partitionKey The partition key to use for the lookup. If not provided, it will use the default
199 /// partition key.
200 ///
201 /// \return The entry numbers that correspond to `valuePtrs`. When there are no corresponding entries, an empty
202 /// vector is returned.
203 std::vector<ROOT::NTupleSize_t>
204 GetEntryIndexes(const std::vector<void *> &valuePtrs, PartitionKey_t partitionKey = kDefaultPartitionKey) const;
205
206 /////////////////////////////////////////////////////////////////////////////
207 /// \brief Get all entry indexes for the given join field value(s) for a specific set of partitions.
208 ///
209 /// \param[in] valuePtrs A vector of pointers to the join field values to look up.
210 /// \param[in] partitionKeys The partition keys to use for the lookup.
211 ///
212 /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition. When there are no corresponding
213 /// entries, an empty map is returned.
214 std::unordered_map<PartitionKey_t, std::vector<ROOT::NTupleSize_t>>
215 GetPartitionedEntryIndexes(const std::vector<void *> &valuePtrs,
216 const std::vector<PartitionKey_t> &partitionKeys) const;
217
218 /////////////////////////////////////////////////////////////////////////////
219 /// \brief Get all entry indexes for the given join field value(s) for all partitions.
220 ///
221 /// \param[in] valuePtrs A vector of pointers to the join field values to look up.
222 ///
223 /// \return The entry numbers that correspond to `valuePtrs`, grouped by partition. When there are no corresponding
224 /// entries, an empty map is returned.
225 std::unordered_map<PartitionKey_t, std::vector<ROOT::NTupleSize_t>>
226 GetPartitionedEntryIndexes(const std::vector<void *> &valuePtrs) const;
227};
228} // namespace Internal
229} // namespace Experimental
230} // namespace ROOT
231
232#endif // ROOT_RNTupleJoinTable
Provides a mapping from one or several join field values to an entry index.
std::vector< std::size_t > fJoinFieldValueSizes
The size (in bytes) for each join field, corresponding to fJoinFieldNames.
std::vector< std::string > fJoinFieldNames
Names of the join fields used for the mapping to their respective entry indexes.
REntryMapping(ROOT::Internal::RPageSource &pageSource, const std::vector< std::string > &joinFieldNames)
Create a new entry mapping.
std::unordered_map< RCombinedJoinFieldValue, std::vector< ROOT::NTupleSize_t >, RCombinedJoinFieldValueHash > fMapping
The mapping itself.
const std::vector< ROOT::NTupleSize_t > * GetEntryIndexes(std::vector< void * > valuePtrs) const
Get the entry indexes for this entry mapping.
Builds a join table on one or several fields of an RNTuple so it can be joined onto other RNTuples.
std::unordered_map< PartitionKey_t, std::vector< ROOT::NTupleSize_t > > GetPartitionedEntryIndexes(const std::vector< void * > &valuePtrs, const std::vector< PartitionKey_t > &partitionKeys) const
Get all entry indexes for the given join field value(s) for a specific set of partitions.
RNTupleJoinTable(RNTupleJoinTable &&other)=delete
std::vector< ROOT::NTupleSize_t > GetEntryIndexes(const std::vector< void * > &valuePtrs, PartitionKey_t partitionKey=kDefaultPartitionKey) const
Get all entry indexes for the given join field value(s) within a partition.
RNTupleJoinTable & operator=(RNTupleJoinTable &&other)=delete
RNTupleJoinTable & Add(ROOT::Internal::RPageSource &pageSource, PartitionKey_t partitionKey=kDefaultPartitionKey)
Add an entry mapping to the join table.
RNTupleJoinTable & operator=(const RNTupleJoinTable &other)=delete
static std::unique_ptr< RNTupleJoinTable > Create(const std::vector< std::string > &joinFieldNames)
Create an RNTupleJoinTable from an existing RNTuple.
std::vector< std::string > fJoinFieldNames
Names of the join fields used for the mapping to their respective entry indexes.
RNTupleJoinTable(const RNTupleJoinTable &other)=delete
RNTupleJoinTable(const std::vector< std::string > &joinFieldNames)
Create a new RNTupleJoinTable for the RNTuple represented by the provided page source.
static constexpr PartitionKey_t kDefaultPartitionKey
std::unordered_map< PartitionKey_t, std::vector< std::unique_ptr< REntryMapping > > > fPartitions
Partitions of one or multiple entry mappings.
Abstract interface to read data from an ntuple.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Hash combining the individual join field value hashes from RCombinedJoinFieldValue.
std::size_t operator()(const RCombinedJoinFieldValue &joinFieldVal) const