Skip to content

Commit ad44658

Browse files
committed
[ntuple] Make RNTupleProcessors named
Naming `RNTupleProcessors` is necessary to make the `RNTupleJoinProcessor` composable. Currently, we use the name of the auxiliary ntuple as prefix to the field names, but when this is abstracted to another processor, we need something else to identify these fields. Adding names to processors also fixes the current issue where two RNTuples with the same name cannot be joined. Providing a name is optional; by default, for single processors the name of the ntuple is used. For chain processors, the name of the first ntuple in the chain is used. For join processors, the name of the main ntuple is used.
1 parent 990597b commit ad44658

File tree

2 files changed

+146
-14
lines changed

2 files changed

+146
-14
lines changed

tree/ntuple/v7/inc/ROOT/RNTupleProcessor.hxx

Lines changed: 94 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ protected:
122122
bool IsAuxiliary() const { return fNTupleIdx > 0; }
123123
};
124124

125+
std::string fProcessorName;
125126
std::vector<RNTupleOpenSpec> fNTuples;
126127
std::unique_ptr<REntry> fEntry;
127128
std::unique_ptr<Internal::RPageSource> fPageSource;
@@ -163,12 +164,17 @@ protected:
163164
/////////////////////////////////////////////////////////////////////////////
164165
/// \brief Create a new base RNTupleProcessor.
165166
///
166-
/// \param[in] ntuples The input RNTuples for processing
167+
/// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for
168+
/// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary
169+
/// RNTuple for RNTupleJoinProcessor.
167170
/// \param[in] model The RNTupleModel representing the entries returned by the processor.
168171
///
169172
/// \note Before processing, a model *must* exist. However, this is handled downstream by the RNTupleProcessor's
170173
/// factory functions (CreateSingle, CreateChain and CreateJoin) and constructors.
171-
RNTupleProcessor(std::unique_ptr<RNTupleModel> model) : fModel(std::move(model)) {}
174+
RNTupleProcessor(std::string_view processorName, std::unique_ptr<RNTupleModel> model)
175+
: fProcessorName(processorName), fModel(std::move(model))
176+
{
177+
}
172178

173179
public:
174180
RNTupleProcessor(const RNTupleProcessor &) = delete;
@@ -191,6 +197,14 @@ public:
191197
/// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned.
192198
std::size_t GetCurrentProcessorNumber() const { return fCurrentProcessorNumber; }
193199

200+
/////////////////////////////////////////////////////////////////////////////
201+
/// \brief Get the name of the processor.
202+
///
203+
/// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying
204+
/// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the
205+
/// primary RNTuple for RNTupleJoinProcessor.
206+
const std::string &GetProcessorName() const { return fProcessorName; }
207+
194208
const RNTupleModel &GetModel() const { return *fModel; }
195209

196210
/////////////////////////////////////////////////////////////////////////////
@@ -260,6 +274,20 @@ public:
260274
static std::unique_ptr<RNTupleProcessor>
261275
Create(const RNTupleOpenSpec &ntuple, std::unique_ptr<RNTupleModel> model = nullptr);
262276

277+
/////////////////////////////////////////////////////////////////////////////
278+
/// \brief Create an `RNTupleProcessor` for a single RNTuple.
279+
///
280+
/// \param[in] ntuple The name and storage location of the RNTuple to process.
281+
/// \param[in] processorName The name to give to the processor. Use
282+
/// Create(const RNTupleOpenSpec &, std::unique_ptr<RNTupleModel>) to automatically use the name of the input RNTuple
283+
/// instead.
284+
/// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
285+
/// one will be created based on the descriptor of the first ntuple specified.
286+
///
287+
/// \return A pointer to the newly created RNTupleProcessor.
288+
static std::unique_ptr<RNTupleProcessor>
289+
Create(const RNTupleOpenSpec &ntuple, std::string_view processorName, std::unique_ptr<RNTupleModel> model = nullptr);
290+
263291
/////////////////////////////////////////////////////////////////////////////
264292
/// \brief Create a new RNTuple processor chain for vertical concatenation of RNTuples.
265293
///
@@ -271,6 +299,21 @@ public:
271299
static std::unique_ptr<RNTupleProcessor>
272300
CreateChain(const std::vector<RNTupleOpenSpec> &ntuples, std::unique_ptr<RNTupleModel> model = nullptr);
273301

302+
/////////////////////////////////////////////////////////////////////////////
303+
/// \brief Create a new RNTuple processor chain for vertical combinations of RNTuples.
304+
///
305+
/// \param[in] ntuples A list specifying the names and locations of the ntuples to process.
306+
/// \param[in] processorName The name to give to the processor. Use
307+
/// CreateChain(const std::vector<RNTupleOpenSpec> &, std::unique_ptr<RNTupleModel>) to automatically use the name of the first
308+
/// input RNTuple instead.
309+
/// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
310+
/// one will be created based on the descriptor of the first ntuple specified.
311+
///
312+
/// \return A pointer to the newly created RNTupleProcessor.
313+
static std::unique_ptr<RNTupleProcessor> CreateChain(const std::vector<RNTupleOpenSpec> &ntuples,
314+
std::string_view processorName,
315+
std::unique_ptr<RNTupleModel> model = nullptr);
316+
274317
/////////////////////////////////////////////////////////////////////////////
275318
/// \brief Create a new RNTuple processor chain for vertical concatenation of previously created processors.
276319
///
@@ -282,6 +325,21 @@ public:
282325
static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors,
283326
std::unique_ptr<RNTupleModel> model = nullptr);
284327

328+
/////////////////////////////////////////////////////////////////////////////
329+
/// \brief Create a new RNTuple processor chain for vertically combining other RNTupleProcessors.
330+
///
331+
/// \param[in] innerProcessors A list with the processors to chain.
332+
/// \param[in] processorName The name to give to the processor. Use
333+
/// CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>>, std::unique_ptr<RNTupleModel>) to automatically use
334+
/// the name of the first inner processor instead.
335+
/// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
336+
/// one will be inferred from the model of the first inner processor.
337+
///
338+
/// \return A pointer to the newly created RNTupleProcessor.
339+
static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors,
340+
std::string_view processorName,
341+
std::unique_ptr<RNTupleModel> model = nullptr);
342+
285343
/////////////////////////////////////////////////////////////////////////////
286344
/// \brief Create a new RNTuple processor for horizontally concatenated RNTuples.
287345
///
@@ -301,6 +359,29 @@ public:
301359
static std::unique_ptr<RNTupleProcessor> CreateJoin(const std::vector<RNTupleOpenSpec> &ntuples,
302360
const std::vector<std::string> &joinFields,
303361
std::vector<std::unique_ptr<RNTupleModel>> models = {});
362+
363+
/////////////////////////////////////////////////////////////////////////////
364+
/// \brief Create a new RNTuple processor for horizontally combined RNTuples.
365+
///
366+
/// \param[in] ntuples A list specifying the names and locations of the ntuples to process. The first ntuple in the
367+
/// list will be considered the primary ntuple and drives the processor iteration loop. Subsequent ntuples are
368+
/// considered auxiliary, whose entries to be read are determined by the primary ntuple (which does not necessarily
369+
/// have to be sequential).
370+
/// \param[in] joinFields The names of the fields on which to join, in case the specified ntuples are unaligned.
371+
/// The join is made based on the combined join field values, and therefore each field has to be present in each
372+
/// specified RNTuple. If an empty list is provided, it is assumed that the specified ntuples are fully aligned, and
373+
/// `RNTupleIndex` will not be used.
374+
/// \param[in] processorName The name to give to the processor. Use
375+
/// CreateJoin(const std::vector<RNTupleOpenSpec> &, const std::vector<std::string> &, std::vector<std::unique_ptr<RNTupleModel>>)
376+
/// to automatically use the name of the primary RNTuple instead.
377+
/// \param[in] models A list of models for the ntuples. This list must either contain a model for each ntuple in
378+
/// `ntuples` (following the specification order), or be empty. When the list is empty, the default model (i.e.
379+
/// containing all fields) will be used for each ntuple.
380+
///
381+
/// \return A pointer to the newly created RNTupleProcessor.
382+
static std::unique_ptr<RNTupleProcessor>
383+
CreateJoin(const std::vector<RNTupleOpenSpec> &ntuples, const std::vector<std::string> &joinFields,
384+
std::string_view processorName, std::vector<std::unique_ptr<RNTupleModel>> models = {});
304385
};
305386

306387
// clang-format off
@@ -343,8 +424,11 @@ private:
343424
/// \brief Constructs a new RNTupleProcessor for processing a single RNTuple.
344425
///
345426
/// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process.
427+
/// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is
428+
/// the name of the underlying RNTuple.
346429
/// \param[in] model The model that specifies which fields should be read by the processor.
347-
RNTupleSingleProcessor(const RNTupleOpenSpec &ntuple, std::unique_ptr<RNTupleModel> model);
430+
RNTupleSingleProcessor(const RNTupleOpenSpec &ntuple, std::string_view processorName,
431+
std::unique_ptr<RNTupleModel> model);
348432
};
349433

350434
// clang-format off
@@ -382,12 +466,14 @@ private:
382466
/// \brief Constructs a new RNTupleChainProcessor.
383467
///
384468
/// \param[in] processors The inner processors to chain.
469+
/// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this
470+
/// is the name of the first inner processor.
385471
/// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by
386472
/// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is
387473
/// specified, it is created from the descriptor of the first RNTuple specified in `ntuples`.
388474
///
389475
/// RNTuples are processed in the order in which they are specified.
390-
RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors,
476+
RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName,
391477
std::unique_ptr<RNTupleModel> model);
392478
};
393479

@@ -427,10 +513,13 @@ private:
427513
/// \brief Constructs a new RNTupleJoinProcessor.
428514
///
429515
/// \param[in] mainNTuple The source specification (name and storage location) of the primary RNTuple.
516+
/// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this
517+
/// is the name of the main RNTuple.
430518
/// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by
431519
/// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is
432520
/// specified, it is created from the RNTuple's descriptor.
433-
RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple, std::unique_ptr<RNTupleModel> model = nullptr);
521+
RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple, std::string_view processorName,
522+
std::unique_ptr<RNTupleModel> model = nullptr);
434523

435524
/////////////////////////////////////////////////////////////////////////////
436525
/// \brief Add an auxiliary RNTuple to the processor.

tree/ntuple/v7/src/RNTupleProcessor.cxx

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,14 @@ void EnsureUniqueNTupleNames(const std::vector<RNTupleOpenSpec> &ntuples)
3434
std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
3535
ROOT::Experimental::RNTupleProcessor::Create(const RNTupleOpenSpec &ntuple, std::unique_ptr<RNTupleModel> model)
3636
{
37-
return std::unique_ptr<RNTupleSingleProcessor>(new RNTupleSingleProcessor(ntuple, std::move(model)));
37+
return Create(ntuple, ntuple.fNTupleName, std::move(model));
38+
}
39+
40+
std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
41+
ROOT::Experimental::RNTupleProcessor::Create(const RNTupleOpenSpec &ntuple, std::string_view processorName,
42+
std::unique_ptr<RNTupleModel> model)
43+
{
44+
return std::unique_ptr<RNTupleSingleProcessor>(new RNTupleSingleProcessor(ntuple, processorName, std::move(model)));
3845
}
3946

4047
std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
@@ -44,6 +51,16 @@ ROOT::Experimental::RNTupleProcessor::CreateChain(const std::vector<RNTupleOpenS
4451
if (ntuples.empty())
4552
throw RException(R__FAIL("at least one RNTuple must be provided"));
4653

54+
return CreateChain(ntuples, ntuples[0].fNTupleName, std::move(model));
55+
}
56+
57+
std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
58+
ROOT::Experimental::RNTupleProcessor::CreateChain(const std::vector<RNTupleOpenSpec> &ntuples,
59+
std::string_view processorName, std::unique_ptr<RNTupleModel> model)
60+
{
61+
if (ntuples.empty())
62+
throw RException(R__FAIL("at least one RNTuple must be provided"));
63+
4764
std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors;
4865
innerProcessors.reserve(ntuples.size());
4966

@@ -58,7 +75,7 @@ ROOT::Experimental::RNTupleProcessor::CreateChain(const std::vector<RNTupleOpenS
5875
innerProcessors.emplace_back(Create(ntuple, model->Clone()));
5976
}
6077

61-
return CreateChain(std::move(innerProcessors), std::move(model));
78+
return CreateChain(std::move(innerProcessors), processorName, std::move(model));
6279
}
6380

6481
std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
@@ -68,18 +85,40 @@ ROOT::Experimental::RNTupleProcessor::CreateChain(std::vector<std::unique_ptr<RN
6885
if (innerProcessors.empty())
6986
throw RException(R__FAIL("at least one inner processor must be provided"));
7087

88+
auto processorName = innerProcessors[0]->GetProcessorName();
89+
return CreateChain(std::move(innerProcessors), processorName, std::move(model));
90+
}
91+
92+
std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
93+
ROOT::Experimental::RNTupleProcessor::CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors,
94+
std::string_view processorName, std::unique_ptr<RNTupleModel> model)
95+
{
96+
if (innerProcessors.empty())
97+
throw RException(R__FAIL("at least one inner processor must be provided"));
98+
7199
// If no model is provided, infer it from the first inner processor.
72100
if (!model) {
73101
model = innerProcessors[0]->GetModel().Clone();
74102
}
75103

76104
return std::unique_ptr<RNTupleChainProcessor>(
77-
new RNTupleChainProcessor(std::move(innerProcessors), std::move(model)));
105+
new RNTupleChainProcessor(std::move(innerProcessors), processorName, std::move(model)));
106+
}
107+
108+
std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
109+
ROOT::Experimental::RNTupleProcessor::CreateJoin(const std::vector<RNTupleOpenSpec> &ntuples,
110+
const std::vector<std::string> &joinFields,
111+
std::vector<std::unique_ptr<RNTupleModel>> models)
112+
{
113+
if (ntuples.empty())
114+
throw RException(R__FAIL("at least one RNTuple must be provided"));
115+
return CreateJoin(ntuples, joinFields, ntuples[0].fNTupleName, std::move(models));
78116
}
79117

80118
std::unique_ptr<ROOT::Experimental::RNTupleProcessor>
81119
ROOT::Experimental::RNTupleProcessor::CreateJoin(const std::vector<RNTupleOpenSpec> &ntuples,
82120
const std::vector<std::string> &joinFields,
121+
std::string_view processorName,
83122
std::vector<std::unique_ptr<RNTupleModel>> models)
84123
{
85124
if (ntuples.size() < 1)
@@ -103,9 +142,10 @@ ROOT::Experimental::RNTupleProcessor::CreateJoin(const std::vector<RNTupleOpenSp
103142

104143
std::unique_ptr<RNTupleJoinProcessor> processor;
105144
if (models.size() > 0) {
106-
processor = std::unique_ptr<RNTupleJoinProcessor>(new RNTupleJoinProcessor(ntuples[0], std::move(models[0])));
145+
processor = std::unique_ptr<RNTupleJoinProcessor>(
146+
new RNTupleJoinProcessor(ntuples[0], processorName, std::move(models[0])));
107147
} else {
108-
processor = std::unique_ptr<RNTupleJoinProcessor>(new RNTupleJoinProcessor(ntuples[0]));
148+
processor = std::unique_ptr<RNTupleJoinProcessor>(new RNTupleJoinProcessor(ntuples[0], processorName));
109149
}
110150

111151
for (unsigned i = 1; i < ntuples.size(); ++i) {
@@ -144,8 +184,9 @@ void ROOT::Experimental::RNTupleProcessor::ConnectField(RFieldContext &fieldCont
144184
//------------------------------------------------------------------------------
145185

146186
ROOT::Experimental::RNTupleSingleProcessor::RNTupleSingleProcessor(const RNTupleOpenSpec &ntuple,
187+
std::string_view processorName,
147188
std::unique_ptr<RNTupleModel> model)
148-
: RNTupleProcessor(std::move(model)), fNTupleSpec(ntuple)
189+
: RNTupleProcessor(processorName, std::move(model)), fNTupleSpec(ntuple)
149190
{
150191
if (!fModel) {
151192
fPageSource = Internal::RPageSource::Create(fNTupleSpec.fNTupleName, fNTupleSpec.fStorage);
@@ -216,8 +257,9 @@ void ROOT::Experimental::RNTupleSingleProcessor::Connect()
216257
//------------------------------------------------------------------------------
217258

218259
ROOT::Experimental::RNTupleChainProcessor::RNTupleChainProcessor(
219-
std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::unique_ptr<RNTupleModel> model)
220-
: RNTupleProcessor(std::move(model)), fInnerProcessors(std::move(processors))
260+
std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName,
261+
std::unique_ptr<RNTupleModel> model)
262+
: RNTupleProcessor(processorName, std::move(model)), fInnerProcessors(std::move(processors))
221263
{
222264
fInnerNEntries.assign(fInnerProcessors.size(), kInvalidNTupleIndex);
223265

@@ -303,8 +345,9 @@ ROOT::NTupleSize_t ROOT::Experimental::RNTupleChainProcessor::LoadEntry(ROOT::NT
303345
//------------------------------------------------------------------------------
304346

305347
ROOT::Experimental::RNTupleJoinProcessor::RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple,
348+
std::string_view processorName,
306349
std::unique_ptr<RNTupleModel> model)
307-
: RNTupleProcessor(nullptr)
350+
: RNTupleProcessor(processorName, nullptr)
308351
{
309352
fNTuples.emplace_back(mainNTuple);
310353
fPageSource = Internal::RPageSource::Create(mainNTuple.fNTupleName, mainNTuple.fStorage);

0 commit comments

Comments
 (0)